import requests
import pandas as pd

# Base URL for Open Library subject queries
BASE_URL = "https://openlibrary.org/subjects/"

# Scrape books for a specific genre (Open Library subject)
def scrape_books(genre, max_books=50):
    url = f"{BASE_URL}{genre}.json?limit={max_books}"
    response = requests.get(url)
    if response.status_code != 200:
        print(f"Error accessing website: {response.status_code}")
        return []

    # Parse the JSON response
    data = response.json()
    books = []
    for book in data.get("works", []):
        # A description may be missing, a plain string, or a dict with a "value" key
        description = book.get("description", "No description")
        if isinstance(description, dict):
            description = description.get("value", "No description")

        books.append({
            "title": book.get("title", "Unknown"),
            "author": ", ".join(author["name"] for author in book.get("authors", [])),
            "year": book.get("first_publish_year", "Unknown"),
            "genre": genre,
            "description": description,
        })
    return books

# Example: scraping science fiction books
genre = "science_fiction"
books = scrape_books(genre, max_books=100)

# Save to CSV
df = pd.DataFrame(books)
df.to_csv(f"books_{genre}.csv", index=False, encoding="utf-8")
print(f"Data saved to books_{genre}.csv")