Spaces:
Sleeping
Sleeping
import requests | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
# Base URL from open library | |
BASE_URL = "https://openlibrary.org/subjects/" | |
# Extract the books listed under a specific genre (Open Library subject)
def scrape_books(genre, max_books=50, timeout=10):
    """Fetch the works listed under one subject and return them as flat records.

    Requests ``{BASE_URL}{genre}.json?limit={max_books}`` and converts every
    entry of the ``"works"`` list in the JSON response into a dict.

    Args:
        genre: Subject slug appended to ``BASE_URL``, e.g. ``"science_fiction"``.
            It is also copied verbatim into every record's ``"genre"`` field.
        max_books: Value sent as the ``limit`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts with the keys ``"title"``, ``"author"``, ``"year"``,
        ``"genre"`` and ``"description"``. An empty list is returned (after
        printing a message) when the request fails, the status code is not
        200, or the body is not valid JSON.
    """
    url = f"{BASE_URL}{genre}.json?limit={max_books}"

    try:
        # Without an explicit timeout a stalled connection would block forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Request failed: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error accesing website: {response.status_code}")
        return []

    # Parse the JSON body; a 200 response with a non-JSON body raises ValueError.
    try:
        data = response.json()
    except ValueError as exc:
        print(f"Invalid JSON in response: {exc}")
        return []

    # Treat a missing or null "works" entry as "no results".
    works = data.get("works") if isinstance(data, dict) else None

    books = []
    for book in works or []:
        # Skip author entries without a usable name instead of raising KeyError.
        author_names = [
            author["name"]
            for author in book.get("authors") or []
            if isinstance(author, dict) and author.get("name")
        ]

        # The description may be a plain string or a {"value": ...} mapping.
        raw_description = book.get("description")
        if isinstance(raw_description, dict):
            description = raw_description.get("value", "No description")
        else:
            description = raw_description or "No description"

        books.append({
            "title": book.get("title", "Unknown"),
            # Fall back to "Unknown" like the other fields when no author is listed.
            "author": ", ".join(author_names) or "Unknown",
            "year": book.get("first_publish_year", "Unknown"),
            "genre": genre,
            "description": description,
        })
    return books
# Demo run: download up to 100 works for the science-fiction subject.
genre = "science_fiction"
books = scrape_books(genre=genre, max_books=100)

# Write the collected records to a UTF-8 CSV file, without the index column.
df = pd.DataFrame(data=books)
df.to_csv(
    path_or_buf=f"books_{genre}.csv",
    index=False,
    encoding="utf-8",
)
print(f"Data saved in books_{genre}.csv")