import requests
from bs4 import BeautifulSoup
import pandas as pd

# Base URL of the Open Library subjects endpoint
BASE_URL = "https://openlibrary.org/subjects/"

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10


def _author_names(work):
    """Return the names of a work's authors joined with ", "."""
    # "authors" may be missing or null, and an entry may lack "name";
    # skip those instead of raising.
    authors = work.get("authors") or []
    return ", ".join(
        author["name"] for author in authors if author.get("name")
    )


def _description(work):
    """Return a work's description text, or "No description" if absent."""
    description = work.get("description")
    # The description is either a plain value or a {"value": ...} mapping.
    if isinstance(description, dict):
        description = description.get("value")
    return "No description" if description is None else description


def scrape_books(genre, max_books=50):
    """Fetch books of one genre from the Open Library subjects endpoint.

    Args:
        genre: Subject name used in the URL path, e.g. "science_fiction".
        max_books: Value sent as the ``limit`` query parameter.

    Returns:
        A list of dicts with the keys "title", "author", "year", "genre"
        and "description", one per entry of the response's "works" list.
        An empty list is returned (after printing a message) when the
        request fails, the status code is not 200, or the body is not JSON.
    """
    url = f"{BASE_URL}{genre}.json"
    try:
        response = requests.get(
            url, params={"limit": max_books}, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same best-effort
        # contract as a bad status code: report and return nothing.
        print(f"Request to Open Library failed: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error accesing website: {response.status_code}")
        return []

    # Parse the JSON body
    try:
        data = response.json()
    except ValueError as exc:
        print(f"Open Library returned invalid JSON: {exc}")
        return []

    return [
        {
            "title": work.get("title", "Unknown"),
            "author": _author_names(work),
            "year": work.get("first_publish_year", "Unknown"),
            "genre": genre,
            "description": _description(work),
        }
        for work in data.get("works") or []
    ]


if __name__ == "__main__":
    # Example: scraping sci-fi books
    genre = "science_fiction"
    books = scrape_books(genre, max_books=100)

    if books:
        # Save the results as CSV
        df = pd.DataFrame(books)
        df.to_csv(f"books_{genre}.csv", index=False, encoding="utf-8")
        print(f"Data saved in books_{genre}.csv")
    else:
        # Avoid writing an empty file and claiming success after a failure.
        print(f"No books retrieved for {genre}; nothing saved")