import requests
import pandas as pd

# Base URL for Open Library subject queries
BASE_URL = "https://openlibrary.org/subjects/"

# Scrape books for a specific genre (Open Library subject)
def scrape_books(genre, max_books=50):
    url = f"{BASE_URL}{genre}.json?limit={max_books}"
    response = requests.get(url)
    if response.status_code != 200:
        print(f"Error accessing website: {response.status_code}")
        return []

    # Parse the JSON response
    data = response.json()
    books = []
    for book in data.get("works", []):
        # A description may be missing, a plain string, or a dict with a "value" key
        description = book.get("description", "No description")
        if isinstance(description, dict):
            description = description.get("value", "No description")

        books.append({
            "title": book.get("title", "Unknown"),
            "author": ", ".join(author["name"] for author in book.get("authors", [])),
            "year": book.get("first_publish_year", "Unknown"),
            "genre": genre,
            "description": description,
        })
    return books

# Example: scraping science fiction books
genre = "science_fiction"
books = scrape_books(genre, max_books=100)

# Save to CSV
df = pd.DataFrame(books)
df.to_csv(f"books_{genre}.csv", index=False, encoding="utf-8")
print(f"Data saved to books_{genre}.csv")