Spaces:

LuisMBA
/

fine-tuned-chatbot

Sleeping

fine-tuned-chatbot / books_scraper.py

Scraper version 00

173f415 verified 9 months ago

1.25 kB

	import requests
	from bs4 import BeautifulSoup
	import pandas as pd

	# Base URL of the Open Library subjects endpoint; scrape_books appends
	# "<genre>.json" to it to build the request URL.
	BASE_URL = "https://openlibrary.org/subjects/"

	# Scrape the books listed under a specific genre
	def _book_record(book, genre):
	    """Build one flat record (title, author, year, genre, description) from a work entry."""
	    # "authors" may be absent or null, and an individual entry may lack "name".
	    authors = book.get("authors") or []
	    names = [entry.get("name") for entry in authors if isinstance(entry, dict)]
	    author = ", ".join(str(name) for name in names if name)

	    # The description is either a plain string or a dict holding the text under "value".
	    description = book.get("description")
	    if isinstance(description, dict):
	        description = description.get("value")

	    return {
	        "title": book.get("title", "Unknown"),
	        # Fall back to "Unknown" like the other fields instead of an empty string.
	        "author": author or "Unknown",
	        "year": book.get("first_publish_year", "Unknown"),
	        "genre": genre,
	        "description": description or "No description",
	    }


	def scrape_books(genre, max_books=50, timeout=10):
	    """Fetch the works listed under ``genre`` and return them as flat records.

	    Args:
	        genre: Subject name appended to ``BASE_URL`` (e.g. ``"science_fiction"``).
	        max_books: Value sent as the ``limit`` query parameter.
	        timeout: Seconds to wait for the server before giving up.

	    Returns:
	        A list of dicts with the keys ``title``, ``author``, ``year``,
	        ``genre`` and ``description``. An empty list is returned (after
	        printing a message) when the request fails, the status code is not
	        200, or the response body is not valid JSON.
	    """
	    url = f"{BASE_URL}{genre}.json"
	    try:
	        # Without a timeout, requests can block indefinitely on a stalled server.
	        response = requests.get(url, params={"limit": max_books}, timeout=timeout)
	    except requests.RequestException as exc:
	        print(f"Request failed: {exc}")
	        return []

	    if response.status_code != 200:
	        print(f"Error accesing website: {response.status_code}")
	        return []

	    # Parse the JSON body; response.json() raises a ValueError subclass on bad JSON.
	    try:
	        data = response.json()
	    except ValueError:
	        print("Error parsing response: body is not valid JSON")
	        return []

	    # Tolerate a missing or null "works" list.
	    works = data.get("works") if isinstance(data, dict) else None
	    return [_book_record(work, genre) for work in works or []]

	# Example: scrape science-fiction books
	genre = "science_fiction"
	books = scrape_books(genre, max_books=100)

	# Save to CSV. scrape_books returns [] when the request fails, so skip the
	# write in that case: a failed run should neither overwrite an earlier
	# export with a file that has no rows nor report success.
	df = pd.DataFrame(books)
	if books:
	    df.to_csv(f"books_{genre}.csv", index=False, encoding="utf-8")
	    print(f"Data saved in books_{genre}.csv")
	else:
	    print(f"No books retrieved for {genre}; books_{genre}.csv was not written")