Spaces:

ugolefoo
/

bookscanner_app

Runtime error

App Files Files Community

bookscanner_app / app.py

ugolefoo

Update app.py

fde34e3 verified 2 months ago

raw

history blame

7.34 kB

	import os
	import tempfile
	import uuid

	import cv2
	import gradio as gr
	import numpy as np
	import pandas as pd
	import pytesseract
	import requests

	# ──────────────────────────────────────────────────────────────
	# 1. OCR on the full image (always)
	# ──────────────────────────────────────────────────────────────
	def ocr_full_image(image: np.ndarray) -> str:
	    """
	    Extract text from a whole BGR image with Tesseract.

	    The image is only converted to grayscale before recognition; no
	    thresholding step is applied. The recognised text is returned with
	    leading and trailing whitespace removed.
	    """
	    # Thresholding is deliberately left out: stylized covers can lose
	    # detail under THRESH_OTSU.
	    tesseract_config = "--oem 3 --psm 6"
	    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    recognised = pytesseract.image_to_string(grayscale, config=tesseract_config)
	    return recognised.strip()

	# ──────────────────────────────────────────────────────────────
	# 2. Query OpenLibrary API
	# ──────────────────────────────────────────────────────────────
	def query_openlibrary(title_text: str, author_text: str = None) -> dict \| None:
	"""
	Search OpenLibrary by title (and optional author).
	Return a dict with title, author_name, publisher, first_publish_year, or None.
	"""
	base_url = "https://openlibrary.org/search.json"
	params = {"title": title_text}
	if author_text:
	params["author"] = author_text

	try:
	resp = requests.get(base_url, params=params, timeout=5)
	resp.raise_for_status()
	data = resp.json()
	if data.get("docs"):
	doc = data["docs"][0]
	return {
	"title": doc.get("title", ""),
	"author_name": ", ".join(doc.get("author_name", [])),
	"publisher": ", ".join(doc.get("publisher", [])),
	"first_publish_year": doc.get("first_publish_year", ""),
	}
	except Exception as e:
	print(f"OpenLibrary query failed: {e}")

	return None

	# ──────────────────────────────────────────────────────────────
	# 3. Process one uploaded image (single OCR pass)
	# ──────────────────────────────────────────────────────────────
	# Column order shared by every DataFrame / CSV produced by process_image.
	_BOOK_COLUMNS = ["title", "author_name", "publisher", "first_publish_year"]


	def _write_csv(df: pd.DataFrame) -> str:
	    """Dump df to a uniquely named CSV in the system temp directory; return its path."""
	    # tempfile.gettempdir() instead of a hard-coded "/tmp" keeps this portable.
	    # The file is intentionally left on disk: the returned path is handed to
	    # the caller for download.
	    temp_path = os.path.join(tempfile.gettempdir(), f"books_{uuid.uuid4().hex}.csv")
	    with open(temp_path, "wb") as f:
	        f.write(df.to_csv(index=False).encode())
	    return temp_path


	def _empty_result() -> tuple[pd.DataFrame, str]:
	    """Return an empty metadata table together with a header-only CSV path."""
	    df_empty = pd.DataFrame(columns=_BOOK_COLUMNS)
	    return df_empty, _write_csv(df_empty)


	def _pil_to_bgr(image_file) -> np.ndarray:
	    """Convert an uploaded image to a contiguous 3-channel BGR array."""
	    # Normalise PIL images to RGB first: a grayscale ("L") image would be a
	    # 2-D array and break the channel slice below, and an RGBA image would
	    # come out as 4-channel ABGR.
	    if hasattr(image_file, "convert"):
	        image_file = image_file.convert("RGB")
	    # Reverse the channel axis (RGB -> BGR); copy() makes the view contiguous.
	    return np.array(image_file)[:, :, ::-1].copy()


	def process_image(image_file):
	    """
	    Run the OCR + OpenLibrary lookup for one uploaded cover image.

	    Args:
	        image_file: A PIL image, or None when nothing was uploaded.

	    Returns:
	        A ``(DataFrame, csv_path)`` pair. The DataFrame has the columns
	        title, author_name, publisher, first_publish_year and holds at most
	        one row; csv_path points to a freshly written CSV with the same
	        content. Both are empty (header only) when image_file is None, when
	        Tesseract is not installed, or when OCR produced no text.
	    """
	    if image_file is None:
	        # No image provided -> empty table + an empty CSV file
	        return _empty_result()

	    img = _pil_to_bgr(image_file)

	    # 1) Run OCR on full image
	    try:
	        full_text = ocr_full_image(img)
	    except pytesseract.pytesseract.TesseractNotFoundError:
	        # Tesseract binary missing: log the issue and degrade to an empty result.
	        print("ERROR: Tesseract not found. Did you add apt.txt with 'tesseract-ocr'?")
	        return _empty_result()

	    lines = [line.strip() for line in full_text.splitlines() if line.strip()]

	    records = []
	    if lines:
	        # Use first line as title, second (if exists) as author
	        title_guess = lines[0]
	        author_guess = lines[1] if len(lines) > 1 else None
	        meta = query_openlibrary(title_guess, author_guess)

	        # No OpenLibrary match -> still include the raw OCR guesses.
	        records.append(meta or {
	            "title": title_guess,
	            "author_name": author_guess or "",
	            "publisher": "",
	            "first_publish_year": "",
	        })

	    # Build DataFrame (even if empty) and export it next to it as CSV.
	    df = pd.DataFrame(records, columns=_BOOK_COLUMNS)
	    return df, _write_csv(df)

	# ──────────────────────────────────────────────────────────────
	# 4. Build the Gradio Interface
	# ──────────────────────────────────────────────────────────────
	def build_interface():
	    """
	    Assemble the Gradio Blocks UI for the cover scanner.

	    Layout: an instructions panel, a row holding the image upload and the
	    "Scan & Lookup" button, then the results table and a CSV download slot.
	    Clicking the button feeds the uploaded image to process_image and shows
	    its (DataFrame, csv_path) result in the table and file components.

	    Returns:
	        The gr.Blocks app, not yet launched.
	    """
	    with gr.Blocks(title="Single‐Cover OCR + OpenLibrary Lookup") as demo:
	        gr.Markdown(
	            """
	            ## Book Cover OCR + OpenLibrary Lookup

	            1. Upload a photo of a single book cover.
	            2. The app will run OCR on the full image, take:
	            - the first line as a “title” guess, and
	            - the second line as an “author” guess (if present), then
	            - query OpenLibrary for metadata.
	            3. Results display in a table and can be downloaded as CSV.

	            > Note:
	            > • Ensure Tesseract OCR is installed (see `apt.txt`).
	            > • If no image is uploaded, the table and CSV will be empty.
	            """
	        )

	        with gr.Row():
	            img_in = gr.Image(type="pil", label="Upload Single Book Cover")
	            run_button = gr.Button("Scan & Lookup")

	        output_table = gr.Dataframe(
	            headers=["title", "author_name", "publisher", "first_publish_year"],
	            label="Detected Book Metadata",
	            # `type` selects the value format exchanged with the callback;
	            # `datatype` describes per-column cell types and does not accept
	            # "pandas".
	            type="pandas",
	        )
	        download_file = gr.File(label="Download CSV")

	        # process_image already returns (DataFrame, csv_path) in output order,
	        # so it can be wired in directly without a pass-through wrapper.
	        run_button.click(
	            fn=process_image,
	            inputs=[img_in],
	            outputs=[output_table, download_file],
	        )

	    return demo

	if __name__ == "__main__":
	    # Script entry point: assemble the Blocks UI and start serving it.
	    build_interface().launch()