import os
import tempfile
import uuid

import cv2
import gradio as gr
import numpy as np
import pandas as pd
import pytesseract
import requests

# Column order shared by the results table and the exported CSV.
_COLUMNS = ["title", "author_name", "publisher", "first_publish_year"]

_OPENLIBRARY_SEARCH_URL = "https://openlibrary.org/search.json"


# ──────────────────────────────────────────────────────────────
# 1. OCR on the full image (always)
# ──────────────────────────────────────────────────────────────
def ocr_full_image(image: np.ndarray) -> str:
    """
    Run Tesseract OCR on the entire image (no thresholding).

    The image is converted with ``cv2.COLOR_BGR2GRAY`` and passed to
    Tesseract with ``--oem 3 --psm 6``.
    Return the OCR text with leading/trailing whitespace stripped.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # We skip explicit thresholding—sometimes stylized covers lose detail
    # under THRESH_OTSU.
    text = pytesseract.image_to_string(gray, config="--oem 3 --psm 6")
    return text.strip()


# ──────────────────────────────────────────────────────────────
# 2. Query OpenLibrary API
# ──────────────────────────────────────────────────────────────
def _join_values(values) -> str:
    """Render a field from a search hit (list, string or missing) as text."""
    if not values:
        return ""
    if isinstance(values, str):
        return values
    return ", ".join(str(value) for value in values)


def query_openlibrary(title_text: str, author_text: str | None = None) -> dict | None:
    """
    Search OpenLibrary by title (and optional author).

    Return a dict with the keys title, author_name, publisher and
    first_publish_year built from the first search hit, or None when the
    title is blank, the request fails, or there are no hits.
    """
    if not title_text or not title_text.strip():
        # Nothing to search for; skip the network round-trip.
        return None

    params = {"title": title_text}
    if author_text:
        params["author"] = author_text

    # Only the network call and JSON decoding are guarded; the lookup is
    # best-effort, so failures are reported and turned into "no match".
    try:
        resp = requests.get(_OPENLIBRARY_SEARCH_URL, params=params, timeout=5)
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        print(f"OpenLibrary query failed: {e}")
        return None

    docs = data.get("docs") if isinstance(data, dict) else None
    if not docs or not isinstance(docs[0], dict):
        return None

    doc = docs[0]
    return {
        "title": doc.get("title", ""),
        "author_name": _join_values(doc.get("author_name")),
        "publisher": _join_values(doc.get("publisher")),
        "first_publish_year": doc.get("first_publish_year", ""),
    }


# ──────────────────────────────────────────────────────────────
# 3. Process one uploaded image (single OCR pass)
# ──────────────────────────────────────────────────────────────
def _write_csv(df: pd.DataFrame) -> str:
    """Write *df* as CSV to a uniquely named file in the temp dir; return its path."""
    csv_bytes = df.to_csv(index=False).encode()
    temp_path = os.path.join(tempfile.gettempdir(), f"books_{uuid.uuid4().hex}.csv")
    with open(temp_path, "wb") as f:
        f.write(csv_bytes)
    return temp_path


def _to_bgr(image) -> np.ndarray:
    """Convert a PIL image (or an RGB/RGBA/grayscale array) to a 3-channel BGR array."""
    if hasattr(image, "convert"):
        # PIL input: force three channels so grayscale, palette and RGBA
        # uploads all end up with the same layout.
        image = image.convert("RGB")
    arr = np.asarray(image)
    if arr.ndim == 2:
        # Grayscale array: replicate the single channel.
        arr = np.stack([arr, arr, arr], axis=-1)
    # Take channels 2, 1, 0: reverses RGB to BGR and drops any alpha channel.
    return arr[:, :, 2::-1].copy()


def process_image(image_file):
    """
    Gradio passes either a PIL image or None.

    If image_file is None, return an empty DataFrame and the path of an
    empty (header-only) CSV. Otherwise, convert to OpenCV BGR, OCR the
    entire image, take the first two non-blank lines as title/author
    guesses, query OpenLibrary once, and return a DataFrame plus the path
    of a CSV file holding the same data. When OpenLibrary has no match the
    OCR guesses themselves are returned as the single row.
    """
    empty = pd.DataFrame(columns=_COLUMNS)

    if image_file is None:
        # No image provided → return empty table + an empty CSV file
        return empty, _write_csv(empty)

    img = _to_bgr(image_file)

    # 1) Run OCR on full image
    try:
        full_text = ocr_full_image(img)
    except pytesseract.pytesseract.TesseractNotFoundError:
        # If Tesseract isn’t installed, return empty DataFrame and log the issue
        print("ERROR: Tesseract not found. Did you add apt.txt with 'tesseract-ocr'?")
        return empty, _write_csv(empty)

    lines = [line.strip() for line in full_text.splitlines() if line.strip()]

    records = []
    if lines:
        # Use first line as title, second (if exists) as author
        title_guess = lines[0]
        author_guess = lines[1] if len(lines) > 1 else None
        meta = query_openlibrary(title_guess, author_guess)
        # No OpenLibrary match → still include OCR guesses
        records.append(
            meta
            or {
                "title": title_guess,
                "author_name": author_guess or "",
                "publisher": "",
                "first_publish_year": "",
            }
        )

    # Build DataFrame (even if empty)
    df = pd.DataFrame(records, columns=_COLUMNS)
    return df, _write_csv(df)


# ──────────────────────────────────────────────────────────────
# 4. Build the Gradio Interface
# ──────────────────────────────────────────────────────────────
def build_interface():
    """Assemble the Blocks UI: image upload, run button, results table, CSV download."""
    with gr.Blocks(title="Single‐Cover OCR + OpenLibrary Lookup") as demo:
        gr.Markdown(
            """
            ## Book Cover OCR + OpenLibrary Lookup
            1. Upload a photo of a single book cover.
            2. The app will run OCR on the full image, take:
               - the **first line** as a “title” guess, and
               - the **second line** as an “author” guess (if present), then
               - query OpenLibrary for metadata.
            3. Results display in a table and can be downloaded as CSV.

            > **Note:**
            > • Ensure Tesseract OCR is installed (see `apt.txt`).
            > • If no image is uploaded, the table and CSV will be empty.
            """
        )

        with gr.Row():
            img_in = gr.Image(type="pil", label="Upload Single Book Cover")
            run_button = gr.Button("Scan & Lookup")

        output_table = gr.Dataframe(
            headers=_COLUMNS,
            label="Detected Book Metadata",
            # `type` selects the Python-side container; `datatype` is for
            # per-column cell types and does not accept "pandas".
            type="pandas",
        )
        download_file = gr.File(label="Download CSV")

        run_button.click(
            fn=process_image,
            inputs=[img_in],
            outputs=[output_table, download_file],
        )

    return demo


if __name__ == "__main__":
    demo_app = build_interface()
    demo_app.launch()