import cv2
import numpy as np
import pytesseract
import requests
import pandas as pd
import gradio as gr
import uuid
import os
import tempfile

# Endpoint and timeout (seconds) used for the OpenLibrary search request.
OPENLIBRARY_SEARCH_URL = "https://openlibrary.org/search.json"
OPENLIBRARY_TIMEOUT_S = 5

# Column order shared by the result DataFrame, the CSV export and the UI table.
RESULT_COLUMNS = ["title", "author_name", "publisher", "first_publish_year"]


# ──────────────────────────────────────────────────────────────
# 1. OCR on the full image (always)
# ──────────────────────────────────────────────────────────────
def ocr_full_image(image: np.ndarray) -> str:
    """
    Run Tesseract OCR on the entire image (no thresholding).

    Args:
        image: OpenCV-style array, either BGR (3-D) or already grayscale (2-D).

    Returns:
        The raw OCR text with leading/trailing whitespace stripped.
    """
    # cvtColor(BGR2GRAY) rejects single-channel input, so pass 2-D arrays through.
    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Deliberately no thresholding: stylized covers can lose detail under THRESH_OTSU.
    text = pytesseract.image_to_string(gray, config="--oem 3 --psm 6")
    return text.strip()


# ──────────────────────────────────────────────────────────────
# 2. Query OpenLibrary API
# ──────────────────────────────────────────────────────────────
def query_openlibrary(title_text: str, author_text: str | None = None) -> dict | None:
    """
    Search OpenLibrary by title (and optional author).

    Args:
        title_text: Title to search for. A blank title short-circuits to None.
        author_text: Optional author filter; omitted from the query when falsy.

    Returns:
        A dict with keys title, author_name, publisher, first_publish_year
        built from the first search hit, or None when there is no hit or the
        request/response fails. Failures are reported on stdout, never raised.
    """
    if not title_text or not title_text.strip():
        # Nothing meaningful to search for; avoid a pointless network call.
        return None

    params = {"title": title_text}
    if author_text:
        params["author"] = author_text

    # Only the network call and JSON decoding live in the try block so that
    # unrelated programming errors are not silently swallowed.
    try:
        resp = requests.get(
            OPENLIBRARY_SEARCH_URL, params=params, timeout=OPENLIBRARY_TIMEOUT_S
        )
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        print(f"OpenLibrary query failed: {e}")
        return None

    docs = data.get("docs") if isinstance(data, dict) else None
    if not docs or not isinstance(docs[0], dict):
        return None

    doc = docs[0]
    # `or []` also covers keys that are present but null in the payload.
    return {
        "title": doc.get("title", ""),
        "author_name": ", ".join(map(str, doc.get("author_name") or [])),
        "publisher": ", ".join(map(str, doc.get("publisher") or [])),
        "first_publish_year": doc.get("first_publish_year", ""),
    }


# ──────────────────────────────────────────────────────────────
# 3. Process one uploaded image (single OCR pass)
# ──────────────────────────────────────────────────────────────
def _to_bgr(image_file) -> np.ndarray:
    """Convert a PIL image or an RGB/RGBA/grayscale array to an OpenCV BGR array."""
    if hasattr(image_file, "convert"):
        # PIL image: normalize palette, alpha and grayscale modes to plain RGB.
        image_file = image_file.convert("RGB")
    rgb = np.asarray(image_file)
    if rgb.ndim == 2:
        return cv2.cvtColor(rgb, cv2.COLOR_GRAY2BGR)
    # Take channels 2, 1, 0: reverses RGB to BGR and drops an alpha channel if any.
    return rgb[:, :, 2::-1].copy()


def process_image(image_file):
    """
    OCR an uploaded cover, look it up on OpenLibrary and export the result.

    The first non-empty OCR line is used as the title guess and the second
    (if any) as the author guess; OpenLibrary is queried once. When there is
    no match, the OCR guesses themselves are reported.

    Args:
        image_file: PIL image or numpy array as passed by Gradio, or None
            when the user has not uploaded anything.

    Returns:
        A tuple (DataFrame, csv_path). The DataFrame has at most one row and
        is empty when no image was given or no text was recognized; the CSV
        at csv_path holds the same data.
    """
    lines = []
    if image_file is not None:
        full_text = ocr_full_image(_to_bgr(image_file))
        lines = [line.strip() for line in full_text.splitlines() if line.strip()]

    records = []
    if lines:
        # Use first line as title, second (if exists) as author
        title_guess = lines[0]
        author_guess = lines[1] if len(lines) > 1 else None

        meta = query_openlibrary(title_guess, author_guess)
        if meta:
            records.append(meta)
        else:
            # No match → still include OCR guesses
            records.append({
                "title": title_guess,
                "author_name": author_guess or "",
                "publisher": "",
                "first_publish_year": "",
            })

    # Build DataFrame (even if empty) so the CSV always carries the header row.
    df = pd.DataFrame(records, columns=RESULT_COLUMNS)

    # Write CSV to a unique file in the platform's temp directory
    # (portable replacement for a hard-coded "/tmp").
    unique_name = f"books_{uuid.uuid4().hex}.csv"
    temp_path = os.path.join(tempfile.gettempdir(), unique_name)
    with open(temp_path, "wb") as f:
        f.write(df.to_csv(index=False).encode())

    return df, temp_path


# ──────────────────────────────────────────────────────────────
# 4. Build the Gradio Interface
# ──────────────────────────────────────────────────────────────
def build_interface():
    """
    Assemble the Gradio Blocks UI: image upload, run button, result table
    and CSV download, with the button wired to process_image.

    Returns:
        The constructed (not yet launched) gr.Blocks app.
    """
    with gr.Blocks(title="Book Cover OCR + Lookup (Single‐Cover Mode)") as demo:
        gr.Markdown(
            """
            ## Book Cover OCR + OpenLibrary Lookup

            1. Upload a photo of a single book cover (or any cover‐style image).
            2. The app will run OCR on the full image, take:
               - the **first line** as a “title” guess, and
               - the **second line** (if any) as an “author” guess, then
               - query OpenLibrary once for metadata.
            3. You’ll see the result in a table and can download a CSV.

            > **Note:**
            > • Because we skip rectangle detection, any visible text on your cover (large, legible fonts) should be picked up.
            > • If you have multiple covers in one photo, only the first “title/author” will be used.
            """
        )

        with gr.Row():
            img_in = gr.Image(type="pil", label="Upload Single Book Cover")
            run_button = gr.Button("Scan & Lookup")

        # `type` selects the container format (pandas); `datatype` is reserved
        # for per-column value types such as "str" or "number".
        output_table = gr.Dataframe(
            headers=RESULT_COLUMNS,
            label="Detected Book Metadata",
            type="pandas",
        )
        download_file = gr.File(label="Download CSV")

        run_button.click(
            fn=process_image,
            inputs=[img_in],
            outputs=[output_table, download_file],
        )

    return demo


if __name__ == "__main__":
    demo_app = build_interface()
    demo_app.launch()