# Captured from a Hugging Face Spaces page; Space status at capture time: "Runtime error".
import os
import tempfile
import uuid

import cv2
import gradio as gr
import numpy as np
import pandas as pd
import pytesseract
import requests
# ──────────────────────────────────────────────────────────────
# 1. OCR on the full image (always)
# ──────────────────────────────────────────────────────────────
def ocr_full_image(image: np.ndarray) -> str:
    """
    Run Tesseract OCR on the entire image (no thresholding) and return the text.

    Args:
        image: OpenCV-style pixel array. A multi-channel image is converted to
            grayscale with ``cv2.COLOR_BGR2GRAY``; an array that is already
            single-channel (2-D, or H x W x 1) is passed to Tesseract unchanged.

    Returns:
        The raw OCR text with leading and trailing whitespace stripped.
    """
    if image.ndim == 2:
        # Already grayscale: cvtColor(BGR2GRAY) would reject a 2-D array.
        gray = image
    elif image.shape[2] == 1:
        # Single channel stored with an explicit channel axis.
        gray = image[:, :, 0].copy()
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Explicit thresholding is skipped on purpose: stylized covers sometimes
    # lose detail under THRESH_OTSU.
    text = pytesseract.image_to_string(gray, config="--oem 3 --psm 6")
    return text.strip()
# ──────────────────────────────────────────────────────────────
# 2. Query OpenLibrary API
# ──────────────────────────────────────────────────────────────
def query_openlibrary(title_text: str, author_text: str = None) -> dict | None: | |
""" | |
Search OpenLibrary by title (and optional author). | |
Return a dict with title, author_name, publisher, first_publish_year, or None. | |
""" | |
base_url = "https://openlibrary.org/search.json" | |
params = {"title": title_text} | |
if author_text: | |
params["author"] = author_text | |
try: | |
resp = requests.get(base_url, params=params, timeout=5) | |
resp.raise_for_status() | |
data = resp.json() | |
if data.get("docs"): | |
doc = data["docs"][0] | |
return { | |
"title": doc.get("title", ""), | |
"author_name": ", ".join(doc.get("author_name", [])), | |
"publisher": ", ".join(doc.get("publisher", [])), | |
"first_publish_year": doc.get("first_publish_year", ""), | |
} | |
except Exception as e: | |
print(f"OpenLibrary query failed: {e}") | |
return None | |
# ──────────────────────────────────────────────────────────────
# 3. Process one uploaded image (single OCR pass)
# ──────────────────────────────────────────────────────────────
# Column order shared by the results table and the CSV export.
_BOOK_COLUMNS = ["title", "author_name", "publisher", "first_publish_year"]


def _write_books_csv(df: pd.DataFrame) -> str:
    """Write *df* as CSV to a uniquely named file in the temp directory and return its path."""
    csv_path = os.path.join(tempfile.gettempdir(), f"books_{uuid.uuid4().hex}.csv")
    with open(csv_path, "wb") as f:
        f.write(df.to_csv(index=False).encode())
    return csv_path


def _to_bgr(image_file) -> np.ndarray:
    """Convert a PIL image (or array-like) into a 3-channel OpenCV BGR array.

    Assumes colour input is ordered RGB or RGBA, as PIL produces.
    """
    pixels = np.array(image_file)
    if pixels.ndim == 2:
        # Grayscale without a channel axis: add one so the indexing below works.
        pixels = pixels[:, :, None]
    if pixels.shape[2] < 3:
        # Grayscale (optionally with alpha): replicate the luminance channel.
        return pixels[:, :, [0, 0, 0]].copy()
    # Take channels 2, 1, 0: reverses RGB to BGR and drops any alpha channel.
    return pixels[:, :, 2::-1].copy()


def process_image(image_file):
    """
    OCR one book-cover image, look it up on OpenLibrary, and export the result.

    The first non-empty OCR line is used as the title guess and the second
    (if any) as the author guess. OpenLibrary is queried once; if it has no
    match, the OCR guesses themselves are reported.

    Args:
        image_file: A PIL image, or ``None`` when nothing was uploaded.

    Returns:
        A ``(DataFrame, csv_path)`` tuple. The DataFrame has the columns
        ``title``, ``author_name``, ``publisher``, ``first_publish_year`` and
        at most one row; it is empty when no image was given, when Tesseract
        is not installed, or when OCR found no text. ``csv_path`` points to a
        freshly written CSV copy of the DataFrame.
    """
    records = []

    if image_file is not None:
        img = _to_bgr(image_file)
        try:
            full_text = ocr_full_image(img)
        except pytesseract.pytesseract.TesseractNotFoundError:
            # Tesseract isn't installed: log the issue and fall through to an empty result.
            # NOTE(review): Hugging Face Spaces appears to read system packages
            # from packages.txt rather than apt.txt; confirm the file name.
            print("ERROR: Tesseract not found. Did you add apt.txt with 'tesseract-ocr'?")
            full_text = ""

        lines = [line.strip() for line in full_text.splitlines() if line.strip()]
        if lines:
            # Use first line as title, second (if it exists) as author.
            title_guess = lines[0]
            author_guess = lines[1] if len(lines) > 1 else None
            meta = query_openlibrary(title_guess, author_guess)
            if meta:
                records.append(meta)
            else:
                # No OpenLibrary match: still include the OCR guesses.
                records.append({
                    "title": title_guess,
                    "author_name": author_guess or "",
                    "publisher": "",
                    "first_publish_year": "",
                })

    # Build the DataFrame (even if empty) and write it to a unique temp file.
    df = pd.DataFrame(records, columns=_BOOK_COLUMNS)
    return df, _write_books_csv(df)
# ──────────────────────────────────────────────────────────────
# 4. Build the Gradio Interface
# ──────────────────────────────────────────────────────────────
def build_interface():
    """
    Build the Gradio Blocks UI for the book-cover scanner.

    The layout has an image upload and a "Scan & Lookup" button, followed by a
    results table and a CSV download. Clicking the button runs
    ``process_image`` on the uploaded image and fills both outputs.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(title="Single-Cover OCR + OpenLibrary Lookup") as demo:
        # Built from explicit lines so rendering does not depend on how the
        # source indentation of a triple-quoted string is treated.
        gr.Markdown(
            "## Book Cover OCR + OpenLibrary Lookup\n"
            "\n"
            "1. Upload a photo of a single book cover.\n"
            "2. The app will run OCR on the full image, take:\n"
            "   - the **first line** as a \"title\" guess, and\n"
            "   - the **second line** as an \"author\" guess (if present), then\n"
            "   - query OpenLibrary for metadata.\n"
            "3. Results display in a table and can be downloaded as CSV.\n"
            "\n"
            "> **Note:**\n"
            ">\n"
            "> - Ensure Tesseract OCR is installed (see `apt.txt`).\n"
            "> - If no image is uploaded, the table and CSV will be empty.\n"
        )

        with gr.Row():
            img_in = gr.Image(type="pil", label="Upload Single Book Cover")
            run_button = gr.Button("Scan & Lookup")

        output_table = gr.Dataframe(
            headers=["title", "author_name", "publisher", "first_publish_year"],
            label="Detected Book Metadata",
            # `type` selects the value format exchanged with Python;
            # `datatype` describes cell types and has no "pandas" option.
            type="pandas",
        )
        download_file = gr.File(label="Download CSV")

        # process_image already returns (DataFrame, csv_path) in output order.
        run_button.click(
            fn=process_image,
            inputs=[img_in],
            outputs=[output_table, download_file],
        )
    return demo
if __name__ == "__main__":
    # Script entry point: build the Blocks UI and start the Gradio server.
    build_interface().launch()