Spaces:

Wassymk
/

OCRArena

Running

App Files Files Community

OCRArena / app.py

Wassymk

app

c63c0e5 5 days ago

raw

history blame contribute delete

19.9 kB

	"""
	OCR Arena - Main Application
	A Gradio web application for comparing OCR results from different AI models.
	"""

	import gradio as gr
	import logging
	import os
	import datetime
	from dotenv import load_dotenv
	from storage import upload_file_to_bucket
	from db import add_vote, get_all_votes, calculate_elo_ratings_from_votes
	from ocr_models import process_model_ocr, initialize_gemini, initialize_mistral, initialize_openai
	from ui_helpers import (
	get_model_display_name, select_random_models, format_votes_table,
	format_elo_leaderboard
	)

	# Load environment variables before anything below reads os.environ.
	load_dotenv()

	# Configure logging
	logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
	logger = logging.getLogger(__name__)

	# Initialize API keys and models
	initialize_gemini()
	initialize_mistral()
	initialize_openai()

	# Get Supabase credentials
	SUPABASE_URL = os.getenv("SUPABASE_URL")
	SUPABASE_KEY = os.getenv("SUPABASE_KEY")

	# Module-level state describing the comparison currently in progress.
	# NOTE(review): these are process-wide globals, so concurrent users
	# presumably overwrite each other's state - consider per-session gr.State.
	current_gemini_output = ""
	current_mistral_output = ""
	current_openai_output = ""
	current_gpt5_output = ""
	current_image_url = ""  # Public URL of the last uploaded image ("" if none)
	current_voted_users = set()  # Track users who have already voted
	current_model_a = ""  # Store which model was selected as model A
	current_model_b = ""  # Store which model was selected as model B


	def get_default_username(profile: gr.OAuthProfile | None) -> str:
	    """Return the logged-in user's username, or "" when nobody is logged in.

	    Args:
	        profile: OAuth profile of the current user, or None when there is
	            no logged-in user.

	    Returns:
	        ``profile.username``, or an empty string when ``profile`` is None.
	    """
	    if profile is None:
	        return ""
	    return profile.username

	def get_current_username(profile_or_username) -> str:
	    """Resolve a username from a profile object or a login-button label.

	    Args:
	        profile_or_username: Either an object exposing a non-empty
	            ``username`` attribute, or a string such as ``"Logout (alice)"``,
	            a bare username, or the ``"Sign in with Hugging Face"`` label.

	    Returns:
	        The resolved username, or ``"Anonymous"`` when none can be
	        determined (including an empty ``"Logout ()"`` label).
	    """
	    anonymous = "Anonymous"

	    # Profile-like object carrying a usable username.
	    profile_name = getattr(profile_or_username, "username", None)
	    if profile_name:
	        return profile_name

	    if not isinstance(profile_or_username, str):
	        return anonymous

	    # Normalise once so surrounding whitespace cannot defeat the checks below.
	    label = profile_or_username.strip()
	    if not label or label == "Sign in with Hugging Face":
	        return anonymous

	    prefix, suffix = "Logout (", ")"
	    if label.startswith(prefix) and label.endswith(suffix):
	        # "Logout ()" carries no name; fall back instead of returning "".
	        return label[len(prefix):-len(suffix)].strip() or anonymous

	    # Anything else is taken to be the username itself.
	    return label

	def process_image(image):
	    """Upload the submitted image and pick two random models to compare.

	    Clears the per-image voter set, selects the two models, writes the image
	    to a temporary PNG, uploads it to the "images" bucket and remembers its
	    public URL for later votes. The temporary file is always removed, even
	    when the upload raises.

	    Args:
	        image: The uploaded image (must support ``.save(path)``), or None
	            when nothing was uploaded.

	    Returns:
	        A 4-tuple: status text for the Model A pane, the same text for the
	        Model B pane, and two ``gr.update(visible=False)`` values that keep
	        both vote buttons hidden.
	    """
	    global current_image_url, current_model_a, current_model_b

	    def _reply(message):
	        # Same text in both output panes; vote buttons stay hidden.
	        return (
	            message,
	            message,
	            gr.update(visible=False),
	            gr.update(visible=False),
	        )

	    if image is None:
	        return _reply("Please upload an image.")

	    # Reset voted users for new image
	    current_voted_users.clear()
	    # Forget the previous image's URL so a failure below cannot attach a
	    # stale URL to votes cast on this image.
	    current_image_url = ""

	    # Select two random models
	    current_model_a, current_model_b = select_random_models()
	    logger.info("🎲 Randomly selected two models for comparison")

	    # Microsecond resolution keeps names distinct for uploads that land in
	    # the same second (the upload is issued with upsert disabled).
	    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')
	    temp_filename = f"temp_image_{timestamp}.png"

	    try:
	        # Save the image to a temporary file
	        image.save(temp_filename)

	        # Upload the temporary file to Supabase storage
	        logger.info(f"📤 Uploading image to Supabase storage: {temp_filename}")
	        upload_result = upload_file_to_bucket(
	            file_path=temp_filename,
	            bucket_name="images",
	            storage_path=f"ocr_images/{temp_filename}",
	            file_options={"cache-control": "3600", "upsert": "false"},
	        )

	        if upload_result["success"]:
	            public_url = upload_result.get('public_url')
	            logger.info(f"✅ Image uploaded successfully: {upload_result.get('storage_path')}")
	            logger.info(f"🔗 Public URL: {public_url}")
	            # Store the image URL for voting; rebuild it when the uploader
	            # did not report one.
	            current_image_url = public_url or f"{SUPABASE_URL}/storage/v1/object/public/images/ocr_images/{temp_filename}"
	        else:
	            logger.error(f"❌ Image upload failed: {upload_result.get('error')}")
	    except Exception as e:
	        logger.error(f"Error processing image: {e}")
	        return _reply(f"Error processing image: {e}")
	    finally:
	        # Clean up temporary file on every path, including failures.
	        try:
	            os.remove(temp_filename)
	            logger.info(f"🗑️ Cleaned up temporary file: {temp_filename}")
	        except FileNotFoundError:
	            # image.save() failed before the file was created.
	            pass
	        except OSError as cleanup_err:
	            logger.warning(f"⚠️ Could not remove temporary file {temp_filename}: {cleanup_err}")

	    # Return initial state - OCR processing happens in separate handlers
	    return _reply("Please click 'Run OCR' to start processing.")

	def check_ocr_completion(model_a_output, model_b_output):
	    """Record finished OCR results and show the vote buttons when both exist.

	    A pane counts as "ready" when its text is non-empty, is not one of the
	    placeholder messages, and does not start with an error prefix. The
	    "Error processing image:" prefix is what process_image emits on failure;
	    "OCR error:" is presumably what the OCR backend emits.

	    Args:
	        model_a_output: Current text of the Model A output pane.
	        model_b_output: Current text of the Model B output pane.

	    Returns:
	        A 2-tuple of ``gr.update(visible=...)`` for the Model A and Model B
	        vote buttons; both visible only when both panes are ready.
	    """
	    global current_gemini_output, current_mistral_output, current_openai_output, current_gpt5_output

	    placeholders = (
	        "Please upload an image.",
	        "Processing OCR...",
	        "Please click 'Run OCR' to start processing.",
	    )
	    error_prefixes = ("OCR error:", "Error processing image:")

	    def _is_ready(text):
	        # True only for real OCR text, never for placeholders or errors.
	        return (
	            bool(text)
	            and text not in placeholders
	            and not text.startswith(error_prefixes)
	        )

	    model_a_ready = _is_ready(model_a_output)
	    model_b_ready = _is_ready(model_b_output)

	    # Store each finished result under the model that produced it, so the
	    # vote handlers can look it up by model name.
	    finished = []
	    if model_a_ready:
	        finished.append((current_model_a, model_a_output))
	    if model_b_ready:
	        finished.append((current_model_b, model_b_output))

	    for model, text in finished:
	        if model == "gemini":
	            current_gemini_output = text
	        elif model == "mistral":
	            current_mistral_output = text
	        elif model == "openai":
	            current_openai_output = text
	        elif model == "gpt5":
	            current_gpt5_output = text

	    # Show vote buttons only when both are ready
	    both_ready = model_a_ready and model_b_ready
	    return (
	        gr.update(visible=both_ready),  # Model A vote button
	        gr.update(visible=both_ready),  # Model B vote button
	    )

	def load_vote_data():
	    """Fetch every recorded vote and render it as HTML.

	    Returns:
	        The HTML produced by ``format_votes_table`` for all votes, or a red
	        error paragraph when fetching or formatting raises.
	    """
	    try:
	        return format_votes_table(get_all_votes())
	    except Exception as err:
	        logger.error(f"Error loading vote data: {err}")
	        return f"<p style='color: red;'>Error loading data: {err}</p>"

	def load_elo_leaderboard():
	    """Build the ELO leaderboard HTML from all recorded votes.

	    Computes ELO ratings from the votes and tallies, per known model, how
	    many votes it won, then hands both to ``format_elo_leaderboard``.

	    Returns:
	        The leaderboard HTML, or a red error paragraph when anything raises.
	    """
	    try:
	        all_votes = get_all_votes()
	        ratings = calculate_elo_ratings_from_votes(all_votes)

	        # Every known model starts at zero so it is listed even without wins.
	        wins = dict.fromkeys(("gemini", "mistral", "openai", "gpt5"), 0)

	        for record in all_votes:
	            choice = record.get('vote')
	            if choice == 'model_a':
	                winner = record.get('model_a')
	            elif choice == 'model_b':
	                winner = record.get('model_b')
	            else:
	                # Unrecognised vote value: nothing to tally.
	                continue

	            # Winners outside the known model set are ignored.
	            if winner in wins:
	                wins[winner] += 1

	        return format_elo_leaderboard(ratings, wins)

	    except Exception as err:
	        logger.error(f"Error loading ELO leaderboard: {err}")
	        return f"<p style='color: red;'>Error loading ELO leaderboard: {err}</p>"

	# Create the Gradio interface.
	# The Blocks app is bound to `demo`; the `css` argument below is a single
	# string literal styling the output panes (.output-box) and the votes table
	# (.vote-table).
	with gr.Blocks(title="OCR Comparison", css="""
	.output-box {
	border: 2px solid #e0e0e0;
	border-radius: 8px;
	padding: 15px;
	margin: 10px 0;
	background-color: #f9f9f9;
	min-height: 200px;
	}
	.output-box:hover {
	border-color: #007bff;
	box-shadow: 0 2px 8px rgba(0,123,255,0.1);
	}
	.vote-table {
	border-collapse: collapse;
	width: 100%;
	margin: 10px 0;
	min-width: 800px;
	}
	.vote-table th, .vote-table td {
	border: 1px solid #ddd;
	padding: 6px;
	text-align: left;
	vertical-align: top;
	}
	.vote-table th {
	background-color: #f2f2f2;
	font-weight: bold;
	position: sticky;
	top: 0;
	z-index: 10;
	}
	.vote-table tr:nth-child(even) {
	background-color: #f9f9f9;
	}
	.vote-table tr:hover {
	background-color: #f5f5f5;
	}
	.vote-table img {
	transition: transform 0.2s ease;
	max-width: 100%;
	height: auto;
	}
	.vote-table img:hover {
	transform: scale(1.1);
	box-shadow: 0 4px 8px rgba(0,0,0,0.2);
	}
	""") as demo:

	# Three tabs: Arena (id=0), Data (id=1) and Leaderboard (id=2).
	with gr.Tabs():
	# Arena Tab (default)
	with gr.Tab("⚔️ Arena", id=0):
	gr.Markdown("# ⚔️ OCR Arena: Random Model Selection")
	gr.Markdown("Upload an image to compare two randomly selected OCR models.")

	# Authentication section (optional)
	with gr.Row():
	with gr.Column(scale=3):
	# Read-only box that is filled with the logged-in username on page load.
	username_display = gr.Textbox(
	label="Current User",
	placeholder="Login with Hugging Face to vote (optional) - Anonymous users welcome!",
	interactive=False,
	show_label=False
	)
	with gr.Column(scale=1):
	# The login button is also passed as the input of both vote handlers.
	login_button = gr.LoginButton()

	# Comparison row: Model A pane, image input, Model B pane.
	# NOTE: despite their names, the gemini_* widgets belong to slot "A" and the
	# mistral_* widgets to slot "B"; they display whichever models were selected.
	with gr.Row():
	with gr.Column():
	# Vote buttons start hidden; check_ocr_completion reveals them.
	gemini_vote_btn = gr.Button("A is better", variant="primary", size="sm", visible=False)
	gemini_output = gr.Markdown(label="Model A Output", elem_classes=["output-box"])

	# type="pil" so handlers receive a PIL image (process_image calls image.save).
	image_input = gr.Image(type="pil", label="Upload or Paste Image")

	with gr.Column():
	mistral_vote_btn = gr.Button("B is better", variant="primary", size="sm", visible=False)
	mistral_output = gr.Markdown(label="Model B Output", elem_classes=["output-box"])



	with gr.Row():
	# Clicking this triggers process_image and both OCR handlers.
	process_btn = gr.Button("🔍 Run OCR", variant="primary")

	# Data Tab
	with gr.Tab("📊 Data", id=1):
	gr.Markdown("# 📊 Vote Data")
	gr.Markdown("View all votes from the OCR Arena")

	with gr.Row():
	refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary")

	with gr.Row():
	# Placeholder HTML; replaced by load_vote_data on page load and on refresh.
	votes_table = gr.HTML(
	value="<p>Loading vote data...</p>",
	label="📋 All Votes (Latest First)"
	)

	# Leaderboard Tab
	with gr.Tab("🏆 Leaderboard", id=2):
	gr.Markdown("# 🏆 ELO Leaderboard")
	gr.Markdown("See how the models rank based on their ELO ratings from head-to-head comparisons.")

	with gr.Row():
	refresh_leaderboard_btn = gr.Button("🔄 Refresh Leaderboard", variant="secondary")

	with gr.Row():
	# Placeholder HTML; replaced by load_elo_leaderboard on page load and on refresh.
	leaderboard_display = gr.HTML(
	value="<p>Loading ELO leaderboard...</p>",
	label="🏆 Model Rankings"
	)

	# Vote functions
	def _record_vote(profile_or_username, choice):
	    """Store one vote for the current comparison and notify the user.

	    Shared implementation behind vote_model_a and vote_model_b.

	    Args:
	        profile_or_username: Value received from the login button input;
	            resolved to a username via get_current_username.
	        choice: ``"model_a"`` or ``"model_b"`` - the side being voted for.

	    Returns:
	        None. Feedback is delivered through ``gr.Info`` toasts.
	    """
	    # Get current username
	    username = get_current_username(profile_or_username) or "Anonymous"

	    # Check if user has already voted.
	    # NOTE(review): every anonymous visitor maps to the same "Anonymous"
	    # key, so only one anonymous vote per image is accepted.
	    if username in current_voted_users:
	        gr.Info(f"You have already voted for this image, {username}!")
	        return

	    side = "A" if choice == "model_a" else "B"

	    try:
	        # Use the stored image URL from the upload
	        image_url = current_image_url if current_image_url else "no_image"

	        # OCR text last recorded for each model; unknown models map to "".
	        outputs_by_model = {
	            "gemini": current_gemini_output,
	            "mistral": current_mistral_output,
	            "openai": current_openai_output,
	            "gpt5": current_gpt5_output,
	        }

	        # Add vote to database
	        logger.info(f"📊 Adding Model {side} vote for user: {username}")
	        add_vote(
	            username=username,
	            model_a=current_model_a,
	            model_b=current_model_b,
	            model_a_output=outputs_by_model.get(current_model_a, ""),
	            model_b_output=outputs_by_model.get(current_model_b, ""),
	            vote=choice,
	            image_url=image_url,
	        )

	        # Mark user as voted only after the vote was stored successfully.
	        current_voted_users.add(username)

	        # Reveal which models were behind A (green) and B (blue).
	        model_a_name = get_model_display_name(current_model_a)
	        model_b_name = get_model_display_name(current_model_b)
	        if choice == "model_a":
	            winner_name, winner_color = model_a_name, "green"
	        else:
	            winner_name, winner_color = model_b_name, "blue"
	        info_message = (
	            f"<p>You voted for <strong style='color:{winner_color};'>{winner_name}</strong>.</p>"
	            f"<p><span style='color:green;'>{model_a_name}</span> - "
	            f"<span style='color:blue;'>{model_b_name}</span></p>"
	        )
	        gr.Info(info_message)

	    except Exception as e:
	        logger.error(f"❌ Error adding Model {side} vote: {e}")
	        gr.Info(f"Error recording vote: {e}")

	def vote_model_a(profile_or_username):
	    """Record a vote for the model shown as "A"."""
	    _record_vote(profile_or_username, "model_a")

	def vote_model_b(profile_or_username):
	    """Record a vote for the model shown as "B"."""
	    _record_vote(profile_or_username, "model_b")

	# Event handlers
	# NOTE: these registrations reference components created in the
	# `with gr.Blocks(...) as demo:` layout and are expected to run inside
	# that context.

	def process_model_a_ocr(image):
	    """Run OCR on `image` with the model currently assigned to slot A."""
	    return process_model_ocr(image, current_model_a)

	def process_model_b_ocr(image):
	    """Run OCR on `image` with the model currently assigned to slot B."""
	    return process_model_ocr(image, current_model_b)

	# Step 1: upload the image and pick the two models.
	selection_event = process_btn.click(
	    process_image,
	    inputs=[image_input],
	    outputs=[gemini_output, mistral_output, gemini_vote_btn, mistral_vote_btn],
	)

	# Step 2: run both OCRs only after the selection has finished. Registering
	# them as independent click handlers let them read current_model_a/b
	# before process_image had assigned the models for this image.
	selection_event.then(
	    process_model_a_ocr,
	    inputs=[image_input],
	    outputs=[gemini_output],
	)

	selection_event.then(
	    process_model_b_ocr,
	    inputs=[image_input],
	    outputs=[mistral_output],
	)

	# Check completion status when either OCR output changes
	gemini_output.change(
	    check_ocr_completion,
	    inputs=[gemini_output, mistral_output],
	    outputs=[gemini_vote_btn, mistral_vote_btn],
	)

	mistral_output.change(
	    check_ocr_completion,
	    inputs=[gemini_output, mistral_output],
	    outputs=[gemini_vote_btn, mistral_vote_btn],
	)

	# The login button's value is what get_current_username parses
	# (e.g. "Logout (name)").
	gemini_vote_btn.click(
	    vote_model_a,
	    inputs=[login_button]
	)

	mistral_vote_btn.click(
	    vote_model_b,
	    inputs=[login_button]
	)

	# Refresh data button
	refresh_btn.click(
	    load_vote_data,
	    inputs=None,
	    outputs=[votes_table]
	)

	# Refresh leaderboard button
	refresh_leaderboard_btn.click(
	    load_elo_leaderboard,
	    inputs=None,
	    outputs=[leaderboard_display]
	)

	# Update username display when the page loads
	demo.load(fn=get_default_username, inputs=None, outputs=username_display)

	# Load vote data when app starts
	demo.load(fn=load_vote_data, inputs=None, outputs=[votes_table])

	# Load leaderboard when app starts
	demo.load(fn=load_elo_leaderboard, inputs=None, outputs=[leaderboard_display])

	if __name__ == "__main__":
	    logger.info("Starting OCR Comparison App...")
	    try:
	        # Try to launch on localhost first (no public share link).
	        demo.launch()
	    except ValueError as e:
	        # Presumably raised by Gradio when localhost is not reachable and a
	        # share link is required; retry with a public URL.
	        logger.warning(f"Localhost not accessible: {e}")
	        logger.info("Launching with public URL...")
	        demo.launch(share=True)