{ui['title']}
{ui['subtitle']}
import gradio as gr import os import json import random from datetime import datetime import pandas as pd from typing import Dict, List, Tuple, Optional, Generator import sqlite3 from dataclasses import dataclass, asdict import hashlib import time from enum import Enum import numpy as np import threading import queue import re # For LLM API integration try: from openai import OpenAI except ImportError: print("OpenAI library not installed. Install with: pip install openai") OpenAI = None try: from gradio_client import Client as GradioClient except ImportError: print("Gradio client not installed. Install with: pip install gradio_client") GradioClient = None # For Gemini API try: from google import genai from google.genai import types except ImportError: print("Google GenAI library not installed. Install with: pip install google-genai") genai = None types = None # For Claude API try: import anthropic except ImportError: print("Anthropic library not installed. Install with: pip install anthropic") anthropic = None # For Hugging Face Dataset integration try: from huggingface_hub import HfApi, login, create_repo, upload_file, hf_hub_download from datasets import Dataset, load_dataset import pyarrow.parquet as pq import pyarrow as pa except ImportError: print("Hugging Face libraries not installed. Install with: pip install huggingface_hub datasets pyarrow") HfApi = None Dataset = None # ==================== Configuration ==================== class Category(Enum): STORYTELLING = "storytelling" INNOVATION = "innovation" BUSINESS = "business" @dataclass class Battle: id: str prompt_id: str prompt_text: str model_a: str model_b: str response_a: str response_b: str winner: Optional[str] voter_id: str timestamp: datetime category: Category custom_prompt: bool = False language: str = "en" # ==================== Language Configurations ==================== LANGUAGES = { "en": "English", "ko": "ํ๊ตญ์ด" } UI_TEXT = { "en": { "title": "๐จ AI Models Creativity Battle Arena", "subtitle": "Test cutting-edge AI models in creative challenges", "battle_tab": "โ๏ธ Battle Arena", "leaderboard_tab": "๐ Leaderboard", "category_label": "Select Category", "custom_prompt_label": "โ๏ธ Custom Challenge (Optional)", "custom_prompt_placeholder": "Enter your creative challenge for the models...", "new_battle_btn": "๐ฒ Start New Battle", "model_a": "### ๐ ฐ๏ธ Model A", "model_b": "### ๐ ฑ๏ธ Model B", "vote_a": "๐ ฐ๏ธ Model A is more creative", "vote_b": "๐ ฑ๏ธ Model B is more creative", "vote_complete": "### ๐ Vote Complete!", "winner": "Winner", "leaderboard_title": "## ๐ AI Models Leaderboard", "category_filter": "Category Filter", "refresh_btn": "๐ Refresh", "language_label": "Language", "contact": "Contact: arxivgpt@gmail.com", "challenge_task": "### ๐ Challenge Task", "category": "Category", "prompt": "Challenge", "model_identity": "Model Identity", "elo_updated": "Scores have been updated!", "generating": "๐ Generating response...", "categories": { "random": "๐ฒ Random", "storytelling": "๐ Storytelling", "innovation": "๐ก Innovation", "business": "๐ผ Business" }, "filter_categories": { "overall": "Overall", "storytelling": "Storytelling", "innovation": "Innovation", "business": "Business" } }, "ko": { "title": "๐จ AI ๋ชจ๋ธ ์ฐฝ์์ฑ ๋ฐฐํ ์๋ ๋", "subtitle": "์ต์ฒจ๋จ AI ๋ชจ๋ธ๋ค์ ์ฐฝ์๋ ฅ ๋๊ฒฐ", "battle_tab": "โ๏ธ ๋ฐฐํ ์๋ ๋", "leaderboard_tab": "๐ ๋ฆฌ๋๋ณด๋", "category_label": "์นดํ ๊ณ ๋ฆฌ ์ ํ", "custom_prompt_label": "โ๏ธ ์ปค์คํ ๋์ ๊ณผ์ (์ ํ์ฌํญ)", "custom_prompt_placeholder": "๋ชจ๋ธ๋ค์๊ฒ ๋์ ํ ์ฐฝ์์ ์ธ ๊ณผ์ ๋ฅผ ์ ๋ ฅํ์ธ์...", "new_battle_btn": "๐ฒ ์๋ก์ด ๋ฐฐํ ์์", "model_a": "### ๐ ฐ๏ธ ๋ชจ๋ธ A", "model_b": "### ๐ ฑ๏ธ ๋ชจ๋ธ B", "vote_a": "๐ ฐ๏ธ ๋ชจ๋ธ A๊ฐ ๋ ์ฐฝ์์ ์ด๋ค", "vote_b": "๐ ฑ๏ธ ๋ชจ๋ธ B๊ฐ ๋ ์ฐฝ์์ ์ด๋ค", "vote_complete": "### ๐ ํฌํ ์๋ฃ!", "winner": "์น์", "leaderboard_title": "## ๐ AI ๋ชจ๋ธ ๋ฆฌ๋๋ณด๋", "category_filter": "์นดํ ๊ณ ๋ฆฌ ํํฐ", "refresh_btn": "๐ ์๋ก๊ณ ์นจ", "language_label": "์ธ์ด", "contact": "๋ฌธ์: arxivgpt@gmail.com", "challenge_task": "### ๐ ๋์ ๊ณผ์ ", "category": "์นดํ ๊ณ ๋ฆฌ", "prompt": "๋์ ๊ณผ์ ", "model_identity": "๋ชจ๋ธ ์ ์ฒด", "elo_updated": "์ ์๊ฐ ์ ๋ฐ์ดํธ๋์์ต๋๋ค!", "generating": "๐ ์๋ต ์์ฑ ์ค...", "categories": { "random": "๐ฒ ๋๋ค", "storytelling": "๐ ์คํ ๋ฆฌํ ๋ง", "innovation": "๐ก ํ์ /๋ฐ๋ช ", "business": "๐ผ ๋น์ฆ๋์ค" }, "filter_categories": { "overall": "์ ์ฒด", "storytelling": "์คํ ๋ฆฌํ ๋ง", "innovation": "ํ์ /๋ฐ๋ช ", "business": "๋น์ฆ๋์ค" } } } # ==================== Simplified Prompt Database ==================== PROMPTS = { Category.STORYTELLING: { "en": [ {"text": "Write a sci-fi movie proposal with a never-before-explored concept", "difficulty": "high"}, {"text": "Create a story where the protagonists never meet but fall deeply in love", "difficulty": "high"}, {"text": "Design a thriller where the twist is revealed in the first scene but still surprises at the end", "difficulty": "high"} ], "ko": [ {"text": "ํ ๋ฒ๋ ๋ค๋ค์ง์ง ์์ ์์ฌ๋ก SF ์ํ ๊ธฐํ์์ ์์ฑํ์ธ์", "difficulty": "high"}, {"text": "์ฃผ์ธ๊ณต๋ค์ด ํ ๋ฒ๋ ๋ง๋์ง ์์ง๋ง ๊น์ ์ฌ๋์ ๋น ์ง๋ ์คํ ๋ฆฌ๋ฅผ ์ฐฝ์ํ์ธ์", "difficulty": "high"}, {"text": "์ฒซ ์ฅ๋ฉด์์ ๋ฐ์ ์ ๊ณต๊ฐํ์ง๋ง ๋ง์ง๋ง์ ์ฌ์ ํ ์ถฉ๊ฒฉ์ ์ธ ์ค๋ฆด๋ฌ๋ฅผ ์ค๊ณํ์ธ์", "difficulty": "high"} ] }, Category.INNOVATION: { "en": [ {"text": "Present 5 innovative ideas that could revolutionize the bicycle", "difficulty": "high"}, {"text": "Propose 5 breakthrough innovations that could transform email communication", "difficulty": "high"}, {"text": "Design 5 inventions that could make elevators obsolete", "difficulty": "high"} ], "ko": [ {"text": "์์ ๊ฑฐ๋ฅผ ํ์ ํ ์ ์๋ ํ๊ธฐ์ ์ธ ๋ฐ๋ช ์์ด๋์ด๋ฅผ 5๊ฐ ์ ์ํ์ธ์", "difficulty": "high"}, {"text": "์ด๋ฉ์ผ ์ปค๋ฎค๋์ผ์ด์ ์ ์์ ํ ๋ณํ์ํฌ ํ์ ์์ด๋์ด๋ฅผ 5๊ฐ ์ ์ํ์ธ์", "difficulty": "high"}, {"text": "์๋ฆฌ๋ฒ ์ดํฐ๋ฅผ ๋์ฒดํ ์ ์๋ 5๊ฐ์ง ํ์ ์ ๋ฐ๋ช ์ ์ค๊ณํ์ธ์", "difficulty": "high"} ] }, Category.BUSINESS: { "en": [ {"text": "Design a business model in robotics/drone sector that could become a unicorn startup", "difficulty": "high"}, {"text": "Create a one-person SaaS business that could scale to $1M ARR", "difficulty": "high"}, {"text": "Develop a subscription model that people would happily pay $1000/month for", "difficulty": "high"} ], "ko": [ {"text": "๋ก๋ด/๋๋ก ๋ถ์ผ์์ ์ ๋์ฝ ๊ธฐ์ ์ด ๋ ์ ์๋ ๋น์ฆ๋์ค ๋ชจ๋ธ์ ์ค๊ณํ์ธ์", "difficulty": "high"}, {"text": "์ฐ ๋งค์ถ 10์ต์์ ๋ฌ์ฑํ ์ ์๋ 1์ธ SaaS ์ฐฝ์ ์์ดํ ์ ๊ธฐํํ์ธ์", "difficulty": "high"}, {"text": "์ฌ๋๋ค์ด ๊ธฐ๊บผ์ด ์ 100๋ง์์ ์ง๋ถํ ๋งํ ๊ตฌ๋ ๋น์ฆ๋์ค๋ฅผ ๊ฐ๋ฐํ์ธ์", "difficulty": "high"} ] } } # ==================== Database Management ==================== class ArenaDatabase: def __init__(self, db_path="ai_models_arena.db", use_hf=True): self.db_path = db_path self.use_hf = use_hf and HfApi is not None self.hf_token = os.getenv("HF_TOKEN") self.hf_dataset_name = os.getenv("HF_DATASET_NAME", "ai_models_arena") self.hf_username = None if self.use_hf and self.hf_token: try: login(token=self.hf_token) self.api = HfApi() user_info = self.api.whoami() self.hf_username = user_info["name"] self.hf_repo_id = f"{self.hf_username}/{self.hf_dataset_name}" # Create or access the dataset repository self._init_hf_dataset() print(f"โ Connected to Hugging Face Dataset: {self.hf_repo_id}") # โญ CRITICAL: Try to restore from HF FIRST if self._restore_from_hf(): print("โ Successfully restored data from Hugging Face Dataset") return # โญ EXIT HERE if data exists - DO NOT initialize new database else: print("๐ No existing data in HF Dataset, will create new database") except Exception as e: print(f"โ Failed to connect to Hugging Face: {e}") self.use_hf = False # โญ ONLY initialize new database if HF restore failed or HF not available print("๐ Initializing new local database") self.init_database() def _init_hf_dataset(self): """Initialize Hugging Face dataset repository""" try: # Try to create the repository (it will fail if it already exists) create_repo( repo_id=self.hf_repo_id, repo_type="dataset", private=True, exist_ok=True ) print(f"โ HF Dataset repository ready: {self.hf_repo_id}") except Exception as e: print(f"Dataset repo creation note: {e}") def _restore_from_hf(self): """โญ NEW METHOD: Restore complete database from HF - returns True if successful""" try: print("๐ Attempting to restore data from Hugging Face...") # Try to load battles data try: dataset = load_dataset(self.hf_repo_id, split="train", token=self.hf_token) except Exception as e: print(f"No existing battles data found: {e}") return False if not dataset or len(dataset) == 0: print("Dataset exists but is empty") return False print(f"Found {len(dataset)} battles in HF Dataset") # Create fresh local database with data from HF conn = sqlite3.connect(self.db_path) cursor = conn.cursor() # Create tables cursor.execute(''' CREATE TABLE IF NOT EXISTS battles ( id TEXT PRIMARY KEY, prompt_id TEXT, prompt_text TEXT, category TEXT, model_a TEXT, model_b TEXT, response_a TEXT, response_b TEXT, winner TEXT, voter_id TEXT, timestamp DATETIME, custom_prompt INTEGER DEFAULT 0, language TEXT DEFAULT 'en' ) ''') cursor.execute(''' CREATE TABLE IF NOT EXISTS model_stats ( model_name TEXT PRIMARY KEY, overall_score REAL DEFAULT 5.0, storytelling_score REAL DEFAULT 5.0, innovation_score REAL DEFAULT 5.0, business_score REAL DEFAULT 5.0, total_battles INTEGER DEFAULT 0, wins INTEGER DEFAULT 0, losses INTEGER DEFAULT 0, elo_rating INTEGER DEFAULT 1500 ) ''') # Restore battles data battles_df = dataset.to_pandas() battles_df.to_sql('battles', conn, if_exists='replace', index=False) print(f"โ Restored {len(battles_df)} battles") # Try to restore model stats stats_restored = False try: stats_dataset = load_dataset(self.hf_repo_id, split="stats", token=self.hf_token) if stats_dataset and len(stats_dataset) > 0: stats_df = stats_dataset.to_pandas() stats_df.to_sql('model_stats', conn, if_exists='replace', index=False) print(f"โ Restored model stats") stats_restored = True except Exception as e: print(f"Could not restore stats: {e}") # If stats not restored, recalculate from battles if not stats_restored: print("๐ Recalculating stats from battle history...") self._recalculate_stats_from_battles(cursor) conn.commit() conn.close() return True # Successfully restored except Exception as e: print(f"Failed to restore from HF: {e}") return False def _recalculate_stats_from_battles(self, cursor): """Recalculate model stats from battle history""" # Initialize all 4 models for model in ["GPT-5", "jetXA", "Gemini-2.5-Pro", "Claude-Opus-4.1"]: cursor.execute(''' INSERT OR REPLACE INTO model_stats (model_name, overall_score, storytelling_score, innovation_score, business_score, total_battles, wins, losses, elo_rating) VALUES (?, 5.0, 5.0, 5.0, 5.0, 0, 0, 0, 1500) ''', (model,)) # Get all battles with winners cursor.execute(''' SELECT model_a, model_b, winner, category FROM battles WHERE winner IS NOT NULL ''') battles = cursor.fetchall() # Process each battle for model_a, model_b, winner, category in battles: # Update win/loss counts if winner == model_a: cursor.execute('UPDATE model_stats SET wins = wins + 1, total_battles = total_battles + 1 WHERE model_name = ?', (model_a,)) cursor.execute('UPDATE model_stats SET losses = losses + 1, total_battles = total_battles + 1 WHERE model_name = ?', (model_b,)) # Update category scores self._update_category_scores(cursor, model_a, Category(category), True) self._update_category_scores(cursor, model_b, Category(category), False) else: cursor.execute('UPDATE model_stats SET wins = wins + 1, total_battles = total_battles + 1 WHERE model_name = ?', (model_b,)) cursor.execute('UPDATE model_stats SET losses = losses + 1, total_battles = total_battles + 1 WHERE model_name = ?', (model_a,)) # Update category scores self._update_category_scores(cursor, model_b, Category(category), True) self._update_category_scores(cursor, model_a, Category(category), False) # Recalculate ELO ratings self._recalculate_elo_from_battles(cursor) print(f"โ Recalculated stats from {len(battles)} battles") def _recalculate_elo_from_battles(self, cursor): """Recalculate ELO ratings from battle history""" # Reset ELO to 1500 cursor.execute('UPDATE model_stats SET elo_rating = 1500') # Get battles in chronological order cursor.execute(''' SELECT model_a, model_b, winner FROM battles WHERE winner IS NOT NULL ORDER BY timestamp ''') battles = cursor.fetchall() for model_a, model_b, winner in battles: # Get current ELO ratings cursor.execute('SELECT elo_rating FROM model_stats WHERE model_name = ?', (model_a,)) elo_a = cursor.fetchone()[0] cursor.execute('SELECT elo_rating FROM model_stats WHERE model_name = ?', (model_b,)) elo_b = cursor.fetchone()[0] # Calculate new ELO K = 32 if winner == model_a: expected_a = 1 / (1 + 10**((elo_b - elo_a) / 400)) new_elo_a = int(elo_a + K * (1 - expected_a)) new_elo_b = int(elo_b + K * (0 - (1 - expected_a))) else: expected_b = 1 / (1 + 10**((elo_a - elo_b) / 400)) new_elo_a = int(elo_a + K * (0 - (1 - expected_b))) new_elo_b = int(elo_b + K * (1 - expected_b)) cursor.execute('UPDATE model_stats SET elo_rating = ? WHERE model_name = ?', (new_elo_a, model_a)) cursor.execute('UPDATE model_stats SET elo_rating = ? WHERE model_name = ?', (new_elo_b, model_b)) def _sync_to_hf(self): """Sync local database to Hugging Face with improved error handling""" if not self.use_hf: print("HF sync disabled") return try: conn = sqlite3.connect(self.db_path) # Export battles battles_df = pd.read_sql_query("SELECT * FROM battles", conn) if len(battles_df) > 0: print(f"๐ค Syncing {len(battles_df)} battles to HF...") # Convert to Dataset battles_dataset = Dataset.from_pandas(battles_df) # Push to hub with retry logic max_retries = 3 for attempt in range(max_retries): try: battles_dataset.push_to_hub( self.hf_repo_id, split="train", token=self.hf_token, private=True ) print(f"โ Successfully pushed {len(battles_df)} battles to HF") break except Exception as push_error: if attempt < max_retries - 1: print(f"โ ๏ธ Push attempt {attempt + 1} failed, retrying...") time.sleep(2) # Wait before retry else: print(f"โ Failed to push to HF after {max_retries} attempts: {push_error}") # Also sync model stats for backup stats_df = pd.read_sql_query("SELECT * FROM model_stats", conn) if len(stats_df) > 0: try: stats_dataset = Dataset.from_pandas(stats_df) stats_dataset.push_to_hub( self.hf_repo_id, split="stats", token=self.hf_token, private=True ) print(f"โ Model stats synced to HF") except Exception as e: print(f"โ ๏ธ Could not sync stats: {e}") conn.close() except Exception as e: print(f"โ Critical error in HF sync: {e}") def init_database(self): """Initialize SQLite database - ONLY called when no existing data""" conn = sqlite3.connect(self.db_path) cursor = conn.cursor() cursor.execute(''' CREATE TABLE IF NOT EXISTS battles ( id TEXT PRIMARY KEY, prompt_id TEXT, prompt_text TEXT, category TEXT, model_a TEXT, model_b TEXT, response_a TEXT, response_b TEXT, winner TEXT, voter_id TEXT, timestamp DATETIME, custom_prompt INTEGER DEFAULT 0, language TEXT DEFAULT 'en' ) ''') cursor.execute(''' CREATE TABLE IF NOT EXISTS model_stats ( model_name TEXT PRIMARY KEY, overall_score REAL DEFAULT 5.0, storytelling_score REAL DEFAULT 5.0, innovation_score REAL DEFAULT 5.0, business_score REAL DEFAULT 5.0, total_battles INTEGER DEFAULT 0, wins INTEGER DEFAULT 0, losses INTEGER DEFAULT 0, elo_rating INTEGER DEFAULT 1500 ) ''') conn.commit() conn.close() self._init_models() def _init_models(self): """Initialize all 4 models""" models = ["GPT-5", "jetXA", "Gemini-2.5-Pro", "Claude-Opus-4.1"] conn = sqlite3.connect(self.db_path) cursor = conn.cursor() for model in models: cursor.execute(''' INSERT OR IGNORE INTO model_stats (model_name) VALUES (?) ''', (model,)) conn.commit() conn.close() def save_battle(self, battle: Battle): """Save battle result with proper duplicate prevention and sync""" conn = sqlite3.connect(self.db_path) cursor = conn.cursor() try: # First check if this battle already exists cursor.execute('SELECT id, winner FROM battles WHERE id = ?', (battle.id,)) existing = cursor.fetchone() if existing and existing[1]: print(f"โ ๏ธ Battle {battle.id} already has a winner: {existing[1]}") conn.close() return # Don't update if already voted # Insert or update the battle cursor.execute(''' INSERT OR REPLACE INTO battles VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ''', ( battle.id, battle.prompt_id, battle.prompt_text, battle.category.value, battle.model_a, battle.model_b, battle.response_a, battle.response_b, battle.winner, battle.voter_id, battle.timestamp.isoformat(), 1 if battle.custom_prompt else 0, battle.language )) if battle.winner: winner = battle.winner loser = battle.model_b if winner == battle.model_a else battle.model_a # Only update stats if this is a new vote if not existing or not existing[1]: print(f"๐ Updating stats: {winner} wins, {loser} loses") # Update winner stats cursor.execute(''' UPDATE model_stats SET total_battles = total_battles + 1, wins = wins + 1 WHERE model_name = ? ''', (winner,)) # Update loser stats cursor.execute(''' UPDATE model_stats SET total_battles = total_battles + 1, losses = losses + 1 WHERE model_name = ? ''', (loser,)) # Update category scores self._update_category_scores(cursor, winner, battle.category, True) self._update_category_scores(cursor, loser, battle.category, False) # Update ELO ratings self._update_elo_ratings(cursor, winner, loser) print(f"โ Stats updated for battle {battle.id}") conn.commit() print(f"๐พ Battle {battle.id} saved to local database") except Exception as e: print(f"โ Error saving battle: {e}") conn.rollback() finally: conn.close() # Sync to Hugging Face after saving self._sync_to_hf() def _update_category_scores(self, cursor, model, category, is_winner): """Update category-specific scores""" column_map = { Category.STORYTELLING: "storytelling_score", Category.INNOVATION: "innovation_score", Category.BUSINESS: "business_score" } score_column = column_map.get(category, "overall_score") cursor.execute(f'SELECT {score_column} FROM model_stats WHERE model_name = ?', (model,)) result = cursor.fetchone() if result: current_score = result[0] else: current_score = 5.0 if is_winner: new_score = min(10, current_score + 0.2) else: new_score = max(0, current_score - 0.1) cursor.execute(f'UPDATE model_stats SET {score_column} = ? WHERE model_name = ?', (new_score, model)) # Update overall score cursor.execute(''' UPDATE model_stats SET overall_score = (storytelling_score + innovation_score + business_score) / 3.0 WHERE model_name = ? ''', (model,)) def _update_elo_ratings(self, cursor, winner, loser): """Update ELO ratings""" K = 32 cursor.execute('SELECT elo_rating FROM model_stats WHERE model_name = ?', (winner,)) winner_elo = cursor.fetchone()[0] cursor.execute('SELECT elo_rating FROM model_stats WHERE model_name = ?', (loser,)) loser_elo = cursor.fetchone()[0] expected_winner = 1 / (1 + 10**((loser_elo - winner_elo) / 400)) expected_loser = 1 / (1 + 10**((winner_elo - loser_elo) / 400)) new_winner_elo = int(winner_elo + K * (1 - expected_winner)) new_loser_elo = int(loser_elo + K * (0 - expected_loser)) cursor.execute('UPDATE model_stats SET elo_rating = ? WHERE model_name = ?', (new_winner_elo, winner)) cursor.execute('UPDATE model_stats SET elo_rating = ? WHERE model_name = ?', (new_loser_elo, loser)) def get_leaderboard(self, category: Optional[Category] = None) -> pd.DataFrame: """Get leaderboard data""" conn = sqlite3.connect(self.db_path) if category: column_map = { Category.STORYTELLING: "storytelling_score", Category.INNOVATION: "innovation_score", Category.BUSINESS: "business_score" } sort_column = column_map.get(category, "overall_score") else: sort_column = "overall_score" query = f''' SELECT model_name, ROUND(overall_score, 1) as overall_score, ROUND(storytelling_score, 1) as storytelling_score, ROUND(innovation_score, 1) as innovation_score, ROUND(business_score, 1) as business_score, total_battles, wins, CASE WHEN total_battles > 0 THEN ROUND(100.0 * wins / total_battles, 1) ELSE 0 END as win_rate, elo_rating FROM model_stats ORDER BY {sort_column} DESC, elo_rating DESC ''' df = pd.read_sql_query(query, conn) conn.close() df.insert(0, 'rank', range(1, len(df) + 1)) return df def debug_database_state(self): """Debug method to check current database state""" conn = sqlite3.connect(self.db_path) cursor = conn.cursor() # Check battles count cursor.execute("SELECT COUNT(*) FROM battles") total_battles = cursor.fetchone()[0] cursor.execute("SELECT COUNT(*) FROM battles WHERE winner IS NOT NULL") voted_battles = cursor.fetchone()[0] # Check model stats cursor.execute("SELECT * FROM model_stats ORDER BY elo_rating DESC") stats = cursor.fetchall() conn.close() print("\n" + "="*50) print("๐ DATABASE STATE DEBUG") print("="*50) print(f"Total battles: {total_battles}") print(f"Voted battles: {voted_battles}") print("\nModel Stats:") print("-"*50) for stat in stats: print(f"{stat[0]:20} | Battles: {stat[5]:3} | Wins: {stat[6]:3} | ELO: {stat[8]:4}") print("="*50 + "\n") return { "total_battles": total_battles, "voted_battles": voted_battles, "model_stats": stats } # ==================== Fixed LLM Interface with 4 Models ==================== class LLMInterface: """Interface for GPT-5, jetXA, Gemini 2.5 Pro, and Claude Opus 4.1 models""" def __init__(self): self.models = ["GPT-5", "jetXA", "Gemini-2.5-Pro", "Claude-Opus-4.1"] self.response_cache = {} self.cache_enabled = False # Disable caching by default # Initialize OpenAI client for GPT-5 self.openai_client = None openai_key = os.getenv("OPENAI_API_KEY") if openai_key and OpenAI: try: self.openai_client = OpenAI(api_key=openai_key) print("โ GPT-5 client initialized") except Exception as e: print(f"โ GPT-5 initialization failed: {e}") else: print("โ ๏ธ GPT-5: No API key or OpenAI library not installed") # Initialize Gradio client for jetXA self.gradio_client = None jetxa_space = os.getenv("jetXA_API", "aiqtech/tests") hf_token = os.getenv("HF_TOKEN") if GradioClient: connection_attempts = [ lambda: GradioClient(jetxa_space, hf_token=hf_token) if hf_token else GradioClient(jetxa_space), lambda: GradioClient(f"https://huggingface.co/spaces/{jetxa_space}"), lambda: GradioClient(f"https://{jetxa_space.replace('/', '-')}.hf.space"), lambda: GradioClient(src=jetxa_space), lambda: GradioClient("aiqtech/tests") ] for i, attempt in enumerate(connection_attempts, 1): try: self.gradio_client = attempt() if hasattr(self.gradio_client, 'view_api'): api_info = self.gradio_client.view_api() print(f"โ jetXA client initialized successfully using method {i}!") break except Exception as e: if i == len(connection_attempts): print(f"โ ๏ธ jetXA: All connection attempts failed. Last error: {e}") print("Will use fallback responses for jetXA") else: continue else: print("โ ๏ธ jetXA: Gradio client not installed") # Initialize Gemini client self.gemini_client = None gemini_key = os.getenv("GEMINI_API_KEY") if gemini_key and genai: try: self.gemini_client = genai.Client(api_key=gemini_key) print("โ Gemini 2.5 Pro client initialized") except Exception as e: print(f"โ Gemini initialization failed: {e}") else: print("โ ๏ธ Gemini: No API key or google-genai library not installed") # Initialize Claude client self.claude_client = None claude_key = os.getenv("ANTHROPIC_API_KEY") if claude_key and anthropic: try: self.claude_client = anthropic.Anthropic(api_key=claude_key) print("โ Claude Opus 4.1 client initialized") except Exception as e: print(f"โ Claude initialization failed: {e}") else: print("โ ๏ธ Claude: No API key or anthropic library not installed") def clear_cache(self): """Clear all cached responses""" self.response_cache = {} print("โ Cache cleared") def generate_response_stream(self, model: str, prompt: str, language: str = "en") -> Generator[str, None, None]: """Generate streaming response with proper accumulation""" # Add language and creativity instructions if language == "ko": instruction = "์ฐฝ์์ ์ด๊ณ ํ์ ์ ์ธ ํ๊ตญ์ด ๋ต๋ณ์ ์์ฑํด์ฃผ์ธ์. ๋ ์ฐฝ์ ์ด๊ณ ์์ธํ ์์ด๋์ด๋ฅผ ์ ์ํ์ธ์." else: instruction = "Provide a highly creative and innovative response. Be original and detailed." full_prompt = f"{instruction}\n\n{prompt}" try: if model == "GPT-5": # Stream GPT-5 with proper accumulation accumulated = "" for chunk in self._stream_gpt5(full_prompt): accumulated += chunk yield accumulated # Always yield the accumulated text elif model == "jetXA": # Get full response and simulate streaming full_response = self._get_jetxa_response(full_prompt) if full_response: # Format jetXA response with proper spacing formatted_response = self._format_jetxa_response(full_response) # Simulate streaming word by word for jetXA for smoother effect words = formatted_response.split() accumulated = "" # Stream words in small batches for natural effect batch_size = 2 # Stream 2 words at a time for i in range(0, len(words), batch_size): batch = words[i:i+batch_size] for word in batch: if accumulated: accumulated += " " accumulated += word yield accumulated # Yield accumulated text after each batch time.sleep(0.03) # Small delay between batches else: # Use fallback if jetXA fails fallback = self._generate_fallback(model, prompt, language) # Stream fallback with accumulation words = fallback.split() accumulated = "" for word in words: if accumulated: accumulated += " " accumulated += word yield accumulated time.sleep(0.02) elif model == "Gemini-2.5-Pro": # Stream Gemini with proper accumulation accumulated = "" for chunk in self._stream_gemini(full_prompt): accumulated += chunk yield accumulated elif model == "Claude-Opus-4.1": # Stream Claude with proper accumulation accumulated = "" for chunk in self._stream_claude(full_prompt): accumulated += chunk yield accumulated else: # Unknown model - use fallback fallback = self._generate_fallback(model, prompt, language) # Stream fallback with accumulation words = fallback.split() accumulated = "" for word in words: if accumulated: accumulated += " " accumulated += word yield accumulated time.sleep(0.02) except Exception as e: print(f"Error streaming {model}: {e}") fallback = self._generate_fallback(model, prompt, language) yield fallback def _stream_gemini(self, prompt: str) -> Generator[str, None, None]: """Stream Gemini 2.5 Pro response""" if not self.gemini_client: fallback = self._generate_fallback("Gemini-2.5-Pro", prompt, "en") words = fallback.split() for word in words: yield word + " " time.sleep(0.02) return try: contents = [ types.Content( role="user", parts=[types.Part.from_text(text=prompt)], ), ] # ์์ ๋ ์ค์ - max_output_tokens ์ฆ๊ฐ ๋ฐ thinking_config ์ ๊ฑฐ generate_content_config = types.GenerateContentConfig( response_mime_type="text/plain", temperature=0.9, # ์ฐฝ์์ฑ์ ์ํด ์จ๋ ์์น max_output_tokens=2048, # ํ ํฐ ์ ์ฆ๊ฐ top_p=0.95, top_k=40, ) # ์ ์ฒด ์๋ต์ ์์ง full_response = "" for chunk in self.gemini_client.models.generate_content_stream( model="gemini-2.0-flash-exp", # ๋๋ "gemini-2.0-flash-thinking-exp-1219" contents=contents, config=generate_content_config, ): if chunk.text: full_response += chunk.text yield chunk.text # ์๋ต์ด ๋๋ฌด ์งง์ผ๋ฉด ์ฌ์๋ if len(full_response) < 100: print(f"โ ๏ธ Gemini response too short ({len(full_response)} chars), using fallback") fallback = self._generate_fallback("Gemini-2.5-Pro", prompt, "en") yield fallback except Exception as e: print(f"Gemini streaming error: {e}") fallback = self._generate_fallback("Gemini-2.5-Pro", prompt, "en") yield fallback def _stream_claude(self, prompt: str) -> Generator[str, None, None]: """Stream Claude Opus 4.1 response""" if not self.claude_client: fallback = self._generate_fallback("Claude-Opus-4.1", prompt, "en") words = fallback.split() for word in words: yield word + " " time.sleep(0.02) return try: with self.claude_client.messages.stream( model="claude-opus-4-1-20250805", max_tokens=1500, temperature=0.8, messages=[ { "role": "user", "content": [ { "type": "text", "text": prompt } ] } ] ) as stream: for text in stream.text_stream: yield text except Exception as e: print(f"Claude streaming error: {e}") fallback = self._generate_fallback("Claude-Opus-4.1", prompt, "en") yield fallback def _format_jetxa_response(self, text: str) -> str: """Format jetXA response with proper spacing and line breaks for better readability""" # Clean up the response first text = self._clean_markdown_response(text) # Split into lines lines = text.split('\n') formatted_lines = [] for i, line in enumerate(lines): line = line.strip() if not line: # Keep empty lines for spacing formatted_lines.append('') continue # Add extra spacing around headers if line.startswith('#'): # Add double blank line before headers (except first line) if i > 0 and formatted_lines and formatted_lines[-1].strip(): formatted_lines.append('') formatted_lines.append('') formatted_lines.append(line) # Add blank line after major headers if line.startswith('# ') or line.startswith('## '): formatted_lines.append('') # Add spacing around lists elif line.startswith('- ') or line.startswith('* ') or re.match(r'^\d+\. ', line): # Add blank line before first list item if i > 0 and formatted_lines and formatted_lines[-1].strip() and not ( formatted_lines[-1].startswith('- ') or formatted_lines[-1].startswith('* ') or re.match(r'^\d+\. ', formatted_lines[-1]) ): formatted_lines.append('') formatted_lines.append(line) else: formatted_lines.append(line) # Join with newlines result = '\n'.join(formatted_lines) # Clean up excessive blank lines (max 2 consecutive) while '\n\n\n\n' in result: result = result.replace('\n\n\n\n', '\n\n') while '\n\n\n' in result: result = result.replace('\n\n\n', '\n\n') return result.strip() def _stream_gpt5(self, prompt: str) -> Generator[str, None, None]: """Stream GPT-5 API response - returns chunks only (not accumulated)""" if not self.openai_client: fallback = self._generate_fallback("GPT-5", prompt, "en") words = fallback.split() for word in words: yield word + " " time.sleep(0.02) return try: stream = self.openai_client.chat.completions.create( model="gpt-4", # Use gpt-4 as fallback if gpt-5 not available messages=[{"role": "user", "content": prompt}], max_tokens=1500, temperature=0.8, stream=True ) for chunk in stream: if chunk.choices[0].delta.content is not None: yield chunk.choices[0].delta.content # Yield only the chunk except Exception as e: print(f"GPT-5 streaming error: {e}") fallback = self._generate_fallback("GPT-5", prompt, "en") yield fallback def _get_jetxa_response(self, prompt: str) -> str: """Get complete response from jetXA""" if not self.gradio_client: return "" try: result = self.gradio_client.predict( message=prompt, history=[], use_search=False, show_agent_thoughts=False, search_count=5, api_name="/process_query_optimized" ) response_text = "" if result and isinstance(result, (tuple, list)) and len(result) >= 1: chat_history = result[0] if isinstance(chat_history, list) and len(chat_history) > 0: for msg in reversed(chat_history): if isinstance(msg, dict): content = msg.get('content', '') if content: response_text = str(content) break elif isinstance(msg, (list, tuple)) and len(msg) >= 2: if msg[1]: response_text = str(msg[1]) break if not response_text: for i in range(1, min(3, len(result))): if result[i] and isinstance(result[i], str) and result[i].strip(): response_text = result[i] break if response_text: # Clean up any potential formatting issues response_text = self._clean_markdown_response(response_text) return response_text except Exception as e: print(f"jetXA response error: {e}") return "" def _clean_markdown_response(self, text: str) -> str: """Clean and fix common markdown formatting issues""" # Remove any duplicate markers or broken formatting text = text.replace('| ---', '|---') # Fix table separators text = text.replace('---\n---', '---') # Remove duplicate horizontal rules # Ensure proper spacing around headers lines = text.split('\n') cleaned_lines = [] for i, line in enumerate(lines): # Fix header formatting if line.strip().startswith('#'): # Ensure space after # symbols if '#' in line and not line.startswith('# '): parts = line.split('#', 1) if len(parts) > 1: hash_count = len(line) - len(line.lstrip('#')) line = '#' * hash_count + ' ' + parts[-1].strip() # Add blank line before headers (except first line) if i > 0 and cleaned_lines and cleaned_lines[-1].strip(): cleaned_lines.append('') # Fix table formatting if '|' in line: # Ensure proper table separator if all(c in ['-', '|', ' '] for c in line.strip()): line = line.replace(' ', '').replace('|-', '|---').replace('-|', '---|') if not line.startswith('|'): line = '|' + line if not line.endswith('|'): line = line + '|' cleaned_lines.append(line) return '\n'.join(cleaned_lines) def _generate_fallback(self, model: str, prompt: str, language: str) -> str: """Generate high-quality fallback response with language support and proper markdown""" # Determine category from prompt if any(word in prompt.lower() for word in ["story", "movie", "novel", "plot", "์คํ ๋ฆฌ", "์ํ", "์์ค"]): category = "story" elif any(word in prompt.lower() for word in ["innovate", "invent", "revolution", "ํ์ ", "๋ฐ๋ช ", "๊ฐ๋ฐ"]): category = "innovation" else: category = "business" # Korean responses with better markdown formatting if language == "ko": responses = { "story": { "GPT-5": """# ์์ ๊ฑฐ์ธ ## ์๋์์ค ํ ํ์ฌ๊ฐ ๋์์ ๋ชจ๋ ๊ฑฐ์ธ์ด ์ค์ ๋ก **๋ฒ์ฃ๊ฐ ์๋ฐฉ๋ ๋ค๋ฅธ ํ์๋ผ์ธ**์ผ๋ก ํตํ๋ ํฌํธ์์ ๋ฐ๊ฒฌํ๋ค.""", "jetXA": """# ๊ฐ์ ๊ณ ๊ณ ํ ## ๊ธฐํ ์๋ 2045๋ , ๊ณ ๊ณ ํ์๋ค์ ์ ๋ฌผ์ ๋ฐ๊ตดํ์ง ์๋๋คโ๊ทธ๋ค์ **๋น๊ทน์ ์ฅ์์ ๋จ๊ฒจ์ง ์์ถ๋ ์ธ๊ฐ ๊ฐ์ **์ ๋ฐ๊ตดํ๋ค.""", "Gemini-2.5-Pro": """# ๊ธฐ์ต์ ๋์๊ด ## ์ค๊ฑฐ๋ฆฌ ์ฃฝ์ ์ฌ๋๋ค์ ๋ง์ง๋ง ๊ธฐ์ต์ด ์ฑ ์ผ๋ก ๋ณํ๋ **์ฌํ ๋์๊ด**์ ๋ฐ๊ฒฌํ ์ฌ์์ ์ด์ผ๊ธฐ.""", "Claude-Opus-4.1": """# ์๊ฐ์ ์ ์์ฌ ## ๊ฐ์ ๋งค์ผ ๋ฐค ๋ค๋ฅธ ์๋๋ก ์ด๋ํ๋ ์ ์์ ๊ด๋ฆฌํ๋ฉฐ **์ญ์ฌ์ ์๊ฐ๋ค์ ๊ฐ๊พธ๋** ์ ์์ฌ์ ๋ชจํ.""" }, "innovation": { "GPT-5": """# ๐ฒ ์์ ๊ฑฐ ํ์ 5๊ฐ์ง ## 1. **์ค๋ ฅ ๋ฌด์ ๋ฐํด** (Gravity Defiance Wheels) - **๊ธฐ์ **: ์ ์๊ธฐ ๋ฆผ์ด ์ค๋ฅด๋ง๊ธธ์์ ๋ฌด๊ฒ๋ฅผ ๊ฑฐ์ 0์ผ๋ก ๊ฐ์""", "jetXA": """# ๐ง ์ด๋ฉ์ผ ํ๋ช 5๊ฐ์ง ## 1. **์๊ฐ ๋ฉ์์ง** (Temporal Messaging) ### ํต์ฌ ๊ธฐ๋ฅ - โฐ ๊ณผ๊ฑฐ/๋ฏธ๋๋ก ์ด๋ฉ์ผ ์ ์ก""", "Gemini-2.5-Pro": """# ๐ฒ ์์ ๊ฑฐ ๋ฏธ๋ ํ์ ## 1. **AI ๊ท ํ ์์คํ ** - ์์ด๋ก์ค์ฝํ์ AI๊ฐ ๊ฒฐํฉ๋์ด ์ ๋ ๋์ด์ง์ง ์๋ ์์ ๊ฑฐ""", "Claude-Opus-4.1": """# ๐ง ์ด๋ฉ์ผ ์งํ ## 1. **๊ฐ์ ์ ์ก ์์คํ ** - ํ ์คํธ์ ํจ๊ป ์์ฑ์์ ๊ฐ์ ์ํ๋ฅผ ์ ๋ฌํ๋ ๊ธฐ์ """ }, "business": { "GPT-5": """# ๐ NeuralNest - 10์ต๋ฌ๋ฌ ๋๋ก ์ฌ๋ฆฌ ํ๋ซํผ ## ์ฌ์ ๊ฐ์ ### ๋น์ > **"์๊ธฐ ์ง์ญ์์ ์ค์๊ฐ ์ ์ ๊ฑด๊ฐ ์ง์์ ์ ๊ณตํ๋ ์ธ๊ณ ์ต์ด AI ๋๋ก ํ๋ซํผ"**""", "jetXA": """# ๐พ MemoryBank - ์ 100๋ง์ ๊ตฌ๋ ์๋น์ค ## ์๋น์ค ๊ฐ์ ### ํต์ฌ ๊ฐ์น > **"๋น์ ์ ๋ชจ๋ ๊ธฐ์ต์ ์์ํ ๋ณด์กดํ๊ณ ๋ค์ ๊ฒฝํํ์ธ์"**""", "Gemini-2.5-Pro": """# ๐ค RoboChef - ๋ก๋ด ์๋ฆฌ์ฌ ํ๋ซํผ ## ๋น์ฆ๋์ค ๋ชจ๋ธ ### ๋ชฉํ > **"๋ฏธ์๋ญ ์คํ ์ ฐํ์ ์๋ฆฌ๋ฅผ ์ง์์ ์ฌํํ๋ AI ๋ก๋ด"**""", "Claude-Opus-4.1": """# ๐ข VirtualOffice - ๋ฉํ๋ฒ์ค ์ฌ๋ฌด์ค ## ์๋น์ค ์ปจ์ ### ๋ฏธ์ > **"๋ฌผ๋ฆฌ์ ์ฌ๋ฌด์ค์ด ํ์ ์๋ ์๋ฒฝํ ๊ฐ์ ๊ทผ๋ฌด ํ๊ฒฝ"**""" } } else: # English responses responses = { "story": { "GPT-5": """# The Quantum Mirror ## Synopsis A detective discovers that every mirror in the city is actually a portal to **alternate timelines where crimes were prevented**.""", "jetXA": """# Emotional Archaeology ## Concept In 2045, archaeologists don't dig for artifactsโthey excavate **compressed human emotions left in places of tragedy**.""", "Gemini-2.5-Pro": """# The Memory Library ## Plot A librarian discovers a **posthumous library** where dead people's last memories transform into books.""", "Claude-Opus-4.1": """# The Time Gardener ## Overview Adventures of a gardener who tends to a garden that **shifts to different historical eras** each night.""" }, "innovation": { "GPT-5": """# ๐ฒ 5 Bicycle Innovations ## 1. **Gravity Defiance Wheels** - **Tech**: Electromagnetic rims reduce weight to near-zero when pedaling uphill""", "jetXA": """# ๐ง 5 Email Revolutionaries ## 1. **Temporal Messaging** ### Core Features - โฐ Send emails to past/future""", "Gemini-2.5-Pro": """# ๐ฒ Future Bicycle Tech ## 1. **AI Balance System** - Gyroscope + AI creates a bicycle that never falls over""", "Claude-Opus-4.1": """# ๐ง Email Evolution ## 1. **Emotion Transfer System** - Technology that transmits the sender's emotional state with text""" }, "business": { "GPT-5": """# ๐ NeuralNest - $1B Drone Psychology Platform ## Business Overview ### Vision > **"World's first AI drone platform providing real-time mental health support in crisis zones"**""", "jetXA": """# ๐พ MemoryBank - $1000/month Subscription ## Service Overview ### Core Value > **"Preserve and re-experience all your memories forever"**""", "Gemini-2.5-Pro": """# ๐ค RoboChef - Robot Chef Platform ## Business Model ### Goal > **"AI robots that recreate Michelin star chef dishes at home"**""", "Claude-Opus-4.1": """# ๐ข VirtualOffice - Metaverse Workspace ## Service Concept ### Mission > **"Perfect virtual work environment eliminating need for physical offices"**""" } } return responses[category].get(model, responses[category]["GPT-5"]) # ==================== Main Arena Class ==================== class CreativityArena: def __init__(self): self.db = ArenaDatabase() self.llm = LLMInterface() self.current_battle = None def get_random_prompt(self, category: Category, language: str = "en") -> dict: """Get random prompt from database""" prompts = PROMPTS[category].get(language, PROMPTS[category]["en"]) return random.choice(prompts) def start_new_battle_stream(self, category: str, custom_prompt: str = None, language: str = "en"): """Start new battle with streaming responses""" # Select category if category == "random": category = random.choice(list(Category)) else: category = Category(category) # Get or set prompt if custom_prompt and custom_prompt.strip(): prompt_text = custom_prompt.strip() is_custom = True else: prompt_data = self.get_random_prompt(category, language) prompt_text = prompt_data["text"] is_custom = False # Randomly select 2 models from the 4 available models = random.sample(["GPT-5", "jetXA", "Gemini-2.5-Pro", "Claude-Opus-4.1"], 2) # Create battle structure battle = Battle( id=hashlib.md5(f"{datetime.now().isoformat()}-{random.randint(0,999999)}".encode()).hexdigest(), prompt_id=hashlib.md5(prompt_text.encode()).hexdigest(), prompt_text=prompt_text, model_a=models[0], model_b=models[1], response_a="", response_b="", winner=None, voter_id="", timestamp=datetime.now(), category=category, custom_prompt=is_custom, language=language ) self.current_battle = battle return { "prompt": prompt_text, "category": category.value, "models": models, "battle": battle } def vote(self, choice: str, voter_id: str = None): """Process vote with better error handling""" if not self.current_battle: print("โ No active battle to vote on") return {"error": "No active battle"} # Ensure we have the complete battle data if not self.current_battle.response_a or not self.current_battle.response_b: print("โ ๏ธ Battle responses not complete") return {"error": "Battle responses not complete"} # Set the winner self.current_battle.winner = self.current_battle.model_a if choice == "A" else self.current_battle.model_b self.current_battle.voter_id = voter_id or f"anonymous_{datetime.now().timestamp()}" print(f"๐ณ๏ธ Vote recorded: {choice} -> {self.current_battle.winner}") # Save to database self.db.save_battle(self.current_battle) # Force immediate sync to HF self.db._sync_to_hf() return { "model_a": self.current_battle.model_a, "model_b": self.current_battle.model_b, "winner": self.current_battle.winner } def get_leaderboard(self, category: Optional[Category] = None): """Get leaderboard from database""" return self.db.get_leaderboard(category) # ==================== Periodic Sync Function ==================== def periodic_sync(arena): """Periodically sync to HF every 30 seconds""" while True: time.sleep(30) try: arena.db._sync_to_hf() print(f"โฐ Periodic sync completed at {datetime.now()}") except Exception as e: print(f"โฐ Periodic sync failed: {e}") # ==================== Gradio Interface ==================== def create_app(): arena = CreativityArena() # Updated CSS with pastel colors and proper markdown rendering css = """ .gradio-container { background: linear-gradient(135deg, #f5e6ff 0%, #e6f3ff 50%, #ffeef5 100%); font-family: 'Inter', sans-serif; } .main-header { background: rgba(255, 255, 255, 0.98); border-radius: 20px; padding: 2rem; text-align: center; margin-bottom: 2rem; box-shadow: 0 4px 20px rgba(150, 100, 200, 0.15); border: 1px solid rgba(200, 180, 220, 0.3); } .response-container { background: rgba(255, 255, 255, 0.95); border-radius: 15px; padding: 1.5rem; min-height: 400px; max-height: 800px; overflow-y: auto; box-shadow: 0 3px 15px rgba(150, 100, 200, 0.1); transition: transform 0.3s ease; border: 1px solid rgba(200, 180, 220, 0.2); } .response-container:hover { transform: translateY(-3px); box-shadow: 0 6px 20px rgba(150, 100, 200, 0.2); } /* Markdown specific styles */ .markdown-text { line-height: 1.6; color: #2d3748; } .markdown-text h1 { font-size: 2.5em !important; font-weight: bold; color: #6b46c1; margin-top: 1em; margin-bottom: 0.5em; border-bottom: 2px solid #e9d8fd; padding-bottom: 0.3em; } .markdown-text h2 { font-size: 2em !important; font-weight: bold; color: #805ad5; margin-top: 0.8em; margin-bottom: 0.4em; } .markdown-text h3 { font-size: 1.5em !important; font-weight: bold; color: #9f7aea; margin-top: 0.6em; margin-bottom: 0.3em; } """ with gr.Blocks(title="AI Models Battle Arena", theme=gr.themes.Soft(), css=css) as app: current_lang = gr.State(value="en") # Language change handler def update_language(lang_value): return lang_value def update_ui_text(lang): ui = UI_TEXT[lang] return ( f"""
{ui['subtitle']}
Test cutting-edge AI models in creative challenges
GPT-5 vs jetXA vs Gemini 2.5 Pro vs Claude Opus 4.1