"""Cached loaders for the category and language statistics tables."""

from pathlib import Path
from typing import Optional

import pandas as pd

# Module-level caches: each TSV is read from disk at most once until
# clear_cache() is called. Accessors always hand out copies, so callers
# cannot mutate the cached frames.
_category_df_cache: Optional[pd.DataFrame] = None
_language_df_cache: Optional[pd.DataFrame] = None

# Directory holding the TSV files, resolved relative to this module.
_DATA_DIR = Path(__file__).parent / "data"

# Columns that the "processed" view guarantees for both tables.
# NOTE(review): the source as received has a line break inside the
# "Med. Resp. / Len." column name (same position in both tables); it is
# reproduced here as "\n". Confirm against the actual CSV header.
_COMMON_COLS = [
    "Model Name",
    "Link",
    "Group",
    "Overall",
    "Med. Len.",
    "Med. Resp. \nLen.",
    "Parameter Size (B)",
    "Type",
    "Model Type",
    "Think",
]

_CATEGORY_SCORE_COLS = [
    "Content Generation",
    "Editing",
    "Data Analysis",
    "Reasoning",
    "Hallucination",
    "Safety",
    "Repetition",
    "Summarization",
    "Translation",
    "Multi-Turn",
]

_LANGUAGE_SCORE_COLS = [
    "KO",
    "EN",
    "JA",
    "ZH",
    "PL",
    "DE",
    "PT",
    "ES",
    "FR",
    "IT",
    "RU",
    "VI",
]


def _read_tsv(filename: str) -> pd.DataFrame:
    """Read a UTF-8, tab-delimited file from the module's data directory."""
    return pd.read_csv(_DATA_DIR / filename, encoding="utf-8", delimiter="\t")


def _load_category_csv() -> pd.DataFrame:
    """Load the category CSV file (UTF-8, tab-delimited)."""
    return _read_tsv("stats.csv")


def _load_language_csv() -> pd.DataFrame:
    """Load the language CSV file (UTF-8, tab-delimited)."""
    return _read_tsv("stats_lang.csv")


def _prepare_for_display(df, required_cols, numeric_cols, int_cols, defaults):
    """Apply the "processed" pipeline to ``df`` and return the result.

    Args:
        df: Frame to process; it is modified in place, so pass a copy.
        required_cols: Columns that must exist; missing ones are created.
        numeric_cols: Columns coerced with ``pd.to_numeric`` (invalid
            values become NaN) and rounded; missing ones are created as 0.
        int_cols: Subset of ``numeric_cols`` rounded to 0 decimals; the
            remaining numeric columns are rounded to 3 decimals.
        defaults: Per-column fill value for missing required columns;
            columns not listed default to 0.

    Returns:
        The processed frame with every remaining NaN replaced by ``''``.
    """
    for col in required_cols:
        if col not in df.columns:
            df[col] = defaults.get(col, 0)

    for col in numeric_cols:
        if col in df.columns:
            digits = 0 if col in int_cols else 3
            df[col] = pd.to_numeric(df[col], errors="coerce").round(digits)
        else:
            df[col] = 0

    return df.fillna("")


def get_category_dataframe(processed: bool = True) -> pd.DataFrame:
    """Get the category dataframe.

    Args:
        processed: If True, returns processed dataframe (for vis_utils.py
            compatibility). If False, returns raw dataframe sorted by
            Overall, descending (for data_utils.py compatibility).

    Returns:
        pd.DataFrame: A copy of the cached category dataframe, processed
        or sorted as requested.
    """
    global _category_df_cache
    if _category_df_cache is None:
        _category_df_cache = _load_category_csv()

    df = _category_df_cache.copy()
    if not processed:
        return df.sort_values("Overall", ascending=False)

    # Imported lazily, as in the original, so the unprocessed path does not
    # need the constants module.
    from constants import NUMERIC_COLS_CATEGORY, NUMERIC_INT_COLS_CATEGORY

    # A missing "Think" column defaults to "Off". The original's fallback
    # for this ran only after "Think" had already been created with 0, so
    # it could never take effect.
    return _prepare_for_display(
        df,
        required_cols=_COMMON_COLS + _CATEGORY_SCORE_COLS,
        numeric_cols=NUMERIC_COLS_CATEGORY,
        int_cols=NUMERIC_INT_COLS_CATEGORY,
        defaults={"Link": "", "Group": "", "Think": "Off"},
    )


def get_language_dataframe(processed: bool = True) -> pd.DataFrame:
    """Get the language dataframe.

    Args:
        processed: If True, returns processed dataframe (for vis_utils.py
            compatibility). If False, returns raw dataframe sorted by
            Overall, descending (for data_utils.py compatibility).

    Returns:
        pd.DataFrame: A copy of the cached language dataframe, processed
        or sorted as requested.
    """
    global _language_df_cache
    if _language_df_cache is None:
        _language_df_cache = _load_language_csv()

    df = _language_df_cache.copy()
    if not processed:
        return df.sort_values("Overall", ascending=False)

    # Imported lazily, as in the original, so the unprocessed path does not
    # need the constants module.
    from constants import NUMERIC_COLS_LANGUAGE, NUMERIC_INT_COLS_LANGUAGE

    # The language table never had an "Off" fallback for "Think"; a missing
    # column keeps defaulting to 0 to preserve existing behavior.
    return _prepare_for_display(
        df,
        required_cols=_COMMON_COLS + _LANGUAGE_SCORE_COLS,
        numeric_cols=NUMERIC_COLS_LANGUAGE,
        int_cols=NUMERIC_INT_COLS_LANGUAGE,
        defaults={"Link": "", "Group": ""},
    )


def clear_cache():
    """Clear the cached dataframes to force reload on next access."""
    global _category_df_cache, _language_df_cache
    _category_df_cache = None
    _language_df_cache = None