# Spaces: Running
import pandas as pd | |
from pathlib import Path | |
from typing import Optional | |
# Module-level memo slots for the parsed CSV files.
# Both start as None, meaning "nothing loaded yet".
_category_df_cache: Optional[pd.DataFrame] = None
_language_df_cache: Optional[pd.DataFrame] = None
def _load_category_csv() -> pd.DataFrame:
    """Read ``data/stats.csv`` (UTF-8, tab-separated) located next to this module.

    Returns:
        pd.DataFrame: A copy of the freshly parsed table.
    """
    csv_file = Path(__file__).parent / "data/stats.csv"
    parsed = pd.read_csv(str(csv_file), sep="\t", encoding='utf-8')
    return parsed.copy()
def _load_language_csv() -> pd.DataFrame:
    """Read ``data/stats_lang.csv`` (UTF-8, tab-separated) located next to this module.

    Returns:
        pd.DataFrame: A copy of the freshly parsed table.
    """
    csv_file = Path(__file__).parent / "data/stats_lang.csv"
    parsed = pd.read_csv(str(csv_file), sep="\t", encoding='utf-8')
    return parsed.copy()
def get_category_dataframe(processed: bool = True) -> pd.DataFrame:
    """Return a copy of the cached category dataframe, loading it on first use.

    Args:
        processed: If True, return the dataframe prepared for vis_utils.py:
            expected columns that are missing are added, the columns listed
            in ``constants.NUMERIC_COLS_CATEGORY`` are coerced to numbers and
            rounded, and remaining NaN values are replaced with ''.
            If False, return the raw dataframe sorted by "Overall" in
            descending order (for data_utils.py compatibility).

    Returns:
        pd.DataFrame: A dataframe independent of the cache; mutating it does
        not change what later calls return.
    """
    global _category_df_cache
    if _category_df_cache is None:
        _category_df_cache = _load_category_csv()
    df = _category_df_cache.copy()

    if not processed:
        # data_utils.py path: no cleaning, just rank by overall score.
        return df.sort_values("Overall", ascending=False)

    # The "Off" default must be applied BEFORE the generic fill below:
    # "Think" is one of the required columns, so if it were handled afterwards
    # the generic fill would already have created it with 0 and this default
    # would never take effect.
    if "Think" not in df.columns:
        df["Think"] = "Off"

    required_cols = ['Model Name', 'Link', "Group", "Overall", "Med. Len.", "Med. Resp. Len.", "Parameter Size (B)", "Type", "Model Type", "Think", 'Content Generation', 'Editing', 'Data Analysis',
                     'Reasoning', 'Hallucination', 'Safety', 'Repetition',
                     'Summarization', 'Translation', 'Multi-Turn']
    for col in required_cols:
        if col not in df.columns:
            # "Link"/"Group" default to an empty string; every other missing
            # column (including ones that look textual, e.g. "Type") keeps
            # the existing default of 0.
            df[col] = "" if col in ("Link", "Group") else 0

    # Imported at call time rather than at module import time.
    from constants import NUMERIC_COLS_CATEGORY, NUMERIC_INT_COLS_CATEGORY
    for col in NUMERIC_COLS_CATEGORY:
        if col in df.columns:
            # Integer-like columns are rounded to whole numbers, the rest to
            # three decimals; unparseable values become NaN (errors='coerce').
            digits = 0 if col in NUMERIC_INT_COLS_CATEGORY else 3
            df[col] = pd.to_numeric(df[col], errors='coerce').round(digits)
        else:
            df[col] = 0

    # Blank out whatever is still NaN (including values coerced above).
    return df.fillna('')
def get_language_dataframe(processed: bool = True) -> pd.DataFrame:
    """Return a copy of the cached language dataframe, loading it on first use.

    Args:
        processed: If True, return the dataframe prepared for vis_utils.py:
            expected columns that are missing are added, the columns listed
            in ``constants.NUMERIC_COLS_LANGUAGE`` are coerced to numbers and
            rounded, and remaining NaN values are replaced with ''.
            If False, return the raw dataframe sorted by "Overall" in
            descending order (for data_utils.py compatibility).

    Returns:
        pd.DataFrame: A dataframe independent of the cache; mutating it does
        not change what later calls return.
    """
    global _language_df_cache
    if _language_df_cache is None:
        _language_df_cache = _load_language_csv()
    df = _language_df_cache.copy()

    if not processed:
        # data_utils.py path: no cleaning, just rank by overall score.
        return df.sort_values("Overall", ascending=False)

    # A missing "Think" column defaults to "Off" (the default that
    # get_category_dataframe declares for the same column) instead of the
    # numeric 0 the generic fill below would otherwise assign.
    if "Think" not in df.columns:
        df["Think"] = "Off"

    language_cols = ['Model Name', 'Link', "Group", "Overall", "Med. Len.", "Med. Resp. Len.", "Parameter Size (B)", "Type", "Model Type", "Think", 'KO', 'EN', 'JA', 'ZH', 'PL', 'DE', 'PT', 'ES', 'FR', 'IT', 'RU', 'VI']
    for col in language_cols:
        if col not in df.columns:
            # "Link"/"Group" default to an empty string; every other missing
            # column keeps the existing default of 0.
            df[col] = "" if col in ("Link", "Group") else 0

    # Imported at call time rather than at module import time.
    from constants import NUMERIC_COLS_LANGUAGE, NUMERIC_INT_COLS_LANGUAGE
    for col in NUMERIC_COLS_LANGUAGE:
        if col in df.columns:
            # Integer-like columns are rounded to whole numbers, the rest to
            # three decimals; unparseable values become NaN (errors='coerce').
            digits = 0 if col in NUMERIC_INT_COLS_LANGUAGE else 3
            df[col] = pd.to_numeric(df[col], errors='coerce').round(digits)
        else:
            df[col] = 0

    # Blank out whatever is still NaN (including values coerced above).
    return df.fillna('')
def clear_cache():
    """Reset both module-level dataframe caches to None.

    After this call the getters find an empty cache and load the CSV files
    again on their next invocation.
    """
    global _category_df_cache, _language_df_cache
    _category_df_cache = _language_df_cache = None