Spaces:
Running
Running
File size: 4,411 Bytes
8a254d6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
import pandas as pd
from pathlib import Path
from typing import Optional
# Module-level caches. Each stays None until the matching get_*_dataframe()
# call loads its CSV, and is set back to None by clear_cache().
_category_df_cache: Optional[pd.DataFrame] = None
_language_df_cache: Optional[pd.DataFrame] = None
def _load_category_csv() -> pd.DataFrame:
    """Read the tab-delimited category statistics file stored beside this module.

    Returns:
        pd.DataFrame: A copy of the parsed contents of ``data/stats.csv``.
    """
    csv_path = Path(__file__).parent / "data/stats.csv"
    frame = pd.read_csv(str(csv_path), encoding='utf-8', delimiter="\t")
    return frame.copy()
def _load_language_csv() -> pd.DataFrame:
    """Read the tab-delimited language statistics file stored beside this module.

    Returns:
        pd.DataFrame: A copy of the parsed contents of ``data/stats_lang.csv``.
    """
    csv_path = Path(__file__).parent / "data/stats_lang.csv"
    frame = pd.read_csv(str(csv_path), encoding='utf-8', delimiter="\t")
    return frame.copy()
def get_category_dataframe(processed: bool = True) -> pd.DataFrame:
    """
    Get the category dataframe.

    The CSV is loaded on first use and kept in a module-level cache; every call
    works on a copy of the cached frame, so the cache itself is never modified
    by the processing below.

    Args:
        processed: If True, returns processed dataframe (for vis_utils.py
            compatibility): expected columns that are missing are added with
            defaults, the columns listed in ``constants.NUMERIC_COLS_CATEGORY``
            are coerced to numbers and rounded, and remaining NaN values are
            replaced with empty strings.
            If False, returns raw dataframe sorted by "Overall" in descending
            order (for data_utils.py compatibility).

    Returns:
        pd.DataFrame: The category dataframe
    """
    global _category_df_cache
    if _category_df_cache is None:
        _category_df_cache = _load_category_csv()
    df = _category_df_cache.copy()

    if not processed:
        # Apply data_utils.py processing
        return df.sort_values("Overall", ascending=False)

    # Apply vis_utils.py processing
    required_cols = ['Model Name', 'Link', "Group", "Overall", "Med. Len.", "Med. Resp. Len.", "Parameter Size (B)", "Type", "Model Type", "Think", 'Content Generation', 'Editing', 'Data Analysis',
                     'Reasoning', 'Hallucination', 'Safety', 'Repetition',
                     'Summarization', 'Translation', 'Multi-Turn']
    # Defaults for missing non-numeric columns. "Think" is listed here because
    # the former standalone `"Think" not in df.columns` check ran only after
    # this pass had already created the column with 0, so its "Off" default
    # could never take effect.
    text_defaults = {"Link": "", "Group": "", "Think": "Off"}
    for col in required_cols:
        if col not in df.columns:
            df[col] = text_defaults.get(col, 0)

    # Function-local import kept from the original code
    # (presumably avoids an import cycle with constants — TODO confirm).
    from constants import NUMERIC_COLS_CATEGORY, NUMERIC_INT_COLS_CATEGORY
    for col in NUMERIC_COLS_CATEGORY:
        if col not in df.columns:
            df[col] = 0
            continue
        # Integer-like columns are rounded to whole numbers, the rest to 3 places.
        decimals = 0 if col in NUMERIC_INT_COLS_CATEGORY else 3
        # errors='coerce' turns unparseable cells into NaN, which fillna blanks below.
        df[col] = pd.to_numeric(df[col], errors='coerce').round(decimals)

    return df.fillna('')
def get_language_dataframe(processed: bool = True) -> pd.DataFrame:
    """
    Return the per-language statistics table.

    The CSV is read on first use and cached at module level; each call operates
    on a copy of the cached frame.

    Args:
        processed: When True, return the processed frame (vis_utils.py
            compatibility): missing expected columns are added, columns named
            in ``constants.NUMERIC_COLS_LANGUAGE`` are coerced to numeric and
            rounded, and NaN values are replaced with empty strings.
            When False, return the raw frame sorted by "Overall", descending
            (data_utils.py compatibility).

    Returns:
        pd.DataFrame: The language dataframe
    """
    global _language_df_cache
    if _language_df_cache is None:
        _language_df_cache = _load_language_csv()
    frame = _language_df_cache.copy()

    if not processed:
        # data_utils.py path: raw data, best "Overall" first.
        return frame.sort_values("Overall", ascending=False)

    # vis_utils.py path: ensure every expected column exists.
    expected = ['Model Name', 'Link', "Group", "Overall", "Med. Len.", "Med. Resp. Len.", "Parameter Size (B)", "Type", "Model Type", "Think", 'KO', 'EN', 'JA', 'ZH', 'PL', 'DE', 'PT', 'ES', 'FR', 'IT', 'RU', 'VI']
    blank_by_default = ("Link", "Group")
    for name in expected:
        if name in frame.columns:
            continue
        frame[name] = "" if name in blank_by_default else 0

    from constants import NUMERIC_COLS_LANGUAGE, NUMERIC_INT_COLS_LANGUAGE
    for name in NUMERIC_COLS_LANGUAGE:
        if name not in frame.columns:
            frame[name] = 0
            continue
        # Whole-number columns round to 0 places, all others to 3.
        places = 0 if name in NUMERIC_INT_COLS_LANGUAGE else 3
        frame[name] = pd.to_numeric(frame[name], errors='coerce').round(places)

    return frame.fillna('')
def clear_cache():
    """Reset both cached dataframes to None so the next access reloads them."""
    global _category_df_cache, _language_df_cache
    _category_df_cache = _language_df_cache = None
|