File size: 4,411 Bytes
8a254d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import pandas as pd
from pathlib import Path
from typing import Optional

# Module-level caches for the loaded dataframes. Both start as None, are
# filled on first use by get_category_dataframe() / get_language_dataframe(),
# and are reset to None by clear_cache().
_category_df_cache: Optional[pd.DataFrame] = None
_language_df_cache: Optional[pd.DataFrame] = None

def _load_category_csv() -> pd.DataFrame:
    """Read the category statistics table that ships next to this module.

    The file is looked up at ``data/stats.csv`` relative to the directory
    containing this source file. Despite the ``.csv`` suffix it is parsed as
    tab-separated text, decoded as UTF-8.

    Returns:
        pd.DataFrame: A fresh dataframe with the file's contents.
    """
    stats_file = Path(__file__).parent / "data/stats.csv"
    frame = pd.read_csv(stats_file, sep="\t", encoding='utf-8')
    return frame.copy()

def _load_language_csv() -> pd.DataFrame:
    """Read the per-language statistics table that ships next to this module.

    The file is looked up at ``data/stats_lang.csv`` relative to the directory
    containing this source file. Despite the ``.csv`` suffix it is parsed as
    tab-separated text, decoded as UTF-8.

    Returns:
        pd.DataFrame: A fresh dataframe with the file's contents.
    """
    stats_file = Path(__file__).parent / "data/stats_lang.csv"
    frame = pd.read_csv(stats_file, sep="\t", encoding='utf-8')
    return frame.copy()

def get_category_dataframe(processed: bool = True) -> pd.DataFrame:
    """
    Get the category dataframe.

    The table is read once through ``_load_category_csv`` and kept in the
    module-level ``_category_df_cache``. Every call works on a copy of the
    cached frame, so the returned dataframe can be modified freely.

    Args:
        processed: If True, returns processed dataframe (for vis_utils.py compatibility):
                   expected columns that are missing are added with defaults,
                   the columns listed in ``NUMERIC_COLS_CATEGORY`` are coerced
                   to numbers and rounded, and remaining NaN values become ''.
                  If False, returns raw dataframe sorted by Overall, highest
                   first (for data_utils.py compatibility)

    Returns:
        pd.DataFrame: The category dataframe
    """
    global _category_df_cache

    if _category_df_cache is None:
        _category_df_cache = _load_category_csv()

    df = _category_df_cache.copy()

    if processed:
        # Apply vis_utils.py processing
        required_cols = ['Model Name', 'Link', "Group", "Overall", "Med. Len.", "Med. Resp. Len.", "Parameter Size (B)", "Type", "Model Type", "Think", 'Content Generation', 'Editing', 'Data Analysis',
                        'Reasoning', 'Hallucination', 'Safety', 'Repetition',
                        'Summarization', 'Translation', 'Multi-Turn']

        # Defaults for missing text columns; every other missing column gets 0.
        # "Think" must be handled here: it is part of required_cols, so a
        # separate "add it if missing" check after this loop could never fire
        # and the column would end up zero-filled instead of "Off".
        text_defaults = {"Link": "", "Group": "", "Think": "Off"}
        for col in required_cols:
            if col not in df.columns:
                df[col] = text_defaults.get(col, 0)

        # NOTE(review): imported at call time rather than at module level,
        # presumably to avoid an import cycle with constants.py; confirm.
        from constants import NUMERIC_COLS_CATEGORY, NUMERIC_INT_COLS_CATEGORY
        for col in NUMERIC_COLS_CATEGORY:
            if col in df.columns:
                # Integer-like columns are rounded to whole numbers, the rest
                # to three decimals; unparsable values become NaN.
                decimals = 0 if col in NUMERIC_INT_COLS_CATEGORY else 3
                df[col] = pd.to_numeric(df[col], errors='coerce').round(decimals)
            else:
                df[col] = 0

        # Replace every remaining NaN (including failed numeric coercions)
        # with an empty string.
        df = df.fillna('')

    else:
        # Apply data_utils.py processing
        df = df.sort_values("Overall", ascending=False)

    return df

def get_language_dataframe(processed: bool = True) -> pd.DataFrame:
    """
    Get the language dataframe.

    The table is read once through ``_load_language_csv`` and kept in the
    module-level ``_language_df_cache``. Every call works on a copy of the
    cached frame, so the returned dataframe can be modified freely.

    Args:
        processed: If True, returns processed dataframe (for vis_utils.py compatibility):
                   expected columns that are missing are added with defaults,
                   the columns listed in ``NUMERIC_COLS_LANGUAGE`` are coerced
                   to numbers and rounded, and remaining NaN values become ''.
                  If False, returns raw dataframe sorted by Overall, highest
                   first (for data_utils.py compatibility)

    Returns:
        pd.DataFrame: The language dataframe
    """
    global _language_df_cache

    if _language_df_cache is None:
        _language_df_cache = _load_language_csv()

    df = _language_df_cache.copy()

    if processed:
        # Apply vis_utils.py processing
        language_cols = ['Model Name', 'Link', "Group", "Overall", "Med. Len.", "Med. Resp. Len.", "Parameter Size (B)", "Type", "Model Type", "Think", 'KO', 'EN', 'JA', 'ZH', 'PL', 'DE', 'PT', 'ES', 'FR', 'IT', 'RU', 'VI']

        # Defaults for missing text columns; every other missing column gets 0.
        # A missing "Think" column defaults to "Off", the same default the
        # category dataframe uses, rather than being zero-filled.
        text_defaults = {"Link": "", "Group": "", "Think": "Off"}
        for col in language_cols:
            if col not in df.columns:
                df[col] = text_defaults.get(col, 0)

        # NOTE(review): imported at call time rather than at module level,
        # presumably to avoid an import cycle with constants.py; confirm.
        from constants import NUMERIC_COLS_LANGUAGE, NUMERIC_INT_COLS_LANGUAGE
        for col in NUMERIC_COLS_LANGUAGE:
            if col in df.columns:
                # Integer-like columns are rounded to whole numbers, the rest
                # to three decimals; unparsable values become NaN.
                decimals = 0 if col in NUMERIC_INT_COLS_LANGUAGE else 3
                df[col] = pd.to_numeric(df[col], errors='coerce').round(decimals)
            else:
                df[col] = 0

        # Replace every remaining NaN (including failed numeric coercions)
        # with an empty string.
        df = df.fillna('')
    else:
        # Apply data_utils.py processing
        df = df.sort_values("Overall", ascending=False)

    return df

def clear_cache():
    """Reset both module-level dataframe caches to None.

    After this call the next dataframe getter finds its cache empty and
    loads the data file again.
    """
    global _language_df_cache, _category_df_cache
    _category_df_cache = _language_df_cache = None