TRUEBench / src /data_utils.py
송종윤/AI Productivity팀(SR)/삼성전자
Initial commit
8a254d6
import pandas as pd
from pathlib import Path
def get_dataframe_category():
    """
    Return the category DataFrame produced by ``src.data_loader``.

    The loader is called with ``processed=False``. It is imported when this
    function runs rather than when the module loads
    (NOTE(review): presumably to avoid a circular import - confirm).
    """
    from src.data_loader import get_category_dataframe as load_category_frame

    category_frame = load_category_frame(processed=False)
    return category_frame
def get_dataframe_language():
    """
    Return the language DataFrame produced by ``src.data_loader``.

    The loader is called with ``processed=False``. It is imported when this
    function runs rather than when the module loads
    (NOTE(review): presumably to avoid a circular import - confirm).
    """
    from src.data_loader import get_language_dataframe as load_language_frame

    language_frame = load_language_frame(processed=False)
    return language_frame
import json
def get_length_category_df(selected_category):
    """
    Build a per-model table of length statistics for one category.

    Reads ``data/length_data.json`` (located next to this module), which maps
    each model name to a dict of per-category statistics, and emits one row
    per model in file order.

    Args:
        selected_category: Category key to look up under every model.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Model Name``,
        ``Min Len. (<category>)``, ``Max Len. (<category>)``,
        ``Med. Len. (<category>)`` and ``Med. Resp. Len. (<category>)``.
        A statistic that is absent for a model (or a category the model does
        not have) is left as a missing value. The columns are present even
        when the JSON file contains no models.
    """
    json_path = Path(__file__).parent / "data/length_data.json"
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Output column label -> key of that statistic inside the JSON entry.
    # Building every label from one template keeps them consistent (the
    # "Max" label previously carried a stray closing parenthesis).
    stat_columns = {
        f"Min Len. ({selected_category})": "Min",
        f"Max Len. ({selected_category})": "Max",
        f"Med. Len. ({selected_category})": "Med",
        f"Med. Resp. Len. ({selected_category})": "Med Resp",
    }

    rows = []
    for model_name, stats in data.items():
        cat = stats.get(selected_category, {})
        row = {"Model Name": model_name}
        for column, stat_key in stat_columns.items():
            row[column] = cat.get(stat_key)
        rows.append(row)

    # Explicit columns keep the schema stable when there are no rows.
    return pd.DataFrame(rows, columns=["Model Name", *stat_columns])
def get_length_category_list():
    """
    List the category names found in ``data/length_data.json``.

    The names are the keys of the first model entry in the file, in file
    order, with ``'Overall'`` left out. An empty file yields an empty list.
    """
    source = Path(__file__).parent / "data/length_data.json"
    with open(source, "r", encoding="utf-8") as handle:
        per_model = json.load(handle)

    if not per_model:
        return []

    # Only the first model entry is consulted for the category names.
    reference_entry = next(iter(per_model.values()))
    return [name for name in reference_entry.keys() if name != "Overall"]