Spaces:
Running
Running
File size: 5,305 Bytes
3b3db42 fed47e0 8b1f7a0 8fc70f8 3b3db42 8fc70f8 8b1f7a0 54eae7e 8b1f7a0 fed47e0 8b1f7a0 fed47e0 54eae7e fed47e0 54eae7e 8b1f7a0 fed47e0 8b1f7a0 fed47e0 8fc70f8 8b1f7a0 fed47e0 8b1f7a0 fed47e0 8b1f7a0 fed47e0 8b1f7a0 fed47e0 8b1f7a0 8fc70f8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import json
import os
import pandas as pd
from datetime import datetime, timezone
from src.about import Tasks, SpeechTasks
from src.display.formatting import styled_error, styled_message, styled_warning
from src.display.utils import REGION_MAP
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO, RESULTS_REPO, EVAL_RESULTS_PATH
# Module-level caches; None until initialized.
# NOTE(review): neither name is referenced in this chunk — presumably they are
# populated/read elsewhere in the app (e.g. at startup); verify before removing.
REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None
def handle_csv_submission(
    model_name: str,
    csv_file,  # local path of the uploaded CSV file
    result_type: str,
):
    """Accept a CSV results submission for the leaderboard.

    Archives the raw CSV in the requests (queue) repo, then converts it to
    the leaderboard JSON format and uploads it to the results repo.

    Args:
        model_name: Display name of the submitted model.
        csv_file: Local path of the uploaded CSV file.
        result_type: Track identifier (e.g. "text" or speech) used to pick
            the target subdirectory and remote repo path.

    Returns:
        A styled HTML message (success or error) for the UI.
    """
    if not model_name:
        return styled_error("Please provide a model name.")
    if csv_file is None:
        return styled_error("Please provide a CSV file with results.")

    try:
        df = pd.read_csv(csv_file)
    except Exception as e:
        # Surface unreadable/malformed CSVs to the user instead of crashing the app.
        return styled_error(f"Could not read CSV file: {e}")

    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Save the uploaded CSV locally so it can be pushed to the queue repo.
    subdir = os.path.join(EVAL_REQUESTS_PATH, result_type)
    os.makedirs(subdir, exist_ok=True)
    filename = f"{current_time}_{model_name}_{result_type}_results.csv"
    # Fix: include the generated filename in the remote path (previously a
    # broken "(unknown)" placeholder, so every upload targeted the same path).
    remote_path = f"msteb_{result_type}_requests/{filename}"
    csv_save_path = os.path.join(subdir, filename)
    df.to_csv(csv_save_path, index=False)

    print(f"Uploading to {QUEUE_REPO}/{remote_path}")
    API.upload_file(
        path_or_fileobj=csv_save_path,
        path_in_repo=remote_path,
        repo_id=QUEUE_REPO,
        repo_type="dataset",  # or "model" if you made the repo that way
        commit_message=f"Add {result_type} request for {model_name} at {current_time}",
    )
    # The local copy was only needed for the upload.
    os.remove(csv_save_path)

    # Convert the dataframe to the leaderboard JSON format and upload it to
    # the results repo; validation problems are reported back to the user.
    try:
        convert_csv_to_json_and_upload(df, model_name, result_type)
    except ValueError as e:
        return styled_error(f"{str(e)}")
    return styled_message(f"Results CSV successfully submitted for `{model_name}`!")
def find_task_by_col_name(col_name, enum_cls):
    """Return the member of *enum_cls* whose task ``col_name`` matches, or None.

    Args:
        col_name: Display column name to look up.
        enum_cls: Task enum whose members carry a ``.value.col_name`` attribute.
    """
    matches = (member for member in enum_cls if member.value.col_name == col_name)
    return next(matches, None)
def convert_csv_to_json_and_upload(df: pd.DataFrame, model_name: str, result_type: str):
    """Validate a results dataframe, convert it to leaderboard JSON, and upload it.

    The CSV is expected to have a leading ``Region`` column, one column per
    task display name, one row per region plus an ``Average (Micro)`` row.

    Args:
        df: Parsed results CSV.
        model_name: Display name of the submitted model.
        result_type: "text" selects ``Tasks``; anything else selects ``SpeechTasks``.

    Returns:
        A short status string with the uploaded remote path.

    Raises:
        ValueError: On unknown columns/regions, a missing average row, or a
            CSV with no numeric results at all.
    """
    task_enum = Tasks if result_type == "text" else SpeechTasks
    task_display_names = {t.value.col_name for t in task_enum}
    average_row = "Average (Micro)"

    # --- Validation ---
    df_columns = set(df.columns[1:])  # every column except the leading "Region"
    extra = df_columns - task_display_names
    if extra:
        raise ValueError(f"Extra columns in CSV: {extra}")
    region_names = df["Region"].tolist()
    if average_row not in region_names:
        raise ValueError("Missing row for 'Average (Micro)'")
    for region in region_names:
        if region != average_row and region not in REGION_MAP:
            raise ValueError(f"Region '{region}' not found in REGION_MAP keys.")

    # --- Build JSON ---
    model_json = {
        "config": {"model_name": model_name},
        "results": {},
        "regions": {},
    }
    at_least_one_number = False
    for _, row in df.iterrows():
        region_display = row["Region"]
        # Collect {benchmark: {metric: score}} for every numeric cell in the row.
        scores = {}
        for col, val in row.items():
            if col == "Region":
                continue
            # Fix: pd.api.types.is_number also accepts numpy integer cells
            # (np.int64 is NOT a Python int subclass), which the previous
            # isinstance(val, (int, float)) check silently dropped.
            if pd.notna(val) and pd.api.types.is_number(val):
                task = find_task_by_col_name(col, task_enum)
                scores[task.value.benchmark] = {task.value.metric: val / 100}
        if region_display == average_row:
            model_json["results"].update(scores)
            # Only the average row decides whether the CSV counts as non-empty
            # (preserves the original behavior).
            if scores:
                at_least_one_number = True
        else:
            # Regions with no numeric cells still get an (empty) entry.
            model_json["regions"][REGION_MAP[region_display]] = scores

    if not at_least_one_number:
        raise ValueError("No valid numeric results found in the CSV. Please check your input.")

    # --- Save locally ---
    subdir = os.path.join(EVAL_RESULTS_PATH, result_type)
    os.makedirs(subdir, exist_ok=True)
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    filename = f"{current_time}_{model_name}_{result_type}.json"
    json_save_path = os.path.join(subdir, filename)
    with open(json_save_path, "w") as f:
        json.dump(model_json, f, indent=2)

    # --- Upload to HF Hub ---
    # Fix: include the generated filename in the remote path (previously a
    # broken "(unknown)" placeholder, so every upload targeted the same path).
    remote_path = f"msteb_leaderboard/msteb_{result_type}_results/{filename}"
    API.upload_file(
        path_or_fileobj=json_save_path,
        path_in_repo=remote_path,
        repo_id=RESULTS_REPO,
        repo_type="dataset",
        commit_message=f"Upload results for {model_name} ({result_type}) at {current_time}",
    )
    os.remove(json_save_path)
    # Fix: report the actual remote path (previously printed the timestamp).
    print(f"Uploaded to {RESULTS_REPO}/{remote_path}")
    return f"Uploaded to {RESULTS_REPO}/{remote_path}"
|