"""Handle leaderboard CSV submissions: validate, archive the raw CSV in the
queue repo, convert the scores to the leaderboard JSON layout, and upload the
JSON to the results repo on the Hugging Face Hub."""

import json
import numbers
import os
from datetime import datetime, timezone

import pandas as pd

from src.about import SpeechTasks, Tasks
from src.display.formatting import styled_error, styled_message, styled_warning
from src.display.utils import REGION_MAP
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, RESULTS_REPO, TOKEN

REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None

# Label of the mandatory summary row in every submitted CSV.
_AVERAGE_ROW = "Average (Micro)"


def handle_csv_submission(
    model_name: str,
    csv_file,  # uploaded file path
    result_type: str,
):
    """Process one CSV submission end to end.

    Saves the raw CSV into the queue repo, then converts it to JSON and
    uploads it to the results repo. Returns a styled HTML message
    (success or error) for display in the UI.
    """
    if not model_name:
        return styled_error("Please provide a model name.")
    if csv_file is None:
        return styled_error("Please provide a CSV file with results.")

    df = pd.read_csv(csv_file)
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Persist the raw CSV locally, push it to the queue repo, then clean up.
    subdir = os.path.join(EVAL_REQUESTS_PATH, result_type)
    os.makedirs(subdir, exist_ok=True)
    filename = f"{current_time}_{model_name}_{result_type}_results.csv"
    # NOTE(review): the remote filename was garbled in the source ("(unknown)");
    # assuming it mirrors the local filename — confirm against the queue repo layout.
    remote_path = f"msteb_{result_type}_requests/{filename}"
    csv_save_path = os.path.join(subdir, filename)
    df.to_csv(csv_save_path, index=False)

    print(f"Uploading to {QUEUE_REPO}/{remote_path}")
    API.upload_file(
        path_or_fileobj=csv_save_path,
        path_in_repo=remote_path,
        repo_id=QUEUE_REPO,
        repo_type="dataset",  # or "model" if you made the repo that way
        commit_message=f"Add {result_type} request for {model_name} at {current_time}",
    )
    # Remove the local file now that it lives in the Hub repo.
    os.remove(csv_save_path)

    # Convert the dataframe to JSON and upload it to the results repo.
    try:
        convert_csv_to_json_and_upload(df, model_name, result_type)
    except ValueError as e:
        return styled_error(str(e))

    return styled_message(f"Results CSV successfully submitted for `{model_name}`!")


def find_task_by_col_name(col_name, enum_cls):
    """Return the task enum member whose display column name is *col_name*,
    or None when no member matches."""
    for task in enum_cls:
        if task.value.col_name == col_name:
            return task
    return None


def _is_score(val) -> bool:
    """True for real numeric scalars (covers numpy ints/floats, excludes NaN/None).

    numbers.Real is used instead of (int, float) because pandas yields numpy
    scalars (e.g. np.int64) that are not subclasses of the builtin int.
    """
    return val is not None and isinstance(val, numbers.Real) and not pd.isna(val)


def _validate_csv(df: pd.DataFrame, task_display_names: set) -> None:
    """Raise ValueError when the CSV does not match the expected layout."""
    if "Region" not in df.columns:
        raise ValueError("Missing 'Region' column in CSV.")
    df_columns = set(df.columns[1:])  # exclude Region column
    if not df_columns.issubset(task_display_names):
        extra = df_columns - task_display_names
        raise ValueError(f"Extra columns in CSV: {extra}")
    if _AVERAGE_ROW not in df["Region"].values:
        raise ValueError("Missing row for 'Average (Micro)'")
    for region in df["Region"]:
        if region != _AVERAGE_ROW and region not in REGION_MAP:
            raise ValueError(f"Region '{region}' not found in REGION_MAP keys.")


def _row_to_scores(row, task_enum) -> dict:
    """Map one CSV row to {benchmark: {metric: fraction}} for its numeric cells."""
    scores = {}
    for col, val in row.items():
        if col == "Region" or not _is_score(val):
            continue
        task = find_task_by_col_name(col, task_enum)
        # Scores are entered as percentages; the leaderboard stores fractions.
        scores[task.value.benchmark] = {task.value.metric: val / 100}
    return scores


def convert_csv_to_json_and_upload(df: pd.DataFrame, model_name: str, result_type: str):
    """Validate *df*, build the leaderboard JSON, and upload it to RESULTS_REPO.

    Raises ValueError when the CSV layout is invalid or the
    'Average (Micro)' row contains no numeric values.
    """
    task_enum = Tasks if result_type == "text" else SpeechTasks
    task_display_names = {t.value.col_name for t in task_enum}
    _validate_csv(df, task_display_names)

    # --- Build JSON: one entry per region plus the micro-average results ---
    model_json = {
        "config": {"model_name": model_name},
        "results": {},
        "regions": {},
    }
    at_least_one_number = False
    for _, row in df.iterrows():
        region_display = row["Region"]
        scores = _row_to_scores(row, task_enum)
        if region_display == _AVERAGE_ROW:
            model_json["results"] = scores
            # Only the average row counts toward the "has numbers" check,
            # matching the original submission contract.
            at_least_one_number = at_least_one_number or bool(scores)
        else:
            model_json["regions"][REGION_MAP[region_display]] = scores

    if not at_least_one_number:
        raise ValueError("No valid numeric results found in the CSV. Please check your input.")

    # --- Save locally ---
    subdir = os.path.join(EVAL_RESULTS_PATH, result_type)
    os.makedirs(subdir, exist_ok=True)
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    filename = f"{current_time}_{model_name}_{result_type}.json"
    json_save_path = os.path.join(subdir, filename)
    with open(json_save_path, "w") as f:
        json.dump(model_json, f, indent=2)

    # --- Upload to HF Hub ---
    # NOTE(review): the remote filename was garbled in the source ("(unknown)");
    # assuming it mirrors the local filename — confirm against the results repo layout.
    remote_path = f"msteb_leaderboard/msteb_{result_type}_results/{filename}"
    API.upload_file(
        path_or_fileobj=json_save_path,
        path_in_repo=remote_path,
        repo_id=RESULTS_REPO,
        repo_type="dataset",
        commit_message=f"Upload results for {model_name} ({result_type}) at {current_time}",
    )
    os.remove(json_save_path)

    print(f"Uploaded to {RESULTS_REPO}/{current_time}")
    return f"Uploaded to {RESULTS_REPO}/{current_time}"