import json
import os
import re
from datetime import datetime, timedelta, timezone
from typing import Optional

import gradio as gr
import yaml
from huggingface_hub import snapshot_download

from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import (
    API,
    EVAL_REQUESTS_PATH,
    FAILED_EVAL_REQUESTS_PATH,
    FAILED_QUEUE_REPO,
    QUEUE_REPO,
    REPO_ID,
    TOKEN,
)
from src.submission.check_validity import (
    already_submitted_models,
    check_model_card,
    get_model_size,
)
from utils import download_with_restart

REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None


def restart_space():
    API.restart_space(repo_id=REPO_ID)


def add_new_eval_option(
    contact_email: str,
    model: str,
    model_type: str,
    think_type: str,
    precision: str,
    response_prefix: str,
    requirements: str,
    user_state: str,
    organization_list: list,
    yml_textbox: str,
    upbox,
):
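    """Validate a leaderboard submission and queue it for evaluation.

    Each check records only the first failure in ERROR_MESSAGE; the entry is
    then written to the pending queue (no error) or to the failed queue
    (validation error), and a styled status message is returned to the UI.
    """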
    ERROR_MESSAGE = None

    # Validate email format
    email_regex = r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$"
    if not re.match(email_regex, contact_email):
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = "Please provide a valid email address."
    # Synchronize: just before submission, mirror the latest QUEUE_REPO to EVAL_REQUESTS_PATH
    download_with_restart(
        snapshot_download,
        repo_id=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH,
        repo_type="dataset",
        token=TOKEN,
        restart_func=restart_space,
    )
    # Synchronize: just before submission, mirror the latest FAILED_QUEUE_REPO to FAILED_EVAL_REQUESTS_PATH
    download_with_restart(
        snapshot_download,
        repo_id=FAILED_QUEUE_REPO,
        local_dir=FAILED_EVAL_REQUESTS_PATH,
        repo_type="dataset",
        token=TOKEN,
        restart_func=restart_space,
    )
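    # Both local mirrors are now in sync with the remote queues.
    # (download_with_restart is assumed to retry snapshot_download and fall
    # back to restart_func on persistent failures; its implementation lives
    # in utils and is not shown here.)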
    REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)

    user_name = ""
    model_path = model
    if "/" in model:
        user_name = model.split("/")[0]
        model_path = model.split("/")[1]

    precision = precision.split(" ")[0]

    KST = timezone(timedelta(hours=9))
    now = datetime.now(KST).replace(microsecond=0)
    current_time = now.strftime("%Y-%m-%dT%H:%M:%S %z")
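    # e.g. current_time == "2025-01-01T12:00:00 +0900" (KST, UTC+9)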
    # Benchmark name is fixed for this leaderboard
    benchmark = "TRUEBench"
    # Check submitter qualification
    if user_name != user_state and user_name not in organization_list:
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = "The submitter does not have submission rights for this model."

    # Rate limit: at most three submissions per organization per 24 hours
    submission_times = [
        item["submitted_time"]
        for item in USERS_TO_SUBMISSION_DATES.get(user_name, [])
        if item["benchmark"] == benchmark
    ]
    submission_cnt = 0
    for submitted in submission_times:
        hours_diff = (now - datetime.strptime(submitted, "%Y-%m-%dT%H:%M:%S %z")).total_seconds() / 3600
        if hours_diff <= 24:
            submission_cnt += 1
    if submission_cnt >= 3:
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = "The organization has already submitted three times for this benchmark today."
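    # Example: with entries submitted 2, 10, and 30 hours ago, only the first
    # two fall inside the 24-hour window, so submission_cnt == 2 and the
    # request is still allowed.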
    # Does the model actually exist, and is the model info correctly filled?
    revision = "main"
    model_info = None
    model_size = "Unknown"
    try:
        model_info = API.model_info(repo_id=model, revision=revision)
        model_size = get_model_size(model_info=model_info, precision=precision)
    except Exception:
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = "Could not get your model information. Please fill it in properly."
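    # model_info stays None on failure; downstream reads use getattr with
    # defaults, so a failed entry can still be recorded.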
    # Were the model card and license filled?
    license = "Unknown"
    if model_info is not None:
        try:
            license = model_info.cardData["license"]
        except Exception:
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "Please select a license for your model."

    modelcard_OK, error_msg = check_model_card(model)
    if not modelcard_OK:
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = error_msg
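    # check_model_card returns (passed, error_msg); an absent or incomplete
    # model card is expected to fail this check.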
    # Response prefix check: required when thinking mode is on, cleared otherwise
    if think_type == "On":
        if response_prefix == "":
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "A response prefix is required when 'Think' is set to 'On'."
    else:
        response_prefix = ""
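    # (For thinking models the prefix is typically something like an opening
    # reasoning tag; the exact value depends on the model's chat template.)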
    # Handle YAML config input (file upload takes precedence over the textbox)
    config_dict = None
    # Case 1: a file was uploaded
    if upbox is not None and getattr(upbox, "name", ""):
        file_name = upbox.name
        if not file_name.lower().endswith((".yaml", ".yml")):
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "Please submit a .yaml or .yml file."
        try:
            with open(file_name, "r", encoding="utf-8") as f:
                config_dict = yaml.safe_load(f)
        except yaml.YAMLError:
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "The file is not a valid YAML format."
        except Exception as e:
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = f"An error occurred while reading the file. {e}"
        if config_dict is None:
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "The YAML file is empty or invalid."
    else:
        # Case 2: no file uploaded, fall back to the textbox
        if not yml_textbox or not yml_textbox.strip():
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "Please fill in the configuration box or submit a YAML file."
        try:
            config_dict = yaml.safe_load(yml_textbox or "")
        except yaml.YAMLError:
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "Please provide a valid configuration."
        if config_dict is None:
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "Please provide a valid configuration."
    # Restrict config keys to the supported set
    allowed_keys = {"llm_serve_args", "sampling_params", "extra_body"}
    if not isinstance(config_dict, dict):
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = "The configuration must be a YAML dictionary at the top level."
    else:
        extra_keys = set(config_dict.keys()) - allowed_keys
        if extra_keys:
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = (
                    "Only the following keys are allowed in the configuration: "
                    "llm_serve_args, sampling_params, extra_body. "
                    f"Found invalid keys: {', '.join(sorted(extra_keys))}."
                )
    configs = json.dumps(config_dict, indent=4, ensure_ascii=False)
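    # A minimal sketch of an accepted config (sub-keys are illustrative; the
    # evaluation backend defines what each section actually accepts):
    #
    #   llm_serve_args:
    #     max_model_len: 4096
    #   sampling_params:
    #     temperature: 0.7
    #   extra_body: {}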
    # Check for duplicate submissions of this model
    submission_times = [
        item["submitted_time"]
        for item in USERS_TO_SUBMISSION_DATES.get(user_name, [])
        if item["benchmark"] == benchmark and item["model"] == model
    ]
    submission_total_cnt = len(submission_times)
    submission_cnt = 0
    for submitted in submission_times:
        hours_diff = (now - datetime.strptime(submitted, "%Y-%m-%dT%H:%M:%S %z")).total_seconds() / 3600
        if hours_diff <= 24:
            submission_cnt += 1
    if submission_cnt >= 1:
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = "This model has already been submitted within the last 24 hours."
    if submission_total_cnt >= 3:
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = "This model has already been submitted three times for this benchmark."
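    # Example: a model resubmitted 5 hours after its last submission is
    # rejected by the 24-hour rule; a model with three prior submissions is
    # rejected regardless of timing.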
print("Creating eval file") | |
if ERROR_MESSAGE is None: | |
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}/{benchmark}_{model_path}" | |
else: | |
OUT_DIR = f"{FAILED_EVAL_REQUESTS_PATH}/{user_name}/{benchmark}_{model_path}" | |
os.makedirs(OUT_DIR, exist_ok=True) | |
current_time_replaced = current_time.replace("-", "").replace(":", "").replace("T", "_").split()[0] | |
out_path = f"{OUT_DIR}/{current_time_replaced}.json" | |
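    # e.g. "2025-01-01T12:00:00 +0900" yields out_path
    # f"{OUT_DIR}/20250101_120000.json"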
    # Seems good, creating the eval entry
    print("Adding new eval")
    eval_entry = {
        "benchmark": benchmark,
        "contact_email": contact_email,
        "model": model,
        "type": "open",
        "model_type": model_type,
        "think_type": think_type,
        "precision": precision,
        "response_prefix": response_prefix,
        "requirements": requirements,
        "status": "PENDING" if ERROR_MESSAGE is None else "Failed",
        "submitted_time": current_time,
        "likes": getattr(model_info, "likes", -1),
        "params": model_size,
        "license": license,
        "private": False,
        "configs": configs,
    }
    if ERROR_MESSAGE is not None:
        eval_entry["error_message"] = ERROR_MESSAGE
    with open(out_path, "w") as f:
        f.write(json.dumps(eval_entry))

    print("Uploading eval file")
    # The path_in_repo splits below assume EVAL_REQUESTS_PATH contains
    # "eval-queue/" and FAILED_EVAL_REQUESTS_PATH contains "failed-eval-queue/"
    # (see src.envs).
    if ERROR_MESSAGE is None:
        API.upload_file(
            path_or_fileobj=out_path,
            path_in_repo=out_path.split("eval-queue/")[1],
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add {model} to eval queue",
        )
    else:
        API.upload_file(
            path_or_fileobj=out_path,
            path_in_repo=out_path.split("failed-eval-queue/")[1],
            repo_id=FAILED_QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add {model} to failed eval queue",
        )

    # Remove the local copy once it has been uploaded
    os.remove(out_path)
    if ERROR_MESSAGE is None:
        return styled_message("Your request has been submitted to the evaluation queue!")
    else:
        return styled_error(ERROR_MESSAGE)
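

# A minimal sketch of wiring this handler into a Gradio submit button
# (component names are illustrative; the real UI lives in the app module):
#
#   with gr.Blocks() as demo:
#       ...
#       submit_btn.click(
#           fn=add_new_eval_option,
#           inputs=[contact_email, model, model_type, think_type, precision,
#                   response_prefix, requirements, user_state,
#                   organization_list, yml_textbox, upbox],
#           outputs=submission_result,
#       )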