import json
import yaml
import os
import re
from datetime import datetime, timezone, timedelta
from typing import Optional
from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import API, EVAL_REQUESTS_PATH, FAILED_EVAL_REQUESTS_PATH, TOKEN, FAILED_QUEUE_REPO, QUEUE_REPO, REPO_ID
from src.submission.check_validity import (
already_submitted_models,
check_model_card,
get_model_size
)
import gradio as gr
from utils import download_with_restart
from huggingface_hub import snapshot_download
REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None
def restart_space():
API.restart_space(repo_id=REPO_ID)
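# add_new_eval_option validates a submission and records it as a request file.
# Validation errors do not raise: the first error encountered is stored in
# ERROR_MESSAGE, the function keeps running, and the request is written to the
# failed-eval queue with that message instead of the pending queue.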
def add_new_eval_option(
contact_email: str,
model: str,
model_type: str,
think_type: str,
precision: str,
response_prefix: str,
requirements: str,
user_state: str,
organization_list: list,
yml_textbox: str,
upbox,
):
ERROR_MESSAGE = None
# Validate email format
email_regex = r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$"
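    # e.g. accepts "user@example.com"; rejects addresses without "@" or a 2+ letter TLD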
if not re.match(email_regex, contact_email):
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "Please provide a valid email address."
# Synchronize: Just before submission, copy the latest QUEUE_REPO to EVAL_REQUESTS_PATH
download_with_restart(
snapshot_download,
repo_id=QUEUE_REPO,
local_dir=EVAL_REQUESTS_PATH,
repo_type="dataset",
token=TOKEN,
restart_func=restart_space
)
# Synchronize: Just before submission, copy the latest FAILED_QUEUE_REPO to FAILED_EVAL_REQUESTS_PATH
download_with_restart(
snapshot_download,
repo_id=FAILED_QUEUE_REPO,
local_dir=FAILED_EVAL_REQUESTS_PATH,
repo_type="dataset",
token=TOKEN,
restart_func=restart_space
)
REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
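    # USERS_TO_SUBMISSION_DATES maps an org/user name to its previous request
    # entries (each carrying 'benchmark', 'model' and 'submitted_time', as used
    # by the rate-limit and duplicate checks below).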
user_name = ""
model_path = model
if "/" in model:
user_name = model.split("/")[0]
model_path = model.split("/")[1]
precision = precision.split(" ")[0]
KST = timezone(timedelta(hours=9))
current_time = datetime.now(KST).strftime("%Y-%m-%dT%H:%M:%S %z")
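    # Timestamps use KST (UTC+9) in "%Y-%m-%dT%H:%M:%S %z" form; the same format
    # string is used to parse them back for the 24-hour window checks below.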
    # Benchmark name used in request paths and eval entries
    benchmark = "TRUEBench"
# Check submitter qualification
if user_name != user_state and user_name not in organization_list:
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "The submitter does not have submission rights for this model."
    # Has the organization already submitted three times in the past 24 hours?
    submission_times = [item['submitted_time'] for item in USERS_TO_SUBMISSION_DATES.get(user_name, []) if item['benchmark'] == benchmark]
    submission_cnt = 0
    for submitted_time in submission_times:
        hours_diff = (datetime.strptime(current_time, "%Y-%m-%dT%H:%M:%S %z") - datetime.strptime(submitted_time, "%Y-%m-%dT%H:%M:%S %z")).total_seconds() / 3600
        if hours_diff <= 24:
            submission_cnt += 1
if submission_cnt >= 3:
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "The organization already submitted three times for this benchmark today."
# Does the model actually exist?
revision = "main"
# Is the model info correctly filled?
model_info = None
model_size = "Unknown"
try:
model_info = API.model_info(repo_id=model, revision=revision)
model_size = get_model_size(model_info=model_info, precision=precision)
except Exception:
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "Could not get your model information. Please fill it up properly."
# Were the model card and license filled?
license = "Unknown"
if model_info is not None:
try:
license = model_info.cardData["license"]
except Exception:
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "Please select a license for your model."
modelcard_OK, error_msg = check_model_card(model)
if not modelcard_OK:
if ERROR_MESSAGE is None:
ERROR_MESSAGE = error_msg
# Response prefix check
if think_type == "On":
if response_prefix == "":
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "It is required to fill in the response prefix when 'Think' is 'On'."
else:
response_prefix = ""
# Handle YAML config input (file or textbox)
config_dict = None
# Case 1: File uploaded
if upbox is not None and getattr(upbox, "name", ""):
file_name = upbox.name
if not file_name.lower().endswith(".yaml") and not file_name.lower().endswith(".yml"):
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "Please submit a .yaml or .yml file."
try:
with open(file_name, 'r', encoding='utf-8') as f:
config_dict = yaml.safe_load(f)
except yaml.YAMLError:
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "The file is not a valid YAML format."
except Exception as e:
if ERROR_MESSAGE is None:
ERROR_MESSAGE = f"An error occurred while reading the file. {e}"
if config_dict is None:
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "The YAML file is empty or invalid."
else:
# Case 2: No file uploaded
if not yml_textbox or not yml_textbox.strip():
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "Please fill in the configuration box or submit a YAML file."
try:
config_dict = yaml.safe_load(yml_textbox)
except yaml.YAMLError:
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "Please provide a valid configuration."
if config_dict is None:
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "Please provide a valid configuration."
# Restrict config keys
allowed_keys = {"llm_serve_args", "sampling_params", "extra_body"}
if not isinstance(config_dict, dict):
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "The configuration must be a YAML dictionary at the top level."
    extra_keys = set(config_dict.keys()) - allowed_keys if isinstance(config_dict, dict) else set()
if extra_keys:
if ERROR_MESSAGE is None:
ERROR_MESSAGE = f"Only the following keys are allowed in the configuration: llm_serve_args, sampling_params, extra_body. Found invalid keys: {', '.join(sorted(extra_keys))}."
configs = json.dumps(config_dict, indent=4, ensure_ascii=False)
    # Check for duplicate submissions of the same model
    submission_times = [item['submitted_time'] for item in USERS_TO_SUBMISSION_DATES.get(user_name, []) if item['benchmark'] == benchmark and item['model'] == model]
    submission_cnt = 0
    submission_total_cnt = len(submission_times)
    for submitted_time in submission_times:
        hours_diff = (datetime.strptime(current_time, "%Y-%m-%dT%H:%M:%S %z") - datetime.strptime(submitted_time, "%Y-%m-%dT%H:%M:%S %z")).total_seconds() / 3600
        if hours_diff <= 24:
            submission_cnt += 1
if submission_cnt >= 1:
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "This model has been already submitted within 24 hours."
if submission_total_cnt >= 3:
if ERROR_MESSAGE is None:
ERROR_MESSAGE = "This model has been already submitted three times for this benchmark."
print("Creating eval file")
if ERROR_MESSAGE is None:
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}/{benchmark}_{model_path}"
else:
OUT_DIR = f"{FAILED_EVAL_REQUESTS_PATH}/{user_name}/{benchmark}_{model_path}"
os.makedirs(OUT_DIR, exist_ok=True)
current_time_replaced = current_time.replace("-", "").replace(":", "").replace("T", "_").split()[0]
out_path = f"{OUT_DIR}/{current_time_replaced}.json"
    # Build the eval entry (status PENDING on success, Failed when an error was recorded)
print("Adding new eval")
if ERROR_MESSAGE is None:
eval_entry = {
"benchmark": benchmark,
"contact_email": contact_email,
"model": model,
"type": "open",
"model_type": model_type,
"think_type": think_type,
"precision": precision,
"response_prefix": response_prefix,
"requirements": requirements,
"status": "PENDING",
"submitted_time": current_time,
"likes": getattr(model_info, "likes", -1),
"params": model_size,
"license": license,
"private": False,
"configs": configs
}
else:
eval_entry = {
"benchmark": benchmark,
"contact_email": contact_email,
"model": model,
"type": "open",
"model_type": model_type,
"think_type": think_type,
"precision": precision,
"response_prefix": response_prefix,
"requirements": requirements,
"status": "Failed",
"submitted_time": current_time,
"likes": getattr(model_info, "likes", -1),
"params": model_size,
"license": license,
"private": False,
"configs": configs,
"error_message": ERROR_MESSAGE
}
with open(out_path, "w") as f:
f.write(json.dumps(eval_entry))
print("Uploading eval file")
if ERROR_MESSAGE is None:
API.upload_file(
path_or_fileobj=out_path,
path_in_repo=out_path.split("eval-queue/")[1],
repo_id=QUEUE_REPO,
repo_type="dataset",
commit_message=f"Add {model} to eval queue",
)
else:
API.upload_file(
path_or_fileobj=out_path,
path_in_repo=out_path.split("failed-eval-queue/")[1],
repo_id=FAILED_QUEUE_REPO,
repo_type="dataset",
commit_message=f"Add {model} to failed eval queue",
)
# Remove the local file
os.remove(out_path)
if ERROR_MESSAGE is None:
return styled_message(
"Your request has been submitted to the evaluation queue!"
)
else:
return styled_error(
ERROR_MESSAGE
)