import json
import os
import time
from datetime import datetime, timezone

import pandas as pd
from datasets import Dataset, DatasetDict
from pandas.api.types import is_integer_dtype, is_string_dtype

from src.datamodel.data import F1Data
from src.display.formatting import styled_error, styled_message, styled_warning
from src.display.utils import ModelType
from src.envs import API, SUBMISSIONS_REPO, TOKEN
from src.logger import get_logger

# from src.submission.check_validity import (
#     already_submitted_models,
#     check_model_card,
#     get_model_size,
#     is_model_on_hub,
# )

logger = get_logger(__name__)

def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
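    """Validate an uploaded solutions dataframe.

    Returns a human-readable error message if the submission is malformed,
    or None if it passes all checks.
    """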
logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
expected_cols = ["problem_id", "solution"]
if set(pd_ds.columns) != set(expected_cols):
return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
if not is_integer_dtype(pd_ds["problem_id"]):
return "problem_id must be str convertible to int"
if any(type(v) != str for v in pd_ds["solution"]):
return "solution must be of type str"
submitted_ids = set(pd_ds.problem_id.astype(str))
if submitted_ids != lbdb.code_problem_ids:
missing = lbdb.code_problem_ids - submitted_ids
unknown = submitted_ids - lbdb.code_problem_ids
return f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown"
if len(pd_ds) > len(lbdb.code_problem_ids):
return "Duplicate problem IDs exist in uploaded file"
return None
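
# A valid line in the uploaded solutions JSONL looks like the following
# (illustrative values; real problem IDs come from F1Data.code_problem_ids):
# {"problem_id": 17, "solution": "def solve(): ..."}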


def add_new_solutions(
    lbdb: F1Data,
    system_name: str,
    org: str,
    sys_type: str,
    submission_path: str,
    skip_validation: bool = False,
):
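    """Validate a user-submitted solutions file and push it to SUBMISSIONS_REPO.

    Returns a styled HTML message describing success or the first error found.
    """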
logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
if not system_name:
return styled_error("Please fill system name")
if not org:
return styled_error("Please fill organization name")
if not sys_type:
return styled_error("Please select system type")
sys_type = ModelType.from_str(sys_type).name
if not submission_path:
return styled_error("Please upload JSONL solutions file")
try:
submission_df = pd.read_json(submission_path, lines=True)
except Exception as e:
return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
if not skip_validation:
validation_error = validate_submission(lbdb, submission_df)
if validation_error:
return styled_error(validation_error)
submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
# Seems good, creating the eval
print(f"Adding new submission: {submission_id}")
submission_ts = time.time_ns()

    def add_info(row):
        # Enrich each solution row with submission-level metadata.
        return {
            **row,
            "system_name": system_name,
            "organization": org,
            "system_type": sys_type,
            "submission_id": submission_id,
            "submission_ts": submission_ts,
        }
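
    # Attach the metadata to every row and upload the result as a private
    # dataset config named after this submission.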
    ds = Dataset.from_pandas(submission_df).map(add_info)
    # dsdict = DatasetDict({submission_id: ds})
    # dsdict.push_to_hub(SUBMISSIONS_REPO, private=True)
    ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
# print("Creating eval file")
# OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
# os.makedirs(OUT_DIR, exist_ok=True)
# out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
# with open(out_path, "w") as f:
# f.write(json.dumps(eval_entry))
# print("Uploading eval file")
# API.upload_file(
# path_or_fileobj=out_path,
# path_in_repo=out_path.split("eval-queue/")[1],
# repo_id=QUEUE_REPO,
# repo_type="dataset",
# commit_message=f"Add {model} to eval queue",
# )
# # Remove the local file
# os.remove(out_path)
    return styled_message(
        "Your request has been submitted to the evaluation queue!\n"
        "Results may take up to 24 hours to be processed and shown in the leaderboard."
    )
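

# Minimal local check of validate_submission (a sketch, not part of the app:
# it uses a SimpleNamespace stand-in because the real F1Data constructor is
# not shown in this file; only the code_problem_ids attribute is needed):
# import types
# fake_lbdb = types.SimpleNamespace(code_problem_ids={"1", "2"})
# df = pd.DataFrame({"problem_id": [1, 2], "solution": ["print(1)", "print(2)"]})
# assert validate_submission(fake_lbdb, df) is None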