import json
import os
import time
from datetime import datetime, timezone

import pandas as pd
from datasets import Dataset, DatasetDict
from pandas.api.types import is_integer_dtype, is_string_dtype

from src.datamodel.data import F1Data
from src.display.formatting import styled_error, styled_message, styled_warning
from src.display.utils import ModelType
from src.envs import API, SUBMISSIONS_REPO, TOKEN
from src.logger import get_logger

# from src.submission.check_validity import (
#     already_submitted_models,
#     check_model_card,
#     get_model_size,
#     is_model_on_hub,
# )

logger = get_logger(__name__)

def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
    """Check an uploaded solutions dataframe; return an error message, or None if valid."""
    logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
    expected_cols = ["problem_id", "solution"]
    if set(pd_ds.columns) != set(expected_cols):
        return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
    if not is_integer_dtype(pd_ds["problem_id"]):
        return "problem_id must be an integer"
    if not all(isinstance(v, str) for v in pd_ds["solution"]):
        return "every solution must be a str"
    submitted_ids = set(pd_ds.problem_id.astype(str))
    if submitted_ids != lbdb.code_problem_ids:
        missing = lbdb.code_problem_ids - submitted_ids
        unknown = submitted_ids - lbdb.code_problem_ids
        return f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown"
    # The set comparison above passed, so any surplus rows must be duplicate IDs.
    if len(pd_ds) > len(lbdb.code_problem_ids):
        return "Duplicate problem IDs exist in uploaded file"
    return None
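
# Example of a well-formed submission file (a sketch with made-up problem IDs;
# real IDs must exactly match lbdb.code_problem_ids). Each line is one JSON record:
#   {"problem_id": 101, "solution": "def solve():\n    return 42"}
#   {"problem_id": 102, "solution": "print('hello')"}
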
def add_new_solutions(
    lbdb: F1Data,
    system_name: str,
    org: str,
    sys_type: str,
    submission_path: str,
    skip_validation: bool = False,
):
    """Validate an uploaded JSONL solutions file and push it to the submissions repo."""
    logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
    if not system_name:
        return styled_error("Please fill in the system name")
    if not org:
        return styled_error("Please fill in the organization name")
    if not sys_type:
        return styled_error("Please select a system type")
    sys_type = ModelType.from_str(sys_type).name
    if not submission_path:
        return styled_error("Please upload a JSONL solutions file")
    try:
        submission_df = pd.read_json(submission_path, lines=True)
    except Exception as e:
        return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
    if not skip_validation:
        validation_error = validate_submission(lbdb, submission_df)
        if validation_error:
            return styled_error(validation_error)
    submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"

    # Submission looks good; store it in the submissions dataset repo.
    logger.info("Adding new submission: %s", submission_id)
    submission_ts = time.time_ns()

    def add_info(row):
        # Attach the submission metadata to every solution row.
        return {
            **row,
            "system_name": system_name,
            "organization": org,
            "system_type": sys_type,
            "submission_id": submission_id,
            "submission_ts": submission_ts,
        }

    ds = Dataset.from_pandas(submission_df).map(add_info)
    # dsdict = DatasetDict({submission_id: ds})
    # dsdict.push_to_hub(SUBMISSIONS_REPO, private=True)
    ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
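    # Each submission lands as its own config (named submission_id) inside
    # SUBMISSIONS_REPO. A consumer could read it back with something like the
    # sketch below (assumes the default "train" split that push_to_hub creates):
    #   from datasets import load_dataset
    #   ds = load_dataset(SUBMISSIONS_REPO, submission_id, split="train", token=TOKEN)
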
# print("Creating eval file") | |
# OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}" | |
# os.makedirs(OUT_DIR, exist_ok=True) | |
# out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json" | |
# with open(out_path, "w") as f: | |
# f.write(json.dumps(eval_entry)) | |
# print("Uploading eval file") | |
# API.upload_file( | |
# path_or_fileobj=out_path, | |
# path_in_repo=out_path.split("eval-queue/")[1], | |
# repo_id=QUEUE_REPO, | |
# repo_type="dataset", | |
# commit_message=f"Add {model} to eval queue", | |
# ) | |
# # Remove the local file | |
# os.remove(out_path) | |
    return styled_message(
        "Your request has been submitted to the evaluation queue!\n"
        "Results may take up to 24 hours to be processed and shown in the leaderboard."
    )
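

if __name__ == "__main__":
    # Minimal local smoke test for validate_submission (a sketch: a SimpleNamespace
    # stands in for the real F1Data, since only .code_problem_ids is read here;
    # the real constructor may differ).
    from types import SimpleNamespace

    stub_lbdb = SimpleNamespace(code_problem_ids={"1", "2"})
    good = pd.DataFrame({"problem_id": [1, 2], "solution": ["print(1)", "print(2)"]})
    bad = pd.DataFrame({"problem_id": [1], "answer": ["print(1)"]})
    print(validate_submission(stub_lbdb, good))  # expected: None
    print(validate_submission(stub_lbdb, bad))  # expected: column-mismatch error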