from functools import partial
import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
# from huggingface_hub import snapshot_download
from src.about import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
EVALUATION_QUEUE_TEXT,
INTRODUCTION_TEXT,
LLM_BENCHMARKS_TEXT,
TITLE,
)
from src.datamodel.data import F1Data
from src.display.css_html_js import custom_css
from src.display.utils import (
# BENCHMARK_COLS,
COLS,
EVAL_COLS,
EVAL_TYPES,
AutoEvalColumn,
ModelType,
fields,
WeightType,
Precision,
)
from src.envs import API, REPO_ID, TOKEN, CODE_PROBLEMS_REPO, SUBMISSIONS_REPO, RESULTS_REPO
from src.logger import get_logger
from src.populate import get_evaluation_queue_df, get_leaderboard_df
from src.submission.submit import add_new_solutions
logger = get_logger(__name__)
SPLIT = "warmup" # TODO temp
SKIP_VALIDATION = True # TODO temp
def restart_space():
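    """Restart the Space via the Hub API; triggered on a schedule below so the app reloads."""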
API.restart_space(repo_id=REPO_ID)
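# F1Data bundles the code-problems, submissions and results dataset repos for the selected split.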
lbdb = F1Data(cp_ds_name=CODE_PROBLEMS_REPO, sub_ds_name=SUBMISSIONS_REPO, res_ds_name=RESULTS_REPO, split=SPLIT)
leaderboard_df = get_leaderboard_df(RESULTS_REPO)
logger.info("Initialized LBDB")
# (
# finished_eval_queue_df,
# running_eval_queue_df,
# pending_eval_queue_df,
# ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
def init_leaderboard(dataframe):
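    """Build the Leaderboard component; visible, searchable and hidden columns are driven by AutoEvalColumn metadata."""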
if dataframe is None or dataframe.empty:
raise ValueError("Leaderboard DataFrame is empty or None.")
return Leaderboard(
value=dataframe,
datatype=[c.type for c in fields(AutoEvalColumn)],
select_columns=SelectColumns(
default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
label="Select Columns to Display:",
),
search_columns=[AutoEvalColumn.system.name, AutoEvalColumn.system_type.name],
hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
filter_columns=[
ColumnFilter(AutoEvalColumn.system_type.name, type="checkboxgroup", label="Model types"),
# ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
# ColumnFilter(
# AutoEvalColumn.params.name,
# type="slider",
# min=0.01,
# max=150,
# label="Select the number of parameters (B)",
# ),
# ColumnFilter(AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True),
],
bool_checkboxgroup_label="Hide models",
interactive=False,
)
# Banner: rendered with gr.Image below (the Markdown approach is kept commented out for reference)
# banner = "![Leaderboard Banner](file/assets/banner.png)"
demo = gr.Blocks(css=custom_css)
with demo:
gr.Image(
"assets/banner.png",
interactive=False,
show_label=False,
show_download_button=False,
container=False,
)
# gr.Markdown(banner)
gr.HTML(
"""
<style>
body {
background-color: #121212;
color: white;
margin: 0; /* Reset browser default */
}
/* Outer container margin & spacing */
.gradio-container {
max-width: 1100px;
margin: 2rem auto; /* top/bottom spacing + horizontal centering */
padding: 2rem; /* inner spacing */
background-color: rgba(0, 0, 0, 0.6); /* optional: semi-transparent panel */
border-radius: 12px; /* rounded corners */
}
</style>
"""
)
gr.HTML(TITLE)
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
with gr.Tabs(elem_classes="tab-buttons") as tabs:
with gr.TabItem("πŸ… FormulaOne Leaderboard", elem_id="formulaone-leaderboar-tab-table", id=0):
leaderboard = init_leaderboard(leaderboard_df)
        # with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=1):
# logger.info("Tab about")
# gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
with gr.TabItem("πŸš€ Submit here! ", elem_id="llm-benchmark-tab-table", id=2):
logger.info("Tab submission")
with gr.Column():
with gr.Row():
gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
# with gr.Column():
# with gr.Accordion(
# f"βœ… Finished Evaluations ({len(finished_eval_queue_df)})",
# open=False,
# ):
# with gr.Row():
# finished_eval_table = gr.components.Dataframe(
# value=finished_eval_queue_df,
# headers=EVAL_COLS,
# datatype=EVAL_TYPES,
# row_count=5,
# )
# with gr.Accordion(
# f"πŸ”„ Running Evaluation Queue ({len(running_eval_queue_df)})",
# open=False,
# ):
# with gr.Row():
# running_eval_table = gr.components.Dataframe(
# value=running_eval_queue_df,
# headers=EVAL_COLS,
# datatype=EVAL_TYPES,
# row_count=5,
# )
# with gr.Accordion(
# f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
# open=False,
# ):
# with gr.Row():
# pending_eval_table = gr.components.Dataframe(
# value=pending_eval_queue_df,
# headers=EVAL_COLS,
# datatype=EVAL_TYPES,
# row_count=5,
# )
with gr.Row():
gr.Markdown("# βœ‰οΈβœ¨ Submit your solutions here!", elem_classes="markdown-text")
with gr.Row():
with gr.Column():
system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name)
org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
# revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
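                    # System type is restricted to the ModelType enum values; defaults to LLM.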
sys_type_dropdown = gr.Dropdown(
choices=[t.to_str(" ") for t in ModelType],
label=AutoEvalColumn.system_type.name,
multiselect=False,
value=ModelType.LLM.to_str(" "),
interactive=True,
)
# with gr.Column():
submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
# precision = gr.Dropdown(
# choices=[i.value.name for i in Precision if i != Precision.Unknown],
# label="Precision",
# multiselect=False,
# value="float16",
# interactive=True,
# )
# weight_type = gr.Dropdown(
# choices=[i.value.name for i in WeightType],
# label="Weights type",
# multiselect=False,
# value="Original",
# interactive=True,
# )
# base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
logger.info("Submit button")
submit_button = gr.Button("Submit")
submission_result = gr.Markdown()
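            # Thin wrapper around add_new_solutions: binds the shared F1Data handle and the validation flag.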
def add_solution_cbk(system_name, org, sys_type, submission_path):
return add_new_solutions(
lbdb, system_name, org, sys_type, submission_path, skip_validation=SKIP_VALIDATION
)
submit_button.click(
add_solution_cbk,
[
system_name_textbox,
org_textbox,
sys_type_dropdown,
submission_file,
],
submission_result,
)
with gr.Row():
logger.info("Citation")
with gr.Accordion(CITATION_BUTTON_LABEL, open=False):
gr.Code(
value=CITATION_BUTTON_TEXT.strip(),
elem_id="citation-block",
)
# citation_button = gr.Textbox(
# value=CITATION_BUTTON_TEXT,
# # label=CITATION_BUTTON_LABEL,
# lines=20,
# elem_id="citation-button",
# show_copy_button=True,
# )
logger.info("Scheduler")
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()
logger.info("Launch")
demo.queue(default_concurrency_limit=40).launch()
logger.info("Done")