# IVACE LLM Leaderboard — Streamlit app (Hugging Face Space)
import streamlit as st | |
import pandas as pd | |
import re | |
from datasets import load_dataset | |
import src.check_validity as cv | |
from src.submit import ModelSizeChecker | |
import os | |
from huggingface_hub import HfApi | |
# Must be the first Streamlit call in the script (Streamlit requirement).
st.set_page_config(page_title="IVACE Leaderboard", layout="wide")
def validate_model(model, precision, base_model, weight_type, use_chat_template):
    """
    Run pre-submission checks to ensure a model can be evaluated.

    :param model: HF model name in ``user/model`` form
    :param precision: parameter dtype ("float16", "bfloat16", "8bit", "4bit", "GPTQ")
    :param base_model: base model name (required for Delta/Adapter weights)
    :param weight_type: "Original", "Delta" or "Adapter"
    :param use_chat_template: whether to also validate the model's chat template
    :return: error message string on failure, or ``None`` when all checks pass
    """
    api = HfApi()
    try:
        model_info = api.model_info(repo_id=model, revision="main")
    except Exception:
        return "Could not get your model information. Please fill it up properly."

    # TODO: think if it makes sense. Maybe we need to allow upload submissions more than once
    # # Has it been submitted already?
    # model_key = f"{model}_{model_info.sha}_{precision}"
    # if model_key in requested_models:
    #     return st.error(
    #         f"The model '{model}' with revision '{model_info.sha}' and precision '{precision}' has already been submitted.")

    # Check model size early — cheapest check that rejects most bad submissions.
    model_size, error_text = cv.get_model_size(model_info=model_info, precision=precision, base_model=base_model)
    if model_size is None:
        return error_text

    # Absolute size limit for float16 and bfloat16
    if precision in ["float16", "bfloat16"] and model_size > 100:
        return (
            f"Sadly, models larger than 100B parameters cannot be submitted in {precision} precision at this time. "
            f"Your model size: {model_size:.2f}B parameters."
        )

    # Precision-adjusted size limit for 8bit, 4bit, and GPTQ
    if precision in ["8bit", "4bit", "GPTQ"]:
        size_checker = ModelSizeChecker(model=model, precision=precision, model_size_in_b=model_size)
        if not size_checker.can_evaluate():
            precision_factor = size_checker.get_precision_factor()
            max_size = 140 * precision_factor
            return (
                f"Sadly, models this big ({model_size:.2f}B parameters) cannot be evaluated automatically "
                f"at the moment on our cluster. The maximum size for {precision} precision is {max_size:.2f}B parameters."
            )

    # Delta/Adapter weights need a reachable base model on the Hub.
    if weight_type in ["Delta", "Adapter"]:
        base_model_on_hub, error, _ = cv.is_model_on_hub(
            model_name=base_model, revision="main", token=None, test_tokenizer=True
        )
        if not base_model_on_hub:
            return f'Base model "{base_model}" {error}'

    # For non-adapter weights the model itself must load (config + tokenizer).
    if weight_type != "Adapter":
        model_on_hub, error, model_config = cv.is_model_on_hub(
            model_name=model, revision=model_info.sha, test_tokenizer=True
        )
        if not model_on_hub or model_config is None:
            return f'Model "{model}" {error}'

    # Were the model card and license filled? EAFP: cardData may be None or
    # missing the key, both of which raise here.
    try:
        model_info.cardData["license"]
    except Exception:
        return "Please select a license for your model"
    modelcard_ok, error_msg, _model_card = cv.check_model_card(model)
    if not modelcard_ok:
        return error_msg

    # Check the chat template submission (chat models only).
    if use_chat_template:
        chat_template_valid, chat_template_error = cv.check_chat_template(model, "main")
        if not chat_template_valid:
            return chat_template_error
    return None
# Function to send email | |
def log_submission(model_name, description, user_contact): | |
# todo: create email or log in dataset | |
... | |
return True | |
def get_url(html_content):
    """
    Extract the first http(s) URL found in an ``href`` attribute.

    :param html_content: HTML fragment (e.g. an anchor tag)
    :return: the URL string, or None when no href URL is present
    """
    found = re.search(r'href=["\'](https?://[^\s"\']+)', html_content)
    return found.group(1) if found else None
def load_data():
    """
    Load the leaderboard table from the ``open-llm-leaderboard/contents`` dataset.

    :return: DataFrame with the selected columns sorted by average score
             (descending), or an empty DataFrame when loading fails.
    """
    try:
        columns = ["eval_name", "Model", "Type", "Average ⬆️", "IFEval", "MMLU-PRO", "GPQA", "MUSR", "CO₂ cost (kg)"]
        # NOTE(review): head(10) runs BEFORE sorting, so this shows the first
        # 10 dataset rows, not a global top-10 — confirm the dataset is
        # pre-sorted if a true top-10 is intended.
        data = load_dataset("open-llm-leaderboard/contents")["train"].to_pandas().head(10)
        data = data[columns]
        # "Model" holds an HTML anchor; keep only the raw URL so
        # st.column_config.LinkColumn can render it.
        data["Model"] = data["Model"].apply(get_url)
        data.sort_values(by="Average ⬆️", ascending=False, inplace=True)
        data.reset_index(drop=True, inplace=True)
        return data
    except Exception:
        # load_dataset raises DatasetNotFoundError / connection errors, not
        # FileNotFoundError — catch broadly so the app degrades gracefully.
        st.error("open-llm-leaderboard/contents was not found in the hub")
        return pd.DataFrame()
# Fetch the leaderboard once at startup; tabs drive the rest of the page.
leaderboard_data = load_data()
tabs = st.tabs(["Leaderboard", "Submit model"])  # , "Vote for next model"
with tabs[0]:
    # --- Leaderboard tab ---
    # Centered HF logo: middle column of five equal-width columns.
    cols_logo = st.columns(5, vertical_alignment="center")
    with cols_logo[2]:
        st.image("assets/images/hf-logo.png", use_container_width=True)
    # Title and subtitle rendered as raw HTML for custom centering/typography.
    st.markdown(
        """
        <div style="text-align: center;">
            <h1>IVACE LLM Leaderboard</h1>
            <p style="font-size: 1.2rem;">
                Comparing Large Language Models in an <span style="font-weight: 600;">open</span>
                and <span style="font-weight: 600;">reproducible</span> way
            </p>
        </div>
        """,
        unsafe_allow_html=True,
    )
    # Table centered at 80% width; "Model" column renders as a clickable link.
    leaderboard_cols = st.columns([0.1, 0.8, 0.1], vertical_alignment="center")
    with leaderboard_cols[1]:
        if not leaderboard_data.empty:
            st.data_editor(
                leaderboard_data,
                column_config={
                    "Model": st.column_config.LinkColumn("Model")
                },
                hide_index=False,
            )
        else:
            st.write("No data found to display on leaderboard.")
with tabs[1]:
    # --- Submit-model tab ---
    # (removed a redundant mid-script `import streamlit as st`; streamlit is
    # already imported at the top of the file)
    st.header("Submit model")
def get_id_number(id_val): | |
html_template = f""" | |
<div style="display: flex; align-items: flex-start; margin-bottom: 1rem;"> | |
<div style=" | |
width: 32px; | |
height: 32px; | |
border-radius: 50%; | |
display: flex; | |
align-items: center; | |
justify-content: center; | |
border: 1px solid #007BFF; | |
color: #007BFF; | |
font-size: 0.875rem; | |
font-weight: 600; | |
background-color: transparent;"> | |
{id_val} | |
</div>""" | |
return html_template | |
# create guide info | |
guide_info_list = [] | |
html_path = "assets/html" | |
for filename in os.listdir(html_path): | |
file_path = os.path.join(html_path, filename) | |
with open(file_path, 'r', encoding='utf-8') as file: | |
guide_info_list.append(file.read()) | |
# display adding number id | |
for i, info_div in enumerate(guide_info_list): | |
st.markdown(get_id_number(i+1) + info_div, unsafe_allow_html=True) | |
    # Submission form: inputs are buffered and processed only on submit.
    with st.form("submit_model_form"):
        model_name = st.text_input("Model Name (format: user_name/model_name)",
                                   help="Your model should be public on the Hub and follow the username/model-id format (e.g. mistralai/Mistral-7B-v0.1).")
        description = st.text_area("Description", help="Add a description of the proposed model for the evaluation to help prioritize its evaluation")
        user_contact = st.text_input("Your Contact Email", help="User e-mail to contact when there are updates")
        precision_option = st.selectbox(
            "Choose precision format:",
            help="Size limits vary by precision: • FP16/BF16: up to 100B parameters • 8-bit: up to 280B parameters (2x) • 4-bit: up to 560B parameters (4x) Choose carefully as incorrect precision can cause evaluation errors.",
            options=["float16", "bfloat16", "8bit", "4bit", "GPTQ"],
            index=0
        )
        weight_type_option = st.selectbox(
            "Select what type of weights are being loaded from the checkpoint provided:",
            help="Original: Complete model weights in safetensors format Delta: Weight differences from base model (requires base model for size calculation) Adapter: Lightweight fine-tuning layers (requires base model for size calculation)",
            options=["Original", "Adapter", "Delta"],
            index=0
        )
        # Only meaningful for Delta/Adapter weight types (see validate_model).
        base_model_name = st.text_input("Base model",
                                        help="Required for delta weights or adapters. This information is used to identify the original model and calculate the total parameter count by combining base model and adapter/delta parameters.",
                                        value="")
        model_type = st.selectbox(
            "Choose model type:",
            help="🟢 Pretrained: Base models trained on text using masked modeling 🟩 Continuously Pretrained: Extended training on additional corpus 🔶 Fine-tuned: Domain-specific optimization 💬 Chat: Models using RLHF, DPO, or IFT for conversation 🤝 Merge: Combined weights without additional training",
            options=["🟢 Pretrained", "🟩 Continuously Pretrained", "🔶 Fine-tuned", "💬 Chat", "🤝 Merge"],
        )
        submit_button = st.form_submit_button("Submit Request")

        if submit_button:
            # validate model size, license, chat_templates
            # Chat models additionally get their chat template validated.
            use_chat_template = True if model_type == "💬 Chat" else False
            validation_error = validate_model(model_name, precision_option, base_model_name, weight_type_option, use_chat_template)
            if validation_error is not None:
                st.error(validation_error)
            # Minimal e-mail sanity check: something@something.something
            elif not re.match(r"[^@]+@[^@]+\.[^@]+", user_contact):
                st.error("Invalid email address.")
            else:
                if log_submission(model_name, description, user_contact):
                    st.success("Your request has been sent successfully.")
                else:
                    st.error("Failed to send your request. Please try again later.")
# with tabs[2]: | |
# st.header("Vote for next model") | |
# st.write("Esta sección estará disponible próximamente.") | |