# leaderboard/main.py — IVACE LLM Leaderboard Streamlit app
# Author: Alvaro Romo — initial commit 77175ac (~10.5 kB)
import streamlit as st
import pandas as pd
import re
from datasets import load_dataset
import src.check_validity as cv
from src.submit import ModelSizeChecker
import os
from huggingface_hub import HfApi
# Configure the browser tab title and use the full-width page layout.
# Must be the first Streamlit call in the script.
st.set_page_config(page_title="IVACE Leaderboard", layout="wide")
def validate_model(model, precision, base_model, weight_type, use_chat_template):
    """Run pre-submission checks to ensure a model can be evaluated.

    :param model: Hugging Face model id (``user/model``)
    :param precision: model parameter data type ("float16", "bfloat16", "8bit", "4bit", "GPTQ")
    :param base_model: base model id (required for "Delta"/"Adapter" weight types)
    :param weight_type: "Original", "Delta" or "Adapter"
    :param use_chat_template: whether the model must ship a valid chat template
    :return: an error-message string when a check fails, ``None`` when all checks pass
    """
    api = HfApi()
    try:
        model_info = api.model_info(repo_id=model, revision="main")
    except Exception:
        # Any failure (missing repo, gated/private repo, network error) is reported the same way.
        return "Could not get your model information. Please fill it up properly."

    # TODO: decide whether resubmissions should be allowed. A duplicate-submission check
    # (keyed on model + revision + precision) was previously sketched here and removed
    # pending that decision.

    # Check model size early.
    model_size, error_text = cv.get_model_size(model_info=model_info, precision=precision, base_model=base_model)
    if model_size is None:
        return error_text

    # Absolute size limit for float16 and bfloat16.
    if precision in ["float16", "bfloat16"] and model_size > 100:
        return (
            f"Sadly, models larger than 100B parameters cannot be submitted in {precision} precision at this time. "
            f"Your model size: {model_size:.2f}B parameters."
        )

    # Precision-adjusted size limit for 8bit, 4bit, and GPTQ.
    if precision in ["8bit", "4bit", "GPTQ"]:
        size_checker = ModelSizeChecker(model=model, precision=precision, model_size_in_b=model_size)
        if not size_checker.can_evaluate():
            precision_factor = size_checker.get_precision_factor()
            max_size = 140 * precision_factor
            return (
                f"Sadly, models this big ({model_size:.2f}B parameters) cannot be evaluated automatically "
                f"at the moment on our cluster. The maximum size for {precision} precision is {max_size:.2f}B parameters."
            )

    # Delta/Adapter weights need a resolvable base model on the Hub.
    if weight_type in ["Delta", "Adapter"]:
        base_model_on_hub, error, _ = cv.is_model_on_hub(
            model_name=base_model, revision="main", token=None, test_tokenizer=True
        )
        if not base_model_on_hub:
            return f'Base model "{base_model}" {error}'

    # For anything but a plain adapter, the model itself must load from the Hub.
    if weight_type != "Adapter":
        model_on_hub, error, model_config = cv.is_model_on_hub(
            model_name=model, revision=model_info.sha, test_tokenizer=True
        )
        if not model_on_hub or model_config is None:
            return f'Model "{model}" {error}'

    # Were the model card and license filled?
    try:
        model_info.cardData["license"]
    except (AttributeError, KeyError, TypeError):
        # cardData may be absent, None, or lack a "license" entry.
        return "Please select a license for your model"

    modelcard_ok, error_msg, _model_card = cv.check_model_card(model)
    if not modelcard_ok:
        return error_msg

    # Check the chat template submission.
    if use_chat_template:
        chat_template_valid, chat_template_error = cv.check_chat_template(model, "main")
        if not chat_template_valid:
            return chat_template_error

    # All checks passed.
    return None
# Function to send email
def log_submission(model_name, description, user_contact):
    """Record a model submission request (stub).

    TODO: deliver the request by email or append it to a tracking dataset.
    Currently does nothing and always reports success.
    """
    return True
def get_url(html_content):
    """Return the first http(s) URL found in an ``href`` attribute, or None."""
    found = re.search(r'href=["\'](https?://[^\s"\']+)', html_content)
    return found.group(1) if found else None
@st.cache_data
def load_data():
    """Fetch leaderboard rows from the Hub, returned sorted by average score.

    Returns an empty DataFrame (after surfacing a Streamlit error) when the
    dataset cannot be found.
    """
    try:
        wanted_cols = ["eval_name", "Model", "Type", "Average ⬆️", "IFEval", "MMLU-PRO", "GPQA", "MUSR", "CO₂ cost (kg)"]
        df = load_dataset("open-llm-leaderboard/contents")["train"].to_pandas().head(10)
        df = df[wanted_cols]
        # NOTE(review): only the first 10 raw rows are kept *before* sorting —
        # confirm this is intended rather than "top 10 by average score".
        # The "Model" column holds an HTML anchor; keep only the bare URL so
        # st.column_config.LinkColumn can render it.
        df["Model"] = df["Model"].apply(get_url)
        df = df.sort_values(by="Average ⬆️", ascending=False).reset_index(drop=True)
        return df
    except FileNotFoundError:
        st.error("open-llm-leaderboard/contents was not found in the hub")
        return pd.DataFrame()
# Load the leaderboard contents once (cached) and build the page tabs.
leaderboard_data = load_data()
tabs = st.tabs(["Leaderboard", "Submit model"])  # , "Vote for next model"

with tabs[0]:
    # Centered logo: place it in the middle column of a 5-column layout.
    logo_cols = st.columns(5, vertical_alignment="center")
    with logo_cols[2]:
        st.image("assets/images/hf-logo.png", use_container_width=True)

    # Page title and tagline (rendered as raw HTML for styling control).
    st.markdown(
        """
        <div style="text-align: center;">
            <h1>IVACE LLM Leaderboard</h1>
            <p style="font-size: 1.2rem;">
                Comparing Large Language Models in an <span style="font-weight: 600;">open</span>
                and <span style="font-weight: 600;">reproducible</span> way
            </p>
        </div>
        """,
        unsafe_allow_html=True,
    )

    # Center the results table in a wide middle column.
    table_cols = st.columns([0.1, 0.8, 0.1], vertical_alignment="center")
    with table_cols[1]:
        if leaderboard_data.empty:
            st.write("No data found to display on leaderboard.")
        else:
            st.data_editor(
                leaderboard_data,
                column_config={"Model": st.column_config.LinkColumn("Model")},
                hide_index=False,
            )
with tabs[1]:
    st.header("Submit model")
    # NOTE: a redundant duplicate `import streamlit as st` previously lived here;
    # the module is already imported at the top of the file.

    def get_id_number(id_val):
        """Return an HTML snippet rendering *id_val* inside a numbered circle.

        The outer flex <div> is intentionally left open: the caller appends a
        guide fragment loaded from assets/html, which presumably closes it —
        TODO confirm the fragments end with the matching </div>.
        """
        return f"""
        <div style="display: flex; align-items: flex-start; margin-bottom: 1rem;">
            <div style="
                width: 32px;
                height: 32px;
                border-radius: 50%;
                display: flex;
                align-items: center;
                justify-content: center;
                border: 1px solid #007BFF;
                color: #007BFF;
                font-size: 0.875rem;
                font-weight: 600;
                background-color: transparent;">
                {id_val}
            </div>"""

    # Build the step-by-step submission guide from the HTML fragments on disk.
    # sorted() makes the step order deterministic: os.listdir returns entries
    # in arbitrary, filesystem-dependent order.
    guide_info_list = []
    html_path = "assets/html"
    for filename in sorted(os.listdir(html_path)):
        file_path = os.path.join(html_path, filename)
        with open(file_path, 'r', encoding='utf-8') as file:
            guide_info_list.append(file.read())

    # Display each guide fragment prefixed with its 1-based step number.
    for i, info_div in enumerate(guide_info_list):
        st.markdown(get_id_number(i + 1) + info_div, unsafe_allow_html=True)

    with st.form("submit_model_form"):
        model_name = st.text_input(
            "Model Name (format: user_name/model_name)",
            help="Your model should be public on the Hub and follow the username/model-id format (e.g. mistralai/Mistral-7B-v0.1).",
        )
        description = st.text_area(
            "Description",
            help="Add a description of the proposed model for the evaluation to help prioritize its evaluation",
        )
        user_contact = st.text_input(
            "Your Contact Email",
            help="User e-mail to contact when there are updates",
        )
        precision_option = st.selectbox(
            "Choose precision format:",
            help="Size limits vary by precision: • FP16/BF16: up to 100B parameters • 8-bit: up to 280B parameters (2x) • 4-bit: up to 560B parameters (4x) Choose carefully as incorrect precision can cause evaluation errors.",
            options=["float16", "bfloat16", "8bit", "4bit", "GPTQ"],
            index=0,
        )
        weight_type_option = st.selectbox(
            "Select what type of weights are being loaded from the checkpoint provided:",
            help="Original: Complete model weights in safetensors format Delta: Weight differences from base model (requires base model for size calculation) Adapter: Lightweight fine-tuning layers (requires base model for size calculation)",
            options=["Original", "Adapter", "Delta"],
            index=0,
        )
        base_model_name = st.text_input(
            "Base model",
            help="Required for delta weights or adapters. This information is used to identify the original model and calculate the total parameter count by combining base model and adapter/delta parameters.",
            value="",
        )
        model_type = st.selectbox(
            "Choose model type:",
            help="🟢 Pretrained: Base models trained on text using masked modeling 🟩 Continuously Pretrained: Extended training on additional corpus 🔶 Fine-tuned: Domain-specific optimization 💬 Chat: Models using RLHF, DPO, or IFT for conversation 🤝 Merge: Combined weights without additional training",
            options=["🟢 Pretrained", "🟩 Continuously Pretrained", "🔶 Fine-tuned", "💬 Chat", "🤝 Merge"],
        )
        submit_button = st.form_submit_button("Submit Request")

    if submit_button:
        # Validate model size, license, and (for chat models) the chat template.
        use_chat_template = model_type == "💬 Chat"
        validation_error = validate_model(
            model_name, precision_option, base_model_name, weight_type_option, use_chat_template
        )
        if validation_error is not None:
            st.error(validation_error)
        elif not re.match(r"[^@]+@[^@]+\.[^@]+", user_contact):
            st.error("Invalid email address.")
        else:
            if log_submission(model_name, description, user_contact):
                st.success("Your request has been sent successfully.")
            else:
                st.error("Failed to send your request. Please try again later.")
# with tabs[2]:
# st.header("Vote for next model")
# st.write("Esta sección estará disponible próximamente.")