Spaces:

HPAI-BSC
/

TuRTLe-Leaderboard

Running

App Files Files Community

TuRTLe-Leaderboard / results /parse.py

ggcristian

Add DeepSeek R1-0528

7741a44 10 days ago

raw

history blame contribute delete

7.17 kB

	import csv
	import json
	import locale
	import os
	from typing import Dict, Union

	import pandas as pd

	# Registry of known models, keyed by the model name looked up by
	# get_model_params_and_url(). Each value is a 4-tuple:
	#     (model URL, parameter count, model type, release tag)
	# Parameter counts are presumably in billions -- confirm against the model cards.
	model_details = {
	    # --- "General" models ---
	    "DeepSeek R1-0528": ("https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", 685, "General", "V2"),
	    "DeepSeek R1": ("https://huggingface.co/deepseek-ai/DeepSeek-R1", 685, "General", "V1"),
	    "Llama 3.1 405B": ("https://huggingface.co/RedHatAI/Meta-Llama-3.1-405B-FP8", 406, "General", "V1"),
	    "Qwen3 236B A22B": ("https://huggingface.co/Qwen/Qwen3-235B-A22B", 235, "General", "V2"),
	    "Llama 3.(1-3) 70B": ("https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct", 70.6, "General", "V1"),
	    "Qwen2.5 72B": ("https://huggingface.co/Qwen/Qwen2.5-72B-Instruct", 72.7, "General", "V1"),
	    "QwQ 32B": ("https://huggingface.co/Qwen/QwQ-32B", 32.8, "General", "V2"),
	    "Qwen2.5 32B": ("https://huggingface.co/Qwen/Qwen2.5-32B", 32.5, "General", "V1"),
	    "StarChat2 15B v0.1": ("https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1", 16, "General", "V1"),
	    "DeepSeek R1 Distill Qwen 14B": ("https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", 14.8, "General", "V1"),
	    # --- "Coding" models ---
	    "CodeLlama 70B": ("https://huggingface.co/codellama/CodeLlama-70b-hf", 69, "Coding", "V1"),
	    "QwenCoder 2.5 32B": ("https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct", 32.5, "Coding", "V1"),
	    "DeepSeek Coder 33B": ("https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct", 33.3, "Coding", "V1"),
	    "QwenCoder 2.5 14B": ("https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct", 14.7, "Coding", "V1"),
	    "DeepCoder 14B": ("https://huggingface.co/agentica-org/DeepCoder-14B-Preview", 14.8, "Coding", "V2"),
	    "OpenCoder 8B": ("https://huggingface.co/infly/OpenCoder-8B-Instruct", 7.77, "Coding", "V1"),
	    "SeedCoder 8B": ("https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Instruct", 8.25, "Coding", "V2"),
	    "SeedCoder 8B Reasoning": ("https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Reasoning-bf16", 8.25, "Coding", "V2"),
	    "QwenCoder 2.5 7B": ("https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct", 7.61, "Coding", "V1"),
	    "DeepSeek Coder 6,7B": ("https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct", 6.74, "Coding", "V1"),
	    # --- "RTL-Specific" models ---
	    "HaVen-CodeQwen": ("https://huggingface.co/yangyiyao/HaVen-CodeQwen", 7.25, "RTL-Specific", "V1"),
	    "CodeV R1 Distill Qwen 7B": ("https://huggingface.co/zhuyaoyu/CodeV-R1-Distill-Qwen-7B", 7.62, "RTL-Specific", "V2"),
	    "CodeV-CL-7B": ("https://huggingface.co/yang-z/CodeV-CL-7B", 6.74, "RTL-Specific", "V1"),
	    "CodeV-QW-7B": ("https://huggingface.co/yang-z/CodeV-QW-7B", 7.25, "RTL-Specific", "V1"),
	    "CodeV-DS-6.7B": ("https://huggingface.co/yang-z/CodeV-DS-6.7B", 6.74, "RTL-Specific", "V1"),
	    "RTLCoder Mistral": ("https://huggingface.co/ishorn5/RTLCoder-v1.1", 7.24, "RTL-Specific", "V1"),
	    "RTLCoder DeepSeek": ("https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1", 6.74, "RTL-Specific", "V1"),
	    "OriGen": ("https://huggingface.co/henryen/OriGen", 6.74, "RTL-Specific", "V1"),
	}


	def get_headers(reader, agg=False) -> Union[list, tuple[list, list]]:
	    """Consume the header row(s) from an iterable of CSV rows.

	    The first row supplies the metric names; unless ``agg`` is set, the
	    second row supplies the benchmark names. The first cell of each header
	    row is dropped. Only header rows are consumed, so when ``reader`` is an
	    iterator (e.g. a ``csv.reader``) the caller can keep iterating it to get
	    the data rows.

	    Args:
	        reader: Iterable of rows, each row a list of strings.
	        agg: When true, only the single metrics header row is read.

	    Returns:
	        ``(metrics, benchs)`` when ``agg`` is false, or just ``metrics`` when
	        ``agg`` is true. A missing header row yields an empty list.
	    """
	    rows = iter(reader)
	    metrics = next(rows, [])[1:]
	    if agg:
	        # Return before touching the next row: in aggregated mode it is
	        # already data, and reading it here would silently drop it.
	        return metrics
	    benchs = next(rows, [])[1:]
	    return metrics, benchs


	def get_model_params_and_url(model) -> tuple[str, Union[float, str], str, str]:
	    """Look up the metadata registered for ``model`` in ``model_details``.

	    Args:
	        model: Model name, used as the ``model_details`` key.

	    Returns:
	        ``(url, params, model_type, release)``. Models that are not in
	        ``model_details`` yield ``"-"`` for all four fields.
	    """
	    if model not in model_details:
	        # Must be a 4-tuple: parse_results unpacks four values, so the former
	        # 3-element placeholder raised ValueError for any unlisted model.
	        return "-", "-", "-", "-"
	    url, params, model_type, release = model_details[model][:4]
	    return url, params, model_type, release


	def parse_results(csv_path: str) -> list[dict]:
	    """Flatten the wide results CSV into one record per (model, column).

	    The file ``results/<csv_path>`` is read with ``get_headers``, which takes
	    the first two rows as metric names and benchmark names. Every following
	    row holds a model name in its first cell and one score per header column
	    after it. Scores may use a decimal comma. Blank lines are skipped.

	    Args:
	        csv_path: Path of the CSV file, relative to the ``results`` directory.

	    Returns:
	        A list of dicts with the keys ``Model``, ``Model Type``,
	        ``Benchmark``, ``Task``, ``Result``, ``Model URL``, ``Params`` and
	        ``Release``.

	    Raises:
	        IndexError: If a model row has fewer scores than header columns.
	        ValueError: If a score cell cannot be parsed as a float.
	    """
	    dataset = []
	    models = []
	    with open(os.path.join("results", csv_path), newline="") as csvfile:
	        reader = csv.reader(csvfile, delimiter=",")
	        metrics, benchs = get_headers(reader)
	        for row in reader:
	            if not row:
	                # csv.reader yields [] for blank lines (e.g. a trailing
	                # newline); skip them instead of failing on row[0].
	                continue
	            model, scores = row[0], row[1:]
	            url, params, model_type, release = get_model_params_and_url(model)
	            models.append(model)
	            for idx, (metric, bench) in enumerate(zip(metrics, benchs)):
	                if metric == "EM":
	                    metric = "Exact Matching (EM)"
	                dataset.append(
	                    {
	                        "Model": model,
	                        "Model Type": model_type,
	                        "Benchmark": bench,
	                        "Task": metric,
	                        # Accept decimal commas ("12,5") as well as dots.
	                        "Result": float(scores[idx].replace(",", ".")),
	                        "Model URL": url,
	                        "Params": params,
	                        "Release": release,
	                    }
	                )
	    # Kept from the original: echoes the model names that were parsed.
	    print(models)
	    return dataset


	def parse_agg(csv_path: str) -> pd.DataFrame:
	    """Load the aggregated scores table as a DataFrame.

	    Args:
	        csv_path: Currently ignored; the file read is always
	            ``results/aggregated_scores.csv``. The parameter is kept so
	            existing callers keep working.

	    Returns:
	        The CSV contents as returned by ``pandas.read_csv`` with its
	        default options.
	    """
	    # NOTE(review): csv_path is unused -- confirm what callers pass before
	    # switching to it, since that would change which file gets read.
	    return pd.read_csv("results/aggregated_scores.csv")


	def writeJson(data: list):
	    """Serialize ``data`` to ``results/results.json`` and print ``Done``.

	    Args:
	        data: JSON-serializable list to write.
	    """
	    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
	    # encoding is pinned to UTF-8 (the encoding read_json uses) rather than
	    # left to the platform default.
	    with open("results/results.json", "w", encoding="utf-8") as f:
	        json.dump(data, f, indent=4, ensure_ascii=False)
	    print("Done")


	def read_json():
	    """Return the parsed contents of ``results/results.json`` (read as UTF-8)."""
	    with open("results/results.json", "r", encoding="utf-8") as handle:
	        return json.load(handle)


	def read_data() -> tuple[pd.DataFrame, list, list, str]:
	    """Load the results JSON into a DataFrame plus selector values.

	    Returns:
	        A 4-tuple ``(df, benchmarks, metrics, default_metric)``:

	        * ``df``: the records from ``read_json()`` with the ``Task`` column
	          renamed to ``Metric`` and ``Result`` renamed to ``Score``;
	          ``Params`` is converted with ``pd.to_numeric(errors="coerce")``,
	          so non-numeric values become NaN.
	        * ``benchmarks``: unique ``Benchmark`` values, sorted descending.
	        * ``metrics``: unique ``Metric`` values.
	        * ``default_metric``: ``"Functionality (FNC)"`` when present in
	          ``metrics``, otherwise the first metric.
	    """
	    df = pd.DataFrame(read_json())
	    df = df.rename(
	        columns={
	            "Task": "Metric",
	            "Result": "Score",
	            # NOTE(review): this renames a *column* called "EM", which the
	            # records built by parse_results never contain -- presumably a
	            # leftover; kept so behavior is unchanged.
	            "EM": "Exact Matching (EM)",
	        }
	    )
	    df["Params"] = pd.to_numeric(df["Params"], errors="coerce")
	    benchmarks = sorted(df["Benchmark"].unique().tolist(), reverse=True)
	    metrics = df["Metric"].unique().tolist()
	    preferred = "Functionality (FNC)"
	    default_metric = preferred if preferred in metrics else metrics[0]
	    return df, benchmarks, metrics, default_metric


	if __name__ == "__main__":
	    # Script entry point: parse "./results.csv" (resolved by parse_results)
	    # and hand the records to writeJson.
	    writeJson(parse_results("./results.csv"))