Spaces:
Running
Running
import csv | |
import json | |
import locale | |
import os | |
from typing import Dict, Union | |
import pandas as pd | |
model_details = { | |
"DeepSeek R1-0528": ( | |
"https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", | |
685, | |
"General", | |
"V2", | |
), | |
"DeepSeek R1": ( | |
"https://huggingface.co/deepseek-ai/DeepSeek-R1", | |
685, | |
"General", | |
"V1", | |
), | |
"Llama 3.1 405B": ( | |
"https://huggingface.co/RedHatAI/Meta-Llama-3.1-405B-FP8", | |
406, | |
"General", | |
"V1", | |
), | |
"Qwen3 236B A22B": ( | |
"https://huggingface.co/Qwen/Qwen3-235B-A22B", | |
235, | |
"General", | |
"V2", | |
), | |
"Llama 3.(1-3) 70B": ( | |
"https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct", | |
70.6, | |
"General", | |
"V1", | |
), | |
"Qwen2.5 72B": ( | |
"https://huggingface.co/Qwen/Qwen2.5-72B-Instruct", | |
72.7, | |
"General", | |
"V1", | |
), | |
"QwQ 32B": ("https://huggingface.co/Qwen/QwQ-32B", 32.8, "General", "V2"), | |
"Qwen2.5 32B": ("https://huggingface.co/Qwen/Qwen2.5-32B", 32.5, "General", "V1"), | |
"StarChat2 15B v0.1": ( | |
"https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1", | |
16, | |
"General", | |
"V1", | |
), | |
"DeepSeek R1 Distill Qwen 14B": ( | |
"https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", | |
14.8, | |
"General", | |
"V1", | |
), | |
"CodeLlama 70B": ( | |
"https://huggingface.co/codellama/CodeLlama-70b-hf", | |
69, | |
"Coding", | |
"V1", | |
), | |
"QwenCoder 2.5 32B": ( | |
"https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct", | |
32.5, | |
"Coding", | |
"V1", | |
), | |
"DeepSeek Coder 33B": ( | |
"https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct", | |
33.3, | |
"Coding", | |
"V1", | |
), | |
"QwenCoder 2.5 14B": ( | |
"https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct", | |
14.7, | |
"Coding", | |
"V1", | |
), | |
"DeepCoder 14B": ( | |
"https://huggingface.co/agentica-org/DeepCoder-14B-Preview", | |
14.8, | |
"Coding", | |
"V2", | |
), | |
"OpenCoder 8B": ( | |
"https://huggingface.co/infly/OpenCoder-8B-Instruct", | |
7.77, | |
"Coding", | |
"V1", | |
), | |
"SeedCoder 8B": ( | |
"https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Instruct", | |
8.25, | |
"Coding", | |
"V2", | |
), | |
"SeedCoder 8B Reasoning": ( | |
"https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Reasoning-bf16", | |
8.25, | |
"Coding", | |
"V2", | |
), | |
"QwenCoder 2.5 7B": ( | |
"https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct", | |
7.61, | |
"Coding", | |
"V1", | |
), | |
"DeepSeek Coder 6,7B": ( | |
"https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct", | |
6.74, | |
"Coding", | |
"V1", | |
), | |
"HaVen-CodeQwen": ( | |
"https://huggingface.co/yangyiyao/HaVen-CodeQwen", | |
7.25, | |
"RTL-Specific", | |
"V1", | |
), | |
"CodeV R1 Distill Qwen 7B": ( | |
"https://huggingface.co/zhuyaoyu/CodeV-R1-Distill-Qwen-7B", | |
7.62, | |
"RTL-Specific", | |
"V2", | |
), | |
"CodeV-CL-7B": ( | |
"https://huggingface.co/yang-z/CodeV-CL-7B", | |
6.74, | |
"RTL-Specific", | |
"V1", | |
), | |
"CodeV-QW-7B": ( | |
"https://huggingface.co/yang-z/CodeV-QW-7B", | |
7.25, | |
"RTL-Specific", | |
"V1", | |
), | |
"CodeV-DS-6.7B": ( | |
"https://huggingface.co/yang-z/CodeV-DS-6.7B", | |
6.74, | |
"RTL-Specific", | |
"V1", | |
), | |
"RTLCoder Mistral": ( | |
"https://huggingface.co/ishorn5/RTLCoder-v1.1", | |
7.24, | |
"RTL-Specific", | |
"V1", | |
), | |
"RTLCoder DeepSeek": ( | |
"https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1", | |
6.74, | |
"RTL-Specific", | |
"V1", | |
), | |
"OriGen": ("https://huggingface.co/henryen/OriGen", 6.74, "RTL-Specific", "V1"), | |
} | |
def get_headers(reader, agg=False) -> Union[list, tuple]:
    """Read the header row(s) from an iterator of CSV rows.

    The first row supplies the metric names. Unless ``agg`` is true, the
    second row supplies the benchmark names. The first cell of each header
    row is dropped. Only the header rows are consumed, so the caller can keep
    iterating ``reader`` to get the data rows.

    Args:
        reader: Iterable yielding rows as lists (for example a ``csv.reader``).
        agg: When true, only the single metrics header row is read.

    Returns:
        ``(metrics, benchs)`` when ``agg`` is false; a missing header row
        yields an empty list in its place. When ``agg`` is true, just the
        ``metrics`` list.
    """
    # iter() is a no-op on a real iterator and lets plain lists work too.
    rows = iter(reader)

    metric_row = next(rows, None)
    metrics = metric_row[1:] if metric_row is not None else []
    if agg:
        # Aggregated files have no benchmark row: stop here so the first
        # data row is left in the reader instead of being thrown away.
        return metrics

    bench_row = next(rows, None)
    benchs = bench_row[1:] if bench_row is not None else []
    return metrics, benchs
def get_model_params_and_url(model) -> tuple:
    """Look up the static metadata recorded for ``model``.

    Args:
        model: Model name, used as a key into ``model_details``.

    Returns:
        A 4-tuple ``(url, params, model_type, release)``. For a model that is
        not listed in ``model_details`` every field is the placeholder
        ``"-"``, so the result always unpacks into four values.
    """
    details = model_details.get(model)
    if details is None:
        # One placeholder per field, matching the arity of the known-model
        # branch so callers can unpack either result the same way.
        return "-", "-", "-", "-"
    url, params, model_type, release = details[:4]
    return url, params, model_type, release
def parse_results(csv_path: str) -> list[dict]:
    """Flatten the wide results CSV into one record per (model, column).

    The file is read from ``results/<csv_path>``. Its header rows are parsed
    by ``get_headers``: the first gives the metric of each column and the
    second gives the benchmark of each column. Every following row starts
    with a model name followed by one result per column; results may use a
    decimal comma.

    Args:
        csv_path: Path of the CSV file, relative to the ``results`` directory.

    Returns:
        A list of dicts with the keys ``Model``, ``Model Type``,
        ``Benchmark``, ``Task``, ``Result``, ``Model URL``, ``Params`` and
        ``Release``.

    Raises:
        IndexError: If a data row has fewer result cells than header columns.
        ValueError: If a result cell cannot be converted to ``float``.
    """
    dataset = []
    models = []
    path = os.path.join("results", csv_path)
    # Explicit encoding so parsing does not depend on the machine's locale.
    with open(path, newline="", encoding="utf-8") as csvfile:
        reader = csv.reader(csvfile, delimiter=",")
        metrics, benchs = get_headers(reader)
        # Resolve the display name of each column once, not once per row.
        columns = [
            ("Exact Matching (EM)" if metric == "EM" else metric, bench)
            for metric, bench in zip(metrics, benchs)
        ]
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue
            model, values = row[0], row[1:]
            url, params, model_type, release = get_model_params_and_url(model)
            models.append(model)
            for idx, (metric, bench) in enumerate(columns):
                dataset.append(
                    {
                        "Model": model,
                        "Model Type": model_type,
                        "Benchmark": bench,
                        "Task": metric,
                        # Accept "12,5" as well as "12.5".
                        "Result": float(values[idx].replace(",", ".")),
                        "Model URL": url,
                        "Params": params,
                        "Release": release,
                    }
                )
    print(models)
    return dataset
def parse_agg(csv_path: str) -> pd.DataFrame:
    """Load the aggregated scores table.

    Args:
        csv_path: Currently ignored. The table is always read from
            ``results/aggregated_scores.csv``; the parameter is kept so
            existing callers keep working.

    Returns:
        The contents of ``results/aggregated_scores.csv`` as parsed by
        ``pandas.read_csv`` with default options.
    """
    # NOTE(review): csv_path is unused — confirm with callers before wiring
    # it in, since they may already pass a path that includes "results/".
    return pd.read_csv("results/aggregated_scores.csv")
def writeJson(data: list):
    """Serialize ``data`` to ``results/results.json`` and print ``Done``.

    The file is written as UTF-8 with a 4-space indent. Non-ASCII characters
    are stored unescaped (``ensure_ascii=False``), so the encoding is set
    explicitly rather than left to the platform default; ``read_json`` reads
    the same file back as UTF-8.

    Args:
        data: JSON-serializable list to write.
    """
    with open("results/results.json", "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4, ensure_ascii=False)
    print("Done")
def read_json():
    """Return the parsed contents of ``results/results.json`` (read as UTF-8)."""
    with open("results/results.json", mode="r", encoding="utf-8") as handle:
        return json.load(handle)
def read_data() -> tuple[pd.DataFrame, list, list, str]:
    """Load the results JSON into a DataFrame plus the values used to filter it.

    The records returned by ``read_json`` become a DataFrame whose ``Task``
    column is renamed to ``Metric`` and whose ``Result`` column is renamed to
    ``Score``. ``Params`` is converted to numbers.

    Returns:
        A 4-tuple ``(df, benchmarks, metrics, default_metric)``:

        * ``df`` - the renamed DataFrame.
        * ``benchmarks`` - unique ``Benchmark`` values, sorted descending.
        * ``metrics`` - unique ``Metric`` values in order of first appearance.
        * ``default_metric`` - ``"Functionality (FNC)"`` when present in
          ``metrics``, otherwise the first metric.
    """
    df = pd.DataFrame(read_json()).rename(
        columns={
            "Task": "Metric",
            "Result": "Score",
            # NOTE(review): only has an effect if the JSON carries an "EM"
            # column — confirm whether this entry is still needed.
            "EM": "Exact Matching (EM)",
        }
    )
    # Values that are not numeric (such as a "-" placeholder) become NaN.
    df["Params"] = pd.to_numeric(df["Params"], errors="coerce")

    benchmarks = sorted(df["Benchmark"].unique().tolist(), reverse=True)
    metrics = df["Metric"].unique().tolist()

    preferred = "Functionality (FNC)"
    default_metric = preferred if preferred in metrics else metrics[0]
    return df, benchmarks, metrics, default_metric
if __name__ == "__main__":
    # Script entry point: parse the results CSV (path is relative to the
    # "results" directory) and dump the flattened records as JSON.
    writeJson(parse_results("./results.csv"))