ggcristian's picture
Add DeepSeek R1-0528
7741a44
import csv
import json
import locale
import os
from typing import Dict, Union
import pandas as pd
# Static metadata for every model that may appear in the results CSV.
#
# Keys are looked up verbatim by get_model_params_and_url() using the model
# name taken from the first CSV column, so a key must match the CSV spelling
# exactly; do not "correct" a key here without updating the CSV as well.
#
# Each value is a 4-tuple that is read positionally:
#   [0] model URL
#   [1] parameter count (presumably in billions -- TODO confirm)
#   [2] model type: "General", "Coding" or "RTL-Specific"
#   [3] release tag: "V1" or "V2"
model_details = {
    # --- Model type: "General" ---
    "DeepSeek R1-0528": (
        "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
        685,
        "General",
        "V2",
    ),
    "DeepSeek R1": (
        "https://huggingface.co/deepseek-ai/DeepSeek-R1",
        685,
        "General",
        "V1",
    ),
    "Llama 3.1 405B": (
        "https://huggingface.co/RedHatAI/Meta-Llama-3.1-405B-FP8",
        406,
        "General",
        "V1",
    ),
    # NOTE(review): the key says 236B while the URL and the parameter count
    # say 235 -- confirm which spelling the results CSV uses before changing.
    "Qwen3 236B A22B": (
        "https://huggingface.co/Qwen/Qwen3-235B-A22B",
        235,
        "General",
        "V2",
    ),
    "Llama 3.(1-3) 70B": (
        "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
        70.6,
        "General",
        "V1",
    ),
    "Qwen2.5 72B": (
        "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
        72.7,
        "General",
        "V1",
    ),
    "QwQ 32B": ("https://huggingface.co/Qwen/QwQ-32B", 32.8, "General", "V2"),
    "Qwen2.5 32B": ("https://huggingface.co/Qwen/Qwen2.5-32B", 32.5, "General", "V1"),
    "StarChat2 15B v0.1": (
        "https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1",
        16,
        "General",
        "V1",
    ),
    "DeepSeek R1 Distill Qwen 14B": (
        "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
        14.8,
        "General",
        "V1",
    ),
    # --- Model type: "Coding" ---
    "CodeLlama 70B": (
        "https://huggingface.co/codellama/CodeLlama-70b-hf",
        69,
        "Coding",
        "V1",
    ),
    "QwenCoder 2.5 32B": (
        "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
        32.5,
        "Coding",
        "V1",
    ),
    "DeepSeek Coder 33B": (
        "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
        33.3,
        "Coding",
        "V1",
    ),
    "QwenCoder 2.5 14B": (
        "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
        14.7,
        "Coding",
        "V1",
    ),
    "DeepCoder 14B": (
        "https://huggingface.co/agentica-org/DeepCoder-14B-Preview",
        14.8,
        "Coding",
        "V2",
    ),
    "OpenCoder 8B": (
        "https://huggingface.co/infly/OpenCoder-8B-Instruct",
        7.77,
        "Coding",
        "V1",
    ),
    "SeedCoder 8B": (
        "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Instruct",
        8.25,
        "Coding",
        "V2",
    ),
    "SeedCoder 8B Reasoning": (
        "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Reasoning-bf16",
        8.25,
        "Coding",
        "V2",
    ),
    "QwenCoder 2.5 7B": (
        "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
        7.61,
        "Coding",
        "V1",
    ),
    # NOTE(review): the key uses a decimal comma ("6,7B") while the URL uses
    # "6.7b" -- presumably this mirrors the CSV spelling; verify before editing.
    "DeepSeek Coder 6,7B": (
        "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
        6.74,
        "Coding",
        "V1",
    ),
    # --- Model type: "RTL-Specific" ---
    "HaVen-CodeQwen": (
        "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
        7.25,
        "RTL-Specific",
        "V1",
    ),
    "CodeV R1 Distill Qwen 7B": (
        "https://huggingface.co/zhuyaoyu/CodeV-R1-Distill-Qwen-7B",
        7.62,
        "RTL-Specific",
        "V2",
    ),
    "CodeV-CL-7B": (
        "https://huggingface.co/yang-z/CodeV-CL-7B",
        6.74,
        "RTL-Specific",
        "V1",
    ),
    "CodeV-QW-7B": (
        "https://huggingface.co/yang-z/CodeV-QW-7B",
        7.25,
        "RTL-Specific",
        "V1",
    ),
    "CodeV-DS-6.7B": (
        "https://huggingface.co/yang-z/CodeV-DS-6.7B",
        6.74,
        "RTL-Specific",
        "V1",
    ),
    "RTLCoder Mistral": (
        "https://huggingface.co/ishorn5/RTLCoder-v1.1",
        7.24,
        "RTL-Specific",
        "V1",
    ),
    "RTLCoder DeepSeek": (
        "https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1",
        6.74,
        "RTL-Specific",
        "V1",
    ),
    "OriGen": ("https://huggingface.co/henryen/OriGen", 6.74, "RTL-Specific", "V1"),
}
def get_headers(reader, agg=False) -> Union[list, tuple[list, list]]:
    """
    Read the header row(s) from *reader* and return the column labels.

    The first cell of every header row is the label of the model-name
    column and is dropped.

    Args:
        reader: An iterable of rows (lists of strings), typically a
            ``csv.reader``. Only the header rows are consumed, so the
            caller can keep iterating over the data rows afterwards.
        agg: When true the input has a single header row and only the
            metric names are returned.

    Returns:
        ``metrics`` (a list) when *agg* is true, otherwise the tuple
        ``(metrics, benchs)`` built from the first and second rows. A
        missing row yields an empty list in its place.
    """
    rows = iter(reader)

    first = next(rows, None)
    metrics = first[1:] if first is not None else []
    if agg:
        # Single-header layout: always hand back a plain list and leave the
        # first data row in the reader for the caller.
        return metrics

    second = next(rows, None)
    benchs = second[1:] if second is not None else []
    return metrics, benchs
def get_model_params_and_url(
    model,
) -> tuple[str, Union[int, float, str], str, str]:
    """
    Look up the static metadata of *model* in ``model_details``.

    Args:
        model: Model name, used verbatim as the dictionary key.

    Returns:
        The tuple ``(url, params, model_type, release)``. For a model that
        is not listed, every field is the placeholder string ``"-"``.
    """
    details = model_details.get(model)
    if details is None:
        # One placeholder per field: callers unpack exactly four values, so
        # returning fewer would raise ValueError at the call site.
        return "-", "-", "-", "-"
    url, params, model_type, release = details[:4]
    return url, params, model_type, release
def parse_results(csv_path: str) -> list[dict]:
    """
    Flatten the wide results CSV into one record per model and score column.

    The file is read from ``results/<csv_path>``. Its first two rows are
    headers (metric names, then benchmark names, aligned column by column).
    Every following row holds a model name followed by one score per column.
    Scores may be written with a decimal comma, which is normalised to a
    decimal point before conversion to ``float``.

    Blank lines and empty score cells are skipped rather than aborting the
    whole parse; a row shorter than the header simply produces no records
    for its missing columns.

    Args:
        csv_path: Path of the CSV file, relative to the ``results`` directory.

    Returns:
        A list of dicts with the keys ``Model``, ``Model Type``,
        ``Benchmark``, ``Task``, ``Result``, ``Model URL``, ``Params`` and
        ``Release``.
    """
    dataset = []
    models = []
    with open(
        os.path.join("results", csv_path), newline="", encoding="utf-8"
    ) as csvfile:
        reader = csv.reader(csvfile, delimiter=",")
        # Consumes the two header rows; the reader is left on the first
        # data row.
        metrics, benchs = get_headers(reader)
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            model, *scores = row
            url, params, model_type, release = get_model_params_and_url(model)
            models.append(model)
            for metric, bench, score in zip(metrics, benchs, scores):
                if not score.strip():
                    # No result reported for this column.
                    continue
                if metric == "EM":
                    metric = "Exact Matching (EM)"
                dataset.append(
                    {
                        "Model": model,
                        "Model Type": model_type,
                        "Benchmark": bench,
                        "Task": metric,
                        "Result": float(score.replace(",", ".")),
                        "Model URL": url,
                        "Params": params,
                        "Release": release,
                    }
                )
    print(models)
    return dataset
def parse_agg(csv_path: str) -> pd.DataFrame:
    """
    Load the aggregated scores table and return it as a DataFrame.

    NOTE(review): ``csv_path`` is accepted but currently ignored; the file
    that is read is always ``results/aggregated_scores.csv``. Confirm what
    callers pass before wiring the argument through.

    Args:
        csv_path: Unused (see note above).

    Returns:
        The contents of ``results/aggregated_scores.csv`` as parsed by
        ``pandas.read_csv`` with default options.
    """
    return pd.read_csv("results/aggregated_scores.csv")
def writeJson(data: list):
    """
    Serialise *data* to ``results/results.json`` and print ``Done``.

    The file is written as UTF-8 with 4-space indentation. Non-ASCII
    characters are stored unescaped (``ensure_ascii=False``), so the
    encoding is set explicitly instead of relying on the platform default;
    this matches ``read_json``, which reads the file back as UTF-8.

    Args:
        data: JSON-serialisable list of records.
    """
    with open("results/results.json", "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4, ensure_ascii=False)
    print("Done")
def read_json():
    """Return the parsed contents of ``results/results.json`` (read as UTF-8)."""
    with open("results/results.json", mode="r", encoding="utf-8") as handle:
        return json.load(handle)
def read_data() -> tuple[pd.DataFrame, list, list, str]:
    """
    Load ``results/results.json`` into a DataFrame plus selector values.

    The ``Task`` and ``Result`` columns are renamed to ``Metric`` and
    ``Score``, and ``Params`` is converted to a numeric column.

    Returns:
        A 4-tuple ``(df, benchmarks, metrics, default_metric)`` where
        ``benchmarks`` is the list of distinct benchmark names in reverse
        sorted order, ``metrics`` is the list of distinct metric names in
        order of first appearance, and ``default_metric`` is
        ``"Functionality (FNC)"`` when present, otherwise the first metric.
    """
    df = pd.DataFrame(read_json())
    # Keys that are not column names are ignored by rename().
    df = df.rename(
        columns={
            "Task": "Metric",
            "Result": "Score",
            "EM": "Exact Matching (EM)",
        }
    )
    # Non-numeric values (e.g. the "-" placeholder used for models without
    # metadata) become NaN instead of raising.
    df["Params"] = pd.to_numeric(df["Params"], errors="coerce")

    benchmarks = sorted(df["Benchmark"].unique().tolist(), reverse=True)
    metrics = df["Metric"].unique().tolist()

    preferred = "Functionality (FNC)"
    default_metric = preferred if preferred in metrics else metrics[0]
    return df, benchmarks, metrics, default_metric
if __name__ == "__main__":
    # Script entry point: rebuild results/results.json from results/results.csv.
    writeJson(parse_results("./results.csv"))