File size: 6,080 Bytes
229c9d9 12502a1 229c9d9 12502a1 229c9d9 12502a1 229c9d9 a43bf61 229c9d9 d39dc14 229c9d9 d39dc14 229c9d9 d39dc14 229c9d9 d39dc14 229c9d9 d39dc14 229c9d9 d39dc14 12502a1 229c9d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
import json
from pathlib import Path
import gradio as gr
import pandas as pd
# HTML heading for the leaderboard page.
TITLE = '<h1 align="center" id="space-title">LLM Leaderboard for Minecraft</h1>'

# Short blurb describing the leaderboard (plain string; it has no placeholders).
DESCRIPTION = """
Evaluation of VLM on Minecraft
"""

# Benchmarks to leave out; currently none.
BENCHMARKS_TO_SKIP = []
def _format_score(value):
    """Render a numeric score as a percentage string with two decimals."""
    if isinstance(value, (int, float)):
        return f"{value * 100:.2f}"
    return value


def get_leaderboard_df(score_path, threshold="20"):
    """Load model scores from a JSON file into a display-ready DataFrame.

    The JSON document maps each model name to a dict of metrics. A metric
    value that is not a dict is stored under its key unchanged. A metric
    value that is a dict is treated as a set of alternatives, of which only
    the entry keyed by ``threshold`` is kept; it is stored under the metric
    key with underscores replaced by spaces.

    Args:
        score_path: Path of the JSON score file.
        threshold: Sub-key selecting which entry of a nested metric to keep.

    Returns:
        A DataFrame with one row per model: a "Model" column followed by one
        column per metric. Numeric values are multiplied by 100 and rendered
        as strings with two decimals; other values are left as they are.
    """
    with open(score_path, "r", encoding="utf-8") as f:
        scores = json.load(f)

    rows = []
    for model, metrics in scores.items():
        row = {"Model": model}
        for key, value in metrics.items():
            if not isinstance(value, dict):
                row[key] = value
            elif threshold in value:
                row[key.replace("_", " ")] = value[threshold]
        rows.append(row)

    df = pd.DataFrame(rows)
    # Replace each metric column wholesale instead of writing strings into
    # the existing numeric columns through iloc, and avoid the deprecated
    # DataFrame.applymap. Series.map also visits missing entries (NaN is a
    # float), so they are formatted like any other number.
    for column in df.columns[1:]:
        df[column] = df[column].map(_format_score)
    return df
# Load the scores once at import time; the search helpers and the UI below read this frame.
leaderboard_df = get_leaderboard_df(score_path="score.json")
import gradio as gr
import pandas as pd
# Maps each tab label to the list of leaderboard_df columns shown in that tab.
# The first column of each list is the one the table is sorted by.
TASKS = {
    tab_label: [column]
    for tab_label, column in (
        ("VQA", "VQA"),
        ("QA", "QA"),
        ("VQA Reasoning", "VQA_Reasoning"),
        ("Reason", "Reason"),  # TODO: confirm this column name matches the score file
        ("Embodied Grounding", "Embodied Grounding"),
        ("GUI Grounding", "Gui Grounding"),
    )
}
def filter_and_search(search_query: str, task_name: str):
    """Return the leaderboard for one task, optionally filtered by model name.

    Args:
        search_query: One or more search terms separated by ";". A model is
            kept when its name contains any of the terms, compared
            case-insensitively as plain text. A blank query keeps every model.
        task_name: Key into ``TASKS`` selecting the columns to return.

    Returns:
        A DataFrame with the "Model" column and the task's columns, sorted by
        the task's first column in descending order with missing scores last.
    """
    task_cols = TASKS[task_name]
    score_col = task_cols[0]

    df = leaderboard_df.copy()
    df[score_col] = pd.to_numeric(df[score_col], errors="coerce")
    df = df.sort_values(by=score_col, ascending=False, na_position="last")

    # Drop empty terms (e.g. from a trailing ";"); an empty term would match
    # every model and defeat the filter.
    pieces = (part.strip().lower() for part in (search_query or "").split(";"))
    terms = [term for term in pieces if term]
    if terms:
        names = df["Model"].str.lower()
        matches = pd.Series(False, index=df.index)
        for term in terms:
            # regex=False: the term is user input, so characters such as "("
            # or "+" must be matched literally rather than parsed as a pattern.
            matches |= names.str.contains(term, regex=False, na=False)
        df = df[matches]
    return df[["Model"] + task_cols]
# Build the Gradio UI: one tab per task, each holding a search box and a score table.
# NOTE(review): a second Blocks app is assigned to `demo` and launched further down
# in this file; launch() normally blocks when run as a script, so confirm which of
# the two UIs is meant to be served.
with gr.Blocks() as demo:
    gr.HTML("<h2>Leaderboard</h2>")
    with gr.Column():
        gr.Markdown("Search and view results for each task.", elem_classes="markdown-text")
        with gr.Tabs(elem_classes="tabs-buttons") as tabs:
            for tab_label, columns in TASKS.items():
                with gr.TabItem(tab_label):
                    # Initial contents: the task's columns, best score first.
                    score_col = columns[0]
                    initial = (
                        leaderboard_df[["Model", *columns]]
                        .assign(**{score_col: pd.to_numeric(leaderboard_df[score_col], errors="coerce")})
                        .sort_values(by=score_col, ascending=False, na_position="last")
                    )
                    with gr.Row():
                        search_box = gr.Textbox(placeholder="Search model name...", show_label=False)
                    with gr.Group():
                        results = gr.Dataframe(
                            value=initial,
                            wrap=True,
                            column_widths=[400] + [110] * len(columns),
                        )
                    # Submitting the search box re-filters this tab's table.
                    # The default argument pins the current tab label for the callback.
                    search_box.submit(
                        fn=lambda query, t=tab_label: filter_and_search(query, t),
                        inputs=search_box,
                        outputs=results,
                    )
    gr.HTML("Threshold corresponding to the values of GUI and Embodied Grounding: <b>20</b>")
demo.launch()
def filter_and_search(search_query: str, task_name: str):
    """Return the leaderboard for one task, optionally filtered by model name.

    Args:
        search_query: One or more search terms separated by ";". A model is
            kept when its name contains any of the terms, compared
            case-insensitively as plain text. A blank query keeps every model.
        task_name: Key into ``TASKS`` selecting the columns to return.

    Returns:
        A DataFrame with the "Model" column and the task's columns, sorted by
        the task's first column in descending order with missing scores last.
    """
    task_cols = TASKS[task_name]
    score_col = task_cols[0]

    df = leaderboard_df.copy()
    df[score_col] = pd.to_numeric(df[score_col], errors="coerce")
    df = df.sort_values(by=score_col, ascending=False, na_position="last")

    # Drop empty terms (e.g. from a trailing ";"); an empty term would match
    # every model and defeat the filter.
    pieces = (part.strip().lower() for part in (search_query or "").split(";"))
    terms = [term for term in pieces if term]
    if terms:
        names = df["Model"].str.lower()
        matches = pd.Series(False, index=df.index)
        for term in terms:
            # regex=False: the term is user input, so characters such as "("
            # or "+" must be matched literally rather than parsed as a pattern.
            matches |= names.str.contains(term, regex=False, na=False)
        df = df[matches]
    return df[["Model"] + task_cols]
def get_initial_table(task_name: str):
    """Return the unfiltered table for a task, best score first.

    Args:
        task_name: Key into ``TASKS`` selecting the columns to return.

    Returns:
        A DataFrame with the "Model" column and the task's columns. The
        task's first column is converted to numbers (unparseable values
        become NaN) and used to sort in descending order, NaN rows last.
    """
    columns = TASKS[task_name]
    primary = columns[0]
    numeric_scores = pd.to_numeric(leaderboard_df[primary], errors="coerce")
    ranked = leaderboard_df.assign(**{primary: numeric_scores}).sort_values(
        by=primary, ascending=False, na_position="last"
    )
    return ranked[["Model", *columns]]
# Build the Gradio UI: one tab per task, each holding a search box, a refresh
# button and a score table.
# NOTE(review): an earlier block in this file also assigns `demo` and calls
# launch(); launch() normally blocks when run as a script, so confirm which of
# the two UIs is meant to be served.
with gr.Blocks() as demo:
    gr.HTML("<h2>Leaderboard</h2>")
    with gr.Column():
        gr.Markdown("Search and view results for each task.", elem_classes="markdown-text")
        with gr.Tabs(elem_classes="tabs-buttons") as tabs:
            for tab_label, columns in TASKS.items():
                with gr.TabItem(tab_label):
                    # Initial contents: the task's columns, best score first.
                    initial = get_initial_table(tab_label)
                    with gr.Row():
                        search_box = gr.Textbox(placeholder="Search model name...", show_label=False)
                        reset_button = gr.Button("Refresh")
                    with gr.Group():
                        results = gr.Dataframe(
                            value=initial,
                            wrap=True,
                            column_widths=[400] + [110] * len(columns),
                        )
                    # Submitting the search box re-filters this tab's table.
                    # The default argument pins the current tab label for the callback.
                    search_box.submit(
                        fn=lambda query, t=tab_label: filter_and_search(query, t),
                        inputs=search_box,
                        outputs=results,
                    )

                    def refresh(task=tab_label):
                        # Clear the search box and restore the unfiltered table.
                        return "", get_initial_table(task)

                    reset_button.click(
                        fn=refresh,
                        outputs=[search_box, results],
                    )
    gr.HTML("Threshold corresponding to the values of GUI and Embodied Grounding: <b>20</b>")
demo.launch()