zhwang4ai's picture
Update app.py
d39dc14 verified
import json
from pathlib import Path
import gradio as gr
import pandas as pd
# Static page text for the leaderboard.
# NOTE(review): none of these three constants is referenced in the code
# visible in this file -- confirm whether they are still needed.

# HTML heading for the page.
TITLE = """<h1 align="center" id="space-title">LLM Leaderboard for Minecraft</h1>"""

# Short blurb; a plain string, since it contains no placeholders to format.
DESCRIPTION = """
Evaluation of VLM on Minecraft
"""

# Benchmarks to leave out (currently none).
BENCHMARKS_TO_SKIP = []
def get_leaderboard_df(score_path, threshold_key="20"):
    """Load per-model scores from a JSON file into a display-ready dataframe.

    The JSON document maps each model name to a mapping of metric name to
    value. A value is either a scalar score or a nested mapping; for a nested
    mapping only the entry stored under ``threshold_key`` is kept, and its
    column is named after the metric with underscores replaced by spaces.
    Scalar metrics keep their name unchanged.

    Args:
        score_path: Path of the JSON score file.
        threshold_key: Key to pick from nested metric mappings.

    Returns:
        A ``pandas.DataFrame`` whose first column is ``"Model"``, followed by
        one column per metric. Numeric scores are multiplied by 100 and
        rendered as strings with two decimals (a missing score becomes
        ``"nan"``); non-numeric values are passed through unchanged.
    """
    with open(score_path, "r", encoding="utf-8") as f:
        scores = json.load(f)

    rows = []
    for model, metrics in scores.items():
        row = {"Model": model}  # The model name always comes first.
        for key, value in metrics.items():
            if not isinstance(value, dict):
                row[key] = value
            elif threshold_key in value:
                # Nested metric: keep only the selected threshold's score.
                row[key.replace("_", " ")] = value[threshold_key]
        rows.append(row)

    df = pd.DataFrame(rows)

    def as_percent(x):
        return f"{x * 100:.2f}" if isinstance(x, (int, float)) else x

    # Replace each score column wholesale instead of writing strings into the
    # existing numeric columns in place, which pandas treats as an
    # incompatible-dtype assignment.
    for column in df.columns[1:]:
        df[column] = df[column].map(as_percent)
    return df
# Scores are read once, when the module is executed; every table below is
# derived from this dataframe.
leaderboard_df = get_leaderboard_df(score_path="score.json")
import gradio as gr
import pandas as pd
# Tab label -> dataframe column(s) shown in that tab. The first column of
# each entry is the one the tab is ranked by.
# Column names must match the dataframe exactly: get_leaderboard_df replaces
# underscores with spaces for nested metrics only, scalar metrics keep theirs.
TASKS = {
    "VQA": ["VQA"],
    "QA": ["QA"],
    "VQA Reasoning": ["VQA_Reasoning"],
    # Please make sure this column name is correct.
    "Reason": ["Reason"],
    "Embodied Grounding": ["Embodied Grounding"],
    "GUI Grounding": ["Gui Grounding"],
}
# Filter helper: search by model-name keywords only.
def filter_and_search(search_query: str, task_name: str):
    """Return the ranked table for one task, optionally filtered by model name.

    Args:
        search_query: Keywords separated by ``;``. A row is kept when its
            model name contains at least one keyword, ignoring case.
            Keywords are matched as literal substrings, not as regular
            expressions, so characters such as ``(`` or ``.`` in a query can
            neither raise nor match unintentionally. Blank keywords are
            ignored; an empty query returns every row.
        task_name: Key of ``TASKS`` selecting the columns to show.

    Returns:
        A dataframe with ``"Model"`` plus the task's columns, ordered by the
        task's first column from highest to lowest, unparsable scores last.

    Raises:
        KeyError: If ``task_name`` is not a key of ``TASKS``.
    """
    task_cols = TASKS[task_name]
    score_col = task_cols[0]

    df = leaderboard_df.copy()
    # Scores are kept as formatted strings; convert so the sort is numeric.
    df[score_col] = pd.to_numeric(df[score_col], errors="coerce")
    df = df.sort_values(by=score_col, ascending=False, na_position="last")

    raw_terms = (search_query or "").split(";")
    terms = [term.strip().lower() for term in raw_terms if term.strip()]
    if terms:
        names = df["Model"].str.lower()
        mask = pd.Series(False, index=df.index)
        for term in terms:
            mask |= names.str.contains(term, regex=False, na=False)
        df = df[mask]
    return df[["Model"] + task_cols]
# Build the Gradio UI: one tab per task with a search box and a ranked table.
# NOTE: this layout is deliberately not launched. By default ``launch()``
# blocks the main thread while the server is running, so launching here would
# keep the rest of the file -- which rebuilds ``demo`` with a Refresh button
# and launches that version -- from executing.
# NOTE(review): since ``demo`` is rebuilt further down, this first layout
# looks redundant; consider deleting it.
with gr.Blocks() as demo:
    gr.HTML("<h2>Leaderboard</h2>")
    with gr.Column():
        gr.Markdown("Search and view results for each task.", elem_classes="markdown-text")
        with gr.Tabs(elem_classes="tabs-buttons") as tabs:
            for task_name, task_cols in TASKS.items():
                with gr.TabItem(task_name):
                    # Initial data: best score first. An empty query applies
                    # no filter, so this is the full ranked table.
                    sub_df = filter_and_search("", task_name)
                    with gr.Row():
                        search_bar = gr.Textbox(placeholder="Search model name...", show_label=False)
                    with gr.Group():
                        table = gr.Dataframe(
                            value=sub_df,
                            wrap=True,
                            column_widths=[400] + [110 for _ in task_cols],
                        )
                    # Bind the search. The task name is captured as a default
                    # argument so each tab keeps its own task rather than the
                    # loop's last value.
                    search_bar.submit(
                        fn=lambda query, t=task_name: filter_and_search(query, t),
                        inputs=search_bar,
                        outputs=table,
                    )
    gr.HTML("Threshold corresponding to the values of GUI and Embodied Grounding: <b>20</b>")
# Filter helper: search by model-name keywords only.
# NOTE: this redefines the function of the same name declared earlier in the
# file; from this point on, this definition is the one in effect.
def filter_and_search(search_query: str, task_name: str):
    """Return the ranked table for one task, optionally filtered by model name.

    Args:
        search_query: Keywords separated by ``;``. A row is kept when its
            model name contains at least one keyword, ignoring case.
            Keywords are matched as literal substrings, not as regular
            expressions, so characters such as ``(`` or ``.`` in a query can
            neither raise nor match unintentionally. Blank keywords are
            ignored; an empty query returns every row.
        task_name: Key of ``TASKS`` selecting the columns to show.

    Returns:
        A dataframe with ``"Model"`` plus the task's columns, ordered by the
        task's first column from highest to lowest, unparsable scores last.

    Raises:
        KeyError: If ``task_name`` is not a key of ``TASKS``.
    """
    # The unfiltered ranking is exactly what get_initial_table produces.
    df = get_initial_table(task_name)

    raw_terms = (search_query or "").split(";")
    terms = [term.strip().lower() for term in raw_terms if term.strip()]
    if not terms:
        return df

    names = df["Model"].str.lower()
    mask = pd.Series(False, index=df.index)
    for term in terms:
        mask |= names.str.contains(term, regex=False, na=False)
    return df[mask]
def get_initial_table(task_name: str):
    """Return the unfiltered table for one task, best score first.

    Args:
        task_name: Key of ``TASKS`` selecting the columns to show.

    Returns:
        A dataframe with ``"Model"`` plus the task's columns, ordered by the
        task's first column from highest to lowest; scores that cannot be
        parsed as numbers become NaN and are placed last.
    """
    columns = ["Model"] + TASKS[task_name]
    ranking_col = columns[1]  # First task column, right after "Model".

    table = leaderboard_df.copy()
    table[ranking_col] = pd.to_numeric(table[ranking_col], errors="coerce")
    ranked = table.sort_values(by=ranking_col, ascending=False, na_position="last")
    return ranked[columns]
# Build the Gradio UI: one tab per task, each with a search box, a Refresh
# button and a ranked table, then start the app.
with gr.Blocks() as demo:
    gr.HTML("<h2>Leaderboard</h2>")
    with gr.Column():
        gr.Markdown("Search and view results for each task.", elem_classes="markdown-text")
        with gr.Tabs(elem_classes="tabs-buttons") as tabs:
            for task_name, task_cols in TASKS.items():
                with gr.TabItem(task_name):
                    # Initial data: best score first.
                    sub_df = get_initial_table(task_name)

                    with gr.Row():
                        search_bar = gr.Textbox(placeholder="Search model name...", show_label=False)
                        refresh_btn = gr.Button("Refresh")

                    with gr.Group():
                        table = gr.Dataframe(
                            value=sub_df,
                            wrap=True,
                            column_widths=[400] + [110] * len(task_cols),
                        )

                    # Callbacks take the task name as a default argument so
                    # each tab keeps its own task instead of the loop's last
                    # value.
                    def run_search(query, t=task_name):
                        return filter_and_search(query, t)

                    def refresh(task=task_name):
                        # Clear the search box and restore the full ranking.
                        return "", get_initial_table(task)

                    # Pressing Enter in the search box filters the table.
                    search_bar.submit(fn=run_search, inputs=search_bar, outputs=table)
                    refresh_btn.click(fn=refresh, outputs=[search_bar, table])
    gr.HTML("Threshold corresponding to the values of GUI and Embodied Grounding: <b>20</b>")

demo.launch()