"""Gradio leaderboard app showing per-task model scores loaded from ``score.json``."""

import json
import re
from pathlib import Path

import gradio as gr
import pandas as pd

TITLE = """

LLM Leaderboard for Minecraft

"""

DESCRIPTION = """ Evaluation of VLM on Minecraft """

BENCHMARKS_TO_SKIP = []

# For metrics stored as a nested dict, only the entry under this sub-key is
# shown in the table (the page footer reports the same value as the threshold).
_THRESHOLD_KEY = "20"


def _format_score(value):
    """Render an int/float as a percentage string with two decimals.

    Any other value (strings, ``None``) is returned unchanged.
    """
    if isinstance(value, (int, float)):
        return f"{value * 100:.2f}"
    return value


def get_leaderboard_df(score_path):
    """Load the score file and flatten it into one row per model.

    Args:
        score_path: Path to a JSON file shaped like
            ``{model_name: {metric: value_or_dict}}``.

    Returns:
        A DataFrame whose first column is ``"Model"``, followed by one column
        per metric. Scalar metrics keep their key as the column name. Dict
        metrics contribute only their ``"20"`` entry, under the metric name
        with underscores replaced by spaces. Numeric cells are multiplied by
        100 and formatted as strings with two decimals.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(score_path, "r", encoding="utf-8") as f:
        scores = json.load(f)

    rows = []
    for model, metrics in scores.items():
        row = {"Model": model}  # Every row starts with the model name.
        for key, value in metrics.items():
            if isinstance(value, dict):
                # Nested metric: keep only the entry for the reported threshold.
                if _THRESHOLD_KEY in value:
                    row[key.replace("_", " ")] = value[_THRESHOLD_KEY]
            else:
                row[key] = value
        rows.append(row)

    df = pd.DataFrame(rows)
    # Replace each score column wholesale instead of writing strings into the
    # existing numeric columns through ``iloc``; this avoids the deprecated
    # ``applymap`` and incompatible-dtype assignment warnings.
    for column in df.columns[1:]:
        df[column] = df[column].map(_format_score)
    return df


leaderboard_df = get_leaderboard_df("score.json")

# Maps each tab title to the leaderboard columns displayed in that tab.
# The first column of each list is the one the tab is sorted by.
TASKS = {
    "VQA": ["VQA"],
    "QA": ["QA"],
    "VQA Reasoning": ["VQA_Reasoning"],
    "Reason": ["Reason"],  # Make sure this column name is correct.
    "Embodied Grounding": ["Embodied Grounding"],
    "GUI Grounding": ["Gui Grounding"],
}


def get_initial_table(task_name: str):
    """Return the unfiltered table for one task, highest score first.

    Args:
        task_name: A key of ``TASKS``.

    Returns:
        A copy of the leaderboard restricted to ``"Model"`` plus the task's
        columns, with the first task column converted to numbers (values that
        cannot be parsed become NaN) and sorted descending, NaN rows last.
    """
    task_cols = TASKS[task_name]
    score_col = task_cols[0]
    df = leaderboard_df[["Model"] + task_cols].copy()
    df[score_col] = pd.to_numeric(df[score_col], errors="coerce")
    return df.sort_values(by=score_col, ascending=False, na_position="last")


def filter_and_search(search_query: str, task_name: str):
    """Filter a task table by model-name keywords.

    Args:
        search_query: One or more keywords separated by ``;``. A row is kept
            when its model name contains any keyword (case-insensitive).
            A blank query returns the full table.
        task_name: A key of ``TASKS``.

    Returns:
        The sorted task table from ``get_initial_table``, restricted to the
        matching rows.
    """
    df = get_initial_table(task_name)

    # Drop empty terms: an empty alternative in the pattern (e.g. from a
    # trailing ";") would match every row.
    terms = [term.strip().lower() for term in (search_query or "").split(";")]
    terms = [term for term in terms if term]
    if not terms:
        return df

    # Keywords are literal text, so escape them; raw input such as "(" or "+"
    # would otherwise raise re.error or match unintended rows.
    pattern = "|".join(re.escape(term) for term in terms)
    mask = df["Model"].str.lower().str.contains(pattern, regex=True, na=False)
    return df[mask]


# Build the Gradio UI: one tab per task, each with a search box, a refresh
# button and a score table.
with gr.Blocks() as demo:
    gr.HTML("""

Leaderboard

""")
    with gr.Column():
        gr.Markdown("Search and view results for each task.", elem_classes="markdown-text")
        with gr.Tabs(elem_classes="tabs-buttons") as tabs:
            for task_name, task_cols in TASKS.items():
                with gr.TabItem(task_name):
                    with gr.Row():
                        search_bar = gr.Textbox(placeholder="Search model name...", show_label=False)
                        refresh_btn = gr.Button("Refresh")
                    with gr.Group():
                        # Initial data: sorted by score, descending.
                        table = gr.Dataframe(
                            value=get_initial_table(task_name),
                            wrap=True,
                            column_widths=[400] + [110 for _ in task_cols],
                        )

                    # Wire up the search. The task name is bound as a default
                    # argument so each tab keeps its own value rather than the
                    # loop's final one.
                    search_bar.submit(
                        fn=lambda query, t=task_name: filter_and_search(query, t),
                        inputs=search_bar,
                        outputs=table,
                    )

                    def refresh(task=task_name):
                        """Clear the search box and restore the full table."""
                        return "", get_initial_table(task)

                    refresh_btn.click(fn=refresh, outputs=[search_bar, table])
    # NOTE(review): the original nesting of this footer was lost; it is placed
    # once at Blocks level, below the tabs — confirm.
    gr.HTML("Threshold corresponding to the values of GUI and Embodied Grounding: 20")

# Launch exactly once. The previous version built the UI and called launch()
# twice, so the second UI only appeared after the first server was stopped.
if __name__ == "__main__":
    demo.launch()