Spaces:

CraftJarvis
/

Minecraft-VLM-Leaderboard

Running

App Files Files Community

Minecraft-VLM-Leaderboard / app.py

zhwang4ai

Update app.py

d39dc14 verified 4 months ago

raw

history blame contribute delete

6.08 kB

	import json
	from pathlib import Path

	import gradio as gr
	import pandas as pd

	# Page heading as a raw HTML string.
	# NOTE(review): not referenced anywhere in the visible part of this file — confirm it is still needed.
	TITLE = """<h1 align="center" id="space-title">LLM Leaderboard for Minecraft</h1>"""

	# Short description text. The f-prefix has no placeholders to interpolate.
	# NOTE(review): not referenced anywhere in the visible part of this file.
	DESCRIPTION = f"""
	Evaluation of VLM on Minecraft
	"""

	# NOTE(review): empty list that is not read anywhere in the visible part of this file.
	BENCHMARKS_TO_SKIP = []


	def _format_score(value):
	    """Render a numeric score as a percentage string with two decimals.

	    Values that are not ``int``/``float`` (for example strings) are returned
	    unchanged.
	    """
	    if isinstance(value, (int, float)):
	        return f"{value * 100:.2f}"
	    return value


	def get_leaderboard_df(score_path, threshold="20"):
	    """Load a JSON score file and flatten it into one row per model.

	    The file is expected to map ``model name -> {metric: value}``. A metric
	    value may itself be a dict keyed by threshold; in that case only the
	    entry for ``threshold`` is kept and the column name is the metric name
	    with underscores replaced by spaces. Flat metrics keep their key as the
	    column name.

	    Args:
	        score_path: Path of the JSON file to read.
	        threshold: Key selected from nested (dict-valued) metrics.
	            Defaults to ``"20"``.

	    Returns:
	        A ``pandas.DataFrame`` whose first column is ``"Model"``. Numeric
	        scores are rendered as percentage strings (``0.5`` -> ``"50.00"``);
	        metrics a model does not report are left as missing values.
	    """
	    # JSON is UTF-8 by specification; do not depend on the platform default.
	    with open(score_path, "r", encoding="utf-8") as f:
	        scores = json.load(f)

	    rows = []
	    for model, metrics in scores.items():
	        row = {"Model": model}
	        for key, value in metrics.items():
	            if isinstance(value, dict):
	                # Nested metric: keep only the requested threshold, if present.
	                if threshold in value:
	                    row[key.replace("_", " ")] = _format_score(value[threshold])
	            else:
	                row[key] = _format_score(value)
	        rows.append(row)

	    # Scores are formatted while the rows are built, so no cell-wise pass
	    # over the frame (and no string-into-numeric-column assignment) is needed.
	    return pd.DataFrame(rows)


	# Build the leaderboard table once, when this module is executed.
	# "score.json" is a relative path, so it is resolved against the current working directory.
	leaderboard_df = get_leaderboard_df("score.json")

	import gradio as gr
	import pandas as pd

	# Example: your existing dataframe
	# leaderboard_df = pd.read_csv("your_data.csv")

	# Example task-to-columns dictionary.
	# Maps each tab label to the list of `leaderboard_df` column names shown for that task;
	# the first column in each list is the one used for sorting.
	# NOTE(review): get_leaderboard_df replaces "_" with " " only for nested (dict-valued)
	# metrics, so "VQA_Reasoning" must be a flat key in score.json while "Embodied Grounding"
	# and "Gui Grounding" must come from nested keys — verify against the score file.
	TASKS = {
	"VQA": ["VQA"],
	"QA": ["QA"],
	"VQA Reasoning": ["VQA_Reasoning"],
	"Reason": ["Reason"], # make sure this column name is correct
	"Embodied Grounding": ["Embodied Grounding"],
	"GUI Grounding": ["Gui Grounding"],
	}

	def filter_and_search(search_query: str, task_name: str):
	    """Return one task's leaderboard, sorted by score and filtered by model name.

	    Args:
	        search_query: Text typed by the user. Several terms may be given,
	            separated by ``;``. A model is kept when its name contains any
	            of the terms (case-insensitive). An empty or blank query keeps
	            every model.
	        task_name: Key into ``TASKS`` selecting which columns to show.

	    Returns:
	        A DataFrame with the ``"Model"`` column followed by the task's
	        columns, ordered by the first task column in descending order with
	        non-numeric scores last.

	    Raises:
	        KeyError: If ``task_name`` is not a key of ``TASKS``.
	    """
	    df = leaderboard_df.copy()
	    task_cols = TASKS[task_name]
	    score_col = task_cols[0]

	    # Convert the score column to numbers so the sort is numeric, not lexical.
	    df[score_col] = pd.to_numeric(df[score_col], errors="coerce")
	    df = df.sort_values(by=score_col, ascending=False, na_position="last")

	    # Drop blank terms (e.g. from a trailing ";") so they do not match every row.
	    terms = [term.strip().lower() for term in (search_query or "").split(";")]
	    terms = [term for term in terms if term]
	    if terms:
	        names = df["Model"].str.lower()
	        keep = pd.Series(False, index=df.index)
	        for term in terms:
	            # Literal substring match: model names often contain characters
	            # such as "(", "." or "+" that must not be read as regex syntax.
	            keep |= names.str.contains(term, regex=False, na=False)
	        df = df[keep]

	    return df[["Model"] + task_cols]

	# Build the Gradio UI: one tab per entry in TASKS, each with a search box and a results table.
	# NOTE(review): the indentation of this section is missing in this copy of the file, so the
	# nesting of the `with` blocks below has to be restored before the code can run.
	with gr.Blocks() as demo:
	gr.HTML("<h2>Leaderboard</h2>")
	with gr.Column():
	gr.Markdown("Search and view results for each task.", elem_classes="markdown-text")

	with gr.Tabs(elem_classes="tabs-buttons") as tabs:
	for task_name, task_cols in TASKS.items():
	with gr.TabItem(task_name):
	# Initial data: sorted by score, descending
	sub_df = leaderboard_df[["Model"] + task_cols].copy()
	sub_df[task_cols[0]] = pd.to_numeric(sub_df[task_cols[0]], errors="coerce")
	sub_df = sub_df.sort_values(by=task_cols[0], ascending=False, na_position="last")

	with gr.Row():
	search_bar = gr.Textbox(placeholder="Search model name...", show_label=False)

	with gr.Group():
	table = gr.Dataframe(
	value=sub_df,
	wrap=True,
	column_widths=[400] + [110 for _ in task_cols],
	)

	# Wire up the search logic.
	# `t=task_name` binds the current loop value as a default argument, so each tab's
	# callback keeps its own task instead of all sharing the last one.
	search_bar.submit(
	fn=lambda query, t=task_name: filter_and_search(query, t),
	inputs=search_bar,
	outputs=table,
	)

	gr.HTML("Threshold corresponding to the values of GUI and Embodied Grounding: <b>20</b>")

	# NOTE(review): the file defines `filter_and_search` and `demo` a second time further down
	# and calls `demo.launch()` again there. If this call blocks, as it normally does when run
	# as a script, that later code does not run while this server is up — confirm which copy
	# is meant to be served.
	demo.launch()

	def filter_and_search(search_query: str, task_name: str):
	    """Return one task's leaderboard, sorted by score and filtered by model name.

	    Args:
	        search_query: Text typed by the user. Several terms may be given,
	            separated by ``;``. A model is kept when its name contains any
	            of the terms (case-insensitive). An empty or blank query keeps
	            every model.
	        task_name: Key into ``TASKS`` selecting which columns to show.

	    Returns:
	        A DataFrame with the ``"Model"`` column followed by the task's
	        columns, ordered by the first task column in descending order with
	        non-numeric scores last.

	    Raises:
	        KeyError: If ``task_name`` is not a key of ``TASKS``.
	    """
	    df = leaderboard_df.copy()
	    task_cols = TASKS[task_name]
	    score_col = task_cols[0]

	    # Convert the score column to numbers so the sort is numeric, not lexical.
	    df[score_col] = pd.to_numeric(df[score_col], errors="coerce")
	    df = df.sort_values(by=score_col, ascending=False, na_position="last")

	    # Drop blank terms (e.g. from a trailing ";") so they do not match every row.
	    terms = [term.strip().lower() for term in (search_query or "").split(";")]
	    terms = [term for term in terms if term]
	    if terms:
	        names = df["Model"].str.lower()
	        keep = pd.Series(False, index=df.index)
	        for term in terms:
	            # Literal substring match: model names often contain characters
	            # such as "(", "." or "+" that must not be read as regex syntax.
	            keep |= names.str.contains(term, regex=False, na=False)
	        df = df[keep]

	    return df[["Model"] + task_cols]

	def get_initial_table(task_name: str):
	    """Return the unfiltered table for ``task_name``, best score first.

	    The first column listed in ``TASKS[task_name]`` is coerced to numbers
	    and used as the sort key (descending, unparseable scores last). The
	    result holds the ``"Model"`` column followed by the task's columns.
	    """
	    columns = TASKS[task_name]
	    sort_key = columns[0]

	    # Work on a fresh frame so the module-level leaderboard is left untouched.
	    numeric_scores = pd.to_numeric(leaderboard_df[sort_key], errors='coerce')
	    ranked = leaderboard_df.assign(**{sort_key: numeric_scores}).sort_values(
	        by=sort_key,
	        ascending=False,
	        na_position='last',
	    )
	    return ranked[["Model"] + columns]

	# Build the Gradio UI: one tab per entry in TASKS, each with a search box, a Refresh button
	# and a results table.
	# NOTE(review): the indentation of this section is missing in this copy of the file, so the
	# nesting of the `with` blocks below has to be restored before the code can run.
	# NOTE(review): this rebinds `demo`, which is already defined and launched earlier in the file.
	with gr.Blocks() as demo:
	gr.HTML("<h2>Leaderboard</h2>")
	with gr.Column():
	gr.Markdown("Search and view results for each task.", elem_classes="markdown-text")

	with gr.Tabs(elem_classes="tabs-buttons") as tabs:
	for task_name, task_cols in TASKS.items():
	with gr.TabItem(task_name):
	# Initial data: sorted by score, descending.
	# NOTE(review): these three lines repeat what get_initial_table(task_name) computes.
	sub_df = leaderboard_df[["Model"] + task_cols].copy()
	sub_df[task_cols[0]] = pd.to_numeric(sub_df[task_cols[0]], errors="coerce")
	sub_df = sub_df.sort_values(by=task_cols[0], ascending=False, na_position="last")

	with gr.Row():
	search_bar = gr.Textbox(placeholder="Search model name...", show_label=False)

	refresh_btn = gr.Button("Refresh")
	with gr.Group():
	table = gr.Dataframe(
	value=sub_df,
	wrap=True,
	column_widths=[400] + [110 for _ in task_cols],
	)

	# Wire up the search logic.
	# `t=task_name` binds the current loop value as a default argument, so each tab's
	# callback keeps its own task instead of all sharing the last one.
	search_bar.submit(
	fn=lambda query, t=task_name: filter_and_search(query, t),
	inputs=search_bar,
	outputs=table,
	)
	# Reset handler: returns an empty string for the search box and the unfiltered table.
	# `task=task_name` captures the current tab's task for the same reason as `t` above.
	def refresh(task=task_name):
	return "", get_initial_table(task)

	# The two return values of `refresh` map, in order, onto `search_bar` and `table`.
	refresh_btn.click(
	fn=refresh,
	outputs=[search_bar, table]
	)

	gr.HTML("Threshold corresponding to the values of GUI and Embodied Grounding: <b>20</b>")

	# Start the Gradio server for the UI defined above.
	demo.launch()