Spaces:

SamsungResearch
/

TRUEBench

Running

송종윤/AI Productivity팀(SR)/삼성전자

Initial commit

8a254d6 6 days ago

12 kB

	import pandas as pd
	from constants import (
	NUMERIC_COLS_CATEGORY, NUMERIC_INT_COLS_CATEGORY,
	NUMERIC_COLS_LANGUAGE, NUMERIC_INT_COLS_LANGUAGE
	)

	def format_leaderboard_cell(cell, col, key="Category"):
	    """
	    Turn a single leaderboard value into its display string.

	    Missing values (as judged by ``pd.isna``) and blank strings become "".
	    Columns in the integer set are rounded to whole numbers, columns in the
	    numeric set are printed with two decimals, and every other column is
	    passed through ``str``. A value that cannot be converted yields "".

	    key: "Language" selects the language column sets; any other value
	    selects the category column sets.
	    """
	    if key == "Language":
	        decimal_columns, integer_columns = NUMERIC_COLS_LANGUAGE, NUMERIC_INT_COLS_LANGUAGE
	    else:
	        decimal_columns, integer_columns = NUMERIC_COLS_CATEGORY, NUMERIC_INT_COLS_CATEGORY

	    # Guard clauses: nothing to format for missing or whitespace-only cells.
	    if pd.isna(cell):
	        return ""
	    if isinstance(cell, str) and cell.strip() == "":
	        return ""

	    try:
	        if col in integer_columns:
	            as_number = float(cell)
	            return str(int(round(as_number)))
	        if col in decimal_columns:
	            as_number = float(cell)
	            return "{:.2f}".format(as_number)
	        return str(cell)
	    except Exception:
	        # Best effort: an unformattable value is shown as an empty cell.
	        return ""


	def styled_error(error):
	    """Wrap ``error`` in a centered, red, 20px HTML paragraph."""
	    template = "<p style='color: red; font-size: 20px; text-align: center;'>{}</p>"
	    return template.format(error)


	def styled_warning(warn):
	    """Wrap ``warn`` in a centered, orange, 20px HTML paragraph."""
	    template = "<p style='color: orange; font-size: 20px; text-align: center;'>{}</p>"
	    return template.format(warn)


	def styled_message(message):
	    """Wrap ``message`` in a centered, green, 20px HTML paragraph."""
	    template = "<p style='color: green; font-size: 20px; text-align: center;'>{}</p>"
	    return template.format(message)


	def has_no_nan_values(df, columns):
	    """Return a row-wise boolean mask that is True where none of ``columns`` is null."""
	    present = df[columns].notna()
	    return present.all(axis=1)


	def has_nan_values(df, columns):
	    """Return a row-wise boolean mask that is True where any of ``columns`` is null."""
	    missing = df[columns].isna()
	    return missing.any(axis=1)

	def get_display_model_name(full_model_name: str) -> str:
	    """
	    Remove text within parentheses from the model name for display purposes.

	    Every ``(...)`` group is dropped together with any whitespace directly
	    in front of it; the rest of the name is returned unchanged.
	    Example: "Model (v1)" -> "Model"
	    """
	    import re

	    # `\s*` also handles "Model(v1)"; the non-greedy `.*?` stops at the first
	    # closing parenthesis so separate groups are removed independently.
	    return re.sub(r'\s*\(.*?\)', '', full_model_name)

	def get_score_stars(score, unique_id=None):
	    """
	    Generate HTML for a 5-star rating visualization.

	    Args:
	        score (float or int): Overall score, can be in 0~1 or 0~100 range.
	            - If 0~1 (exclusive of 0), it is automatically scaled to 0~100.
	            - If None, NaN, infinite, non-numeric, or negative, treated as 0.
	            - Values above 100 fill all five stars; the printed number is
	              not clamped.
	        unique_id (optional): Unique identifier for SVG gradient.

	    Returns:
	        str: HTML string with 5-star visualization, filled in proportion to score.
	    """
	    import math

	    max_stars = 5
	    points_per_star = 20
	    star_size = 18  # px

	    # Robust normalization: 0~1 -> 0~100, None/NaN/negative -> 0
	    try:
	        score = float(score)
	    except (TypeError, ValueError):
	        score = 0.0
	    if not math.isfinite(score) or score < 0:
	        score = 0.0
	    elif score <= 1:
	        score *= 100

	    # Clamp only the value used for the fill so that out-of-range scores
	    # can never draw more than `max_stars` stars.
	    star_score = min(score, float(max_stars * points_per_star))
	    full_stars = int(star_score // points_per_star)
	    partial = (star_score % points_per_star) / points_per_star  # 0.0 ~ 0.999

	    # If unique_id is not provided, use "default"
	    uid = str(unique_id) if unique_id is not None else "default"

	    def star_svg(fill_ratio, idx):
	        # fill_ratio: 0.0 (empty) ~ 1.0 (full)
	        # White fill, gray background; the two middle stops share one offset
	        # to produce a hard edge between the filled and unfilled parts.
	        grad_id = f"star-grad-{uid}-{idx}"
	        return f'''
	        <svg width="{star_size}" height="{star_size}" viewBox="0 0 24 24" style="margin-right:0.5px;vertical-align:middle;">
	        <defs>
	        <linearGradient id="{grad_id}" x1="0" x2="1" y1="0" y2="0">
	        <stop offset="0%" stop-color="#fff"/>
	        <stop offset="{fill_ratio*100:.1f}%" stop-color="#fff"/>
	        <stop offset="{fill_ratio*100:.1f}%" stop-color="#666666"/>
	        <stop offset="100%" stop-color="#666666"/>
	        </linearGradient>
	        </defs>
	        <polygon points="12,2 15,9 22,9.5 17,14.2 18.5,21 12,17.5 5.5,21 7,14.2 2,9.5 9,9"
	        fill="url(#{grad_id})" stroke="#888" stroke-width="1"/>
	        </svg>
	        '''

	    # One fill ratio per star: full stars, an optional partial star, then
	    # empty stars up to `max_stars`.
	    fills = [1.0] * full_stars
	    if full_stars < max_stars and partial > 0:
	        fills.append(partial)
	    fills += [0.0] * (max_stars - len(fills))
	    stars_html = "".join(star_svg(ratio, idx) for idx, ratio in enumerate(fills))

	    # Score text
	    score_text = f'<span style="color:#fff;font-size:16px;margin-left:8px;">{score:.2f}</span>'

	    return f'''
	    <div style="display:flex;align-items:center;gap:4px;">
	    {stars_html}
	    {score_text}
	    </div>
	    '''

	def get_type_badge(type_value):
	    """
	    Build the badge <span> for a model's type.

	    type_value: e.g. 'Open', 'Proprietary'
	    The label is the capitalized text of ``type_value``. The "open" style is
	    used when the text equals "open" (case-insensitive); every other value
	    gets the "proprietary" style.
	    """
	    text = str(type_value)
	    label = text.capitalize()
	    is_open = text.lower() == "open"
	    badge_class = "badge-type-open" if is_open else "badge-type-proprietary"
	    return f'<span class="badge {badge_class}">{label}</span>'

	def get_model_type_badge(model_type):
	    """
	    Build the badge <span> for a model type.

	    The style is chosen from the trimmed, lower-cased text of ``model_type``:
	    "think", "instruct" and "hybrid" each have their own class, and any other
	    value falls back to the "instruct" class. The label is the capitalized
	    (untrimmed) text of ``model_type``.
	    """
	    class_by_type = {
	        "think": "badge-modeltype-think",
	        "instruct": "badge-modeltype-instruct",
	        "hybrid": "badge-modeltype-hybrid",
	    }
	    raw_text = str(model_type)
	    label = raw_text.capitalize()
	    badge_class = class_by_type.get(raw_text.strip().lower(), "badge-modeltype-instruct")
	    return f'<span class="badge {badge_class}">{label}</span>'

	def get_think_badge(think_type):
	    """
	    Build the badge <span> for the think setting.

	    The "on" style is used when the text of ``think_type`` equals "on"
	    (case-insensitive); every other value, including "off", gets the "off"
	    style. The label is the capitalized text of ``think_type``.
	    """
	    text = str(think_type)
	    label = text.capitalize()
	    if text.lower() == "on":
	        return f'<span class="badge badge-think-on">{label}</span>'
	    return f'<span class="badge badge-think-off">{label}</span>'

	import pandas as pd

	def _rank_style(rank):
	    """Return ``(medal, css)`` for a rank; ranks other than 1-3 get no medal and the default style."""
	    if rank == 1 or rank == "1":
	        return "🥇", "color: #ffd700; font-weight: bold; text-shadow: 0 0 4px #fff2;"
	    if rank == 2 or rank == "2":
	        return "🥈", "color: #b0b0b0; font-weight: bold;"
	    if rank == 3 or rank == "3":
	        return "🥉", "color: #cd7f32; font-weight: bold;"
	    return "", "color: #fff; font-weight: 600;"


	def _optional_text(row, col):
	    """Return the stripped text of ``row[col]``, or "" when the column is absent, null, or blank."""
	    if col not in row:
	        return ""
	    value = row[col]
	    if pd.isna(value):
	        return ""
	    return str(value).strip()


	def render_leaderboard_html(df, overall_col="Overall", key="Category"):
	    """
	    Render a DataFrame as an HTML table, replacing the overall_col with a star rating visualization.

	    Columns are reordered so that Rank, Model Name, Link, Type, Model Type,
	    Think and Overall (those that exist) come first, followed by the
	    remaining columns in their original order. "Comment" and "Link" are not
	    rendered as columns: the comment becomes the tooltip of the model name
	    and the link becomes its hyperlink target.

	    Args:
	        df: DataFrame holding the leaderboard rows.
	        overall_col: Name of the column drawn as a star rating.
	        key: "Category" or "Language"; forwarded to format_leaderboard_cell
	            for the columns that have no special rendering.

	    Returns:
	        str: The table wrapped in a scrollable container <div>.
	    """
	    from html import escape

	    # Force column order
	    desired_order = ["Rank", "Model Name", "Link", "Type", "Model Type", "Think", "Overall"]
	    cols = list(df.columns)
	    ordered_cols = [c for c in desired_order if c in cols]
	    ordered_cols += [c for c in cols if c not in desired_order]
	    df = df[ordered_cols]

	    # Columns to hide
	    hidden_cols = ("Comment", "Link")
	    visible_cols = [c for c in df.columns if c not in hidden_cols]

	    # Header tooltips shown through an info icon.
	    info_titles = {
	        "Model Name": "Hovering the mouse displays additional details, and clicking the model name navigates to the corresponding page.",
	        "Med. Len.": "Median token length of think and response for the model.",
	        "Med. Resp. Len.": "Median token length of the model's responses (excluding think).",
	    }
	    overall_css = "min-width: 120px; max-width: 300px; width: 150px;"

	    # Fragments are collected in a list and joined once at the end.
	    parts = ['<table class="pretty-leaderboard-table">\n<thead><tr>']

	    # Build table header
	    for col in visible_cols:
	        if col in info_titles:
	            parts.append(
	                f'<th>{col}'
	                f'<span class="info-icon" title="{info_titles[col]}">ⓘ</span>'
	                '</th>'
	            )
	        elif col == overall_col:
	            parts.append(f'<th style="{overall_css}">{col}</th>')
	        else:
	            parts.append(f'<th>{col}</th>')
	    parts.append('</tr></thead>\n<tbody>\n')

	    # Characters replaced by "_" when a model name is used in an SVG id.
	    id_cleanup = str.maketrans(" -()", "____")

	    # Build table rows
	    for position, (_, row) in enumerate(df.iterrows()):
	        parts.append('<tr>')
	        for col in visible_cols:
	            cell = row[col]
	            if col == overall_col:
	                # The gradient ids inside the stars must be unique per row.
	                # The row position is appended so duplicate, missing or
	                # non-string model names cannot produce colliding ids.
	                model_name = row.get("Model Name", None)
	                if isinstance(model_name, str) and model_name.strip():
	                    id_base = model_name.translate(id_cleanup)
	                else:
	                    id_base = "row"
	                unique_id = f"{id_base}_{position}"
	                try:
	                    cell_html = get_score_stars(float(cell), unique_id=unique_id)
	                except (TypeError, ValueError, OverflowError):
	                    # Not a usable number: show the raw value instead of stars.
	                    cell_html = str(cell)
	                parts.append(f'<td style="{overall_css}">{cell_html}</td>')
	            elif col == "Rank":
	                # For 1st, 2nd, and 3rd place, emphasize with medal emoji and color
	                medal, rank_css = _rank_style(cell)
	                parts.append(f'<td><span style="{rank_css}">{medal if medal else cell}</span></td>')
	            elif col == "Model Name":
	                # Only highlight top 1~3, do not apply badge
	                _, name_css = _rank_style(row.get("Rank", None))
	                display_name = get_display_model_name(str(cell))

	                # Tooltip from the Comment column. It is escaped because it
	                # sits inside a double-quoted attribute.
	                comment_value = _optional_text(row, "Comment")
	                title_attribute = (
	                    f' title="{escape(comment_value, quote=True)}"' if comment_value else ""
	                )

	                # Link logic; the URL is escaped for the same reason.
	                link_value = _optional_text(row, "Link")
	                if link_value:
	                    clickable_name = (
	                        f'<a href="{escape(link_value, quote=True)}" target="_blank" '
	                        f'style="color:inherit;">{display_name}</a>'
	                    )
	                else:
	                    clickable_name = display_name

	                parts.append(
	                    f'<td><span style="{name_css}"{title_attribute}>{clickable_name}</span></td>'
	                )
	            elif col == "Type":
	                parts.append(f'<td>{get_type_badge(row.get("Type", ""))}</td>')
	            elif col == "Model Type":
	                parts.append(f'<td>{get_model_type_badge(row.get("Model Type", ""))}</td>')
	            elif col == "Think":
	                parts.append(f'<td>{get_think_badge(row.get("Think", ""))}</td>')
	            else:
	                parts.append(f'<td>{format_leaderboard_cell(cell, col, key)}</td>')
	        parts.append('</tr>\n')
	    parts.append('</tbody></table>')

	    table_html = "".join(parts)
	    # Wrap in scrollable div for sticky header
	    return f'<div class="leaderboard-table-container" style="max-height:900px;overflow-y:auto;">{table_html}</div>'