Spaces:

bigcode
/

arena

Running

App Files Files

xet

Community

arena / voting.py

terryyz

No data available

aa2b984 13 days ago

raw

history blame contribute delete

14.7 kB

	"""
	Voting module for BigCodeArena
	Handles vote submission, data management, and UI components
	"""

	import gradio as gr
	import pandas as pd
	import datetime
	import os
	import threading
	from datasets import Dataset, load_dataset
	from sandbox.code_analyzer import extract_code_from_markdown


	# HuggingFace dataset configuration, read from the environment once at import time.
	# os.getenv returns None for an unset variable; save_vote_to_hf checks both values
	# and returns a failure tuple instead of attempting an upload when either is missing.
	# HF_DATASET_NAME: dataset identifier passed to load_dataset() and push_to_hub().
	HF_DATASET_NAME = os.getenv("HF_DATASET_NAME")
	# HF_TOKEN: default access token, used when the caller supplies no hf_token.
	HF_TOKEN = os.getenv("HF_TOKEN")


	def _to_json_safe(value):
	    """Return *value* with every datetime, at any nesting depth, as an ISO string."""
	    if isinstance(value, datetime.datetime):
	        return value.isoformat()
	    if isinstance(value, dict):
	        return {key: _to_json_safe(item) for key, item in value.items()}
	    if isinstance(value, list):
	        return [_to_json_safe(item) for item in value]
	    # Strings, numbers, None and any other type are passed through untouched.
	    return value


	def serialize_interactions(interactions):
	    """Convert datetime objects in interactions to ISO format strings.

	    The conversion is recursive: datetimes are replaced wherever they occur,
	    whether directly in the list, as a dict value, or inside nested
	    lists/dicts. The input is never mutated; new containers are built.

	    Args:
	        interactions: A list whose items are dicts, nested lists, or plain
	            values. A falsy argument (None, empty list) is returned as is.

	    Returns:
	        A new list with the same structure in which every
	        datetime.datetime has been replaced by its isoformat() string.
	    """
	    if not interactions:
	        return interactions

	    return [_to_json_safe(item) for item in interactions]


	def extract_code_snippets_from_conversation(conversation):
	    """
	    Gather the code extracted from each assistant message of a conversation.

	    Messages whose role is not "assistant", whose content is empty, or for
	    which extract_code_from_markdown returns None contribute nothing.

	    Args:
	        conversation: List of message dicts with 'role' and 'content' keys

	    Returns:
	        List of dicts, one per assistant message that yielded a result, with
	        the keys "code", "code_language", "install_command" and "environment"
	    """
	    snippets = []
	    if not conversation:
	        return snippets

	    for message in conversation:
	        if message.get("role") != "assistant":
	            continue

	        text = message.get("content", "")
	        if not text:
	            continue

	        # Pull code out of the markdown body of the assistant message
	        parsed = extract_code_from_markdown(message=text, enable_auto_env=True)
	        if parsed is None:
	            continue

	        code, language, environment, install_command = parsed
	        # The environment is stored as text; a falsy selection becomes None
	        environment_label = str(environment) if environment else None
	        snippets.append({
	            "code": code,
	            "code_language": language,
	            "install_command": install_command,
	            "environment": environment_label,
	        })

	    return snippets


	# Serializes the download -> append -> push cycle in save_vote_to_hf.
	# handle_vote starts one background thread per vote; without this lock two
	# overlapping cycles would each push a copy of the dataset that lacks the
	# other's row. This only protects threads of the current process.
	_SAVE_VOTE_LOCK = threading.Lock()


	def save_vote_to_hf(
	    model_a, model_b, prompt, response_a, response_b, vote_result, interactions_a=None, interactions_b=None, conversation_a=None, conversation_b=None, hf_token=None
	):
	    """Save vote result to HuggingFace dataset with full conversation history.

	    The existing "train" split of HF_DATASET_NAME is downloaded, the new vote
	    is appended as one row, and the whole dataset is pushed back.

	    Args:
	        model_a: Stored under "model_a".
	        model_b: Stored under "model_b".
	        prompt: Stored under "initial_prompt".
	        response_a: Accepted for interface compatibility; not written to the
	            record (the conversations are stored instead).
	        response_b: Same as response_a, for model B.
	        vote_result: Stored under "vote" ("left", "right", "tie", "both_bad").
	        interactions_a: Optional list of interactions, stored under "action_a".
	        interactions_b: Optional list of interactions, stored under "action_b".
	        conversation_a: Optional list of message dicts, stored under
	            "conversation_a" and scanned for code snippets ("code_a").
	        conversation_b: Same as conversation_a, for model B.
	        hf_token: Access token; falls back to the module-level HF_TOKEN.

	    Returns:
	        A (success, message) tuple. No exception is propagated: every failure
	        is reported as (False, <description>).
	    """
	    try:
	        # Use global token if not provided
	        token = hf_token or HF_TOKEN
	        if not token:
	            return False, "HuggingFace token not found in environment (HF_TOKEN)"

	        if not HF_DATASET_NAME:
	            return False, "HuggingFace dataset name not found in environment (HF_DATASET_NAME)"

	        conversation_a = conversation_a or []
	        conversation_b = conversation_b or []

	        # Interactions are stored as received; no per-turn grouping is done yet.
	        vote_data = {
	            "timestamp": datetime.datetime.now().isoformat(),
	            "model_a": model_a,
	            "model_b": model_b,
	            "initial_prompt": prompt,
	            "action_a": serialize_interactions(interactions_a or []),
	            "action_b": serialize_interactions(interactions_b or []),
	            "conversation_a": serialize_interactions(conversation_a),
	            "conversation_b": serialize_interactions(conversation_b),
	            "code_a": extract_code_snippets_from_conversation(conversation_a),
	            "code_b": extract_code_snippets_from_conversation(conversation_b),
	            "vote": vote_result,  # "left", "right", "tie", "both_bad"
	        }
	        new_row = pd.DataFrame([vote_data])

	        with _SAVE_VOTE_LOCK:
	            try:
	                dataset = load_dataset(HF_DATASET_NAME, split="train", token=token, download_mode="force_redownload")
	            except FileNotFoundError:
	                # Start a fresh dataset only when there is nothing to load.
	                # NOTE(review): relies on the datasets library raising
	                # FileNotFoundError subclasses for a missing or empty dataset
	                # (DatasetNotFoundError, EmptyDatasetError) - confirm against
	                # the installed version.
	                new_df = new_row
	            except Exception as load_error:
	                # Any other failure (network, auth, parsing) must not fall
	                # through: pushing a one-row dataset here would overwrite
	                # every vote already stored on the hub.
	                return False, f"Error loading existing votes from HuggingFace: {str(load_error)}"
	            else:
	                # Convert to pandas DataFrame - handle objects without to_pandas
	                if hasattr(dataset, "to_pandas"):
	                    df = dataset.to_pandas()
	                else:
	                    df = pd.DataFrame(dataset)
	                new_df = pd.concat([df, new_row], ignore_index=True)

	            # Convert back to dataset and push
	            new_dataset = Dataset.from_pandas(new_df)
	            try:
	                new_dataset.push_to_hub(HF_DATASET_NAME, token=token)
	            except Exception as upload_error:
	                return False, f"Error uploading to HuggingFace: {str(upload_error)}"
	            return True, "Vote saved successfully!"
	    except Exception as e:
	        return False, f"Error saving vote: {str(e)}"


	def handle_vote(state0, state1, vote_type):
	    """Handle vote submission.

	    Validates that both sides have output, then hands the vote to
	    save_vote_to_hf on a background daemon thread and returns immediately.

	    Args:
	        state0: State dict for model A; read keys are "has_output",
	            "messages", "interactions" and "model_name".
	        state1: State dict for model B, same keys.
	        vote_type: Vote label forwarded unchanged to save_vote_to_hf.

	    Returns:
	        A 3-tuple (status message, gr.update() for the ranking table,
	        "Last Updated: ..." text). The upload outcome is not part of the
	        return value; failures are printed by the background thread.
	    """
	    if not (
	        state0
	        and state1
	        and state0.get("has_output")
	        and state1.get("has_output")
	    ):
	        return (
	            "No output to vote on!",
	            gr.update(),
	            "Last Updated: No enough data available",
	        )

	    # Full conversation history for both models (tolerates a missing key)
	    conversation_a = state0.get("messages") or []
	    conversation_b = state1.get("messages") or []

	    # First user message of model A's conversation; empty when there is none
	    initial_prompt = next(
	        (msg["content"] for msg in conversation_a if msg["role"] == "user"), ""
	    )
	    # Most recent assistant reply on each side; empty when there is none
	    response_a = next(
	        (msg["content"] for msg in reversed(conversation_a) if msg["role"] == "assistant"), ""
	    )
	    response_b = next(
	        (msg["content"] for msg in reversed(conversation_b) if msg["role"] == "assistant"), ""
	    )

	    interactions_a = state0.get("interactions", [])
	    interactions_b = state1.get("interactions", [])

	    def save_vote_background():
	        """Upload the vote and log any failure (nobody waits on this thread)."""
	        try:
	            success, message = save_vote_to_hf(
	                state0["model_name"],
	                state1["model_name"],
	                initial_prompt,
	                response_a,
	                response_b,
	                vote_type,
	                interactions_a,
	                interactions_b,
	                conversation_a,
	                conversation_b,
	            )
	        except Exception as e:
	            print(f"Error saving vote: {str(e)}")
	            return

	        # save_vote_to_hf reports failures through its return value rather
	        # than by raising, so that result has to be checked explicitly.
	        if not success:
	            print(f"Error saving vote: {message}")

	    print("Saving vote in background...")
	    # Start background upload thread
	    upload_thread = threading.Thread(target=save_vote_background, daemon=True)
	    upload_thread.start()

	    # Return immediately without waiting for the upload or a ranking refresh
	    return (
	        "Vote recorded! Uploading data in background... Clearing conversation...",
	        gr.update(),  # Keep existing ranking table
	        "Last Updated: Processing in background...",
	    )


	def create_vote_ui():
	    """Build the voting widgets (all initially hidden) and return them by name.

	    Returns:
	        Dict with the keys 'vote_section', 'vote_buttons_row',
	        'vote_left_btn', 'vote_right_btn', 'vote_tie_btn',
	        'vote_both_bad_btn' and 'vote_status'.
	    """
	    # (result key, label, variant), listed in the order the buttons are created
	    button_specs = (
	        ('vote_left_btn', "👍 A is Better", "primary"),
	        ('vote_tie_btn', "🤝 It's a Tie", "secondary"),
	        ('vote_both_bad_btn', "👎 Both are Bad", "secondary"),
	        ('vote_right_btn', "👍 B is Better", "primary"),
	    )

	    # Heading row - only made visible after output
	    with gr.Row(visible=False) as heading_row:
	        gr.Markdown("### 🗳️ Which response is better?")

	    with gr.Row(visible=False) as buttons_row:
	        buttons = {
	            key: gr.Button(label, variant=variant, size="lg")
	            for key, label, variant in button_specs
	        }

	    # Vote status message
	    status = gr.Markdown("", visible=False)

	    components = {
	        'vote_section': heading_row,
	        'vote_buttons_row': buttons_row,
	    }
	    for key in ('vote_left_btn', 'vote_right_btn', 'vote_tie_btn', 'vote_both_bad_btn'):
	        components[key] = buttons[key]
	    components['vote_status'] = status
	    return components


	def should_show_vote_buttons(state0, state1):
	    """Check if vote buttons should be shown.

	    Args:
	        state0: State dict for model A, or None/empty when absent.
	        state1: State dict for model B, or None/empty when absent.

	    Returns:
	        True only when both states are non-empty, have a truthy "has_output"
	        and are not "generating". Always a real bool, never None or a state
	        dict, so the result is safe to pass on as a visibility flag.
	    """

	    def is_ready(state):
	        # A missing state short-circuits before any .get() call is made.
	        return (
	            bool(state)
	            and bool(state.get("has_output", False))
	            and not state.get("generating", False)
	        )

	    return is_ready(state0) and is_ready(state1)


	def get_vote_ui_updates(show_buttons=False):
	    """Build the gr.update() payloads for the vote components.

	    Args:
	        show_buttons: Applied as the visibility of the two rows and as the
	            interactivity of the four vote buttons.

	    Returns:
	        Dict keyed by component name; 'vote_status' is always set hidden.
	    """
	    row_names = ('vote_section', 'vote_buttons_row')
	    button_names = (
	        'vote_left_btn',
	        'vote_right_btn',
	        'vote_tie_btn',
	        'vote_both_bad_btn',
	    )

	    updates = {name: gr.update(visible=show_buttons) for name in row_names}
	    updates['vote_status'] = gr.update(visible=False)
	    for name in button_names:
	        updates[name] = gr.update(interactive=show_buttons)
	    return updates


	def setup_vote_handlers(vote_components, state0_var, state1_var, text_input, ranking_table, ranking_last_update):
	"""Setup vote button event handlers.

	Attaches the same click callback to the four vote buttons, each with its
	own vote label ("left", "right", "tie", "both_bad").

	Args:
	vote_components: Dict of vote widgets; the keys read here are
	'vote_left_btn', 'vote_right_btn', 'vote_tie_btn', 'vote_both_bad_btn'
	and 'vote_status'.
	state0_var: Component passed as the first callback input and listed as an output.
	state1_var: Component passed as the second callback input and listed as an output.
	text_input: Component passed as the fourth callback input.
	ranking_table: Not referenced in this function body.
	ranking_last_update: Not referenced in this function body.

	Returns:
	The vote_components dict that was passed in.
	"""

	def process_vote(state0, state1, vote_type, current_text):
	# NOTE(review): current_text is accepted but never read.
	# Save the vote and get updates
	message, ranking_update, last_update = handle_vote(
	state0, state1, vote_type
	)

	# Show thank you message
	# NOTE(review): shown unconditionally, i.e. also when handle_vote returned
	# its "No output to vote on!" result - confirm this is intended.
	gr.Info(
	"Thank you for your vote! 🎉 Your feedback has been recorded.",
	duration=5,
	)

	# Return only vote status, ranking updates and hide voting interface
	# The tuple is positional (28 values); each value's target is named in its
	# trailing comment and must line up with the `outputs` list of the click handler.
	return (
	message, # vote status message
	gr.update(), # Keep state0 unchanged
	gr.update(), # Keep state1 unchanged
	gr.update(), # Keep chatbot_a unchanged
	gr.update(), # Keep chatbot_b unchanged
	gr.update(), # Keep response_a unchanged
	gr.update(), # Keep response_b unchanged
	gr.update(), # Keep code_a unchanged
	gr.update(), # Keep code_b unchanged
	gr.update(), # Keep sandbox_view_a unchanged
	gr.update(), # Keep sandbox_view_b unchanged
	gr.update(), # Keep sandbox_component_a unchanged
	gr.update(), # Keep sandbox_component_b unchanged
	gr.update(), # Keep chat_stats_a unchanged
	gr.update(), # Keep chat_stats_b unchanged
	gr.update(), # Keep model_display_a unchanged
	gr.update(), # Keep model_display_b unchanged
	gr.update(visible=False), # Hide vote_section
	gr.update(visible=False), # Hide vote_buttons_row
	gr.update(), # Keep state0_var unchanged
	gr.update(), # Keep state1_var unchanged
	ranking_update, # Update ranking_table
	last_update, # Update ranking_last_update
	gr.update(), # Keep vote_left_btn unchanged
	gr.update(), # Keep vote_right_btn unchanged
	gr.update(), # Keep vote_tie_btn unchanged
	gr.update(), # Keep vote_both_bad_btn unchanged
	gr.update(), # Keep text_input unchanged
	)

	# Vote button click handlers
	for vote_btn, vote_type in [
	(vote_components['vote_left_btn'], "left"),
	(vote_components['vote_right_btn'], "right"),
	(vote_components['vote_tie_btn'], "tie"),
	(vote_components['vote_both_bad_btn'], "both_bad"),
	]:
	# The vote label reaches the callback as a constant gr.State input.
	vote_btn.click(
	fn=process_vote,
	inputs=[state0_var, state1_var, gr.State(vote_type), text_input],
	# NOTE(review): only 3 outputs are listed here while process_vote returns
	# 28 values; the list has to be completed before this wiring is usable.
	outputs=[
	vote_components['vote_status'], # vote status message
	state0_var, # state0
	state1_var, # state1
	# Note: The actual outputs list will need to be filled in by the calling code
	# as it depends on the specific UI components in the main app
	],
	)

	return vote_components