Spaces:
Running
Running
""" | |
Voting module for BigCodeArena | |
Handles vote submission, data management, and UI components | |
""" | |
import gradio as gr | |
import pandas as pd | |
import datetime | |
import os | |
import threading | |
from datasets import Dataset, load_dataset | |
from sandbox.code_analyzer import extract_code_from_markdown | |
# HuggingFace dataset settings, read from the environment once at import time.
# Either value is None when the corresponding variable is not set.
HF_DATASET_NAME = os.environ.get("HF_DATASET_NAME")
HF_TOKEN = os.environ.get("HF_TOKEN")
def _serialize_value(value):
    """Return a copy of value with every datetime replaced by its ISO string."""
    if isinstance(value, datetime.datetime):
        return value.isoformat()
    if isinstance(value, dict):
        return {key: _serialize_value(item) for key, item in value.items()}
    if isinstance(value, list):
        return [_serialize_value(item) for item in value]
    # Anything else (str, int, None, ...) is passed through untouched.
    return value


def serialize_interactions(interactions):
    """Convert datetime objects in interactions to ISO format strings.

    The conversion is applied recursively, so datetimes nested inside dict
    values or inner lists are converted as well as those at the top level.

    Args:
        interactions: Iterable of interaction records. Records are usually
            dicts, but nested lists and plain values are accepted too.

    Returns:
        A new list with the same structure in which every
        ``datetime.datetime`` has been replaced by ``value.isoformat()``.
        A falsy input (``None``, ``[]``) is returned unchanged.
    """
    if not interactions:
        return interactions
    return [_serialize_value(item) for item in interactions]
def extract_code_snippets_from_conversation(conversation):
    """
    Collect the code snippet found in each assistant message of a conversation.

    Args:
        conversation: List of message dicts with 'role' and 'content' keys

    Returns:
        List of dicts, one per assistant message from which code could be
        extracted, with the keys 'code', 'code_language', 'install_command'
        and 'environment'. Empty when the conversation is empty or None.
    """
    snippets = []
    for message in conversation or []:
        # Only assistant turns can carry generated code.
        if message.get("role") != "assistant":
            continue
        text = message.get("content", "")
        if not text:
            continue
        parsed = extract_code_from_markdown(message=text, enable_auto_env=True)
        if parsed is None:
            continue
        code, code_language, env_selection, install_command = parsed
        environment = str(env_selection) if env_selection else None
        snippets.append({
            "code": code,
            "code_language": code_language,
            "install_command": install_command,
            "environment": environment,
        })
    return snippets
# Serializes the load -> append -> push cycle. Votes are saved from background
# threads, and two overlapping cycles would each push a copy of the dataset
# that is missing the other thread's vote.
_HF_UPLOAD_LOCK = threading.Lock()


def _organize_interactions_by_turns(interactions, conversation):
    """Return the interactions grouped by conversation turn.

    Grouping is not implemented yet: the interactions are returned as given
    (or an empty list when there are none) and ``conversation`` is unused.
    """
    return interactions if interactions else []


def save_vote_to_hf(
    model_a, model_b, prompt, response_a, response_b, vote_result,
    interactions_a=None, interactions_b=None,
    conversation_a=None, conversation_b=None, hf_token=None
):
    """Save vote result to HuggingFace dataset with full conversation history.

    The existing "train" split is downloaded, the new vote is appended as one
    row, and the whole dataset is pushed back to the hub.

    Args:
        model_a: Name of the model shown as A.
        model_b: Name of the model shown as B.
        prompt: Initial user prompt, stored as "initial_prompt".
        response_a: Accepted for interface compatibility; not stored
            separately (the responses are part of the conversations).
        response_b: See ``response_a``.
        vote_result: Vote label, e.g. "left", "right", "tie", "both_bad".
        interactions_a: Optional interaction records for model A.
        interactions_b: Optional interaction records for model B.
        conversation_a: Optional list of message dicts for model A.
        conversation_b: Optional list of message dicts for model B.
        hf_token: Optional token; falls back to the module-level HF_TOKEN.

    Returns:
        Tuple ``(success, message)``. This function does not raise: every
        failure is reported as ``(False, <description>)``.
    """
    try:
        # Use global token if not provided
        token = hf_token or HF_TOKEN
        if not token:
            return False, "HuggingFace token not found in environment (HF_TOKEN)"
        if not HF_DATASET_NAME:
            return False, "HuggingFace dataset name not found in environment (HF_DATASET_NAME)"

        action_a = _organize_interactions_by_turns(interactions_a or [], conversation_a or [])
        action_b = _organize_interactions_by_turns(interactions_b or [], conversation_b or [])

        # Datetimes are converted to ISO strings so the row is JSON compatible.
        vote_data = {
            "timestamp": datetime.datetime.now().isoformat(),
            "model_a": model_a,
            "model_b": model_b,
            "initial_prompt": prompt,
            "action_a": serialize_interactions(action_a),
            "action_b": serialize_interactions(action_b),
            # Full message history: [{"role": ..., "content": ...}, ...]
            "conversation_a": serialize_interactions(conversation_a or []),
            "conversation_b": serialize_interactions(conversation_b or []),
            # Code snippets and install commands per assistant message
            "code_a": extract_code_snippets_from_conversation(conversation_a or []),
            "code_b": extract_code_snippets_from_conversation(conversation_b or []),
            "vote": vote_result,
        }

        with _HF_UPLOAD_LOCK:
            try:
                dataset = load_dataset(
                    HF_DATASET_NAME,
                    split="train",
                    token=token,
                    download_mode="force_redownload",
                )
            except FileNotFoundError:
                # The dataset does not exist yet (or has no data files):
                # start a new one containing only this vote.
                new_df = pd.DataFrame([vote_data])
            except Exception as load_error:
                # Any other failure (network, auth, corrupt data) must NOT fall
                # back to a fresh dataset: pushing a single-row dataset would
                # overwrite every vote already stored on the hub.
                return False, f"Error loading existing votes from HuggingFace: {str(load_error)}"
            else:
                if hasattr(dataset, "to_pandas"):
                    df = dataset.to_pandas()
                else:
                    df = pd.DataFrame(dataset)
                new_df = pd.concat([df, pd.DataFrame([vote_data])], ignore_index=True)

            # Convert back to dataset and push
            new_dataset = Dataset.from_pandas(new_df)
            try:
                new_dataset.push_to_hub(HF_DATASET_NAME, token=token)
                return True, "Vote saved successfully!"
            except Exception as upload_error:
                return False, f"Error uploading to HuggingFace: {str(upload_error)}"
    except Exception as e:
        return False, f"Error saving vote: {str(e)}"
def _last_assistant_content(messages):
    """Return the content of the last assistant message, or "" if there is none."""
    for msg in reversed(messages):
        if msg.get("role") == "assistant":
            return msg.get("content", "")
    return ""


def handle_vote(state0, state1, vote_type):
    """Handle vote submission.

    The vote is uploaded by a daemon thread so the UI does not wait for the
    network; the returned status therefore only confirms that the upload was
    started, not that it succeeded. Upload failures are printed.

    Args:
        state0: State dict of model A ('has_output', 'messages',
            'interactions', 'model_name').
        state1: State dict of model B, same keys.
        vote_type: Vote label forwarded to save_vote_to_hf.

    Returns:
        Tuple of (status message, update for the ranking table,
        "Last Updated" markdown string).
    """
    if not (
        state0
        and state1
        and state0.get("has_output")
        and state1.get("has_output")
    ):
        return (
            "No output to vote on!",
            gr.update(),
            "**Last Updated:** No enough data available",
        )

    # Full conversation history for both models
    conversation_a = state0.get("messages", []) or []
    conversation_b = state1.get("messages", []) or []

    # The first user message of conversation A is stored as the initial prompt.
    initial_prompt = next(
        (msg.get("content") for msg in conversation_a if msg.get("role") == "user"),
        "",
    )
    response_a = _last_assistant_content(conversation_a)
    response_b = _last_assistant_content(conversation_b)

    interactions_a = state0.get("interactions", [])
    interactions_b = state1.get("interactions", [])

    def save_vote_background():
        try:
            saved, detail = save_vote_to_hf(
                state0["model_name"],
                state1["model_name"],
                initial_prompt,
                response_a,
                response_b,
                vote_type,
                interactions_a,
                interactions_b,
                conversation_a,
                conversation_b,
            )
        except Exception as e:
            print(f"Error saving vote: {str(e)}")
            return
        # save_vote_to_hf reports failures through its return value rather
        # than by raising, so the result has to be checked explicitly.
        if not saved:
            print(f"Error saving vote: {detail}")

    print("Saving vote in background...")
    upload_thread = threading.Thread(target=save_vote_background)
    upload_thread.daemon = True
    upload_thread.start()

    # Return immediately without waiting for the upload or a ranking refresh.
    return (
        "Vote recorded! Uploading data in background... Clearing conversation...",
        gr.update(),  # Keep existing ranking table
        "**Last Updated:** Processing in background...",
    )
def create_vote_ui():
    """Create vote UI components.

    Everything is created hidden; visibility is switched on later through
    updates such as those built by get_vote_ui_updates.

    Returns:
        Dict mapping component names ('vote_section', 'vote_buttons_row',
        'vote_left_btn', 'vote_right_btn', 'vote_tie_btn',
        'vote_both_bad_btn', 'vote_status') to the created components.
    """
    # Vote section header - only visible after output
    # NOTE(review): header row and button row are laid out as siblings here;
    # confirm this matches the intended nesting.
    with gr.Row(visible=False) as vote_section:
        gr.Markdown("### 🗳️ Which response is better?")

    with gr.Row(visible=False) as vote_buttons_row:
        vote_left_btn = gr.Button(
            "👈 A is Better", variant="primary", size="lg"
        )
        vote_tie_btn = gr.Button(
            "🤝 It's a Tie", variant="secondary", size="lg"
        )
        vote_both_bad_btn = gr.Button(
            "👎 Both are Bad", variant="secondary", size="lg"
        )
        vote_right_btn = gr.Button(
            "👉 B is Better", variant="primary", size="lg"
        )

    # Vote status message
    vote_status = gr.Markdown("", visible=False)

    return {
        'vote_section': vote_section,
        'vote_buttons_row': vote_buttons_row,
        'vote_left_btn': vote_left_btn,
        'vote_right_btn': vote_right_btn,
        'vote_tie_btn': vote_tie_btn,
        'vote_both_bad_btn': vote_both_bad_btn,
        'vote_status': vote_status,
    }
def should_show_vote_buttons(state0, state1):
    """Check if vote buttons should be shown.

    Args:
        state0: State dict of model A, or None/empty when there is no session.
        state1: State dict of model B, or None/empty when there is no session.

    Returns:
        True only when both states exist, both have output, and neither is
        still generating; False otherwise.
    """
    # bool() matters: a bare `and` chain would hand back None or {} for a
    # missing state, and a caller passing that on as `visible=` would get
    # "leave unchanged" instead of "hide".
    return bool(
        state0
        and state0.get("has_output", False)
        and not state0.get("generating", False)
        and state1
        and state1.get("has_output", False)
        and not state1.get("generating", False)
    )
def get_vote_ui_updates(show_buttons=False):
    """Build the per-component updates that show or hide the voting controls.

    Args:
        show_buttons: Whether the vote section and its buttons should be
            visible and clickable.

    Returns:
        Dict keyed by component name. The two containers get their visibility
        set, the four buttons get their interactivity set, and the status
        message is always hidden.
    """
    updates = {
        container: gr.update(visible=show_buttons)
        for container in ('vote_section', 'vote_buttons_row')
    }
    updates['vote_status'] = gr.update(visible=False)
    for button in ('vote_left_btn', 'vote_right_btn', 'vote_tie_btn', 'vote_both_bad_btn'):
        updates[button] = gr.update(interactive=show_buttons)
    return updates
def setup_vote_handlers(vote_components, state0_var, state1_var, text_input,
                        ranking_table, ranking_last_update, extra_outputs=None):
    """Setup vote button event handlers.

    Each of the four vote buttons is wired to a handler that records the vote
    through handle_vote and shows a thank-you toast.

    Args:
        vote_components: Dict of components as returned by create_vote_ui.
        state0_var: State component holding model A's state dict.
        state1_var: State component holding model B's state dict.
        text_input: Prompt input component (passed to the handler as input).
        ranking_table: Ranking table component. Not wired by default; pass it
            in ``extra_outputs`` to receive the ranking update.
        ranking_last_update: "Last Updated" component, same as above.
        extra_outputs: Optional list of further output components owned by the
            main app. They are appended after the three default outputs and
            must follow this slot order (a prefix of it is fine):
            chatbot_a, chatbot_b, response_a, response_b, code_a, code_b,
            sandbox_view_a, sandbox_view_b, sandbox_component_a,
            sandbox_component_b, chat_stats_a, chat_stats_b, model_display_a,
            model_display_b, vote_section, vote_buttons_row, state0_var,
            state1_var, ranking_table, ranking_last_update, vote_left_btn,
            vote_right_btn, vote_tie_btn, vote_both_bad_btn, text_input.

    Returns:
        The ``vote_components`` dict that was passed in.

    Raises:
        ValueError: If ``extra_outputs`` has more entries than there are slots.
    """
    outputs = [
        vote_components['vote_status'],  # vote status message
        state0_var,  # state0
        state1_var,  # state1
        *(extra_outputs or []),
    ]
    max_outputs = 28  # number of slots produced by process_vote below
    if len(outputs) > max_outputs:
        raise ValueError(
            f"extra_outputs has too many components "
            f"(max {max_outputs - 3}, got {len(outputs) - 3})"
        )

    def process_vote(state0, state1, vote_type, current_text):
        # Save the vote and get updates
        message, ranking_update, last_update = handle_vote(
            state0, state1, vote_type
        )

        # Show thank you message
        gr.Info(
            "Thank you for your vote! 🎉 Your feedback has been recorded.",
            duration=5,
        )

        slots = (
            message,  # vote status message
            gr.update(),  # Keep state0 unchanged
            gr.update(),  # Keep state1 unchanged
            gr.update(),  # Keep chatbot_a unchanged
            gr.update(),  # Keep chatbot_b unchanged
            gr.update(),  # Keep response_a unchanged
            gr.update(),  # Keep response_b unchanged
            gr.update(),  # Keep code_a unchanged
            gr.update(),  # Keep code_b unchanged
            gr.update(),  # Keep sandbox_view_a unchanged
            gr.update(),  # Keep sandbox_view_b unchanged
            gr.update(),  # Keep sandbox_component_a unchanged
            gr.update(),  # Keep sandbox_component_b unchanged
            gr.update(),  # Keep chat_stats_a unchanged
            gr.update(),  # Keep chat_stats_b unchanged
            gr.update(),  # Keep model_display_a unchanged
            gr.update(),  # Keep model_display_b unchanged
            gr.update(visible=False),  # Hide vote_section
            gr.update(visible=False),  # Hide vote_buttons_row
            gr.update(),  # Keep state0_var unchanged
            gr.update(),  # Keep state1_var unchanged
            ranking_update,  # Update ranking_table
            last_update,  # Update ranking_last_update
            gr.update(),  # Keep vote_left_btn unchanged
            gr.update(),  # Keep vote_right_btn unchanged
            gr.update(),  # Keep vote_tie_btn unchanged
            gr.update(),  # Keep vote_both_bad_btn unchanged
            gr.update(),  # Keep text_input unchanged
        )
        # Return exactly one value per wired output component; returning all
        # slots while fewer outputs are wired would not match the event's
        # outputs list.
        return slots[:len(outputs)]

    # Vote button click handlers
    for vote_btn, vote_type in [
        (vote_components['vote_left_btn'], "left"),
        (vote_components['vote_right_btn'], "right"),
        (vote_components['vote_tie_btn'], "tie"),
        (vote_components['vote_both_bad_btn'], "both_bad"),
    ]:
        vote_btn.click(
            fn=process_vote,
            inputs=[state0_var, state1_var, gr.State(vote_type), text_input],
            outputs=outputs,
        )

    return vote_components