# NOTE: removed Hugging Face Spaces page banner residue ("Spaces: Running Running")
# that was accidentally captured above the source; it is not part of the program.
from __future__ import annotations | |
import os | |
import gradio as gr | |
import json | |
import random | |
from datetime import datetime | |
from typing import Dict, List, Tuple | |
import hashlib | |
import itertools | |
from datasets import load_dataset, Dataset, DatasetDict | |
from huggingface_hub import HfApi, create_repo, repo_exists, Repository | |
from huggingface_hub import HfFolder | |
import shutil | |
import threading | |
import json | |
from collections.abc import Iterable | |
from gradio.themes.base import Base | |
from gradio.themes.utils import colors, fonts, sizes | |
# --- Authentication / access configuration ---------------------------------
# HF_TOKEN authenticates dataset reads/writes; USER_IDS and USER_IDS_2 are
# JSON-encoded lists of allowed annotator IDs.  Missing variables now degrade
# gracefully (no token, empty allow-list) instead of crashing at import time
# with json.loads(None) / os.environ[...] = None.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    os.environ["HF_AUTH"] = HF_TOKEN
# NOTE: the original constructed `HfApi(token=HF_TOKEN)` here and discarded
# the instance; building the client has no side effects, so it was removed.
USER_IDS = set(
    json.loads(os.environ.get("USER_IDS", "[]"))
    + json.loads(os.environ.get("USER_IDS_2", "[]"))
)
class Soft(Base):
    """Custom Gradio theme derived from the stock ``Base`` theme.

    Uses an indigo palette with softer drop shadows, white block/input
    backgrounds and borderless blocks; all concrete overrides live in the
    ``super().set(...)`` call in ``__init__``.
    """
    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.indigo,
        secondary_hue: colors.Color | str = colors.indigo,
        neutral_hue: colors.Color | str = colors.gray,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_md,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            # fonts.LocalFont("Montserrat"),
            "ui-sans-serif",
            "system-ui",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            # fonts.LocalFont("IBM Plex Mono"),
            "ui-monospace",
            "Consolas",
            "monospace",
        ),
    ):
        """Build the theme; parameters mirror ``gradio.themes.Base``."""
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            spacing_size=spacing_size,
            radius_size=radius_size,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
        self.name = "soft"
        # Override individual CSS variables on top of the Base defaults.
        super().set(
            # Colors
            background_fill_primary="*neutral_50",
            slider_color="*primary_500",
            slider_color_dark="*primary_600",
            # Shadows
            shadow_drop="0 1px 4px 0 rgb(0 0 0 / 0.1)",
            shadow_drop_lg="0 2px 5px 0 rgb(0 0 0 / 0.2)",
            # Block Labels
            block_background_fill="white",
            block_label_padding="*spacing_sm *spacing_md",
            block_label_background_fill="*primary_100",
            block_label_background_fill_dark="*primary_600",
            block_label_radius="*radius_md",
            block_label_text_size="*text_md",
            block_label_text_weight="600",
            block_label_text_color="*primary_500",
            block_label_text_color_dark="white",
            block_title_radius="*block_label_radius",
            block_title_padding="*block_label_padding",
            block_title_background_fill="*block_label_background_fill",
            block_title_text_weight="600",
            block_title_text_color="*primary_500",
            block_title_text_color_dark="white",
            block_label_margin="*spacing_md",
            # Inputs
            input_background_fill="white",
            input_border_color="*neutral_100",
            input_shadow="*shadow_drop",
            input_shadow_focus="*shadow_drop_lg",
            checkbox_shadow="none",
            # Buttons
            shadow_spread="6px",
            button_primary_shadow="*shadow_drop_lg",
            button_primary_shadow_hover="*shadow_drop_lg",
            button_primary_shadow_active="*shadow_inset",
            button_secondary_shadow="*shadow_drop_lg",
            button_secondary_shadow_hover="*shadow_drop_lg",
            button_secondary_shadow_active="*shadow_inset",
            checkbox_label_shadow="*shadow_drop_lg",
            button_primary_background_fill="*primary_500",
            button_primary_background_fill_hover="*primary_400",
            button_primary_background_fill_hover_dark="*primary_500",
            button_primary_text_color="white",
            button_secondary_background_fill="white",
            button_secondary_background_fill_hover="*neutral_100",
            button_secondary_background_fill_hover_dark="*primary_500",
            button_secondary_text_color="*neutral_800",
            button_cancel_background_fill="*button_secondary_background_fill",
            button_cancel_background_fill_hover="*button_secondary_background_fill_hover",
            button_cancel_background_fill_hover_dark="*button_secondary_background_fill_hover",
            button_cancel_text_color="*button_secondary_text_color",
            checkbox_label_background_fill_selected="*primary_500",
            checkbox_label_background_fill_selected_dark="*primary_600",
            checkbox_border_width="1px",
            checkbox_border_color="*neutral_100",
            checkbox_border_color_dark="*neutral_600",
            checkbox_background_color_selected="*primary_600",
            checkbox_background_color_selected_dark="*primary_700",
            checkbox_border_color_focus="*primary_500",
            checkbox_border_color_focus_dark="*primary_600",
            checkbox_border_color_selected="*primary_600",
            checkbox_border_color_selected_dark="*primary_700",
            checkbox_label_text_color_selected="white",
            # Borders
            block_border_width="0px",
            panel_border_width="0px",
        )
# Annotation guidelines shown in the UI accordion.  Read with a context
# manager (the original `open(...).read()` leaked the file handle) and an
# explicit UTF-8 encoding; fall back to an empty string when the file is
# missing so the module can still be imported (e.g. during development).
try:
    with open("guidelines.md", encoding="utf-8") as _fh:
        guideline = _fh.read().strip()
except FileNotFoundError:
    print("Warning: guidelines.md not found; guidelines accordion will be empty")
    guideline = ""
# Configuration for the output dataset
ANNOTATIONS_REPO = "ltg/fluency-annotations"  # Change to your repo name
DATA_DIR = "annotation_data"
ANNOTATIONS_FILE = os.path.join(DATA_DIR, "train.jsonl")
# Model names for the three responses
MODEL_NAMES = ["mistral-Nemo", "translated-SFT", "on-policy-RL"]
# Create all pairwise comparisons (3 models -> 3 unordered pairs)
MODEL_PAIRS = list(itertools.combinations(MODEL_NAMES, 2))
# Repository bootstrap: clone the annotations dataset repo once at startup.
def init_repository():
    """Clone (or reuse) the annotations dataset repository.

    Returns the ``Repository`` handle on success, or ``None`` when cloning
    fails — in that case only a bare local data directory is created and
    annotations are kept locally.
    """
    try:
        cloned = Repository(
            repo_type="dataset",
            local_dir=DATA_DIR,
            clone_from=ANNOTATIONS_REPO,
            use_auth_token=HF_TOKEN,
        )
        cloned.git_pull()
    except Exception as e:
        print(f"Error initializing repository: {e}")
        # Repo unavailable: make sure the local data directory at least exists.
        os.makedirs(DATA_DIR, exist_ok=True)
        return None
    return cloned
# Clone once at import time so later saves can pull/push.
annotation_repo = init_repository()
def load_existing_annotations(path: str | None = None):
    """Load existing annotations from a jsonl file, grouped by user.

    Args:
        path: Optional override of the file to read; defaults to the
            module-level ``ANNOTATIONS_FILE`` (backward compatible).

    Returns:
        Dict mapping ``user_id`` -> list of annotation dicts.  Empty dict
        when the file does not exist or cannot be parsed.
    """
    if path is None:
        path = ANNOTATIONS_FILE
    annotations = {}
    if os.path.exists(path):
        try:
            # Explicit UTF-8: the file is written with ensure_ascii=False,
            # so relying on the platform default encoding would be unsafe.
            with open(path, "r", encoding="utf-8") as f:
                for line in f:
                    if line.strip():
                        ann = json.loads(line)
                        user_id = ann.get("user_id")
                        if user_id:
                            annotations.setdefault(user_id, []).append(ann)
            print(f"Loaded {sum(len(v) for v in annotations.values())} existing annotations")
        except Exception as e:
            # Best-effort load: a corrupt file yields whatever parsed so far.
            print(f"Error loading annotations: {e}")
    return annotations
def _append_jsonl(path, record):
    """Append *record* as one JSON line to *path* (UTF-8, non-ASCII kept)."""
    with open(path, "a", encoding="utf-8") as f:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")
def save_annotation_to_file(annotation_data):
    """Save a single annotation to the jsonl file and push to hub.

    Pulls the latest repo state first, appends the record, then pushes
    asynchronously.  On failure the local clone is wiped and re-cloned and
    the save is retried once; a second failure is logged and swallowed so
    the UI never crashes on persistence errors.
    """
    global annotation_repo
    try:
        # Pull latest changes so the append lands on top of remote state.
        if annotation_repo:
            annotation_repo.git_pull()
        _append_jsonl(ANNOTATIONS_FILE, annotation_data)
        # Push to hub asynchronously (non-blocking keeps the UI responsive).
        if annotation_repo:
            annotation_repo.push_to_hub(blocking=False)
    except Exception as e:
        print(f"Error saving annotation: {e}")
        # Try to reinitialize repository from scratch and retry once.
        try:
            shutil.rmtree(DATA_DIR)
            annotation_repo = init_repository()
            _append_jsonl(ANNOTATIONS_FILE, annotation_data)
            if annotation_repo:
                annotation_repo.push_to_hub(blocking=False)
        except Exception as e2:
            print(f"Failed to save annotation after retry: {e2}")
def load_dataset_samples():
    """Load and prepare dataset samples with pairwise comparisons"""
    def build_pair(item, model_a, model_b, dataset_name):
        # One A-vs-B comparison record assembled from a dataset row.
        return {
            "id": f"{item['sample_id']}_{model_a}_vs_{model_b}",
            "original_id": item["sample_id"],
            "prompt": item["prompt"],
            "response_a": item["responses"][model_a],
            "response_b": item["responses"][model_b],
            "model_a": model_a,
            "model_b": model_b,
            "dataset": dataset_name,
        }
    try:
        # Load the private dataset (requires authentication); every row gets
        # all three pairwise comparisons.
        train_split = load_dataset("ltg/fluency-generations", split="train", token=HF_TOKEN)
        pairwise_samples = [
            build_pair(item, first, second, "NTNU")
            for item in train_split
            for first, second in MODEL_PAIRS
        ]
        # The test split provides warm-up examples: one rotating pair per
        # row, with A/B order alternating on even/odd rows.
        test_split = load_dataset("ltg/fluency-generations", split="test", token=HF_TOKEN)
        extra_pairwise_samples = []
        for i, item in enumerate(test_split):
            first, second = MODEL_PAIRS[i % len(MODEL_PAIRS)]
            if i % 2 != 0:
                first, second = second, first
            extra_pairwise_samples.append(build_pair(item, first, second, "training_examples"))
        return pairwise_samples, extra_pairwise_samples
    except Exception as e:
        print(f"Error loading dataset: {e}")
        print("Using dummy data for testing...")
        # Fallback to dummy data for testing
        return [
            {
                "id": "dummy_001_modelA_vs_modelB",
                "original_id": "dummy_001",
                "prompt": "Test prompt for development",
                "response_a": "This is response A for testing.",
                "response_b": "This is response B for testing.",
                "model_a": "modelA",
                "model_b": "modelB",
                "dataset": "test"
            }
        ], []
def swap_sample(sample):
    """Return a copy of *sample* with the A and B sides exchanged.

    Used to counterbalance presentation order: the response shown as "A"
    becomes "B" and vice versa, and the comparison id is rebuilt to match
    the new ordering.
    """
    model_a, model_b = sample["model_a"], sample["model_b"]
    return {
        "id": f"{sample['original_id']}_{model_b}_vs_{model_a}",
        "original_id": sample["original_id"],
        "prompt": sample["prompt"],
        "response_a": sample["response_b"],
        "response_b": sample["response_a"],
        "model_a": model_b,
        "model_b": model_a,
        "dataset": sample["dataset"],
    }
# Load dataset on startup (at import time) so all sessions share one copy;
# falls back to dummy data inside load_dataset_samples() if loading fails.
DATASET_SAMPLES, EXTRA_DATASET_SAMPLES = load_dataset_samples()
class AnnotationManager:
    """In-memory registry of per-user annotation state.

    Wraps the persisted jsonl annotations (loaded once at startup) plus a
    per-user deterministic sample ordering derived from the user ID, so each
    annotator sees the same sequence across sessions and restarts.

    Attributes:
        annotations: user_id -> list of saved annotation dicts.
        user_states: user_id -> {"current_index": int, "annotations": [sample_id, ...]}.
    """
    def __init__(self):
        # Load existing annotations from file
        self.annotations = load_existing_annotations()
        self.user_states = {}
        # Rebuild user states from loaded annotations
        for user_id, user_annotations in self.annotations.items():
            annotated_ids = [ann["sample_id"] for ann in user_annotations]
            self.user_states[user_id] = {
                "current_index": 0,
                "annotations": annotated_ids
            }
    def get_user_seed(self, user_id: str) -> int:
        """Generate consistent seed for user"""
        # md5 is used only as a stable hash of the ID, not for security.
        return int(hashlib.md5(user_id.encode()).hexdigest(), 16)
    def get_user_samples(self, user_id: str) -> List[Dict]:
        """Get shuffled samples for user based on their ID"""
        seed = self.get_user_seed(user_id)
        samples = DATASET_SAMPLES.copy()
        # Deterministic per-user shuffle: same ID -> same order every session.
        random.Random(seed).shuffle(samples)
        # Per-position deterministic coin flip decides whether the A/B sides
        # are swapped, counterbalancing presentation order.
        samples = [
            sample if random.Random(seed + i).randint(0, 1) == 0 else swap_sample(sample)
            for i, sample in enumerate(samples)
        ]
        # Warm-up/training examples are always shown first, unshuffled.
        samples = EXTRA_DATASET_SAMPLES.copy() + samples
        return samples
    def get_next_sample(self, user_id: str) -> Tuple[Dict, int, int]:
        """Get next unannotated sample for user.

        Returns (sample, current_position, total); sample is None when the
        user has annotated everything.
        """
        if user_id not in self.user_states:
            # Check if user has existing annotations
            if user_id in self.annotations:
                annotated_ids = [ann["sample_id"] for ann in self.annotations[user_id]]
                self.user_states[user_id] = {
                    "current_index": 0,
                    "annotations": annotated_ids
                }
            else:
                self.user_states[user_id] = {
                    "current_index": 0,
                    "annotations": []
                }
        samples = self.get_user_samples(user_id)
        state = self.user_states[user_id]
        # Count total annotations for this user
        total_annotated = len(state["annotations"])
        # Find next unannotated sample (first one not already judged).
        for idx, sample in enumerate(samples):
            if not self.is_annotated(user_id, sample["id"]):
                return sample, total_annotated + 1, len(samples)
        # All samples annotated
        return None, len(samples), len(samples)
    def is_annotated(self, user_id: str, sample_id: str) -> bool:
        """Check if user has annotated this sample"""
        if user_id not in self.annotations:
            return False
        return any(ann["sample_id"] == sample_id for ann in self.annotations[user_id])
    def save_annotation(self, user_id: str, sample_id: str, choice: str,
                        model_a: str = None, model_b: str = None,
                        original_id: str = None, dataset_name: str = None):
        """Save user's annotation and persist to file"""
        if user_id not in self.annotations:
            self.annotations[user_id] = []
        annotation = {
            "user_id": user_id,
            "sample_id": sample_id,
            "original_sample_id": original_id,
            "dataset": dataset_name,
            "model_a": model_a,
            "model_b": model_b,
            "choice": choice,
            "timestamp": datetime.now().isoformat()
        }
        # Save to memory
        self.annotations[user_id].append(annotation)
        # Update user state
        if user_id in self.user_states:
            self.user_states[user_id]["annotations"].append(sample_id)
        else:
            self.user_states[user_id] = {
                "current_index": 0,
                "annotations": [sample_id]
            }
        # Save to file asynchronously so the Gradio handler returns promptly.
        threading.Thread(
            target=save_annotation_to_file,
            args=(annotation,)
        ).start()
        print(f"Saved annotation: {annotation}")
    def get_user_progress(self, user_id: str) -> Dict:
        """Get user's annotation progress.

        NOTE(review): the total here counts only DATASET_SAMPLES, while
        get_next_sample's total also includes EXTRA_DATASET_SAMPLES — confirm
        whether the warm-up examples should count toward progress.
        """
        if user_id not in self.annotations:
            return {"completed": 0, "total": len(DATASET_SAMPLES)}
        completed = len(self.annotations[user_id])
        return {"completed": completed, "total": len(DATASET_SAMPLES)}
# Initialize manager: single shared instance; all Gradio handlers close over it.
manager = AnnotationManager()
def login(user_id: str) -> Tuple:
    """Handle user login.

    Validates the annotator ID against USER_IDS and, on success, switches
    to the annotation view showing the user's first unannotated sample.

    Returns updates for (login_interface, annotation_interface, user_state,
    login_status, prompt, response_a, response_b, progress) — the exact
    order wired up in the event handlers below.
    """
    # Reject empty or unknown IDs and stay on the login screen.
    if not user_id or user_id.strip() == "" or user_id.strip() not in USER_IDS:
        return (
            gr.update(visible=True),  # login_interface
            gr.update(visible=False),  # annotation_interface
            "",  # user_state
            gr.update(value="Please enter a valid ID"),  # login_status
            gr.update(),  # prompt
            gr.update(),  # response_a
            gr.update(),  # response_b
            gr.update()  # progress
        )
    user_id = user_id.strip()
    sample, current, total = manager.get_next_sample(user_id)
    if sample is None:
        # User has already annotated everything: stay on login with a note.
        return (
            gr.update(visible=True),  # login_interface
            gr.update(visible=False),  # annotation_interface
            user_id,  # user_state
            gr.update(value=f"All {total} samples completed for user: {user_id}! 🎉"),  # login_status
            gr.update(),  # prompt
            gr.update(),  # response_a
            gr.update(),  # response_b
            gr.update()  # progress
        )
    # (Removed an unused `model_info` local that was computed but never shown.)
    return (
        gr.update(visible=False),  # login_interface
        gr.update(visible=True),  # annotation_interface
        user_id,  # user_state
        gr.update(value=""),  # login_status
        gr.update(value=sample["prompt"]),  # prompt
        gr.update(value=sample["response_a"]),  # response_a
        gr.update(value=sample["response_b"]),  # response_b
        gr.update(value=f"Progress: {current}/{total}")  # progress
    )
def annotate(choice: str, user_id: str) -> Tuple:
    """Handle annotation submission.

    Saves the user's judgement ("a_better" / "b_better" / "equal") for the
    currently displayed pair, then advances to the next unannotated pair.

    Returns updates for (prompt, response_a, response_b, progress, status)
    matching the outputs wired to the three buttons.
    """
    if not user_id:
        return (
            gr.update(),  # prompt
            gr.update(),  # response_a
            gr.update(),  # response_b
            gr.update(),  # progress
            gr.update(value="Error: No user logged in", visible=True)  # status
        )
    # The displayed sample is always the first unannotated one, so
    # re-querying get_next_sample() returns exactly what the user judged.
    sample, _, _ = manager.get_next_sample(user_id)
    if sample:
        # Map button choice to the stored annotation label.
        choice_map = {
            "a_better": "A is more fluent",
            "b_better": "B is more fluent",
            "equal": "Equally fluent"
        }
        # Save with all metadata
        manager.save_annotation(
            user_id=user_id,
            sample_id=sample["id"],
            choice=choice_map[choice],
            model_a=sample.get("model_a"),
            model_b=sample.get("model_b"),
            original_id=sample.get("original_id"),
            dataset_name=sample.get("dataset")
        )
    # Get next sample
    next_sample, current, total = manager.get_next_sample(user_id)
    if next_sample is None:
        return (
            gr.update(value="All samples completed! Thank you for your annotations."),  # prompt
            gr.update(value=""),  # response_a
            gr.update(value=""),  # response_b
            gr.update(value=f"Progress: {total}/{total} - Complete!"),  # progress
            gr.update(value="All annotations complete!", visible=True)  # status
        )
    # (Removed an unused `model_info` local that was computed but never shown.)
    return (
        gr.update(value=next_sample["prompt"]),  # prompt
        gr.update(value=next_sample["response_a"]),  # response_a
        gr.update(value=next_sample["response_b"]),  # response_b
        gr.update(value=f"Progress: {current}/{total}"),  # progress
        gr.update(value="Annotation saved!", visible=True)  # status
    )
def logout() -> Tuple:
    """Reset the UI back to the login screen, clearing all displayed text.

    Returns updates for (login_interface, annotation_interface, user_state,
    login_status, prompt, response_a, response_b, progress).
    """
    # The last five outputs (status, prompt, both responses, progress) are
    # all cleared the same way.
    cleared = tuple(gr.update(value="") for _ in range(5))
    return (
        gr.update(visible=True),   # login_interface
        gr.update(visible=False),  # annotation_interface
        "",                        # user_state
        *cleared,
    )
# Custom CSS injected into the Gradio app: white login card, soft shadows,
# and removal of Gradio's default textbox borders/shadows.
custom_css = """
#login-group {
    background-color: white !important;
}
#login-group > * {
    background-color: white !important;
}
#login-group .gr-group {
    background-color: white !important;
}
#login-group .gr-form {
    background-color: white !important;
}
.light-shadow {
    box-shadow: 0 1px 4px 0 rgb(0 0 0 / 0.1) !important;
}
/* Target the textbox container */
.no-style-textbox {
    border: none !important;
    box-shadow: none !important;
}
/* Target both input and textarea elements */
.no-style-textbox input,
.no-style-textbox textarea {
    border: none !important;
    box-shadow: none !important;
    padding: 0 !important;
    outline: none !important;
}
/* Target the Gradio textbox wrapper */
.no-style-textbox .gr-textbox {
    border: none !important;
    box-shadow: none !important;
}
/* Target focus states */
.no-style-textbox input:focus,
.no-style-textbox textarea:focus {
    border: none !important;
    box-shadow: none !important;
    outline: none !important;
}
/* Additional targeting for stubborn Gradio elements */
.no-style-textbox .gr-form,
.no-style-textbox .gr-input {
    border: none !important;
    box-shadow: none !important;
}
"""
# Create Gradio interface.  Layout: a login column (visible first) and an
# annotation column (hidden until a valid ID is entered).  The output lists
# of the event handlers below are positional and must stay in sync with the
# tuples returned by login()/annotate()/logout().
with gr.Blocks(theme=Soft(font=[gr.themes.GoogleFont("Source Sans Pro"), "Arial"]), title="Dataset Annotation Tool", css=custom_css) as app:
    gr.Markdown("# Norwegian Fluency Annotation")
    # Collapsible guidelines loaded from guidelines.md at startup.
    with gr.Accordion("Click here to see the full annotation guidelines:", open=False, elem_classes="light-shadow"):
        gr.Markdown(guideline, padding=True)
    # Per-session annotator ID; empty string means "not logged in".
    user_state = gr.State("")
    # Login Interface
    with gr.Column(visible=True) as login_interface:
        with gr.Column(variant="panel", elem_id="login-group", elem_classes="light-shadow"):
            gr.Markdown("## Log in", padding=True)
            user_id_input = gr.Textbox(
                label="Enter your unique annotator ID to begin",
                placeholder="Annotator ID"
            )
            with gr.Row():
                login_btn = gr.Button("Login", variant="primary", scale=0.2, min_width=100)
                gr.HTML("")
            login_status = gr.Markdown("", padding=True)
    # Annotation Interface
    with gr.Column(visible=False, elem_id="annotation-group") as annotation_interface:
        progress_label = gr.Markdown("")
        # Row 1: Prompt
        with gr.Row(elem_classes="light-shadow"):
            prompt_display = gr.Textbox(
                label="Prompt",
                interactive=False,
                lines=1,
                elem_classes="no-style-textbox",
                autoscroll=False
            )
        # Row 2: Responses (side by side, equal width)
        with gr.Row(elem_classes="light-shadow"):
            response_a_display = gr.Textbox(
                label="Response A",
                interactive=False,
                lines=1,
                scale=1,
                elem_classes="no-style-textbox",
                autoscroll=False,
                max_lines=100
            )
            response_b_display = gr.Textbox(
                label="Response B",
                interactive=False,
                lines=1,
                scale=1,
                elem_classes="no-style-textbox",
                autoscroll=False,
                max_lines=100
            )
        # Row 3: Buttons (one per possible judgement)
        with gr.Row():
            btn_a = gr.Button("A is more fluent", variant="primary")
            btn_equal = gr.Button("Equally fluent", variant="primary")
            btn_b = gr.Button("B is more fluent", variant="primary")
        status_message = gr.Markdown("", visible=False)
        # Logout row kept hidden; the button still exists so its handler stays wired.
        with gr.Row(visible=False):
            logout_btn = gr.Button("Logout", variant="stop", size="sm")
    # Event handlers
    login_btn.click(
        fn=login,
        inputs=[user_id_input],
        outputs=[
            login_interface,
            annotation_interface,
            user_state,
            login_status,
            prompt_display,
            response_a_display,
            response_b_display,
            progress_label
        ]
    )
    # Pressing Enter in the ID box triggers the same login flow.
    user_id_input.submit(
        fn=login,
        inputs=[user_id_input],
        outputs=[
            login_interface,
            annotation_interface,
            user_state,
            login_status,
            prompt_display,
            response_a_display,
            response_b_display,
            progress_label
        ]
    )
    btn_a.click(
        fn=lambda user_id: annotate("a_better", user_id),
        inputs=[user_state],
        outputs=[
            prompt_display,
            response_a_display,
            response_b_display,
            progress_label,
            status_message
        ]
    )
    btn_b.click(
        fn=lambda user_id: annotate("b_better", user_id),
        inputs=[user_state],
        outputs=[
            prompt_display,
            response_a_display,
            response_b_display,
            progress_label,
            status_message
        ]
    )
    btn_equal.click(
        fn=lambda user_id: annotate("equal", user_id),
        inputs=[user_state],
        outputs=[
            prompt_display,
            response_a_display,
            response_b_display,
            progress_label,
            status_message
        ]
    )
    logout_btn.click(
        fn=logout,
        inputs=[],
        outputs=[
            login_interface,
            annotation_interface,
            user_state,
            login_status,
            prompt_display,
            response_a_display,
            response_b_display,
            progress_label
        ]
    )
# Script entry point: launch the Gradio server when run directly
# (importing this module — e.g. by a Space runner — does not auto-launch).
if __name__ == "__main__":
    app.launch()