Spaces:

Suzana
/

labelit-mini-ner

Sleeping

App Files Community

labelit-mini-ner / app.py

Suzana

Update app.py

9ed6d9a verified 2 months ago

raw

history blame

3.44 kB

	import gradio as gr
	import pandas as pd

	# In-memory token table used by the handlers in this file. It starts empty;
	# load_data() and save_edits() replace it wholesale with a frame whose columns
	# are sentence_id / token / label, and the download functions read it.
	# NOTE(review): this is module-level state, so every caller in this process
	# reads and overwrites the same table.
	token_df = pd.DataFrame()

	def make_sample_data(n=100):
	    """Build a synthetic DataFrame of ``n`` short sentences for demo use.

	    Sentence ``i`` has the form ``"<person> <verb> <org> in <location>."``,
	    where each part is chosen by cycling through a fixed five-item list.

	    Args:
	        n: Number of sentences to generate.

	    Returns:
	        A DataFrame with a single ``text`` column and ``n`` rows. The column
	        exists even when no rows are generated, so callers that look up
	        ``df["text"]`` keep working for ``n == 0``.
	    """
	    people = ["Alice", "Bob", "Charlie", "Diane", "Eve"]
	    orgs = ["Acme", "Globex", "Initech", "Umbrella", "Stark"]
	    locs = ["Paris", "NYC", "London", "Tokyo", "Sydney"]
	    verbs = ["visited", "joined", "founded", "traveled to", "met with"]

	    rows = []
	    for i in range(n):
	        person = people[i % len(people)]
	        verb = verbs[i % len(verbs)]
	        org = orgs[i % len(orgs)]
	        place = locs[i % len(locs)]
	        rows.append({"text": f"{person} {verb} {org} in {place}."})

	    # Pin the column explicitly: a frame built from an empty list would
	    # otherwise have no columns at all.
	    return pd.DataFrame(rows, columns=["text"])

	def load_data(file):
	    """Load sentences, split them into tokens, and publish the token table.

	    Args:
	        file: The uploaded CSV, given either as a path string or as an object
	            that exposes the path through ``.name``. A falsy value selects
	            the built-in sample data instead.

	    Returns:
	        A 3-tuple matching the (table, status, actions) outputs wired to the
	        Load button: an update for the token table, a status message, and an
	        update for the action row. On failure the table and action row are
	        hidden and the status message explains the problem.

	    Side effects:
	        On success, replaces the module-level ``token_df`` with one row per
	        whitespace-separated token, every label initialised to ``"O"``.
	    """
	    global token_df

	    # Load uploaded or sample
	    if file:
	        csv_path = file if isinstance(file, str) else file.name
	        try:
	            df = pd.read_csv(csv_path)
	        except (
	            OSError,
	            UnicodeDecodeError,
	            pd.errors.EmptyDataError,
	            pd.errors.ParserError,
	        ) as err:
	            # Report unreadable uploads in the status box instead of raising.
	            return (
	                gr.update(visible=False),
	                f"❌ Could not read CSV: {err}",
	                gr.update(visible=False),
	            )
	    else:
	        df = make_sample_data(100)

	    if "text" not in df.columns:
	        return (
	            gr.update(visible=False),
	            "❌ CSV must contain a `text` column.",
	            gr.update(visible=False),
	        )

	    # Tokenize
	    records = []
	    for sid, txt in enumerate(df["text"]):
	        if pd.isna(txt):
	            # Empty cells are read as NaN; they contribute no tokens but
	            # still consume a sentence id so ids keep matching CSV rows.
	            continue
	        # str() guards against numeric cells, which have no .split().
	        for tok in str(txt).split():
	            records.append({"sentence_id": sid, "token": tok, "label": "O"})

	    # Fix the schema so the table keeps its columns even with zero tokens.
	    token_df = pd.DataFrame(records, columns=["sentence_id", "token", "label"])
	    return (
	        gr.update(value=token_df, visible=True),
	        f"✅ Loaded {len(df)} sentences → {len(token_df)} tokens.",
	        gr.update(visible=True),
	    )

	def save_edits(table):
	    """Replace the shared token table with the rows edited in the UI.

	    Args:
	        table: Tabular data from the annotation grid, in any form accepted by
	            the ``pandas.DataFrame`` constructor.

	    Returns:
	        The confirmation message shown in the status box.
	    """
	    global token_df
	    column_names = ["sentence_id", "token", "label"]
	    edited = pd.DataFrame(table, columns=column_names)
	    token_df = edited
	    return "💾 Edits saved."

	def download_tokens():
	    """Write the current token table to ``raw_tokens.csv`` and return that path.

	    The file is written without the index column, relative to the current
	    working directory.
	    """
	    csv_name = "raw_tokens.csv"
	    token_df.to_csv(csv_name, index=False)
	    return csv_name

	def download_iob():
	    """Write the token table plus an ``iob`` tag column to ``ner_iob.csv``.

	    Tagging rules, applied row by row in table order:

	    * a blank, missing, or ``O`` label (any case) yields ``"O"``;
	    * otherwise the tag is ``"I-<label>"`` when the previous row seen for the
	      same ``sentence_id`` carried the same label, else ``"B-<label>"``.

	    Labels are compared and emitted with surrounding whitespace removed, so
	    ``" PER"`` continues a ``"PER"`` span instead of starting a new one.

	    Returns:
	        The path of the written file, ``"ner_iob.csv"``.
	    """
	    # Build IOB tags
	    tags = []
	    last_label = {}  # sentence_id -> label of that sentence's previous row

	    if token_df.empty:
	        # Nothing loaded yet: the frame may not even have the columns.
	        pairs = ()
	    else:
	        pairs = zip(token_df["sentence_id"], token_df["label"])

	    for sid, raw in pairs:
	        label = "" if pd.isna(raw) else str(raw).strip()
	        if not label or label.upper() == "O":
	            tags.append("O")
	            last_label[sid] = None
	            continue
	        prefix = "I-" if last_label.get(sid) == label else "B-"
	        tags.append(prefix + label)
	        last_label[sid] = label

	    out = token_df.copy()
	    out["iob"] = tags
	    out.to_csv("ner_iob.csv", index=False)
	    return "ner_iob.csv"

	# UI layout and event wiring. Flow: load data -> edit labels in the grid ->
	# save edits -> export the saved table as a CSV.
	with gr.Blocks() as app:
	    gr.Markdown("# 🏷️ Label It! Mini-NER")
	    gr.Markdown("Step 1: Upload a CSV with a `text` column (or leave blank for sample).")

	    with gr.Row():
	        file_in = gr.File(label="📁 Upload CSV", file_types=[".csv"])
	        load_btn = gr.Button("Load Data")

	    status = gr.Textbox(label="Status", interactive=False)
	    table = gr.Dataframe(
	        headers=["sentence_id", "token", "label"],
	        interactive=True,
	        visible=False,
	        label="📝 Annotate Tokens",
	    )

	    # Action buttons: Save + Downloads. The row stays hidden until
	    # load_data() reveals it through its third output.
	    with gr.Row(visible=False) as actions:
	        save_btn = gr.Button("💾 Save Edits")
	        # Plain buttons instead of gr.DownloadButton(fn=..., file_name=...):
	        # those keywords are not DownloadButton parameters. The export
	        # functions run on click and hand their file path to a File output.
	        dl_tokens = gr.Button("⬇️ Download Tokens CSV")
	        dl_iob = gr.Button("⬇️ Download IOB CSV")

	    export_file = gr.File(label="📥 Exported CSV", interactive=False)

	    load_btn.click(load_data, inputs=file_in, outputs=[table, status, actions])
	    save_btn.click(save_edits, inputs=table, outputs=status)
	    dl_tokens.click(download_tokens, outputs=export_file)
	    dl_iob.click(download_iob, outputs=export_file)

	    gr.Markdown("""
	    Step 2:
	    • Click into the label column and type one of: `PER`, `ORG`, `LOC`, or leave as `O`.
	    • Press Save Edits to lock your annotations.
	    • Click a download button above, then fetch your Tokens CSV or IOB CSV from the Exported CSV box.
	    """)

	app.launch()