# app.py — scraped from a hosted file view: last change "Update app.py" by Suzana (commit 9ed6d9a, verified), 3.44 kB
import gradio as gr
import pandas as pd
# Module-level working copy of the token table (one row per token).
# load_data() and save_edits() rebind it; the download helpers read it.
# NOTE(review): being a module global, it is shared by every caller in this process.
token_df: pd.DataFrame = pd.DataFrame()
def make_sample_data(n=100):
    """Build a small synthetic corpus of one-sentence texts.

    Every sentence has the form "<person> <verb> <org> in <location>." and is
    chosen by cycling through fixed word lists with the row index. All four
    lists have five entries, so the output repeats every five rows.

    Args:
        n: Number of sentences to generate. Zero or a negative value yields
            an empty frame.

    Returns:
        A DataFrame with a single `text` column and one row per sentence.
    """
    people = ["Alice", "Bob", "Charlie", "Diane", "Eve"]
    orgs = ["Acme", "Globex", "Initech", "Umbrella", "Stark"]
    locs = ["Paris", "NYC", "London", "Tokyo", "Sydney"]
    verbs = ["visited", "joined", "founded", "traveled to", "met with"]

    rows = []
    for i in range(n):
        person = people[i % len(people)]
        verb = verbs[i % len(verbs)]
        org = orgs[i % len(orgs)]
        place = locs[i % len(locs)]
        rows.append({"text": f"{person} {verb} {org} in {place}."})

    # Pin the column so an empty result still exposes `text`; without it an
    # empty list of rows produces a frame with no columns at all.
    return pd.DataFrame(rows, columns=["text"])
def load_data(file):
    """Load sentences, split them into tokens, and reset the token table.

    Args:
        file: The uploaded CSV, or a falsy value to fall back to the built-in
            sample data. Both a plain path string and an object exposing the
            path as `.name` are accepted.

    Returns:
        A 3-tuple of (token-table update, status message, actions-row
        update). On failure both updates hide their component and the status
        message explains the problem.
    """
    global token_df

    # Load uploaded or sample
    if file:
        # Fall back to the value itself when it has no `.name`, so a bare
        # path string works as well as a temp-file style wrapper.
        path = getattr(file, "name", file)
        try:
            df = pd.read_csv(path)
        except (
            OSError,
            UnicodeDecodeError,
            pd.errors.EmptyDataError,
            pd.errors.ParserError,
        ) as err:
            # Report unreadable uploads through the status box, like the
            # missing-column case below, instead of raising.
            return (
                gr.update(visible=False),
                f"❌ Could not read CSV: {err}",
                gr.update(visible=False),
            )
    else:
        df = make_sample_data(100)

    if "text" not in df.columns:
        return (
            gr.update(visible=False),
            "❌ CSV must contain a `text` column.",
            gr.update(visible=False),
        )

    # Tokenize. Empty cells are read as NaN and numeric cells as numbers;
    # coerce everything to text so `.split()` is always available. Blank rows
    # simply contribute no tokens while keeping their sentence id.
    texts = df["text"].fillna("").astype(str)
    records = []
    for sid, txt in enumerate(texts):
        for tok in txt.split():
            records.append({"sentence_id": sid, "token": tok, "label": "O"})

    # Explicit columns keep the schema stable even when no tokens were found.
    token_df = pd.DataFrame(records, columns=["sentence_id", "token", "label"])

    return (
        gr.update(value=token_df, visible=True),
        f"✅ Loaded {len(df)} sentences → {len(token_df)} tokens.",
        gr.update(visible=True),
    )
def save_edits(table):
    """Replace the in-memory token table with the edited grid contents.

    Args:
        table: The edited rows; anything `pd.DataFrame` accepts together with
            the three column names `sentence_id`, `token` and `label`.

    Returns:
        A short status message confirming the save.
    """
    global token_df
    token_df = pd.DataFrame(table, columns=["sentence_id", "token", "label"])
    return "💾 Edits saved."
def download_tokens():
    """Dump the current token table to CSV and return the file's path.

    Returns:
        The relative path "raw_tokens.csv" of the file just written
        (no index column).
    """
    out_path = "raw_tokens.csv"
    token_df.to_csv(out_path, index=False)
    return out_path
def _clean_label(value):
    """Return a label as stripped text, mapping blank or missing values to "O"."""
    if pd.isna(value):
        return "O"
    return str(value).strip() or "O"


def download_iob():
    """Write the token table plus an `iob` tag column to CSV and return its path.

    A token labelled "O" (or whose label is blank/missing) is tagged "O". Any
    other label becomes "B-<label>" when it opens a run and "I-<label>" when
    the previous token of the same sentence carried the same label.

    Returns:
        The relative path "ner_iob.csv" of the file just written
        (no index column).
    """
    # Build IOB tags
    tags = []
    # sentence_id -> label of that sentence's previous token (None after "O")
    last_label = {}
    for _, row in token_df.iterrows():
        sid = row["sentence_id"]
        # Cleared cells arrive as "" or NaN and hand-typed labels may carry
        # stray spaces; normalise so they cannot yield a bare "B-" tag, a
        # str + float TypeError, or a broken B-/I- continuation.
        label = _clean_label(row["label"])
        if label == "O":
            tags.append("O")
            last_label[sid] = None
        else:
            prefix = "I-" if last_label.get(sid) == label else "B-"
            tags.append(prefix + label)
            last_label[sid] = label

    out = token_df.copy()
    out["iob"] = tags
    out.to_csv("ner_iob.csv", index=False)
    return "ner_iob.csv"
# UI layout and event wiring for the token-labelling app.
with gr.Blocks() as app:
    gr.Markdown("# 🏷️ Label It! Mini-NER")
    gr.Markdown("**Step 1:** Upload a CSV with a `text` column (or leave blank for sample).")

    with gr.Row():
        file_in = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        load_btn = gr.Button("Load Data")

    status = gr.Textbox(label="Status", interactive=False)

    # Hidden until a dataset has been loaded.
    table = gr.Dataframe(
        headers=["sentence_id", "token", "label"],
        interactive=True,
        visible=False,
        label="📝 Annotate Tokens",
    )

    # Action buttons: Save + Downloads
    with gr.Row(visible=False) as actions:
        save_btn = gr.Button("💾 Save Edits")
        # DownloadButton has no `fn`/`file_name` arguments; it serves the file
        # currently set as its value, which the event chains below supply.
        dl_tokens = gr.DownloadButton(label="⬇️ Download Tokens CSV")
        dl_iob = gr.DownloadButton(label="⬇️ Download IOB CSV")

    # Regenerate both export files after every load and every save so the
    # download buttons always point at the latest saved state.
    load_event = load_btn.click(load_data, inputs=file_in, outputs=[table, status, actions])
    load_event.then(download_tokens, outputs=dl_tokens).then(download_iob, outputs=dl_iob)

    save_event = save_btn.click(save_edits, inputs=table, outputs=status)
    save_event.then(download_tokens, outputs=dl_tokens).then(download_iob, outputs=dl_iob)

    gr.Markdown("""
**Step 2:**
‒ Click into the **label** column and type one of: `PER`, `ORG`, `LOC`, or leave as `O`.
‒ Press **Save Edits** to lock your annotations.
‒ Download your **Tokens CSV** or **IOB CSV** with the buttons above.
""")

app.launch()