Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
# In-memory token table shared by the callbacks in this file: one row per
# token. It starts empty but already carries the token schema, so an export
# requested before any data is loaded still produces a well-formed header.
token_df = pd.DataFrame(columns=["sentence_id", "token", "label"])
def make_sample_data(n=100):
    """Build a demo DataFrame of ``n`` synthetic sentences.

    Each sentence has the form "<person> <verb> <org> in <location>." and is
    produced by cycling through small fixed vocabularies, so the output is
    deterministic.

    Args:
        n: Number of sentences to generate.

    Returns:
        A DataFrame with a single ``text`` column and ``n`` rows. The column
        is present even when ``n`` is 0.
    """
    people = ["Alice", "Bob", "Charlie", "Diane", "Eve"]
    orgs = ["Acme", "Globex", "Initech", "Umbrella", "Stark"]
    locs = ["Paris", "NYC", "London", "Tokyo", "Sydney"]
    verbs = ["visited", "joined", "founded", "traveled to", "met with"]

    sentences = [
        f"{people[i % len(people)]} {verbs[i % len(verbs)]} "
        f"{orgs[i % len(orgs)]} in {locs[i % len(locs)]}."
        for i in range(n)
    ]
    # Name the column explicitly: building the frame from an empty list of
    # records would otherwise yield a frame with no "text" column at all.
    return pd.DataFrame({"text": sentences}, columns=["text"])
def load_data(file):
    """Load sentences from an uploaded CSV (or the sample set) and tokenize them.

    The sentences are split on whitespace into one row per token, every token
    starts with the label "O", and the result replaces the module-level
    ``token_df``.

    Args:
        file: The uploaded file, or a falsy value to use the built-in sample
            data. Either an object exposing the path as ``.name`` or a plain
            path string is accepted.

    Returns:
        A 3-tuple ``(table_update, status_message, actions_update)``. On
        failure both updates hide their component and the message explains
        what went wrong; ``token_df`` is left untouched in that case.
    """
    global token_df

    hidden = gr.update(visible=False)

    # Load uploaded or sample
    if file:
        # NOTE(review): the upload may arrive as a file-like wrapper or as a
        # bare path depending on the Gradio version / File `type` -- accept both.
        path = getattr(file, "name", file)
        try:
            df = pd.read_csv(path)
        except (
            OSError,
            UnicodeDecodeError,
            pd.errors.EmptyDataError,
            pd.errors.ParserError,
        ) as err:
            # Report unreadable input through the status box instead of
            # letting the callback raise.
            return (hidden, f"Could not read CSV: {err}", hidden)
    else:
        df = make_sample_data(100)

    if "text" not in df.columns:
        return (
            hidden,
            "β CSV must contain a `text` column.",
            gr.update(visible=False),
        )

    # Tokenize
    records = []
    for sid, txt in enumerate(df["text"]):
        # Empty cells are read as NaN (a float); skip them but keep the
        # sentence numbering aligned with the CSV rows.
        if pd.isna(txt):
            continue
        # str() guards against numeric cells, which have no .split().
        for tok in str(txt).split():
            records.append({"sentence_id": sid, "token": tok, "label": "O"})

    # Explicit columns keep the schema stable even when no tokens were found.
    token_df = pd.DataFrame(records, columns=["sentence_id", "token", "label"])

    return (
        gr.update(value=token_df, visible=True),
        f"β Loaded {len(df)} sentences β {len(token_df)} tokens.",
        gr.update(visible=True),
    )
def save_edits(table):
    """Replace the module-level ``token_df`` with the edited table contents.

    Args:
        table: The edited rows, in any form ``pd.DataFrame`` accepts together
            with the three token column names.

    Returns:
        A short confirmation message for the status box.
    """
    global token_df

    column_names = ["sentence_id", "token", "label"]
    edited = pd.DataFrame(table, columns=column_names)
    token_df = edited

    return "πΎ Edits saved."
def download_tokens():
    """Write the current ``token_df`` to ``raw_tokens.csv`` and return its path.

    The file is written to the current working directory without the index
    column.
    """
    csv_path = "raw_tokens.csv"
    token_df.to_csv(csv_path, index=False)
    return csv_path
def _clean_label(label):
    """Return ``label`` as a stripped string, mapping missing/blank/"o" to "O"."""
    if pd.isna(label):
        return "O"
    text = str(label).strip()
    if not text or text.upper() == "O":
        return "O"
    return text


def download_iob():
    """Write ``token_df`` plus an ``iob`` tag column to ``ner_iob.csv``.

    A labelled token is tagged ``I-<label>`` when the preceding token of the
    same sentence carries the same label, and ``B-<label>`` otherwise.
    Unlabelled tokens are tagged ``O``. Labels that are missing, blank, or
    "o" in any case / with surrounding whitespace count as unlabelled, so a
    cleared cell never produces a dangling ``B-`` tag.

    Returns:
        The path of the written CSV file, ``"ner_iob.csv"``.
    """
    # Build IOB tags
    if token_df.empty:
        pairs = ()
    else:
        pairs = zip(token_df["sentence_id"], token_df["label"])

    tags = []
    last_label = {}  # sentence_id -> label of that sentence's previous token
    for sid, raw_label in pairs:
        label = _clean_label(raw_label)
        if label == "O":
            tags.append("O")
            last_label[sid] = None  # an outside token ends any open entity
            continue
        prefix = "I-" if last_label.get(sid) == label else "B-"
        tags.append(prefix + label)
        last_label[sid] = label

    out = token_df.copy()
    out["iob"] = tags
    out.to_csv("ner_iob.csv", index=False)
    return "ner_iob.csv"
# Build the UI: upload/load controls, a status box, an editable token table,
# and a row of save/download actions that stays hidden until data is loaded.
with gr.Blocks() as app:
    gr.Markdown("# π·οΈ Label It! Mini-NER")
    gr.Markdown("**Step 1:** Upload a CSV with a `text` column (or leave blank for sample).")
    with gr.Row():
        file_in = gr.File(label="π Upload CSV", file_types=[".csv"])
        load_btn = gr.Button("Load Data")
    status = gr.Textbox(label="Status", interactive=False)
    table = gr.Dataframe(
        headers=["sentence_id", "token", "label"],
        interactive=True,
        visible=False,
        label="π Annotate Tokens",
    )
    # Action buttons: Save + Downloads
    with gr.Row(visible=False) as actions:
        save_btn = gr.Button("πΎ Save Edits")
        # DownloadButton serves whatever file path is its current value; it
        # takes no `fn` / `file_name` arguments, so the files are produced by
        # the event chains below instead.
        dl_tokens = gr.DownloadButton(label="β¬οΈ Download Tokens CSV")
        dl_iob = gr.DownloadButton(label="β¬οΈ Download IOB CSV")
    load_event = load_btn.click(load_data, inputs=file_in, outputs=[table, status, actions])
    save_event = save_btn.click(save_edits, inputs=table, outputs=status)
    # Regenerate both export files whenever the token table changes, so each
    # download button always points at a CSV that reflects the latest state.
    for changed in (load_event, save_event):
        changed.then(download_tokens, outputs=dl_tokens).then(download_iob, outputs=dl_iob)
    gr.Markdown("""
**Step 2:**
β’ Click into the **label** column and type one of: `PER`, `ORG`, `LOC`, or leave as `O`.
β’ Press **Save Edits** to lock your annotations.
β’ Download your **Tokens CSV** or **IOB CSV** with the buttons above.
""")
app.launch()