Spaces:

Suzana
/

labelit-mini-ner

Sleeping

App Files Files Community

Suzana committed 29 days ago

Commit

9ed6d9a

verified ·

1 Parent(s): 1dd09b6

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -42

app.py CHANGED Viewed

@@ -1,15 +1,13 @@
 import gradio as gr
 import pandas as pd
-from pathlib import Path
-# Global token storage
 token_df = pd.DataFrame()
-# Generate generic sample sentences
 def make_sample_data(n=100):
     people = ["Alice","Bob","Charlie","Diane","Eve"]
-    orgs   = ["Acme Corp","Globex","Initech","Umbrella","Stark Inc"]
-    locs   = ["Paris","New York","London","Tokyo","Sydney"]
     verbs  = ["visited","joined","founded","traveled to","met with"]
     rows = []
     for i in range(n):
@@ -22,18 +20,18 @@ def make_sample_data(n=100):
 def load_data(file):
     global token_df
-    # Load user CSV or fallback to sample
     if file:
         df = pd.read_csv(file.name)
     else:
         df = make_sample_data(100)
     if "text" not in df.columns:
         return (
-            gr.update(visible=False),
-            "❌ CSV must contain a `text` column.",
             gr.update(visible=False)
         )
-    # Tokenize into (sentence_id, token, label)
     records = []
     for sid, txt in enumerate(df["text"]):
         for tok in txt.split():
@@ -42,7 +40,7 @@ def load_data(file):
     return (
         gr.update(value=token_df, visible=True),
         f"✅ Loaded {len(df)} sentences → {len(token_df)} tokens.",
-        gr.update(visible=True)
     )
 def save_edits(table):
@@ -52,10 +50,10 @@ def save_edits(table):
 def download_tokens():
     token_df.to_csv("raw_tokens.csv", index=False)
-    return Path("raw_tokens.csv")
 def download_iob():
-    # Convert to IOB
     iob, prev = [], {}
     for _, r in token_df.iterrows():
         sid, lbl = r["sentence_id"], r["label"]
@@ -69,55 +67,38 @@ def download_iob():
     out = token_df.copy()
     out["iob"] = iob
     out.to_csv("ner_iob.csv", index=False)
-    return Path("ner_iob.csv")
 with gr.Blocks() as app:
     gr.Markdown("# 🏷️ Label It! Mini-NER")
-    gr.Markdown("**Step 1:** Upload a CSV with a `text` column, or leave blank for sample sentences.")
     with gr.Row():
         file_in = gr.File(label="📁 Upload CSV", file_types=[".csv"])
         load_btn = gr.Button("Load Data")
     status = gr.Textbox(label="Status", interactive=False)
-    table = gr.Dataframe(
         headers=["sentence_id","token","label"],
-        editable=True,
         visible=False,
         label="📝 Annotate Tokens"
     )
     with gr.Row(visible=False) as actions:
-        save_btn    = gr.Button("💾 Save Edits")
-        dl_tokens   = gr.DownloadButton(
-            fn=download_tokens,
-            file_name="raw_tokens.csv",
-            label="⬇️ Download Tokens CSV"
-        )
-        dl_iob      = gr.DownloadButton(
-            fn=download_iob,
-            file_name="ner_iob.csv",
-            label="⬇️ Download IOB CSV"
-        )
-    # Bind events
-    load_btn.click(
-        load_data,
-        inputs=file_in,
-        outputs=[table, status, actions]
-    )
-    save_btn.click(
-        save_edits,
-        inputs=table,
-        outputs=status
-    )
     gr.Markdown("""
     **Step 2:**
-    - Click into the **label** column and type one of:
-      `PER`, `ORG`, `LOC`, or leave as `O`.
-    - **Save Edits**, then download your token CSV or IOB‐tagged CSV.
     """)
 app.launch()

 import gradio as gr
 import pandas as pd
+# In-memory token DataFrame
 token_df = pd.DataFrame()
 def make_sample_data(n=100):
     people = ["Alice","Bob","Charlie","Diane","Eve"]
+    orgs   = ["Acme","Globex","Initech","Umbrella","Stark"]
+    locs   = ["Paris","NYC","London","Tokyo","Sydney"]
     verbs  = ["visited","joined","founded","traveled to","met with"]
     rows = []
     for i in range(n):
 def load_data(file):
     global token_df
+    # Load uploaded or sample
     if file:
         df = pd.read_csv(file.name)
     else:
         df = make_sample_data(100)
     if "text" not in df.columns:
         return (
+            gr.update(visible=False),
+            "❌ CSV must contain a `text` column.",
             gr.update(visible=False)
         )
+    # Tokenize
     records = []
     for sid, txt in enumerate(df["text"]):
         for tok in txt.split():
     return (
         gr.update(value=token_df, visible=True),
         f"✅ Loaded {len(df)} sentences → {len(token_df)} tokens.",
+        gr.update(visible=True),
     )
 def save_edits(table):
 def download_tokens():
     token_df.to_csv("raw_tokens.csv", index=False)
+    return "raw_tokens.csv"
 def download_iob():
+    # Build IOB tags
     iob, prev = [], {}
     for _, r in token_df.iterrows():
         sid, lbl = r["sentence_id"], r["label"]
     out = token_df.copy()
     out["iob"] = iob
     out.to_csv("ner_iob.csv", index=False)
+    return "ner_iob.csv"
 with gr.Blocks() as app:
     gr.Markdown("# 🏷️ Label It! Mini-NER")
+    gr.Markdown("**Step 1:** Upload a CSV with a `text` column (or leave blank for sample).")
     with gr.Row():
         file_in = gr.File(label="📁 Upload CSV", file_types=[".csv"])
         load_btn = gr.Button("Load Data")
     status = gr.Textbox(label="Status", interactive=False)
+    table  = gr.Dataframe(
         headers=["sentence_id","token","label"],
+        interactive=True,
         visible=False,
         label="📝 Annotate Tokens"
     )
+    # Action buttons: Save + Downloads
     with gr.Row(visible=False) as actions:
+        save_btn     = gr.Button("💾 Save Edits")
+        dl_tokens    = gr.DownloadButton(fn=download_tokens, file_name="raw_tokens.csv", label="⬇️ Download Tokens CSV")
+        dl_iob       = gr.DownloadButton(fn=download_iob,   file_name="ner_iob.csv",    label="⬇️ Download IOB CSV")
+    load_btn.click(load_data, inputs=file_in, outputs=[table, status, actions])
+    save_btn.click(save_edits, inputs=table, outputs=status)
     gr.Markdown("""
     **Step 2:**
+    • Click into the **label** column and type one of: `PER`, `ORG`, `LOC`, or leave as `O`.
+    • Press **Save Edits** to lock your annotations.
+    • Download your **Tokens CSV** or **IOB CSV** with the buttons above.
     """)
 app.launch()