Suzana committed on
Commit
9ed6d9a
·
verified ·
1 Parent(s): 1dd09b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -42
app.py CHANGED
@@ -1,15 +1,13 @@
1
  import gradio as gr
2
  import pandas as pd
3
- from pathlib import Path
4
 
5
- # Global token storage
6
  token_df = pd.DataFrame()
7
 
8
- # Generate generic sample sentences
9
  def make_sample_data(n=100):
10
  people = ["Alice","Bob","Charlie","Diane","Eve"]
11
- orgs = ["Acme Corp","Globex","Initech","Umbrella","Stark Inc"]
12
- locs = ["Paris","New York","London","Tokyo","Sydney"]
13
  verbs = ["visited","joined","founded","traveled to","met with"]
14
  rows = []
15
  for i in range(n):
@@ -22,18 +20,18 @@ def make_sample_data(n=100):
22
 
23
  def load_data(file):
24
  global token_df
25
- # Load user CSV or fallback to sample
26
  if file:
27
  df = pd.read_csv(file.name)
28
  else:
29
  df = make_sample_data(100)
30
  if "text" not in df.columns:
31
  return (
32
- gr.update(visible=False),
33
- "❌ CSV must contain a `text` column.",
34
  gr.update(visible=False)
35
  )
36
- # Tokenize into (sentence_id, token, label)
37
  records = []
38
  for sid, txt in enumerate(df["text"]):
39
  for tok in txt.split():
@@ -42,7 +40,7 @@ def load_data(file):
42
  return (
43
  gr.update(value=token_df, visible=True),
44
  f"✅ Loaded {len(df)} sentences → {len(token_df)} tokens.",
45
- gr.update(visible=True)
46
  )
47
 
48
  def save_edits(table):
@@ -52,10 +50,10 @@ def save_edits(table):
52
 
53
  def download_tokens():
54
  token_df.to_csv("raw_tokens.csv", index=False)
55
- return Path("raw_tokens.csv")
56
 
57
  def download_iob():
58
- # Convert to IOB
59
  iob, prev = [], {}
60
  for _, r in token_df.iterrows():
61
  sid, lbl = r["sentence_id"], r["label"]
@@ -69,55 +67,38 @@ def download_iob():
69
  out = token_df.copy()
70
  out["iob"] = iob
71
  out.to_csv("ner_iob.csv", index=False)
72
- return Path("ner_iob.csv")
73
 
74
  with gr.Blocks() as app:
75
  gr.Markdown("# 🏷️ Label It! Mini-NER")
76
- gr.Markdown("**Step 1:** Upload a CSV with a `text` column, or leave blank for sample sentences.")
77
 
78
  with gr.Row():
79
  file_in = gr.File(label="📁 Upload CSV", file_types=[".csv"])
80
  load_btn = gr.Button("Load Data")
81
 
82
  status = gr.Textbox(label="Status", interactive=False)
83
-
84
- table = gr.Dataframe(
85
  headers=["sentence_id","token","label"],
86
- editable=True,
87
  visible=False,
88
  label="📝 Annotate Tokens"
89
  )
90
 
 
91
  with gr.Row(visible=False) as actions:
92
- save_btn = gr.Button("💾 Save Edits")
93
- dl_tokens = gr.DownloadButton(
94
- fn=download_tokens,
95
- file_name="raw_tokens.csv",
96
- label="⬇️ Download Tokens CSV"
97
- )
98
- dl_iob = gr.DownloadButton(
99
- fn=download_iob,
100
- file_name="ner_iob.csv",
101
- label="⬇️ Download IOB CSV"
102
- )
103
 
104
- # Bind events
105
- load_btn.click(
106
- load_data,
107
- inputs=file_in,
108
- outputs=[table, status, actions]
109
- )
110
- save_btn.click(
111
- save_edits,
112
- inputs=table,
113
- outputs=status
114
- )
115
 
116
  gr.Markdown("""
117
  **Step 2:**
118
- - Click into the **label** column and type one of:
119
- `PER`, `ORG`, `LOC`, or leave as `O`.
120
- - **Save Edits**, then download your token CSV or IOB‐tagged CSV.
121
  """)
122
 
123
  app.launch()
 
1
  import gradio as gr
2
  import pandas as pd
 
3
 
4
+ # In-memory token DataFrame
5
  token_df = pd.DataFrame()
6
 
 
7
  def make_sample_data(n=100):
8
  people = ["Alice","Bob","Charlie","Diane","Eve"]
9
+ orgs = ["Acme","Globex","Initech","Umbrella","Stark"]
10
+ locs = ["Paris","NYC","London","Tokyo","Sydney"]
11
  verbs = ["visited","joined","founded","traveled to","met with"]
12
  rows = []
13
  for i in range(n):
 
20
 
21
  def load_data(file):
22
  global token_df
23
+ # Load uploaded or sample
24
  if file:
25
  df = pd.read_csv(file.name)
26
  else:
27
  df = make_sample_data(100)
28
  if "text" not in df.columns:
29
  return (
30
+ gr.update(visible=False),
31
+ "❌ CSV must contain a `text` column.",
32
  gr.update(visible=False)
33
  )
34
+ # Tokenize
35
  records = []
36
  for sid, txt in enumerate(df["text"]):
37
  for tok in txt.split():
 
40
  return (
41
  gr.update(value=token_df, visible=True),
42
  f"✅ Loaded {len(df)} sentences → {len(token_df)} tokens.",
43
+ gr.update(visible=True),
44
  )
45
 
46
  def save_edits(table):
 
50
 
51
  def download_tokens():
52
  token_df.to_csv("raw_tokens.csv", index=False)
53
+ return "raw_tokens.csv"
54
 
55
  def download_iob():
56
+ # Build IOB tags
57
  iob, prev = [], {}
58
  for _, r in token_df.iterrows():
59
  sid, lbl = r["sentence_id"], r["label"]
 
67
  out = token_df.copy()
68
  out["iob"] = iob
69
  out.to_csv("ner_iob.csv", index=False)
70
+ return "ner_iob.csv"
71
 
72
  with gr.Blocks() as app:
73
  gr.Markdown("# 🏷️ Label It! Mini-NER")
74
+ gr.Markdown("**Step 1:** Upload a CSV with a `text` column (or leave blank for sample).")
75
 
76
  with gr.Row():
77
  file_in = gr.File(label="📁 Upload CSV", file_types=[".csv"])
78
  load_btn = gr.Button("Load Data")
79
 
80
  status = gr.Textbox(label="Status", interactive=False)
81
+ table = gr.Dataframe(
 
82
  headers=["sentence_id","token","label"],
83
+ interactive=True,
84
  visible=False,
85
  label="📝 Annotate Tokens"
86
  )
87
 
88
+ # Action buttons: Save + Downloads
89
  with gr.Row(visible=False) as actions:
90
+ save_btn = gr.Button("💾 Save Edits")
91
+ dl_tokens = gr.DownloadButton(fn=download_tokens, file_name="raw_tokens.csv", label="⬇️ Download Tokens CSV")
92
+ dl_iob = gr.DownloadButton(fn=download_iob, file_name="ner_iob.csv", label="⬇️ Download IOB CSV")
 
 
 
 
 
 
 
 
93
 
94
+ load_btn.click(load_data, inputs=file_in, outputs=[table, status, actions])
95
+ save_btn.click(save_edits, inputs=table, outputs=status)
 
 
 
 
 
 
 
 
 
96
 
97
  gr.Markdown("""
98
  **Step 2:**
99
+ Click into the **label** column and type one of: `PER`, `ORG`, `LOC`, or leave as `O`.
100
+ Press **Save Edits** to lock your annotations.
101
+ Download your **Tokens CSV** or **IOB CSV** with the buttons above.
102
  """)
103
 
104
  app.launch()