Suzana committed on
Commit
3ea3aae
·
verified ·
1 Parent(s): afd9bdb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -6
app.py CHANGED
@@ -4,15 +4,25 @@ import io
4
  import os
5
  from pathlib import Path
6
  from huggingface_hub import HfApi, Repository
 
7
 
 
8
  df = pd.DataFrame()
9
 
 
 
 
 
 
 
 
 
10
  def upload_csv(file):
11
  global df
12
  df = pd.read_csv(file.name)
13
  if "text" not in df.columns or "label" not in df.columns:
14
  return gr.update(visible=False), "❌ CSV must contain ‘text’ and ‘label’ columns."
15
- df["label"] = df["label"].fillna("")
16
  return (
17
  gr.update(value=df[["text","label"]], visible=True),
18
  "✅ File uploaded — you can now edit labels."
@@ -29,6 +39,19 @@ def download_csv():
29
  df.to_csv(out_path, index=False)
30
  return out_path
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def push_to_hub(repo_name: str, hf_token: str) -> str:
33
  global df
34
  try:
@@ -49,13 +72,12 @@ def push_to_hub(repo_name: str, hf_token: str) -> str:
49
  repo = Repository(
50
  local_dir=str(local_dir),
51
  clone_from=repo_name,
52
- repo_type="dataset", # <-- important fix!
53
  use_auth_token=hf_token
54
  )
55
 
56
  csv_path = local_dir / "data.csv"
57
  df.to_csv(csv_path, index=False)
58
-
59
  repo.push_to_hub(commit_message="📑 Update annotated data")
60
  return f"🚀 Pushed to https://huggingface.co/datasets/{repo_name}"
61
 
@@ -64,7 +86,7 @@ def push_to_hub(repo_name: str, hf_token: str) -> str:
64
 
65
  with gr.Blocks(theme=gr.themes.Default()) as app:
66
  gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
67
- gr.Markdown("Upload a `.csv` with `text` + `label` columns, annotate in-place, then export or publish.")
68
 
69
  with gr.Row():
70
  file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
@@ -81,17 +103,28 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
81
  with gr.Row():
82
  save_btn = gr.Button("💾 Save")
83
  download_btn = gr.Button("⬇️ Download CSV")
 
84
  download_out = gr.File(label="📥 Downloaded File")
 
 
 
 
 
 
 
 
85
 
86
  with gr.Accordion("📦 Push to Hugging Face Hub", open=False):
87
  repo_input = gr.Textbox(label="Repo (username/dataset-name)")
88
- token_input = gr.Textbox(label="HF Token", type="password")
89
  push_btn = gr.Button("🚀 Push")
90
  push_status = gr.Textbox(label="Push Status", interactive=False)
91
 
 
92
  upload_btn.click(upload_csv, inputs=file_input, outputs=[df_table, status])
93
- save_btn.click( save_changes, inputs=df_table, outputs=status)
94
  download_btn.click(download_csv, outputs=download_out)
 
95
  push_btn.click( push_to_hub, inputs=[repo_input, token_input], outputs=push_status)
96
 
97
  app.launch()
 
4
  import os
5
  from pathlib import Path
6
  from huggingface_hub import HfApi, Repository
7
+ import matplotlib.pyplot as plt
8
 
9
+ # Global DataFrame
10
  df = pd.DataFrame()
11
 
12
+ # List of free, recommended models (for future auto-labeling)
13
+ DEFAULT_MODELS = [
14
+ "mistralai/Mistral-7B-Instruct-v0.2",
15
+ "HuggingFaceH4/zephyr-7b-beta",
16
+ "tiiuae/falcon-rw-1b",
17
+ "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
18
+ ]
19
+
20
  def upload_csv(file):
21
  global df
22
  df = pd.read_csv(file.name)
23
  if "text" not in df.columns or "label" not in df.columns:
24
  return gr.update(visible=False), "❌ CSV must contain ‘text’ and ‘label’ columns."
25
+ df["label"] = df["label"].fillna("") # ensure there’s always a label column
26
  return (
27
  gr.update(value=df[["text","label"]], visible=True),
28
  "✅ File uploaded — you can now edit labels."
 
39
  df.to_csv(out_path, index=False)
40
  return out_path
41
 
42
def visualize_distribution():
    """Build a bar chart showing how many rows carry each label.

    Reads the module-level ``df``.

    Returns:
        ``None`` when ``df`` is empty or has no ``label`` column; otherwise a
        matplotlib figure with one bar per distinct label value, titled
        "Label Distribution".
    """
    global df
    if df.empty or "label" not in df.columns:
        return None

    counts = df["label"].value_counts()

    fig, ax = plt.subplots()
    counts.plot(kind="bar", ax=ax)
    ax.set_title("Label Distribution")
    ax.set_xlabel("Label")
    ax.set_ylabel("Count")

    # Lay out this specific figure instead of pyplot's implicit "current
    # figure", which may be a different one if several requests overlap.
    fig.tight_layout()

    # pyplot keeps every figure it creates registered until it is explicitly
    # closed; without this each button click would leak one figure. The
    # returned Figure object itself remains usable for rendering.
    plt.close(fig)
    return fig
54
+
55
  def push_to_hub(repo_name: str, hf_token: str) -> str:
56
  global df
57
  try:
 
72
  repo = Repository(
73
  local_dir=str(local_dir),
74
  clone_from=repo_name,
75
+ repo_type="dataset",
76
  use_auth_token=hf_token
77
  )
78
 
79
  csv_path = local_dir / "data.csv"
80
  df.to_csv(csv_path, index=False)
 
81
  repo.push_to_hub(commit_message="📑 Update annotated data")
82
  return f"🚀 Pushed to https://huggingface.co/datasets/{repo_name}"
83
 
 
86
 
87
  with gr.Blocks(theme=gr.themes.Default()) as app:
88
  gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
89
+ gr.Markdown("Upload a `.csv` with **text** + **label** columns, annotate in-place, then export, visualize, or publish.")
90
 
91
  with gr.Row():
92
  file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
 
103
  with gr.Row():
104
  save_btn = gr.Button("💾 Save")
105
  download_btn = gr.Button("⬇️ Download CSV")
106
+ visualize_btn= gr.Button("📊 Visualize Distribution")
107
  download_out = gr.File(label="📥 Downloaded File")
108
+ viz_out = gr.Plot(label="Label Distribution")
109
+
110
+ with gr.Row():
111
+ model_dropdown = gr.Dropdown(
112
+ label="🤖 (Future) Auto-Label Model",
113
+ choices=DEFAULT_MODELS,
114
+ value=DEFAULT_MODELS[0]
115
+ )
116
 
117
  with gr.Accordion("📦 Push to Hugging Face Hub", open=False):
118
  repo_input = gr.Textbox(label="Repo (username/dataset-name)")
119
+ token_input = gr.Textbox(label="🔑 HF Token", type="password")
120
  push_btn = gr.Button("🚀 Push")
121
  push_status = gr.Textbox(label="Push Status", interactive=False)
122
 
123
+ # Bind events
124
  upload_btn.click(upload_csv, inputs=file_input, outputs=[df_table, status])
125
+ save_btn.click( save_changes, inputs=df_table, outputs=status)
126
  download_btn.click(download_csv, outputs=download_out)
127
+ visualize_btn.click(visualize_distribution, outputs=viz_out)
128
  push_btn.click( push_to_hub, inputs=[repo_input, token_input], outputs=push_status)
129
 
130
  app.launch()