Suzana committed on
Commit
8277138
·
verified ·
1 Parent(s): d058c73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -20
app.py CHANGED
@@ -4,27 +4,31 @@ import io
4
  import os
5
  from huggingface_hub import HfApi, Repository
6
 
7
- # Global state
8
  df = pd.DataFrame()
9
 
 
10
  def upload_csv(file):
11
  global df
12
  df = pd.read_csv(file.name)
13
  if "text" not in df.columns or "label" not in df.columns:
14
- return None, "❌ CSV must have 'text' and 'label' columns."
15
  df["label"] = df["label"].fillna("")
16
- return df[["text", "label"]], "✅ File uploaded. You can now edit labels."
17
 
18
- def save_changes(updated_table):
 
19
  global df
20
- df = pd.DataFrame(updated_table, columns=["text", "label"])
21
  return "✅ Changes saved."
22
 
 
23
  def download_csv():
24
  global df
25
  csv_bytes = df.to_csv(index=False).encode("utf-8")
26
  return gr.File.update(value=io.BytesIO(csv_bytes), filename="annotated_data.csv")
27
 
 
28
  def push_to_hub(repo_name, hf_token):
29
  global df
30
  repo_url = f"https://huggingface.co/datasets/{repo_name}"
@@ -33,23 +37,22 @@ def push_to_hub(repo_name, hf_token):
33
  if os.path.exists(local_path):
34
  os.system(f"rm -rf {local_path}")
35
 
36
- api = HfApi()
37
- api.create_repo(repo_id=repo_name, token=hf_token, repo_type="dataset", exist_ok=True)
38
  repo = Repository(local_dir=local_path, clone_from=repo_url, token=hf_token)
39
-
40
  df.to_csv(f"{local_path}/data.csv", index=False)
41
  repo.push_to_hub()
42
 
43
- return f" Successfully pushed to: {repo_url}"
44
 
45
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app:
 
46
  gr.Markdown("# 🏷️ Label it! Text Annotation Tool")
47
- gr.Markdown("Upload your CSV with `text` and `label` columns. Edit labels, save your work, and export or publish.")
48
 
49
  with gr.Row():
50
  file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
51
- upload_btn = gr.Button("Upload")
52
-
53
  dataframe = gr.Dataframe(
54
  headers=["text", "label"],
55
  label="📝 Annotate Labels Below",
@@ -58,20 +61,23 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app:
58
  row_count=10,
59
  col_count=(2, "fixed")
60
  )
61
-
62
- status = gr.Textbox(visible=True, label="Status", interactive=False)
63
 
64
  with gr.Row():
65
  save_btn = gr.Button("💾 Save")
66
  download_btn = gr.Button("⬇️ Download CSV")
67
- download_file = gr.File(label="📥 Downloaded File")
68
 
69
  with gr.Row():
70
- repo_input = gr.Textbox(label="📦 Hugging Face Dataset Repo (e.g. username/my-dataset)")
71
- token_input = gr.Textbox(label="🔑 Hugging Face Token", type="password")
72
- push_btn = gr.Button("🚀 Push to HF Hub")
73
  push_status = gr.Textbox(label="Push Status", interactive=False)
74
 
75
- upload_btn.click(fn=upload_csv, inputs=file_input, outputs=[dataframe, status])
76
  save_btn.click(fn=save_changes, inputs=dataframe, outputs=status)
77
  download_btn.click(fn=download_csv, outputs=download_file)
 
 
 
 
4
  import os
5
  from huggingface_hub import HfApi, Repository
6
 
7
+ # Global variable to store the DataFrame
8
  df = pd.DataFrame()
9
 
10
+ # Upload CSV
11
  def upload_csv(file):
12
  global df
13
  df = pd.read_csv(file.name)
14
  if "text" not in df.columns or "label" not in df.columns:
15
+ return None, "❌ CSV must contain 'text' and 'label' columns."
16
  df["label"] = df["label"].fillna("")
17
+ return df[["text", "label"]], "✅ File uploaded. You can now annotate."
18
 
19
+ # Save changes from the editable table
20
+ def save_changes(edited_data):
21
  global df
22
+ df = pd.DataFrame(edited_data, columns=["text", "label"])
23
  return "✅ Changes saved."
24
 
25
+ # Download the updated CSV
26
  def download_csv():
27
  global df
28
  csv_bytes = df.to_csv(index=False).encode("utf-8")
29
  return gr.File.update(value=io.BytesIO(csv_bytes), filename="annotated_data.csv")
30
 
31
+ # Push to Hugging Face Hub
32
  def push_to_hub(repo_name, hf_token):
33
  global df
34
  repo_url = f"https://huggingface.co/datasets/{repo_name}"
 
37
  if os.path.exists(local_path):
38
  os.system(f"rm -rf {local_path}")
39
 
40
+ HfApi().create_repo(repo_id=repo_name, token=hf_token, repo_type="dataset", exist_ok=True)
 
41
  repo = Repository(local_dir=local_path, clone_from=repo_url, token=hf_token)
 
42
  df.to_csv(f"{local_path}/data.csv", index=False)
43
  repo.push_to_hub()
44
 
45
+ return f"🚀 Successfully pushed to: {repo_url}"
46
 
47
+ # Gradio app
48
+ with gr.Blocks(title="CSV Labeling App") as app:
49
  gr.Markdown("# 🏷️ Label it! Text Annotation Tool")
50
+ gr.Markdown("Upload a `.csv` with `text` and `label` columns. You can annotate, save, and publish your data.")
51
 
52
  with gr.Row():
53
  file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
54
+ upload_button = gr.Button("Upload")
55
+
56
  dataframe = gr.Dataframe(
57
  headers=["text", "label"],
58
  label="📝 Annotate Labels Below",
 
61
  row_count=10,
62
  col_count=(2, "fixed")
63
  )
64
+
65
+ status = gr.Textbox(label="Status", interactive=False)
66
 
67
  with gr.Row():
68
  save_btn = gr.Button("💾 Save")
69
  download_btn = gr.Button("⬇️ Download CSV")
70
+ download_file = gr.File(label="📥 Download", interactive=False)
71
 
72
  with gr.Row():
73
+ repo_input = gr.Textbox(label="📦 HF Dataset Repo (e.g. username/my-dataset)")
74
+ token_input = gr.Textbox(label="🔑 HF Token", type="password")
75
+ push_btn = gr.Button("🚀 Push to Hub")
76
  push_status = gr.Textbox(label="Push Status", interactive=False)
77
 
78
+ upload_button.click(fn=upload_csv, inputs=file_input, outputs=[dataframe, status])
79
  save_btn.click(fn=save_changes, inputs=dataframe, outputs=status)
80
  download_btn.click(fn=download_csv, outputs=download_file)
81
+ push_btn.click(fn=push_to_hub, inputs=[repo_input, token_input], outputs=push_status)
82
+
83
+ app.launch()