Suzana committed on
Commit
20e7095
·
verified ·
1 Parent(s): 7a0f989

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import io
4
+ from huggingface_hub import HfApi, HfFolder, Repository
5
+ import os
6
+
7
# Module-level table shared by the handlers in this file; they read it or
# rebind it through ``global df``. It starts out empty.
# NOTE(review): being a module global, this is presumably shared by every
# browser session served by the same process — confirm that is intended.
df: pd.DataFrame = pd.DataFrame()
9
+
10
def upload_csv(file):
    """Load an uploaded CSV into the shared table and reveal it for editing.

    Args:
        file: Value of the ``gr.File`` input. May be ``None`` (nothing
            selected), a filesystem path string, or an object exposing the
            path as ``.name``.

    Returns:
        A ``(table_update, status_message)`` pair for the Dataframe and the
        status Textbox outputs. On any failure the table is hidden and the
        previously loaded data is left untouched.
    """
    global df

    if file is None:
        return gr.update(visible=False), "Please choose a CSV file first."

    # Accept both a bare path and a file wrapper carrying the path in .name.
    csv_path = file if isinstance(file, str) else file.name

    try:
        loaded = pd.read_csv(csv_path)
    except (OSError, ValueError) as err:
        # pandas' parser/empty-data errors and decode errors are ValueErrors.
        return gr.update(visible=False), f"Could not read CSV: {err}"

    if "text" not in loaded.columns or "label" not in loaded.columns:
        return gr.update(visible=False), "CSV must contain 'text' and 'label' columns."

    # Missing labels become empty strings so the cells are editable text.
    loaded["label"] = loaded["label"].fillna("")

    # Only replace the shared state once the file is known to be valid.
    df = loaded

    # The output table is created hidden, so it must be made visible here.
    table_update = gr.update(
        value=df,
        visible=True,
        interactive=True,
        label="Edit labels below",
    )
    return table_update, "File uploaded successfully."
27
+
28
def save_edits(updated_table):
    """Store the edited table contents as the shared ``df``.

    Args:
        updated_table: Current contents of the editable table — a pandas
            DataFrame or a list of ``[text, label]`` rows. ``None`` means
            there is no table to save.

    Returns:
        A status message for the status Textbox.
    """
    global df

    if updated_table is None:
        # Do not overwrite the stored data with an empty frame.
        return "No table to save. Upload a CSV first."

    edited = pd.DataFrame(updated_table, columns=["text", "label"])
    # Same normalisation as on upload: a cleared label is an empty string.
    edited["label"] = edited["label"].fillna("")
    df = edited
    return "Changes saved."
32
+
33
def download_csv():
    """Write the shared table to a CSV file and return its path.

    Returns:
        Path of a newly written ``annotated_data.csv`` located in a fresh
        temporary directory, to be used as the value of the ``gr.File``
        download output.
    """
    import tempfile  # local import: only this handler needs it

    # The File output takes a path on disk rather than an in-memory buffer.
    # A per-call directory keeps the visible file name stable without
    # colliding with earlier downloads.
    # NOTE(review): these temp directories are never removed by this code.
    out_dir = tempfile.mkdtemp(prefix="annotated_")
    out_path = os.path.join(out_dir, "annotated_data.csv")
    df.to_csv(out_path, index=False)
    return out_path
37
+
38
def push_to_hub(repo_name, hf_token):
    """Upload the shared table as ``data.csv`` to a Hugging Face dataset repo.

    The repo is created if it does not exist. The CSV is uploaded straight
    from memory through the Hub HTTP API, so no local clone is made and no
    shell command is built from the user-supplied repo name.

    Args:
        repo_name: Dataset repo id, e.g. ``your-username/my-dataset``.
        hf_token: Hugging Face access token typed into the UI.

    Returns:
        A status message for the push-status Textbox.
    """
    repo_name = (repo_name or "").strip()
    if not repo_name:
        return "Please provide a dataset repo name (e.g. your-username/my-dataset)."
    if df.empty:
        return "No data to push. Upload a CSV first."

    # A blank token box is passed as None rather than as an empty string.
    # NOTE(review): the hub client presumably then falls back to locally
    # stored credentials — confirm against the huggingface_hub docs.
    token = (hf_token or "").strip() or None
    repo_url = f"https://huggingface.co/datasets/{repo_name}"

    api = HfApi()
    api.create_repo(repo_id=repo_name, token=token, repo_type="dataset", exist_ok=True)
    api.upload_file(
        path_or_fileobj=df.to_csv(index=False).encode("utf-8"),
        path_in_repo="data.csv",
        repo_id=repo_name,
        repo_type="dataset",
        token=token,
        commit_message="Upload annotated data",
    )

    return f"Pushed to Hugging Face: {repo_url}"
53
+
54
# UI layout and event wiring. Components render in creation order, so the
# statement order inside the Blocks context is the page layout.
with gr.Blocks() as demo:
    gr.Markdown("## 🏷️ CSV Text Labeling Tool")

    # File picker plus an explicit button that triggers loading.
    with gr.Row():
        csv_input = gr.File(file_types=[".csv"], label="Upload CSV")
        upload_btn = gr.Button("Upload")

    # Editable table (created hidden) and the shared status line.
    df_output = gr.Dataframe(
        headers=["text", "label"],
        interactive=True,
        visible=False,
    )
    upload_status = gr.Textbox(interactive=False, visible=True)

    # Save / download controls.
    with gr.Row():
        save_btn = gr.Button("Save Changes")
        download_btn = gr.Button("Download CSV")
        download_file = gr.File(interactive=False, label="Download")

    # Hugging Face Hub export controls.
    with gr.Row():
        hf_repo = gr.Textbox(
            label="HF Dataset Repo (e.g. your-username/my-dataset)",
        )
        hf_token = gr.Textbox(type="password", label="Hugging Face Token")
        push_btn = gr.Button("Push to Hugging Face Hub")
        push_status = gr.Textbox(interactive=False)

    # Button -> handler wiring, as (fn, inputs, outputs).
    upload_btn.click(upload_csv, csv_input, [df_output, upload_status])
    save_btn.click(save_edits, df_output, upload_status)
    download_btn.click(download_csv, outputs=download_file)
    push_btn.click(push_to_hub, [hf_repo, hf_token], push_status)

demo.launch()