"""Label it! — a small Gradio app for labeling text rows from a CSV file.

Workflow: upload a CSV with ``text`` and ``label`` columns, edit the labels
in the browser, then download the result or push it to a Hugging Face
dataset repository.
"""

import io
import os
import tempfile

import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, HfFolder, Repository

# Columns every uploaded CSV must provide.
REQUIRED_COLUMNS = ["text", "label"]

# Global state: the most recently uploaded / saved table.
# NOTE(review): this is module-level state, so it is presumably shared by all
# sessions served by this process — consider gr.State if multi-user use matters.
df = pd.DataFrame()


def upload_csv(file):
    """Load an uploaded CSV into the global table and show it for editing.

    Args:
        file: Value delivered by the ``gr.File`` input. May be ``None`` (no
            file selected), a path string, or an object exposing the path
            as ``.name``.

    Returns:
        A ``(table_update, status_message)`` pair for the Dataframe and the
        status Textbox. On failure the table is hidden and the previously
        loaded data is left untouched.
    """
    global df

    if file is None:
        return gr.update(visible=False), "Please select a CSV file first."

    # Depending on the Gradio version the input is a file-like object with a
    # ``.name`` path or the path string itself; accept both.
    path = getattr(file, "name", file)

    try:
        loaded = pd.read_csv(path)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
        OSError,
    ) as err:
        return gr.update(visible=False), f"Could not read CSV: {err}"

    if any(column not in loaded.columns for column in REQUIRED_COLUMNS):
        return (
            gr.update(visible=False),
            "CSV must contain 'text' and 'label' columns.",
        )

    # Missing labels become empty strings so they are editable in the table.
    loaded["label"] = loaded["label"].fillna("")

    # Commit to the global only after validation succeeded, so a bad upload
    # cannot clobber data that was loaded earlier.
    df = loaded

    # The table component starts hidden, so it must be made visible here.
    table_update = gr.update(
        value=df,
        visible=True,
        interactive=True,
        label="Edit labels below",
    )
    return table_update, "File uploaded successfully."


def save_edits(updated_table):
    """Store the edited table contents as the new global table.

    Args:
        updated_table: Current contents of the editable Dataframe component.

    Returns:
        A status message for the status Textbox.
    """
    global df
    df = pd.DataFrame(updated_table, columns=REQUIRED_COLUMNS)
    return "Changes saved."


def download_csv():
    """Write the global table to a temporary CSV file for download.

    Returns:
        Path of the written ``annotated_data.csv`` file; ``gr.File`` serves
        files by path.
    """
    # A fresh directory per call keeps the friendly file name without
    # colliding with earlier downloads.
    out_dir = tempfile.mkdtemp(prefix="label_it_")
    out_path = os.path.join(out_dir, "annotated_data.csv")
    df.to_csv(out_path, index=False)
    return out_path


def push_to_hub(repo_name, hf_token):
    """Upload the global table as ``data.csv`` to a Hugging Face dataset repo.

    The repository is created if it does not exist yet. The CSV is uploaded
    through the Hub HTTP API, so no local clone or shell command is involved.

    Args:
        repo_name: Dataset repo id, e.g. ``your-username/my-dataset``.
        hf_token: Hugging Face access token. If blank, ``None`` is passed so
            that huggingface_hub can fall back to locally stored credentials.

    Returns:
        A status message for the push status Textbox.
    """
    repo_name = (repo_name or "").strip()
    token = (hf_token or "").strip() or None

    if not repo_name:
        return "Please enter a dataset repo name (e.g. your-username/my-dataset)."
    if df.empty:
        return "No data to push. Upload a CSV first."

    repo_url = f"https://huggingface.co/datasets/{repo_name}"

    api = HfApi()
    api.create_repo(
        repo_id=repo_name,
        token=token,
        repo_type="dataset",
        exist_ok=True,
    )
    api.upload_file(
        path_or_fileobj=df.to_csv(index=False).encode("utf-8"),
        path_in_repo="data.csv",
        repo_id=repo_name,
        repo_type="dataset",
        token=token,
    )
    return f"Pushed to Hugging Face: {repo_url}"


with gr.Blocks() as demo:
    gr.Markdown("## 🏷️ Label it! Text Labeling Tool")

    # Upload controls.
    with gr.Row():
        csv_input = gr.File(label="Upload CSV", file_types=[".csv"])
        upload_btn = gr.Button("Upload")

    # Editable table; hidden until a valid CSV has been uploaded.
    df_output = gr.Dataframe(
        headers=["text", "label"],
        interactive=True,
        visible=False,
    )
    upload_status = gr.Textbox(visible=True, interactive=False)

    # Save / download controls.
    with gr.Row():
        save_btn = gr.Button("Save Changes")
        download_btn = gr.Button("Download CSV")
        download_file = gr.File(label="Download", interactive=False)

    # Hugging Face Hub controls.
    with gr.Row():
        hf_repo = gr.Textbox(
            label="HF Dataset Repo (e.g. your-username/my-dataset)"
        )
        hf_token = gr.Textbox(label="Hugging Face Token", type="password")
        push_btn = gr.Button("Push to Hugging Face Hub")
        push_status = gr.Textbox(interactive=False)

    upload_btn.click(
        fn=upload_csv,
        inputs=csv_input,
        outputs=[df_output, upload_status],
    )
    save_btn.click(fn=save_edits, inputs=df_output, outputs=upload_status)
    download_btn.click(fn=download_csv, outputs=download_file)
    push_btn.click(
        fn=push_to_hub,
        inputs=[hf_repo, hf_token],
        outputs=push_status,
    )

demo.launch()