Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
from pathlib import Path | |
from typing import Dict, List, Tuple | |
import os | |
import base64 | |
# Base directory used for the input workbooks and the exported annotations.
PROCESSED_DATA_DIR = Path(".")

# The logo is inlined as a base64 data URI (rather than served through a
# Gradio component) to avoid Gradio toolbar interactions.
logo_path = "rowsquared-logo-large.png"
with Path(logo_path).open("rb") as f:
    logo_b64 = str(base64.b64encode(f.read()), "utf-8")
# ---------------------------- | |
# Data loading & preprocessing | |
# ---------------------------- | |
# Read the ISCO table, forcing the four code columns to be parsed as strings,
# then keep only the label columns: one row per distinct, fully populated
# (major, sub-major, minor, unit) label path.
df_isco = pd.read_excel(
    PROCESSED_DATA_DIR / "isco_imperfect.xlsx",
    converters=dict.fromkeys(["major", "sub_major", "minor", "unit"], str),
)
df_isco = df_isco.loc[
    :, ["major_label", "sub_major_label", "minor_label", "unit_label"]
].dropna()
df_isco = df_isco.drop_duplicates().reset_index(drop=True)
# Nested lookup built from df_isco:
#   {major_label: {sub_major_label: {minor_label: [unit_label, ...]}}}
hierarchy: Dict[str, Dict[str, Dict[str, List[str]]]] = {}
for row in df_isco.itertuples(index=False):
    by_sub = hierarchy.setdefault(row.major_label, {})
    by_minor = by_sub.setdefault(row.sub_major_label, {})
    by_minor.setdefault(row.minor_label, []).append(row.unit_label)

# Leaf lists: drop repeated unit labels and store them in sorted order.
# Only values are replaced, so iterating the dicts while assigning is safe.
for by_sub in hierarchy.values():
    for by_minor in by_sub.values():
        for minor_label, unit_labels in by_minor.items():
            by_minor[minor_label] = sorted(set(unit_labels))
# Fast helpers for children | |
def majors() -> List[str]:
    """Return all major-level labels of ``hierarchy`` in sorted order."""
    return sorted(hierarchy)
def submajors(maj: str) -> List[str]:
    """Return the sorted sub-major labels under ``maj``.

    An unknown ``maj`` yields an empty list.
    """
    by_sub = hierarchy.get(maj, {})
    return sorted(by_sub)
def minors(maj: str, sub: str) -> List[str]:
    """Return the sorted minor labels under the ``maj`` / ``sub`` path.

    An unknown ``maj`` or ``sub`` yields an empty list.
    """
    by_sub = hierarchy.get(maj, {})
    by_minor = by_sub.get(sub, {})
    return sorted(by_minor)
def units(maj: str, sub: str, mn: str) -> List[str]:
    """Return the unit labels stored under the ``maj`` / ``sub`` / ``mn`` path.

    The list is returned exactly as stored in ``hierarchy`` (no re-sorting
    here); an unknown label at any level yields an empty list.
    """
    by_sub = hierarchy.get(maj, {})
    by_minor = by_sub.get(sub, {})
    return by_minor.get(mn, [])
# ---------------------------- | |
# Records to annotate | |
# ---------------------------- | |
# Records to annotate. ``read_excel`` returns a fresh DataFrame, so no
# defensive copies are needed before mutating it in the callbacks.
records = pd.read_excel(PROCESSED_DATA_DIR / "isco_predictions.xlsx")

for col in ["major_label", "sub_major_label", "minor_label", "unit_label"]:
    if col not in records:
        records[col] = ""
    # A label column that is present but empty is parsed as float64 (all NaN).
    # Writing string labels into such a column is an incompatible-dtype
    # assignment, which recent pandas versions deprecate. Normalise every label
    # column to object dtype and use "" for missing values, matching the
    # default used above for absent columns.
    values = records[col].astype(object)
    values[values.isna()] = ""
    records[col] = values

if "annotated" not in records:
    records["annotated"] = False

records.reset_index(drop=True, inplace=True)
# ----------------------------------- | |
# Core logic: clamp & state management | |
# ----------------------------------- | |
def clamp_path(maj: str, sub: str, mn: str, un: str
               ) -> Tuple[str, str, str, str, List[str], List[str], List[str], List[str]]:
    """Coerce a (major, sub-major, minor, unit) path into one valid for the hierarchy.

    Levels are checked top-down. A level is kept when it is among the choices
    available under the (already clamped) levels above it; otherwise it is
    replaced by the first available choice, or by "" when there is none.

    Returns:
        ``(maj, sub, mn, un, maj_choices, sub_choices, mn_choices, un_choices)``
        - the clamped path followed by the choice list for each level.
    """
    def _keep_or_first(current, options):
        # Keep a valid selection; otherwise fall back to the first option.
        if current in options:
            return current
        return options[0] if options else ""

    maj_choices = majors()
    maj = _keep_or_first(maj, maj_choices)

    # An empty parent level means there is nothing to choose from below it.
    sub_choices = submajors(maj) if maj else []
    sub = _keep_or_first(sub, sub_choices)

    mn_choices = minors(maj, sub) if sub else []
    mn = _keep_or_first(mn, mn_choices)

    un_choices = units(maj, sub, mn) if mn else []
    un = _keep_or_first(un, un_choices)

    return maj, sub, mn, un, maj_choices, sub_choices, mn_choices, un_choices
def save_record(i: int, maj: str, sub: str, mn: str, un: str) -> None:
    """Store the four labels in row ``i`` of ``records`` and flag the row as annotated."""
    label_columns = ["major_label", "sub_major_label", "minor_label", "unit_label"]
    new_labels = [maj, sub, mn, un]
    records.loc[i, label_columns] = new_labels
    records.loc[i, "annotated"] = True
def status_text(i: int) -> str:
    """Return the Markdown status line for row ``i``, based on its ``annotated`` flag."""
    # NOTE(review): the leading "β" in both labels looks like a mis-encoded
    # symbol - confirm the intended characters against the original file.
    if records.loc[i, "annotated"]:
        label = "β Annotated"
    else:
        label = "β Not Annotated"
    return f"**Status**: {label}"
def load_record(i: int):
    """Build the UI payload for record ``i``.

    The labels stored for the record are clamped to a valid hierarchy path
    (see ``clamp_path``). If clamping changed anything, the corrected labels
    are written back to ``records``.

    Displaying a record does NOT set its ``annotated`` flag: that flag is only
    set by the selection handlers and by navigation. (Previously this function
    went through ``save_record``, which flagged every record as annotated as
    soon as it was shown, so the status could never read "Not Annotated".)

    Returns:
        A 6-tuple: record Markdown, status Markdown, and one ``gr.update`` per
        level (major, sub-major, minor, unit) carrying its choices and value.
    """
    label_columns = ["major_label", "sub_major_label", "minor_label", "unit_label"]
    rec = records.loc[i]
    stored = [rec[c] for c in label_columns]

    maj, sub, mn, un, maj_c, sub_c, mn_c, un_c = clamp_path(*stored)

    # Persist clamped values back only if they changed, leaving "annotated" as is.
    clamped = [maj, sub, mn, un]
    if stored != clamped:
        records.loc[i, label_columns] = clamped

    record_md = f"## Occupation: {rec['occupation_title_main']}\n## Industry: {rec['industry_title_main']}"
    return (
        record_md,
        status_text(i),
        gr.update(choices=maj_c, value=maj),
        gr.update(choices=sub_c, value=sub),
        gr.update(choices=mn_c, value=mn),
        gr.update(choices=un_c, value=un),
    )
# --------------------- | |
# Event handler helpers | |
# --------------------- | |
def on_major_change(new_major: str, i: int):
    """Handle a new major selection for record ``i``.

    Every lower level is reset to the first choice available under the new
    major ("" when a level has no choices), the resulting path is saved via
    ``save_record``, and updates for all four radios plus the status line
    are returned.
    """
    sub_options = submajors(new_major)
    first_sub = sub_options[0] if sub_options else ""

    minor_options = minors(new_major, first_sub) if first_sub else []
    first_minor = minor_options[0] if minor_options else ""

    unit_options = units(new_major, first_sub, first_minor) if first_minor else []
    first_unit = unit_options[0] if unit_options else ""

    save_record(i, new_major, first_sub, first_minor, first_unit)

    major_update = gr.update(choices=majors(), value=new_major)
    sub_update = gr.update(choices=sub_options, value=first_sub)
    minor_update = gr.update(choices=minor_options, value=first_minor)
    unit_update = gr.update(choices=unit_options, value=first_unit)
    return major_update, sub_update, minor_update, unit_update, status_text(i)
def on_sub_change(new_sub: str, i: int, major: str):
    """Handle a new sub-major selection for record ``i``.

    Minor and unit are reset to the first choice available under
    ``major`` / ``new_sub`` ("" when there is none). The sub-major, minor and
    unit labels are stored, the record is flagged as annotated, and updates
    for the three lower radios plus the status line are returned.
    """
    minor_options = minors(major, new_sub)
    first_minor = minor_options[0] if minor_options else ""

    unit_options = units(major, new_sub, first_minor) if first_minor else []
    first_unit = unit_options[0] if unit_options else ""

    changed_columns = ["sub_major_label", "minor_label", "unit_label"]
    records.loc[i, changed_columns] = [new_sub, first_minor, first_unit]
    records.loc[i, "annotated"] = True

    sub_update = gr.update(choices=submajors(major), value=new_sub)
    minor_update = gr.update(choices=minor_options, value=first_minor)
    unit_update = gr.update(choices=unit_options, value=first_unit)
    return sub_update, minor_update, unit_update, status_text(i)
def on_minor_change(new_minor: str, i: int, major: str, sub: str):
    """Handle a new minor selection for record ``i``.

    The unit is reset to the first choice under ``major`` / ``sub`` /
    ``new_minor`` ("" when there is none). The minor and unit labels are
    stored, the record is flagged as annotated, and updates for the minor and
    unit radios plus the status line are returned.
    """
    unit_options = units(major, sub, new_minor)
    first_unit = unit_options[0] if unit_options else ""

    records.loc[i, ["minor_label", "unit_label"]] = [new_minor, first_unit]
    records.loc[i, "annotated"] = True

    minor_update = gr.update(choices=minors(major, sub), value=new_minor)
    unit_update = gr.update(choices=unit_options, value=first_unit)
    return minor_update, unit_update, status_text(i)
def on_unit_change(new_unit: str, i: int, major: str, sub: str, mn: str):
    """Handle a new unit selection for record ``i``.

    A unit that is not among the choices under ``major`` / ``sub`` / ``mn`` is
    replaced by the first available choice ("" when there is none). The unit
    label is stored, the record is flagged as annotated, and an update for the
    unit radio plus the status line are returned.
    """
    unit_options = units(major, sub, mn)
    if new_unit in unit_options:
        chosen = new_unit
    else:
        chosen = unit_options[0] if unit_options else ""

    records.loc[i, "unit_label"] = chosen
    records.loc[i, "annotated"] = True

    unit_update = gr.update(choices=unit_options, value=chosen)
    return unit_update, status_text(i)
def go_next(i: int) -> int:
    """Return the index following ``i``, wrapping past the last record to 0."""
    total = len(records)
    return (i + 1) % total
def go_prev(i: int) -> int:
    """Return the index preceding ``i``, wrapping from 0 to the last record."""
    total = len(records)
    return (i - 1) % total
# ---- NAVIGATION: save + move + reload in ONE callback ---- | |
def save_and_jump(i: int, direction: str):
    """Save record ``i``, move to the neighbouring record and load it.

    The labels currently stored for ``i`` are clamped to a valid path and
    saved (which also flags the record as annotated). ``direction == "next"``
    moves forward; any other value moves backward.

    Returns:
        The new index followed by the items returned by ``load_record``.
    """
    # Final safety net: clamp and persist whatever is currently stored.
    current = records.loc[i]
    clamped = clamp_path(
        current["major_label"],
        current["sub_major_label"],
        current["minor_label"],
        current["unit_label"],
    )[:4]
    save_record(i, *clamped)

    if direction == "next":
        target = go_next(i)
    else:
        target = go_prev(i)
    return (target, *load_record(target))
def download_annotations() -> str:
    """Write ``records`` to ``annotated_output.csv`` (without the index) and return its path."""
    out_path = PROCESSED_DATA_DIR / "annotated_output.csv"
    records.to_csv(out_path, index=False)
    return str(out_path)
# -------------- | |
# Build the UI | |
# -------------- | |
def build_gradio_app():
    """Assemble the ISCO annotation UI and return the ``gr.Blocks`` app.

    Layout, top to bottom: logo, title and CSS overrides; the current record
    with its status line; Previous/Next buttons; one radio group per ISCO
    level; and a download button with its (initially hidden) file output.

    The level radios are wired with ``.input`` rather than ``.change``.
    ``.change`` also fires when a callback sets a radio's value, so every
    programmatic refresh (initial load, navigation, a parent-level handler)
    would re-run the level handlers. Those handlers reset the lower levels to
    their first choice and flag the record as annotated, overwriting the
    values that were just loaded. ``.input`` fires only for user selections.
    """
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column(scale=1):
                # Static logo, non-interactive
                gr.HTML(
                    f'<img src="data:image/png;base64,{logo_b64}" width="200" style="pointer-events:none; user-select:none; display:block;" />'
                )
        with gr.Row():
            gr.Markdown("# ISCO Annotation", elem_id="isco-title")
        # Centre the title and hide Gradio's footer / API / share links.
        gr.HTML("""
<style>
#isco-title {
text-align: center;
width: 100%;
margin: 0.5em 0;
}
footer { display: none !important; }
.gradio-container .api-link, .gradio-container .share-link { display: none !important; }
</style>
""")

        # Index of the record currently shown.
        idx_state = gr.State(0)

        with gr.Group():
            record_md = gr.Markdown()
            status_md = gr.Markdown()

        # NOTE(review): the button labels below start with what look like
        # mis-encoded symbols - confirm the intended characters.
        with gr.Row():
            prev_btn = gr.Button("β¬ Previous")
            next_btn = gr.Button("β Next")

        with gr.Row():
            with gr.Column():
                major_radio = gr.Radio(label="Level 1: Major", choices=[], interactive=True)
            with gr.Column():
                sub_radio = gr.Radio(label="Level 2: Sub-major", choices=[], interactive=True)
            with gr.Column():
                minor_radio = gr.Radio(label="Level 3: Minor", choices=[], interactive=True)
            with gr.Column():
                unit_radio = gr.Radio(label="Level 4: Unit", choices=[], interactive=True)

        download_btn = gr.Button("π₯ Download Annotations")
        download_file = gr.File(label="Annotated CSV", visible=False)

        # Components refreshed whenever a record is (re)loaded.
        record_outputs = [
            idx_state, record_md, status_md,
            major_radio, sub_radio, minor_radio, unit_radio,
        ]

        # Initial load
        demo.load(lambda: (0,) + load_record(0), outputs=record_outputs)

        # Navigation: save + move + reload in one callback each.
        next_btn.click(
            lambda i: save_and_jump(i, "next"),
            inputs=[idx_state],
            outputs=record_outputs,
        )
        prev_btn.click(
            lambda i: save_and_jump(i, "prev"),
            inputs=[idx_state],
            outputs=record_outputs,
        )

        # Selection handlers (also update status). User-triggered only: see
        # the docstring for why ``.input`` is used instead of ``.change``.
        major_radio.input(
            on_major_change,
            inputs=[major_radio, idx_state],
            outputs=[major_radio, sub_radio, minor_radio, unit_radio, status_md],
        )
        sub_radio.input(
            on_sub_change,
            inputs=[sub_radio, idx_state, major_radio],
            outputs=[sub_radio, minor_radio, unit_radio, status_md],
        )
        minor_radio.input(
            on_minor_change,
            inputs=[minor_radio, idx_state, major_radio, sub_radio],
            outputs=[minor_radio, unit_radio, status_md],
        )
        unit_radio.input(
            on_unit_change,
            inputs=[unit_radio, idx_state, major_radio, sub_radio, minor_radio],
            outputs=[unit_radio, status_md],
        )

        # Download: write the CSV, then reveal the file component.
        download_btn.click(download_annotations, outputs=[download_file]).then(
            lambda: gr.update(visible=True), None, [download_file]
        )
    return demo
if __name__ == "__main__":
    demo = build_gradio_app()

    # NOTE(review): when APP_USER / APP_PASS are unset, the credentials fall
    # back to a pair of empty strings - confirm that this is intended.
    credentials = (os.getenv("APP_USER", ""), os.getenv("APP_PASS", ""))
    port = int(os.getenv("PORT", 7860))

    demo.queue().launch(
        show_api=False,
        ssr_mode=False,  # disable experimental SSR
        auth=credentials,
        server_name="0.0.0.0",  # optional, but explicit
        server_port=port,
    )