# File size: 18,216 Bytes

import csv
import json
import math
import os
import re
import tempfile
import time
from typing import List, Dict, Tuple

import gradio as gr
import matplotlib.pyplot as plt

# ==========================
# Config & styles
# ==========================
# Column headers of the per-indicator results table. evaluate() returns them
# as the "columns" entry of its table dict and the UI uses them as the
# gr.Dataframe headers.
DEFAULT_COLS = [
    "Código", "Indicador", "Score (0–4)",
    "Entailment medio", "Evidencias (hipótesis)", "Descripción"
]

# Stylesheet passed to gr.Blocks(css=...). It sizes the #app container and
# defines the .badge, .card and .small classes used by the UI markup.
CUSTOM_CSS = """
#app {max-width: 1200px; margin: 0 auto;}
.badge {
  display:inline-block; padding:10px 14px; border-radius:12px; font-weight:700;
  background:linear-gradient(135deg,#1f6feb,#5ac8fa); color:white; box-shadow:0 6px 20px rgba(0,0,0,.2);
}
.card {
  background: rgba(255,255,255,.03);
  border: 1px solid rgba(255,255,255,.08);
  border-radius: 14px; padding: 14px;
  box-shadow: 0 8px 24px rgba(0,0,0,.18);
}
.small {font-size: 12px; opacity: .9;}
"""

# ==========================
# IPMA ICB4 4.4.5.x metadata
# ==========================
# Maps each indicator code to a (short name, description) tuple. evaluate()
# unpacks the tuple to fill the name and description cells of each table row.
INDICATOR_META = {
    "4.4.5.1": ("Iniciativa y ayuda proactiva",
                "Inicia acciones sin que se lo pidan; ofrece ayuda, anticipa y equilibra riesgos."),
    "4.4.5.2": ("Ownership y compromiso",
                "Asume responsabilidad; impulsa el proyecto; define/monitorea indicadores y mejora procesos."),
    "4.4.5.3": ("Dirección, coaching y mentoring",
                "Da dirección; coach/mentor al equipo; alinea visión, valores y objetivos."),
    "4.4.5.4": ("Poder e influencia",
                "Usa influencia adecuada; elige bien el canal; es percibido como líder por stakeholders."),
    "4.4.5.5": ("Decisiones",
                "Toma decisiones bajo incertidumbre; explica razones; revisa con nueva evidencia; comunica con claridad.")
}

# ==========================
# Models (CPU Basic friendly)
# ==========================
# Lazily populated by lazy_load_llm(): model, tokenizer and generation pipeline.
_llm = None
_llm_tok = None
_gen = None
_nli_cache: Dict[str, object] = {}  # cache of NLI pipelines keyed by model_id

LLM_ID = "Qwen/Qwen2.5-0.5B-Instruct"  # small multilingual LLM used to extract STAR

# NLI model selector with its associated configuration.
# Each entry holds:
#   "id"         -> model repository id handed to the NLI pipeline
#   "calibrate"  -> whether calibrate_prob() applies its p**0.9 adjustment
#   "thresholds" -> entailment cut-offs for scores 4, 3, 2 and 1 (in that order)
MODEL_CHOICES = {
    "Velocidad (MiniLM)": {
        "id": "MoritzLaurer/multilingual-MiniLMv2-L12-mnli-xnli",
        "calibrate": True,
        "thresholds": (0.70, 0.50, 0.30, 0.15)  # cut-offs for scores 4,3,2,1
    },
    "Precisión (DeBERTa)": {
        "id": "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
        "calibrate": False,
        "thresholds": (0.80, 0.60, 0.40, 0.20)
    }
}
DEFAULT_MODEL_KEY = "Velocidad (MiniLM)"  # default on free Spaces

# Prompt template for the STAR-extraction LLM. extract_star() fills the
# {texto} placeholder with the candidate's text via str.format().
STAR_PROMPT = """Eres evaluador ICB4. Toma el texto del candidato y devuélvelo en formato STAR como JSON válido con claves:
"situation" (<=3 frases), "task" (<=2 frases), "action" (lista de viñetas, verbos de acción), "result" (lista de viñetas, resultados/indicadores/aprendizajes).
Siempre responde SOLO con JSON válido y conciso en español, sin comentarios adicionales.

TEXTO:
{texto}
"""

# NLI hypotheses per indicator code. evaluate() iterates this mapping and
# score_indicator() tests every hypothesis of an indicator against the premise,
# averaging the resulting entailment probabilities.
HYP: Dict[str, List[str]] = {
    "4.4.5.1": [
        "Tomó la iniciativa sin que se lo pidieran.",
        "Ofreció ayuda o asesoría no solicitada.",
        "Pensó con orientación al futuro.",
        "Equilibró iniciativa y riesgo."
    ],
    "4.4.5.2": [
        "Mostró compromiso personal con los objetivos.",
        "Promovió el proyecto y generó entusiasmo.",
        "Definió o monitoreó indicadores de desempeño.",
        "Buscó mejoras en procesos."
    ],
    "4.4.5.3": [
        "Proporcionó dirección clara al equipo.",
        "Realizó coaching o mentoring para mejorar capacidades.",
        "Estableció y comunicó visión y valores.",
        "Alineó objetivos individuales con los comunes."
    ],
    "4.4.5.4": [
        "Usó apropiadamente poder e influencia.",
        "Seleccionó el canal de comunicación adecuado para influir.",
        "Fue percibido como líder por los stakeholders."
    ],
    "4.4.5.5": [
        "Tomó decisiones bajo incertidumbre considerando pros y contras.",
        "Explicó el razonamiento de las decisiones.",
        "Revisó decisiones con nueva evidencia.",
        "Comunicó claramente la decisión e influyó su adopción."
    ]
}

# ==========================
# Carga perezosa de modelos
# ==========================
def lazy_load_llm():
    """Return the shared text-generation pipeline used to extract STAR.

    On the first call the tokenizer, model and pipeline for ``LLM_ID`` are
    created and stored in the module-level ``_llm_tok``, ``_llm`` and ``_gen``
    globals. Later calls return the cached pipeline without touching
    ``transformers`` again.

    Returns:
        The cached ``transformers`` text-generation pipeline.
    """
    global _llm, _llm_tok, _gen
    # Consult the cache before importing anything: a cache hit must not
    # depend on (or pay for) the heavy ``transformers`` import.
    if _gen is not None:
        return _gen

    # Imported lazily so that merely importing this module stays cheap.
    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

    _llm_tok = AutoTokenizer.from_pretrained(LLM_ID)
    _llm = AutoModelForCausalLM.from_pretrained(LLM_ID, device_map="auto")
    _gen = pipeline(
        "text-generation",
        model=_llm,
        tokenizer=_llm_tok,
        max_new_tokens=512,
        do_sample=False,        # greedy decoding
        repetition_penalty=1.1,
    )
    return _gen

def lazy_load_nli(model_id: str):
    """Return the NLI text-classification pipeline for ``model_id``.

    Pipelines are cached per ``model_id`` in the module-level ``_nli_cache``;
    a cache hit returns immediately without importing ``transformers``.

    Args:
        model_id: Model repository id, used for both model and tokenizer.

    Returns:
        The (possibly cached) ``transformers`` pipeline.
    """
    cached = _nli_cache.get(model_id)
    if cached is not None:
        return cached

    # Imported lazily so that merely importing this module stays cheap.
    from transformers import pipeline

    # NOTE(review): recent transformers releases deprecate
    # ``return_all_scores`` in favour of ``top_k=None`` - confirm against the
    # version pinned for this app before switching.
    nli = pipeline(
        "text-classification",
        model=model_id,
        tokenizer=model_id,
        return_all_scores=True,   # {label, score} for every class
        truncation=True           # truncate over-long inputs
    )
    _nli_cache[model_id] = nli
    return nli

# ==========================
# Utilidades extracción STAR
# ==========================
def extract_json_block(text: str) -> str:
    """Return the substring from the first ``{`` to the last ``}`` of ``text``.

    When no such span exists (no opening brace, or the last closing brace does
    not come after the first opening one) an empty STAR JSON object is
    returned instead.
    """
    opening = text.find("{")
    closing = text.rfind("}")
    # A missing "{" or a "}" that does not follow it means there is no block.
    if opening == -1 or closing <= opening:
        return '{"situation":"","task":"","action":[],"result":[]}'
    return text[opening:closing + 1]

def _split_bullets(block_text: str) -> List[str]:
    """Split a section body into bullet items, dropping bullet marks and blanks."""
    items = (line.strip("•- ") for line in block_text.splitlines())
    return [item for item in items if item]


def quick_parse_star(txt: str):
    """Parse text that is already laid out with explicit STAR headers.

    A header is a line starting with ``SITUATION``/``TASK``/``ACTION``/
    ``RESULT`` (optionally plural, any case, optional colon) or with one of
    the short forms ``S:``, ``T:``, ``A:``, ``R:``.

    Args:
        txt: Raw user text; ``None`` is treated as empty.

    Returns:
        A dict with ``situation`` and ``task`` strings and ``action`` and
        ``result`` lists, or ``None`` when the text has no usable STAR
        headers (so the caller can fall back to another extractor).
    """
    t = (txt or "").strip()
    if not t:
        return None
    # Cheap substring pre-filter before running the regex.
    keys = ("SITUATION", "TASK", "ACTION", "RESULT", "S:", "T:", "A:", "R:")
    if not any(k in t for k in keys):
        return None

    # ``\b`` stops word labels from matching mere prefixes such as
    # "Resultado" or "Tasks were ...", which used to leave the rest of the
    # word ("ado: ...") as section content.
    blocks = re.split(
        r'(?im)^((?:SITUATION|TASK|ACTION|RESULT)S?\b|[STAR]:)\s*:?', t
    )
    # With one capture group re.split yields [pre, label, body, label, body...];
    # fewer than three parts means no header matched at a line start.
    if len(blocks) < 3:
        return None

    sections = {"situation": "", "task": "", "action": [], "result": []}
    for label, body in zip(blocks[1::2], blocks[2::2]):
        key = label.lower()[0]
        val = body.strip()
        if key == "s":
            sections["situation"] = val
        elif key == "t":
            sections["task"] = val
        elif key == "a":
            sections["action"] = _split_bullets(val)
        elif key == "r":
            sections["result"] = _split_bullets(val)

    # Headers without any content are as useless as no headers at all.
    if not any(sections.values()):
        return None
    return sections

def _coerce_text(value) -> str:
    """Turn a JSON field into a stripped string (sequences are space-joined)."""
    if value is None:
        return ""
    if isinstance(value, (list, tuple)):
        return " ".join(str(v) for v in value if v is not None).strip()
    return str(value).strip()


def _coerce_items(value) -> List[str]:
    """Turn a JSON field into a list of non-empty, bullet-free strings."""
    if value is None:
        return []
    if isinstance(value, dict):
        value = list(value.values())
    elif not isinstance(value, (list, tuple)):
        value = [value]
    cleaned = (str(v).strip(" •-") for v in value if v is not None)
    return [c for c in cleaned if c]


def extract_star(user_text: str) -> Dict:
    """Extract the STAR sections of ``user_text``.

    Text with explicit STAR headers is parsed directly by
    ``quick_parse_star``. Otherwise the LLM is prompted with ``STAR_PROMPT``
    and the JSON object found in its answer is normalised.

    Args:
        user_text: Candidate's case, STAR-formatted or free text.

    Returns:
        A dict with ``situation`` and ``task`` strings and ``action`` and
        ``result`` lists of strings. Fields the model omitted or returned as
        ``null`` come back empty.
    """
    parsed = quick_parse_star(user_text)
    # Only trust the fast path when it actually captured some content; an
    # all-empty parse would otherwise bypass the LLM and score nothing.
    if parsed and any(parsed.values()):
        return parsed

    gen = lazy_load_llm()
    prompt = STAR_PROMPT.format(texto=(user_text or "").strip())
    out = gen(prompt)[0]["generated_text"]
    # Text-generation pipelines echo the prompt by default; drop it so braces
    # in the user's own text are not mistaken for the model's JSON answer.
    if out.startswith(prompt):
        out = out[len(prompt):]
    raw = extract_json_block(out)

    try:
        data = json.loads(raw)
    except ValueError:  # json.JSONDecodeError is a ValueError
        data = None
    if not isinstance(data, dict):
        # Last resort: recover at least an explicit "Situation:" line.
        data = {"situation": "", "task": "", "action": [], "result": []}
        m = re.search(r'Situation[:：]\s*(.*)', user_text or "", flags=re.I)
        if m:
            data["situation"] = m.group(1).strip()

    # The model may return null, a bare string or a nested value for any
    # field; coerce everything into the documented shape instead of raising.
    return {
        "situation": _coerce_text(data.get("situation")),
        "task": _coerce_text(data.get("task")),
        "action": _coerce_items(data.get("action")),
        "result": _coerce_items(data.get("result")),
    }

# ==========================
# NLI + scoring (dinámico por modelo)
# ==========================
def calibrate_prob(p: float, use_calibration: bool) -> float:
    """Clamp ``p`` to [0, 1] and optionally apply a mild calibration.

    Args:
        p: Raw probability; anything ``float()`` accepts.
        use_calibration: When true the clamped value is raised to the power
            0.9; otherwise it is returned as is.

    Returns:
        The clamped (and possibly calibrated) probability. NaN yields 0.0.
    """
    p = float(p)
    # ``max(0.0, min(1.0, nan))`` evaluates to 1.0, which would turn an
    # invalid score into maximal entailment; treat NaN as "no evidence".
    if math.isnan(p):
        return 0.0
    p = max(0.0, min(1.0, p))
    return (p ** 0.9) if use_calibration else p

def nli_entails(premise: str, hypothesis: str, model_id: str) -> float:
    """Return the ENTAILMENT probability of ``hypothesis`` given ``premise``.

    Both texts are stripped and cut to 900 characters before inference. The
    NLI model is loaded only when both texts are non-empty.

    Args:
        premise: Evidence text.
        hypothesis: Statement to test against the premise.
        model_id: Model repository id passed to ``lazy_load_nli``.

    Returns:
        The score of the first label containing "entail", or 0.0 when either
        text is empty, inference fails, or no such label is present.
    """
    max_chars = 900
    text_a = (premise or "").strip()[:max_chars]
    text_b = (hypothesis or "").strip()[:max_chars]
    # Check inputs before loading anything: an empty premise must not trigger
    # a model load.
    if not text_a or not text_b:
        return 0.0

    nli = lazy_load_nli(model_id)
    try:
        res = nli({"text": text_a, "text_pair": text_b})
    except Exception:
        # Deliberate best-effort: a failed inference counts as "no entailment".
        return 0.0

    # The output may be a dict, a flat [{label, score}, ...] list or a
    # nested [[{...}]] list; normalise it to a flat list of candidates.
    if isinstance(res, dict):
        candidates = [res]
    elif isinstance(res, list):
        candidates = res[0] if (res and isinstance(res[0], list)) else res
    else:
        return 0.0

    for cand in candidates:
        if not isinstance(cand, dict):
            continue
        if "entail" in str(cand.get("label", "")).lower():
            try:
                return float(cand.get("score", 0.0))
            except (TypeError, ValueError):
                return 0.0
    return 0.0

def map_prob_to_score(p: float, thresholds: Tuple[float, float, float, float]) -> int:
    """Convert a probability into a 0-4 score.

    ``thresholds`` holds the cut-offs for scores 4, 3, 2 and 1, in that
    order; the first cut-off that ``p`` reaches decides the score, and 0 is
    returned when none is reached.
    """
    cut4, cut3, cut2, cut1 = thresholds
    for score, cutoff in ((4, cut4), (3, cut3), (2, cut2), (1, cut1)):
        if p >= cutoff:
            return score
    return 0

def score_indicator(premise: str, hyps: List[str], model_id: str, use_calibration: bool,
                    thresholds: Tuple[float, float, float, float]) -> Tuple[int, List[Tuple[str, float]], float]:
    """Score one indicator from the entailment of its hypotheses.

    Each hypothesis is tested against ``premise`` with ``nli_entails`` and the
    probabilities are passed through ``calibrate_prob``. Their mean is mapped
    to a 0-4 score with ``map_prob_to_score``.

    Returns:
        ``(score, top_evidence, mean)`` where ``top_evidence`` holds the two
        ``(hypothesis, probability)`` pairs with the highest probability.
    """
    # First pass: raw entailment for every hypothesis.
    entailments = []
    for hyp in hyps:
        entailments.append((hyp, nli_entails(premise, hyp, model_id)))

    # Second pass: calibration.
    calibrated = []
    for hyp, prob in entailments:
        calibrated.append((hyp, calibrate_prob(prob, use_calibration)))

    # max(1, ...) keeps the mean defined for an empty hypothesis list.
    total = sum(prob for _, prob in calibrated)
    mean_prob = total / max(1, len(calibrated))

    ranked = sorted(calibrated, key=lambda pair: pair[1], reverse=True)
    top_two = ranked[:2]
    return map_prob_to_score(mean_prob, thresholds), top_two, mean_prob

# ==========================
# Evaluación orquestada
# ==========================
def _build_score_figure(labels, values, overall, model_key):
    """Draw the per-indicator bar chart and return the Matplotlib figure."""
    fig, ax = plt.subplots(figsize=(8.2, 4.0))
    ax.bar(labels, values)
    ax.set_ylim(0, 4)
    ax.set_xlabel("Indicadores 4.4.5.x")
    ax.set_ylabel("Score (0–4)")
    fig.suptitle(f"ICB4 4.4.5 Leadership — Score global: {overall}  |  Modelo: {model_key}", y=0.97)
    fig.subplots_adjust(top=0.86)
    # Print each score just above its bar.
    for i, v in enumerate(values):
        ax.text(i, v + 0.08, f"{v}", ha="center", va="bottom")
    fig.tight_layout()
    # pyplot keeps every figure it creates registered until it is closed; in
    # a long-running server that leaks one figure per request. Closing only
    # unregisters it - the returned Figure object can still be rendered.
    plt.close(fig)
    return fig


def evaluate(texto: str, model_key: str):
    """Run the full evaluation of one case.

    Extracts the STAR sections, builds the NLI premise from up to six actions
    and four results, scores every indicator in ``HYP`` and renders the chart.

    Args:
        texto: Candidate's case, STAR-formatted or free text.
        model_key: Key of ``MODEL_CHOICES``; unknown keys fall back to
            ``DEFAULT_MODEL_KEY`` (and the fallback is what gets reported).

    Returns:
        ``(status_msg, figure, table)``. ``table`` is a dict with "columns"
        and "data" (plus "model_key" and "model_id" on success). For empty
        input or on any exception ``figure`` is ``None`` and the table is
        empty; exceptions are reported in ``status_msg`` instead of raised.
    """
    try:
        if not texto or not texto.strip():
            return "Introduce un caso en formato STAR (o texto libre).", None, {"columns": [], "data": []}

        # Resolve the selected model; report the key actually used so the
        # title, message and table never name a model that was not run.
        if model_key not in MODEL_CHOICES:
            model_key = DEFAULT_MODEL_KEY
        cfg = MODEL_CHOICES[model_key]
        model_id = cfg["id"]
        use_calibration = cfg["calibrate"]
        thresholds = cfg["thresholds"]

        star = extract_star(texto)

        # Limit the premise to 6 actions + 4 results.
        actions = (star.get("action", []) or [])[:6]
        results = (star.get("result", []) or [])[:4]
        premise = " ".join(actions) + " " + " ".join(results)

        # Score every indicator.
        scores, table_rows, per_indicator_values = [], [], []
        for ind, hyps in HYP.items():
            s, ev, avg = score_indicator(premise, hyps, model_id, use_calibration, thresholds)
            scores.append(s)
            per_indicator_values.append((ind, s))
            best_evid = " / ".join([h for h, _ in ev])
            name, desc = INDICATOR_META[ind]
            table_rows.append([ind, name, s, f"{avg:.2f}", best_evid, desc])

        overall = round(sum(scores) / max(1, len(scores)), 2)

        # Chart: x labels are the last component of each indicator code.
        labels = [f"{k.split('.')[-1]}" for k, _ in per_indicator_values]
        values = [v for _, v in per_indicator_values]
        fig = _build_score_figure(labels, values, overall, model_key)

        table = {
            "columns": DEFAULT_COLS,
            "data": table_rows,
            "model_key": model_key,   # label chosen in the dropdown
            "model_id": model_id      # model repo id, kept for traceability
        }

        msg = (
            f"Evaluación completada. Score global (0–4): {overall}\n"
            f"Modelo: {model_key}\n"
            f"Sugerencia: revisa evidencias y ajusta umbrales según tu rúbrica."
        )
        return msg, fig, table

    except Exception as e:
        # Top-level boundary for the UI: surface the error as a status message.
        return f"⚠️ Error en evaluate(): {type(e).__name__}: {e}", None, {"columns": [], "data": []}

# ==========================
# CSV helper
# ==========================
def make_csv_from_table(table: dict) -> str:
    """Write ``table`` to a fresh temporary CSV file and return its path.

    The column named 'Modelo (repo)', if present, is left out of the file
    together with the matching cell of every row; all other columns are
    written unchanged. The input table is not modified.

    Args:
        table: Dict with "columns" (list of headers) and "data" (list of
            rows). Missing keys or ``None`` are treated as empty.

    Returns:
        Path of the CSV file, created in the system temporary directory.
    """
    table = table or {}
    cols = list(table.get("columns") or [])
    rows = [list(r) for r in (table.get("data") or [])]

    # Drop only the 'Modelo (repo)' column; rows too short to have that cell
    # are left as they are.
    if "Modelo (repo)" in cols:
        idx_repo = cols.index("Modelo (repo)")
        del cols[idx_repo]
        rows = [r[:idx_repo] + r[idx_repo + 1:] for r in rows]

    # A unique temp file instead of "/tmp/<name>_<seconds>.csv": two exports
    # within the same second no longer overwrite each other, and the
    # location follows the platform's temp directory.
    ts = int(time.time())
    with tempfile.NamedTemporaryFile(
        "w", newline="", encoding="utf-8",
        prefix=f"icb4_leadership_{ts}_", suffix=".csv", delete=False,
    ) as f:
        writer = csv.writer(f)
        writer.writerow(cols)
        writer.writerows(rows)
        path = f.name

    return path



# ==========================
# UI (2 columnas + selector modelo + CSV)
# ==========================
with gr.Blocks(title="ICB4 4.4.5 Leadership — Evaluación STAR (FRAQX)", css=CUSTOM_CSS, elem_id="app") as demo:
    gr.Markdown(
        """
        <div style="display:flex;align-items:center;gap:12px;margin:8px 0 2px 0;">
            <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" height="28">
            <h1 style="margin:0;">ICB4 • 4.4.5 Leadership — Evaluación STAR + NLI</h1>
        </div>
        <div class="small">Extracción STAR, scoring (4.4.5.1–4.4.5.5), gráfica y reporte descargable. Elige el modelo NLI según tu prioridad.</div>
        """
    )

    # Per-session cache of the last successful evaluation, so exporting the
    # CSV does not have to run the models a second time.
    last_eval = gr.State(value=None)

    with gr.Row(equal_height=True):
        # Input column
        with gr.Column(scale=5):
            gr.Markdown("<div class='card'><b>Entrada</b></div>")

            model_key = gr.Dropdown(
                choices=list(MODEL_CHOICES.keys()),
                value=DEFAULT_MODEL_KEY,
                label="Modelo NLI",
                info="Velocidad (MiniLM) = más rápido | Precisión (DeBERTa) = mejor calidad"
            )

            texto = gr.Textbox(
                label="Caso (STAR o texto libre)",
                lines=16,
                placeholder="Pega aquí tu caso en formato STAR (S, T, A, R) o texto libre…"
            )
            with gr.Row():
                btn = gr.Button("Evaluar", variant="primary", scale=3)
                gr.ClearButton([texto], value="Limpiar", scale=1)

            gr.Markdown(
                """
                <details>
                  <summary>Ejemplo rápido (clic para autocompletar)</summary>
                  <div class="small">
                  S: El proyecto CRM estaba retrasado 6 semanas y el equipo estaba desmotivado.<br/>
                  T: Recuperar el plan y mejorar la colaboración en 2 sprints.<br/>
                  A: Organicé una sesión de visión y valores; definí métricas; implementé dailies; mentoring a líderes junior;
                  negocié con stakeholders; prioricé backlog mínimo; comuniqué riesgos y fechas realistas.<br/>
                  R: Recuperamos 4 semanas en 2 sprints; NPS interno +22; retrabajo -18%; se mantuvieron prácticas; dos líderes promovidos.
                  </div>
                </details>
                """,
            )

        # Output column
        with gr.Column(scale=7):
            gr.Markdown("<div class='card'><b>Resultados</b></div>")
            status = gr.Markdown(value="**Estado**: —", elem_id="status_md")
            score_badge = gr.Markdown(value="<span class='badge'>Score global: —</span>")
            plot = gr.Plot(label="Gráfica de evaluación (0–4)")
            table = gr.Dataframe(
                headers=DEFAULT_COLS,
                datatype=["str", "str", "number", "str", "str", "str"],
                interactive=False,
                label="Detalle por indicador"
            )
            with gr.Row():
                download_btn = gr.Button("Descargar CSV")
                csv_file = gr.File(label="Archivo CSV", visible=False)

    # Event handlers
    def run_eval(t: str, mk: str):
        """Evaluate the case and build the values for every output widget.

        Returns the status markdown, the badge HTML, the figure, the
        dataframe update and the cache entry stored in ``last_eval``.
        """
        msg, fig, tbl = evaluate(t, mk)
        tbl = tbl or {}

        # Two trailing spaces force a markdown line break.
        status_md = "**Estado**  \n" + (msg or "").replace("\n", "  \n")

        # Pull the overall score out of the status message. The pattern takes
        # the whole number so a two-decimal score is not cut to one decimal.
        badge_html = "<span class='badge'>Score global: —</span>"
        m = re.search(r"Score global \(0–4\):\s*([0-9]+(?:\.[0-9]+)?)", msg or "")
        if m:
            badge_html = f"<span class='badge'>Score global: {m.group(1)}</span>"

        # Pad or trim every row to the number of columns shown.
        cols = tbl.get("columns") or DEFAULT_COLS
        data = tbl.get("data") or []
        width = len(cols)
        safe_data = [(list(row) + [""] * width)[:width] for row in data]

        if fig is None:
            fig, ax = plt.subplots(figsize=(6, 2))
            ax.axis("off")
            ax.text(0.5, 0.5, "Sin datos para graficar", ha="center", va="center")
            # Unregister from pyplot so placeholders do not pile up in memory;
            # the Figure object itself can still be rendered.
            plt.close(fig)

        # Cache only evaluations that produced rows, so a failed run is
        # retried on export exactly as before.
        cache_entry = {"texto": t, "model_key": mk, "table": tbl} if data else None

        return status_md, badge_html, fig, gr.update(value=safe_data, headers=cols), cache_entry

    btn.click(
        fn=run_eval,
        inputs=[texto, model_key],
        outputs=[status, score_badge, plot, table, last_eval],
    )

    def export_csv_handler(t: str, mk: str, cached):
        """Write the CSV for the current inputs and reveal the file widget.

        Reuses the cached table when it was computed for the same text and
        model; otherwise evaluates again.
        """
        if cached and cached.get("texto") == t and cached.get("model_key") == mk:
            tbl = cached["table"]
        else:
            _, _, tbl = evaluate(t, mk)
        path = make_csv_from_table(tbl)
        # One update carrying both value and visibility, instead of listing
        # the same component twice in ``outputs``.
        return gr.update(value=path or None, visible=True)

    download_btn.click(
        fn=export_csv_handler,
        inputs=[texto, model_key, last_eval],
        outputs=[csv_file],
    )

# Launch
if __name__ == "__main__":
    # Enable the request queue (max_size=16), then start the app with
    # ssr_mode disabled and show_error enabled.
    demo.queue(max_size=16).launch(ssr_mode=False, show_error=True)