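"""ICB4 4.4.5 Leadership — STAR + NLI evaluation demo (Gradio).

Sketch of the pipeline implemented below: a small instruct LLM (Qwen2.5-0.5B) extracts a
STAR structure from the candidate's text, a multilingual NLI model scores entailment of the
indicator hypotheses (4.4.5.1–4.4.5.5) against the Actions/Results, and the app renders a
score table, a bar chart and a downloadable CSV. Presumably requires gradio, transformers
(plus torch/accelerate) and matplotlib to be installed.
"""
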
import json
import re
import csv
import os
import time
from typing import List, Dict, Tuple

import gradio as gr
import matplotlib.pyplot as plt

DEFAULT_COLS = [
    "Código", "Indicador", "Score (0–4)",
    "Entailment medio", "Evidencias (hipótesis)", "Descripción"
]

CUSTOM_CSS = """ |
|
#app {max-width: 1200px; margin: 0 auto;} |
|
.badge { |
|
display:inline-block; padding:10px 14px; border-radius:12px; font-weight:700; |
|
background:linear-gradient(135deg,#1f6feb,#5ac8fa); color:white; box-shadow:0 6px 20px rgba(0,0,0,.2); |
|
} |
|
.card { |
|
background: rgba(255,255,255,.03); |
|
border: 1px solid rgba(255,255,255,.08); |
|
border-radius: 14px; padding: 14px; |
|
box-shadow: 0 8px 24px rgba(0,0,0,.18); |
|
} |
|
.small {font-size: 12px; opacity: .9;} |
|
""" |
|
|
|
|
|
|
|
|
|
INDICATOR_META = {
    "4.4.5.1": ("Iniciativa y ayuda proactiva",
                "Inicia acciones sin que se lo pidan; ofrece ayuda, anticipa y equilibra riesgos."),
    "4.4.5.2": ("Ownership y compromiso",
                "Asume responsabilidad; impulsa el proyecto; define/monitorea indicadores y mejora procesos."),
    "4.4.5.3": ("Dirección, coaching y mentoring",
                "Da dirección; coach/mentor al equipo; alinea visión, valores y objetivos."),
    "4.4.5.4": ("Poder e influencia",
                "Usa influencia adecuada; elige bien el canal; es percibido como líder por stakeholders."),
    "4.4.5.5": ("Decisiones",
                "Toma decisiones bajo incertidumbre; explica razones; revisa con nueva evidencia; comunica con claridad.")
}

# Lazily initialised model handles (see lazy_load_llm / lazy_load_nli below).
_llm = None
_llm_tok = None
_gen = None
_nli_cache: Dict[str, object] = {}

LLM_ID = "Qwen/Qwen2.5-0.5B-Instruct"

MODEL_CHOICES = {
    "Velocidad (MiniLM)": {
        "id": "MoritzLaurer/multilingual-MiniLMv2-L12-mnli-xnli",
        "calibrate": True,
        "thresholds": (0.70, 0.50, 0.30, 0.15)
    },
    "Precisión (DeBERTa)": {
        "id": "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
        "calibrate": False,
        "thresholds": (0.80, 0.60, 0.40, 0.20)
    }
}
DEFAULT_MODEL_KEY = "Velocidad (MiniLM)"

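# The "thresholds" 4-tuple in MODEL_CHOICES above is (t4, t3, t2, t1): the minimum average
# entailment probability needed to award a score of 4, 3, 2 or 1 (see map_prob_to_score).
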
STAR_PROMPT = """Eres evaluador ICB4. Toma el texto del candidato y devuélvelo en formato STAR como JSON válido con claves: |
|
"situation" (<=3 frases), "task" (<=2 frases), "action" (lista de viñetas, verbos de acción), "result" (lista de viñetas, resultados/indicadores/aprendizajes). |
|
Siempre responde SOLO con JSON válido y conciso en español, sin comentarios adicionales. |
|
|
|
TEXTO: |
|
{texto} |
|
""" |
|
|
|
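# Shape of the JSON the prompt asks the LLM for (illustrative, not a real model response):
# {"situation": "…", "task": "…", "action": ["…", "…"], "result": ["…", "…"]}
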
HYP: Dict[str, List[str]] = {
    "4.4.5.1": [
        "Tomó la iniciativa sin que se lo pidieran.",
        "Ofreció ayuda o asesoría no solicitada.",
        "Pensó con orientación al futuro.",
        "Equilibró iniciativa y riesgo."
    ],
    "4.4.5.2": [
        "Mostró compromiso personal con los objetivos.",
        "Promovió el proyecto y generó entusiasmo.",
        "Definió o monitoreó indicadores de desempeño.",
        "Buscó mejoras en procesos."
    ],
    "4.4.5.3": [
        "Proporcionó dirección clara al equipo.",
        "Realizó coaching o mentoring para mejorar capacidades.",
        "Estableció y comunicó visión y valores.",
        "Alineó objetivos individuales con los comunes."
    ],
    "4.4.5.4": [
        "Usó apropiadamente poder e influencia.",
        "Seleccionó el canal de comunicación adecuado para influir.",
        "Fue percibido como líder por los stakeholders."
    ],
    "4.4.5.5": [
        "Tomó decisiones bajo incertidumbre considerando pros y contras.",
        "Explicó el razonamiento de las decisiones.",
        "Revisó decisiones con nueva evidencia.",
        "Comunicó claramente la decisión e influyó su adopción."
    ]
}

def lazy_load_llm():
    """Text-generation pipeline (Qwen 0.5B) used to extract the STAR structure."""
    global _llm, _llm_tok, _gen
    if _gen is not None:
        return _gen
    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
    _llm_tok = AutoTokenizer.from_pretrained(LLM_ID)
    _llm = AutoModelForCausalLM.from_pretrained(LLM_ID, device_map="auto")
    _gen = pipeline(
        "text-generation",
        model=_llm,
        tokenizer=_llm_tok,
        max_new_tokens=512,
        do_sample=False,
        repetition_penalty=1.1,
    )
    return _gen

def lazy_load_nli(model_id: str):
    """NLI pipeline with full score output and safe truncation. Cached per model_id."""
    from transformers import pipeline
    if model_id in _nli_cache:
        return _nli_cache[model_id]
    nli = pipeline(
        "text-classification",
        model=model_id,
        tokenizer=model_id,
        return_all_scores=True,
        truncation=True
    )
    _nli_cache[model_id] = nli
    return nli

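# With return_all_scores=True the pipeline usually returns, per input, a list of label/score
# dicts such as [{"label": "entailment", "score": 0.91}, ...] (label casing varies by model,
# and some transformers versions nest the list one level deeper); nli_entails() below
# accepts both variants.
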
def extract_json_block(text: str) -> str:
    start = text.find("{")
    end = text.rfind("}")
    if start != -1 and end != -1 and end > start:
        return text[start:end + 1]
    return '{"situation":"","task":"","action":[],"result":[]}'

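# Example: extract_json_block('Respuesta: {"a": 1} fin') -> '{"a": 1}'
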
def quick_parse_star(txt: str):
    t = (txt or "").strip()
    if not t:
        return None
    keys = ("SITUATION", "TASK", "ACTION", "RESULT", "S:", "T:", "A:", "R:")
    if not any(k in t for k in keys):
        return None
    sections = {"situation": "", "task": "", "action": [], "result": []}
    blocks = re.split(r'(?im)^(SITUATION|TASK|ACTION|RESULT|S:|T:|A:|R:)\s*:?', t)
    for i in range(1, len(blocks), 2):
        key = blocks[i].lower()[0]
        val = blocks[i + 1].strip()
        if key == "s":
            sections["situation"] = val
        elif key == "t":
            sections["task"] = val
        elif key == "a":
            sections["action"] = [x.strip("•- ") for x in val.splitlines() if x.strip()]
        elif key == "r":
            sections["result"] = [x.strip("•- ") for x in val.splitlines() if x.strip()]
    return sections

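# Illustrative parse: "S: Proyecto retrasado\nT: Recuperar el plan\nA: - dailies\nR: - NPS +22"
# becomes {"situation": "Proyecto retrasado", "task": "Recuperar el plan",
#          "action": ["dailies"], "result": ["NPS +22"]}.
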
def extract_star(user_text: str) -> Dict:
    parsed = quick_parse_star(user_text)
    if parsed:
        return parsed
    gen = lazy_load_llm()
    prompt = STAR_PROMPT.format(texto=(user_text or "").strip())
    out = gen(prompt)[0]["generated_text"]  # by default includes the prompt plus the completion
    raw = extract_json_block(out)
    try:
        data = json.loads(raw)
    except Exception:
        data = {"situation": "", "task": "", "action": [], "result": []}
        m = re.search(r'Situation[::]\s*(.*)', user_text or "", flags=re.I)
        if m:
            data["situation"] = m.group(1).strip()
    data["action"] = data.get("action") or []
    data["result"] = data.get("result") or []
    if isinstance(data["action"], str):
        data["action"] = [data["action"]]
    if isinstance(data["result"], str):
        data["result"] = [data["result"]]
    return {
        "situation": (data.get("situation", "") or "").strip(),
        "task": (data.get("task", "") or "").strip(),
        "action": [str(a).strip(" •-") for a in data["action"] if str(a).strip()],
        "result": [str(r).strip(" •-") for r in data["result"] if str(r).strip()],
    }

def calibrate_prob(p: float, use_calibration: bool) -> float:
    """Mild calibration, applied only for MiniLM (p ** 0.9)."""
    p = max(0.0, min(1.0, float(p)))
    return (p ** 0.9) if use_calibration else p

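# Example: calibrate_prob(0.50, True) ≈ 0.536 (0.5 ** 0.9); with use_calibration=False it stays 0.50.
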
def nli_entails(premise: str, hypothesis: str, model_id: str) -> float:
    """Probability of ENTAILMENT (0..1), robust to the different pipeline output shapes."""
    nli = lazy_load_nli(model_id)

    def _trim(s: str, limit=900):
        s = (s or "").strip()
        return s[:limit]

    text_a = _trim(premise)
    text_b = _trim(hypothesis)
    if not text_a or not text_b:
        return 0.0

    try:
        res = nli({"text": text_a, "text_pair": text_b})
    except Exception:
        return 0.0

    # Normalise to a flat list of {"label": ..., "score": ...} dicts.
    if isinstance(res, dict):
        candidates = [res]
    elif isinstance(res, list):
        candidates = res[0] if (res and isinstance(res[0], list)) else res
    else:
        return 0.0

    for c in (d for d in candidates if isinstance(d, dict)):
        lab = str(c.get("label", "")).lower()
        if "entail" in lab:
            try:
                return float(c.get("score", 0.0))
            except Exception:
                return 0.0
    return 0.0

def map_prob_to_score(p: float, thresholds: Tuple[float, float, float, float]) -> int:
    t4, t3, t2, t1 = thresholds
    if p >= t4:
        return 4
    if p >= t3:
        return 3
    if p >= t2:
        return 2
    if p >= t1:
        return 1
    return 0

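# Example with the MiniLM thresholds (0.70, 0.50, 0.30, 0.15):
#   p = 0.55 -> 3, p = 0.31 -> 2, p = 0.10 -> 0.
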
def score_indicator(premise: str, hyps: List[str], model_id: str, use_calibration: bool,
                    thresholds: Tuple[float, float, float, float]) -> Tuple[int, List[Tuple[str, float]], float]:
    raw = [(h, nli_entails(premise, h, model_id)) for h in hyps]
    probs = [(h, calibrate_prob(p, use_calibration)) for h, p in raw]
    avg = sum(p for _, p in probs) / max(1, len(probs))
    score = map_prob_to_score(avg, thresholds)
    # Keep the two hypotheses with the highest entailment as "evidence".
    probs_sorted = sorted(probs, key=lambda x: x[1], reverse=True)[:2]
    return score, probs_sorted, avg

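# Illustrative call, using the MiniLM configuration from MODEL_CHOICES:
#   cfg = MODEL_CHOICES["Velocidad (MiniLM)"]
#   score, evidence, avg = score_indicator(premise, HYP["4.4.5.1"],
#                                          cfg["id"], cfg["calibrate"], cfg["thresholds"])
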
def evaluate(texto: str, model_key: str):
    """Returns: status_msg, matplotlib_fig, {"columns": [...], "data": [...]}."""
    try:
        if not texto or not texto.strip():
            return "Introduce un caso en formato STAR (o texto libre).", None, {"columns": [], "data": []}

        cfg = MODEL_CHOICES.get(model_key, MODEL_CHOICES[DEFAULT_MODEL_KEY])
        model_id = cfg["id"]
        use_calibration = cfg["calibrate"]
        thresholds = cfg["thresholds"]

        star = extract_star(texto)

        # Build the NLI premise from the most informative STAR parts.
        actions = (star.get("action", []) or [])[:6]
        results = (star.get("result", []) or [])[:4]
        premise = " ".join(actions) + " " + " ".join(results)

        scores, table_rows, per_indicator_values = [], [], []
        for ind, hyps in HYP.items():
            s, ev, avg = score_indicator(premise, hyps, model_id, use_calibration, thresholds)
            scores.append(s)
            per_indicator_values.append((ind, s))
            best_evid = " / ".join([h for h, _ in ev])
            name, desc = INDICATOR_META[ind]
            table_rows.append([ind, name, s, f"{avg:.2f}", best_evid, desc])

        overall = round(sum(scores) / max(1, len(scores)), 2)

        labels = [f"{k.split('.')[-1]}" for k, _ in per_indicator_values]
        values = [v for _, v in per_indicator_values]
        fig, ax = plt.subplots(figsize=(8.2, 4.0))
        ax.bar(labels, values)
        ax.set_ylim(0, 4)
        ax.set_xlabel("Indicadores 4.4.5.x")
        ax.set_ylabel("Score (0–4)")
        fig.suptitle(f"ICB4 4.4.5 Leadership — Score global: {overall} | Modelo: {model_key}", y=0.97)
        fig.subplots_adjust(top=0.86)
        for i, v in enumerate(values):
            ax.text(i, v + 0.08, f"{v}", ha="center", va="bottom")
        fig.tight_layout()

        table = {
            "columns": DEFAULT_COLS,
            "data": table_rows,
            "model_key": model_key,
            "model_id": model_id
        }

        msg = (
            f"Evaluación completada. Score global (0–4): {overall}\n"
            f"Modelo: {model_key}\n"
            f"Sugerencia: revisa evidencias y ajusta umbrales según tu rúbrica."
        )
        return msg, fig, table

    except Exception as e:
        return f"⚠️ Error en evaluate(): {type(e).__name__}: {e}", None, {"columns": [], "data": []}

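# Shape of the third return value (illustrative values):
# {"columns": DEFAULT_COLS,
#  "data": [["4.4.5.1", "Iniciativa y ayuda proactiva", 3, "0.57", "hipótesis A / hipótesis B", "…"], ...],
#  "model_key": "Velocidad (MiniLM)", "model_id": "MoritzLaurer/multilingual-MiniLMv2-L12-mnli-xnli"}
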
def make_csv_from_table(table: dict) -> str:
    """Writes a temporary CSV with the table data; a 'Modelo (repo)' column, if present, is dropped."""
    cols = table.get("columns", [])
    rows = table.get("data", [])
    ts = int(time.time())
    path = f"/tmp/icb4_leadership_{ts}.csv"

    if "Modelo (repo)" in cols:
        idx_repo = cols.index("Modelo (repo)")
        cols = [c for i, c in enumerate(cols) if i != idx_repo]
        new_rows = []
        for r in rows:
            if len(r) > idx_repo:
                r = [c for i, c in enumerate(r) if i != idx_repo]
            new_rows.append(r)
        rows = new_rows

    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(cols)
        for r in rows:
            writer.writerow(r)

    return path if os.path.exists(path) else ""

with gr.Blocks(title="ICB4 4.4.5 Leadership — Evaluación STAR (FRAQX)", css=CUSTOM_CSS, elem_id="app") as demo:
    gr.Markdown(
        """
        <div style="display:flex;align-items:center;gap:12px;margin:8px 0 2px 0;">
          <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" height="28">
          <h1 style="margin:0;">ICB4 • 4.4.5 Leadership — Evaluación STAR + NLI</h1>
        </div>
        <div class="small">Extracción STAR, scoring (4.4.5.1–4.4.5.5), gráfica y reporte descargable. Elige el modelo NLI según tu prioridad.</div>
        """
    )

    with gr.Row(equal_height=True):

        with gr.Column(scale=5):
            gr.Markdown("<div class='card'><b>Entrada</b></div>")

            model_key = gr.Dropdown(
                choices=list(MODEL_CHOICES.keys()),
                value=DEFAULT_MODEL_KEY,
                label="Modelo NLI",
                info="Velocidad (MiniLM) = más rápido | Precisión (DeBERTa) = mejor calidad"
            )

            texto = gr.Textbox(
                label="Caso (STAR o texto libre)",
                lines=16,
                placeholder="Pega aquí tu caso en formato STAR (S, T, A, R) o texto libre…"
            )
            with gr.Row():
                btn = gr.Button("Evaluar", variant="primary", scale=3)
                gr.ClearButton([texto], value="Limpiar", scale=1)

            gr.Markdown(
                """
                <details>
                <summary>Ejemplo rápido (clic para autocompletar)</summary>
                <div class="small">
                S: El proyecto CRM estaba retrasado 6 semanas y el equipo estaba desmotivado.<br/>
                T: Recuperar el plan y mejorar la colaboración en 2 sprints.<br/>
                A: Organicé una sesión de visión y valores; definí métricas; implementé dailies; mentoring a líderes junior;
                negocié con stakeholders; prioricé backlog mínimo; comuniqué riesgos y fechas realistas.<br/>
                R: Recuperamos 4 semanas en 2 sprints; NPS interno +22; retrabajo -18%; se mantuvieron prácticas; dos líderes promovidos.
                </div>
                </details>
                """,
            )

        with gr.Column(scale=7):
            gr.Markdown("<div class='card'><b>Resultados</b></div>")
            status = gr.Markdown(value="**Estado**: —", elem_id="status_md")
            score_badge = gr.Markdown(value="<span class='badge'>Score global: —</span>")
            plot = gr.Plot(label="Gráfica de evaluación (0–4)")
            table = gr.Dataframe(
                headers=DEFAULT_COLS,
                datatype=["str", "str", "number", "str", "str", "str"],
                interactive=False,
                label="Detalle por indicador"
            )
            with gr.Row():
                download_btn = gr.Button("Descargar CSV")
                csv_file = gr.File(label="Archivo CSV", visible=False)

    def run_eval(t: str, mk: str):
        msg, fig, tbl = evaluate(t, mk)

        status_md = "**Estado**  \n" + (msg or "").replace("\n", "  \n")

        # Extract the overall score from the status message for the badge.
        badge_html = "<span class='badge'>Score global: —</span>"
        try:
            m = re.search(r"Score global \(0–4\):\s*([0-4](?:\.[0-9]{1,2})?)", msg or "")
            if m:
                badge_html = f"<span class='badge'>Score global: {m.group(1)}</span>"
        except Exception:
            pass

        # Pad or trim rows so they always match the number of columns.
        cols = (tbl or {}).get("columns") or DEFAULT_COLS
        data = (tbl or {}).get("data") or []
        safe_data = []
        for row in data:
            r = list(row)
            if len(r) < len(cols):
                r += [""] * (len(cols) - len(r))
            elif len(r) > len(cols):
                r = r[:len(cols)]
            safe_data.append(r)

        if fig is None:
            fig, ax = plt.subplots(figsize=(6, 2))
            ax.axis("off")
            ax.text(0.5, 0.5, "Sin datos para graficar", ha="center", va="center")

        return status_md, badge_html, fig, gr.update(value=safe_data, headers=cols)

    btn.click(fn=run_eval, inputs=[texto, model_key], outputs=[status, score_badge, plot, table])

    def export_csv_handler(t: str, mk: str):
        _, _, tbl = evaluate(t, mk)
        path = make_csv_from_table(tbl)
        # Single update: set the file value and make the component visible only if a file exists.
        return gr.update(value=path or None, visible=bool(path))

    download_btn.click(fn=export_csv_handler, inputs=[texto, model_key], outputs=[csv_file])

if __name__ == "__main__":
    demo.queue(max_size=16).launch(ssr_mode=False, show_error=True)