Spaces:

cesparzaf
/

icb4-leadership-star

Running

App Files Files

xet

Community

icb4-leadership-star / app.py

cesparzaf

Update app.py

22a5b8d verified 2 days ago

raw

history blame contribute delete

18.2 kB

	import json
	import re
	from typing import List, Dict, Tuple
	import csv, os, time
	import gradio as gr
	import matplotlib.pyplot as plt

	# ==========================
	# Config & styles
	# ==========================
	# Column headers of the per-indicator results table. evaluate() returns them
	# as the table's "columns" and the UI Dataframe uses them as its headers.
	DEFAULT_COLS = [
	"Código", "Indicador", "Score (0–4)",
	"Entailment medio", "Evidencias (hipótesis)", "Descripción"
	]

	# Stylesheet passed to gr.Blocks(css=...). Defines the page width (#app) and
	# the .badge, .card and .small classes used by the inline HTML in the UI.
	CUSTOM_CSS = """
	#app {max-width: 1200px; margin: 0 auto;}
	.badge {
	display:inline-block; padding:10px 14px; border-radius:12px; font-weight:700;
	background:linear-gradient(135deg,#1f6feb,#5ac8fa); color:white; box-shadow:0 6px 20px rgba(0,0,0,.2);
	}
	.card {
	background: rgba(255,255,255,.03);
	border: 1px solid rgba(255,255,255,.08);
	border-radius: 14px; padding: 14px;
	box-shadow: 0 8px 24px rgba(0,0,0,.18);
	}
	.small {font-size: 12px; opacity: .9;}
	"""

	# ==========================
	# IPMA ICB4 4.4.5.x metadata
	# ==========================
	# Maps each indicator code to a (short name, description) pair; evaluate()
	# unpacks the pair into the "Indicador" and "Descripción" table columns.
	INDICATOR_META = {
	"4.4.5.1": ("Iniciativa y ayuda proactiva",
	"Inicia acciones sin que se lo pidan; ofrece ayuda, anticipa y equilibra riesgos."),
	"4.4.5.2": ("Ownership y compromiso",
	"Asume responsabilidad; impulsa el proyecto; define/monitorea indicadores y mejora procesos."),
	"4.4.5.3": ("Dirección, coaching y mentoring",
	"Da dirección; coach/mentor al equipo; alinea visión, valores y objetivos."),
	"4.4.5.4": ("Poder e influencia",
	"Usa influencia adecuada; elige bien el canal; es percibido como líder por stakeholders."),
	"4.4.5.5": ("Decisiones",
	"Toma decisiones bajo incertidumbre; explica razones; revisa con nueva evidencia; comunica con claridad.")
	}

	# ==========================
	# Models (CPU Basic friendly)
	# ==========================
	# Module-level singletons for the STAR-extraction LLM; they stay None until
	# lazy_load_llm() fills them in on first use.
	_llm = None
	_llm_tok = None
	_gen = None
	_nli_cache: Dict[str, object] = {} # cache of NLI pipelines keyed by model_id

	LLM_ID = "Qwen/Qwen2.5-0.5B-Instruct" # small multilingual LLM used to extract STAR

	# NLI model selector with its associated configuration:
	#   "id"         -> Hugging Face repo id handed to lazy_load_nli()
	#   "calibrate"  -> whether calibrate_prob() applies p**0.9 to the raw probability
	#   "thresholds" -> minimum average entailment for scores 4, 3, 2 and 1 (in that order)
	MODEL_CHOICES = {
	"Velocidad (MiniLM)": {
	"id": "MoritzLaurer/multilingual-MiniLMv2-L12-mnli-xnli",
	"calibrate": True,
	"thresholds": (0.70, 0.50, 0.30, 0.15) # cut-offs for scores 4,3,2,1
	},
	"Precisión (DeBERTa)": {
	"id": "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
	"calibrate": False,
	"thresholds": (0.80, 0.60, 0.40, 0.20)
	}
	}
	DEFAULT_MODEL_KEY = "Velocidad (MiniLM)" # default choice on free Spaces hardware

	# Prompt template for the STAR-extraction LLM. extract_star() fills the
	# {texto} placeholder with the candidate's text via str.format(); the model
	# is asked to answer with a JSON object holding the four STAR keys.
	STAR_PROMPT = """Eres evaluador ICB4. Toma el texto del candidato y devuélvelo en formato STAR como JSON válido con claves:
	"situation" (<=3 frases), "task" (<=2 frases), "action" (lista de viñetas, verbos de acción), "result" (lista de viñetas, resultados/indicadores/aprendizajes).
	Siempre responde SOLO con JSON válido y conciso en español, sin comentarios adicionales.

	TEXTO:
	{texto}
	"""

	# NLI hypotheses per indicator code. score_indicator() tests every hypothesis
	# of an indicator against the premise and averages the entailment
	# probabilities to obtain that indicator's score.
	HYP: Dict[str, List[str]] = {
	"4.4.5.1": [
	"Tomó la iniciativa sin que se lo pidieran.",
	"Ofreció ayuda o asesoría no solicitada.",
	"Pensó con orientación al futuro.",
	"Equilibró iniciativa y riesgo."
	],
	"4.4.5.2": [
	"Mostró compromiso personal con los objetivos.",
	"Promovió el proyecto y generó entusiasmo.",
	"Definió o monitoreó indicadores de desempeño.",
	"Buscó mejoras en procesos."
	],
	"4.4.5.3": [
	"Proporcionó dirección clara al equipo.",
	"Realizó coaching o mentoring para mejorar capacidades.",
	"Estableció y comunicó visión y valores.",
	"Alineó objetivos individuales con los comunes."
	],
	"4.4.5.4": [
	"Usó apropiadamente poder e influencia.",
	"Seleccionó el canal de comunicación adecuado para influir.",
	"Fue percibido como líder por los stakeholders."
	],
	"4.4.5.5": [
	"Tomó decisiones bajo incertidumbre considerando pros y contras.",
	"Explicó el razonamiento de las decisiones.",
	"Revisó decisiones con nueva evidencia.",
	"Comunicó claramente la decisión e influyó su adopción."
	]
	}

	# ==========================
	# Carga perezosa de modelos
	# ==========================
	def lazy_load_llm():
	    """Return the shared text-generation pipeline, building it on first use.

	    On the first call the tokenizer and causal LM identified by ``LLM_ID`` are
	    loaded and stored in the module-level ``_llm_tok`` / ``_llm`` / ``_gen``
	    globals. Later calls return the already-built pipeline.
	    """
	    global _llm, _llm_tok, _gen
	    # Imported at call time rather than at module import time.
	    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

	    if _gen is None:
	        _llm_tok = AutoTokenizer.from_pretrained(LLM_ID)
	        _llm = AutoModelForCausalLM.from_pretrained(LLM_ID, device_map="auto")
	        # Greedy decoding with a mild repetition penalty, capped at 512 new tokens.
	        generation_options = dict(
	            max_new_tokens=512,
	            do_sample=False,
	            repetition_penalty=1.1,
	        )
	        _gen = pipeline(
	            "text-generation",
	            model=_llm,
	            tokenizer=_llm_tok,
	            **generation_options,
	        )
	    return _gen

	def lazy_load_nli(model_id: str):
	    """Return a text-classification (NLI) pipeline for ``model_id``, cached per id.

	    Args:
	        model_id: Hugging Face repo id, used for both the model and the tokenizer.

	    Returns:
	        The pipeline object. It is configured to return a score for every
	        label (``top_k=None``) and to truncate over-long inputs.
	    """
	    cached = _nli_cache.get(model_id)
	    if cached is not None:
	        return cached

	    # Only import transformers when a pipeline actually has to be built.
	    from transformers import pipeline

	    nli = pipeline(
	        "text-classification",
	        model=model_id,
	        tokenizer=model_id,
	        top_k=None,  # all labels; replaces the deprecated return_all_scores=True
	        truncation=True,  # avoid failures/degradation on long inputs
	    )
	    _nli_cache[model_id] = nli
	    return nli

	# ==========================
	# Utilidades extracción STAR
	# ==========================
	def extract_json_block(text: str) -> str:
	    """Return the span from the first ``{`` to the last ``}`` of ``text``.

	    When no such span exists (a brace is missing, or the last ``}`` comes
	    before the first ``{``), an empty STAR JSON object is returned instead.
	    """
	    opening = text.find("{")
	    closing = text.rfind("}")
	    # A closing brace at or before the opening one also covers closing == -1.
	    if opening == -1 or closing <= opening:
	        return '{"situation":"","task":"","action":[],"result":[]}'
	    return text[opening:closing + 1]

	def quick_parse_star(txt: str):
	    """Parse text that already carries explicit STAR section headers.

	    A header must start a line and is either a single letter followed by a
	    colon (``S:``, ``T:``, ``A:``, ``R:``) or one of the words SITUATION, TASK,
	    ACTION(S), RESULT(S) followed by a colon or standing alone on its line.
	    Matching is case-insensitive.

	    Args:
	        txt: Raw user text (``None`` is treated as empty).

	    Returns:
	        A dict with ``"situation"`` / ``"task"`` strings and ``"action"`` /
	        ``"result"`` lists (one item per non-empty line, bullets stripped), or
	        ``None`` when the text is empty or contains no STAR header, so that the
	        caller can fall back to another extraction method.
	    """
	    t = (txt or "").strip()
	    if not t:
	        return None

	    # The alternation must use a plain "|"; an escaped "\|" would only match a
	    # literal pipe character and no header would ever be recognised.
	    header_re = (
	        r'(?im)^[ \t]*('
	        r'(?:SITUATION|TASK|ACTIONS?|RESULTS?)(?=[ \t]*(?::|$))'
	        r'|[STAR]:'
	        r')\s*:?'
	    )
	    # With one capture group, re.split yields [preamble, header, body, header, body, ...].
	    blocks = re.split(header_re, t)
	    if len(blocks) < 3:
	        # No header found: report "not STAR-formatted" instead of an empty,
	        # truthy dict that callers would mistake for a successful parse.
	        return None

	    def _bullets(body: str):
	        stripped = (line.strip().strip("•- ") for line in body.splitlines())
	        return [item for item in stripped if item]

	    sections = {"situation": "", "task": "", "action": [], "result": []}
	    for header, body in zip(blocks[1::2], blocks[2::2]):
	        key = header.lower()[0]  # first letter identifies the section
	        val = body.strip()
	        if key == "s":
	            sections["situation"] = val
	        elif key == "t":
	            sections["task"] = val
	        elif key == "a":
	            sections["action"] = _bullets(val)
	        elif key == "r":
	            sections["result"] = _bullets(val)
	    return sections

	def extract_star(user_text: str) -> Dict:
	    """Return the candidate's text as a normalised STAR dict.

	    Text with explicit STAR headers is parsed directly by ``quick_parse_star``.
	    Otherwise the LLM pipeline is prompted with ``STAR_PROMPT`` and the JSON
	    object found in its completion is parsed. If that JSON is unusable, an
	    empty STAR is returned, with the situation taken from a ``Situation:``
	    line of the input when one exists.

	    Args:
	        user_text: Raw case description (``None`` is treated as empty).

	    Returns:
	        A dict with ``"situation"`` and ``"task"`` as stripped strings and
	        ``"action"`` / ``"result"`` as lists of non-empty strings.
	    """
	    parsed = quick_parse_star(user_text)
	    if parsed:
	        return parsed

	    source_text = (user_text or "").strip()
	    gen = lazy_load_llm()
	    prompt = STAR_PROMPT.format(texto=source_text)
	    # Request only the completion: by default the pipeline echoes the prompt,
	    # so braces in the user's own text would corrupt the JSON search below.
	    out = gen(prompt, return_full_text=False)[0]["generated_text"]
	    raw = extract_json_block(out)

	    try:
	        data = json.loads(raw)
	    except (ValueError, RecursionError):
	        data = None
	    if not isinstance(data, dict):
	        data = {}
	        # Capture the rest of the line after "Situation:", not just one character.
	        m = re.search(r'Situation[:：]\s*(.+)', source_text, flags=re.I)
	        if m:
	            data["situation"] = m.group(1)

	    def _as_text(value) -> str:
	        # The model may emit null or a list where a string was requested.
	        if value is None:
	            return ""
	        if isinstance(value, (list, tuple)):
	            return " ".join(str(v).strip() for v in value if v is not None).strip()
	        return str(value).strip()

	    def _as_items(value) -> list:
	        # The model may emit null or a bare string where a list was requested.
	        if value is None:
	            return []
	        if not isinstance(value, (list, tuple)):
	            value = [value]
	        cleaned = (str(v).strip(" •-") for v in value if v is not None)
	        return [item for item in cleaned if item]

	    return {
	        "situation": _as_text(data.get("situation")),
	        "task": _as_text(data.get("task")),
	        "action": _as_items(data.get("action")),
	        "result": _as_items(data.get("result")),
	    }

	# ==========================
	# NLI + scoring (dinámico por modelo)
	# ==========================
	def calibrate_prob(p: float, use_calibration: bool) -> float:
	    """Clamp ``p`` to [0, 1] and, if requested, apply the mild ``p ** 0.9`` calibration."""
	    clamped = min(1.0, float(p))
	    clamped = max(0.0, clamped)
	    if use_calibration:
	        return clamped ** 0.9
	    return clamped

	def _entailment_from_output(res) -> float:
	    """Extract the entailment score from a text-classification pipeline result.

	    Accepts a single ``{label, score}`` dict, a list of such dicts, or a list
	    whose first element is such a list. Returns 0.0 when no entailment label
	    with a numeric score is present.
	    """
	    if isinstance(res, dict):
	        candidates = [res]
	    elif isinstance(res, list):
	        candidates = res[0] if (res and isinstance(res[0], list)) else res
	    else:
	        return 0.0

	    for cand in candidates:
	        if not isinstance(cand, dict):
	            continue
	        label = str(cand.get("label", "")).lower()
	        # "not_entailment" also contains "entail" but means the opposite.
	        if "entail" in label and not label.startswith("not"):
	            try:
	                return float(cand.get("score", 0.0))
	            except (TypeError, ValueError):
	                return 0.0
	    return 0.0


	def nli_entails(premise: str, hypothesis: str, model_id: str) -> float:
	    """Return the ENTAILMENT probability (0..1) of ``hypothesis`` given ``premise``.

	    Args:
	        premise: Evidence text; stripped and cut to 900 characters.
	        hypothesis: Statement to test; stripped and cut to 900 characters.
	        model_id: NLI model id forwarded to ``lazy_load_nli``.

	    Returns:
	        The entailment score reported by the pipeline, or 0.0 when either text
	        is empty, inference raises, or no entailment label is found.
	    """
	    limit = 900
	    text_a = (premise or "").strip()[:limit]
	    text_b = (hypothesis or "").strip()[:limit]
	    # Check the cheap condition first so empty input never triggers a model load.
	    if not text_a or not text_b:
	        return 0.0

	    nli = lazy_load_nli(model_id)
	    try:
	        res = nli({"text": text_a, "text_pair": text_b})
	    except Exception:
	        # Deliberate best-effort: a failed inference counts as "no evidence".
	        return 0.0
	    return _entailment_from_output(res)

	def map_prob_to_score(p: float, thresholds: Tuple[float, float, float, float]) -> int:
	    """Map probability ``p`` to a 0–4 score.

	    ``thresholds`` holds the minimum probability for scores 4, 3, 2 and 1, in
	    that order; the first cut-off that ``p`` reaches wins, otherwise 0.
	    """
	    t4, t3, t2, t1 = thresholds
	    for score, cutoff in ((4, t4), (3, t3), (2, t2), (1, t1)):
	        if p >= cutoff:
	            return score
	    return 0

	def score_indicator(premise: str, hyps: List[str], model_id: str, use_calibration: bool,
	                    thresholds: Tuple[float, float, float, float]) -> Tuple[int, List[Tuple[str, float]], float]:
	    """Score one indicator from the entailment of its hypotheses.

	    Each hypothesis is tested against ``premise`` with ``nli_entails`` and the
	    probability is passed through ``calibrate_prob``. The mean of those values
	    (0 for an empty hypothesis list) is mapped to a 0–4 score.

	    Returns:
	        ``(score, top_evidence, mean)`` where ``top_evidence`` holds the two
	        ``(hypothesis, probability)`` pairs with the highest probability.
	    """
	    entailments = []
	    for hypothesis in hyps:
	        entailments.append((hypothesis, nli_entails(premise, hypothesis, model_id)))

	    calibrated = []
	    for hypothesis, raw_prob in entailments:
	        calibrated.append((hypothesis, calibrate_prob(raw_prob, use_calibration)))

	    # max(1, ...) keeps the division defined when there are no hypotheses.
	    mean_prob = sum(prob for _, prob in calibrated) / max(1, len(calibrated))
	    ranked = sorted(calibrated, key=lambda pair: pair[1], reverse=True)
	    top_evidence = ranked[:2]
	    return map_prob_to_score(mean_prob, thresholds), top_evidence, mean_prob

	# ==========================
	# Evaluación orquestada
	# ==========================
	def _plot_indicator_scores(per_indicator_values, overall, model_key):
	    """Build the bar chart of per-indicator scores and return the Figure."""
	    labels = [code.split(".")[-1] for code, _ in per_indicator_values]
	    values = [score for _, score in per_indicator_values]

	    fig, ax = plt.subplots(figsize=(8.2, 4.0))
	    ax.bar(labels, values)
	    ax.set_ylim(0, 4)
	    ax.set_xlabel("Indicadores 4.4.5.x")
	    ax.set_ylabel("Score (0–4)")
	    fig.suptitle(f"ICB4 4.4.5 Leadership — Score global: {overall} | Modelo: {model_key}", y=0.97)
	    for i, v in enumerate(values):
	        ax.text(i, v + 0.08, f"{v}", ha="center", va="bottom")
	    # tight_layout() recomputes the margins, so the head-room for the title
	    # has to be reserved afterwards or the adjustment is lost.
	    fig.tight_layout()
	    fig.subplots_adjust(top=0.86)
	    # Unregister the figure from pyplot so a long-running server does not
	    # accumulate open figures; the Figure object itself remains renderable.
	    plt.close(fig)
	    return fig


	def evaluate(texto: str, model_key: str):
	    """Run STAR extraction and NLI scoring for one case description.

	    Args:
	        texto: The candidate's case, STAR-formatted or free text.
	        model_key: Key into ``MODEL_CHOICES``; unknown keys fall back to
	            ``DEFAULT_MODEL_KEY``'s configuration.

	    Returns:
	        ``(status_msg, figure_or_None, table)``. ``table`` is a dict with
	        ``"columns"`` and ``"data"`` and, on success, ``"model_key"`` and
	        ``"model_id"``. On empty input or on any exception the figure is
	        ``None``, the table is empty and the message explains why.
	    """
	    try:
	        if not texto or not texto.strip():
	            return "Introduce un caso en formato STAR (o texto libre).", None, {"columns": [], "data": []}

	        # Configuration of the selected model
	        cfg = MODEL_CHOICES.get(model_key, MODEL_CHOICES[DEFAULT_MODEL_KEY])
	        model_id = cfg["id"]
	        use_calibration = cfg["calibrate"]
	        thresholds = cfg["thresholds"]

	        star = extract_star(texto)

	        # Keep the premise short for a clear NLI signal (6 actions + 4 results).
	        actions = (star.get("action", []) or [])[:6]
	        results = (star.get("result", []) or [])[:4]
	        premise = " ".join(str(part) for part in [*actions, *results]).strip()
	        if not premise:
	            # Extraction produced no actions/results; score the raw text
	            # rather than silently reporting zero for every indicator.
	            premise = texto.strip()

	        # Per-indicator scoring
	        scores, table_rows, per_indicator_values = [], [], []
	        for ind, hyps in HYP.items():
	            s, ev, avg = score_indicator(premise, hyps, model_id, use_calibration, thresholds)
	            scores.append(s)
	            per_indicator_values.append((ind, s))
	            best_evid = " / ".join(h for h, _ in ev)
	            name, desc = INDICATOR_META[ind]
	            table_rows.append([ind, name, s, f"{avg:.2f}", best_evid, desc])

	        overall = round(sum(scores) / max(1, len(scores)), 2)
	        fig = _plot_indicator_scores(per_indicator_values, overall, model_key)

	        table = {
	            "columns": DEFAULT_COLS,
	            "data": table_rows,
	            "model_key": model_key,  # label chosen in the dropdown
	            "model_id": model_id,  # actual HF repo, kept for traceability
	        }

	        msg = (
	            f"Evaluación completada. Score global (0–4): {overall}\n"
	            f"Modelo: {model_key}\n"
	            f"Sugerencia: revisa evidencias y ajusta umbrales según tu rúbrica."
	        )
	        return msg, fig, table

	    except Exception as e:
	        # Top-level UI boundary: report the failure instead of crashing the app.
	        return f"⚠️ Error en evaluate(): {type(e).__name__}: {e}", None, {"columns": [], "data": []}

	# ==========================
	# CSV helper
	# ==========================
	def make_csv_from_table(table: dict) -> str:
	    """Write ``table`` to a new temporary CSV file and return its path.

	    The column named 'Modelo (repo)', if present, is dropped together with the
	    matching cell of every row that is long enough to contain it; all other
	    columns (including 'Modelo (etiqueta)') are kept.

	    Args:
	        table: Dict with ``"columns"`` (header list) and ``"data"`` (list of
	            rows). ``None`` or missing keys are treated as empty.

	    Returns:
	        Path of the CSV file, created in the platform's temp directory with a
	        unique name so concurrent exports cannot overwrite each other.
	    """
	    import tempfile  # function-scope import: only this helper needs it

	    table = table or {}
	    cols = list(table.get("columns") or [])
	    rows = [list(r) for r in (table.get("data") or [])]

	    # Find and remove only the 'Modelo (repo)' column
	    if "Modelo (repo)" in cols:
	        idx_repo = cols.index("Modelo (repo)")
	        del cols[idx_repo]
	        # Rows shorter than idx_repo come through this slicing unchanged.
	        rows = [r[:idx_repo] + r[idx_repo + 1:] for r in rows]

	    with tempfile.NamedTemporaryFile(
	        mode="w",
	        newline="",
	        encoding="utf-8",
	        prefix=f"icb4_leadership_{int(time.time())}_",
	        suffix=".csv",
	        delete=False,  # the caller hands the path to the download widget
	    ) as f:
	        writer = csv.writer(f)
	        writer.writerow(cols)
	        writer.writerows(rows)
	        path = f.name

	    return path if os.path.exists(path) else ""



	# ==========================
	# UI (2 columnas + selector modelo + CSV)
	# ==========================
	with gr.Blocks(title="ICB4 4.4.5 Leadership — Evaluación STAR (FRAQX)", css=CUSTOM_CSS, elem_id="app") as demo:
	    # Page header: logo, title and a one-line description.
	    gr.Markdown(
	        """
	<div style="display:flex;align-items:center;gap:12px;margin:8px 0 2px 0;">
	<img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" height="28">
	<h1 style="margin:0;">ICB4 • 4.4.5 Leadership — Evaluación STAR + NLI</h1>
	</div>
	<div class="small">Extracción STAR, scoring (4.4.5.1–4.4.5.5), gráfica y reporte descargable. Elige el modelo NLI según tu prioridad.</div>
	"""
	    )

	    # Table produced by the most recent evaluation, reused by the CSV export
	    # so downloading does not run the models a second time.
	    last_table = gr.State(value=None)

	    with gr.Row(equal_height=True):
	        # Input column
	        with gr.Column(scale=5):
	            gr.Markdown("<div class='card'><b>Entrada</b></div>")

	            model_key = gr.Dropdown(
	                choices=list(MODEL_CHOICES.keys()),
	                value=DEFAULT_MODEL_KEY,
	                label="Modelo NLI",
	                info="Velocidad (MiniLM) = más rápido | Precisión (DeBERTa) = mejor calidad"
	            )

	            texto = gr.Textbox(
	                label="Caso (STAR o texto libre)",
	                lines=16,
	                placeholder="Pega aquí tu caso en formato STAR (S, T, A, R) o texto libre…"
	            )
	            with gr.Row():
	                btn = gr.Button("Evaluar", variant="primary", scale=3)
	                gr.ClearButton([texto], value="Limpiar", scale=1)

	            gr.Markdown(
	                """
	<details>
	<summary>Ejemplo rápido (clic para autocompletar)</summary>
	<div class="small">
	S: El proyecto CRM estaba retrasado 6 semanas y el equipo estaba desmotivado.<br/>
	T: Recuperar el plan y mejorar la colaboración en 2 sprints.<br/>
	A: Organicé una sesión de visión y valores; definí métricas; implementé dailies; mentoring a líderes junior;
	negocié con stakeholders; prioricé backlog mínimo; comuniqué riesgos y fechas realistas.<br/>
	R: Recuperamos 4 semanas en 2 sprints; NPS interno +22; retrabajo -18%; se mantuvieron prácticas; dos líderes promovidos.
	</div>
	</details>
	""",
	            )

	        # Output column
	        with gr.Column(scale=7):
	            gr.Markdown("<div class='card'><b>Resultados</b></div>")
	            status = gr.Markdown(value="Estado: —", elem_id="status_md")
	            score_badge = gr.Markdown(value="<span class='badge'>Score global: —</span>")
	            plot = gr.Plot(label="Gráfica de evaluación (0–4)")
	            table = gr.Dataframe(
	                headers=DEFAULT_COLS,
	                datatype=["str", "str", "number", "str", "str", "str"],
	                interactive=False,
	                label="Detalle por indicador"
	            )
	            with gr.Row():
	                download_btn = gr.Button("Descargar CSV")
	                csv_file = gr.File(label="Archivo CSV", visible=False)

	    # Event logic
	    def run_eval(t: str, mk: str):
	        """Evaluate the case and return values for status, badge, plot, table and state."""
	        msg, fig, tbl = evaluate(t, mk)

	        # Markdown needs two trailing spaces before a newline for a hard line break.
	        status_md = "Estado  \n" + (msg or "").replace("\n", "  \n")

	        # Show the global score exactly as reported in the status message.
	        badge_html = "<span class='badge'>Score global: —</span>"
	        m = re.search(r"Score global \(0–4\):\s*([0-9]+(?:\.[0-9]+)?)", msg or "")
	        if m:
	            badge_html = f"<span class='badge'>Score global: {m.group(1)}</span>"

	        # Pad or trim every row so it matches the header count.
	        cols = (tbl or {}).get("columns") or DEFAULT_COLS
	        data = (tbl or {}).get("data") or []
	        safe_data = []
	        for row in data:
	            r = list(row)
	            if len(r) < len(cols):
	                r += [""] * (len(cols) - len(r))
	            elif len(r) > len(cols):
	                r = r[:len(cols)]
	            safe_data.append(r)

	        if fig is None:
	            # Placeholder figure so the plot area is never left stale.
	            fig, ax = plt.subplots(figsize=(6, 2))
	            ax.axis("off")
	            ax.text(0.5, 0.5, "Sin datos para graficar", ha="center", va="center")
	        # Unregister from pyplot to avoid accumulating open figures; the
	        # Figure object stays renderable. No-op if it was already closed.
	        plt.close(fig)

	        return status_md, badge_html, fig, gr.update(value=safe_data, headers=cols), tbl

	    btn.click(
	        fn=run_eval,
	        inputs=[texto, model_key],
	        outputs=[status, score_badge, plot, table, last_table],
	    )

	    def export_csv_handler(t: str, mk: str, cached_tbl=None):
	        """Write the last evaluated table to CSV and reveal the download widget.

	        The table being exported is the one from the latest "Evaluar" click; the
	        evaluation is only run here when no such table is available yet.
	        """
	        if (cached_tbl or {}).get("data"):
	            tbl = cached_tbl
	        else:
	            _, _, tbl = evaluate(t, mk)
	        path = make_csv_from_table(tbl)
	        # A single update sets both value and visibility; listing the same
	        # component twice in `outputs` lets one return value overwrite the other.
	        return gr.update(value=path or None, visible=bool(path))

	    download_btn.click(
	        fn=export_csv_handler,
	        inputs=[texto, model_key, last_table],
	        outputs=[csv_file],
	    )

	# Lanzamiento
	if __name__ == "__main__":
	    # Enable the request queue (at most 16 waiting requests), then start the
	    # server with SSR disabled and errors surfaced in the UI.
	    queued_demo = demo.queue(max_size=16)
	    queued_demo.launch(ssr_mode=False, show_error=True)