import csv


def load_annotated_data(ann_path):
    """Load the manual (gold) annotations from a CSV file.

    The CSV must have a header row containing the columns ``sentence_id``,
    ``sentence_text``, ``kpi_label``, ``start_char`` and ``end_char``.

    Args:
        ann_path: Path to the UTF-8 encoded CSV file.

    Returns:
        A list with one dict per CSV row, with the keys ``sentence_id``,
        ``text``, ``kpi_label``, ``start`` and ``end``. The two offsets are
        converted to ``int``; the other values are kept as strings.

    Raises:
        KeyError: If a required column is missing.
        ValueError: If an offset cannot be parsed as an integer.
    """
    # newline="" lets the csv module do its own newline handling, so quoted
    # fields that contain line breaks are parsed correctly.
    with open(ann_path, mode="r", encoding="utf-8", newline="") as f:
        return [
            {
                "sentence_id": row["sentence_id"],
                "text": row["sentence_text"],
                "kpi_label": row["kpi_label"],
                "start": int(row["start_char"]),
                "end": int(row["end_char"]),
            }
            for row in csv.DictReader(f)
        ]


def _group_spans_by_sentence(items, label_key):
    """Map each sentence id to its set of ``(label, (start, end))`` spans."""
    grouped = {}
    for item in items:
        # Labels are compared case-insensitively.
        span = (item[label_key].lower(), (item["start"], item["end"]))
        grouped.setdefault(item["sentence_id"], set()).add(span)
    return grouped


def evaluate_system(predicted_kpis, annotated_sentences):
    """Compare the system predictions against the manual annotations.

    A prediction matches a gold annotation when the sentence id, the
    lower-cased label and the exact ``(start, end)`` offsets are all equal.
    Duplicate spans within a sentence are counted once.

    Args:
        predicted_kpis: Iterable of dicts with the keys ``sentence_id``,
            ``kpi``, ``start`` and ``end``.
        annotated_sentences: Iterable of dicts with the keys
            ``sentence_id``, ``kpi_label``, ``start`` and ``end``.

    Returns:
        A dict with ``true_positives``, ``false_positives``,
        ``false_negatives``, ``precision``, ``recall`` and ``f1_score``.
        A ratio whose denominator is zero is reported as ``0.0``.
    """
    gold_data = _group_spans_by_sentence(annotated_sentences, "kpi_label")
    pred_data = _group_spans_by_sentence(predicted_kpis, "kpi")

    tp = fp = fn = 0
    # Walk the union of sentence ids: predictions on sentences that have no
    # gold annotation at all must still be counted as false positives.
    for sid in gold_data.keys() | pred_data.keys():
        gold_set = gold_data.get(sid, set())
        pred_set = pred_data.get(sid, set())
        tp += len(gold_set & pred_set)
        fp += len(pred_set - gold_set)
        fn += len(gold_set - pred_set)

    # Explicit zero guards instead of an epsilon in the denominator, so that
    # a perfect system scores exactly 1.0.
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    if precision + recall:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0

    return {
        "true_positives": tp,
        "false_positives": fp,
        "false_negatives": fn,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
    }