rafmacalaba committed on
Commit
eb6e673
·
1 Parent(s): 079aa2a
Files changed (1) hide show
  1. app.py +29 -69
app.py CHANGED
@@ -109,84 +109,44 @@ def prune_acronym_and_self_relations(ner_preds, rel_preds):
109
 
110
  # Highlighting function
111
 
112
- # def highlight_text(text, ner_threshold, re_threshold):
113
- # # Run inference
114
- # ner_preds, rel_preds = inference_pipeline(
115
- # text,
116
- # model=model,
117
- # labels=labels,
118
- # relation_extractor=relation_extractor,
119
- # TYPE2RELS=TYPE2RELS,
120
- # ner_threshold=ner_threshold,
121
- # re_threshold=re_threshold,
122
- # re_multi_label=False
123
- # )
124
-
125
- # # Post-process
126
- # ner_preds, rel_preds = prune_acronym_and_self_relations(ner_preds, rel_preds)
127
-
128
- # # Gather all spans
129
- # spans = []
130
- # for ent in ner_preds:
131
- # spans.append((ent["start"], ent["end"], ent["label"]))
132
- # for src, rels in rel_preds.items():
133
- # for r in rels:
134
- # for m in re.finditer(re.escape(r["target"]), text):
135
- # spans.append((m.start(), m.end(), f"{src} <> {r['relation']}"))
136
-
137
- # # Merge labels by span
138
- # merged = defaultdict(list)
139
- # for start, end, lbl in spans:
140
- # merged[(start, end)].append(lbl)
141
-
142
- # # Build Gradio entities
143
- # entities = []
144
- # for (start, end), lbls in sorted(merged.items(), key=lambda x: x[0]):
145
- # entities.append({
146
- # "entity": ", ".join(lbls),
147
- # "start": start,
148
- # "end": end
149
- # })
150
  def highlight_text(text, ner_threshold, re_threshold):
151
- # inference + pruning …
152
- ner_preds, rel_preds = inference_pipeline(…)
 
 
 
 
 
 
 
 
 
 
 
153
  ner_preds, rel_preds = prune_acronym_and_self_relations(ner_preds, rel_preds)
154
 
 
155
  spans = []
156
- # 1) NER spans
157
  for ent in ner_preds:
158
  spans.append((ent["start"], ent["end"], ent["label"]))
159
-
160
- # 2) RE spans, closest‐match logic (no math import needed)
161
  for src, rels in rel_preds.items():
162
- # find the source span center
163
- src_ent = next((e for e in ner_preds if e["text"] == src), None)
164
- src_center = ((src_ent["start"] + src_ent["end"]) / 2) if src_ent else None
165
-
166
  for r in rels:
167
- target = r["target"]
168
- matches = list(re.finditer(re.escape(target), text))
169
- if not matches:
170
- continue
171
- # pick the match whose center is nearest to src_center
172
- if src_center is not None:
173
- best = min(
174
- matches,
175
- key=lambda m: abs(((m.start() + m.end()) / 2) - src_center)
176
- )
177
- else:
178
- best = matches[0]
179
- spans.append((best.start(), best.end(), f"{src} <> {r['relation']}"))
180
-
181
- # 3) merge & return…
182
- merged = defaultdict(list)
183
- for s, e, lbl in spans:
184
- merged[(s, e)].append(lbl)
185
 
186
- entities = [
187
- {"entity": ", ".join(lbls), "start": s, "end": e}
188
- for (s, e), lbls in sorted(merged.items(), key=lambda x: x[0])
189
- ]
 
 
 
 
 
 
 
 
 
190
  return {"text": text, "entities": entities}, {"ner": ner_preds, "relations": rel_preds}
191
 
192
  # JSON output function
 
109
 
110
  # Highlighting function
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  def highlight_text(text, ner_threshold, re_threshold):
113
+ # Run inference
114
+ ner_preds, rel_preds = inference_pipeline(
115
+ text,
116
+ model=model,
117
+ labels=labels,
118
+ relation_extractor=relation_extractor,
119
+ TYPE2RELS=TYPE2RELS,
120
+ ner_threshold=ner_threshold,
121
+ re_threshold=re_threshold,
122
+ re_multi_label=False
123
+ )
124
+
125
+ # Post-process
126
  ner_preds, rel_preds = prune_acronym_and_self_relations(ner_preds, rel_preds)
127
 
128
+ # Gather all spans
129
  spans = []
 
130
  for ent in ner_preds:
131
  spans.append((ent["start"], ent["end"], ent["label"]))
 
 
132
  for src, rels in rel_preds.items():
 
 
 
 
133
  for r in rels:
134
+ for m in re.finditer(re.escape(r["target"]), text):
135
+ spans.append((m.start(), m.end(), f"{src} <> {r['relation']}"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
+ # Merge labels by span
138
+ merged = defaultdict(list)
139
+ for start, end, lbl in spans:
140
+ merged[(start, end)].append(lbl)
141
+
142
+ # Build Gradio entities
143
+ entities = []
144
+ for (start, end), lbls in sorted(merged.items(), key=lambda x: x[0]):
145
+ entities.append({
146
+ "entity": ", ".join(lbls),
147
+ "start": start,
148
+ "end": end
149
+ })
150
  return {"text": text, "entities": entities}, {"ner": ner_preds, "relations": rel_preds}
151
 
152
  # JSON output function