Spaces:

ai4data
/

datause-detector

Running

App Files Files Community

rafmacalaba committed 19 days ago

Commit

3d53082

1 Parent(s): c35975c

add model

Browse files

Files changed (1) hide show

app.py +96 -33

app.py CHANGED Viewed

@@ -1,56 +1,119 @@
 import gradio as gr
-# Step 1: Textbox input
-# Define relation types and sample text
-rels = [
-    'acronym',
-    'author',
-    'data description',
-    'data geography',
-    'data source',
-    'data type',
-    'publication year',
-    'publisher',
-    'reference year',
-    'version'
 ]
-sample_text = (
-    "Recent studies on ocean currents from the Global Ocean Temperature Dataset (GOTD) "
-    "indicate significant shifts in marine biodiversity."
-)
-# Dummy inference echoes input + relations
-def dummy_inference(query: str) -> str:
-    # TODO: replace with actual NER+RE model inference
-    return f"Model received: '{query}' with relations: {rels}"
-with gr.Blocks(title="Step 1: Input Box Demo") as demo:
     gr.Markdown(
         """
-        ## Step 1: Implement a Text Input
-        Enter any text below (prepopulated with a sample).
-        This is where your NER + relation-extraction model will later consume the query.
         """
     )
     query_input = gr.Textbox(
         lines=4,
-        value=sample_text,
-        label="Input Text",
         placeholder="Type your text here...",
     )
     submit_btn = gr.Button("Submit")
     output_box = gr.Textbox(
-        lines=3,
-        label="Echoed Output",
     )
     submit_btn.click(
-        fn=dummy_inference,
         inputs=[query_input],
         outputs=[output_box],
     )
-# Launch the demo
 if __name__ == "__main__":
-    demo.queue(default_concurrency_limit=5)
     demo.launch(debug=True)

+import os
+import json
 import gradio as gr
+import torch
+import spaces
+from gliner import GLiNER
+from gliner.multitask import GLiNERRelationExtractor
+from typing import List, Dict, Any, Tuple
+from tqdm.auto import tqdm
+# Configuration
+# Model identifier handed to GLiNER.from_pretrained below.
+data_model_id = "rafmacalaba/gliner_re_finetuned-v3"
+# Optional cache directory taken from the CACHE_DIR environment variable;
+# None when the variable is unset.
+CACHE_DIR = os.environ.get("CACHE_DIR", None)
+# Relation types queried for every detected dataset mention
+trels = [
+    'acronym', 'author', 'data description',
+    'data geography', 'data source', 'data type',
+    'publication year', 'publisher', 'reference year', 'version'
 ]
+# Map NER labels to relation types
+# (all three labels currently share the same full list)
+TYPE2RELS = {
+    "named dataset":   trels,
+    "unnamed dataset": trels,
+    "vague dataset":   trels,
+}
+# Load models
+# Runs at import time; the relation extractor wraps the same GLiNER instance
+# that is used for entity prediction.
+print("Loading NER+RE model...")
+model = GLiNER.from_pretrained(data_model_id, cache_dir=CACHE_DIR)
+relation_extractor = GLiNERRelationExtractor(model=model)
+# Move the model (and the extractor's model reference) to the GPU when CUDA
+# is available.
+if torch.cuda.is_available():
+    model.to("cuda")
+    relation_extractor.model.to("cuda")
+print("Models loaded.")
+# Inference pipeline
+def inference_pipeline(
+    text: str,
+    model,
+    labels: List[str],
+    relation_extractor: GLiNERRelationExtractor,
+    TYPE2RELS: Dict[str, List[str]],
+    ner_threshold: float = 0.5,
+    re_threshold: float = 0.4,
+    re_multi_label: bool = False,
+) -> Tuple[List[Dict[str, Any]], Dict[str, List[Dict[str, Any]]]]:
+    # NER predictions
+    ner_preds = model.predict_entities(
+        text,
+        labels,
+        flat_ner=True,
+        threshold=ner_threshold
+    )
+    # Relation extraction per entity span
+    re_results: Dict[str, List[Dict[str, Any]]] = {}
+    for ner in ner_preds:
+        span = ner['text']
+        rel_types = TYPE2RELS.get(ner['label'], [])
+        if not rel_types:
+            continue
+        slot_labels = [f"{span} <> {r}" for r in rel_types]
+        preds = relation_extractor(
+            text,
+            relations=None,
+            entities=None,
+            relation_labels=slot_labels,
+            threshold=re_threshold,
+            multi_label=re_multi_label,
+            distance_threshold=100,
+        )[0]
+        re_results[span] = preds
+    return ner_preds, re_results
+# Gradio UI - Step 2: Model Inference
+@spaces.GPU(enable_queue=True, duration=120)
+def model_inference(query: str) -> str:
+    labels = ["named dataset", "unnamed dataset", "vague dataset"]
+    ner_preds, re_results = inference_pipeline(
+        query,
+        model,
+        labels,
+        relation_extractor,
+        TYPE2RELS
+    )
+    output = {
+        "entities": ner_preds,
+        "relations": re_results,
+    }
+    return json.dumps(output, indent=2)
+# Page layout: a heading, an input textbox, a submit button, and an output
+# textbox that displays the JSON string returned by model_inference.
+with gr.Blocks(title="Step 2: NER + Relation Inference") as demo:
     gr.Markdown(
         """
+        ## Step 2: Integrate Model Inference
+        Enter text and click submit to run your GLiNER-based NER + RE pipeline.
         """
     )
     query_input = gr.Textbox(
         lines=4,
         placeholder="Type your text here...",
+        label="Input Text",
     )
     submit_btn = gr.Button("Submit")
     output_box = gr.Textbox(
+        lines=15,
+        label="Model Output (JSON)",
     )
+    # Clicking the button sends the input text to model_inference and writes
+    # its return value into the output box.
     submit_btn.click(
+        fn=model_inference,
         inputs=[query_input],
         outputs=[output_box],
     )
+# Start the app only when this file is executed as a script.
 if __name__ == "__main__":
     demo.launch(debug=True)