Spaces:

ai4data
/

datause-detector

Running

App Files Files Community

rafmacalaba committed 20 days ago

Commit

a508c57

1 Parent(s): 0fd2cd1

simulated

Browse files

Files changed (2) hide show

app.py +50 -102
requirements.py +2 -4

app.py CHANGED Viewed

@@ -1,118 +1,66 @@
-import os
-import json
 import gradio as gr
-# import torch
-import spaces
-from gliner import GLiNER
-from gliner.multitask import GLiNERRelationExtractor
-from typing import List, Dict, Any, Tuple
-# Configuration
-data_model_id = "rafmacalaba/gliner_re_finetuned-v3"
-CACHE_DIR = os.environ.get("CACHE_DIR", None)
-# Relation types
-trels = [
     'acronym', 'author', 'data description',
     'data geography', 'data source', 'data type',
     'publication year', 'publisher', 'reference year', 'version'
 ]
-# Map NER labels to relation types
-TYPE2RELS = {
-    "named dataset":   trels,
-    "unnamed dataset": trels,
-    "vague dataset":   trels,
-}
-# Load models
-print("Loading NER+RE model...")
-model = GLiNER.from_pretrained(data_model_id, cache_dir=CACHE_DIR)
-relation_extractor = GLiNERRelationExtractor(model=model)
-# if torch.cuda.is_available():
-#     model.to("cuda")
-#     relation_extractor.model.to("cuda")
-# print("Models loaded.")
-# Inference pipeline
-def inference_pipeline(
-    text: str,
-    model,
-    labels: List[str],
-    relation_extractor: GLiNERRelationExtractor,
-    TYPE2RELS: Dict[str, List[str]],
-    ner_threshold: float = 0.5,
-    re_threshold: float = 0.4,
-    re_multi_label: bool = False,
-) -> Tuple[List[Dict[str, Any]], Dict[str, List[Dict[str, Any]]]]:
-    # NER predictions
-    ner_preds = model.predict_entities(
-        text,
-        labels,
-        flat_ner=True,
-        threshold=ner_threshold
-    )
-    # Relation extraction per entity span
-    re_results: Dict[str, List[Dict[str, Any]]] = {}
-    for ner in ner_preds:
-        span = ner['text']
-        rel_types = TYPE2RELS.get(ner['label'], [])
-        if not rel_types:
-            continue
-        slot_labels = [f"{span} <> {r}" for r in rel_types]
-        preds = relation_extractor(
-            text,
-            relations=None,
-            entities=None,
-            relation_labels=slot_labels,
-            threshold=re_threshold,
-            multi_label=re_multi_label,
-            distance_threshold=100,
-        )[0]
-        re_results[span] = preds
-    return ner_preds, re_results
-# Gradio UI - Step 2: Model Inference
-@spaces.GPU(enable_queue=True, duration=120)
-def model_inference(query: str) -> str:
-    labels = ["named dataset", "unnamed dataset", "vague dataset"]
-    ner_preds, re_results = inference_pipeline(
-        query,
-        model,
-        labels,
-        relation_extractor,
-        TYPE2RELS
-    )
-    output = {
-        "entities": ner_preds,
-        "relations": re_results,
     }
-    return json.dumps(output, indent=2)
-with gr.Blocks(title="Step 2: NER + Relation Inference") as demo:
     gr.Markdown(
         """
-        ## Step 2: Integrate Model Inference
-        Enter text and click submit to run your GLiNER-based NER + RE pipeline.
         """
     )
-    query_input = gr.Textbox(
-        lines=4,
-        placeholder="Type your text here...",
-        label="Input Text",
-    )
-    submit_btn = gr.Button("Submit")
-    output_box = gr.Textbox(
-        lines=15,
-        label="Model Output (JSON)",
-    )
-    submit_btn.click(
-        fn=model_inference,
-        inputs=[query_input],
-        outputs=[output_box],
     )
-if __name__ == "__main__":
-    demo.launch(debug=True)

 import gradio as gr
+from typing import Union, Dict, Any, List
+# Sample pre-calculated entities
+# "start"/"end" are 0-based character offsets into sample_text, end-exclusive,
+# so sample_text[start:end] == text holds for every entry.
+sample_text = (
+    "Recent studies on ocean currents from the Global Ocean Temperature Dataset "
+    "(GOTD) indicate significant shifts in marine biodiversity."
+)
+sample_entities = [
+    {"label": "named dataset", "text": "Global Ocean Temperature Dataset", "start": 42, "end": 74, "score": 0.99},
+    {"label": "acronym", "text": "GOTD", "start": 76, "end": 80, "score": 0.98},
+]
+# Relation types; only displayed in the UI, the simulation does not use them
+rels = [
     'acronym', 'author', 'data description',
     'data geography', 'data source', 'data type',
     'publication year', 'publisher', 'reference year', 'version'
 ]
+# Model names offered by the model selector in the UI
+MODELS = ["demo-model-1", "demo-model-2"]
+# Annotate_query simulation
+def annotate_query(
+    query: str,
+    labels: Union[str, List[str]],
+    threshold: float = 0.3,
+    nested_ner: bool = False,
+    model_name: Union[str, None] = None
+) -> Dict[str, Any]:
+    """Simulate entity annotation of ``query`` for ``gr.HighlightedText``.
+
+    No model is run; the entities in ``sample_entities`` are reused.
+    ``labels``, ``threshold``, ``nested_ner`` and ``model_name`` are accepted
+    so the signature matches the UI inputs, but the simulation ignores them.
+
+    Returns:
+        A dict with ``"text"`` (the query) and ``"entities"``, a list of
+        spans with ``"start"``, ``"end"``, ``"label"`` and ``"entity"`` keys.
+        Sample entities whose text does not occur in the query are omitted.
+    """
+    # In a real app, you'd call parse_query/inference_pipeline here.
+    text = query or ""
+    spans = []
+    for ent in sample_entities:
+        start, end = ent["start"], ent["end"]
+        # Trust the stored offsets only when they really point at the entity
+        # text in this query; the input box is editable, so otherwise search
+        # for the first occurrence of the text instead.
+        if text[start:end] != ent["text"]:
+            start = text.find(ent["text"])
+            if start < 0:
+                continue  # entity does not occur in this query
+            end = start + len(ent["text"])
+        spans.append({
+            "start": start,
+            "end": end,
+            "label": ent["label"],
+            # HighlightedText takes the span category from "entity"
+            # (or "entity_group"); "label" alone leaves the span uncoloured.
+            "entity": ent["label"],
+        })
+    return {
+        "text": text,
+        "entities": spans,
     }
+# Build Gradio UI
+# Components are created inside the `with demo:` context; the button at the
+# end wires the five input components to annotate_query.
+demo = gr.Blocks()
+with demo:
+    # Introductory text shown at the top of the page
     gr.Markdown(
         """
+        ## Step: Annotate Query Simulation
+        Enter text (prepopulated) and click **Annotate** to see how entities are highlighted.
         """
     )
+    # Inputs
+    # Text to annotate, prefilled with the sample sentence
+    query = gr.Textbox(lines=3, value=sample_text, label="Input Text")
+    # Comma-separated relation names; passed to annotate_query as `labels`
+    entities = gr.Textbox(value=", ".join(rels), label="Relations (unused in simulation)")
+    # The remaining inputs map positionally to annotate_query's
+    # threshold, nested_ner and model_name parameters
+    threshold = gr.Slider(0, 1, value=0.3, step=0.01, label="Threshold")
+    nested = gr.Checkbox(value=False, label="Nested NER")
+    model = gr.Radio(choices=MODELS, value=MODELS[0], label="Model")
+    # Outputs
+    # Receives the dict returned by annotate_query
+    output_hl = gr.HighlightedText(label="Annotated Entities")
+    # Button
+    annotate_btn = gr.Button("Annotate")
+    # The order of `inputs` must match annotate_query's parameter order
+    annotate_btn.click(
+        fn=annotate_query,
+        inputs=[query, entities, threshold, nested, model],
+        outputs=[output_hl]
     )
+# NOTE(review): launch is unconditional (no `if __name__ == "__main__":`
+# guard), so importing this module also starts the app; confirm intended.
+demo.launch(debug=True)

requirements.py CHANGED Viewed

@@ -1,4 +1,2 @@
-gradio>=3.0
-gliner  # your GLiNER package
-torch   # PyTorch (CPU build by default)
-huggingface-hub


1	+ gradio
2	+ spaces