# Spaces: Running
import re | |
import gradio as gr | |
# Example model outputs for the sample sentence (SAMPLE_TEXT, defined below).
#
# NER spans: `start`/`end` are 0-based, end-exclusive character offsets into
# SAMPLE_TEXT, i.e. SAMPLE_TEXT[start:end] == text.  The offsets previously
# recorded here (12, 30) were shifted by one character and selected
# "ome Visits Survey," instead of the dataset name.
ner = [
    {
        'start': 11,
        'end': 29,
        'text': 'Home Visits Survey',
        'label': 'named dataset',
        'score': 0.9947463870048523,
    }
]

# Relation-extraction outputs, keyed by the source entity text.  Each entry
# links the source to a `target` string through a `relation` label; targets
# carry no offsets and are located in the text by string search.
relations = {
    'Home Visits Survey': [
        {'source': 'Home Visits Survey', 'relation': 'data geography', 'target': 'Jordan', 'score': 0.6180844902992249},
        {'source': 'Home Visits Survey', 'relation': 'version', 'target': 'Round II', 'score': 0.9688164591789246},
        {'source': 'Home Visits Survey', 'relation': 'acronym', 'target': 'HV', 'score': 0.9140607714653015},
        {'source': 'Home Visits Survey', 'relation': 'author', 'target': 'UNHCR', 'score': 0.7762154340744019},
        {'source': 'Home Visits Survey', 'relation': 'author', 'target': 'World Food Programme', 'score': 0.6582539677619934},
        {'source': 'Home Visits Survey', 'relation': 'reference year', 'target': '2013', 'score': 0.524115264415741},
        {'source': 'Home Visits Survey', 'relation': 'publication year', 'target': '2014', 'score': 0.6853994131088257},
        {'source': 'Home Visits Survey', 'relation': 'data description', 'target': 'detailed socio-economic, health, and protection data', 'score': 0.6544178128242493},
    ]
}

# The sample sentence you want to highlight:
SAMPLE_TEXT = (
    "The Jordan Home Visits Survey, Round II (HV), was carried out by UNHCR and the World Food "
    "Programme between November 2013 and September 2014. Through in-home visits to Syrian refugee "
    "households in Jordan, it gathered detailed socio-economic, health, and protection data—each "
    "household tagged with a unique ID to allow longitudinal tracking."
)
def highlight_text(text, *, ner_spans=None, relation_map=None):
    """Build the entity payload that highlights model outputs in *text*.

    Args:
        text: The string to annotate (whatever the user typed in the textbox).
        ner_spans: Iterable of NER dicts with ``label`` and optionally
            ``start``, ``end`` and ``text``.  Defaults to the module-level
            ``ner`` list.
        relation_map: Mapping of source entity -> list of relation dicts with
            ``relation`` and ``target`` keys.  Defaults to the module-level
            ``relations`` dict.

    Returns:
        ``{"text": text, "entities": [...]}`` where each entity is a dict with
        ``entity`` (label), ``start`` and ``end`` (end-exclusive) keys.
        Entities are sorted by ``start`` and never overlap.
    """
    if ner_spans is None:
        ner_spans = ner
    if relation_map is None:
        relation_map = relations

    # Nothing to annotate; also avoids emitting spans that point past the end.
    if not text:
        return {"text": text or "", "entities": []}

    found = []  # (start, end, label) candidates, NER first

    # 1) NER spans.  Recorded offsets are only trusted when they actually
    #    select the entity's surface form in *this* text; otherwise (edited
    #    input, shifted offsets) the surface form is searched for instead.
    for ent in ner_spans:
        label = ent["label"]
        start, end = ent.get("start"), ent.get("end")
        surface = ent.get("text")
        offsets_ok = (
            isinstance(start, int)
            and isinstance(end, int)
            and 0 <= start < end <= len(text)
        )
        if offsets_ok and (not surface or text[start:end] == surface):
            found.append((start, end, label))
        elif surface:
            found.extend(
                (m.start(), m.end(), label)
                for m in re.finditer(re.escape(surface), text)
            )

    # 2) RE spans: annotate every occurrence of each target with its
    #    relation label.
    for rels in relation_map.values():
        for rel in rels:
            target = rel["target"]
            if not target:
                # An empty pattern matches (zero-length) at every position.
                continue
            label = rel["relation"]
            found.extend(
                (m.start(), m.end(), label)
                for m in re.finditer(re.escape(target), text)
            )

    # 3) Drop overlaps: the highlighter slices the text sequentially, so
    #    overlapping spans would repeat fragments.  Earliest start wins, then
    #    the longest span; the stable sort keeps NER ahead of relations on
    #    exact ties.
    found.sort(key=lambda span: (span[0], -span[1]))
    entities = []
    cursor = 0
    for start, end, label in found:
        if start < cursor:
            continue
        entities.append({"entity": label, "start": start, "end": end})
        cursor = end

    return {"text": text, "entities": entities}
# UI layout: a textbox pre-filled with the sample sentence, a trigger button,
# the highlighted output, and a static legend of label -> example value.
with gr.Blocks() as demo:
    gr.Markdown(
        "## Data Use Detector\nInput text and the model will highlight the entities it detects."
    )

    txt_in = gr.Textbox(label="Input Text", lines=4, value=SAMPLE_TEXT)
    btn = gr.Button("Highlight Entities")
    txt_out = gr.HighlightedText(label="Annotated Entities")

    # Every trigger runs the same function on the same input/output pair:
    # button click, Enter in the textbox, and the initial page load.
    _wiring = dict(fn=highlight_text, inputs=txt_in, outputs=txt_out)
    for _register in (btn.click, txt_in.submit, demo.load):
        _register(**_wiring)

    gr.Markdown("""
**Legend**
- **named dataset** → Home Visits Survey
- **data geography** → Jordan
- **version** → Round II
- **acronym** → HV
- **author** → UNHCR, World Food Programme
- **reference year** → 2013
- **publication year** → 2014
- **data description** → detailed socio-economic, health, and protection data
""")

# Start the app only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()