Spaces:

ai4data
/

datause-detector

Running

App Files Files Community

datause-detector / app.py

rafmacalaba

use ner and rel

cd683ff 21 days ago

raw

history blame

3.67 kB

	import re
	import gradio as gr

	# Hard-coded outputs captured from the model (NER + relation extraction).
	_DATASET_NAME = 'Home Visits Survey'

	# NER result: a single dataset mention with character offsets and confidence.
	# NOTE(review): in SAMPLE_TEXT the mention appears to start at index 11, not
	# 12 -- confirm these offsets were computed against the same string.
	ner = [
	    dict(
	        start=12,
	        end=30,
	        text=_DATASET_NAME,
	        label='named dataset',
	        score=0.9947463870048523,
	    ),
	]

	# One (relation, target, score) row per extracted relation; every row has
	# the dataset mention above as its source.
	_RELATION_ROWS = (
	    ('data geography', 'Jordan', 0.6180844902992249),
	    ('version', 'Round II', 0.9688164591789246),
	    ('acronym', 'HV', 0.9140607714653015),
	    ('author', 'UNHCR', 0.7762154340744019),
	    ('author', 'World Food Programme', 0.6582539677619934),
	    ('reference year', '2013', 0.524115264415741),
	    ('publication year', '2014', 0.6853994131088257),
	    ('data description', 'detailed socio-economic, health, and protection data', 0.6544178128242493),
	)

	# Relations keyed by source mention, each expanded to the full dict shape.
	relations = {
	    _DATASET_NAME: [
	        {'source': _DATASET_NAME, 'relation': rel, 'target': tgt, 'score': conf}
	        for rel, tgt, conf in _RELATION_ROWS
	    ],
	}

	# Default input for the demo: the passage the hard-coded model outputs
	# describe, assembled sentence by sentence.
	SAMPLE_TEXT = " ".join([
	    "The Jordan Home Visits Survey, Round II (HV), was carried out by UNHCR "
	    "and the World Food Programme between November 2013 and September 2014.",
	    "Through in-home visits to Syrian refugee households in Jordan, it gathered "
	    "detailed socio-economic, health, and protection data—each household tagged "
	    "with a unique ID to allow longitudinal tracking.",
	])

	def _literal_spans(text, needle, label):
	    """Return one entity dict per non-overlapping occurrence of *needle* in *text*."""
	    if not needle:
	        # An empty pattern matches at every position and would only
	        # produce zero-width spans.
	        return []
	    return [
	        {"entity": label, "start": m.start(), "end": m.end()}
	        for m in re.finditer(re.escape(needle), text)
	    ]


	def highlight_text(text, *, ner_spans=None, relation_map=None):
	    """Build the ``{"text", "entities"}`` payload used for highlighting *text*.

	    Args:
	        text: The string to annotate. ``None`` is treated as an empty string.
	        ner_spans: Iterable of NER dicts with ``start``, ``end`` and ``label``
	            keys and, optionally, ``text`` (the surface form). Defaults to the
	            module-level ``ner`` list.
	        relation_map: Mapping of source mention to a list of relation dicts
	            with ``relation`` and ``target`` keys. Defaults to the
	            module-level ``relations`` dict.

	    Returns:
	        A dict ``{"text": text, "entities": [...]}`` where every entity is
	        ``{"entity": label, "start": int, "end": int}``. NER entities come
	        first, followed by one entity per literal occurrence of each relation
	        target, labelled with the relation name.
	    """
	    # Resolve the defaults at call time so the globals are only needed when
	    # the caller does not supply its own data.
	    if ner_spans is None:
	        ner_spans = ner
	    if relation_map is None:
	        relation_map = relations
	    text = text or ""

	    entities = []

	    # 1) NER spans. The stored offsets were computed for one specific input,
	    #    so they are only trusted when they are in range and actually cover
	    #    the recorded surface text; otherwise the mention is re-located by
	    #    literal search (and dropped if it does not occur at all).
	    for ent in ner_spans:
	        start, end, label = ent["start"], ent["end"], ent["label"]
	        surface = ent.get("text")
	        in_range = 0 <= start < end <= len(text)
	        if surface is None:
	            # Nothing to verify against: keep the offsets if they fit.
	            if in_range:
	                entities.append({"entity": label, "start": start, "end": end})
	        elif in_range and text[start:end] == surface:
	            entities.append({"entity": label, "start": start, "end": end})
	        else:
	            entities.extend(_literal_spans(text, surface, label))

	    # 2) RE spans: annotate every occurrence of each target with its relation
	    #    label. The source key is not needed, so only the values are walked.
	    for rels in relation_map.values():
	        for rel in rels:
	            entities.extend(_literal_spans(text, rel["target"], rel["relation"]))

	    return {"text": text, "entities": entities}

	# Assemble the demo page: a heading, an input textbox pre-filled with the
	# sample passage, a trigger button, the highlighted output and a static legend.
	with gr.Blocks() as demo:
	    gr.Markdown(
	        "## Data Use Detector\n"
	        "Input text and the model will highlight the entities it detects."
	    )

	    txt_in = gr.Textbox(label="Input Text", lines=4, value=SAMPLE_TEXT)
	    btn = gr.Button("Highlight Entities")
	    txt_out = gr.HighlightedText(label="Annotated Entities")

	    # The same handler serves the button click, the textbox submit event and
	    # the initial page load, always reading txt_in and writing txt_out.
	    for _register in (btn.click, txt_in.submit, demo.load):
	        _register(fn=highlight_text, inputs=txt_in, outputs=txt_out)

	    # Hand-written legend mirroring the hard-coded labels and their targets.
	    gr.Markdown("""
	Legend
	- named dataset → Home Visits Survey
	- data geography → Jordan
	- version → Round II
	- acronym → HV
	- author → UNHCR, World Food Programme
	- reference year → 2013
	- publication year → 2014
	- data description → detailed socio-economic, health, and protection data
	""")

	# Start the Gradio server only when the file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()