# Spaces: Sleeping
import html
import re

import gradio as gr
import requests
import wikipedia
from langdetect import detect
from transformers import pipeline
# Load NER model
# aggregation_strategy="simple" groups word pieces into whole entities; it is
# the supported spelling of the deprecated grouped_entities=True flag.
ner_pipeline = pipeline(
    "ner",
    model="Davlan/xlm-roberta-base-ner-hrl",
    aggregation_strategy="simple",
)
# Get Wikidata entity info | |
def get_wikidata_info(entity, lang="en"): | |
query = f''' | |
SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {{ | |
?item rdfs:label "{entity}"@{lang}. | |
OPTIONAL {{ ?item wdt:P625 ?coordinate. }} | |
SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }} | |
}} LIMIT 1 | |
''' | |
url = "https://query.wikidata.org/sparql" | |
headers = {"Accept": "application/sparql-results+json"} | |
try: | |
response = requests.get(url, params={"query": query}, headers=headers) | |
data = response.json() | |
if data['results']['bindings']: | |
item = data['results']['bindings'][0] | |
label = item.get('itemLabel', {}).get('value', entity) | |
description = item.get('itemDescription', {}).get('value', 'No description available.') | |
coord = item.get('coordinate', {}).get('value', '') | |
wikidata_link = item.get('item', {}).get('value', '') | |
return label, description, coord, wikidata_link | |
except: | |
pass | |
return entity, "No description available.", "", "" | |
# Get Wikipedia details
def get_wikipedia_details(entity, lang="en"):
    """Fetch the Wikipedia page for *entity* in language *lang*.

    Args:
        entity: Page title (or close match) to look up.
        lang: Language passed to ``wikipedia.set_lang`` before the lookup.

    Returns:
        A ``(url, categories, links)`` tuple: the page URL, up to five of its
        categories and up to five of its outgoing link titles. Any failure
        (missing page, disambiguation, network error, ...) or a blank
        *entity* yields ``("", [], [])``.
    """
    # Nothing to look up; skip the network round trip.
    if not entity or not entity.strip():
        return "", [], []

    try:
        wikipedia.set_lang(lang)
        page = wikipedia.page(entity, auto_suggest=True, redirect=True)
        return page.url, page.categories[:5], page.links[:5]
    except Exception:
        # Best-effort enrichment: the library can fail in many ways, and all
        # of them mean "no Wikipedia details". KeyboardInterrupt/SystemExit
        # are deliberately not caught.
        return "", [], []
# Enrich info with tags and intent
# (pattern, tag) pairs: every pattern found in the summary contributes its
# tag, in the order listed here.
_TAG_RULES = (
    (re.compile(r"capital", re.IGNORECASE), "🏛️ Capital city"),
    (re.compile(r"tourism|attraction", re.IGNORECASE), "🧳 Popular for tourism"),
    (re.compile(r"population", re.IGNORECASE), "👥 Densely populated"),
    (re.compile(r"university|education", re.IGNORECASE), "🎓 Educational hub"),
    (re.compile(r"beach", re.IGNORECASE), "🏖️ Known for beaches"),
)

# (pattern, intent) pairs: the first pattern found in the summary wins.
_INTENT_RULES = (
    (re.compile(r"tourism|travel", re.IGNORECASE), "Looking for travel guidance"),
    (re.compile(r"university|education", re.IGNORECASE), "Seeking educational info"),
)

_DEFAULT_INTENT = "General knowledge inquiry"


def enrich_info(summary):
    """Derive display tags and a coarse intent label from *summary*.

    Matching is a case-insensitive keyword search over the text.

    Args:
        summary: Free-text description; ``None`` is treated as empty.

    Returns:
        A ``(related_info, intent)`` tuple: the list of tags whose keywords
        occur in the text, and the first matching intent (travel is checked
        before education) or ``"General knowledge inquiry"`` when none match.
    """
    text = summary or ""
    related_info = [tag for pattern, tag in _TAG_RULES if pattern.search(text)]
    intent = next(
        (label for pattern, label in _INTENT_RULES if pattern.search(text)),
        _DEFAULT_INTENT,
    )
    return related_info, intent
# Main combined function
def _safe_anchor(url, text):
    """Return an ``<a>`` tag for an http(s) *url* (attribute-escaped), else ""."""
    if not url or not url.startswith(("http://", "https://")):
        return ""
    return f"<a href='{html.escape(url)}' target='_blank'>{text}</a>"


def _osm_link_html(coord):
    """Return an OpenStreetMap anchor for a ``Point(lon lat)`` string, or ""."""
    if not coord:
        return ""
    try:
        lon, lat = coord.replace("Point(", "").replace(")", "").split(" ")
        # Only numeric values may reach the URL; float() raises otherwise.
        float(lon)
        float(lat)
    except ValueError:
        return ""
    return (
        f"<a href='https://www.openstreetmap.org/?mlat={lat}&amp;mlon={lon}' "
        "target='_blank'>📍 View on OpenStreetMap</a>"
    )


def _render_entity_html(label, desc, links_html, osm_link, tags, intent, categories, topics):
    """Render one entity card; all free text is HTML-escaped here."""
    esc = html.escape
    tags_html = (
        f"<p><b>Related Tags:</b> {' | '.join(esc(tag) for tag in tags)}</p>"
        if tags
        else ""
    )
    intent_html = f"<p><b>Intent:</b> {esc(intent)}</p>"
    extra_info = ""
    if categories:
        joined = ", ".join(esc(category) for category in categories)
        extra_info += f"<p><b>Wikipedia Categories:</b> {joined}</p>"
    if topics:
        joined = ", ".join(esc(topic) for topic in topics)
        extra_info += f"<p><b>Related Topics:</b> {joined}</p>"
    lines = [
        f"<hr><h3>🔍 {esc(label)}</h3>",
        f"<p>{esc(desc)}</p>",
        f"<p>{links_html}</p>",
        f"<p>{osm_link}</p>",
        tags_html,
        intent_html,
        extra_info,
    ]
    return "\n" + "\n".join(lines) + "\n"


def ner_wikidata_lookup(text):
    """Run NER on *text* and return an HTML report enriched per entity.

    The language is detected first (falling back to ``"en"`` when detection
    fails). Each distinct entity name is then looked up on Wikidata and
    Wikipedia and rendered as an HTML card with links, an optional map link,
    tags, intent and related Wikipedia categories/topics.

    Args:
        text: Input sentence(s) in any language.

    Returns:
        An HTML string, or ``"No named entities found."`` when the input is
        blank or no usable entity was extracted.
    """
    if not text or not text.strip():
        return "No named entities found."

    try:
        detected_lang = detect(text)
    except Exception:
        # Detection fails on very short or ambiguous input; default to English.
        detected_lang = "en"

    entities = ner_pipeline(text)
    seen = set()
    parts = [
        f"<b>🌐 Detected Language:</b> <code>{html.escape(detected_lang)}</code><br><br>"
    ]

    for ent in entities:
        name = ent["word"].strip()
        # NOTE(review): isalpha() also rejects multi-word or hyphenated names
        # such as "New York"; confirm whether that filtering is intended.
        if name in seen or not name.isalpha():
            continue
        seen.add(name)

        label, desc, coord, wikidata_url = get_wikidata_info(name, lang=detected_lang)
        wiki_url, wiki_categories, wiki_links = get_wikipedia_details(
            name, lang=detected_lang
        )
        related_tags, detected_intent = enrich_info(desc)

        anchors = (
            _safe_anchor(wikidata_url, "🔗 Wikidata"),
            _safe_anchor(wiki_url, "📘 Wikipedia"),
        )
        links_html = " ".join(anchor for anchor in anchors if anchor)

        parts.append(
            _render_entity_html(
                label,
                desc,
                links_html,
                _osm_link_html(coord),
                related_tags,
                detected_intent,
                wiki_categories,
                wiki_links,
            )
        )

    return "".join(parts) if seen else "No named entities found."
# Gradio Interface using HTML output
# A single free-text box feeds ner_wikidata_lookup; its HTML string is
# rendered as-is by the gr.HTML output component.
iface = gr.Interface(
    fn=ner_wikidata_lookup,
    inputs=gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
    outputs=gr.HTML(),
    title="🌍 NER with Wikidata + Wikipedia + Smart Tags",
    description=(
        "Detects named entities, retrieves Wikidata descriptions, adds "
        "Wikipedia links, maps, and enriches output with semantic tags, "
        "intent detection, categories, and related topics."
    ),
)

# Start the web UI only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()