import gradio as gr
from transformers import pipeline
from langdetect import detect
import requests
import wikipedia
import re
# Load NER model
# aggregation_strategy="simple" merges word-piece tokens into whole entity spans
ner_pipeline = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", aggregation_strategy="simple")
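# With aggregation enabled, each detected entity comes back as a dict roughly like
#   {'entity_group': 'LOC', 'word': 'Paris', 'score': 0.99, 'start': 0, 'end': 5}
# (the score and offsets above are illustrative, not actual model output).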
# Get Wikidata entity info
def get_wikidata_info(entity, lang="en"):
    query = f'''
    SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {{
      ?item rdfs:label "{entity}"@{lang}.
      OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    headers = {"Accept": "application/sparql-results+json"}
    try:
        response = requests.get(url, params={"query": query}, headers=headers, timeout=10)
        data = response.json()
        if data['results']['bindings']:
            item = data['results']['bindings'][0]
            label = item.get('itemLabel', {}).get('value', entity)
            description = item.get('itemDescription', {}).get('value', 'No description available.')
            coord = item.get('coordinate', {}).get('value', '')
            wikidata_link = item.get('item', {}).get('value', '')
            return label, description, coord, wikidata_link
    except Exception:
        pass
    return entity, "No description available.", "", ""
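# Illustrative usage of get_wikidata_info (requires network access; exact values
# depend on live Wikidata data):
#   label, desc, coord, url = get_wikidata_info("Paris", lang="en")
#   coord is a WKT literal such as "Point(2.35 48.85)" when the item has P625, else "".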
# Get Wikipedia details
def get_wikipedia_details(entity, lang="en"):
    try:
        wikipedia.set_lang(lang)
        page = wikipedia.page(entity, auto_suggest=True, redirect=True)
        categories = page.categories[:5]
        links = page.links[:5]
        url = page.url
        return url, categories, links
    except Exception:
        return "", [], []
# Enrich info with tags and intent
def enrich_info(summary):
    related_info = []
    if re.search(r'capital', summary, re.IGNORECASE):
        related_info.append("🏛️ Capital city")
    if re.search(r'tourism|attraction', summary, re.IGNORECASE):
        related_info.append("🧳 Popular for tourism")
    if re.search(r'population', summary, re.IGNORECASE):
        related_info.append("👥 Densely populated")
    if re.search(r'university|education', summary, re.IGNORECASE):
        related_info.append("🎓 Educational hub")
    if re.search(r'beach', summary, re.IGNORECASE):
        related_info.append("🏖️ Known for beaches")

    intent = "General knowledge inquiry"
    if re.search(r'tourism|travel', summary, re.IGNORECASE):
        intent = "Looking for travel guidance"
    elif re.search(r'university|education', summary, re.IGNORECASE):
        intent = "Seeking educational info"

    return related_info, intent
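# Deterministic example of the keyword-based enrichment above:
#   enrich_info("Paris is the capital of France and a major tourism destination.")
#   -> (["🏛️ Capital city", "🧳 Popular for tourism"], "Looking for travel guidance")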
# Main combined function
def ner_wikidata_lookup(text):
    try:
        detected_lang = detect(text)
    except Exception:
        detected_lang = "en"

    entities = ner_pipeline(text)
    seen = set()
    result = f"<h3>🌐 Detected Language: {detected_lang}</h3>"

    for ent in entities:
        name = ent['word'].strip()
        # Skip duplicates and non-alphabetic tokens (multi-word entities contain
        # spaces, so isalpha() filters them out as well)
        if name not in seen and name.isalpha():
            seen.add(name)
            label, desc, coord, wikidata_url = get_wikidata_info(name, lang=detected_lang)
            wiki_url, wiki_categories, wiki_links = get_wikipedia_details(name, lang=detected_lang)
            related_tags, detected_intent = enrich_info(desc)

            osm_link = ""
            if coord:
                try:
                    # Wikidata P625 values are WKT literals: "Point(<longitude> <latitude>)"
                    lon, lat = coord.replace('Point(', '').replace(')', '').split(' ')
                    osm_link = f"<a href='https://www.openstreetmap.org/?mlat={lat}&mlon={lon}' target='_blank'>📍 View on OpenStreetMap</a>"
                except Exception:
                    pass

            links = ""
            if wikidata_url:
                links += f"<a href='{wikidata_url}' target='_blank'>🔗 Wikidata</a> "
            if wiki_url:
                links += f"<a href='{wiki_url}' target='_blank'>📘 Wikipedia</a>"

            tags_html = f"<p><b>Related Tags:</b> {' | '.join(related_tags)}</p>" if related_tags else ""
            intent_html = f"<p><b>Intent:</b> {detected_intent}</p>"

            extra_info = ""
            if wiki_categories:
                extra_info += f"<p><b>Wikipedia Categories:</b> {', '.join(wiki_categories)}</p>"
            if wiki_links:
                extra_info += f"<p><b>Related Topics:</b> {', '.join(wiki_links)}</p>"

            result += f"""
            <h4>{label}</h4>
            <p>{desc}</p>
            <p>{links}</p>
            <p>{osm_link}</p>
            {tags_html}
            {intent_html}
            {extra_info}
            """

    return result if seen else "No named entities found."

# Gradio Interface using HTML output
iface = gr.Interface(
    fn=ner_wikidata_lookup,
    inputs=gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
    outputs=gr.HTML(),
    title="🌐 NER with Wikidata + Wikipedia + Smart Tags",
    description="Detects named entities, retrieves Wikidata descriptions, adds Wikipedia links, maps, and enriches output with semantic tags, intent detection, categories, and related topics."
)

if __name__ == "__main__":
    iface.launch()
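# Example input to try in the UI (any language the NER model covers should work):
#   "Angela Merkel studied physics in Leipzig and later moved to Berlin."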