"""Gradio demo: named-entity recognition enriched with Wikidata and Wikipedia.

The app detects the language of the input text, extracts named entities with a
multilingual NER model, looks each entity up on Wikidata and Wikipedia, and
renders the combined result as HTML.
"""

import html
import logging
import re

import gradio as gr
import requests
import wikipedia
from langdetect import detect
from transformers import pipeline

logger = logging.getLogger(__name__)

WIKIDATA_SPARQL_URL = "https://query.wikidata.org/sparql"
# Without a timeout a stalled endpoint would block the UI handler indefinitely.
REQUEST_TIMEOUT_SECONDS = 10
NO_DESCRIPTION = "No description available."
NO_ENTITIES_MESSAGE = "No named entities found."
DEFAULT_LANG = "en"

# Language tags are interpolated into the SPARQL query, so only accept
# plain tags such as "en" or "zh-cn".
_LANG_TAG_RE = re.compile(r"[A-Za-z]{2,8}(-[A-Za-z0-9]{1,8})*")

# (pattern, tag) pairs checked in order against an entity description.
_TAG_RULES = (
    (re.compile(r'capital', re.IGNORECASE), "🏛️ Capital city"),
    (re.compile(r'tourism|attraction', re.IGNORECASE), "🧳 Popular for tourism"),
    (re.compile(r'population', re.IGNORECASE), "👥 Densely populated"),
    (re.compile(r'university|education', re.IGNORECASE), "🎓 Educational hub"),
    (re.compile(r'beach', re.IGNORECASE), "🏖️ Known for beaches"),
)

# (pattern, intent) pairs; the first match wins.
_INTENT_RULES = (
    (re.compile(r'tourism|travel', re.IGNORECASE), "Looking for travel guidance"),
    (re.compile(r'university|education', re.IGNORECASE), "Seeking educational info"),
)
_DEFAULT_INTENT = "General knowledge inquiry"

# Load NER model
# NOTE(review): newer transformers releases document `aggregation_strategy`
# as the replacement for `grouped_entities` -- confirm against the pinned
# version before switching.
ner_pipeline = pipeline(
    "ner",
    model="Davlan/xlm-roberta-base-ner-hrl",
    grouped_entities=True,
)


def _escape_sparql_literal(value: str) -> str:
    """Escape *value* for use inside a double-quoted SPARQL string literal."""
    return (
        value.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )


# Get Wikidata entity info
def get_wikidata_info(entity: str, lang: str = "en") -> tuple:
    """Look up *entity* on Wikidata by its exact label in language *lang*.

    Returns:
        A ``(label, description, coordinate, wikidata_url)`` tuple of strings.
        ``coordinate`` and ``wikidata_url`` are empty strings when unknown.
        On any lookup failure or when nothing matches, the fallback
        ``(entity, "No description available.", "", "")`` is returned.
    """
    fallback = (entity, NO_DESCRIPTION, "", "")
    if not _LANG_TAG_RE.fullmatch(lang or ""):
        lang = DEFAULT_LANG

    query = f'''
    SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {{
      ?item rdfs:label "{_escape_sparql_literal(entity)}"@{lang}.
      OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    headers = {"Accept": "application/sparql-results+json"}

    try:
        response = requests.get(
            WIKIDATA_SPARQL_URL,
            params={"query": query},
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        bindings = response.json()['results']['bindings']
    except Exception:
        # Best-effort enrichment: a failed lookup must not break the UI.
        logger.warning("Wikidata lookup failed for %r", entity, exc_info=True)
        return fallback

    if not bindings:
        return fallback

    item = bindings[0]
    label = item.get('itemLabel', {}).get('value', entity)
    description = item.get('itemDescription', {}).get('value', NO_DESCRIPTION)
    coord = item.get('coordinate', {}).get('value', '')
    wikidata_link = item.get('item', {}).get('value', '')
    return label, description, coord, wikidata_link


# Get Wikipedia details
def get_wikipedia_details(entity: str, lang: str = "en") -> tuple:
    """Fetch the Wikipedia page URL, categories and links for *entity*.

    Returns:
        A ``(url, categories, links)`` tuple where ``categories`` and
        ``links`` hold at most five entries each. On any failure the result
        is ``("", [], [])``.
    """
    try:
        wikipedia.set_lang(lang)
        page = wikipedia.page(entity, auto_suggest=True, redirect=True)
        return page.url, page.categories[:5], page.links[:5]
    except Exception:
        # Missing and ambiguous pages are expected for arbitrary entities.
        logger.debug("Wikipedia lookup failed for %r", entity, exc_info=True)
        return "", [], []


# Enrich info with tags and intent
def enrich_info(summary: str) -> tuple:
    """Derive keyword-based tags and an intent label from *summary*.

    Returns:
        A ``(related_info, intent)`` tuple: the list of tags whose keyword
        pattern occurs in *summary*, and the first matching intent (or
        "General knowledge inquiry" when none matches).
    """
    summary = summary or ""
    related_info = [tag for pattern, tag in _TAG_RULES if pattern.search(summary)]
    intent = next(
        (label for pattern, label in _INTENT_RULES if pattern.search(summary)),
        _DEFAULT_INTENT,
    )
    return related_info, intent


def _anchor(url: str, text: str) -> str:
    """Return an HTML link to *url* that opens in a new tab."""
    return (
        f'<a href="{html.escape(url, quote=True)}" '
        f'target="_blank" rel="noopener">{text}</a>'
    )


def _osm_link(coord: str) -> str:
    """Build an OpenStreetMap link from a ``Point(lon lat)`` string.

    Returns an empty string when *coord* is empty or cannot be parsed.
    """
    if not coord:
        return ""
    try:
        lon, lat = coord.replace('Point(', '').replace(')', '').split()
        lon, lat = float(lon), float(lat)
    except ValueError:
        return ""
    url = f"https://www.openstreetmap.org/?mlat={lat}&mlon={lon}#map=12/{lat}/{lon}"
    return _anchor(url, "📍 View on OpenStreetMap")


def _render_entity(name: str, lang: str) -> str:
    """Look up one entity and return its HTML section."""
    label, desc, coord, wikidata_url = get_wikidata_info(name, lang=lang)
    wiki_url, wiki_categories, wiki_links = get_wikipedia_details(name, lang=lang)
    related_tags, detected_intent = enrich_info(desc)

    link_parts = []
    if wikidata_url:
        link_parts.append(_anchor(wikidata_url, "🔗 Wikidata"))
    if wiki_url:
        link_parts.append(_anchor(wiki_url, "📘 Wikipedia"))

    # Text fetched from external services is escaped before it is embedded
    # in the HTML output.
    parts = [
        f"<h3>🔎 {html.escape(label)}</h3>",
        f"<p>{html.escape(desc)}</p>",
        f"<p>{' '.join(link_parts)}</p>",
        f"<p>{_osm_link(coord)}</p>",
    ]
    if related_tags:
        parts.append(f"<p>Related Tags: {' | '.join(related_tags)}</p>")
    parts.append(f"<p>Intent: {detected_intent}</p>")
    if wiki_categories:
        categories = ', '.join(html.escape(c) for c in wiki_categories)
        parts.append(f"<p>Wikipedia Categories: {categories}</p>")
    if wiki_links:
        topics = ', '.join(html.escape(t) for t in wiki_links)
        parts.append(f"<p>Related Topics: {topics}</p>")
    return "".join(parts)


# Main combined function
def ner_wikidata_lookup(text: str) -> str:
    """Run NER on *text* and return an HTML report for the entities found.

    Each distinct, purely alphabetic entity is enriched with Wikidata and
    Wikipedia data. Returns "No named entities found." when no entity
    qualifies.
    """
    if not text or not text.strip():
        return NO_ENTITIES_MESSAGE

    try:
        detected_lang = detect(text)
    except Exception:
        # Language detection fails on input without usable features.
        detected_lang = DEFAULT_LANG

    seen = set()
    sections = [f"<h3>🌐 Detected Language: {html.escape(detected_lang)}</h3>"]
    for ent in ner_pipeline(text):
        name = ent['word'].strip()
        # NOTE(review): isalpha() also rejects multi-word entities such as
        # "New York" -- confirm whether that is intended.
        if name in seen or not name.isalpha():
            continue
        seen.add(name)
        sections.append(_render_entity(name, detected_lang))

    return "".join(sections) if seen else NO_ENTITIES_MESSAGE


# Gradio Interface using HTML output
iface = gr.Interface(
    fn=ner_wikidata_lookup,
    inputs=gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
    outputs=gr.HTML(),
    title="🌐 NER with Wikidata + Wikipedia + Smart Tags",
    description="Detects named entities, retrieves Wikidata descriptions, adds Wikipedia links, maps, and enriches output with semantic tags, intent detection, categories, and related topics.",
)

if __name__ == "__main__":
    iface.launch()