import gradio as gr
from transformers import pipeline
from langdetect import detect
import requests
import wikipedia
import re
# Load the multilingual NER model (aggregation merges word pieces into whole entity spans;
# aggregation_strategy="simple" is the current name for the deprecated grouped_entities=True)
ner_pipeline = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", aggregation_strategy="simple")
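# Illustrative output shape (exact fields and scores depend on the transformers version and input):
#   ner_pipeline("Albert Einstein was born in Ulm.")
#   -> [{'entity_group': 'PER', 'word': 'Albert Einstein', 'score': 0.99, 'start': 0, 'end': 15},
#       {'entity_group': 'LOC', 'word': 'Ulm', 'score': 0.99, 'start': 28, 'end': 31}]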
# Get Wikidata entity info via the public SPARQL endpoint
def get_wikidata_info(entity, lang="en"):
    # Look the entity up by exact label; P625 is Wikidata's "coordinate location" property.
    query = f'''
    SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {{
      ?item rdfs:label "{entity}"@{lang}.
      OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    headers = {"Accept": "application/sparql-results+json"}
    try:
        response = requests.get(url, params={"query": query}, headers=headers, timeout=10)
        data = response.json()
        if data['results']['bindings']:
            item = data['results']['bindings'][0]
            label = item.get('itemLabel', {}).get('value', entity)
            description = item.get('itemDescription', {}).get('value', 'No description available.')
            coord = item.get('coordinate', {}).get('value', '')
            wikidata_link = item.get('item', {}).get('value', '')
            return label, description, coord, wikidata_link
    except Exception:
        pass
    return entity, "No description available.", "", ""
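# Illustrative call (live results depend on Wikidata's current data):
#   get_wikidata_info("Paris", lang="en")
#   -> ("Paris", "capital and largest city of France",
#       "Point(2.351388888 48.856944444)", "http://www.wikidata.org/entity/Q90")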
# Get Wikipedia page details (URL, first few categories, first few outbound links)
def get_wikipedia_details(entity, lang="en"):
    try:
        wikipedia.set_lang(lang)
        page = wikipedia.page(entity, auto_suggest=True, redirect=True)
        categories = page.categories[:5]
        links = page.links[:5]
        url = page.url
        return url, categories, links
    except Exception:
        return "", [], []
# Enrich info with semantic tags and a rough intent guess based on keywords
def enrich_info(summary):
    related_info = []
    if re.search(r'capital', summary, re.IGNORECASE):
        related_info.append("🏛️ Capital city")
    if re.search(r'tourism|attraction', summary, re.IGNORECASE):
        related_info.append("🧳 Popular for tourism")
    if re.search(r'population', summary, re.IGNORECASE):
        related_info.append("👥 Densely populated")
    if re.search(r'university|education', summary, re.IGNORECASE):
        related_info.append("🎓 Educational hub")
    if re.search(r'beach', summary, re.IGNORECASE):
        related_info.append("🏖️ Known for beaches")
    intent = "General knowledge inquiry"
    if re.search(r'tourism|travel', summary, re.IGNORECASE):
        intent = "Looking for travel guidance"
    elif re.search(r'university|education', summary, re.IGNORECASE):
        intent = "Seeking educational info"
    return related_info, intent
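# Illustrative call (tags and intent follow the keyword rules above):
#   enrich_info("Paris is the capital of France and a major tourism destination.")
#   -> (["🏛️ Capital city", "🧳 Popular for tourism"], "Looking for travel guidance")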
# Main combined function: detect language, extract entities, look each one up
def ner_wikidata_lookup(text):
    try:
        detected_lang = detect(text)
    except Exception:
        detected_lang = "en"
    entities = ner_pipeline(text)
    seen = set()
    result = f"<b>🌐 Detected Language:</b> <code>{detected_lang}</code><br><br>"
    for ent in entities:
        name = ent['word'].strip()
        # Skip duplicates and tokens containing non-letter characters (e.g. stray subwords)
        if name not in seen and name.isalpha():
            seen.add(name)
            label, desc, coord, wikidata_url = get_wikidata_info(name, lang=detected_lang)
            wiki_url, wiki_categories, wiki_links = get_wikipedia_details(name, lang=detected_lang)
            related_tags, detected_intent = enrich_info(desc)
            osm_link = ""
            if coord:
                # Wikidata P625 values are WKT literals like "Point(2.35 48.85)" in lon/lat order
                try:
                    lon, lat = coord.replace('Point(', '').replace(')', '').split(' ')
                    osm_link = f"<a href='https://www.openstreetmap.org/?mlat={lat}&mlon={lon}' target='_blank'>📍 View on OpenStreetMap</a>"
                except Exception:
                    pass
            links = ""
            if wikidata_url:
                links += f"<a href='{wikidata_url}' target='_blank'>🔗 Wikidata</a> "
            if wiki_url:
                links += f"<a href='{wiki_url}' target='_blank'>📖 Wikipedia</a>"
            tags_html = f"<p><b>Related Tags:</b> {' | '.join(related_tags)}</p>" if related_tags else ""
            intent_html = f"<p><b>Intent:</b> {detected_intent}</p>"
            extra_info = ""
            if wiki_categories:
                extra_info += f"<p><b>Wikipedia Categories:</b> {', '.join(wiki_categories)}</p>"
            if wiki_links:
                extra_info += f"<p><b>Related Topics:</b> {', '.join(wiki_links)}</p>"
            result += f"""
            <hr><h3>📍 {label}</h3>
            <p>{desc}</p>
            <p>{links}</p>
            <p>{osm_link}</p>
            {tags_html}
            {intent_html}
            {extra_info}
            """
    return result if seen else "No named entities found."
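# Illustrative call (returns an HTML string rendered by the gr.HTML component):
#   ner_wikidata_lookup("Albert Einstein was born in Ulm and studied in Zurich.")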
# Gradio Interface using HTML output
iface = gr.Interface(
    fn=ner_wikidata_lookup,
    inputs=gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
    outputs=gr.HTML(),
    title="🌍 NER with Wikidata + Wikipedia + Smart Tags",
    description="Detects named entities, retrieves Wikidata descriptions, adds Wikipedia links and maps, and enriches the output with semantic tags, intent detection, categories, and related topics."
)
if __name__ == "__main__":
    iface.launch()
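# launch() serves the app locally (http://127.0.0.1:7860 by default);
# pass share=True for a temporary public link when running outside Spaces.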