File size: 5,160 Bytes
6c8edd7
 
 
 
 
0ac101d
6c8edd7
a5b8665
6c8edd7
 
a5b8665
0ac101d
6c8edd7
 
a5b8665
6c8edd7
e111cbc
6c8edd7
 
 
 
 
 
 
 
 
 
 
a5b8665
e111cbc
a5b8665
 
6c8edd7
 
a5b8665
6c8edd7
139cfd7
 
 
6c8edd7
 
a5b8665
139cfd7
 
 
 
6c8edd7
139cfd7
6c8edd7
0ac101d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5b8665
 
6c8edd7
 
 
 
 
 
 
a5b8665
6c8edd7
 
 
 
 
e111cbc
a5b8665
139cfd7
e111cbc
0ac101d
 
a5b8665
e111cbc
 
a5b8665
 
e111cbc
 
 
a5b8665
 
 
 
 
 
0ac101d
 
 
139cfd7
 
 
 
 
 
a5b8665
 
 
 
 
0ac101d
 
139cfd7
a5b8665
6c8edd7
 
 
a5b8665
6c8edd7
a5b8665
 
 
0ac101d
139cfd7
6c8edd7
 
 
139cfd7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import html
import re

import gradio as gr
import requests
import wikipedia
from langdetect import detect
from transformers import pipeline

# Load NER model
# Built once at module import so every call to ner_wikidata_lookup reuses the
# same pipeline object instead of reloading the model per request.
# NOTE(review): grouped_entities=True presumably makes the pipeline return
# merged entity spans (the code below reads ent['word'] per result); newer
# transformers releases may prefer aggregation_strategy — confirm against the
# installed version.
ner_pipeline = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", grouped_entities=True)

# Get Wikidata entity info

def get_wikidata_info(entity, lang="en"):
    """Look up an entity on Wikidata by its exact label (best effort).

    Sends a SPARQL query to the public Wikidata endpoint asking for the first
    item whose ``rdfs:label`` equals *entity* in language *lang*, along with
    its description and, when present, its ``P625`` coordinate.

    Args:
        entity: Label text to search for.
        lang: Language tag used for the label match and for the label
            service. Defaults to ``"en"``.

    Returns:
        A 4-tuple ``(label, description, coordinate, wikidata_url)``. If the
        input is empty, the language tag is malformed, the request fails, or
        nothing matches, the fallback
        ``(entity, "No description available.", "", "")`` is returned. Network
        and parsing failures are never raised to the caller.
    """
    fallback = (entity, "No description available.", "", "")

    # Nothing to look up: skip the network round-trip entirely.
    if entity is None or not str(entity).strip():
        return fallback

    # lang is interpolated into the query (once outside any string literal),
    # so only accept something shaped like a language tag, e.g. "en", "zh-cn".
    if not isinstance(lang, str) or not re.fullmatch(
        r"[A-Za-z]{2,8}(?:-[A-Za-z0-9]{1,8})*", lang
    ):
        return fallback

    # Escape characters that would terminate or corrupt the SPARQL string
    # literal; the entity text comes from user input via the NER model.
    literal = (
        str(entity)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    query = f'''
    SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {{
      ?item rdfs:label "{literal}"@{lang}.
      OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    headers = {
        "Accept": "application/sparql-results+json",
        # Identify the client explicitly rather than relying on the library default.
        "User-Agent": "ner-wikidata-lookup/1.0 (gradio demo; python-requests)",
    }

    try:
        # A timeout keeps a stalled endpoint from hanging the UI request.
        response = requests.get(
            url, params={"query": query}, headers=headers, timeout=10
        )
        response.raise_for_status()
        bindings = response.json()["results"]["bindings"]
        if not bindings:
            return fallback
        item = bindings[0]
        label = item.get('itemLabel', {}).get('value', entity)
        description = item.get('itemDescription', {}).get('value', 'No description available.')
        coord = item.get('coordinate', {}).get('value', '')
        wikidata_link = item.get('item', {}).get('value', '')
        return label, description, coord, wikidata_link
    except (requests.RequestException, ValueError, KeyError, TypeError,
            AttributeError, IndexError):
        # Transport errors, non-JSON bodies, or an unexpected response shape:
        # the lookup is optional enrichment, so degrade to the fallback.
        return fallback

# Get Wikipedia details

def get_wikipedia_details(entity, lang="en"):
    """Fetch the Wikipedia page URL, categories and links for an entity.

    Sets the ``wikipedia`` module's language to *lang*, then resolves *entity*
    to a page with auto-suggest and redirects enabled.

    Args:
        entity: Title or search text for the page.
        lang: Language passed to ``wikipedia.set_lang``. Defaults to ``"en"``.

    Returns:
        A 3-tuple ``(url, categories, links)`` where ``categories`` and
        ``links`` hold at most the first five entries of each. Returns
        ``("", [], [])`` when *entity* is empty or the lookup fails for any
        reason; this function is best-effort and does not raise.
    """
    # Nothing to look up: avoid a pointless network call.
    if not entity or not str(entity).strip():
        return "", [], []

    try:
        wikipedia.set_lang(lang)
        page = wikipedia.page(entity, auto_suggest=True, redirect=True)
        categories = page.categories[:5]
        links = page.links[:5]
        url = page.url
        return url, categories, links
    except Exception:
        # The lookup can fail in many ways (missing page, ambiguous title,
        # network error). It is optional enrichment, so fall back to empty
        # values, but let KeyboardInterrupt/SystemExit propagate.
        return "", [], []

# Enrich info with tags and intent
def enrich_info(summary):
    """Derive display tags and a coarse intent label from a text summary.

    Each tag rule is a case-insensitive regex; every rule that matches
    contributes its tag, in rule order. The intent is chosen from the first
    matching intent pattern, defaulting to a general-knowledge label.

    Args:
        summary: Text to scan. ``None`` or an empty string yields no tags and
            the default intent.

    Returns:
        A tuple ``(related_info, intent)`` where ``related_info`` is a list of
        tag strings and ``intent`` is a single string.
    """
    text = summary or ""

    # Emoji are written as escapes: the previous literals had been corrupted
    # by an encoding round-trip and rendered as mojibake in the UI.
    tag_rules = (
        (r'capital', "\U0001F3DB\uFE0F Capital city"),               # classical building
        (r'tourism|attraction', "\U0001F9F3 Popular for tourism"),   # luggage
        (r'population', "\U0001F465 Densely populated"),             # people silhouettes
        (r'university|education', "\U0001F393 Educational hub"),     # graduation cap
        (r'beach', "\U0001F3D6\uFE0F Known for beaches"),            # beach with umbrella
    )
    related_info = [
        tag for pattern, tag in tag_rules
        if re.search(pattern, text, re.IGNORECASE)
    ]

    # Travel takes precedence over education when both appear.
    if re.search(r'tourism|travel', text, re.IGNORECASE):
        intent = "Looking for travel guidance"
    elif re.search(r'university|education', text, re.IGNORECASE):
        intent = "Seeking educational info"
    else:
        intent = "General knowledge inquiry"

    return related_info, intent

# Main combined function
def _is_lookup_candidate(name):
    """Return True when *name* consists only of alphabetic words.

    Words may be separated by whitespace, hyphens, apostrophes or periods, so
    multi-word entities such as "New York" or "Jean-Paul" are accepted while
    names containing digits or other symbols are rejected.
    """
    parts = [part for part in re.split(r"[\s\-'\u2019.]+", name) if part]
    return bool(parts) and all(part.isalpha() for part in parts)


def _osm_link(coord):
    """Build the OpenStreetMap anchor for a ``Point(lon lat)`` string, or ""."""
    if not coord:
        return ""
    try:
        lon, lat = coord.replace('Point(', '').replace(')', '').split(' ')
        # Only numeric values may reach the URL.
        float(lon)
        float(lat)
    except ValueError:
        return ""
    return (
        f"<a href='https://www.openstreetmap.org/?mlat={lat}&mlon={lon}' "
        f"target='_blank'>\U0001F4CD View on OpenStreetMap</a>"
    )


def ner_wikidata_lookup(text):
    """Run NER on *text* and render an HTML report for each distinct entity.

    The language is detected first (falling back to ``"en"`` on failure).
    Each unique entity name found by ``ner_pipeline`` is then enriched with
    Wikidata and Wikipedia data, tags and intent from ``enrich_info``, and an
    OpenStreetMap link when a coordinate is available.

    All externally sourced text (labels, descriptions, URLs, categories,
    related links) is HTML-escaped before being embedded in the output.

    Args:
        text: Free-form input sentence(s).

    Returns:
        An HTML string, or ``"No named entities found."`` when the input is
        blank or no usable entity was detected.
    """
    # Blank input: skip language detection and model inference altogether.
    if not text or not text.strip():
        return "No named entities found."

    try:
        detected_lang = detect(text)
    except Exception:
        # Detection is best-effort (e.g. text with no detectable features).
        detected_lang = "en"

    seen = set()
    sections = [
        f"<b>🌐 Detected Language:</b> <code>{html.escape(str(detected_lang))}</code><br><br>"
    ]

    for ent in ner_pipeline(text):
        name = ent['word'].strip()
        if name in seen or not _is_lookup_candidate(name):
            continue
        seen.add(name)

        label, desc, coord, wikidata_url = get_wikidata_info(name, lang=detected_lang)
        wiki_url, wiki_categories, wiki_links = get_wikipedia_details(name, lang=detected_lang)

        related_tags, detected_intent = enrich_info(desc)

        osm_link = _osm_link(coord)

        links = ""
        if wikidata_url:
            links += f"<a href='{html.escape(str(wikidata_url))}' target='_blank'>\U0001F517 Wikidata</a>  "
        if wiki_url:
            links += f"<a href='{html.escape(str(wiki_url))}' target='_blank'>\U0001F4D8 Wikipedia</a>"

        # Tags and intent are produced locally by enrich_info, so they are
        # trusted and not escaped.
        tags_html = f"<p><b>Related Tags:</b> {' | '.join(related_tags)}</p>" if related_tags else ""
        intent_html = f"<p><b>Intent:</b> {detected_intent}</p>"

        extra_info = ""
        if wiki_categories:
            safe_categories = ', '.join(html.escape(str(c)) for c in wiki_categories)
            extra_info += f"<p><b>Wikipedia Categories:</b> {safe_categories}</p>"
        if wiki_links:
            safe_links = ', '.join(html.escape(str(link)) for link in wiki_links)
            extra_info += f"<p><b>Related Topics:</b> {safe_links}</p>"

        sections.append(f"""
            <hr><h3>\U0001F50E {html.escape(str(label))}</h3>
            <p>{html.escape(str(desc))}</p>
            <p>{links}</p>
            <p>{osm_link}</p>
            {tags_html}
            {intent_html}
            {extra_info}
            """)

    return "".join(sections) if seen else "No named entities found."

# Gradio Interface using HTML output
# Wires ner_wikidata_lookup to a multi-line textbox input; the function's
# return value is rendered through an HTML output component.
iface = gr.Interface(
    fn=ner_wikidata_lookup,
    inputs=gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
    outputs=gr.HTML(),
    title="🌐 NER with Wikidata + Wikipedia + Smart Tags",
    description="Detects named entities, retrieves Wikidata descriptions, adds Wikipedia links, maps, and enriches output with semantic tags, intent detection, categories, and related topics."
)

# Only start the web UI when this file is executed directly, not on import.
if __name__ == "__main__":
    iface.launch()