Spaces:

Pracheethaa
/

Context_info

Sleeping

File size: 5,160 Bytes

6c8edd7
 
 
 
 
0ac101d
6c8edd7
a5b8665
6c8edd7
 
a5b8665
0ac101d
6c8edd7
 
a5b8665
6c8edd7
e111cbc
6c8edd7
 
 
 
 
 
 
 
 
 
 
a5b8665
e111cbc
a5b8665
 
6c8edd7
 
a5b8665
6c8edd7
139cfd7
 
 
6c8edd7
 
a5b8665
139cfd7
 
 
 
6c8edd7
139cfd7
6c8edd7
0ac101d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5b8665
 
6c8edd7
 
 
 
 
 
 
a5b8665
6c8edd7
 
 
 
 
e111cbc
a5b8665
139cfd7
e111cbc
0ac101d
 
a5b8665
e111cbc
 
a5b8665
 
e111cbc
 
 
a5b8665
 
 
 
 
 
0ac101d
 
 
139cfd7
 
 
 
 
 
a5b8665
 
 
 
 
0ac101d
 
139cfd7
a5b8665
6c8edd7
 
 
a5b8665
6c8edd7
a5b8665
 
 
0ac101d
139cfd7
6c8edd7
 
 
139cfd7

import html
import re

import gradio as gr
import requests
import wikipedia
from langdetect import detect
from transformers import pipeline

# Load NER model
# Built once at import time and reused by every call to `ner_wikidata_lookup`.
# `aggregation_strategy="simple"` replaces the deprecated `grouped_entities=True`
# flag: sub-word tokens are still merged into whole entity spans, without the
# deprecation warning.
ner_pipeline = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", aggregation_strategy="simple")

# Get Wikidata entity info

def get_wikidata_info(entity, lang="en"):
    """Look up ``entity`` on Wikidata by exact label match.

    Sends a SPARQL query to the public Wikidata endpoint asking for at most one
    item whose ``rdfs:label`` in ``lang`` equals ``entity``, together with its
    label, description and optional ``wdt:P625`` coordinate value.

    Args:
        entity: Label text to search for.
        lang: Language tag used for the label match and for the label service.
            Defaults to ``"en"``.

    Returns:
        A ``(label, description, coordinate, wikidata_url)`` tuple of strings.
        The lookup is best-effort: when nothing matches, the language tag is
        malformed, or the request/parsing fails, the tuple
        ``(entity, "No description available.", "", "")`` is returned instead
        of raising.
    """
    fallback = (entity, "No description available.", "", "")

    # `lang` is spliced into the query as a bare language tag, so refuse
    # anything that is not shaped like one (e.g. "en", "zh-cn").
    if not isinstance(lang, str) or not re.fullmatch(r"[A-Za-z]+(-[A-Za-z0-9]+)*", lang):
        return fallback

    # `entity` ends up inside a double-quoted SPARQL string literal; escape the
    # characters that would otherwise terminate or corrupt that literal.
    literal = (
        str(entity)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    query = f'''
    SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {{
      ?item rdfs:label "{literal}"@{lang}.
      OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    headers = {"Accept": "application/sparql-results+json"}
    timeout_seconds = 10  # without a timeout a stalled endpoint blocks the caller forever

    try:
        response = requests.get(
            url,
            params={"query": query},
            headers=headers,
            timeout=timeout_seconds,
        )
        response.raise_for_status()
        bindings = response.json()["results"]["bindings"]
        if not bindings:
            return fallback

        item = bindings[0]
        label = item.get('itemLabel', {}).get('value', entity)
        description = item.get('itemDescription', {}).get('value', 'No description available.')
        coord = item.get('coordinate', {}).get('value', '')
        wikidata_link = item.get('item', {}).get('value', '')
        return label, description, coord, wikidata_link
    except (requests.RequestException, ValueError, LookupError, TypeError, AttributeError):
        # Network failure, non-JSON body, or an unexpected response shape:
        # degrade to the fallback rather than failing the whole lookup.
        return fallback

# Get Wikipedia details

def get_wikipedia_details(entity, lang="en"):
    """Fetch the Wikipedia page URL plus a few categories and links for ``entity``.

    Args:
        entity: Title (or near-title) to resolve; suggestions and redirects
            are enabled for the page lookup.
        lang: Language passed to ``wikipedia.set_lang``. Defaults to ``"en"``.

    Returns:
        A ``(url, categories, links)`` tuple where ``categories`` and ``links``
        hold at most five entries each. On any lookup failure the function
        returns ``("", [], [])`` instead of raising.
    """
    try:
        # NOTE(review): set_lang is a module-level call on `wikipedia`, so the
        # language choice is presumably process-wide — verify before serving
        # concurrent requests in different languages.
        wikipedia.set_lang(lang)
        page = wikipedia.page(entity, auto_suggest=True, redirect=True)
        categories = page.categories[:5]
        links = page.links[:5]
        url = page.url
        return url, categories, links
    except Exception:
        # Deliberately best-effort: a missing or ambiguous page, or a network
        # problem, just means "no Wikipedia details". `Exception` (rather than
        # a bare `except`) lets KeyboardInterrupt and SystemExit propagate.
        return "", [], []

# Enrich info with tags and intent
def enrich_info(summary):
    """Derive display tags and a coarse intent label from ``summary``.

    Each rule is a case-insensitive regex searched anywhere in ``summary``.
    Tags are collected in rule order; the intent is decided by the first
    matching intent rule and defaults to a general-knowledge label.

    Returns:
        A ``(related_info, intent)`` tuple: a list of tag strings and one
        intent string.
    """
    def mentions(pattern):
        # Single place that fixes the matching mode for every rule below.
        return re.search(pattern, summary, re.IGNORECASE) is not None

    tag_rules = (
        (r'capital', "🏛️ Capital city"),
        (r'tourism|attraction', "🧳 Popular for tourism"),
        (r'population', "👥 Densely populated"),
        (r'university|education', "🎓 Educational hub"),
        (r'beach', "🏖️ Known for beaches"),
    )
    related_info = [tag for pattern, tag in tag_rules if mentions(pattern)]

    # Travel wording takes precedence over education wording.
    if mentions(r'tourism|travel'):
        intent = "Looking for travel guidance"
    elif mentions(r'university|education'):
        intent = "Seeking educational info"
    else:
        intent = "General knowledge inquiry"

    return related_info, intent

def _build_osm_link(coord):
    """Return an OpenStreetMap anchor for a ``Point(lon lat)`` string, else ``""``."""
    if not coord:
        return ""
    try:
        lon, lat = coord.replace('Point(', '').replace(')', '').split(' ')
        # float() rejects anything that is not a plain number, so only numeric
        # text can reach the href below.
        float(lon)
        float(lat)
    except (ValueError, AttributeError):
        return ""
    return f"<a href='https://www.openstreetmap.org/?mlat={lat}&mlon={lon}' target='_blank'>📍 View on OpenStreetMap</a>"


# Main combined function
def ner_wikidata_lookup(text):
    """Run NER on ``text`` and render an HTML report for each distinct entity.

    For every unique entity name the report includes the Wikidata label and
    description, Wikidata/Wikipedia links, an OpenStreetMap link when a
    coordinate is available, tags and intent derived from the description, and
    a few Wikipedia categories and related topics.

    Args:
        text: Free-form input sentence(s).

    Returns:
        An HTML string, or the plain message ``"No named entities found."``
        when the input is empty or no entity passes the name filter.
    """
    if not text or not text.strip():
        return "No named entities found."

    try:
        detected_lang = detect(text)
    except Exception:
        # Language detection is best-effort; fall back to English.
        detected_lang = "en"

    entities = ner_pipeline(text)
    seen = set()
    sections = [
        f"<b>🌐 Detected Language:</b> <code>{html.escape(detected_lang)}</code><br><br>"
    ]

    for ent in entities:
        name = ent['word'].strip()
        # NOTE(review): str.isalpha() is False for names containing spaces,
        # hyphens or digits, so multi-word entities such as "New York" are
        # skipped — confirm this is intended.
        if name in seen or not name.isalpha():
            continue
        seen.add(name)

        label, desc, coord, wikidata_url = get_wikidata_info(name, lang=detected_lang)
        wiki_url, wiki_categories, wiki_links = get_wikipedia_details(name, lang=detected_lang)

        # Tagging works on the raw description; escaping happens only at render time.
        related_tags, detected_intent = enrich_info(desc)

        osm_link = _build_osm_link(coord)

        # Everything below that originates from Wikidata/Wikipedia is remote
        # text, so it is HTML-escaped before being placed in the markup.
        links = ""
        if wikidata_url:
            links += f"<a href='{html.escape(wikidata_url, quote=True)}' target='_blank'>🔗 Wikidata</a>  "
        if wiki_url:
            links += f"<a href='{html.escape(wiki_url, quote=True)}' target='_blank'>📘 Wikipedia</a>"

        tags_html = f"<p><b>Related Tags:</b> {' | '.join(related_tags)}</p>" if related_tags else ""
        intent_html = f"<p><b>Intent:</b> {detected_intent}</p>"

        extra_info = ""
        if wiki_categories:
            safe_categories = ', '.join(html.escape(str(c)) for c in wiki_categories)
            extra_info += f"<p><b>Wikipedia Categories:</b> {safe_categories}</p>"
        if wiki_links:
            safe_topics = ', '.join(html.escape(str(t)) for t in wiki_links)
            extra_info += f"<p><b>Related Topics:</b> {safe_topics}</p>"

        sections.append(f"""
            <hr><h3>🔎 {html.escape(str(label))}</h3>
            <p>{html.escape(str(desc))}</p>
            <p>{links}</p>
            <p>{osm_link}</p>
            {tags_html}
            {intent_html}
            {extra_info}
            """)

    return "".join(sections) if seen else "No named entities found."

# Gradio Interface using HTML output
# Wires `ner_wikidata_lookup` to a single multi-line text input and renders the
# string it returns as HTML.
iface = gr.Interface(
    fn=ner_wikidata_lookup,
    inputs=gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
    outputs=gr.HTML(),  # the callback returns markup, not plain text
    title="🌐 NER with Wikidata + Wikipedia + Smart Tags",
    description="Detects named entities, retrieves Wikidata descriptions, adds Wikipedia links, maps, and enriches output with semantic tags, intent detection, categories, and related topics."
)

# Start the web UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()