Pracheethaa committed on
Commit
e111cbc
·
verified ·
1 Parent(s): 994fc85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -10
app.py CHANGED
@@ -10,11 +10,14 @@ ner_pipeline = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", grouped_
10
  # Translation models cache
11
  translation_models = {}
12
 
13
- # Get Wikidata entity info via SPARQL
14
  def get_wikidata_info(entity, lang="en"):
15
  query = f'''
16
- SELECT ?item ?itemLabel ?itemDescription WHERE {{
17
  ?item rdfs:label "{entity}"@{lang}.
 
 
 
18
  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
19
  }} LIMIT 1
20
  '''
@@ -27,10 +30,13 @@ def get_wikidata_info(entity, lang="en"):
27
  item = data['results']['bindings'][0]
28
  label = item.get('itemLabel', {}).get('value', entity)
29
  description = item.get('itemDescription', {}).get('value', '')
30
- return label, description
 
 
 
31
  except:
32
  pass
33
- return entity, ""
34
 
35
  # Get Wikipedia description as fallback
36
  def get_wikipedia_summary(entity, lang="en"):
@@ -54,7 +60,7 @@ def translate_text(text, src_lang, tgt_lang):
54
  except:
55
  return text # Return untranslated if model fails
56
 
57
- # Combined NER + Wikidata + fallback Wikipedia + translation
58
  def multilingual_entity_info(text, output_lang):
59
  try:
60
  detected_lang = detect(text)
@@ -69,11 +75,27 @@ def multilingual_entity_info(text, output_lang):
69
  name = ent['word'].strip()
70
  if name not in seen and name.isalpha():
71
  seen.add(name)
72
- label, desc = get_wikidata_info(name, lang=detected_lang)
73
  if not desc:
74
  desc = get_wikipedia_summary(name, lang=detected_lang)
75
  translated_desc = translate_text(desc, detected_lang, output_lang)
76
- result += f"\n---\n\n## πŸ”Ž {label}\n\n{translated_desc}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  return result if seen else "No named entities found."
79
 
@@ -85,9 +107,9 @@ iface = gr.Interface(
85
  gr.Dropdown(label="Select Output Language", choices=["en", "hi", "es", "fr", "de", "ta", "zh"], value="en")
86
  ],
87
  outputs=gr.Markdown(),
88
- title="🌐 Multilingual NER + Wikidata + Wikipedia",
89
- description="Detects entities in any language, fetches descriptions from Wikidata (or Wikipedia), and translates the output into your chosen language."
90
  )
91
 
92
  if __name__ == "__main__":
93
- iface.launch()
 
10
  # Translation models cache
11
  translation_models = {}
12
 
13
+ # Get enriched Wikidata info via SPARQL
14
  def get_wikidata_info(entity, lang="en"):
15
  query = f'''
16
+ SELECT ?item ?itemLabel ?itemDescription ?coordinate ?website ?sitelink WHERE {{
17
  ?item rdfs:label "{entity}"@{lang}.
18
+ OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
19
+ OPTIONAL {{ ?item wdt:P856 ?website. }}
20
+ OPTIONAL {{ ?sitelink schema:about ?item; schema:isPartOf <https://{lang}.wikipedia.org/>. }}
21
  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
22
  }} LIMIT 1
23
  '''
 
30
  item = data['results']['bindings'][0]
31
  label = item.get('itemLabel', {}).get('value', entity)
32
  description = item.get('itemDescription', {}).get('value', '')
33
+ coord = item.get('coordinate', {}).get('value', '')
34
+ website = item.get('website', {}).get('value', '')
35
+ wiki = item.get('sitelink', {}).get('value', '')
36
+ return label, description, coord, website, wiki
37
  except:
38
  pass
39
+ return entity, "", "", "", ""
40
 
41
  # Get Wikipedia description as fallback
42
  def get_wikipedia_summary(entity, lang="en"):
 
60
  except:
61
  return text # Return untranslated if model fails
62
 
63
+ # Combined NER + Wikidata + fallback Wikipedia + translation + links + map
64
  def multilingual_entity_info(text, output_lang):
65
  try:
66
  detected_lang = detect(text)
 
75
  name = ent['word'].strip()
76
  if name not in seen and name.isalpha():
77
  seen.add(name)
78
+ label, desc, coord, website, wiki = get_wikidata_info(name, lang=detected_lang)
79
  if not desc:
80
  desc = get_wikipedia_summary(name, lang=detected_lang)
81
  translated_desc = translate_text(desc, detected_lang, output_lang)
82
+
83
+ links = ""
84
+ if wiki:
85
+ links += f"πŸ”— [Wikipedia]({wiki}) "
86
+ if website:
87
+ links += f"🌐 [Official Site]({website})"
88
+
89
+ map_embed = ""
90
+ if coord:
91
+ try:
92
+ latlon = coord.replace('Point(', '').replace(')', '').split(' ')
93
+ lon, lat = latlon[0], latlon[1]
94
+ map_embed = f"\n<iframe width='100%' height='300' frameborder='0' scrolling='no' marginheight='0' marginwidth='0' src='https://www.openstreetmap.org/export/embed.html?bbox={lon}%2C{lat}%2C{lon}%2C{lat}&layer=mapnik&marker={lat}%2C{lon}'></iframe>"
95
+ except:
96
+ pass
97
+
98
+ result += f"\n---\n\n## πŸ”Ž {label}\n\n{translated_desc}\n\n{links}\n{map_embed}\n"
99
 
100
  return result if seen else "No named entities found."
101
 
 
107
  gr.Dropdown(label="Select Output Language", choices=["en", "hi", "es", "fr", "de", "ta", "zh"], value="en")
108
  ],
109
  outputs=gr.Markdown(),
110
+ title="🌐 Multilingual NER + Wikidata + Wikipedia + Maps",
111
+ description="Detects entities in any language, fetches enriched Wikidata info, falls back to Wikipedia, translates the description, and embeds maps + links."
112
  )
113
 
114
  if __name__ == "__main__":
115
+ iface.launch()