"""Gradio app (also exposed as an MCP server) for multilingual named-entity recognition."""

import json

import gradio as gr
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

MODEL_NAME = "Babelscape/wikineural-multilingual-ner"

# Number of tokens shared by two consecutive chunks when a long text is split.
CHUNK_STRIDE = 50

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)
nlp = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")


def group_cat(entities: list[dict]) -> dict:
    """Group entity dicts by their 'entity_group' value.

    Args:
        entities: Entity dicts as produced by the NER pipeline.

    Returns:
        A dict mapping each 'entity_group' value (``None`` when the key is
        missing) to the list of entities of that group, in input order.
    """
    categories = {}
    for item in entities:
        categories.setdefault(item.get('entity_group'), []).append(item)
    return categories


def _chunk_char_spans(text: str) -> list[tuple[int, int]]:
    """Split *text* into overlapping chunks and return their character spans.

    The text is tokenized with overflow so that every chunk fits within the
    tokenizer's maximum length; each chunk is reported as a ``(start, end)``
    pair of character offsets into the original text. Chunks that contain
    only special tokens (e.g. for empty input) are skipped.
    """
    encoding = tokenizer(
        text,
        return_overflowing_tokens=True,
        stride=CHUNK_STRIDE,
        max_length=tokenizer.model_max_length,
        truncation=True,
        return_offsets_mapping=True,
        padding=False,
    )

    spans = []
    for offsets in encoding["offset_mapping"]:
        # Special tokens are reported with an empty (0, 0) span; drop them.
        real = [(start, end) for start, end in offsets if end > start]
        if real:
            spans.append((real[0][0], real[-1][1]))
    return spans


def ner(text: str) -> str:
    """
    Searches the input text for named entities and returns them organized by category.

    Args:
        text (str): The input text to analyze.

    Returns:
        str: A JSON string representing a dictionary where each key is a named entity
             category (e.g., 'PER', 'ORG', 'LOC', etc.), and the corresponding value is
             a dictionary mapping each entity found in the text under that category to
             its confidence score.
    """
    all_entities = []
    seen = set()  # (word, start, end) in original-text coordinates, for deduplication

    for chunk_start, chunk_end in _chunk_char_spans(text):
        # Slice the original text rather than decoding token ids, so the model
        # sees the text exactly as the user wrote it.
        for ent in nlp(text[chunk_start:chunk_end]):
            # The pipeline's offsets are relative to the chunk; shift them to
            # document coordinates so an entity in the overlap between two
            # consecutive chunks yields the same key and is kept only once.
            ent = dict(ent)
            ent["start"] += chunk_start
            ent["end"] += chunk_start
            key = (ent["word"], ent["start"], ent["end"])
            if key not in seen:
                seen.add(key)
                all_entities.append(ent)

    # When the same surface form occurs several times, the last score wins.
    cleaned = {
        category: {ent["word"]: float(ent["score"]) for ent in items}
        for category, items in group_cat(all_entities).items()
    }

    return json.dumps(cleaned, indent=2, separators=(',', ': '), ensure_ascii=False)


# Create a standard Gradio interface
demo = gr.Interface(
    fn=ner,
    inputs=["text"],
    outputs="text",
    title="NER",
    description="Detect named entity within the text in input using the model Babelscape/wikineural - This interface works as MCP server as well."
)

# Launch both the Gradio web interface and the MCP server
if __name__ == "__main__":
    demo.launch(mcp_server=True)