File size: 1,291 Bytes
963d538
8015341
963d538
8015341
 
963d538
 
 
 
8015341
 
 
963d538
 
 
bfa7f67
963d538
8015341
 
 
 
 
 
386370d
8015341
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import spacy
import gradio as gr
from transformers import pipeline, AutoTokenizer
from pysentimiento.preprocessing import preprocess_tweet

# Hugging Face Hub identifiers used below.
# NOTE(review): the tokenizer comes from a different repo than the NER model --
# presumably the model was fine-tuned from that base checkpoint; confirm.
_TOKENIZER_NAME = "cardiffnlp/twitter-roberta-base"
_NER_MODEL_NAME = "Recognai/veganuary_ner"

# spaCy English pipeline; `ner` uses it only to split the text into tokens.
nlp = spacy.load("en_core_web_sm")

# Tokenizer handed to the transformers pipeline, capped at 512 tokens.
tokenizer = AutoTokenizer.from_pretrained(
    _TOKENIZER_NAME,
    add_prefix_space=True,
    model_max_length=512,
)

# Token-classification pipeline; sub-word predictions are merged into
# word-level entity groups using the "first" aggregation strategy.
pl = pipeline(
    "ner",
    model=_NER_MODEL_NAME,
    tokenizer=tokenizer,
    aggregation_strategy="first",
)

def ner(text: str) -> str:
    """Return the food mentions detected in a tweet, one per line.

    The tweet is normalized with ``preprocess_tweet``, tokenized with the
    module-level spaCy pipeline, re-joined with single spaces and passed to
    the module-level transformers NER pipeline. Only entity groups labelled
    ``"FOOD"`` are kept.

    Args:
        text: Raw tweet text.

    Returns:
        The detected mentions, stripped of surrounding whitespace and joined
        with newlines, in the order the pipeline reports them. An empty
        string when the input is empty or whitespace-only, or when nothing
        is detected.
    """
    # Nothing to analyse: skip the (comparatively expensive) model calls.
    if not text or not text.strip():
        return ""

    cleaned = preprocess_tweet(text)
    # Feed the model spaCy's tokenization, separated by single spaces.
    # NOTE(review): presumably this mirrors how the training data was
    # tokenized -- confirm against the model card.
    tokenized = " ".join(token.text for token in nlp(cleaned))

    predictions = pl(tokenized)
    return "\n".join(
        pred["word"].strip()
        for pred in predictions
        if pred["entity_group"] == "FOOD"
    )

# Gradio UI: one text box in (the tweet), one text box out (the mentions
# returned by `ner`, one per line).
iface = gr.Interface(
    ner,
    gr.inputs.Textbox(placeholder="copy&paste your veganuary tweet here ...", label="Tweet"),
    gr.outputs.Textbox(label="List of detected food mentions in the tweet"),
    examples=[
        # Emoji restored from mis-decoded UTF-8 (previously rendered as mojibake).
        ["Fruit is delicious 😋 AND healthy 🥗! Brighten up your plate & palate with fresh watermelon, Greek yoghurt & berries, smashed avocado or lime added to water.  A piece of #fruit a day keeps the doctor away! #Veganuary2022"]
    ],
    allow_flagging=False,
    title="Veganuary NER",
    description="Extract food entities from veganuary tweets 😋",
)

# Start the web server without requesting a public share link.
iface.launch(share=False)