File size: 1,436 Bytes
897b414
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#Core Pkgs
import streamlit as st

#NLP Pkgs
import spacy_streamlit
import spacy
# Load the "en_core_web_sm" spaCy pipeline once at module level; main() uses
# this shared object both to parse text and to look up the NER labels.

nlp = spacy.load("en_core_web_sm")


#Web Scraping Pkgs
from bs4 import BeautifulSoup
from urllib.request import urlopen

@st.cache
def get_text(raw_url):
    """Fetch a web page and return the text of its paragraphs.

    Args:
        raw_url: URL handed directly to ``urllib.request.urlopen``.

    Returns:
        The text of every ``<p>`` element on the page joined with single
        spaces; an empty string when the page contains no ``<p>`` elements.

    Raises:
        Whatever ``urlopen`` raises for the given URL; nothing is caught here.
    """
    # Use the response as a context manager so the connection is closed
    # even if parsing fails, instead of leaking the open handle.
    with urlopen(raw_url) as page:
        # Name the parser explicitly: without it BeautifulSoup picks whichever
        # parser happens to be installed and emits a warning about it.
        soup = BeautifulSoup(page, "html.parser")
    return " ".join(p.text for p in soup.find_all("p"))


def main():
    """Render the Named Entity Recognition app with spaCy-Streamlit.

    The sidebar offers two modes:

    * "NER": run the pipeline on text typed into a text area.
    * "NER for URL": fetch a page with ``get_text``, preview a leading slice
      of its paragraph text and run the pipeline on that slice.

    In both modes the entities are shown with
    ``spacy_streamlit.visualize_ner`` using all labels of the ``ner`` pipe.
    """
    st.title("Named Entity Recognition")

    menu = ["NER", "NER for URL"]
    choice = st.sidebar.radio("Pick a choice", menu)

    # Both modes display the same label set, so look it up once.
    ner_labels = nlp.get_pipe("ner").labels

    if choice == "NER":
        raw_text = st.text_area("Enter Text", "")
        if raw_text:
            spacy_streamlit.visualize_ner(nlp(raw_text), labels=ner_labels)

    elif choice == "NER for URL":
        # Strip surrounding whitespace so a pasted URL with a stray space
        # or newline is still fetched.
        raw_url = st.text_input("Enter URL", "").strip()
        text_length = st.slider("Length to Preview", 50, 200)
        if raw_url:
            try:
                result = get_text(raw_url)
            except (ValueError, OSError) as err:
                # urlopen raises ValueError for a malformed URL and URLError
                # (an OSError subclass) for network/HTTP failures; report the
                # problem in the page instead of crashing with a traceback.
                st.error(f"Could not fetch {raw_url!r}: {err}")
                return

            # NOTE(review): the slider value is used as a divisor, so the
            # preview is len(result) / text_length characters long and a
            # larger slider value yields a *shorter* preview. Kept as in the
            # original; confirm whether this is the intended behaviour.
            len_of_short_text = round(len(result) / text_length)
            preview_text = result[:len_of_short_text]

            st.subheader("Text to be analyzed:")
            st.write(preview_text)
            spacy_streamlit.visualize_ner(nlp(preview_text), labels=ner_labels)

# Build the page only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()