MrGanesh committed on
Commit
7057a87
·
1 Parent(s): 6f5160d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -10,16 +10,18 @@ def load_model():
10
  return model
11
 
12
  model = load_model()
 
 
13
  st.title("Patent Text Extractor")
14
  placeholder = st.empty()
15
  text_input = placeholder.text_area("Paste or write text", height=300)
16
  button = st.button("Extract Keywords")
17
- top_n = st.sidebar.slider("Select a number of keywords", 1, 10, 50,20)
18
  #min_ngram = st.sidebar.number_input("Minimum number of words in each keyword", 1)
19
  #max_ngram = st.sidebar.number_input("Maximum number of words in each keyword", 3)
20
  #st.sidebar.code(f"ngram_range=({min_ngram}, {max_ngram})")
21
 
22
- params = {"docs": text_input, "top_n": top_n, "keyphrase_ngram_range":(1, 3), "stop_words": 'english',"vectorizer":KeyphraseCountVectorizer()}
23
 
24
  #add_diversity = st.sidebar.checkbox("Adjust diversity of keywords")
25
 
@@ -34,10 +36,13 @@ params = {"docs": text_input, "top_n": top_n, "keyphrase_ngram_range":(1, 3), "s
34
  #diversity = st.sidebar.slider("diversity", 0.1, 1.0, 0.6, 0.01)
35
  #params["use_mmr"] = True
36
  #params["diversity"] = diversity
37
-
38
- keywords = model.extract_keywords(**params)
 
 
39
 
40
  if keywords != []:
41
  st.info("Extracted keywords")
42
- keywords = pd.DataFrame(keywords, columns=["Keyword", "Score"])
 
43
  st.table(keywords)
 
10
  return model
11
 
12
  model = load_model()
13
+
14
+
15
  st.title("Patent Text Extractor")
16
  placeholder = st.empty()
17
  text_input = placeholder.text_area("Paste or write text", height=300)
18
  button = st.button("Extract Keywords")
19
+ #top_n = st.sidebar.slider("Select a number of keywords", 1, 10, 50,20)
20
  #min_ngram = st.sidebar.number_input("Minimum number of words in each keyword", 1)
21
  #max_ngram = st.sidebar.number_input("Maximum number of words in each keyword", 3)
22
  #st.sidebar.code(f"ngram_range=({min_ngram}, {max_ngram})")
23
 
24
+ params = {"docs": text_input, "top_n": top_n, "stop_words": 'english',"vectorizer":KeyphraseCountVectorizer()}
25
 
26
  #add_diversity = st.sidebar.checkbox("Adjust diversity of keywords")
27
 
 
36
  #diversity = st.sidebar.slider("diversity", 0.1, 1.0, 0.6, 0.01)
37
  #params["use_mmr"] = True
38
  #params["diversity"] = diversity
39
+ kw_extractor = yake.KeywordExtractor(top=50)
40
+ candidates = kw_extractor.extract_keywords(text_input)
41
+ keyphrases = [candidate[0] for candidate in candidates]
42
+ kw_model = KeyBERT(model=model)
43
 
44
  if keywords != []:
45
  st.info("Extracted keywords")
46
+ keywords = kw_model.extract_keywords(text_input,candidates, keyphrase_ngram_range=(1, 3),
47
+ top_n=50,stop_words='english',vectorizer=KeyphraseCountVectorizer())
48
  st.table(keywords)