topicModeling / app.py
SaraHossam's picture
Update app.py
cec4a0c
import spacy
from keybert import KeyBERT
import numpy as np
import pandas as pd
import os
import re
import json
import seaborn as sns
import gradio as gr
# def separate_punc(text):
# return [token.text.lower() for token in text if token.text not in '\n\n \n\n\n!"-#$%&()--.*+,-/:;<=>?@[\\]^_`{|}~\t\n ']
# Shared KeyBERT keyword extractor, created once at import time and reused by
# every call to run(); 'all-mpnet-base-v2' is the model name handed to KeyBERT.
kw_model = KeyBERT(model='all-mpnet-base-v2')
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
# Gradio callback: extract candidate topic names from the submitted text with KeyBERT.
def run(text: str) -> str:
    """Suggest topic names for an article.

    Asks the module-level KeyBERT model for keyphrases of one to three
    words (English stop words removed) and lists them, one per line,
    under a fixed heading.

    Args:
        text: The article to analyse, as a single string.

    Returns:
        The heading followed by every suggested keyphrase; each entry is
        terminated by a newline and a single space. If no keyphrases are
        found, only the heading is returned.
    """
    keywords = kw_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 3),
        stop_words='english',
        highlight=False,
    )
    # The result is expected to be (phrase, score) pairs; going through a
    # dict drops the scores, keeps one entry per phrase and preserves order.
    phrases = dict(keywords)
    heading = (
        'We suggest the following as potential topic name '
        'for the given article: \n '
    )
    # Build the listing in one pass instead of repeated concatenation.
    return heading + ''.join(phrase + '\n ' for phrase in phrases)
# Wrap run() in a minimal Gradio UI (one text input, one text output)
# and start serving it.
iface = gr.Interface(
    fn=run,
    inputs="text",
    outputs="text",
)
iface.launch()