from pattern.web import Twitter
import preprocessor as p
from pattern.en import sentiment
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import gradio as gr
import plotly.express as px
from collections import Counter
from gensim.parsing.preprocessing import remove_stopwords
import re


def top_10_plots(df):
    # Build bar charts of the ten most frequent words in positive and negative tweets.
    pos_text = ' '.join(list(df[df['sentiment'] == 1]['Cleaned Tweet']))
    neg_text = ' '.join(list(df[df['sentiment'] == 0]['Cleaned Tweet']))
    pos_filtered_sentence = remove_stopwords(pos_text)
    neg_filtered_sentence = remove_stopwords(neg_text)
    pos_split_it = pos_filtered_sentence.split()
    neg_split_it = neg_filtered_sentence.split()
    pos_counter = Counter(pos_split_it)
    neg_counter = Counter(neg_split_it)
    pos_most_occur = dict(pos_counter.most_common(10))
    neg_most_occur = dict(neg_counter.most_common(10))
    pos_top10 = px.bar(x=list(pos_most_occur.keys()), y=list(pos_most_occur.values()),
                       title="Top 10 words used in positively classified tweets",
                       labels=dict(x="Word", y="Count"))
    neg_top10 = px.bar(x=list(neg_most_occur.keys()), y=list(neg_most_occur.values()),
                       title="Top 10 words used in negatively classified tweets",
                       labels=dict(x="Word", y="Count"))
    return pos_top10, neg_top10


def cleaner(row):
    # Strip URLs, mentions, emojis, stopwords, and punctuation from a tweet.
    row = p.clean(row)
    row = row.replace(":", "")
    row = row.lower()
    row = remove_stopwords(row)
    row = re.sub(r'[^\w\s]', '', row)
    return row


def twitter_viz(keyword, count):
    twitter = Twitter()
    search_results = []
    clean = []
    sentiments = []
    subjectivity = []
    temp = twitter.search(keyword, count=100)
    for result in temp:
        search_results.append(result.text)
        clean_tweet = cleaner(result.text)
        clean.append(clean_tweet)
        # pattern's sentiment() returns (polarity, subjectivity); polarity > 0 is labeled positive.
        polarity, subj = sentiment(clean_tweet)
        sentiments.append(1 if polarity > 0 else 0)
        subjectivity.append(round(subj, 2))
    zipped = list(zip(search_results, clean, sentiments, subjectivity))
    df_raw = pd.DataFrame(zipped, columns=['Tweet', 'Cleaned Tweet', 'sentiment', 'Subjectivity'])
    df = df_raw[['Cleaned Tweet', 'sentiment', 'Subjectivity']]
    df_raw = df_raw[['Tweet']]

    # Pie chart of the positive/negative split.
    t = dict(df['sentiment'].value_counts())
    t['Positive'] = t.pop(1, 0)
    t['Negative'] = t.pop(0, 0)
    sent_dist = px.pie(values=list(t.values()), names=list(t.keys()), title='Sentiment Distribution')
    sent_dist.update_traces(textposition='inside', textinfo='percent+label')

    # Word cloud over all cleaned tweets.
    text = ' '.join(list(df['Cleaned Tweet']))
    word_cloud = WordCloud(collocations=False, background_color='white').generate(text)
    wc = plt.figure()
    plt.imshow(word_cloud, interpolation='bilinear')
    plt.axis("off")
    plt.title("Word Cloud", loc='left', fontdict={'fontsize': 18})

    pos_top10, neg_top10 = top_10_plots(df)
    # Only the first `count` rows are displayed; the analysis itself uses all fetched tweets.
    return df_raw.head(count), df.head(count), sent_dist, wc, pos_top10, neg_top10


with gr.Blocks() as demo:
    with gr.Row():
        keyword = gr.Textbox(placeholder="Enter A Hashtag")
        count = gr.Slider(1, 10, step=1, label='How many tweets do you want to display?', interactive=True)
    with gr.Row():
        btn = gr.Button("Magic!")
    with gr.Tab("Data"):
        with gr.Row():
            df_raw = gr.Dataframe(interactive=False, wrap=True)
    with gr.Tab("Analysis"):
        with gr.Row():
            df_rep = gr.Dataframe(interactive=False, wrap=True)
        with gr.Row():
            with gr.Column(scale=1):
                pos_top10 = gr.Plot()
                sent_dist = gr.Plot()
            with gr.Column(scale=1):
                neg_top10 = gr.Plot()
                wc = gr.Plot()
    btn.click(fn=twitter_viz, inputs=[keyword, count],
              outputs=[df_raw, df_rep, sent_dist, wc, pos_top10, neg_top10])

demo.launch()