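"""Twitter sentiment dashboard (Gradio app).

Searches recent tweets for a keyword via pattern.web, cleans the text,
scores polarity/subjectivity with pattern.en, and shows a sentiment pie
chart, a word cloud, and top-word bar charts for the positive and
negative classes.

Assumed runtime dependencies (inferred from the imports, not pinned here):
pattern, tweet-preprocessor, pandas, matplotlib, wordcloud, gradio,
plotly, gensim.
"""
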
from pattern.web import Twitter
import preprocessor as p
from pattern.en import sentiment
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import gradio as gr
import plotly.express as px
from collections import Counter
from gensim.parsing.preprocessing import remove_stopwords
import re


def top_10_plots(df):
    """Build bar charts of the most frequent words in positive and negative tweets."""
    # Join all cleaned tweets per class into one string, then count word frequencies.
    pos_text = ' '.join(list(df[df['sentiment'] == 1]['Cleaned Tweet']))
    neg_text = ' '.join(list(df[df['sentiment'] == 0]['Cleaned Tweet']))
    pos_counter = Counter(remove_stopwords(pos_text).split())
    neg_counter = Counter(remove_stopwords(neg_text).split())
    pos_most_occur = dict(pos_counter.most_common(10))  # top 10, matching the chart title
    neg_most_occur = dict(neg_counter.most_common(10))
    pos_top10 = px.bar(x=list(pos_most_occur.keys()), y=list(pos_most_occur.values()),
                       title="Top 10 words used in positively classified tweets",
                       labels=dict(x="Word", y="Count"))
    neg_top10 = px.bar(x=list(neg_most_occur.keys()), y=list(neg_most_occur.values()),
                       title="Top 10 words used in negatively classified tweets",
                       labels=dict(x="Word", y="Count"))
    return pos_top10, neg_top10


def cleaner(row):
    """Normalise a raw tweet: strip Twitter artefacts, lowercase, drop stopwords and punctuation."""
    row = p.clean(row)                 # tweet-preprocessor: strips URLs, mentions, emojis, etc.
    row = row.replace(":", "")         # drop stray colons
    row = row.lower()
    row = remove_stopwords(row)        # gensim stopword removal
    row = re.sub(r'[^\w\s]', '', row)  # strip remaining punctuation
    return row


def twitter_viz(keyword, count):
    """Search Twitter for `keyword`, score sentiment, and build all dashboard outputs."""
    twitter = Twitter()
    search_results = []
    clean = []
    sentiments = []
    subjectivity = []
    # Fetch up to 100 recent tweets; the charts use all of them,
    # while the tables only display the first `count` rows.
    for tweet in twitter.search(keyword, count=100):
        search_results.append(tweet.text)
        clean_tweet = cleaner(tweet.text)
        clean.append(clean_tweet)
        polarity, subj = sentiment(clean_tweet)  # pattern.en returns (polarity, subjectivity)
        sentiments.append(1 if polarity > 0 else 0)
        subjectivity.append(round(subj, 2))
    zipped = list(zip(search_results, clean, sentiments, subjectivity))
    df_raw = pd.DataFrame(zipped, columns=['Tweet', 'Cleaned Tweet', 'sentiment', 'Subjectivity'])
    df = df_raw[['Cleaned Tweet', 'sentiment', 'Subjectivity']]
    df_raw = df_raw[['Tweet']]
    # Sentiment distribution pie chart (default to 0 when a class is absent).
    counts = dict(df['sentiment'].value_counts())
    t = {'Positive': counts.pop(1, 0), 'Negative': counts.pop(0, 0)}
    sent_dist = px.pie(values=list(t.values()), names=list(t.keys()), title='Sentiment Distribution')
    sent_dist.update_traces(textposition='inside', textinfo='percent+label')
    # Word cloud over all cleaned tweets.
    text = ' '.join(list(df['Cleaned Tweet']))
    word_cloud = WordCloud(collocations=False, background_color='white').generate(text)
    wc = plt.figure()
    plt.imshow(word_cloud, interpolation='bilinear')
    plt.axis("off")
    plt.title("Word Cloud", loc='left', fontdict={'fontsize': 18})
    pos_top10, neg_top10 = top_10_plots(df)
    return df_raw.head(count), df.head(count), sent_dist, wc, pos_top10, neg_top10
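

# ---- Gradio UI ----
# A keyword textbox and a row-count slider feed the "Magic!" button, which runs
# twitter_viz and fills the "Data" tab (raw tweets) and the "Analysis" tab
# (cleaned-tweet table, sentiment pie, word cloud, and top-word bar charts).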
with gr.Blocks() as demo:
    with gr.Row():
        keyword = gr.Textbox(placeholder="Enter A Hashtag")
        count = gr.Slider(1, 10, step=1, label='Number of tweets to display', interactive=True)
    with gr.Row():
        btn = gr.Button("Magic!")
    with gr.Tab("Data"):
        with gr.Row():
            df_raw = gr.Dataframe(interactive=False, wrap=True)
    with gr.Tab("Analysis"):
        with gr.Row():
            df_rep = gr.Dataframe(interactive=False, wrap=True)
        with gr.Row():
            with gr.Column(scale=1):
                pos_top10 = gr.Plot()
                sent_dist = gr.Plot()
            with gr.Column(scale=1):
                neg_top10 = gr.Plot()
                wc = gr.Plot()
    btn.click(fn=twitter_viz, inputs=[keyword, count],
              outputs=[df_raw, df_rep, sent_dist, wc, pos_top10, neg_top10])
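
# On Hugging Face Spaces app.py is launched automatically; for a local run the
# usual invocation would be `python app.py` (assumes pattern.web can reach Twitter).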
demo.launch()