Spaces:
Runtime error
Runtime error
from pattern.web import Twitter | |
import preprocessor as p | |
from pattern.en import sentiment | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from wordcloud import WordCloud | |
import gradio as gr | |
import plotly.express as px | |
from collections import Counter | |
from gensim.parsing.preprocessing import remove_stopwords | |
import re | |
def _top_words_bar(tweets, label, top_n):
    """Return a bar chart of the ``top_n`` most frequent words in ``tweets``.

    ``tweets`` is an iterable of strings; ``label`` is the word inserted into
    the chart title (e.g. "positively").
    """
    tokens = remove_stopwords(' '.join(tweets)).split()
    ranked = Counter(tokens).most_common(top_n)
    # Materialise plain lists: plotly is handed concrete sequences rather
    # than dict views, and an empty class simply yields an empty chart.
    words = [word for word, _ in ranked]
    counts = [count for _, count in ranked]
    return px.bar(
        x=words,
        y=counts,
        title=f"Top {top_n} words used in {label} classified tweets",
        labels=dict(x="Word", y="Count"),
    )


def top_10_plots(df, top_n=10):
    """Build word-frequency bar charts for positive and negative tweets.

    Args:
        df: DataFrame with a ``'sentiment'`` column (1 = positive,
            0 = negative) and a ``'Cleaned Tweet'`` text column.
        top_n: Number of words shown per chart. Defaults to 10 so the chart
            matches its "Top 10" title (the previous code plotted 11 words).

    Returns:
        A ``(positive_chart, negative_chart)`` tuple of plotly figures.
    """
    positive_tweets = df[df['sentiment'] == 1]['Cleaned Tweet']
    negative_tweets = df[df['sentiment'] == 0]['Cleaned Tweet']
    pos_top10 = _top_words_bar(positive_tweets, "positively", top_n)
    neg_top10 = _top_words_bar(negative_tweets, "negatively", top_n)
    return pos_top10, neg_top10
def cleaner(row):
    """Normalise one raw tweet string for word counting and sentiment scoring.

    The text is passed through ``preprocessor.clean``, stripped of colons,
    lower-cased, filtered with gensim's ``remove_stopwords`` and finally
    stripped of every character that is neither a word character nor
    whitespace.
    """
    normalised = p.clean(row).replace(":", "").lower()
    without_stopwords = remove_stopwords(normalised)
    # Punctuation is removed last, after stopword filtering.
    return re.sub(r'[^\w\s]', '', without_stopwords)
def twitter_viz(keyword, Count):
    """Search Twitter for ``keyword`` and build the tables and charts for the UI.

    Args:
        keyword: Search term handed to ``Twitter().search``.
        Count: Number of rows to keep in each returned table (coerced to int).

    Returns:
        A 6-tuple ``(raw_tweets, analysed_tweets, sentiment_pie,
        word_cloud_figure, positive_top_words, negative_top_words)``: two
        DataFrames truncated to ``Count`` rows, a plotly pie chart, a
        matplotlib figure and the two plotly bar charts from
        ``top_10_plots``.

    Raises:
        gr.Error: If the search returns no tweets, so the UI shows a clear
            message instead of an opaque ``KeyError``.
    """
    tweets = [result.text for result in Twitter().search(keyword, count=100)]
    if not tweets:
        raise gr.Error(f"No tweets found for '{keyword}'.")

    cleaned = [cleaner(tweet) for tweet in tweets]
    # Score each tweet once and reuse the result for both columns.
    # NOTE(review): the first character of every cleaned tweet is skipped
    # before scoring, exactly as before -- confirm this slice is intentional.
    scores = [sentiment(text[1:]) for text in cleaned]
    sentiments = [1 if score[0] > 0 else 0 for score in scores]
    subjectivity = [round(score[1], 2) for score in scores]

    df_raw = pd.DataFrame({'Tweet': tweets})
    df = pd.DataFrame({
        'Cleaned Tweet': cleaned,
        'sentiment': sentiments,
        'Subjectivity': subjectivity,
    })

    # value_counts() omits classes with no rows; default those to 0 rather
    # than failing when every tweet lands in a single class.
    class_counts = df['sentiment'].value_counts()
    distribution = {
        'Positive': int(class_counts.get(1, 0)),
        'Negative': int(class_counts.get(0, 0)),
    }
    sent_dist = px.pie(
        values=list(distribution.values()),
        names=list(distribution),
        title='Sentiment Distribution',
    )
    sent_dist.update_traces(textposition='inside', textinfo='percent+label')

    word_cloud = WordCloud(collocations=False, background_color='white').generate(' '.join(cleaned))
    # Draw on explicit figure/axes objects instead of pyplot's implicit
    # "current figure", which is shared global state.
    wc, ax = plt.subplots()
    ax.imshow(word_cloud, interpolation='bilinear')
    ax.axis("off")
    ax.set_title("Word Cloud", loc='left', fontdict={'fontsize': 18})
    # Detach the figure from pyplot's registry so figures do not accumulate
    # across requests; the Figure object itself is still returned.
    plt.close(wc)

    pos_top10, neg_top10 = top_10_plots(df)

    rows = int(Count)
    return df_raw.head(rows), df.head(rows), sent_dist, wc, pos_top10, neg_top10
# Gradio UI: a search row, a trigger button, and two tabs of results.
# NOTE(review): the nesting below is reconstructed from the statement order
# because the original indentation was not preserved -- confirm the layout.
with gr.Blocks() as demo:
    # Search controls.
    with gr.Row():
        keyword = gr.Textbox(placeholder="Enter A Hashtag")
        count = gr.Slider(
            1,
            10,
            step=1,
            label='how many tweets you want to extract',
            interactive=True,
        )

    with gr.Row():
        btn = gr.Button("Magic!")

    # Raw tweets table.
    with gr.Tab("Data"):
        with gr.Row():
            df_raw = gr.Dataframe(interactive=False, wrap=True)

    # Cleaned tweets table plus the four charts in two equal columns.
    with gr.Tab("Analysis"):
        with gr.Row():
            df_rep = gr.Dataframe(interactive=False, wrap=True)
        with gr.Row():
            with gr.Column(scale=1):
                pos_top10 = gr.Plot()
                sent_dist = gr.Plot()
            with gr.Column(scale=1):
                neg_top10 = gr.Plot()
                wc = gr.Plot()

    # The output order must match the tuple returned by twitter_viz.
    btn.click(
        fn=twitter_viz,
        inputs=[keyword, count],
        outputs=[df_raw, df_rep, sent_dist, wc, pos_top10, neg_top10],
    )

demo.launch()