|
from pattern.web import Twitter |
|
import preprocessor as p |
|
from pattern.en import sentiment |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
from wordcloud import WordCloud |
|
import gradio as gr |
|
import plotly.express as px |
|
from collections import Counter |
|
from gensim.parsing.preprocessing import remove_stopwords |
|
import re |
|
|
|
|
|
def top_10_plots(df):
    """Build bar charts of the ten most frequent words per sentiment class.

    Args:
        df: DataFrame with a 'Cleaned Tweet' text column and a binary
            'sentiment' column (1 = positive, 0 = negative).

    Returns:
        Tuple ``(pos_top10, neg_top10)`` of plotly bar figures.
    """
    pos_text = ' '.join(list(df[df['sentiment'] == 1]['Cleaned Tweet']))
    neg_text = ' '.join(list(df[df['sentiment'] == 0]['Cleaned Tweet']))

    # Second stopword pass in case any survived the per-tweet cleaning.
    pos_filtered_sentence = remove_stopwords(pos_text)
    neg_filtered_sentence = remove_stopwords(neg_text)

    pos_counter = Counter(pos_filtered_sentence.split())
    neg_counter = Counter(neg_filtered_sentence.split())

    # BUG FIX: most_common(11) yielded eleven words although both chart
    # titles promise the top ten — take exactly ten.
    pos_most_occur = dict(pos_counter.most_common(10))
    neg_most_occur = dict(neg_counter.most_common(10))

    pos_top10 = px.bar(
        x=list(pos_most_occur.keys()),
        y=list(pos_most_occur.values()),
        title="Top 10 words used in positively classified tweets",
        labels=dict(x="Word", y="Count"),
    )
    neg_top10 = px.bar(
        x=list(neg_most_occur.keys()),
        y=list(neg_most_occur.values()),
        title="Top 10 words used in negatively classified tweets",
        labels=dict(x="Word", y="Count"),
    )
    return pos_top10, neg_top10
|
|
|
def cleaner(row):
    """Normalize one raw tweet for sentiment scoring.

    Strips tweet artifacts (URLs, mentions, emojis, ...), lowercases,
    removes stopwords, then drops remaining punctuation.
    """
    # tweet-preprocessor handles URLs, @mentions, hashtags, emojis, etc.
    cleaned = p.clean(row)
    cleaned = cleaned.replace(":", "").lower()
    cleaned = remove_stopwords(cleaned)
    # Keep only word characters and whitespace.
    return re.sub(r'[^\w\s]', '', cleaned)
|
|
|
def twitter_viz(keyword, Count):
    """Fetch tweets for `keyword`, score sentiment, and build UI figures.

    Args:
        keyword: Search term / hashtag queried via pattern.web's Twitter API.
        Count: Number of rows of the result tables to display (the search
            itself always fetches up to 100 tweets).

    Returns:
        Tuple ``(raw_tweets_df, analysis_df, sentiment_pie, wordcloud_fig,
        pos_top10_bar, neg_top10_bar)`` matching the gradio outputs.
    """
    twitter = Twitter()

    search_results = []
    clean = []
    sentiments = []
    subjectivity = []

    for result in twitter.search(keyword, count=100):
        search_results.append(result.text)
        clean_tweet = cleaner(result.text)
        clean.append(clean_tweet)
        # Score once per tweet instead of twice (polarity, subjectivity).
        # NOTE(review): the original skips the first character of the
        # cleaned tweet here; kept for parity — confirm intent.
        scores = sentiment(clean_tweet[1:])
        sentiments.append(1 if scores[0] > 0 else 0)
        subjectivity.append(round(scores[1], 2))

    zipped = list(zip(search_results, clean, sentiments, subjectivity))
    df_raw = pd.DataFrame(zipped, columns=['Tweet', 'Cleaned Tweet', 'sentiment', 'Subjectivity'])
    df = df_raw[['Cleaned Tweet', 'sentiment', 'Subjectivity']]
    df_raw = df_raw[['Tweet']]

    # BUG FIX: dict.pop(1) / dict.pop(0) raised KeyError whenever every
    # tweet landed in a single sentiment class; default to a zero count.
    t = dict(df['sentiment'].value_counts())
    t['Positive'] = t.pop(1, 0)
    t['Negative'] = t.pop(0, 0)
    sent_dist = px.pie(df, values=list(t.values()), names=list(t.keys()), title='Sentiment Distribution')
    sent_dist.update_traces(textposition='inside', textinfo='percent+label')

    # One blob of all cleaned tweets feeds the word cloud.
    text = ' '.join(list(df['Cleaned Tweet']))

    word_cloud = WordCloud(collocations=False, background_color='white').generate(text)
    wc = plt.figure()
    plt.imshow(word_cloud, interpolation='bilinear')
    plt.axis("off")
    plt.title("Word Cloud", loc='left', fontdict={'fontsize': 18})

    pos_top10, neg_top10 = top_10_plots(df)

    return df_raw.head(Count), df.head(Count), sent_dist, wc, pos_top10, neg_top10
|
|
|
# Gradio UI: layout is defined by the nesting/order of these context
# managers, so the structure below is load-bearing.
with gr.Blocks() as demo:
    # Input row: search term plus how many result rows to show.
    with gr.Row():
        keyword = gr.Textbox(placeholder="Enter A Hashtag")
        # NOTE(review): despite the label, this slider only limits how many
        # rows are displayed — twitter_viz always fetches up to 100 tweets.
        count = gr.Slider(1, 10, step=1,label='how many tweets you want to extract',interactive=True)
    with gr.Row():
        btn = gr.Button("Magic!")
    # Tab 1: raw (uncleaned) tweets.
    with gr.Tab("Data"):
        with gr.Row():
            df_raw = gr.Dataframe(interactive=False,wrap=True)
    # Tab 2: cleaned tweets with sentiment/subjectivity plus four figures.
    with gr.Tab("Analysis"):
        with gr.Row():
            df_rep = gr.Dataframe(interactive=False,wrap=True)
        with gr.Row():
            # Left column: positive top-10 bar chart and sentiment pie.
            with gr.Column(scale=1):
                pos_top10 = gr.Plot()
                sent_dist = gr.Plot()
            # Right column: negative top-10 bar chart and word cloud.
            with gr.Column(scale=1):
                neg_top10 = gr.Plot()
                wc = gr.Plot()
    # Output order must match twitter_viz's 6-tuple return.
    btn.click(fn=twitter_viz, inputs=[keyword,count], outputs=[df_raw,df_rep,sent_dist,wc,pos_top10,neg_top10])

demo.launch()
|
|