File size: 3,641 Bytes
5489cb6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from pattern.web import Twitter
import preprocessor as p
from pattern.en import sentiment
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import gradio as gr
import plotly.express as px
from collections import Counter
from gensim.parsing.preprocessing import remove_stopwords
import re


def top_10_plots(df):
  """Build bar charts of the 10 most frequent words in positive and negative tweets.

  Args:
    df: DataFrame with a 'Cleaned Tweet' (str) column and a 'sentiment'
        column where 1 = positive and 0 = negative.

  Returns:
    (pos_top10, neg_top10): plotly Figure bar charts for the positively and
    negatively classified tweets, respectively.
  """
  pos_text = ' '.join(list(df[df['sentiment']==1]['Cleaned Tweet']))
  neg_text = ' '.join(list(df[df['sentiment']==0]['Cleaned Tweet']))

  # Drop stopwords before counting so the charts surface meaningful words.
  pos_counter = Counter(remove_stopwords(pos_text).split())
  neg_counter = Counter(remove_stopwords(neg_text).split())

  # Bug fix: the titles promise "Top 10" but the original used most_common(11),
  # which plotted 11 bars.
  pos_most_occur = dict(pos_counter.most_common(10))
  neg_most_occur = dict(neg_counter.most_common(10))

  pos_top10 = px.bar(x=pos_most_occur.keys(), y=pos_most_occur.values(),title="Top 10 words used in positively classified tweets",
                     labels=dict(x="Word", y="Count"))
  neg_top10 = px.bar(x=neg_most_occur.keys(), y=neg_most_occur.values(),title="Top 10 words used in negatively classified tweets",
                     labels=dict(x="Word", y="Count"))
  return pos_top10,neg_top10

def cleaner(row):
  """Normalize one raw tweet for analysis.

  Strips tweet noise (URLs, mentions, hashtags, emojis via the tweet
  preprocessor), removes colons, lowercases, drops stopwords, and finally
  removes all remaining punctuation.
  """
  text = p.clean(row)                    # tweet-specific noise removal
  text = text.replace(":", "").lower()   # normalize case, drop colons
  text = remove_stopwords(text)          # gensim stopword filter
  return re.sub(r'[^\w\s]', '', text)    # strip remaining punctuation

def twitter_viz(keyword, Count):
  """Search Twitter for *keyword*, classify tweet sentiment, and build visuals.

  Args:
    keyword: search term / hashtag to query.
    Count: number of rows of the result DataFrames to return (the search
           itself always fetches up to 100 tweets).

  Returns:
    (raw_df_head, analysis_df_head, sentiment_pie, wordcloud_fig,
     pos_top10_fig, neg_top10_fig)
  """
  twitter = Twitter()

  search_results = []
  clean = []
  sentiments = []
  subjectivity = []

  for result in twitter.search(keyword, count=100):
    search_results.append(result.text)
    clean_tweet = cleaner(result.text)
    clean.append(clean_tweet)
    # Compute sentiment once per tweet (the original called sentiment() twice
    # on the same text).
    # NOTE(review): the [1:] slice skips the first character of the cleaned
    # text — presumably a leading-character artifact of cleaning; confirm
    # before removing.
    polarity, subj = sentiment(clean_tweet[1:])
    sentiments.append(1 if polarity > 0 else 0)
    subjectivity.append(round(subj, 2))

  zipped = list(zip(search_results, clean, sentiments, subjectivity))
  df_raw = pd.DataFrame(zipped, columns=['Tweet', 'Cleaned Tweet', 'sentiment', 'Subjectivity'])
  df = df_raw[['Cleaned Tweet', 'sentiment', 'Subjectivity']]
  df_raw = df_raw[['Tweet']]

  # Robustness fix: value_counts() omits a class entirely when no tweet falls
  # in it, so the original dict.pop(1) / dict.pop(0) raised KeyError whenever
  # every tweet was classified the same way. pop with a default of 0 handles
  # the all-positive / all-negative cases.
  counts = dict(df['sentiment'].value_counts())
  t = {'Positive': counts.pop(1, 0), 'Negative': counts.pop(0, 0)}
  sent_dist = px.pie(df, values=t.values(), names=t.keys(), title='Sentiment Distribution')
  sent_dist.update_traces(textposition='inside', textinfo='percent+label')

  text = ' '.join(list(df['Cleaned Tweet']))

  word_cloud = WordCloud(collocations = False, background_color = 'white').generate(text)
  wc = plt.figure()
  plt.imshow(word_cloud, interpolation='bilinear')
  plt.axis("off")
  plt.title("Word Cloud",loc='left',fontdict={'fontsize': 18})
  pos_top10,neg_top10 = top_10_plots(df)

  return df_raw.head(Count),df.head(Count),sent_dist,wc,pos_top10,neg_top10

# Gradio UI: a keyword box + tweet-count slider feed twitter_viz on button
# click; results land in two tabs (raw data, analysis plots).
with gr.Blocks() as demo:
    with gr.Row():
      keyword = gr.Textbox(placeholder="Enter A Hashtag")
      # Slider bounds the number of rows shown, not the number fetched
      # (twitter_viz always searches up to 100 tweets).
      count = gr.Slider(1, 10, step=1,label='how many tweets you want to extract',interactive=True)
    with gr.Row(): 
        btn = gr.Button("Magic!")
    with gr.Tab("Data"):
      with gr.Row():
        # Raw tweet text as returned by the search.
        df_raw = gr.Dataframe(interactive=False,wrap=True)
    with gr.Tab("Analysis"):
      with gr.Row():
        # Cleaned tweets with sentiment / subjectivity columns.
        df_rep = gr.Dataframe(interactive=False,wrap=True)
      with gr.Row():
        with gr.Column(scale=1):
          pos_top10 = gr.Plot()
          sent_dist = gr.Plot()
        with gr.Column(scale=1):        
          neg_top10 = gr.Plot()
          wc = gr.Plot()
    # Output order must match twitter_viz's return tuple:
    # (df_raw, df, sent_dist, wc, pos_top10, neg_top10).
    btn.click(fn=twitter_viz, inputs=[keyword,count], outputs=[df_raw,df_rep,sent_dist,wc,pos_top10,neg_top10])
demo.launch()