File size: 3,596 Bytes
fb94850
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import os
os.environ["USE_TF"] = "0"

import os
import re
import logging
import textwrap
import matplotlib.pyplot as plt
import plotly.express as px
import pandas as pd
from googleapiclient.discovery import build
from transformers import pipeline
from dotenv import load_dotenv
from transformers import pipeline
import streamlit as st

# Pull variables from a local .env file (if any) into the process environment
# before the key is read.
load_dotenv()

# YouTube Data API key handed to the client builders below; None when the
# YOUTUBE_API_KEY variable is not set.
API_KEY = os.environ.get("YOUTUBE_API_KEY")

@st.cache_resource
def load_sentiment_model():
    """Create the multilingual sentiment-classification pipeline.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the returned pipeline object instead of rebuilding it on each call.

    Returns:
        A ``transformers`` pipeline configured with ``top_k=None``, so the
        scores of all labels are returned for every input text.
    """
    from transformers import pipeline as make_pipeline

    model_id = "lxyuan/distilbert-base-multilingual-cased-sentiments-student"
    return make_pipeline(model=model_id, top_k=None)

# Module-level classifier instance, created when this module is imported;
# analyze_sentiment() calls it to score comments.
sentiment_classifier = load_sentiment_model()

# Compiled once at import time. Scheme and host prefixes ("https://", "www.",
# "m.") are not spelled out because the patterns are applied with ``search``,
# which finds the host anywhere in the string.
_VIDEO_ID_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        # watch URLs: ``v`` may be the first query parameter or follow others.
        # The lazy optional group prefers the first ``v=`` occurrence.
        r"youtube\.com/watch\?(?:[^#\s]*?&)??v=([A-Za-z0-9_-]+)",
        # path-style URLs: /embed/ID, /v/ID, /shorts/ID, /live/ID
        r"youtube\.com/(?:embed|v|shorts|live)/([A-Za-z0-9_-]+)",
        # short links: youtu.be/ID
        r"youtu\.be/([A-Za-z0-9_-]+)",
    )
)


def extract_video_id(url):
    """Extract the YouTube video ID from a URL.

    Supported forms are ``youtube.com/watch?v=ID`` (with ``v`` anywhere in
    the query string), ``youtube.com/embed/ID``, ``youtube.com/v/ID``,
    ``youtube.com/shorts/ID``, ``youtube.com/live/ID`` and ``youtu.be/ID``.
    Only ID characters (letters, digits, ``_`` and ``-``) are captured, so
    trailing query parameters, fragments (``#t=30``) and slashes are never
    part of the result.

    Args:
        url: The URL text to inspect.

    Returns:
        The video ID as a string, or ``None`` when ``url`` is not a string
        or no supported URL form is found in it.
    """
    if not isinstance(url, str):
        return None
    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)
    return None

def get_video_title(video_id):
    """Look up a video's title through the YouTube Data API.

    Args:
        video_id: ID of the video to query.

    Returns:
        The title from the video's snippet, or ``"Unknown Title"`` when the
        request fails, the response lists no items, or the response does not
        have the expected structure.
    """
    youtube = build("youtube", "v3", developerKey=API_KEY)
    try:
        response = youtube.videos().list(part="snippet", id=video_id).execute()
        items = response.get("items") or []
        if not items:
            # No item came back for this ID; nothing to read a title from.
            return "Unknown Title"
        return items[0]["snippet"]["title"]
    except Exception:
        # Best-effort lookup: keep the fallback title, but no longer swallow
        # KeyboardInterrupt/SystemExit and leave a trace of what went wrong.
        logging.getLogger(__name__).warning(
            "Could not fetch title for video %s", video_id, exc_info=True
        )
        return "Unknown Title"

def get_comments(video_id, max_results=100):
    """Collect top-level comment texts for a video, page by page.

    Args:
        video_id: ID of the video whose comment threads are requested.
        max_results: Upper bound on the number of comments to collect.

    Returns:
        A ``(comments, error)`` tuple. On success ``comments`` is the list of
        comment texts and ``error`` is ``None``. If any exception is raised
        while fetching, the result is ``([], str(exception))`` and comments
        gathered so far are discarded.
    """
    client = build("youtube", "v3", developerKey=API_KEY)
    collected = []
    page_token = None
    try:
        while len(collected) < max_results:
            # Ask for at most 100 per page, and never more than still needed.
            still_needed = max_results - len(collected)
            response = (
                client.commentThreads()
                .list(
                    part="snippet",
                    videoId=video_id,
                    maxResults=min(100, still_needed),
                    textFormat="plainText",
                    pageToken=page_token,
                )
                .execute()
            )
            collected.extend(
                thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
                for thread in response.get("items", [])
            )
            page_token = response.get("nextPageToken")
            if not page_token:
                # No further page token in the response: stop paging.
                break
    except Exception as exc:
        return [], str(exc)
    return collected, None

def analyze_sentiment(comments):
    """Classify each comment and tally the winning labels.

    Args:
        comments: Sequence of comment strings to classify.

    Returns:
        A ``(results, counts)`` tuple. ``results`` holds one dict per comment
        with the keys ``"Comment"``, ``"Sentiment"`` (the highest-scoring
        label) and ``"Score"`` (that label's score). ``counts`` maps each
        label to how many comments received it; ``"positive"``,
        ``"neutral"`` and ``"negative"`` are always present.
    """
    counts = {"positive": 0, "neutral": 0, "negative": 0}
    if not comments:
        # Nothing to classify: skip the model call entirely.
        return [], counts

    # truncation=True cuts over-long comments down to the model's maximum
    # input length instead of letting the pipeline raise on them.
    all_sentiments = sentiment_classifier(comments, batch_size=8, truncation=True)

    results = []
    for comment, scores in zip(comments, all_sentiments):
        # ``scores`` lists every label with its score; keep the best one.
        best = max(scores, key=lambda entry: entry["score"])
        label = best["label"]
        results.append({"Comment": comment, "Sentiment": label, "Score": best["score"]})
        # Tolerate a label outside the three pre-seeded keys rather than
        # failing with KeyError.
        counts[label] = counts.get(label, 0) + 1
    return results, counts

def plot_pie_chart(counts, video_title):
    """Build a pie chart of the sentiment distribution.

    Args:
        counts: Mapping of sentiment label to number of comments.
        video_title: Kept for interface compatibility; the chart does not
            use it.

    Returns:
        The Plotly figure produced by ``px.pie``.
    """
    labels = list(counts.keys())
    values = list(counts.values())

    # Fixed colors per sentiment. The map handed to Plotly is keyed by the
    # labels exactly as they appear in ``counts`` (matched case-insensitively
    # here), so lowercase labels such as "positive" get their color just like
    # capitalized ones.
    palette = {
        "positive": "#66bb6a",
        "neutral": "#ffee58",
        "negative": "#ef5350",
    }
    color_map = {
        label: palette[str(label).lower()]
        for label in labels
        if str(label).lower() in palette
    }

    fig = px.pie(
        names=labels,
        values=values,
        title="Sentiment Distribution",
        color=labels,
        color_discrete_map=color_map,
    )
    fig.update_traces(textinfo='percent+label', textfont_size=14)
    fig.update_layout(
        margin=dict(l=20, r=20, t=40, b=20),
        height=350,
        width=350,
        showlegend=False,
        title_x=0
    )
    return fig

def get_overall_sentiment(counts):
    """Summarize which sentiment label has the highest count.

    Args:
        counts: Mapping of sentiment label to number of comments.

    Returns:
        A sentence naming the most frequent label in upper case. When
        several labels share the highest count, the one that comes first in
        ``counts`` is used.
    """
    top_label = max(counts.keys(), key=lambda label: counts[label])
    return "Overall Video Sentiment: {}".format(top_label.upper())