|
import os
|
|
os.environ["USE_TF"] = "0"
|
|
|
|
import os
|
|
import re
|
|
import logging
|
|
import textwrap
|
|
import matplotlib.pyplot as plt
|
|
import plotly.express as px
|
|
import pandas as pd
|
|
from googleapiclient.discovery import build
|
|
from transformers import pipeline
|
|
from dotenv import load_dotenv
|
|
from transformers import pipeline
|
|
import streamlit as st
|
|
|
|
load_dotenv()
|
|
API_KEY = os.getenv("YOUTUBE_API_KEY")
|
|
|
|
@st.cache_resource
|
|
def load_sentiment_model():
|
|
from transformers import pipeline
|
|
return pipeline(model="lxyuan/distilbert-base-multilingual-cased-sentiments-student", top_k=None)
|
|
|
|
sentiment_classifier = load_sentiment_model()
|
|
|
|
def extract_video_id(url):
|
|
patterns = [
|
|
r"(?:https?:\/\/)?(?:www\.)?youtube\.com\/watch\?v=([^&]+)",
|
|
r"(?:https?:\/\/)?(?:www\.)?youtube\.com\/embed\/([^?]+)",
|
|
r"(?:https?:\/\/)?(?:www\.)?youtube\.com\/v\/([^?]+)",
|
|
r"(?:https?:\/\/)?youtu\.be\/([^?]+)"
|
|
]
|
|
for pattern in patterns:
|
|
match = re.search(pattern, url)
|
|
if match:
|
|
return match.group(1)
|
|
return None
|
|
|
|
def get_video_title(video_id):
|
|
youtube = build("youtube", "v3", developerKey=API_KEY)
|
|
try:
|
|
request = youtube.videos().list(part="snippet", id=video_id)
|
|
response = request.execute()
|
|
return response["items"][0]["snippet"]["title"]
|
|
except:
|
|
return "Unknown Title"
|
|
|
|
def get_comments(video_id, max_results=100):
|
|
youtube = build("youtube", "v3", developerKey=API_KEY)
|
|
comments = []
|
|
next_page_token = None
|
|
try:
|
|
while len(comments) < max_results:
|
|
request = youtube.commentThreads().list(
|
|
part="snippet",
|
|
videoId=video_id,
|
|
maxResults=min(100, max_results - len(comments)),
|
|
textFormat="plainText",
|
|
pageToken=next_page_token
|
|
)
|
|
response = request.execute()
|
|
for item in response.get("items", []):
|
|
comments.append(item["snippet"]["topLevelComment"]["snippet"]["textDisplay"])
|
|
next_page_token = response.get("nextPageToken")
|
|
if not next_page_token:
|
|
break
|
|
except Exception as e:
|
|
return [], str(e)
|
|
return comments, None
|
|
|
|
def analyze_sentiment(comments):
|
|
results = []
|
|
counts = {"positive": 0, "neutral": 0, "negative": 0}
|
|
all_sentiments = sentiment_classifier(comments, batch_size=8)
|
|
for comment, scores in zip(comments, all_sentiments):
|
|
sentiment = max(scores, key=lambda x: x["score"])
|
|
label = sentiment["label"]
|
|
results.append({"Comment": comment, "Sentiment": label, "Score": sentiment["score"]})
|
|
counts[label] += 1
|
|
return results, counts
|
|
|
|
def plot_pie_chart(counts, video_title):
|
|
labels = list(counts.keys())
|
|
values = list(counts.values())
|
|
|
|
|
|
fig = px.pie(
|
|
names=labels,
|
|
values=values,
|
|
title=f"Sentiment Distribution",
|
|
color=labels,
|
|
color_discrete_map={
|
|
'Positive': '#66bb6a',
|
|
'Neutral': '#ffee58',
|
|
'Negative': '#ef5350'
|
|
},
|
|
)
|
|
fig.update_traces(textinfo='percent+label', textfont_size=14)
|
|
fig.update_layout(
|
|
margin=dict(l=20, r=20, t=40, b=20),
|
|
height=350,
|
|
width=350,
|
|
showlegend=False,
|
|
title_x=0
|
|
)
|
|
return fig
|
|
|
|
def get_overall_sentiment(counts):
|
|
return f"Overall Video Sentiment: {max(counts, key=counts.get).upper()}" |