"""
Refactored Sentiment Fused - Multimodal Sentiment Analysis Application
This is the main entry point for the application, now using a modular structure.
"""
import streamlit as st
import pandas as pd
from PIL import Image
import logging
# Import our modular components
from src.config.settings import (
APP_NAME,
APP_VERSION,
APP_ICON,
APP_LAYOUT,
CUSTOM_CSS,
SUPPORTED_IMAGE_FORMATS,
SUPPORTED_AUDIO_FORMATS,
SUPPORTED_VIDEO_FORMATS,
)
from src.models.text_model import predict_text_sentiment
from src.models.audio_model import predict_audio_sentiment, load_audio_model
from src.models.vision_model import predict_vision_sentiment, load_vision_model
from src.models.fused_model import predict_fused_sentiment
from src.utils.preprocessing import (
extract_frames_from_video,
extract_audio_from_video,
transcribe_audio,
)
from src.utils.file_handling import get_file_info, format_file_size
from src.utils.sentiment_mapping import get_sentiment_colors, format_sentiment_result
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Page configuration
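# NOTE: st.set_page_config must be the first Streamlit call in the script,
# which is why it runs here at import time, before any page rendering.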
st.set_page_config(
page_title=APP_NAME,
page_icon=APP_ICON,
layout=APP_LAYOUT,
initial_sidebar_state="expanded",
)
# Apply custom CSS
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
def render_home_page():
"""Render the home page with model information."""
st.markdown(
f'<h1 class="main-header">{APP_NAME}</h1>',
unsafe_allow_html=True,
)
st.markdown(
"""
<div class="model-card">
<h2>Welcome to your Multi-Modal Sentiment Analysis Testing Platform!</h2>
<p>This application provides a comprehensive testing environment for your three independent sentiment analysis models:</p>
</div>
""",
unsafe_allow_html=True,
)
col1, col2, col3 = st.columns(3)
with col1:
st.markdown(
"""
<div class="model-card">
<h3>Text Sentiment Model</h3>
<p>READY TO USE - Analyze sentiment from text input using TextBlob</p>
<ul>
<li>Process any text input</li>
<li>Get sentiment classification (Positive/Negative/Neutral)</li>
<li>View confidence scores</li>
<li>Real-time NLP analysis</li>
</ul>
</div>
""",
unsafe_allow_html=True,
)
with col2:
st.markdown(
"""
<div class="model-card">
<h3>Audio Sentiment Model</h3>
<p>READY TO USE - Analyze sentiment from audio files using fine-tuned Wav2Vec2</p>
<ul>
<li>Upload audio files (.wav, .mp3, .m4a, .flac)</li>
<li>Record audio directly with microphone (max 5s)</li>
<li>Automatic preprocessing: 16kHz sampling, 5s max duration</li>
<li>Listen to uploaded/recorded audio</li>
<li>Get sentiment predictions</li>
<li>Real-time audio analysis</li>
</ul>
</div>
""",
unsafe_allow_html=True,
)
with col3:
st.markdown(
"""
<div class="model-card">
<h3>Vision Sentiment Model</h3>
            <p>READY TO USE - Analyze sentiment from images using fine-tuned ResNet-50</p>
<ul>
<li>Upload image files (.png, .jpg, .jpeg, .bmp, .tiff)</li>
<li>Automatic face detection & preprocessing</li>
<li>Fixed 0% padding for tightest face crop</li>
            <li>Convert to 224x224 grayscale → 3-channel RGB (FER2013 format)</li>
            <li>Transforms: Resize(224) → CenterCrop(224) → ImageNet Normalization</li>
<li>Preview original & preprocessed images</li>
<li>Get sentiment predictions</li>
</ul>
</div>
""",
unsafe_allow_html=True,
)
st.markdown(
"""
<div class="model-card">
<h3>Fused Model</h3>
<p>Combine predictions from all three models for enhanced accuracy</p>
<ul>
<li>Multi-modal input processing</li>
<li>Ensemble prediction strategies</li>
<li>Comprehensive sentiment analysis</li>
</ul>
</div>
""",
unsafe_allow_html=True,
)
st.markdown(
"""
<div class="model-card">
<h3>🎬 Max Fusion</h3>
<p>Ultimate video-based sentiment analysis combining all three modalities</p>
<ul>
            <li>🎥 Record or upload 5-second videos</li>
            <li>🔍 Extract frames for vision analysis</li>
            <li>🎵 Extract audio for vocal sentiment</li>
            <li>📝 Transcribe audio for text analysis</li>
            <li>🚀 Comprehensive multi-modal results</li>
</ul>
</div>
""",
unsafe_allow_html=True,
)
st.markdown("---")
st.markdown(
"""
<div style="text-align: center; color: #666;">
<p><strong>Note:</strong> This application now has <strong>ALL THREE MODELS</strong> fully integrated and ready to use!</p>
<p><strong>TextBlob</strong> (Text) + <strong>Wav2Vec2</strong> (Audio) + <strong>ResNet-50</strong> (Vision)</p>
<p><strong>Models are now loaded from Google Drive automatically!</strong></p>
</div>
""",
unsafe_allow_html=True,
)
def render_text_sentiment_page():
"""Render the text sentiment analysis page."""
st.title("Text Sentiment Analysis")
st.markdown("Analyze the sentiment of your text using our TextBlob-based model.")
# Text input
text_input = st.text_area(
"Enter your text here:",
height=150,
placeholder="Type or paste your text here to analyze its sentiment...",
)
# Analyze button
if st.button("Analyze Sentiment", type="primary", use_container_width=True):
if text_input and text_input.strip():
with st.spinner("Analyzing text sentiment..."):
sentiment, confidence = predict_text_sentiment(text_input)
# Display results
st.markdown("### Results")
# Display results in columns
col1, col2 = st.columns(2)
with col1:
st.metric("Sentiment", sentiment)
with col2:
st.metric("Confidence", f"{confidence:.2f}")
# Color-coded sentiment display
sentiment_colors = get_sentiment_colors()
emoji = sentiment_colors.get(sentiment, "❓")
st.markdown(
f"""
<div class="result-box">
<h4>{emoji} Sentiment: {sentiment}</h4>
<p><strong>Confidence:</strong> {confidence:.2f}</p>
<p><strong>Input Text:</strong> "{text_input[:100]}{'...' if len(text_input) > 100 else ''}"</p>
<p><strong>Model:</strong> TextBlob (Natural Language Processing)</p>
</div>
""",
unsafe_allow_html=True,
)
else:
st.error("Please enter some text to analyze.")
def render_audio_sentiment_page():
"""Render the audio sentiment analysis page."""
st.title("Audio Sentiment Analysis")
st.markdown(
"Analyze the sentiment of your audio files using our fine-tuned Wav2Vec2 model."
)
# Preprocessing information
st.info(
"**Audio Preprocessing**: Audio will be automatically processed to match CREMA-D + RAVDESS training format: "
"16kHz sampling rate, max 5 seconds, with automatic resampling and feature extraction."
)
# Model status
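    # load_audio_model is expected to fetch the checkpoint from Google Drive on
    # first use and cache it (e.g., via st.cache_resource) so reruns stay cheap;
    # on failure it returns None for the model, which is handled below.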
model, device, num_classes, feature_extractor = load_audio_model()
if model is None:
st.error(
"Audio model could not be loaded. Please check the Google Drive setup."
)
st.info(
"Expected: Models should be configured in Google Drive and accessible via the model manager."
)
else:
st.success(
f"Audio model loaded successfully on {device} with {num_classes} classes!"
)
# Input method selection
st.subheader("Choose Input Method")
input_method = st.radio(
"Select how you want to provide audio:",
["Upload Audio File", "Record Audio"],
horizontal=True,
)
if input_method == "Upload Audio File":
# File uploader
uploaded_audio = st.file_uploader(
"Choose an audio file",
type=SUPPORTED_AUDIO_FORMATS,
help="Supported formats: WAV, MP3, M4A, FLAC",
)
audio_source = "uploaded_file"
audio_name = uploaded_audio.name if uploaded_audio else None
else: # Audio recording
st.markdown(
"""
<div class="model-card">
<h3>Audio Recording</h3>
<p>Record audio directly with your microphone (max 5 seconds).</p>
<p><strong>Note:</strong> Make sure your microphone is accessible and you have permission to use it.</p>
</div>
""",
unsafe_allow_html=True,
)
# Audio recorder
recorded_audio = st.audio_input(
label="Click to start recording",
help="Click the microphone button to start/stop recording. Maximum recording time is 5 seconds.",
)
if recorded_audio is not None:
# Display recorded audio
st.audio(recorded_audio, format="audio/wav")
st.success("Audio recorded successfully!")
# Convert recorded audio to bytes for processing
uploaded_audio = recorded_audio
audio_source = "recorded"
audio_name = "Recorded Audio"
else:
uploaded_audio = None
audio_source = None
audio_name = None
if uploaded_audio is not None:
# Display audio player
        if audio_source == "recorded":
            # The recorded clip is already rendered above; just show its source
            st.info(f"{audio_name} | Source: Microphone Recording")
else:
st.audio(
uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}'
)
# File info for uploaded files
file_info = get_file_info(uploaded_audio)
st.info(
f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])}"
)
# Analyze button
if st.button(
"Analyze Audio Sentiment", type="primary", use_container_width=True
):
if model is None:
st.error("Model not loaded. Cannot analyze audio.")
else:
with st.spinner("Analyzing audio sentiment..."):
audio_bytes = uploaded_audio.getvalue()
sentiment, confidence = predict_audio_sentiment(audio_bytes)
# Display results
st.markdown("### Results")
col1, col2 = st.columns(2)
with col1:
st.metric("Sentiment", sentiment)
with col2:
st.metric("Confidence", f"{confidence:.2f}")
# Color-coded sentiment display
sentiment_colors = get_sentiment_colors()
emoji = sentiment_colors.get(sentiment, "❓")
st.markdown(
f"""
<div class="result-box">
<h4>{emoji} Sentiment: {sentiment}</h4>
<p><strong>Confidence:</strong> {confidence:.2f}</p>
<p><strong>Audio Source:</strong> {audio_name}</p>
<p><strong>Model:</strong> Wav2Vec2 (Fine-tuned on RAVDESS + CREMA-D)</p>
</div>
""",
unsafe_allow_html=True,
)
else:
if input_method == "Upload Audio File":
st.info("Please upload an audio file to begin analysis.")
else:
st.info("Click the microphone button above to record audio for analysis.")
def render_vision_sentiment_page():
"""Render the vision sentiment analysis page."""
st.title("Vision Sentiment Analysis")
st.markdown(
"Analyze the sentiment of your images using our fine-tuned ResNet-50 model."
)
st.info(
"**Note**: Images will be automatically preprocessed to match FER2013 format: face detection, grayscale conversion, and 224x224 resize (converted to 3-channel RGB)."
)
# Face cropping is set to 0% (no padding) for tightest crop
st.info("**Face Cropping**: Set to 0% padding for tightest crop on facial features")
# Model status
model, device, num_classes = load_vision_model()
if model is None:
st.error(
"Vision model could not be loaded. Please check the Google Drive setup."
)
st.info(
"Expected: Models should be configured in Google Drive and accessible via the model manager."
)
else:
st.success(
f"Vision model loaded successfully on {device} with {num_classes} classes!"
)
# Input method selection
st.subheader("Choose Input Method")
input_method = st.radio(
"Select how you want to provide an image:",
["Upload Image File", "Take Photo with Camera"],
horizontal=True,
)
if input_method == "Upload Image File":
# File uploader
uploaded_image = st.file_uploader(
"Choose an image file",
type=SUPPORTED_IMAGE_FORMATS,
help="Supported formats: PNG, JPG, JPEG, BMP, TIFF",
)
if uploaded_image is not None:
# Display image
image = Image.open(uploaded_image)
st.image(
image,
caption=f"Uploaded Image: {uploaded_image.name}",
use_container_width=True,
)
# File info
file_info = get_file_info(uploaded_image)
st.info(
f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])} | Dimensions: {image.size[0]}x{image.size[1]}"
)
# Analyze button
if st.button(
"Analyze Image Sentiment", type="primary", use_container_width=True
):
if model is None:
st.error("Model not loaded. Cannot analyze image.")
else:
with st.spinner("Analyzing image sentiment..."):
sentiment, confidence = predict_vision_sentiment(image)
# Display results
st.markdown("### Results")
col1, col2 = st.columns(2)
with col1:
st.metric("Sentiment", sentiment)
with col2:
st.metric("Confidence", f"{confidence:.2f}")
# Color-coded sentiment display
sentiment_colors = get_sentiment_colors()
emoji = sentiment_colors.get(sentiment, "❓")
st.markdown(
f"""
<div class="result-box">
<h4>{emoji} Sentiment: {sentiment}</h4>
<p><strong>Confidence:</strong> {confidence:.2f}</p>
<p><strong>Image File:</strong> {uploaded_image.name}</p>
<p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p>
</div>
""",
unsafe_allow_html=True,
)
else: # Camera capture
st.markdown(
"""
<div class="model-card">
<h3>Camera Capture</h3>
<p>Take a photo directly with your camera to analyze its sentiment.</p>
<p><strong>Note:</strong> Make sure your camera is accessible and you have permission to use it.</p>
</div>
""",
unsafe_allow_html=True,
)
# Camera input
camera_photo = st.camera_input(
"Take a photo",
help="Click the camera button to take a photo, or use the upload button to select an existing photo",
)
if camera_photo is not None:
# Display captured image
image = Image.open(camera_photo)
st.image(
image,
caption="Captured Photo",
use_container_width=True,
)
# Image info
st.info(
f"Captured Photo | Dimensions: {image.size[0]}x{image.size[1]} | Format: {image.format}"
)
# Analyze button
if st.button(
"Analyze Photo Sentiment", type="primary", use_container_width=True
):
if model is None:
st.error("Model not loaded. Cannot analyze image.")
else:
with st.spinner("Analyzing photo sentiment..."):
sentiment, confidence = predict_vision_sentiment(image)
# Display results
st.markdown("### Results")
col1, col2 = st.columns(2)
with col1:
st.metric("Sentiment", sentiment)
with col2:
st.metric("Confidence", f"{confidence:.2f}")
# Color-coded sentiment display
sentiment_colors = get_sentiment_colors()
emoji = sentiment_colors.get(sentiment, "❓")
st.markdown(
f"""
<div class="result-box">
<h4>{emoji} Sentiment: {sentiment}</h4>
<p><strong>Confidence:</strong> {confidence:.2f}</p>
<p><strong>Image Source:</strong> Camera Capture</p>
<p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p>
</div>
""",
unsafe_allow_html=True,
)
    # Show a hint when no image has been provided yet
    if input_method == "Upload Image File" and uploaded_image is None:
        st.info("Please upload an image file to begin analysis.")
    elif input_method == "Take Photo with Camera" and camera_photo is None:
        st.info("Click the camera button above to take a photo for analysis.")
def render_fused_model_page():
"""Render the fused model analysis page."""
st.title("Fused Model Analysis")
st.markdown(
"Combine predictions from all three models for enhanced sentiment analysis."
)
st.markdown(
"""
<div class="model-card">
<h3>Multi-Modal Sentiment Analysis</h3>
<p>This page allows you to input text, audio, and/or image data to get a comprehensive sentiment analysis
using all three models combined.</p>
</div>
""",
unsafe_allow_html=True,
)
# Input sections
col1, col2 = st.columns(2)
with col1:
st.subheader("Text Input")
text_input = st.text_area(
"Enter text (optional):",
height=100,
placeholder="Type or paste your text here...",
)
st.subheader("Audio Input")
# Audio preprocessing information for fused model
st.info(
"**Audio Preprocessing**: Audio will be automatically processed to match CREMA-D + RAVDESS training format: "
"16kHz sampling rate, max 5 seconds, with automatic resampling and feature extraction."
)
# Audio input method for fused model
audio_input_method = st.radio(
"Audio input method:",
["Upload File", "Record Audio"],
key="fused_audio_method",
horizontal=True,
)
if audio_input_method == "Upload File":
uploaded_audio = st.file_uploader(
"Upload audio file (optional):",
type=SUPPORTED_AUDIO_FORMATS,
key="fused_audio",
)
audio_source = "uploaded_file"
audio_name = uploaded_audio.name if uploaded_audio else None
else:
# Audio recorder for fused model
recorded_audio = st.audio_input(
label="Record audio (optional):",
key="fused_audio_recorder",
help="Click to record audio for sentiment analysis",
)
if recorded_audio is not None:
st.audio(recorded_audio, format="audio/wav")
st.success("Audio recorded successfully!")
uploaded_audio = recorded_audio
audio_source = "recorded"
audio_name = "Recorded Audio"
else:
uploaded_audio = None
audio_source = None
audio_name = None
with col2:
st.subheader("Image Input")
# Face cropping is set to 0% (no padding) for tightest crop
st.info(
"**Face Cropping**: Set to 0% padding for tightest crop on facial features"
)
# Image input method for fused model
image_input_method = st.radio(
"Image input method:",
["Upload File", "Take Photo"],
key="fused_image_method",
horizontal=True,
)
if image_input_method == "Upload File":
uploaded_image = st.file_uploader(
"Upload image file (optional):",
type=SUPPORTED_IMAGE_FORMATS,
key="fused_image",
)
if uploaded_image:
image = Image.open(uploaded_image)
st.image(image, caption="Uploaded Image", use_container_width=True)
        else:
            # Camera capture for fused model
            camera_photo = st.camera_input(
                "Take a photo (optional):",
                key="fused_camera",
                help="Click to take a photo for sentiment analysis",
            )
            # Always define uploaded_image so the analysis step below never
            # hits a NameError when no photo has been taken yet
            uploaded_image = camera_photo
            if camera_photo:
                image = Image.open(camera_photo)
                st.image(image, caption="Captured Photo", use_container_width=True)
    if uploaded_audio and audio_source == "uploaded_file":
        # Recorded audio is already rendered above, so only play back uploads
        st.audio(
            uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}'
        )
# Analyze button
if st.button("Run Fused Analysis", type="primary", use_container_width=True):
if text_input or uploaded_audio or uploaded_image:
with st.spinner("Running fused sentiment analysis..."):
                # Prepare inputs (rewind the image buffer before re-reading it,
                # since Image.open above may have consumed the stream)
                audio_bytes = uploaded_audio.getvalue() if uploaded_audio else None
                if uploaded_image:
                    uploaded_image.seek(0)
                    image = Image.open(uploaded_image)
                else:
                    image = None
# Get fused prediction
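                # predict_fused_sentiment is assumed to ensemble whichever
                # modalities are present (e.g., confidence-weighted voting over
                # the per-model predictions); see src/models/fused_model.py.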
sentiment, confidence = predict_fused_sentiment(
text=text_input if text_input else None,
audio_bytes=audio_bytes,
image=image,
)
# Display results
st.markdown("### Fused Model Results")
col1, col2 = st.columns(2)
with col1:
st.metric("Final Sentiment", sentiment)
with col2:
st.metric("Overall Confidence", f"{confidence:.2f}")
# Show individual model results
st.markdown("### Individual Model Results")
results_data = []
if text_input:
text_sentiment, text_conf = predict_text_sentiment(text_input)
results_data.append(
{
"Model": "Text (TextBlob)",
"Input": f"Text: {text_input[:50]}...",
"Sentiment": text_sentiment,
"Confidence": f"{text_conf:.2f}",
}
)
if uploaded_audio:
audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
results_data.append(
{
"Model": "Audio (Wav2Vec2)",
"Input": f"Audio: {audio_name}",
"Sentiment": audio_sentiment,
"Confidence": f"{audio_conf:.2f}",
}
)
if uploaded_image:
# Face cropping is set to 0% (no padding) for tightest crop
vision_sentiment, vision_conf = predict_vision_sentiment(
image, crop_tightness=0.0
)
results_data.append(
{
"Model": "Vision (ResNet-50)",
"Input": f"Image: {uploaded_image.name}",
"Sentiment": vision_sentiment,
"Confidence": f"{vision_conf:.2f}",
}
)
if results_data:
df = pd.DataFrame(results_data)
st.dataframe(df, use_container_width=True)
# Final result display
sentiment_colors = get_sentiment_colors()
emoji = sentiment_colors.get(sentiment, "❓")
st.markdown(
f"""
<div class="result-box">
<h4>{emoji} Final Fused Sentiment: {sentiment}</h4>
<p><strong>Overall Confidence:</strong> {confidence:.2f}</p>
<p><strong>Models Used:</strong> {len(results_data)}</p>
</div>
""",
unsafe_allow_html=True,
)
else:
st.warning(
"Please provide at least one input (text, audio, or image) for fused analysis."
)
def render_max_fusion_page():
"""Render the max fusion page for video-based analysis."""
st.title("Max Fusion - Multi-Modal Sentiment Analysis")
st.markdown(
"""
<div class="model-card">
<h3>Ultimate Multi-Modal Sentiment Analysis</h3>
<p>Take photos with camera or upload videos to get comprehensive sentiment analysis from multiple modalities:</p>
<ul>
        <li>📸 <strong>Vision Analysis:</strong> Camera photos or video frames for facial expression analysis</li>
        <li>🎵 <strong>Audio Analysis:</strong> Audio files or extracted audio from videos for vocal sentiment</li>
        <li>📝 <strong>Text Analysis:</strong> Transcribed audio for text sentiment analysis</li>
</ul>
</div>
""",
unsafe_allow_html=True,
)
# Video input method selection
st.subheader("Video Input")
video_input_method = st.radio(
"Choose input method:",
["Upload Video File", "Record Video (Coming Soon)"],
horizontal=True,
index=0, # Default to upload video
)
if video_input_method == "Record Video (Coming Soon)":
# Coming Soon message for video recording
st.info("πŸŽ₯ Video recording feature is coming soon!")
st.info("πŸ“ Please use the Upload Video File option for now.")
# Show a nice coming soon message
st.markdown("---")
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
st.markdown(
"""
<div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; color: white;">
<h3>🚧 Coming Soon 🚧</h3>
<p>Video recording feature is under development</p>
<p>Use Upload Video File for now!</p>
</div>
""",
unsafe_allow_html=True,
)
# Placeholder for future recording functionality
st.markdown(
"""
**Future Features:**
- Real-time video recording with camera
- Audio capture during recording
- Automatic frame extraction
- Live transcription
- WebRTC integration for low-latency streaming
"""
)
# Skip all the recording logic for now
uploaded_video = None
video_source = None
video_name = None
video_file = None
elif video_input_method == "Upload Video File":
# File upload option
st.markdown(
"""
<div class="upload-section">
<h4>πŸ“ Upload Video File</h4>
<p>Upload a video file for comprehensive multimodal analysis.</p>
<p><strong>Supported Formats:</strong> MP4, AVI, MOV, MKV, WMV, FLV</p>
<p><strong>Recommended:</strong> Videos with clear audio and visual content</p>
</div>
""",
unsafe_allow_html=True,
)
uploaded_video = st.file_uploader(
"Choose a video file",
type=SUPPORTED_VIDEO_FORMATS,
help="Supported formats: MP4, AVI, MOV, MKV, WMV, FLV",
)
video_source = "uploaded_file"
video_name = uploaded_video.name if uploaded_video else None
video_file = uploaded_video
if video_file is not None:
# Display video or photo
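        # NOTE: with the current input methods, video_source is only ever
        # "uploaded_file" or None, so the "camera_photo" branch below is
        # unreachable; it is kept for the planned camera-capture mode.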
if video_source == "camera_photo":
# For camera photos, we already displayed the image above
st.info(f"Source: Camera Photo | Ready for vision analysis")
# Add audio upload option for camera photo mode
st.subheader("🎡 Audio Input for Analysis")
st.info(
"Since we're using a photo, please upload an audio file for audio sentiment analysis:"
)
uploaded_audio = st.file_uploader(
"Upload audio file for audio analysis:",
type=SUPPORTED_AUDIO_FORMATS,
key="camera_audio",
help="Upload an audio file to complement the photo analysis",
)
if uploaded_audio:
st.audio(
uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}'
)
st.success("βœ… Audio uploaded successfully!")
audio_bytes = uploaded_audio.getvalue()
else:
audio_bytes = None
st.warning("⚠️ Please upload an audio file for complete analysis")
else:
# For uploaded videos
st.video(video_file)
file_info = get_file_info(video_file)
st.info(
f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])}"
)
audio_bytes = None # Will be extracted from video
# Video Processing Pipeline
st.subheader("🎬 Video Processing Pipeline")
# Initialize variables
frames = []
audio_bytes = None
transcribed_text = ""
# Process uploaded video
if uploaded_video:
st.info("πŸ“ Processing uploaded video file...")
# Extract frames
st.markdown("**1. πŸŽ₯ Frame Extraction**")
frames = extract_frames_from_video(uploaded_video, max_frames=5)
if frames:
st.success(f"βœ… Extracted {len(frames)} representative frames")
# Display extracted frames
cols = st.columns(len(frames))
for i, frame in enumerate(frames):
with cols[i]:
st.image(
frame, caption=f"Frame {i+1}", use_container_width=True
)
else:
st.warning("⚠️ Could not extract frames from video")
frames = []
# Extract audio
st.markdown("**2. 🎡 Audio Extraction**")
audio_bytes = extract_audio_from_video(uploaded_video)
if audio_bytes:
st.success("βœ… Audio extracted successfully")
st.audio(audio_bytes, format="audio/wav")
else:
st.warning("⚠️ Could not extract audio from video")
audio_bytes = None
# Transcribe audio
st.markdown("**3. πŸ“ Audio Transcription**")
if audio_bytes:
transcribed_text = transcribe_audio(audio_bytes)
if transcribed_text:
st.success("βœ… Audio transcribed successfully")
st.markdown(f'**Transcribed Text:** "{transcribed_text}"')
else:
st.warning("⚠️ Could not transcribe audio")
transcribed_text = ""
else:
transcribed_text = ""
st.info("ℹ️ No audio available for transcription")
# Analysis button
if st.button(
"πŸš€ Run Max Fusion Analysis", type="primary", use_container_width=True
):
with st.spinner(
"πŸ”„ Processing video and running comprehensive analysis..."
):
# Run individual analyses
st.subheader("πŸ” Individual Model Analysis")
results_data = []
# Vision analysis (use first frame for uploaded videos)
if frames:
st.markdown("**Vision Analysis:**")
# For uploaded videos, use first frame
vision_sentiment, vision_conf = predict_vision_sentiment(
frames[0], crop_tightness=0.0
)
results_data.append(
{
"Model": "Vision (ResNet-50)",
"Input": f"Video Frame 1",
"Sentiment": vision_sentiment,
"Confidence": f"{vision_conf:.2f}",
}
)
st.success(
f"Vision: {vision_sentiment} (Confidence: {vision_conf:.2f})"
)
# Audio analysis
if audio_bytes:
st.markdown("**Audio Analysis:**")
audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
results_data.append(
{
"Model": "Audio (Wav2Vec2)",
"Input": f"Video Audio",
"Sentiment": audio_sentiment,
"Confidence": f"{audio_conf:.2f}",
}
)
st.success(
f"Audio: {audio_sentiment} (Confidence: {audio_conf:.2f})"
)
# Text analysis
if transcribed_text:
st.markdown("**Text Analysis:**")
text_sentiment, text_conf = predict_text_sentiment(transcribed_text)
results_data.append(
{
"Model": "Text (TextBlob)",
"Input": f"Transcribed: {transcribed_text[:50]}...",
"Sentiment": text_sentiment,
"Confidence": f"{text_conf:.2f}",
}
)
st.success(f"Text: {text_sentiment} (Confidence: {text_conf:.2f})")
# Run fused analysis
st.subheader("🎯 Max Fusion Results")
if results_data:
# Display results table
df = pd.DataFrame(results_data)
st.dataframe(df, use_container_width=True)
# Calculate fused sentiment
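                    # Reuse the same fusion entry point as the Fused Model page,
                    # passing only the modalities that survived extraction
                    # (missing ones are None and presumably skipped internally).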
image_for_fusion = frames[0] if frames else None
sentiment, confidence = predict_fused_sentiment(
text=transcribed_text if transcribed_text else None,
audio_bytes=audio_bytes,
image=image_for_fusion,
)
# Display final results
col1, col2 = st.columns(2)
with col1:
st.metric("🎯 Final Sentiment", sentiment)
with col2:
st.metric("πŸ“Š Overall Confidence", f"{confidence:.2f}")
# Color-coded sentiment display
sentiment_colors = get_sentiment_colors()
emoji = sentiment_colors.get(sentiment, "❓")
st.markdown(
f"""
<div class="result-box">
<h4>{emoji} Max Fusion Sentiment: {sentiment}</h4>
<p><strong>Overall Confidence:</strong> {confidence:.2f}</p>
<p><strong>Modalities Analyzed:</strong> {len(results_data)}</p>
<p><strong>Video Source:</strong> {video_name}</p>
<p><strong>Analysis Type:</strong> Comprehensive Multi-Modal Sentiment Analysis</p>
</div>
""",
unsafe_allow_html=True,
)
else:
st.error(
"❌ No analysis could be performed. Please check your video input."
)
else:
if video_input_method == "Record Video (Coming Soon)":
st.info(
"πŸŽ₯ Video recording feature is coming soon! Please use Upload Video File for now."
)
else:
st.info("πŸ“ Please upload a video file to begin Max Fusion analysis.")
def main():
"""Main application function."""
# Sidebar navigation
st.sidebar.title("Sentiment Analysis")
st.sidebar.markdown("---")
# Navigation
page = st.sidebar.selectbox(
"Choose a page:",
[
"Home",
"Text Sentiment",
"Audio Sentiment",
"Vision Sentiment",
"Fused Model",
"Max Fusion",
],
)
# Page routing
if page == "Home":
render_home_page()
elif page == "Text Sentiment":
render_text_sentiment_page()
elif page == "Audio Sentiment":
render_audio_sentiment_page()
elif page == "Vision Sentiment":
render_vision_sentiment_page()
elif page == "Fused Model":
render_fused_model_page()
elif page == "Max Fusion":
render_max_fusion_page()
# Footer
st.markdown("---")
st.markdown(
"""
<div style="text-align: center; color: #666; padding: 1rem;">
        <p>Built with ❤️ | by <a href="https://github.com/iamfaham">iamfaham</a></p>
<p>Version: {version}</p>
</div>
""".format(
version=APP_VERSION
),
unsafe_allow_html=True,
)
if __name__ == "__main__":
main()