"""
Refactored Sentiment Fused - Multimodal Sentiment Analysis Application
This is the main entry point for the application, now using a modular structure.
"""
import streamlit as st
import pandas as pd
from PIL import Image
import logging
# Import our modular components
from src.config.settings import (
APP_NAME,
APP_VERSION,
APP_ICON,
APP_LAYOUT,
CUSTOM_CSS,
SUPPORTED_IMAGE_FORMATS,
SUPPORTED_AUDIO_FORMATS,
SUPPORTED_VIDEO_FORMATS,
)
from src.models.text_model import predict_text_sentiment
from src.models.audio_model import predict_audio_sentiment, load_audio_model
from src.models.vision_model import predict_vision_sentiment, load_vision_model
from src.models.fused_model import predict_fused_sentiment
from src.utils.preprocessing import (
extract_frames_from_video,
extract_audio_from_video,
transcribe_audio,
)
from src.utils.file_handling import get_file_info, format_file_size
from src.utils.sentiment_mapping import get_sentiment_colors, format_sentiment_result
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Page configuration
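# NOTE: st.set_page_config must be the first Streamlit call in the script,
# which is why it runs here at import time, before any page rendering.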
st.set_page_config(
page_title=APP_NAME,
page_icon=APP_ICON,
layout=APP_LAYOUT,
initial_sidebar_state="expanded",
)
# Apply custom CSS
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
def render_home_page():
"""Render the home page with model information."""
st.markdown(
f'<h1 class="main-header">{APP_NAME}</h1>',
unsafe_allow_html=True,
)
st.markdown(
"""
<div class="model-card">
<h2>Welcome to your Multi-Modal Sentiment Analysis Testing Platform!</h2>
<p>This application provides a comprehensive testing environment for your three independent sentiment analysis models:</p>
</div>
""",
unsafe_allow_html=True,
)
col1, col2, col3 = st.columns(3)
with col1:
st.markdown(
"""
<div class="model-card">
<h3>Text Sentiment Model</h3>
<p>READY TO USE - Analyze sentiment from text input using TextBlob</p>
<ul>
<li>Process any text input</li>
<li>Get sentiment classification (Positive/Negative/Neutral)</li>
<li>View confidence scores</li>
<li>Real-time NLP analysis</li>
</ul>
</div>
""",
unsafe_allow_html=True,
)
with col2:
st.markdown(
"""
<div class="model-card">
<h3>Audio Sentiment Model</h3>
<p>READY TO USE - Analyze sentiment from audio files using fine-tuned Wav2Vec2</p>
<ul>
<li>Upload audio files (.wav, .mp3, .m4a, .flac)</li>
<li>Record audio directly with microphone (max 5s)</li>
<li>Automatic preprocessing: 16kHz sampling, 5s max duration</li>
<li>Listen to uploaded/recorded audio</li>
<li>Get sentiment predictions</li>
<li>Real-time audio analysis</li>
</ul>
</div>
""",
unsafe_allow_html=True,
)
with col3:
st.markdown(
"""
<div class="model-card">
<h3>Vision Sentiment Model</h3>
            <p>READY TO USE - Analyze sentiment from images using fine-tuned ResNet-50</p>
<ul>
<li>Upload image files (.png, .jpg, .jpeg, .bmp, .tiff)</li>
<li>Automatic face detection & preprocessing</li>
<li>Fixed 0% padding for tightest face crop</li>
            <li>Convert to 224x224 grayscale → 3-channel RGB (FER2013 format)</li>
            <li>Transforms: Resize(224) → CenterCrop(224) → ImageNet Normalization</li>
<li>Preview original & preprocessed images</li>
<li>Get sentiment predictions</li>
</ul>
</div>
""",
unsafe_allow_html=True,
)
st.markdown(
"""
<div class="model-card">
<h3>Fused Model</h3>
<p>Combine predictions from all three models for enhanced accuracy</p>
<ul>
<li>Multi-modal input processing</li>
<li>Ensemble prediction strategies</li>
<li>Comprehensive sentiment analysis</li>
</ul>
</div>
""",
unsafe_allow_html=True,
)
st.markdown(
"""
<div class="model-card">
<h3>🎬 Max Fusion</h3>
<p>Ultimate video-based sentiment analysis combining all three modalities</p>
<ul>
            <li>🎥 Record or upload 5-second videos</li>
            <li>🔍 Extract frames for vision analysis</li>
            <li>🎵 Extract audio for vocal sentiment</li>
            <li>📝 Transcribe audio for text analysis</li>
            <li>🚀 Comprehensive multi-modal results</li>
</ul>
</div>
""",
unsafe_allow_html=True,
)
st.markdown("---")
st.markdown(
"""
<div style="text-align: center; color: #666;">
<p><strong>Note:</strong> This application now has <strong>ALL THREE MODELS</strong> fully integrated and ready to use!</p>
<p><strong>TextBlob</strong> (Text) + <strong>Wav2Vec2</strong> (Audio) + <strong>ResNet-50</strong> (Vision)</p>
<p><strong>Models are now loaded from Google Drive automatically!</strong></p>
</div>
""",
unsafe_allow_html=True,
)
def render_text_sentiment_page():
"""Render the text sentiment analysis page."""
st.title("Text Sentiment Analysis")
st.markdown("Analyze the sentiment of your text using our TextBlob-based model.")
# Text input
text_input = st.text_area(
"Enter your text here:",
height=150,
placeholder="Type or paste your text here to analyze its sentiment...",
)
# Analyze button
if st.button("Analyze Sentiment", type="primary", use_container_width=True):
if text_input and text_input.strip():
with st.spinner("Analyzing text sentiment..."):
sentiment, confidence = predict_text_sentiment(text_input)
# Display results
st.markdown("### Results")
# Display results in columns
col1, col2 = st.columns(2)
with col1:
st.metric("Sentiment", sentiment)
with col2:
st.metric("Confidence", f"{confidence:.2f}")
# Color-coded sentiment display
sentiment_colors = get_sentiment_colors()
emoji = sentiment_colors.get(sentiment, "❓")
st.markdown(
f"""
<div class="result-box">
<h4>{emoji} Sentiment: {sentiment}</h4>
<p><strong>Confidence:</strong> {confidence:.2f}</p>
<p><strong>Input Text:</strong> "{text_input[:100]}{'...' if len(text_input) > 100 else ''}"</p>
<p><strong>Model:</strong> TextBlob (Natural Language Processing)</p>
</div>
""",
unsafe_allow_html=True,
)
else:
st.error("Please enter some text to analyze.")
def render_audio_sentiment_page():
"""Render the audio sentiment analysis page."""
st.title("Audio Sentiment Analysis")
st.markdown(
"Analyze the sentiment of your audio files using our fine-tuned Wav2Vec2 model."
)
# Preprocessing information
st.info(
"**Audio Preprocessing**: Audio will be automatically processed to match CREMA-D + RAVDESS training format: "
"16kHz sampling rate, max 5 seconds, with automatic resampling and feature extraction."
)
# Model status
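    # load_audio_model is expected to fetch the checkpoint from Google Drive on
    # first use and cache it (e.g., via st.cache_resource) so reruns stay cheap;
    # on failure it returns None for the model, which is handled below.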
model, device, num_classes, feature_extractor = load_audio_model()
if model is None:
st.error(
"Audio model could not be loaded. Please check the Google Drive setup."
)
st.info(
"Expected: Models should be configured in Google Drive and accessible via the model manager."
)
else:
st.success(
f"Audio model loaded successfully on {device} with {num_classes} classes!"
)
# Input method selection
st.subheader("Choose Input Method")
input_method = st.radio(
"Select how you want to provide audio:",
["Upload Audio File", "Record Audio"],
horizontal=True,
)
if input_method == "Upload Audio File":
# File uploader
uploaded_audio = st.file_uploader(
"Choose an audio file",
type=SUPPORTED_AUDIO_FORMATS,
help="Supported formats: WAV, MP3, M4A, FLAC",
)
audio_source = "uploaded_file"
audio_name = uploaded_audio.name if uploaded_audio else None
else: # Audio recording
st.markdown(
"""
<div class="model-card">
<h3>Audio Recording</h3>
<p>Record audio directly with your microphone (max 5 seconds).</p>
<p><strong>Note:</strong> Make sure your microphone is accessible and you have permission to use it.</p>
</div>
""",
unsafe_allow_html=True,
)
# Audio recorder
recorded_audio = st.audio_input(
label="Click to start recording",
help="Click the microphone button to start/stop recording. Maximum recording time is 5 seconds.",
)
if recorded_audio is not None:
# Display recorded audio
st.audio(recorded_audio, format="audio/wav")
st.success("Audio recorded successfully!")
# Convert recorded audio to bytes for processing
uploaded_audio = recorded_audio
audio_source = "recorded"
audio_name = "Recorded Audio"
else:
uploaded_audio = None
audio_source = None
audio_name = None
if uploaded_audio is not None:
# Display audio player
        if audio_source == "recorded":
            # The recorded clip is already rendered above; just show its source
            st.info(f"{audio_name} | Source: Microphone Recording")
else:
st.audio(
uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}'
)
# File info for uploaded files
file_info = get_file_info(uploaded_audio)
st.info(
f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])}"
)
# Analyze button
if st.button(
"Analyze Audio Sentiment", type="primary", use_container_width=True
):
if model is None:
st.error("Model not loaded. Cannot analyze audio.")
else:
with st.spinner("Analyzing audio sentiment..."):
audio_bytes = uploaded_audio.getvalue()
sentiment, confidence = predict_audio_sentiment(audio_bytes)
# Display results
st.markdown("### Results")
col1, col2 = st.columns(2)
with col1:
st.metric("Sentiment", sentiment)
with col2:
st.metric("Confidence", f"{confidence:.2f}")
# Color-coded sentiment display
sentiment_colors = get_sentiment_colors()
emoji = sentiment_colors.get(sentiment, "❓")
st.markdown(
f"""
<div class="result-box">
<h4>{emoji} Sentiment: {sentiment}</h4>
<p><strong>Confidence:</strong> {confidence:.2f}</p>
<p><strong>Audio Source:</strong> {audio_name}</p>
<p><strong>Model:</strong> Wav2Vec2 (Fine-tuned on RAVDESS + CREMA-D)</p>
</div>
""",
unsafe_allow_html=True,
)
else:
if input_method == "Upload Audio File":
st.info("Please upload an audio file to begin analysis.")
else:
st.info("Click the microphone button above to record audio for analysis.")
def render_vision_sentiment_page():
"""Render the vision sentiment analysis page."""
st.title("Vision Sentiment Analysis")
st.markdown(
"Analyze the sentiment of your images using our fine-tuned ResNet-50 model."
)
st.info(
"**Note**: Images will be automatically preprocessed to match FER2013 format: face detection, grayscale conversion, and 224x224 resize (converted to 3-channel RGB)."
)
# Face cropping is set to 0% (no padding) for tightest crop
st.info("**Face Cropping**: Set to 0% padding for tightest crop on facial features")
# Model status
model, device, num_classes = load_vision_model()
if model is None:
st.error(
"Vision model could not be loaded. Please check the Google Drive setup."
)
st.info(
"Expected: Models should be configured in Google Drive and accessible via the model manager."
)
else:
st.success(
f"Vision model loaded successfully on {device} with {num_classes} classes!"
)
# Input method selection
st.subheader("Choose Input Method")
input_method = st.radio(
"Select how you want to provide an image:",
["Upload Image File", "Take Photo with Camera"],
horizontal=True,
)
if input_method == "Upload Image File":
# File uploader
uploaded_image = st.file_uploader(
"Choose an image file",
type=SUPPORTED_IMAGE_FORMATS,
help="Supported formats: PNG, JPG, JPEG, BMP, TIFF",
)
if uploaded_image is not None:
# Display image
image = Image.open(uploaded_image)
st.image(
image,
caption=f"Uploaded Image: {uploaded_image.name}",
use_container_width=True,
)
# File info
file_info = get_file_info(uploaded_image)
st.info(
f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])} | Dimensions: {image.size[0]}x{image.size[1]}"
)
# Analyze button
if st.button(
"Analyze Image Sentiment", type="primary", use_container_width=True
):
if model is None:
st.error("Model not loaded. Cannot analyze image.")
else:
with st.spinner("Analyzing image sentiment..."):
sentiment, confidence = predict_vision_sentiment(image)
# Display results
st.markdown("### Results")
col1, col2 = st.columns(2)
with col1:
st.metric("Sentiment", sentiment)
with col2:
st.metric("Confidence", f"{confidence:.2f}")
# Color-coded sentiment display
sentiment_colors = get_sentiment_colors()
emoji = sentiment_colors.get(sentiment, "❓")
st.markdown(
f"""
<div class="result-box">
<h4>{emoji} Sentiment: {sentiment}</h4>
<p><strong>Confidence:</strong> {confidence:.2f}</p>
<p><strong>Image File:</strong> {uploaded_image.name}</p>
<p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p>
</div>
""",
unsafe_allow_html=True,
)
else: # Camera capture
st.markdown(
"""
<div class="model-card">
<h3>Camera Capture</h3>
<p>Take a photo directly with your camera to analyze its sentiment.</p>
<p><strong>Note:</strong> Make sure your camera is accessible and you have permission to use it.</p>
</div>
""",
unsafe_allow_html=True,
)
# Camera input
camera_photo = st.camera_input(
"Take a photo",
help="Click the camera button to take a photo, or use the upload button to select an existing photo",
)
if camera_photo is not None:
# Display captured image
image = Image.open(camera_photo)
st.image(
image,
caption="Captured Photo",
use_container_width=True,
)
# Image info
st.info(
f"Captured Photo | Dimensions: {image.size[0]}x{image.size[1]} | Format: {image.format}"
)
# Analyze button
if st.button(
"Analyze Photo Sentiment", type="primary", use_container_width=True
):
if model is None:
st.error("Model not loaded. Cannot analyze image.")
else:
with st.spinner("Analyzing photo sentiment..."):
sentiment, confidence = predict_vision_sentiment(image)
# Display results
st.markdown("### Results")
col1, col2 = st.columns(2)
with col1:
st.metric("Sentiment", sentiment)
with col2:
st.metric("Confidence", f"{confidence:.2f}")
# Color-coded sentiment display
sentiment_colors = get_sentiment_colors()
emoji = sentiment_colors.get(sentiment, "❓")
st.markdown(
f"""
<div class="result-box">
<h4>{emoji} Sentiment: {sentiment}</h4>
<p><strong>Confidence:</strong> {confidence:.2f}</p>
<p><strong>Image Source:</strong> Camera Capture</p>
<p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p>
</div>
""",
unsafe_allow_html=True,
)
    # Show a hint when no image has been provided yet
    if input_method == "Upload Image File" and uploaded_image is None:
        st.info("Please upload an image file to begin analysis.")
    elif input_method == "Take Photo with Camera" and camera_photo is None:
        st.info("Click the camera button above to take a photo for analysis.")
def render_fused_model_page():
"""Render the fused model analysis page."""
st.title("Fused Model Analysis")
st.markdown(
"Combine predictions from all three models for enhanced sentiment analysis."
)
st.markdown(
"""
<div class="model-card">
<h3>Multi-Modal Sentiment Analysis</h3>
<p>This page allows you to input text, audio, and/or image data to get a comprehensive sentiment analysis
using all three models combined.</p>
</div>
""",
unsafe_allow_html=True,
)
# Input sections
col1, col2 = st.columns(2)
with col1:
st.subheader("Text Input")
text_input = st.text_area(
"Enter text (optional):",
height=100,
placeholder="Type or paste your text here...",
)
st.subheader("Audio Input")
# Audio preprocessing information for fused model
st.info(
"**Audio Preprocessing**: Audio will be automatically processed to match CREMA-D + RAVDESS training format: "
"16kHz sampling rate, max 5 seconds, with automatic resampling and feature extraction."
)
# Audio input method for fused model
audio_input_method = st.radio(
"Audio input method:",
["Upload File", "Record Audio"],
key="fused_audio_method",
horizontal=True,
)
if audio_input_method == "Upload File":
uploaded_audio = st.file_uploader(
"Upload audio file (optional):",
type=SUPPORTED_AUDIO_FORMATS,
key="fused_audio",
)
audio_source = "uploaded_file"
audio_name = uploaded_audio.name if uploaded_audio else None
else:
# Audio recorder for fused model
recorded_audio = st.audio_input(
label="Record audio (optional):",
key="fused_audio_recorder",
help="Click to record audio for sentiment analysis",
)
if recorded_audio is not None:
st.audio(recorded_audio, format="audio/wav")
st.success("Audio recorded successfully!")
uploaded_audio = recorded_audio
audio_source = "recorded"
audio_name = "Recorded Audio"
else:
uploaded_audio = None
audio_source = None
audio_name = None
with col2:
st.subheader("Image Input")
# Face cropping is set to 0% (no padding) for tightest crop
st.info(
"**Face Cropping**: Set to 0% padding for tightest crop on facial features"
)
# Image input method for fused model
image_input_method = st.radio(
"Image input method:",
["Upload File", "Take Photo"],
key="fused_image_method",
horizontal=True,
)
if image_input_method == "Upload File":
uploaded_image = st.file_uploader(
"Upload image file (optional):",
type=SUPPORTED_IMAGE_FORMATS,
key="fused_image",
)
if uploaded_image:
image = Image.open(uploaded_image)
st.image(image, caption="Uploaded Image", use_container_width=True)
        else:
            # Camera capture for fused model
            camera_photo = st.camera_input(
                "Take a photo (optional):",
                key="fused_camera",
                help="Click to take a photo for sentiment analysis",
            )
            # Always define uploaded_image so the analysis step below never
            # hits a NameError when no photo has been taken yet
            uploaded_image = camera_photo
            if camera_photo:
                image = Image.open(camera_photo)
                st.image(image, caption="Captured Photo", use_container_width=True)
    if uploaded_audio and audio_source == "uploaded_file":
        # Recorded audio is already rendered above, so only play back uploads
        st.audio(
            uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}'
        )
# Analyze button
if st.button("Run Fused Analysis", type="primary", use_container_width=True):
if text_input or uploaded_audio or uploaded_image:
with st.spinner("Running fused sentiment analysis..."):
                # Prepare inputs (rewind the image buffer before re-reading it,
                # since Image.open above may have consumed the stream)
                audio_bytes = uploaded_audio.getvalue() if uploaded_audio else None
                if uploaded_image:
                    uploaded_image.seek(0)
                    image = Image.open(uploaded_image)
                else:
                    image = None
# Get fused prediction
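                # predict_fused_sentiment is assumed to ensemble whichever
                # modalities are present (e.g., confidence-weighted voting over
                # the per-model predictions); see src/models/fused_model.py.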
sentiment, confidence = predict_fused_sentiment(
text=text_input if text_input else None,
audio_bytes=audio_bytes,
image=image,
)
# Display results
st.markdown("### Fused Model Results")
col1, col2 = st.columns(2)
with col1:
st.metric("Final Sentiment", sentiment)
with col2:
st.metric("Overall Confidence", f"{confidence:.2f}")
# Show individual model results
st.markdown("### Individual Model Results")
results_data = []
if text_input:
text_sentiment, text_conf = predict_text_sentiment(text_input)
results_data.append(
{
"Model": "Text (TextBlob)",
"Input": f"Text: {text_input[:50]}...",
"Sentiment": text_sentiment,
"Confidence": f"{text_conf:.2f}",
}
)
if uploaded_audio:
audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
results_data.append(
{
"Model": "Audio (Wav2Vec2)",
"Input": f"Audio: {audio_name}",
"Sentiment": audio_sentiment,
"Confidence": f"{audio_conf:.2f}",
}
)
if uploaded_image:
# Face cropping is set to 0% (no padding) for tightest crop
vision_sentiment, vision_conf = predict_vision_sentiment(
image, crop_tightness=0.0
)
results_data.append(
{
"Model": "Vision (ResNet-50)",
"Input": f"Image: {uploaded_image.name}",
"Sentiment": vision_sentiment,
"Confidence": f"{vision_conf:.2f}",
}
)
if results_data:
df = pd.DataFrame(results_data)
st.dataframe(df, use_container_width=True)
# Final result display
sentiment_colors = get_sentiment_colors()
emoji = sentiment_colors.get(sentiment, "❓")
st.markdown(
f"""
<div class="result-box">
<h4>{emoji} Final Fused Sentiment: {sentiment}</h4>
<p><strong>Overall Confidence:</strong> {confidence:.2f}</p>
<p><strong>Models Used:</strong> {len(results_data)}</p>
</div>
""",
unsafe_allow_html=True,
)
else:
st.warning(
"Please provide at least one input (text, audio, or image) for fused analysis."
)
def render_max_fusion_page():
"""Render the max fusion page for video-based analysis."""
st.title("Max Fusion - Multi-Modal Sentiment Analysis")
st.markdown(
"""
<div class="model-card">
<h3>Ultimate Multi-Modal Sentiment Analysis</h3>
<p>Take photos with camera or upload videos to get comprehensive sentiment analysis from multiple modalities:</p>
<ul>
        <li>📸 <strong>Vision Analysis:</strong> Camera photos or video frames for facial expression analysis</li>
        <li>🎵 <strong>Audio Analysis:</strong> Audio files or extracted audio from videos for vocal sentiment</li>
        <li>📝 <strong>Text Analysis:</strong> Transcribed audio for text sentiment analysis</li>
</ul>
</div>
""",
unsafe_allow_html=True,
)
# Video input method selection
st.subheader("Video Input")
video_input_method = st.radio(
"Choose input method:",
["Upload Video File", "Record Video (Coming Soon)"],
horizontal=True,
index=0, # Default to upload video
)
if video_input_method == "Record Video (Coming Soon)":
# Coming Soon message for video recording
st.info("πŸŽ₯ Video recording feature is coming soon!")
st.info("πŸ“ Please use the Upload Video File option for now.")
# Show a nice coming soon message
st.markdown("---")
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
st.markdown(
"""
<div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; color: white;">
<h3>🚧 Coming Soon 🚧</h3>
<p>Video recording feature is under development</p>
<p>Use Upload Video File for now!</p>
</div>
""",
unsafe_allow_html=True,
)
# Placeholder for future recording functionality
st.markdown(
"""
**Future Features:**
- Real-time video recording with camera
- Audio capture during recording
- Automatic frame extraction
- Live transcription
- WebRTC integration for low-latency streaming
"""
)
# Skip all the recording logic for now
uploaded_video = None
video_source = None
video_name = None
video_file = None
elif video_input_method == "Upload Video File":
# File upload option
st.markdown(
"""
<div class="upload-section">
<h4>πŸ“ Upload Video File</h4>
<p>Upload a video file for comprehensive multimodal analysis.</p>
<p><strong>Supported Formats:</strong> MP4, AVI, MOV, MKV, WMV, FLV</p>
<p><strong>Recommended:</strong> Videos with clear audio and visual content</p>
</div>
""",
unsafe_allow_html=True,
)
uploaded_video = st.file_uploader(
"Choose a video file",
type=SUPPORTED_VIDEO_FORMATS,
help="Supported formats: MP4, AVI, MOV, MKV, WMV, FLV",
)
video_source = "uploaded_file"
video_name = uploaded_video.name if uploaded_video else None
video_file = uploaded_video
if video_file is not None:
# Display video or photo
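        # NOTE: with the current input methods, video_source is only ever
        # "uploaded_file" or None, so the "camera_photo" branch below is
        # unreachable; it is kept for the planned camera-capture mode.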
if video_source == "camera_photo":
# For camera photos, we already displayed the image above
st.info(f"Source: Camera Photo | Ready for vision analysis")
# Add audio upload option for camera photo mode
st.subheader("🎡 Audio Input for Analysis")
st.info(
"Since we're using a photo, please upload an audio file for audio sentiment analysis:"
)
uploaded_audio = st.file_uploader(
"Upload audio file for audio analysis:",
type=SUPPORTED_AUDIO_FORMATS,
key="camera_audio",
help="Upload an audio file to complement the photo analysis",
)
if uploaded_audio:
st.audio(
uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}'
)
st.success("βœ… Audio uploaded successfully!")
audio_bytes = uploaded_audio.getvalue()
else:
audio_bytes = None
st.warning("⚠️ Please upload an audio file for complete analysis")
else:
# For uploaded videos
st.video(video_file)
file_info = get_file_info(video_file)
st.info(
f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])}"
)
audio_bytes = None # Will be extracted from video
# Video Processing Pipeline
st.subheader("🎬 Video Processing Pipeline")
# Initialize variables
frames = []
audio_bytes = None
transcribed_text = ""
# Process uploaded video
if uploaded_video:
st.info("πŸ“ Processing uploaded video file...")
# Extract frames
st.markdown("**1. πŸŽ₯ Frame Extraction**")
frames = extract_frames_from_video(uploaded_video, max_frames=5)
if frames:
st.success(f"βœ… Extracted {len(frames)} representative frames")
# Display extracted frames
cols = st.columns(len(frames))
for i, frame in enumerate(frames):
with cols[i]:
st.image(
frame, caption=f"Frame {i+1}", use_container_width=True
)
else:
st.warning("⚠️ Could not extract frames from video")
frames = []
# Extract audio
st.markdown("**2. 🎡 Audio Extraction**")
audio_bytes = extract_audio_from_video(uploaded_video)
if audio_bytes:
st.success("βœ… Audio extracted successfully")
st.audio(audio_bytes, format="audio/wav")
else:
st.warning("⚠️ Could not extract audio from video")
audio_bytes = None
# Transcribe audio
st.markdown("**3. πŸ“ Audio Transcription**")
if audio_bytes:
transcribed_text = transcribe_audio(audio_bytes)
if transcribed_text:
st.success("βœ… Audio transcribed successfully")
st.markdown(f'**Transcribed Text:** "{transcribed_text}"')
else:
st.warning("⚠️ Could not transcribe audio")
transcribed_text = ""
else:
transcribed_text = ""
st.info("ℹ️ No audio available for transcription")
# Analysis button
if st.button(
"πŸš€ Run Max Fusion Analysis", type="primary", use_container_width=True
):
with st.spinner(
"πŸ”„ Processing video and running comprehensive analysis..."
):
# Run individual analyses
st.subheader("πŸ” Individual Model Analysis")
results_data = []
# Vision analysis (use first frame for uploaded videos)
if frames:
st.markdown("**Vision Analysis:**")
# For uploaded videos, use first frame
vision_sentiment, vision_conf = predict_vision_sentiment(
frames[0], crop_tightness=0.0
)
results_data.append(
{
"Model": "Vision (ResNet-50)",
"Input": f"Video Frame 1",
"Sentiment": vision_sentiment,
"Confidence": f"{vision_conf:.2f}",
}
)
st.success(
f"Vision: {vision_sentiment} (Confidence: {vision_conf:.2f})"
)
# Audio analysis
if audio_bytes:
st.markdown("**Audio Analysis:**")
audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
results_data.append(
{
"Model": "Audio (Wav2Vec2)",
"Input": f"Video Audio",
"Sentiment": audio_sentiment,
"Confidence": f"{audio_conf:.2f}",
}
)
st.success(
f"Audio: {audio_sentiment} (Confidence: {audio_conf:.2f})"
)
# Text analysis
if transcribed_text:
st.markdown("**Text Analysis:**")
text_sentiment, text_conf = predict_text_sentiment(transcribed_text)
results_data.append(
{
"Model": "Text (TextBlob)",
"Input": f"Transcribed: {transcribed_text[:50]}...",
"Sentiment": text_sentiment,
"Confidence": f"{text_conf:.2f}",
}
)
st.success(f"Text: {text_sentiment} (Confidence: {text_conf:.2f})")
# Run fused analysis
st.subheader("🎯 Max Fusion Results")
if results_data:
# Display results table
df = pd.DataFrame(results_data)
st.dataframe(df, use_container_width=True)
# Calculate fused sentiment
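                    # Reuse the same fusion entry point as the Fused Model page,
                    # passing only the modalities that survived extraction
                    # (missing ones are None and presumably skipped internally).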
image_for_fusion = frames[0] if frames else None
sentiment, confidence = predict_fused_sentiment(
text=transcribed_text if transcribed_text else None,
audio_bytes=audio_bytes,
image=image_for_fusion,
)
# Display final results
col1, col2 = st.columns(2)
with col1:
st.metric("🎯 Final Sentiment", sentiment)
with col2:
st.metric("πŸ“Š Overall Confidence", f"{confidence:.2f}")
# Color-coded sentiment display
sentiment_colors = get_sentiment_colors()
emoji = sentiment_colors.get(sentiment, "❓")
st.markdown(
f"""
<div class="result-box">
<h4>{emoji} Max Fusion Sentiment: {sentiment}</h4>
<p><strong>Overall Confidence:</strong> {confidence:.2f}</p>
<p><strong>Modalities Analyzed:</strong> {len(results_data)}</p>
<p><strong>Video Source:</strong> {video_name}</p>
<p><strong>Analysis Type:</strong> Comprehensive Multi-Modal Sentiment Analysis</p>
</div>
""",
unsafe_allow_html=True,
)
else:
st.error(
"❌ No analysis could be performed. Please check your video input."
)
else:
if video_input_method == "Record Video (Coming Soon)":
st.info(
"πŸŽ₯ Video recording feature is coming soon! Please use Upload Video File for now."
)
else:
st.info("πŸ“ Please upload a video file to begin Max Fusion analysis.")
def main():
"""Main application function."""
# Sidebar navigation
st.sidebar.title("Sentiment Analysis")
st.sidebar.markdown("---")
# Navigation
page = st.sidebar.selectbox(
"Choose a page:",
[
"Home",
"Text Sentiment",
"Audio Sentiment",
"Vision Sentiment",
"Fused Model",
"Max Fusion",
],
)
# Page routing
if page == "Home":
render_home_page()
elif page == "Text Sentiment":
render_text_sentiment_page()
elif page == "Audio Sentiment":
render_audio_sentiment_page()
elif page == "Vision Sentiment":
render_vision_sentiment_page()
elif page == "Fused Model":
render_fused_model_page()
elif page == "Max Fusion":
render_max_fusion_page()
# Footer
st.markdown("---")
st.markdown(
"""
<div style="text-align: center; color: #666; padding: 1rem;">
        <p>Built with ❤️ | by <a href="https://github.com/iamfaham">iamfaham</a></p>
<p>Version: {version}</p>
</div>
""".format(
version=APP_VERSION
),
unsafe_allow_html=True,
)
if __name__ == "__main__":
main()