|
""" |
|
Refactored Sentiment Fused - Multimodal Sentiment Analysis Application |
|
|
|
This is the main entry point for the application, now using a modular structure. |
|
""" |
|
|
|
import streamlit as st |
|
import pandas as pd |
|
from PIL import Image |
|
import logging |
|
|
|
|
|
from src.config.settings import ( |
|
APP_NAME, |
|
APP_VERSION, |
|
APP_ICON, |
|
APP_LAYOUT, |
|
CUSTOM_CSS, |
|
SUPPORTED_IMAGE_FORMATS, |
|
SUPPORTED_AUDIO_FORMATS, |
|
SUPPORTED_VIDEO_FORMATS, |
|
) |
|
from src.models.text_model import predict_text_sentiment |
|
from src.models.audio_model import predict_audio_sentiment, load_audio_model |
|
from src.models.vision_model import predict_vision_sentiment, load_vision_model |
|
from src.models.fused_model import predict_fused_sentiment |
|
from src.utils.preprocessing import ( |
|
extract_frames_from_video, |
|
extract_audio_from_video, |
|
transcribe_audio, |
|
) |
|
from src.utils.file_handling import get_file_info, format_file_size |
|
from src.utils.sentiment_mapping import get_sentiment_colors, format_sentiment_result |
|
|
|
|
|
logging.basicConfig(level=logging.INFO) |
|
logger = logging.getLogger(__name__) |
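# Streamlit requires st.set_page_config() to be the first Streamlit command
# executed in the script, so it must stay ahead of any other st.* calls below.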
|
|
|
|
|
st.set_page_config( |
|
page_title=APP_NAME, |
|
page_icon=APP_ICON, |
|
layout=APP_LAYOUT, |
|
initial_sidebar_state="expanded", |
|
) |
|
|
|
|
|
st.markdown(CUSTOM_CSS, unsafe_allow_html=True) |
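# CUSTOM_CSS is injected as raw HTML (presumably a <style> block defined in
# src/config/settings.py), which is why unsafe_allow_html=True is needed here
# and for the HTML cards rendered throughout the app.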
|
|
|
|
|
def render_home_page(): |
|
"""Render the home page with model information.""" |
|
st.markdown( |
|
f'<h1 class="main-header">{APP_NAME}</h1>', |
|
unsafe_allow_html=True, |
|
) |
|
|
|
st.markdown( |
|
""" |
|
<div class="model-card"> |
|
<h2>Welcome to your Multi-Modal Sentiment Analysis Testing Platform!</h2> |
|
<p>This application provides a comprehensive testing environment for your three independent sentiment analysis models:</p> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
|
|
col1, col2, col3 = st.columns(3) |
|
|
|
with col1: |
|
st.markdown( |
|
""" |
|
<div class="model-card"> |
|
<h3>Text Sentiment Model</h3> |
|
<p>READY TO USE - Analyze sentiment from text input using TextBlob</p> |
|
<ul> |
|
<li>Process any text input</li> |
|
<li>Get sentiment classification (Positive/Negative/Neutral)</li> |
|
<li>View confidence scores</li> |
|
<li>Real-time NLP analysis</li> |
|
</ul> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
|
|
with col2: |
|
st.markdown( |
|
""" |
|
<div class="model-card"> |
|
<h3>Audio Sentiment Model</h3> |
|
<p>READY TO USE - Analyze sentiment from audio files using fine-tuned Wav2Vec2</p> |
|
<ul> |
|
<li>Upload audio files (.wav, .mp3, .m4a, .flac)</li> |
|
<li>Record audio directly with microphone (max 5s)</li> |
|
<li>Automatic preprocessing: 16kHz sampling, 5s max duration</li> |
|
<li>Listen to uploaded/recorded audio</li> |
|
<li>Get sentiment predictions</li> |
|
<li>Real-time audio analysis</li> |
|
</ul> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
|
|
with col3: |
|
st.markdown( |
|
""" |
|
<div class="model-card"> |
|
<h3>Vision Sentiment Model</h3> |
|
            <p>READY TO USE - Analyze sentiment from images using fine-tuned ResNet-50</p>
|
<ul> |
|
<li>Upload image files (.png, .jpg, .jpeg, .bmp, .tiff)</li> |
|
<li>Automatic face detection & preprocessing</li> |
|
<li>Fixed 0% padding for tightest face crop</li> |
|
            <li>Convert to 224x224 grayscale → 3-channel RGB (FER2013 format)</li>

            <li>Transforms: Resize(224) → CenterCrop(224) → ImageNet Normalization</li>
|
<li>Preview original & preprocessed images</li> |
|
<li>Get sentiment predictions</li> |
|
</ul> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
|
|
st.markdown( |
|
""" |
|
<div class="model-card"> |
|
<h3>Fused Model</h3> |
|
<p>Combine predictions from all three models for enhanced accuracy</p> |
|
<ul> |
|
<li>Multi-modal input processing</li> |
|
<li>Ensemble prediction strategies</li> |
|
<li>Comprehensive sentiment analysis</li> |
|
</ul> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
|
|
st.markdown( |
|
""" |
|
<div class="model-card"> |
|
        <h3>🎬 Max Fusion</h3>
|
<p>Ultimate video-based sentiment analysis combining all three modalities</p> |
|
<ul> |
|
        <li>🎥 Record or upload 5-second videos</li>

        <li>🎞 Extract frames for vision analysis</li>

        <li>🎵 Extract audio for vocal sentiment</li>

        <li>📝 Transcribe audio for text analysis</li>

        <li>📊 Comprehensive multi-modal results</li>
|
</ul> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
|
|
st.markdown("---") |
|
st.markdown( |
|
""" |
|
<div style="text-align: center; color: #666;"> |
|
<p><strong>Note:</strong> This application now has <strong>ALL THREE MODELS</strong> fully integrated and ready to use!</p> |
|
<p><strong>TextBlob</strong> (Text) + <strong>Wav2Vec2</strong> (Audio) + <strong>ResNet-50</strong> (Vision)</p> |
|
<p><strong>Models are now loaded from Google Drive automatically!</strong></p> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
|
|
|
|
def render_text_sentiment_page(): |
|
"""Render the text sentiment analysis page.""" |
|
st.title("Text Sentiment Analysis") |
|
st.markdown("Analyze the sentiment of your text using our TextBlob-based model.") |
|
|
|
|
|
text_input = st.text_area( |
|
"Enter your text here:", |
|
height=150, |
|
placeholder="Type or paste your text here to analyze its sentiment...", |
|
) |
|
|
|
|
|
if st.button("Analyze Sentiment", type="primary", use_container_width=True): |
|
if text_input and text_input.strip(): |
|
with st.spinner("Analyzing text sentiment..."): |
|
sentiment, confidence = predict_text_sentiment(text_input) |
|
|
|
|
|
st.markdown("### Results") |
|
|
|
|
|
col1, col2 = st.columns(2) |
|
with col1: |
|
st.metric("Sentiment", sentiment) |
|
with col2: |
|
st.metric("Confidence", f"{confidence:.2f}") |
|
|
|
|
|
sentiment_colors = get_sentiment_colors() |
|
            emoji = sentiment_colors.get(sentiment, "❓")
|
|
|
st.markdown( |
|
f""" |
|
<div class="result-box"> |
|
<h4>{emoji} Sentiment: {sentiment}</h4> |
|
<p><strong>Confidence:</strong> {confidence:.2f}</p> |
|
<p><strong>Input Text:</strong> "{text_input[:100]}{'...' if len(text_input) > 100 else ''}"</p> |
|
<p><strong>Model:</strong> TextBlob (Natural Language Processing)</p> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
else: |
|
st.error("Please enter some text to analyze.") |
|
|
|
|
|
def render_audio_sentiment_page(): |
|
"""Render the audio sentiment analysis page.""" |
|
st.title("Audio Sentiment Analysis") |
|
st.markdown( |
|
"Analyze the sentiment of your audio files using our fine-tuned Wav2Vec2 model." |
|
) |
|
|
|
|
|
st.info( |
|
"**Audio Preprocessing**: Audio will be automatically processed to match CREMA-D + RAVDESS training format: " |
|
"16kHz sampling rate, max 5 seconds, with automatic resampling and feature extraction." |
|
) |
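    # The preprocessing described above roughly corresponds to this sketch
    # (illustrative only; the actual logic lives in src/models/audio_model.py):
    #
    #     import io
    #     import librosa
    #
    #     def prepare_audio(audio_bytes, sr=16_000, max_seconds=5):
    #         waveform, _ = librosa.load(io.BytesIO(audio_bytes), sr=sr, mono=True)
    #         return waveform[: sr * max_seconds]  # truncate to 5 s at 16 kHz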
|
|
|
|
|
model, device, num_classes, feature_extractor = load_audio_model() |
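    # NOTE: Streamlit reruns this script on every interaction, so
    # load_audio_model() is presumably cached (e.g. with @st.cache_resource)
    # to avoid re-downloading and re-loading the weights on each rerun.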
|
if model is None: |
|
st.error( |
|
"Audio model could not be loaded. Please check the Google Drive setup." |
|
) |
|
st.info( |
|
"Expected: Models should be configured in Google Drive and accessible via the model manager." |
|
) |
|
else: |
|
st.success( |
|
f"Audio model loaded successfully on {device} with {num_classes} classes!" |
|
) |
|
|
|
|
|
st.subheader("Choose Input Method") |
|
input_method = st.radio( |
|
"Select how you want to provide audio:", |
|
["Upload Audio File", "Record Audio"], |
|
horizontal=True, |
|
) |
|
|
|
if input_method == "Upload Audio File": |
|
|
|
uploaded_audio = st.file_uploader( |
|
"Choose an audio file", |
|
type=SUPPORTED_AUDIO_FORMATS, |
|
help="Supported formats: WAV, MP3, M4A, FLAC", |
|
) |
|
|
|
audio_source = "uploaded_file" |
|
audio_name = uploaded_audio.name if uploaded_audio else None |
|
|
|
else: |
|
st.markdown( |
|
""" |
|
<div class="model-card"> |
|
<h3>Audio Recording</h3> |
|
<p>Record audio directly with your microphone (max 5 seconds).</p> |
|
<p><strong>Note:</strong> Make sure your microphone is accessible and you have permission to use it.</p> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
|
|
|
|
recorded_audio = st.audio_input( |
|
label="Click to start recording", |
|
help="Click the microphone button to start/stop recording. Maximum recording time is 5 seconds.", |
|
) |
|
|
|
if recorded_audio is not None: |
|
|
|
st.audio(recorded_audio, format="audio/wav") |
|
st.success("Audio recorded successfully!") |
|
|
|
|
|
uploaded_audio = recorded_audio |
|
audio_source = "recorded" |
|
audio_name = "Recorded Audio" |
|
else: |
|
uploaded_audio = None |
|
audio_source = None |
|
audio_name = None |
|
|
|
if uploaded_audio is not None: |
|
|
|
if audio_source == "recorded": |
|
st.audio(uploaded_audio, format="audio/wav") |
|
st.info(f"{audio_name} | Source: Microphone Recording") |
|
else: |
|
st.audio( |
|
uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}' |
|
) |
|
|
|
file_info = get_file_info(uploaded_audio) |
|
st.info( |
|
f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])}" |
|
) |
|
|
|
|
|
if st.button( |
|
"Analyze Audio Sentiment", type="primary", use_container_width=True |
|
): |
|
if model is None: |
|
st.error("Model not loaded. Cannot analyze audio.") |
|
else: |
|
with st.spinner("Analyzing audio sentiment..."): |
|
audio_bytes = uploaded_audio.getvalue() |
|
sentiment, confidence = predict_audio_sentiment(audio_bytes) |
|
|
|
|
|
st.markdown("### Results") |
|
|
|
col1, col2 = st.columns(2) |
|
with col1: |
|
st.metric("Sentiment", sentiment) |
|
with col2: |
|
st.metric("Confidence", f"{confidence:.2f}") |
|
|
|
|
|
sentiment_colors = get_sentiment_colors() |
|
                emoji = sentiment_colors.get(sentiment, "❓")
|
|
|
st.markdown( |
|
f""" |
|
<div class="result-box"> |
|
<h4>{emoji} Sentiment: {sentiment}</h4> |
|
<p><strong>Confidence:</strong> {confidence:.2f}</p> |
|
<p><strong>Audio Source:</strong> {audio_name}</p> |
|
<p><strong>Model:</strong> Wav2Vec2 (Fine-tuned on RAVDESS + CREMA-D)</p> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
else: |
|
if input_method == "Upload Audio File": |
|
st.info("Please upload an audio file to begin analysis.") |
|
else: |
|
st.info("Click the microphone button above to record audio for analysis.") |
|
|
|
|
|
def render_vision_sentiment_page(): |
|
"""Render the vision sentiment analysis page.""" |
|
st.title("Vision Sentiment Analysis") |
|
st.markdown( |
|
"Analyze the sentiment of your images using our fine-tuned ResNet-50 model." |
|
) |
|
|
|
st.info( |
|
"**Note**: Images will be automatically preprocessed to match FER2013 format: face detection, grayscale conversion, and 224x224 resize (converted to 3-channel RGB)." |
|
) |
|
|
|
|
|
st.info("**Face Cropping**: Set to 0% padding for tightest crop on facial features") |
|
|
|
|
|
model, device, num_classes = load_vision_model() |
|
if model is None: |
|
st.error( |
|
"Vision model could not be loaded. Please check the Google Drive setup." |
|
) |
|
st.info( |
|
"Expected: Models should be configured in Google Drive and accessible via the model manager." |
|
) |
|
else: |
|
st.success( |
|
f"Vision model loaded successfully on {device} with {num_classes} classes!" |
|
) |
|
|
|
|
|
st.subheader("Choose Input Method") |
|
input_method = st.radio( |
|
"Select how you want to provide an image:", |
|
["Upload Image File", "Take Photo with Camera"], |
|
horizontal=True, |
|
) |
|
|
|
if input_method == "Upload Image File": |
|
|
|
uploaded_image = st.file_uploader( |
|
"Choose an image file", |
|
type=SUPPORTED_IMAGE_FORMATS, |
|
help="Supported formats: PNG, JPG, JPEG, BMP, TIFF", |
|
) |
|
|
|
if uploaded_image is not None: |
|
|
|
image = Image.open(uploaded_image) |
|
st.image( |
|
image, |
|
caption=f"Uploaded Image: {uploaded_image.name}", |
|
use_container_width=True, |
|
) |
|
|
|
|
|
file_info = get_file_info(uploaded_image) |
|
st.info( |
|
f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])} | Dimensions: {image.size[0]}x{image.size[1]}" |
|
) |
|
|
|
|
|
if st.button( |
|
"Analyze Image Sentiment", type="primary", use_container_width=True |
|
): |
|
if model is None: |
|
st.error("Model not loaded. Cannot analyze image.") |
|
else: |
|
with st.spinner("Analyzing image sentiment..."): |
|
sentiment, confidence = predict_vision_sentiment(image) |
|
|
|
|
|
st.markdown("### Results") |
|
|
|
col1, col2 = st.columns(2) |
|
with col1: |
|
st.metric("Sentiment", sentiment) |
|
with col2: |
|
st.metric("Confidence", f"{confidence:.2f}") |
|
|
|
|
|
sentiment_colors = get_sentiment_colors() |
|
                    emoji = sentiment_colors.get(sentiment, "❓")
|
|
|
st.markdown( |
|
f""" |
|
<div class="result-box"> |
|
<h4>{emoji} Sentiment: {sentiment}</h4> |
|
<p><strong>Confidence:</strong> {confidence:.2f}</p> |
|
<p><strong>Image File:</strong> {uploaded_image.name}</p> |
|
<p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
|
|
else: |
|
st.markdown( |
|
""" |
|
<div class="model-card"> |
|
<h3>Camera Capture</h3> |
|
<p>Take a photo directly with your camera to analyze its sentiment.</p> |
|
<p><strong>Note:</strong> Make sure your camera is accessible and you have permission to use it.</p> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
|
|
|
|
camera_photo = st.camera_input( |
|
"Take a photo", |
|
help="Click the camera button to take a photo, or use the upload button to select an existing photo", |
|
) |
|
|
|
if camera_photo is not None: |
|
|
|
image = Image.open(camera_photo) |
|
st.image( |
|
image, |
|
caption="Captured Photo", |
|
use_container_width=True, |
|
) |
|
|
|
|
|
st.info( |
|
f"Captured Photo | Dimensions: {image.size[0]}x{image.size[1]} | Format: {image.format}" |
|
) |
|
|
|
|
|
if st.button( |
|
"Analyze Photo Sentiment", type="primary", use_container_width=True |
|
): |
|
if model is None: |
|
st.error("Model not loaded. Cannot analyze image.") |
|
else: |
|
with st.spinner("Analyzing photo sentiment..."): |
|
sentiment, confidence = predict_vision_sentiment(image) |
|
|
|
|
|
st.markdown("### Results") |
|
|
|
col1, col2 = st.columns(2) |
|
with col1: |
|
st.metric("Sentiment", sentiment) |
|
with col2: |
|
st.metric("Confidence", f"{confidence:.2f}") |
|
|
|
|
|
sentiment_colors = get_sentiment_colors() |
|
                    emoji = sentiment_colors.get(sentiment, "❓")
|
|
|
st.markdown( |
|
f""" |
|
<div class="result-box"> |
|
<h4>{emoji} Sentiment: {sentiment}</h4> |
|
<p><strong>Confidence:</strong> {confidence:.2f}</p> |
|
<p><strong>Image Source:</strong> Camera Capture</p> |
|
<p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
|
|
|
|
if input_method == "Upload Image File" and "uploaded_image" not in locals(): |
|
st.info("Please upload an image file to begin analysis.") |
|
elif input_method == "Take Photo with Camera" and "camera_photo" not in locals(): |
|
st.info("Click the camera button above to take a photo for analysis.") |
|
|
|
|
|
def render_fused_model_page(): |
|
"""Render the fused model analysis page.""" |
|
st.title("Fused Model Analysis") |
|
st.markdown( |
|
"Combine predictions from all three models for enhanced sentiment analysis." |
|
) |
|
|
|
st.markdown( |
|
""" |
|
<div class="model-card"> |
|
<h3>Multi-Modal Sentiment Analysis</h3> |
|
<p>This page allows you to input text, audio, and/or image data to get a comprehensive sentiment analysis |
|
using all three models combined.</p> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
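    # One plausible ensemble strategy (illustrative only; the actual logic lives
    # in src/models/fused_model.py) is confidence-weighted voting over whichever
    # modalities were provided:
    #
    #     def weighted_vote(predictions):  # [(label, confidence), ...]
    #         scores = {}
    #         for label, conf in predictions:
    #             scores[label] = scores.get(label, 0.0) + conf
    #         best = max(scores, key=scores.get)
    #         return best, scores[best] / sum(scores.values())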
|
|
|
|
|
col1, col2 = st.columns(2) |
|
|
|
with col1: |
|
st.subheader("Text Input") |
|
text_input = st.text_area( |
|
"Enter text (optional):", |
|
height=100, |
|
placeholder="Type or paste your text here...", |
|
) |
|
|
|
st.subheader("Audio Input") |
|
|
|
|
|
st.info( |
|
"**Audio Preprocessing**: Audio will be automatically processed to match CREMA-D + RAVDESS training format: " |
|
"16kHz sampling rate, max 5 seconds, with automatic resampling and feature extraction." |
|
) |
|
|
|
|
|
audio_input_method = st.radio( |
|
"Audio input method:", |
|
["Upload File", "Record Audio"], |
|
key="fused_audio_method", |
|
horizontal=True, |
|
) |
|
|
|
if audio_input_method == "Upload File": |
|
uploaded_audio = st.file_uploader( |
|
"Upload audio file (optional):", |
|
type=SUPPORTED_AUDIO_FORMATS, |
|
key="fused_audio", |
|
) |
|
audio_source = "uploaded_file" |
|
audio_name = uploaded_audio.name if uploaded_audio else None |
|
else: |
|
|
|
recorded_audio = st.audio_input( |
|
label="Record audio (optional):", |
|
key="fused_audio_recorder", |
|
help="Click to record audio for sentiment analysis", |
|
) |
|
|
|
if recorded_audio is not None: |
|
st.audio(recorded_audio, format="audio/wav") |
|
st.success("Audio recorded successfully!") |
|
uploaded_audio = recorded_audio |
|
audio_source = "recorded" |
|
audio_name = "Recorded Audio" |
|
else: |
|
uploaded_audio = None |
|
audio_source = None |
|
audio_name = None |
|
|
|
with col2: |
|
st.subheader("Image Input") |
|
|
|
|
|
st.info( |
|
"**Face Cropping**: Set to 0% padding for tightest crop on facial features" |
|
) |
|
|
|
|
|
image_input_method = st.radio( |
|
"Image input method:", |
|
["Upload File", "Take Photo"], |
|
key="fused_image_method", |
|
horizontal=True, |
|
) |
|
|
|
if image_input_method == "Upload File": |
|
uploaded_image = st.file_uploader( |
|
"Upload image file (optional):", |
|
type=SUPPORTED_IMAGE_FORMATS, |
|
key="fused_image", |
|
) |
|
|
|
if uploaded_image: |
|
image = Image.open(uploaded_image) |
|
st.image(image, caption="Uploaded Image", use_container_width=True) |
|
else: |
|
|
|
camera_photo = st.camera_input( |
|
"Take a photo (optional):", |
|
key="fused_camera", |
|
help="Click to take a photo for sentiment analysis", |
|
) |
|
|
|
            if camera_photo:

                image = Image.open(camera_photo)

                st.image(image, caption="Captured Photo", use_container_width=True)

            # Keep uploaded_image bound even when no photo was taken, so the
            # fused-analysis checks below don't raise a NameError.
            uploaded_image = camera_photo
|
|
|
    # Preview uploaded audio here; recorded audio was already previewed above.
    if uploaded_audio and audio_source == "uploaded_file":

        st.audio(
            uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}'
        )
|
|
|
|
|
if st.button("Run Fused Analysis", type="primary", use_container_width=True): |
|
if text_input or uploaded_audio or uploaded_image: |
|
with st.spinner("Running fused sentiment analysis..."): |
|
|
|
audio_bytes = uploaded_audio.getvalue() if uploaded_audio else None |
|
image = Image.open(uploaded_image) if uploaded_image else None |
|
|
|
|
|
sentiment, confidence = predict_fused_sentiment( |
|
text=text_input if text_input else None, |
|
audio_bytes=audio_bytes, |
|
image=image, |
|
) |
|
|
|
|
|
st.markdown("### Fused Model Results") |
|
|
|
col1, col2 = st.columns(2) |
|
with col1: |
|
st.metric("Final Sentiment", sentiment) |
|
with col2: |
|
st.metric("Overall Confidence", f"{confidence:.2f}") |
|
|
|
|
|
st.markdown("### Individual Model Results") |
|
|
|
results_data = [] |
|
|
|
if text_input: |
|
text_sentiment, text_conf = predict_text_sentiment(text_input) |
|
results_data.append( |
|
{ |
|
"Model": "Text (TextBlob)", |
|
"Input": f"Text: {text_input[:50]}...", |
|
"Sentiment": text_sentiment, |
|
"Confidence": f"{text_conf:.2f}", |
|
} |
|
) |
|
|
|
if uploaded_audio: |
|
audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes) |
|
results_data.append( |
|
{ |
|
"Model": "Audio (Wav2Vec2)", |
|
"Input": f"Audio: {audio_name}", |
|
"Sentiment": audio_sentiment, |
|
"Confidence": f"{audio_conf:.2f}", |
|
} |
|
) |
|
|
|
if uploaded_image: |
|
|
|
vision_sentiment, vision_conf = predict_vision_sentiment( |
|
image, crop_tightness=0.0 |
|
) |
|
results_data.append( |
|
{ |
|
"Model": "Vision (ResNet-50)", |
|
"Input": f"Image: {uploaded_image.name}", |
|
"Sentiment": vision_sentiment, |
|
"Confidence": f"{vision_conf:.2f}", |
|
} |
|
) |
|
|
|
if results_data: |
|
df = pd.DataFrame(results_data) |
|
st.dataframe(df, use_container_width=True) |
|
|
|
|
|
sentiment_colors = get_sentiment_colors() |
|
            emoji = sentiment_colors.get(sentiment, "❓")
|
|
|
st.markdown( |
|
f""" |
|
<div class="result-box"> |
|
<h4>{emoji} Final Fused Sentiment: {sentiment}</h4> |
|
<p><strong>Overall Confidence:</strong> {confidence:.2f}</p> |
|
<p><strong>Models Used:</strong> {len(results_data)}</p> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
else: |
|
st.warning( |
|
"Please provide at least one input (text, audio, or image) for fused analysis." |
|
) |
|
|
|
|
|
def render_max_fusion_page(): |
|
"""Render the max fusion page for video-based analysis.""" |
|
st.title("Max Fusion - Multi-Modal Sentiment Analysis") |
|
st.markdown( |
|
""" |
|
<div class="model-card"> |
|
<h3>Ultimate Multi-Modal Sentiment Analysis</h3> |
|
        <p>Take photos with your camera or upload videos to get a comprehensive sentiment analysis across multiple modalities:</p>
|
<ul> |
|
        <li>📸 <strong>Vision Analysis:</strong> Camera photos or video frames for facial expression analysis</li>

        <li>🎵 <strong>Audio Analysis:</strong> Audio files or extracted audio from videos for vocal sentiment</li>

        <li>📝 <strong>Text Analysis:</strong> Transcribed audio for text sentiment analysis</li>
|
</ul> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
|
|
|
|
st.subheader("Video Input") |
|
video_input_method = st.radio( |
|
"Choose input method:", |
|
["Upload Video File", "Record Video (Coming Soon)"], |
|
horizontal=True, |
|
index=0, |
|
) |
|
|
|
if video_input_method == "Record Video (Coming Soon)": |
|
|
|
st.info("π₯ Video recording feature is coming soon!") |
|
st.info("π Please use the Upload Video File option for now.") |
|
|
|
|
|
st.markdown("---") |
|
col1, col2, col3 = st.columns([1, 2, 1]) |
|
with col2: |
|
st.markdown( |
|
""" |
|
<div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; color: white;"> |
|
            <h3>🚧 Coming Soon 🚧</h3>
|
<p>Video recording feature is under development</p> |
|
<p>Use Upload Video File for now!</p> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
|
|
|
|
st.markdown( |
|
""" |
|
**Future Features:** |
|
- Real-time video recording with camera |
|
- Audio capture during recording |
|
- Automatic frame extraction |
|
- Live transcription |
|
- WebRTC integration for low-latency streaming |
|
""" |
|
) |
|
|
|
|
|
uploaded_video = None |
|
video_source = None |
|
video_name = None |
|
video_file = None |
|
|
|
elif video_input_method == "Upload Video File": |
|
|
|
st.markdown( |
|
""" |
|
<div class="upload-section"> |
|
            <h4>📁 Upload Video File</h4>
|
<p>Upload a video file for comprehensive multimodal analysis.</p> |
|
<p><strong>Supported Formats:</strong> MP4, AVI, MOV, MKV, WMV, FLV</p> |
|
<p><strong>Recommended:</strong> Videos with clear audio and visual content</p> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
|
|
uploaded_video = st.file_uploader( |
|
"Choose a video file", |
|
type=SUPPORTED_VIDEO_FORMATS, |
|
help="Supported formats: MP4, AVI, MOV, MKV, WMV, FLV", |
|
) |
|
|
|
video_source = "uploaded_file" |
|
video_name = uploaded_video.name if uploaded_video else None |
|
video_file = uploaded_video |
|
|
|
if video_file is not None: |
|
|
|
if video_source == "camera_photo": |
|
|
|
st.info(f"Source: Camera Photo | Ready for vision analysis") |
|
|
|
|
|
st.subheader("π΅ Audio Input for Analysis") |
|
st.info( |
|
"Since we're using a photo, please upload an audio file for audio sentiment analysis:" |
|
) |
|
|
|
uploaded_audio = st.file_uploader( |
|
"Upload audio file for audio analysis:", |
|
type=SUPPORTED_AUDIO_FORMATS, |
|
key="camera_audio", |
|
help="Upload an audio file to complement the photo analysis", |
|
) |
|
|
|
if uploaded_audio: |
|
st.audio( |
|
uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}' |
|
) |
|
st.success("β
Audio uploaded successfully!") |
|
audio_bytes = uploaded_audio.getvalue() |
|
else: |
|
audio_bytes = None |
|
st.warning("β οΈ Please upload an audio file for complete analysis") |
|
|
|
else: |
|
|
|
st.video(video_file) |
|
file_info = get_file_info(video_file) |
|
st.info( |
|
f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])}" |
|
) |
|
audio_bytes = None |
|
|
|
|
|
st.subheader("π¬ Video Processing Pipeline") |
|
|
|
|
|
frames = [] |
|
audio_bytes = None |
|
transcribed_text = "" |
|
|
|
|
|
if uploaded_video: |
|
st.info("π Processing uploaded video file...") |
|
|
|
|
|
st.markdown("**1. π₯ Frame Extraction**") |
|
frames = extract_frames_from_video(uploaded_video, max_frames=5) |
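        # extract_frames_from_video presumably samples evenly spaced frames,
        # along these lines (illustrative OpenCV sketch; the real code lives in
        # src/utils/preprocessing.py):
        #
        #     import cv2, numpy as np
        #     cap = cv2.VideoCapture(video_path)
        #     total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        #     for idx in np.linspace(0, total - 1, num=max_frames, dtype=int):
        #         cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
        #         ok, frame = cap.read()  # BGR; convert to RGB before handing to PIL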
|
|
|
if frames: |
|
st.success(f"β
Extracted {len(frames)} representative frames") |
|
|
|
|
|
cols = st.columns(len(frames)) |
|
for i, frame in enumerate(frames): |
|
with cols[i]: |
|
st.image( |
|
frame, caption=f"Frame {i+1}", use_container_width=True |
|
) |
|
else: |
|
st.warning("β οΈ Could not extract frames from video") |
|
frames = [] |
|
|
|
|
|
st.markdown("**2. π΅ Audio Extraction**") |
|
audio_bytes = extract_audio_from_video(uploaded_video) |
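        # extract_audio_from_video presumably demuxes the audio track with
        # ffmpeg (or moviepy); the equivalent ffmpeg invocation would be roughly:
        #
        #     ffmpeg -i input.mp4 -vn -ac 1 -ar 16000 -f wav output.wav
        #
        # i.e. drop the video stream (-vn), downmix to mono, resample to 16 kHz.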
|
|
|
if audio_bytes: |
|
st.success("β
Audio extracted successfully") |
|
st.audio(audio_bytes, format="audio/wav") |
|
else: |
|
st.warning("β οΈ Could not extract audio from video") |
|
audio_bytes = None |
|
|
|
|
|
st.markdown("**3. π Audio Transcription**") |
|
if audio_bytes: |
|
transcribed_text = transcribe_audio(audio_bytes) |
|
if transcribed_text: |
|
st.success("β
Audio transcribed successfully") |
|
st.markdown(f'**Transcribed Text:** "{transcribed_text}"') |
|
else: |
|
st.warning("β οΈ Could not transcribe audio") |
|
transcribed_text = "" |
|
else: |
|
transcribed_text = "" |
|
st.info("βΉοΈ No audio available for transcription") |
|
|
|
|
|
if st.button( |
|
"π Run Max Fusion Analysis", type="primary", use_container_width=True |
|
): |
|
with st.spinner( |
|
"π Processing video and running comprehensive analysis..." |
|
): |
|
|
|
st.subheader("π Individual Model Analysis") |
|
|
|
results_data = [] |
|
|
|
|
|
if frames: |
|
st.markdown("**Vision Analysis:**") |
|
|
|
|
|
vision_sentiment, vision_conf = predict_vision_sentiment( |
|
frames[0], crop_tightness=0.0 |
|
) |
|
results_data.append( |
|
{ |
|
"Model": "Vision (ResNet-50)", |
|
"Input": f"Video Frame 1", |
|
"Sentiment": vision_sentiment, |
|
"Confidence": f"{vision_conf:.2f}", |
|
} |
|
) |
|
st.success( |
|
f"Vision: {vision_sentiment} (Confidence: {vision_conf:.2f})" |
|
) |
|
|
|
|
|
if audio_bytes: |
|
st.markdown("**Audio Analysis:**") |
|
audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes) |
|
results_data.append( |
|
{ |
|
"Model": "Audio (Wav2Vec2)", |
|
"Input": f"Video Audio", |
|
"Sentiment": audio_sentiment, |
|
"Confidence": f"{audio_conf:.2f}", |
|
} |
|
) |
|
st.success( |
|
f"Audio: {audio_sentiment} (Confidence: {audio_conf:.2f})" |
|
) |
|
|
|
|
|
if transcribed_text: |
|
st.markdown("**Text Analysis:**") |
|
text_sentiment, text_conf = predict_text_sentiment(transcribed_text) |
|
results_data.append( |
|
{ |
|
"Model": "Text (TextBlob)", |
|
"Input": f"Transcribed: {transcribed_text[:50]}...", |
|
"Sentiment": text_sentiment, |
|
"Confidence": f"{text_conf:.2f}", |
|
} |
|
) |
|
st.success(f"Text: {text_sentiment} (Confidence: {text_conf:.2f})") |
|
|
|
|
|
st.subheader("π― Max Fusion Results") |
|
|
|
if results_data: |
|
|
|
df = pd.DataFrame(results_data) |
|
st.dataframe(df, use_container_width=True) |
|
|
|
|
|
image_for_fusion = frames[0] if frames else None |
|
sentiment, confidence = predict_fused_sentiment( |
|
text=transcribed_text if transcribed_text else None, |
|
audio_bytes=audio_bytes, |
|
image=image_for_fusion, |
|
) |
|
|
|
|
|
col1, col2 = st.columns(2) |
|
with col1: |
|
st.metric("π― Final Sentiment", sentiment) |
|
with col2: |
|
st.metric("π Overall Confidence", f"{confidence:.2f}") |
|
|
|
|
|
sentiment_colors = get_sentiment_colors() |
|
                emoji = sentiment_colors.get(sentiment, "❓")
|
|
|
st.markdown( |
|
f""" |
|
<div class="result-box"> |
|
<h4>{emoji} Max Fusion Sentiment: {sentiment}</h4> |
|
<p><strong>Overall Confidence:</strong> {confidence:.2f}</p> |
|
<p><strong>Modalities Analyzed:</strong> {len(results_data)}</p> |
|
<p><strong>Video Source:</strong> {video_name}</p> |
|
<p><strong>Analysis Type:</strong> Comprehensive Multi-Modal Sentiment Analysis</p> |
|
</div> |
|
""", |
|
unsafe_allow_html=True, |
|
) |
|
else: |
|
st.error( |
|
"β No analysis could be performed. Please check your video input." |
|
) |
|
|
|
else: |
|
if video_input_method == "Record Video (Coming Soon)": |
|
st.info( |
|
"π₯ Video recording feature is coming soon! Please use Upload Video File for now." |
|
) |
|
else: |
|
st.info("π Please upload a video file to begin Max Fusion analysis.") |
|
|
|
|
|
def main(): |
|
"""Main application function.""" |
|
|
|
st.sidebar.title("Sentiment Analysis") |
|
st.sidebar.markdown("---") |
|
|
|
|
|
page = st.sidebar.selectbox( |
|
"Choose a page:", |
|
[ |
|
"Home", |
|
"Text Sentiment", |
|
"Audio Sentiment", |
|
"Vision Sentiment", |
|
"Fused Model", |
|
"Max Fusion", |
|
], |
|
) |
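    # A dict-based dispatch ({"Home": render_home_page, ...}[page]()) would be
    # more compact, but the explicit if/elif chain below keeps the routing easy
    # to scan.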
|
|
|
|
|
if page == "Home": |
|
render_home_page() |
|
elif page == "Text Sentiment": |
|
render_text_sentiment_page() |
|
elif page == "Audio Sentiment": |
|
render_audio_sentiment_page() |
|
elif page == "Vision Sentiment": |
|
render_vision_sentiment_page() |
|
elif page == "Fused Model": |
|
render_fused_model_page() |
|
elif page == "Max Fusion": |
|
render_max_fusion_page() |
|
|
|
|
|
st.markdown("---") |
|
st.markdown( |
|
""" |
|
<div style="text-align: center; color: #666; padding: 1rem;"> |
|
        <p>Built with ❤️ | by <a href="https://github.com/iamfaham">iamfaham</a></p>
|
<p>Version: {version}</p> |
|
</div> |
|
""".format( |
|
version=APP_VERSION |
|
), |
|
unsafe_allow_html=True, |
|
) |
|
|
|
|
|
if __name__ == "__main__": |
|
main() |
|
|