"""
Refactored Sentiment Fused - Multimodal Sentiment Analysis Application

This is the main entry point for the application, now using a modular structure.
"""

import streamlit as st
import pandas as pd
from PIL import Image
import logging

# Import our modular components
from src.config.settings import (
    APP_NAME,
    APP_VERSION,
    APP_ICON,
    APP_LAYOUT,
    CUSTOM_CSS,
    SUPPORTED_IMAGE_FORMATS,
    SUPPORTED_AUDIO_FORMATS,
    SUPPORTED_VIDEO_FORMATS,
)
from src.models.text_model import predict_text_sentiment
from src.models.audio_model import predict_audio_sentiment, load_audio_model
from src.models.vision_model import predict_vision_sentiment, load_vision_model
from src.models.fused_model import predict_fused_sentiment
from src.utils.preprocessing import (
    extract_frames_from_video,
    extract_audio_from_video,
    transcribe_audio,
)
from src.utils.file_handling import get_file_info, format_file_size
from src.utils.sentiment_mapping import get_sentiment_colors, format_sentiment_result

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Page configuration
st.set_page_config(
    page_title=APP_NAME,
    page_icon=APP_ICON,
    layout=APP_LAYOUT,
    initial_sidebar_state="expanded",
)

# Apply custom CSS
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)


def render_home_page():
    """Render the home page with model information."""
    st.markdown(
        f'<h1 class="main-header">{APP_NAME}</h1>',
        unsafe_allow_html=True,
    )

    st.markdown(
        """
        <div class="model-card">
            <h2>Welcome to your Multi-Modal Sentiment Analysis Testing Platform!</h2>
            <p>This application provides a comprehensive testing environment for your three independent sentiment analysis models:</p>
        </div>
        """,
        unsafe_allow_html=True,
    )

    col1, col2, col3 = st.columns(3)

    with col1:
        st.markdown(
            """
            <div class="model-card">
                <h3>Text Sentiment Model</h3>
                <p>READY TO USE - Analyze sentiment from text input using TextBlob</p>
                <ul>
                    <li>Process any text input</li>
                    <li>Get sentiment classification (Positive/Negative/Neutral)</li>
                    <li>View confidence scores</li>
                    <li>Real-time NLP analysis</li>
                </ul>
            </div>
            """,
            unsafe_allow_html=True,
        )

    with col2:
        st.markdown(
            """
            <div class="model-card">
                <h3>Audio Sentiment Model</h3>
                <p>READY TO USE - Analyze sentiment from audio files using fine-tuned Wav2Vec2</p>
                <ul>
                    <li>Upload audio files (.wav, .mp3, .m4a, .flac)</li>
                    <li>Record audio directly with microphone (max 5s)</li>
                    <li>Automatic preprocessing: 16kHz sampling, 5s max duration</li>
                    <li>Listen to uploaded/recorded audio</li>
                    <li>Get sentiment predictions</li>
                    <li>Real-time audio analysis</li>
                </ul>
            </div>
            """,
            unsafe_allow_html=True,
        )

    with col3:
        st.markdown(
            """
            <div class="model-card">
                <h3>Vision Sentiment Model</h3>
                <p>Analyze sentiment from images using fine-tuned ResNet-50</p>
                <ul>
                    <li>Upload image files (.png, .jpg, .jpeg, .bmp, .tiff)</li>
                    <li>Automatic face detection & preprocessing</li>
                    <li>Fixed 0% padding for tightest face crop</li>
                    <li>Convert to 224x224 grayscale β†’ 3-channel RGB (FER2013 format)</li>
                    <li>Transforms: Resize(224) β†’ CenterCrop(224) β†’ ImageNet Normalization</li>
                    <li>Preview original & preprocessed images</li>
                    <li>Get sentiment predictions</li>
                </ul>
            </div>
            """,
            unsafe_allow_html=True,
        )

    st.markdown(
        """
        <div class="model-card">
            <h3>Fused Model</h3>
            <p>Combine predictions from all three models for enhanced accuracy</p>
            <ul>
                <li>Multi-modal input processing</li>
                <li>Ensemble prediction strategies</li>
                <li>Comprehensive sentiment analysis</li>
            </ul>
        </div>
        """,
        unsafe_allow_html=True,
    )

    st.markdown(
        """
        <div class="model-card">
            <h3>🎬 Max Fusion</h3>
            <p>Ultimate video-based sentiment analysis combining all three modalities</p>
            <ul>
                <li>🎥 Record or upload 5-second videos</li>
                <li>🔍 Extract frames for vision analysis</li>
                <li>🎵 Extract audio for vocal sentiment</li>
                <li>📝 Transcribe audio for text analysis</li>
                <li>🚀 Comprehensive multi-modal results</li>
            </ul>
        </div>
        """,
        unsafe_allow_html=True,
    )

    st.markdown("---")
    st.markdown(
        """
        <div style="text-align: center; color: #666;">
            <p><strong>Note:</strong> This application now has <strong>ALL THREE MODELS</strong> fully integrated and ready to use!</p>
            <p><strong>TextBlob</strong> (Text) + <strong>Wav2Vec2</strong> (Audio) + <strong>ResNet-50</strong> (Vision)</p>
            <p><strong>Models are now loaded from Google Drive automatically!</strong></p>
        </div>
        """,
        unsafe_allow_html=True,
    )


def render_text_sentiment_page():
    """Render the text sentiment analysis page."""
    st.title("Text Sentiment Analysis")
    st.markdown("Analyze the sentiment of your text using our TextBlob-based model.")

    # Text input
    text_input = st.text_area(
        "Enter your text here:",
        height=150,
        placeholder="Type or paste your text here to analyze its sentiment...",
    )

    # Analyze button
    if st.button("Analyze Sentiment", type="primary", use_container_width=True):
        if text_input and text_input.strip():
            with st.spinner("Analyzing text sentiment..."):
                sentiment, confidence = predict_text_sentiment(text_input)
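                # predict_text_sentiment() is TextBlob-based; a minimal sketch of
                # the assumed polarity-to-label mapping (illustrative only, not the
                # verbatim code in src/models/text_model.py):
                #   from textblob import TextBlob
                #   polarity = TextBlob(text).sentiment.polarity  # in [-1.0, 1.0]
                #   sentiment = ("Positive" if polarity > 0.1
                #                else "Negative" if polarity < -0.1 else "Neutral")
                #   confidence = abs(polarity)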

                # Display results
                st.markdown("### Results")

                # Display results in columns
                col1, col2 = st.columns(2)
                with col1:
                    st.metric("Sentiment", sentiment)
                with col2:
                    st.metric("Confidence", f"{confidence:.2f}")

                # Color-coded sentiment display
                sentiment_colors = get_sentiment_colors()
                emoji = sentiment_colors.get(sentiment, "❓")
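                # get_sentiment_colors() is assumed to return a label-to-emoji map,
                # e.g. {"Positive": "😊", "Negative": "😠", "Neutral": "😐"},
                # with "❓" as the fallback for unknown labels.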

                st.markdown(
                    f"""
                    <div class="result-box">
                        <h4>{emoji} Sentiment: {sentiment}</h4>
                        <p><strong>Confidence:</strong> {confidence:.2f}</p>
                        <p><strong>Input Text:</strong> "{text_input[:100]}{'...' if len(text_input) > 100 else ''}"</p>
                        <p><strong>Model:</strong> TextBlob (Natural Language Processing)</p>
                    </div>
                    """,
                    unsafe_allow_html=True,
                )
        else:
            st.error("Please enter some text to analyze.")


def render_audio_sentiment_page():
    """Render the audio sentiment analysis page."""
    st.title("Audio Sentiment Analysis")
    st.markdown(
        "Analyze the sentiment of your audio files using our fine-tuned Wav2Vec2 model."
    )

    # Preprocessing information
    st.info(
        "**Audio Preprocessing**: Audio will be automatically processed to match CREMA-D + RAVDESS training format: "
        "16kHz sampling rate, max 5 seconds, with automatic resampling and feature extraction."
    )
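    # The "16 kHz, max 5 s" preprocessing happens inside predict_audio_sentiment();
    # a rough sketch of what that implies (an assumed librosa-based version, not
    # the verbatim code in src/models/audio_model.py):
    #   import io, librosa
    #   waveform, sr = librosa.load(io.BytesIO(audio_bytes), sr=16000, mono=True)
    #   waveform = waveform[: 16000 * 5]  # truncate to 5 seconds
    #   inputs = feature_extractor(waveform, sampling_rate=16000, return_tensors="pt")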

    # Model status
    model, device, num_classes, feature_extractor = load_audio_model()
    if model is None:
        st.error(
            "Audio model could not be loaded. Please check the Google Drive setup."
        )
        st.info(
            "Expected: Models should be configured in Google Drive and accessible via the model manager."
        )
    else:
        st.success(
            f"Audio model loaded successfully on {device} with {num_classes} classes!"
        )

    # Input method selection
    st.subheader("Choose Input Method")
    input_method = st.radio(
        "Select how you want to provide audio:",
        ["Upload Audio File", "Record Audio"],
        horizontal=True,
    )

    if input_method == "Upload Audio File":
        # File uploader
        uploaded_audio = st.file_uploader(
            "Choose an audio file",
            type=SUPPORTED_AUDIO_FORMATS,
            help="Supported formats: WAV, MP3, M4A, FLAC",
        )

        audio_source = "uploaded_file"
        audio_name = uploaded_audio.name if uploaded_audio else None

    else:  # Audio recording
        st.markdown(
            """
            <div class="model-card">
                <h3>Audio Recording</h3>
                <p>Record audio directly with your microphone (max 5 seconds).</p>
                <p><strong>Note:</strong> Make sure your microphone is accessible and you have permission to use it.</p>
            </div>
            """,
            unsafe_allow_html=True,
        )

        # Audio recorder
        recorded_audio = st.audio_input(
            label="Click to start recording",
            help="Click the microphone button to start/stop recording. Maximum recording time is 5 seconds.",
        )

        if recorded_audio is not None:
            # Display recorded audio
            st.audio(recorded_audio, format="audio/wav")
            st.success("Audio recorded successfully!")

            # Treat the recording like an uploaded file for downstream processing
            uploaded_audio = recorded_audio
            audio_source = "recorded"
            audio_name = "Recorded Audio"
        else:
            uploaded_audio = None
            audio_source = None
            audio_name = None

    if uploaded_audio is not None:
        # Display audio player
        if audio_source == "recorded":
            st.audio(uploaded_audio, format="audio/wav")
            st.info(f"{audio_name} | Source: Microphone Recording")
        else:
            st.audio(
                uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}'
            )
            # File info for uploaded files
            file_info = get_file_info(uploaded_audio)
            st.info(
                f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])}"
            )

        # Analyze button
        if st.button(
            "Analyze Audio Sentiment", type="primary", use_container_width=True
        ):
            if model is None:
                st.error("Model not loaded. Cannot analyze audio.")
            else:
                with st.spinner("Analyzing audio sentiment..."):
                    audio_bytes = uploaded_audio.getvalue()
                    sentiment, confidence = predict_audio_sentiment(audio_bytes)

                # Display results
                st.markdown("### Results")

                col1, col2 = st.columns(2)
                with col1:
                    st.metric("Sentiment", sentiment)
                with col2:
                    st.metric("Confidence", f"{confidence:.2f}")

                # Color-coded sentiment display
                sentiment_colors = get_sentiment_colors()
                emoji = sentiment_colors.get(sentiment, "❓")

                st.markdown(
                    f"""
                    <div class="result-box">
                        <h4>{emoji} Sentiment: {sentiment}</h4>
                        <p><strong>Confidence:</strong> {confidence:.2f}</p>
                        <p><strong>Audio Source:</strong> {audio_name}</p>
                        <p><strong>Model:</strong> Wav2Vec2 (Fine-tuned on RAVDESS + CREMA-D)</p>
                    </div>
                    """,
                    unsafe_allow_html=True,
                )
    else:
        if input_method == "Upload Audio File":
            st.info("Please upload an audio file to begin analysis.")
        else:
            st.info("Click the microphone button above to record audio for analysis.")


def render_vision_sentiment_page():
    """Render the vision sentiment analysis page."""
    st.title("Vision Sentiment Analysis")
    st.markdown(
        "Analyze the sentiment of your images using our fine-tuned ResNet-50 model."
    )

    st.info(
        "**Note**: Images will be automatically preprocessed to match FER2013 format: face detection, grayscale conversion, and 224x224 resize (converted to 3-channel RGB)."
    )

    # Face cropping is set to 0% (no padding) for tightest crop
    st.info("**Face Cropping**: Set to 0% padding for tightest crop on facial features")

    # Model status
    model, device, num_classes = load_vision_model()
    if model is None:
        st.error(
            "Vision model could not be loaded. Please check the Google Drive setup."
        )
        st.info(
            "Expected: Models should be configured in Google Drive and accessible via the model manager."
        )
    else:
        st.success(
            f"Vision model loaded successfully on {device} with {num_classes} classes!"
        )

    # Input method selection
    st.subheader("Choose Input Method")
    input_method = st.radio(
        "Select how you want to provide an image:",
        ["Upload Image File", "Take Photo with Camera"],
        horizontal=True,
    )

    if input_method == "Upload Image File":
        # File uploader
        uploaded_image = st.file_uploader(
            "Choose an image file",
            type=SUPPORTED_IMAGE_FORMATS,
            help="Supported formats: PNG, JPG, JPEG, BMP, TIFF",
        )

        if uploaded_image is not None:
            # Display image
            image = Image.open(uploaded_image)
            st.image(
                image,
                caption=f"Uploaded Image: {uploaded_image.name}",
                use_container_width=True,
            )

            # File info
            file_info = get_file_info(uploaded_image)
            st.info(
                f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])} | Dimensions: {image.size[0]}x{image.size[1]}"
            )

            # Analyze button
            if st.button(
                "Analyze Image Sentiment", type="primary", use_container_width=True
            ):
                if model is None:
                    st.error("Model not loaded. Cannot analyze image.")
                else:
                    with st.spinner("Analyzing image sentiment..."):
                        sentiment, confidence = predict_vision_sentiment(image)

                        # Display results
                        st.markdown("### Results")

                        col1, col2 = st.columns(2)
                        with col1:
                            st.metric("Sentiment", sentiment)
                        with col2:
                            st.metric("Confidence", f"{confidence:.2f}")

                        # Color-coded sentiment display
                        sentiment_colors = get_sentiment_colors()
                        emoji = sentiment_colors.get(sentiment, "❓")

                        st.markdown(
                            f"""
                            <div class="result-box">
                                <h4>{emoji} Sentiment: {sentiment}</h4>
                                <p><strong>Confidence:</strong> {confidence:.2f}</p>
                                <p><strong>Image File:</strong> {uploaded_image.name}</p>
                                <p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p>
                            </div>
                            """,
                            unsafe_allow_html=True,
                        )

    else:  # Camera capture
        st.markdown(
            """
            <div class="model-card">
                <h3>Camera Capture</h3>
                <p>Take a photo directly with your camera to analyze its sentiment.</p>
                <p><strong>Note:</strong> Make sure your camera is accessible and you have permission to use it.</p>
            </div>
            """,
            unsafe_allow_html=True,
        )

        # Camera input
        camera_photo = st.camera_input(
            "Take a photo",
            help="Click the camera button to take a photo, or use the upload button to select an existing photo",
        )

        if camera_photo is not None:
            # Display captured image
            image = Image.open(camera_photo)
            st.image(
                image,
                caption="Captured Photo",
                use_container_width=True,
            )

            # Image info
            st.info(
                f"Captured Photo | Dimensions: {image.size[0]}x{image.size[1]} | Format: {image.format}"
            )

            # Analyze button
            if st.button(
                "Analyze Photo Sentiment", type="primary", use_container_width=True
            ):
                if model is None:
                    st.error("Model not loaded. Cannot analyze image.")
                else:
                    with st.spinner("Analyzing photo sentiment..."):
                        sentiment, confidence = predict_vision_sentiment(image)

                        # Display results
                        st.markdown("### Results")

                        col1, col2 = st.columns(2)
                        with col1:
                            st.metric("Sentiment", sentiment)
                        with col2:
                            st.metric("Confidence", f"{confidence:.2f}")

                        # Color-coded sentiment display
                        sentiment_colors = get_sentiment_colors()
                        emoji = sentiment_colors.get(sentiment, "❓")

                        st.markdown(
                            f"""
                            <div class="result-box">
                                <h4>{emoji} Sentiment: {sentiment}</h4>
                                <p><strong>Confidence:</strong> {confidence:.2f}</p>
                                <p><strong>Image Source:</strong> Camera Capture</p>
                                <p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p>
                            </div>
                            """,
                            unsafe_allow_html=True,
                        )

    # Show info if no image is provided
    if input_method == "Upload Image File" and uploaded_image is None:
        st.info("Please upload an image file to begin analysis.")
    elif input_method == "Take Photo with Camera" and camera_photo is None:
        st.info("Click the camera button above to take a photo for analysis.")


def render_fused_model_page():
    """Render the fused model analysis page."""
    st.title("Fused Model Analysis")
    st.markdown(
        "Combine predictions from all three models for enhanced sentiment analysis."
    )

    st.markdown(
        """
        <div class="model-card">
            <h3>Multi-Modal Sentiment Analysis</h3>
            <p>This page allows you to input text, audio, and/or image data to get a comprehensive sentiment analysis 
            using all three models combined.</p>
        </div>
        """,
        unsafe_allow_html=True,
    )

    # Input sections
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Text Input")
        text_input = st.text_area(
            "Enter text (optional):",
            height=100,
            placeholder="Type or paste your text here...",
        )

        st.subheader("Audio Input")

        # Audio preprocessing information for fused model
        st.info(
            "**Audio Preprocessing**: Audio will be automatically processed to match CREMA-D + RAVDESS training format: "
            "16kHz sampling rate, max 5 seconds, with automatic resampling and feature extraction."
        )

        # Audio input method for fused model
        audio_input_method = st.radio(
            "Audio input method:",
            ["Upload File", "Record Audio"],
            key="fused_audio_method",
            horizontal=True,
        )

        if audio_input_method == "Upload File":
            uploaded_audio = st.file_uploader(
                "Upload audio file (optional):",
                type=SUPPORTED_AUDIO_FORMATS,
                key="fused_audio",
            )
            audio_source = "uploaded_file"
            audio_name = uploaded_audio.name if uploaded_audio else None
        else:
            # Audio recorder for fused model
            recorded_audio = st.audio_input(
                label="Record audio (optional):",
                key="fused_audio_recorder",
                help="Click to record audio for sentiment analysis",
            )

            if recorded_audio is not None:
                st.audio(recorded_audio, format="audio/wav")
                st.success("Audio recorded successfully!")
                uploaded_audio = recorded_audio
                audio_source = "recorded"
                audio_name = "Recorded Audio"
            else:
                uploaded_audio = None
                audio_source = None
                audio_name = None

    with col2:
        st.subheader("Image Input")

        # Face cropping is set to 0% (no padding) for tightest crop
        st.info(
            "**Face Cropping**: Set to 0% padding for tightest crop on facial features"
        )

        # Image input method for fused model
        image_input_method = st.radio(
            "Image input method:",
            ["Upload File", "Take Photo"],
            key="fused_image_method",
            horizontal=True,
        )

        if image_input_method == "Upload File":
            uploaded_image = st.file_uploader(
                "Upload image file (optional):",
                type=SUPPORTED_IMAGE_FORMATS,
                key="fused_image",
            )

            if uploaded_image:
                image = Image.open(uploaded_image)
                st.image(image, caption="Uploaded Image", use_container_width=True)
        else:
            # Camera capture for fused model
            camera_photo = st.camera_input(
                "Take a photo (optional):",
                key="fused_camera",
                help="Click to take a photo for sentiment analysis",
            )

            # Use the captured photo (if any) as the image input downstream;
            # assigning unconditionally avoids a NameError when no photo is taken.
            uploaded_image = camera_photo
            if camera_photo:
                image = Image.open(camera_photo)
                st.image(image, caption="Captured Photo", use_container_width=True)

        if uploaded_audio:
            st.audio(
                uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}'
            )

    # Analyze button
    if st.button("Run Fused Analysis", type="primary", use_container_width=True):
        if text_input or uploaded_audio or uploaded_image:
            with st.spinner("Running fused sentiment analysis..."):
                # Prepare inputs
                audio_bytes = uploaded_audio.getvalue() if uploaded_audio else None
                image = Image.open(uploaded_image) if uploaded_image else None

                # Get fused prediction
                sentiment, confidence = predict_fused_sentiment(
                    text=text_input if text_input else None,
                    audio_bytes=audio_bytes,
                    image=image,
                )
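                # predict_fused_sentiment() combines whichever modalities were
                # provided; one plausible strategy (an assumption about
                # src/models/fused_model.py, not confirmed by it) is a
                # confidence-weighted vote:
                #   scores = {}
                #   for label, conf in per_model_predictions:
                #       scores[label] = scores.get(label, 0.0) + conf
                #   sentiment = max(scores, key=scores.get)
                #   confidence = scores[sentiment] / sum(scores.values())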

                # Display results
                st.markdown("### Fused Model Results")

                col1, col2 = st.columns(2)
                with col1:
                    st.metric("Final Sentiment", sentiment)
                with col2:
                    st.metric("Overall Confidence", f"{confidence:.2f}")

                # Show individual model results
                st.markdown("### Individual Model Results")

                results_data = []

                if text_input:
                    text_sentiment, text_conf = predict_text_sentiment(text_input)
                    results_data.append(
                        {
                            "Model": "Text (TextBlob)",
                            "Input": f"Text: {text_input[:50]}...",
                            "Sentiment": text_sentiment,
                            "Confidence": f"{text_conf:.2f}",
                        }
                    )

                if uploaded_audio:
                    audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
                    results_data.append(
                        {
                            "Model": "Audio (Wav2Vec2)",
                            "Input": f"Audio: {audio_name}",
                            "Sentiment": audio_sentiment,
                            "Confidence": f"{audio_conf:.2f}",
                        }
                    )

                if uploaded_image:
                    # Face cropping is set to 0% (no padding) for tightest crop
                    vision_sentiment, vision_conf = predict_vision_sentiment(
                        image, crop_tightness=0.0
                    )
                    results_data.append(
                        {
                            "Model": "Vision (ResNet-50)",
                            "Input": f"Image: {uploaded_image.name}",
                            "Sentiment": vision_sentiment,
                            "Confidence": f"{vision_conf:.2f}",
                        }
                    )

                if results_data:
                    df = pd.DataFrame(results_data)
                    st.dataframe(df, use_container_width=True)

                # Final result display
                sentiment_colors = get_sentiment_colors()
                emoji = sentiment_colors.get(sentiment, "❓")

                st.markdown(
                    f"""
                    <div class="result-box">
                        <h4>{emoji} Final Fused Sentiment: {sentiment}</h4>
                        <p><strong>Overall Confidence:</strong> {confidence:.2f}</p>
                        <p><strong>Models Used:</strong> {len(results_data)}</p>
                    </div>
                    """,
                    unsafe_allow_html=True,
                )
        else:
            st.warning(
                "Please provide at least one input (text, audio, or image) for fused analysis."
            )


def render_max_fusion_page():
    """Render the max fusion page for video-based analysis."""
    st.title("Max Fusion - Multi-Modal Sentiment Analysis")
    st.markdown(
        """
        <div class="model-card">
            <h3>Ultimate Multi-Modal Sentiment Analysis</h3>
            <p>Take photos with camera or upload videos to get comprehensive sentiment analysis from multiple modalities:</p>
            <ul>
                <li>📸 <strong>Vision Analysis:</strong> Camera photos or video frames for facial expression analysis</li>
                <li>🎵 <strong>Audio Analysis:</strong> Audio files or extracted audio from videos for vocal sentiment</li>
                <li>📝 <strong>Text Analysis:</strong> Transcribed audio for text sentiment analysis</li>
            </ul>
        </div>
        """,
        unsafe_allow_html=True,
    )

    # Video input method selection
    st.subheader("Video Input")
    video_input_method = st.radio(
        "Choose input method:",
        ["Upload Video File", "Record Video (Coming Soon)"],
        horizontal=True,
        index=0,  # Default to upload video
    )

    if video_input_method == "Record Video (Coming Soon)":
        # Coming Soon message for video recording
        st.info("🎥 Video recording feature is coming soon!")
        st.info("📁 Please use the Upload Video File option for now.")

        # Show a nice coming soon message
        st.markdown("---")
        col1, col2, col3 = st.columns([1, 2, 1])
        with col2:
            st.markdown(
                """
                <div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; color: white;">
                    <h3>🚧 Coming Soon 🚧</h3>
                    <p>Video recording feature is under development</p>
                    <p>Use Upload Video File for now!</p>
                </div>
                """,
                unsafe_allow_html=True,
            )

        # Placeholder for future recording functionality
        st.markdown(
            """
            **Future Features:**
            - Real-time video recording with camera
            - Audio capture during recording
            - Automatic frame extraction
            - Live transcription
            - WebRTC integration for low-latency streaming
            """
        )

        # Skip all the recording logic for now
        uploaded_video = None
        video_source = None
        video_name = None
        video_file = None

    elif video_input_method == "Upload Video File":
        # File upload option
        st.markdown(
            """
            <div class="upload-section">
                <h4>📁 Upload Video File</h4>
                <p>Upload a video file for comprehensive multimodal analysis.</p>
                <p><strong>Supported Formats:</strong> MP4, AVI, MOV, MKV, WMV, FLV</p>
                <p><strong>Recommended:</strong> Videos with clear audio and visual content</p>
            </div>
            """,
            unsafe_allow_html=True,
        )

        uploaded_video = st.file_uploader(
            "Choose a video file",
            type=SUPPORTED_VIDEO_FORMATS,
            help="Supported formats: MP4, AVI, MOV, MKV, WMV, FLV",
        )

        video_source = "uploaded_file"
        video_name = uploaded_video.name if uploaded_video else None
        video_file = uploaded_video

    if video_file is not None:
        # Display video or photo
        if video_source == "camera_photo":
            # For camera photos, we already displayed the image above
            st.info(f"Source: Camera Photo | Ready for vision analysis")

            # Add audio upload option for camera photo mode
            st.subheader("🎵 Audio Input for Analysis")
            st.info(
                "Since we're using a photo, please upload an audio file for audio sentiment analysis:"
            )

            uploaded_audio = st.file_uploader(
                "Upload audio file for audio analysis:",
                type=SUPPORTED_AUDIO_FORMATS,
                key="camera_audio",
                help="Upload an audio file to complement the photo analysis",
            )

            if uploaded_audio:
                st.audio(
                    uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}'
                )
                st.success("✅ Audio uploaded successfully!")
                audio_bytes = uploaded_audio.getvalue()
            else:
                audio_bytes = None
                st.warning("⚠️ Please upload an audio file for complete analysis")

        else:
            # For uploaded videos
            st.video(video_file)
            file_info = get_file_info(video_file)
            st.info(
                f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])}"
            )
            audio_bytes = None  # Will be extracted from video

        # Video Processing Pipeline
        st.subheader("🎬 Video Processing Pipeline")

        # Initialize processing results; keep any audio_bytes already set above
        # (e.g. audio uploaded alongside a camera photo) instead of clobbering it.
        frames = []
        transcribed_text = ""

        # Process uploaded video
        if uploaded_video:
            st.info("📁 Processing uploaded video file...")

            # Extract frames
            st.markdown("**1. πŸŽ₯ Frame Extraction**")
            frames = extract_frames_from_video(uploaded_video, max_frames=5)
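            # extract_frames_from_video() presumably samples evenly spaced frames;
            # an assumed OpenCV sketch (not the verbatim helper in
            # src/utils/preprocessing.py):
            #   import cv2
            #   cap = cv2.VideoCapture(tmp_path)  # video written to a temp file first
            #   total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            #   for idx in range(0, total, max(total // max_frames, 1)):
            #       cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            #       ok, frame = cap.read()
            #       if ok:
            #           frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))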

            if frames:
                st.success(f"✅ Extracted {len(frames)} representative frames")

                # Display extracted frames
                cols = st.columns(len(frames))
                for i, frame in enumerate(frames):
                    with cols[i]:
                        st.image(
                            frame, caption=f"Frame {i+1}", use_container_width=True
                        )
            else:
                st.warning("⚠️ Could not extract frames from video")
                frames = []

            # Extract audio
            st.markdown("**2. 🎡 Audio Extraction**")
            audio_bytes = extract_audio_from_video(uploaded_video)
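            # extract_audio_from_video() likely delegates to ffmpeg (or moviepy);
            # an assumed ffmpeg sketch producing 16 kHz mono WAV:
            #   import subprocess
            #   subprocess.run(["ffmpeg", "-i", video_path, "-ar", "16000",
            #                   "-ac", "1", "-f", "wav", audio_path], check=True)
            #   audio_bytes = open(audio_path, "rb").read()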

            if audio_bytes:
                st.success("✅ Audio extracted successfully")
                st.audio(audio_bytes, format="audio/wav")
            else:
                st.warning("⚠️ Could not extract audio from video")
                audio_bytes = None

            # Transcribe audio
            st.markdown("**3. πŸ“ Audio Transcription**")
            if audio_bytes:
                transcribed_text = transcribe_audio(audio_bytes)
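                # transcribe_audio() could sit on any speech-to-text backend; an
                # assumed sketch using the SpeechRecognition package:
                #   import io
                #   import speech_recognition as sr
                #   recognizer = sr.Recognizer()
                #   with sr.AudioFile(io.BytesIO(audio_bytes)) as source:
                #       transcribed_text = recognizer.recognize_google(
                #           recognizer.record(source))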
                if transcribed_text:
                    st.success("✅ Audio transcribed successfully")
                    st.markdown(f'**Transcribed Text:** "{transcribed_text}"')
                else:
                    st.warning("⚠️ Could not transcribe audio")
                    transcribed_text = ""
            else:
                transcribed_text = ""
                st.info("ℹ️ No audio available for transcription")

        # Analysis button
        if st.button(
            "πŸš€ Run Max Fusion Analysis", type="primary", use_container_width=True
        ):
            with st.spinner(
                "πŸ”„ Processing video and running comprehensive analysis..."
            ):
                # Run individual analyses
                st.subheader("🔍 Individual Model Analysis")

                results_data = []

                # Vision analysis (use first frame for uploaded videos)
                if frames:
                    st.markdown("**Vision Analysis:**")

                    # For uploaded videos, use first frame
                    vision_sentiment, vision_conf = predict_vision_sentiment(
                        frames[0], crop_tightness=0.0
                    )
                    results_data.append(
                        {
                            "Model": "Vision (ResNet-50)",
                            "Input": f"Video Frame 1",
                            "Sentiment": vision_sentiment,
                            "Confidence": f"{vision_conf:.2f}",
                        }
                    )
                    st.success(
                        f"Vision: {vision_sentiment} (Confidence: {vision_conf:.2f})"
                    )

                # Audio analysis
                if audio_bytes:
                    st.markdown("**Audio Analysis:**")
                    audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
                    results_data.append(
                        {
                            "Model": "Audio (Wav2Vec2)",
                            "Input": f"Video Audio",
                            "Sentiment": audio_sentiment,
                            "Confidence": f"{audio_conf:.2f}",
                        }
                    )
                    st.success(
                        f"Audio: {audio_sentiment} (Confidence: {audio_conf:.2f})"
                    )

                # Text analysis
                if transcribed_text:
                    st.markdown("**Text Analysis:**")
                    text_sentiment, text_conf = predict_text_sentiment(transcribed_text)
                    results_data.append(
                        {
                            "Model": "Text (TextBlob)",
                            "Input": f"Transcribed: {transcribed_text[:50]}...",
                            "Sentiment": text_sentiment,
                            "Confidence": f"{text_conf:.2f}",
                        }
                    )
                    st.success(f"Text: {text_sentiment} (Confidence: {text_conf:.2f})")

                # Run fused analysis
                st.subheader("🎯 Max Fusion Results")

                if results_data:
                    # Display results table
                    df = pd.DataFrame(results_data)
                    st.dataframe(df, use_container_width=True)

                    # Calculate fused sentiment
                    image_for_fusion = frames[0] if frames else None
                    sentiment, confidence = predict_fused_sentiment(
                        text=transcribed_text if transcribed_text else None,
                        audio_bytes=audio_bytes,
                        image=image_for_fusion,
                    )

                    # Display final results
                    col1, col2 = st.columns(2)
                    with col1:
                        st.metric("🎯 Final Sentiment", sentiment)
                    with col2:
                        st.metric("📊 Overall Confidence", f"{confidence:.2f}")

                    # Color-coded sentiment display
                    sentiment_colors = get_sentiment_colors()
                    emoji = sentiment_colors.get(sentiment, "❓")

                    st.markdown(
                        f"""
                        <div class="result-box">
                            <h4>{emoji} Max Fusion Sentiment: {sentiment}</h4>
                            <p><strong>Overall Confidence:</strong> {confidence:.2f}</p>
                            <p><strong>Modalities Analyzed:</strong> {len(results_data)}</p>
                            <p><strong>Video Source:</strong> {video_name}</p>
                            <p><strong>Analysis Type:</strong> Comprehensive Multi-Modal Sentiment Analysis</p>
                        </div>
                        """,
                        unsafe_allow_html=True,
                    )
                else:
                    st.error(
                        "❌ No analysis could be performed. Please check your video input."
                    )

    else:
        if video_input_method == "Record Video (Coming Soon)":
            st.info(
                "πŸŽ₯ Video recording feature is coming soon! Please use Upload Video File for now."
            )
        else:
            st.info("📁 Please upload a video file to begin Max Fusion analysis.")


def main():
    """Main application function."""
    # Sidebar navigation
    st.sidebar.title("Sentiment Analysis")
    st.sidebar.markdown("---")

    # Navigation
    page = st.sidebar.selectbox(
        "Choose a page:",
        [
            "Home",
            "Text Sentiment",
            "Audio Sentiment",
            "Vision Sentiment",
            "Fused Model",
            "Max Fusion",
        ],
    )

    # Page routing
    if page == "Home":
        render_home_page()
    elif page == "Text Sentiment":
        render_text_sentiment_page()
    elif page == "Audio Sentiment":
        render_audio_sentiment_page()
    elif page == "Vision Sentiment":
        render_vision_sentiment_page()
    elif page == "Fused Model":
        render_fused_model_page()
    elif page == "Max Fusion":
        render_max_fusion_page()

    # Footer
    st.markdown("---")
    st.markdown(
        """
        <div style="text-align: center; color: #666; padding: 1rem;">
            <p>Built with ❤️ | by <a href="https://github.com/iamfaham">iamfaham</a></p>
            <p>Version: {version}</p>
        </div>
        """.format(
            version=APP_VERSION
        ),
        unsafe_allow_html=True,
    )


if __name__ == "__main__":
    main()