"""
Fused sentiment analysis model combining text, audio, and vision models.
"""

import logging
import statistics
from typing import Optional, Tuple
from PIL import Image

from .text_model import predict_text_sentiment
from .audio_model import predict_audio_sentiment
from .vision_model import predict_vision_sentiment

logger = logging.getLogger(__name__)


def predict_fused_sentiment(
    text: Optional[str] = None,
    audio_bytes: Optional[bytes] = None,
    image: Optional[Image.Image] = None,
) -> Tuple[str, float]:
    """
    Fuse predictions from the text, audio, and vision models into a single sentiment.

    Args:
        text: Input text for text sentiment analysis
        audio_bytes: Audio bytes for audio sentiment analysis
        image: Input image for vision sentiment analysis

    Returns:
        Tuple of (fused_sentiment, overall_confidence)
    """
    results = []

    if text:
        text_sentiment, text_conf = predict_text_sentiment(text)
        results.append(("Text", text_sentiment, text_conf))

    if audio_bytes:
        audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
        results.append(("Audio", audio_sentiment, audio_conf))

    if image:
        vision_sentiment, vision_conf = predict_vision_sentiment(image)
        results.append(("Vision", vision_sentiment, vision_conf))

    if not results:
        return "No inputs provided", 0.0

    # Simple ensemble logic (can be enhanced with more sophisticated fusion strategies)
    sentiment_counts = {}
    total_confidence = 0
    modality_weights = {"Text": 0.3, "Audio": 0.35, "Vision": 0.35}  # Weighted voting
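    # Each modality's confidence is scaled by its weight and accumulated per
    # predicted sentiment; the sentiment with the highest accumulated weighted
    # confidence is selected below.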

    for modality, sentiment, confidence in results:
        if sentiment not in sentiment_counts:
            sentiment_counts[sentiment] = {"count": 0, "weighted_conf": 0}

        sentiment_counts[sentiment]["count"] += 1
        weight = modality_weights.get(modality, 0.33)
        sentiment_counts[sentiment]["weighted_conf"] += confidence * weight
        total_confidence += confidence

    # Weighted majority voting with confidence averaging
    if sentiment_counts:
        # Find sentiment with highest weighted confidence
        final_sentiment = max(
            sentiment_counts.keys(), key=lambda s: sentiment_counts[s]["weighted_conf"]
        )

        # Overall confidence is the unweighted average of the individual modality confidences
        avg_confidence = total_confidence / len(results)

        logger.info(
            f"Fused sentiment analysis completed: {final_sentiment} (confidence: {avg_confidence:.2f})"
        )
        logger.info(f"Individual results: {results}")

        return final_sentiment, avg_confidence
    else:
        return "No valid predictions", 0.0


def get_fusion_strategy_info() -> dict:
    """Get information about the fusion strategy."""
    return {
        "strategy_name": "Weighted Ensemble Fusion",
        "description": "Combines predictions from text, audio, and vision models using weighted voting",
        "modality_weights": {"Text": 0.3, "Audio": 0.35, "Vision": 0.35},
        "fusion_method": "Weighted majority voting with confidence averaging",
        "advantages": [
            "Robust to individual model failures",
            "Leverages complementary information from different modalities",
            "Configurable modality weights",
            "Real-time ensemble prediction",
        ],
        "use_cases": [
            "Multi-modal content analysis",
            "Enhanced sentiment accuracy",
            "Cross-validation of predictions",
            "Comprehensive emotional understanding",
        ],
    }


def analyze_modality_agreement(
    text: Optional[str] = None,
    audio_bytes: Optional[bytes] = None,
    image: Optional[Image.Image] = None,
) -> dict:
    """
    Analyze agreement between different modalities.

    Args:
        text: Input text
        audio_bytes: Audio bytes
        image: Input image

    Returns:
        Dictionary containing agreement analysis
    """
    results = {}

    if text:
        text_sentiment, text_conf = predict_text_sentiment(text)
        results["text"] = {"sentiment": text_sentiment, "confidence": text_conf}

    if audio_bytes:
        audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
        results["audio"] = {"sentiment": audio_sentiment, "confidence": audio_conf}

    if image:
        vision_sentiment, vision_conf = predict_vision_sentiment(image)
        results["vision"] = {"sentiment": vision_sentiment, "confidence": vision_conf}

    if len(results) < 2:
        return {"agreement_level": "insufficient_modalities", "details": results}

    # Analyze agreement
    sentiments = [result["sentiment"] for result in results.values()]
    unique_sentiments = set(sentiments)

    if len(unique_sentiments) == 1:
        agreement_level = "perfect"
        agreement_score = 1.0
    elif len(unique_sentiments) == 2:
        agreement_level = "partial"
        agreement_score = 0.5
    else:
        agreement_level = "low"
        agreement_score = 0.0

    # Calculate confidence consistency as the standard deviation of the individual
    # confidences (lower values mean the modalities report similar confidence)
    confidences = [result["confidence"] for result in results.values()]
    confidence_std = statistics.pstdev(confidences) if confidences else 0.0

    return {
        "agreement_level": agreement_level,
        "agreement_score": agreement_score,
        "modalities_analyzed": len(results),
        "sentiment_distribution": {s: sentiments.count(s) for s in unique_sentiments},
        "confidence_consistency": confidence_std,
        "individual_results": results,
        "recommendation": _get_agreement_recommendation(agreement_level, len(results)),
    }


def _get_agreement_recommendation(agreement_level: str, num_modalities: int) -> str:
    """Get recommendation based on agreement level."""
    if agreement_level == "perfect":
        return "High confidence in prediction - all modalities agree"
    elif agreement_level == "partial":
        return "Moderate confidence - consider modality-specific factors"
    elif agreement_level == "low":
        return "Low confidence - modalities disagree, consider context"
    else:
        return "Insufficient data for reliable fusion"