File size: 4,493 Bytes
7fb74eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# backend/sarcasm_detector.py

from transformers import pipeline

# Module-level cache for the sarcasm/irony detection pipeline. It starts out
# as None and is filled in lazily by _load_sarcasm_pipeline() on first use.
_sarcasm_pipeline = None

def _load_sarcasm_pipeline():
    """
    Return the shared sarcasm/irony pipeline, creating it on first use.

    The pipeline is cached in the module-level ``_sarcasm_pipeline``. If
    construction fails, the error is printed, the cache stays ``None`` and
    ``None`` is returned, so the next call attempts the load again.

    Returns:
        The cached pipeline object, or None when it could not be created.
    """
    global _sarcasm_pipeline

    # Fast path: a previous call already built the pipeline.
    if _sarcasm_pipeline is not None:
        return _sarcasm_pipeline

    try:
        # cardiffnlp/twitter-roberta-base-irony is used for general irony/sarcasm detection.
        _sarcasm_pipeline = pipeline(
            "text-classification",
            model="cardiffnlp/twitter-roberta-base-irony",
        )
        print("Sarcasm/Irony detection pipeline loaded successfully.")
    except Exception as e:
        # Best-effort load: report the problem and leave the cache empty
        # instead of propagating the failure to callers.
        print(f"Could not load sarcasm/irony detection pipeline. Ensure 'transformers' and 'torch' are installed. Error: {e}")
        _sarcasm_pipeline = None

    return _sarcasm_pipeline

def detect_sarcasm_and_highlight(sentence: str) -> dict:
    """
    Estimate how sarcastic/ironic a sentence is and mark it up for display.

    NOTE: Highlighting the specific sarcastic part of a sentence would need
          token-level analysis from a specialized model. This implementation
          only computes a sentence-level sarcasm percentage and, as a
          placeholder, wraps the whole sentence in ``<mark>`` when that
          percentage is above 50.

    Args:
        sentence (str): The input sentence to analyze.

    Returns:
        dict: Always contains the keys
              "sarcasm_percent" (float, model score scaled to a percentage
              and rounded to 2 decimals),
              "highlighted_sentence" (the sentence, wrapped in ``<mark>`` when
              the percentage exceeds 50), and
              "predicted_sarcasm_label" ("sarcastic", "not sarcastic" or
              "unknown label").
              When the input is not a string or the model could not be
              loaded, the percentage is 0.0, the label is "unknown label",
              the input is returned unchanged, and an extra "note" key
              explains why no prediction was made.
    """
    # Validate first so that invalid input never triggers a (slow) model load.
    if not isinstance(sentence, str):
        return _sarcasm_fallback_result(sentence)

    pipeline_instance = _load_sarcasm_pipeline()
    if not pipeline_instance:
        return _sarcasm_fallback_result(sentence)

    # Truncate over-long input instead of letting it fail inside the model.
    # NOTE(review): 512 is the usual RoBERTa-base token limit - confirm for
    # this checkpoint.
    results = pipeline_instance(sentence, truncation=True, max_length=512)
    sarcasm_percent = 0.0
    predicted_label = "not sarcastic"  # Default when the model returns nothing

    if results:
        # The model reports a single top label with its confidence:
        # 'irony' for ironic/sarcastic text, 'non_irony' otherwise.
        top_prediction = results[0]
        raw_label = top_prediction['label']
        score = top_prediction['score']

        if raw_label == 'irony':
            sarcasm_percent = round(score * 100, 2)
            predicted_label = "sarcastic"
        elif raw_label == 'non_irony':
            # The score is the confidence in NOT being sarcastic, so the
            # sarcasm share is its complement.
            sarcasm_percent = round((1 - score) * 100, 2)
            predicted_label = "not sarcastic"
        else:
            # Unexpected label name: report it rather than guessing.
            sarcasm_percent = 0.0
            predicted_label = "unknown label"

    # Placeholder highlighting: mark the whole sentence once sarcasm is more
    # likely than not.
    # NOTE(review): the sentence is inserted into the markup unescaped; if the
    # frontend renders this value as HTML, user-supplied text should be
    # escaped (e.g. html.escape) - confirm against the consumer.
    highlighted_sentence = sentence
    if sarcasm_percent > 50:
        highlighted_sentence = f"<mark>{sentence}</mark>"

    return {
        "sarcasm_percent": sarcasm_percent,
        "highlighted_sentence": highlighted_sentence,
        "predicted_sarcasm_label": predicted_label,
    }


def _sarcasm_fallback_result(sentence) -> dict:
    """Build the result returned when no prediction can be made.

    Carries the same keys as a normal result (plus "note") so consumers can
    read "predicted_sarcasm_label" without special-casing the failure path.
    """
    return {
        "sarcasm_percent": 0.0,
        "highlighted_sentence": sentence,
        "predicted_sarcasm_label": "unknown label",
        "note": "Model not loaded or invalid input.",
    }

# Manual smoke test: run this module directly to print predictions for a
# handful of sample sentences.
if __name__ == "__main__":
    print("--- Testing Sarcasm/Irony Detection (New Model) ---")
    sample_sentences = (
        # Ideally detected as sarcastic
        "Wow, I just love waiting in traffic for two hours—it’s the highlight of my day!",
        "Oh, great, another Monday.",
        "I just love getting stuck in traffic for hours.",
        # Should be not sarcastic
        "The sun is shining brightly today.",
        # Clearly sarcastic
        "You're a genius! (said after someone made a foolish mistake)",
    )

    for sample in sample_sentences:
        print(f"'{sample}' -> {detect_sarcasm_and_highlight(sample)}")