# backend/sarcasm_detector.py
"""Sarcasm/irony scoring built on a Hugging Face text-classification pipeline."""

try:
    from transformers import pipeline
except ImportError as _import_error:
    # Keep this module importable when 'transformers' is missing: the loader
    # below reports the problem and callers receive the fallback result
    # instead of the whole backend failing at import time.
    pipeline = None
    _TRANSFORMERS_IMPORT_ERROR = _import_error
else:
    _TRANSFORMERS_IMPORT_ERROR = None

# Using cardiffnlp/twitter-roberta-base-irony for general irony/sarcasm detection.
_MODEL_NAME = "cardiffnlp/twitter-roberta-base-irony"

# Scores strictly above this percentage take the highlighting branch.
# The value is an arbitrary threshold.
_HIGHLIGHT_THRESHOLD_PERCENT = 50

_FALLBACK_NOTE = "Model not loaded or invalid input."

# Cached pipeline instance; created lazily on first use and reused afterwards.
_sarcasm_pipeline = None


def _report_load_failure(error):
    """Print the standard diagnostic for a pipeline that could not be loaded."""
    print(
        "Could not load sarcasm/irony detection pipeline. "
        f"Ensure 'transformers' and 'torch' are installed. Error: {error}"
    )


def _load_sarcasm_pipeline():
    """Return the cached sarcasm/irony pipeline, creating it on first use.

    Returns:
        The pipeline object, or None when it could not be created. A failed
        load is not cached, so the next call tries again.
    """
    global _sarcasm_pipeline
    if _sarcasm_pipeline is not None:
        return _sarcasm_pipeline

    if pipeline is None:
        # The 'transformers' import itself failed when this module was loaded.
        _report_load_failure(_TRANSFORMERS_IMPORT_ERROR)
        return None

    try:
        _sarcasm_pipeline = pipeline("text-classification", model=_MODEL_NAME)
        print("Sarcasm/Irony detection pipeline loaded successfully.")
    except Exception as e:
        # Deliberately broad: any failure while building the pipeline is
        # reported and turned into None so callers can degrade gracefully.
        _report_load_failure(e)
        _sarcasm_pipeline = None
    return _sarcasm_pipeline


def _fallback_result(sentence) -> dict:
    """Build the result returned when no prediction can be made."""
    return {
        "sarcasm_percent": 0.0,
        "highlighted_sentence": sentence,
        # Same key as the normal result, so consumers can read it unconditionally.
        "predicted_sarcasm_label": "unknown label",
        "note": _FALLBACK_NOTE,
    }


def _interpret_prediction(raw_label, score) -> tuple:
    """Convert a raw (label, score) prediction into (sarcasm_percent, ui_label).

    The code expects the labels 'irony' and 'non_irony'; any other label maps
    to 0.0 percent and "unknown label".
    """
    if raw_label == "irony":
        return round(score * 100, 2), "sarcastic"
    if raw_label == "non_irony":
        # The score is the confidence in NOT being ironic, so invert it.
        return round((1 - score) * 100, 2), "not sarcastic"
    return 0.0, "unknown label"


def detect_sarcasm_and_highlight(sentence: str) -> dict:
    """Estimate how sarcastic/ironic a sentence is.

    NOTE: Highlighting specific parts of a sentence for sarcasm/jokes is
    complex and often requires attention mechanisms or custom token-level
    analysis from a specialized NLP model. This implementation focuses on the
    sarcasm percentage and provides a placeholder for highlighting.

    Args:
        sentence: The input sentence to analyze.

    Returns:
        A dictionary with:
          - "sarcasm_percent": float in [0, 100], rounded to 2 decimals.
          - "highlighted_sentence": the sentence (currently returned as-is).
          - "predicted_sarcasm_label": "sarcastic", "not sarcastic" or
            "unknown label".
          - "note": present only when the input is not a string or the model
            could not be loaded; "sarcasm_percent" is then 0.0.
    """
    # Validate first so invalid input never triggers an expensive model load.
    if not isinstance(sentence, str):
        return _fallback_result(sentence)

    pipeline_instance = _load_sarcasm_pipeline()
    if pipeline_instance is None:
        return _fallback_result(sentence)

    results = pipeline_instance(sentence)

    sarcasm_percent = 0.0
    predicted_label = "not sarcastic"  # Default when the pipeline returns nothing
    if results:
        top_prediction = results[0]
        sarcasm_percent, predicted_label = _interpret_prediction(
            top_prediction["label"], top_prediction["score"]
        )

    highlighted_sentence = sentence
    if sarcasm_percent > _HIGHLIGHT_THRESHOLD_PERCENT:
        # NOTE(review): this was described as "HTML-like highlighting", but no
        # markup is applied, so the sentence is returned unchanged. If tags are
        # added here, escape the sentence first (e.g. html.escape) because it
        # is user-supplied text.
        highlighted_sentence = f"{sentence}"

    return {
        "sarcasm_percent": sarcasm_percent,
        "highlighted_sentence": highlighted_sentence,
        "predicted_sarcasm_label": predicted_label,  # More descriptive label for the frontend
    }


def _demo():
    """Run a few sample sentences through the detector and print the results."""
    print("--- Testing Sarcasm/Irony Detection (New Model) ---")
    samples = (
        # This sentence should now ideally be detected as sarcastic
        "Wow, I just love waiting in traffic for two hours—it’s the highlight of my day!",
        "Oh, great, another Monday.",
        "I just love getting stuck in traffic for hours.",
        "The sun is shining brightly today.",  # Should be not sarcastic
        "You're a genius! (said after someone made a foolish mistake)",  # Clearly sarcastic
    )
    for sample in samples:
        print(f"'{sample}' -> {detect_sarcasm_and_highlight(sample)}")


# Example Usage (for testing this module independently)
if __name__ == "__main__":
    _demo()