Spaces:
Runtime error
Runtime error
# backend/sarcasm_detector.py | |
from transformers import pipeline | |
# Module-level cache for the sarcasm/irony classifier. It starts out empty and
# is filled lazily by _load_sarcasm_pipeline() on first use (not at import).
_sarcasm_pipeline = None


def _load_sarcasm_pipeline():
    """Return the cached sarcasm/irony pipeline, building it on first use.

    The pipeline is stored in the module-level ``_sarcasm_pipeline`` so that a
    successful load happens only once. If construction fails, the error is
    printed, the cache stays ``None`` and ``None`` is returned; because nothing
    is cached in that case, the next call attempts the load again.

    Returns:
        The text-classification pipeline, or ``None`` when it could not be
        created.
    """
    global _sarcasm_pipeline

    # Fast path: a previous call already built the pipeline.
    if _sarcasm_pipeline is not None:
        return _sarcasm_pipeline

    task = "text-classification"
    # cardiffnlp/twitter-roberta-base-irony is used for general irony/sarcasm detection.
    model_id = "cardiffnlp/twitter-roberta-base-irony"
    try:
        _sarcasm_pipeline = pipeline(task, model=model_id)
        print("Sarcasm/Irony detection pipeline loaded successfully.")
    except Exception as load_error:
        # Best-effort: report the failure and let callers handle a missing model.
        print(f"Could not load sarcasm/irony detection pipeline. Ensure 'transformers' and 'torch' are installed. Error: {load_error}")
        _sarcasm_pipeline = None
    return _sarcasm_pipeline
# RoBERTa-base checkpoints accept at most 512 tokens; longer inputs have to be
# truncated before inference or the forward pass fails.
_MAX_MODEL_TOKENS = 512


def _fallback_result(sentence):
    """Build the result returned when no prediction can be made.

    Carries the same keys as a normal result (plus an explanatory ``note``) so
    consumers can read ``predicted_sarcasm_label`` unconditionally.
    """
    return {
        "sarcasm_percent": 0.0,
        "highlighted_sentence": sentence,
        "predicted_sarcasm_label": "unknown label",
        "note": "Model not loaded or invalid input.",
    }


def detect_sarcasm_and_highlight(sentence: str, *, highlight_threshold: float = 50.0) -> dict:
    """Estimate how sarcastic/ironic a sentence is and mark it up if it is.

    Highlighting is deliberately coarse: the model only scores the sentence as
    a whole, so when the score exceeds ``highlight_threshold`` the entire
    sentence is wrapped in ``<mark>`` tags. Token-level highlighting would need
    attention analysis or a dedicated model.

    Args:
        sentence: The text to analyse.
        highlight_threshold: Sarcasm percentage (0-100) above which the
            sentence is wrapped in ``<mark>`` tags. Defaults to 50.

    Returns:
        dict with:
            ``sarcasm_percent`` (float): sarcasm likelihood, 0-100, rounded to
                two decimals.
            ``highlighted_sentence``: the input, wrapped in ``<mark>`` when
                above the threshold, otherwise unchanged.
            ``predicted_sarcasm_label`` (str): ``"sarcastic"``,
                ``"not sarcastic"`` or ``"unknown label"``.
            ``note`` (str): only present when no prediction was made (input is
                not a non-blank string, or the model could not be loaded).
    """
    # Validate first so that bad input never triggers the expensive model load
    # and blank strings are not sent to the classifier.
    if not isinstance(sentence, str) or not sentence.strip():
        return _fallback_result(sentence)

    pipeline_instance = _load_sarcasm_pipeline()
    if pipeline_instance is None:
        return _fallback_result(sentence)

    # Truncate explicitly: text beyond the model's token limit would otherwise
    # make inference fail.
    results = pipeline_instance(sentence, truncation=True, max_length=_MAX_MODEL_TOKENS)

    sarcasm_percent = 0.0
    predicted_label = "not sarcastic"  # Used when the pipeline returns nothing.
    if results:
        # The top entry holds the predicted label and the confidence in it.
        # Expected labels for this checkpoint are 'irony' and 'non_irony'.
        top_prediction = results[0]
        raw_label = top_prediction["label"]
        score = top_prediction["score"]

        if raw_label == "irony":
            sarcasm_percent = round(score * 100, 2)
            predicted_label = "sarcastic"
        elif raw_label == "non_irony":
            # The score is the confidence in NOT being ironic, so the sarcasm
            # likelihood is its complement.
            sarcasm_percent = round((1 - score) * 100, 2)
            predicted_label = "not sarcastic"
        else:
            # Unexpected label name: report it rather than guessing a score.
            sarcasm_percent = 0.0
            predicted_label = "unknown label"

    highlighted_sentence = sentence
    if sarcasm_percent > highlight_threshold:
        # NOTE(review): the sentence is inserted unescaped. If the frontend
        # renders this value as HTML, user-supplied markup would be
        # interpreted -- confirm how it is displayed and escape if needed.
        highlighted_sentence = f"<mark>{sentence}</mark>"

    return {
        "sarcasm_percent": sarcasm_percent,
        "highlighted_sentence": highlighted_sentence,
        "predicted_sarcasm_label": predicted_label,
    }
# Manual smoke test: run this module directly to print the detector's output
# for a handful of sample sentences.
if __name__ == "__main__":
    print("--- Testing Sarcasm/Irony Detection (New Model) ---")
    sample_sentences = (
        # Ideally detected as sarcastic.
        "Wow, I just love waiting in traffic for two hours—it’s the highlight of my day!",
        "Oh, great, another Monday.",
        "I just love getting stuck in traffic for hours.",
        # Should be not sarcastic.
        "The sun is shining brightly today.",
        # Clearly sarcastic.
        "You're a genius! (said after someone made a foolish mistake)",
    )
    for sample in sample_sentences:
        print(f"'{sample}' -> {detect_sarcasm_and_highlight(sample)}")