Sentilyze-APP / backend /sarcasm_detector.py
Numan sheikh
Upload Sentilyze app files to Hugging Face Space
7fb74eb
# backend/sarcasm_detector.py
from transformers import pipeline
# Module-level cache for the sarcasm/irony detection pipeline. It starts out
# empty and is filled in lazily by _load_sarcasm_pipeline() on first use, so
# importing this module does not load the model by itself.
_sarcasm_pipeline = None


def _load_sarcasm_pipeline():
    """Return the cached sarcasm/irony pipeline, building it on first use.

    The pipeline is a ``text-classification`` pipeline backed by the
    ``cardiffnlp/twitter-roberta-base-irony`` model. A successfully built
    pipeline is stored in the module-level ``_sarcasm_pipeline`` and reused
    by later calls.

    Returns:
        The pipeline object, or ``None`` if it could not be created. Failures
        are reported with ``print`` and are not cached, so the next call
        tries to build the pipeline again.
    """
    global _sarcasm_pipeline

    # Fast path: a previous call already built the pipeline.
    if _sarcasm_pipeline is not None:
        return _sarcasm_pipeline

    try:
        _sarcasm_pipeline = pipeline(
            "text-classification",
            model="cardiffnlp/twitter-roberta-base-irony",
        )
        print("Sarcasm/Irony detection pipeline loaded successfully.")
    except Exception as e:
        # Best effort: report the problem and leave the cache empty so the
        # caller can fall back gracefully.
        print(f"Could not load sarcasm/irony detection pipeline. Ensure 'transformers' and 'torch' are installed. Error: {e}")
        _sarcasm_pipeline = None

    return _sarcasm_pipeline
def detect_sarcasm_and_highlight(sentence: str, *, highlight_threshold: float = 50.0) -> dict:
    """Estimate how sarcastic/ironic a sentence is and highlight it if likely sarcastic.

    The sentence is classified with the pipeline returned by
    ``_load_sarcasm_pipeline()``. Only the first prediction is used; its
    label and score are converted into a "sarcasm percentage".

    NOTE: Highlighting specific parts of a sentence for sarcasm/jokes is
    complex and would need token-level analysis from a specialized model.
    This implementation is a placeholder: when the sarcasm percentage exceeds
    ``highlight_threshold`` the whole sentence is wrapped in ``<mark>`` tags.

    Args:
        sentence (str): The input sentence to analyze.
        highlight_threshold (float): Sarcasm percentage (0-100) above which
            the sentence is highlighted. Defaults to 50.0.

    Returns:
        dict: Always contains the keys
            - ``"sarcasm_percent"`` (float): 0-100, rounded to two decimals.
            - ``"highlighted_sentence"``: the sentence, wrapped in
              ``<mark>...</mark>`` when above the threshold.
            - ``"predicted_sarcasm_label"`` (str): ``"sarcastic"``,
              ``"not sarcastic"`` or ``"unknown label"``.
        If ``sentence`` is not a string or the model could not be loaded, the
        result has ``sarcasm_percent`` 0.0, the input returned unchanged, the
        label ``"unknown label"`` and an extra ``"note"`` key explaining why.
    """
    # Validate the input first so that invalid input never triggers a
    # (potentially slow) model load.
    pipeline_instance = _load_sarcasm_pipeline() if isinstance(sentence, str) else None
    if not pipeline_instance:
        return {
            "sarcasm_percent": 0.0,
            "highlighted_sentence": sentence,
            # Same key as on the success path, so callers can read it
            # unconditionally; no prediction was made, hence "unknown label".
            "predicted_sarcasm_label": "unknown label",
            "note": "Model not loaded or invalid input.",
        }

    # Truncate long inputs: RoBERTa-base models accept at most 512 tokens and
    # the pipeline raises at inference time if that limit is exceeded.
    results = pipeline_instance(sentence, truncation=True, max_length=512)

    sarcasm_percent = 0.0
    predicted_label = "not sarcastic"  # Default when no prediction is returned
    if results:
        top_prediction = results[0]
        raw_label = top_prediction["label"]  # expected: 'irony' or 'non_irony'
        score = top_prediction["score"]
        if raw_label == "irony":
            # Score is the confidence that the text IS ironic/sarcastic.
            sarcasm_percent = round(score * 100, 2)
            predicted_label = "sarcastic"
        elif raw_label == "non_irony":
            # Score is the confidence that the text is NOT sarcastic, so the
            # sarcasm share is the complement.
            sarcasm_percent = round((1 - score) * 100, 2)
        else:
            # Unexpected label (should not happen with this model).
            predicted_label = "unknown label"

    # Placeholder highlighting: mark the whole sentence when it is likely
    # sarcastic.
    # NOTE(review): the sentence is inserted verbatim. If it can contain
    # user-supplied markup and the frontend renders this value as HTML, it
    # should be escaped (e.g. html.escape) consistently on every return path.
    highlighted_sentence = sentence
    if sarcasm_percent > highlight_threshold:
        highlighted_sentence = f"<mark>{sentence}</mark>"

    return {
        "sarcasm_percent": sarcasm_percent,
        "highlighted_sentence": highlighted_sentence,
        "predicted_sarcasm_label": predicted_label,  # Readable label for the frontend
    }
# Manual smoke test: run this module directly to print the detector's output
# for a handful of sample sentences.
if __name__ == "__main__":
    print("--- Testing Sarcasm/Irony Detection (New Model) ---")
    sample_sentences = (
        # This sentence should now ideally be detected as sarcastic.
        "Wow, I just love waiting in traffic for two hours—it’s the highlight of my day!",
        "Oh, great, another Monday.",
        "I just love getting stuck in traffic for hours.",
        # Should be not sarcastic.
        "The sun is shining brightly today.",
        # Clearly sarcastic.
        "You're a genius! (said after someone made a foolish mistake)",
    )
    for sample in sample_sentences:
        print(f"'{sample}' -> {detect_sarcasm_and_highlight(sample)}")