import os
import subprocess
import tempfile
import uuid

import gradio as gr
import whisper
from PIL import Image
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    pipeline,
    BlipProcessor,
    BlipForConditionalGeneration,
)
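
# ---- Models ----
# GoEmotions is a multi-label dataset: the classifier applies a sigmoid to each
# of the 28 label logits independently, and top_k=None returns every label's score.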
model_name = "joeddav/distilbert-base-uncased-go-emotions-student"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    top_k=None,
    function_to_apply="sigmoid",
)
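
# Abstractive summarizer used to condense long inputs before classification.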
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
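
# BLIP generates a one-sentence natural-language caption for an input image.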
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
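
# Whisper "base" trades some accuracy for speed; larger checkpoints such as
# "small" or "medium" transcribe more reliably if latency is acceptable.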
whisper_model = whisper.load_model("base")
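
# Full GoEmotions label set, kept for reference; the classification logic below
# works from the category sets rather than this list.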
EMOTION_LABELS = [
    "admiration",
    "amusement",
    "anger",
    "annoyance",
    "approval",
    "caring",
    "confusion",
    "curiosity",
    "desire",
    "disappointment",
    "disapproval",
    "disgust",
    "embarrassment",
    "excitement",
    "fear",
    "gratitude",
    "grief",
    "joy",
    "love",
    "nervousness",
    "optimism",
    "pride",
    "realization",
    "relief",
    "remorse",
    "sadness",
    "surprise",
    "neutral",
]
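
# Coarse sentiment buckets used to aggregate the fine-grained label scores.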
POSITIVE = {
    "admiration",
    "amusement",
    "approval",
    "caring",
    "excitement",
    "gratitude",
    "joy",
    "love",
    "optimism",
    "pride",
    "relief",
}

NEGATIVE = {
    "anger",
    "annoyance",
    "disappointment",
    "disapproval",
    "disgust",
    "embarrassment",
    "fear",
    "grief",
    "nervousness",
    "remorse",
    "sadness",
}

SURPRISE_UNCERTAINTY = {
    "surprise",
    "confusion",
    "curiosity",
    "realization",
}

DESIRE = {"desire"}

NEUTRAL = {"neutral"}

CATEGORY_MAP = {
    "Positive": POSITIVE,
    "Negative": NEGATIVE,
    "Surprise/Uncertainty": SURPRISE_UNCERTAINTY,
    "Desire": DESIRE,
    "Neutral": NEUTRAL,
}
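

# Each modality below is reduced to plain text and routed through process_text:
#   image -> BLIP caption -> summarize -> classify
#   audio -> Whisper transcript -> classify
#   video -> ffmpeg audio extraction -> Whisper transcript -> classify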
def process_text(text):
    if not text.strip():
        return "Please enter some text."

    # Condense long inputs first; truncation=True guards against texts that
    # exceed the summarizer's 1024-token input limit.
    if len(text.split()) > 1000:
        summary = summarizer(
            text, max_length=200, min_length=100, do_sample=False, truncation=True
        )[0]["summary_text"]
    else:
        summary = text

    print("Summary:", summary)

    preds = classifier(summary, truncation=True, max_length=512)[0]
    label_scores = {pred["label"]: pred["score"] for pred in preds}

    print("Raw classifier preds:", preds)
    print("Label scores dict:", label_scores)

    # Sum the per-label sigmoid scores within each coarse category and take the
    # category with the largest total.
    category_totals = {}
    for cat_name, emotions in CATEGORY_MAP.items():
        category_totals[cat_name] = sum(label_scores.get(e, 0) for e in emotions)

    best_category = max(category_totals, key=category_totals.get)

    emotions_in_cat = [(e, label_scores.get(e, 0)) for e in CATEGORY_MAP[best_category]]
    emotions_in_cat.sort(key=lambda x: x[1], reverse=True)

    # Report emotions whose independent sigmoid score clears the threshold,
    # falling back to the two highest-scoring emotions in the category.
    threshold = 0.8
    strong_emotions = [(e, s) for e, s in emotions_in_cat if s > threshold]

    out = (
        f"**Summary/Text:**\n{summary}\n\n"
        f"**Dominant Category:** {best_category}\n\n"
        f"**Emotions in this category:**\n"
    )
    if strong_emotions:
        for emotion, score in strong_emotions:
            out += f"{emotion}: {score:.4f}\n"
    else:
        for emotion, score in emotions_in_cat[:2]:
            out += f"{emotion}: {score:.4f}\n"

    return out.strip()
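
# Example: process_text("I love this!") returns a markdown-formatted string
# naming the dominant category and the top-scoring emotions inside it.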


def image_to_text(image_path):
    image = Image.open(image_path).convert("RGB")
    inputs = caption_processor(images=image, return_tensors="pt")
    out = caption_model.generate(**inputs)
    return caption_processor.decode(out[0], skip_special_tokens=True)


def process_image(image_path):
    caption = image_to_text(image_path)
    # Captions are a single sentence, so this summarization pass mostly
    # normalizes phrasing rather than shortening anything.
    summary = summarizer(caption, max_length=60, min_length=5, do_sample=False)[0]["summary_text"]
    return process_text(summary)


def extract_audio(video_path):
    # Write the extracted audio track to a uniquely named temp file; using
    # tempfile.gettempdir() instead of a hard-coded /tmp keeps this portable.
    audio_path = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4().hex}.mp3")
    subprocess.run(
        ["ffmpeg", "-y", "-i", video_path, "-q:a", "0", "-map", "a", audio_path],
        check=True,
    )
    return audio_path


def transcribe_audio(audio_path):
    result = whisper_model.transcribe(audio_path)
    return result["text"]


def process_audio(audio_path):
    text = transcribe_audio(audio_path)
    return process_text(text)


def process_video(video_path):
    audio_path = extract_audio(video_path)
    try:
        text = transcribe_audio(audio_path)
    finally:
        # Remove the temporary audio file even if transcription fails.
        os.remove(audio_path)
    return process_text(text)
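

# ---- Gradio UI: one tab per input modality ----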
text_input = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(lines=7, placeholder="Enter text...", label="Text Input"),
    outputs=gr.Textbox(label="Emotion Output"),
    title="Text Emotion Classifier (GoEmotions, Category Based)",
    description="Enter text to detect nuanced emotions grouped by dominant category.",
)

image_input = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="filepath", label="Upload Image"),
    outputs=gr.Textbox(label="Emotion Output"),
    title="Image Emotion Classifier",
    description="Upload an image. The model will caption it, summarize the caption, and predict emotions grouped by category.",
)

audio_input = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    outputs=gr.Textbox(label="Emotion Output"),
    title="Audio Emotion Classifier",
    description="Upload audio. The model will transcribe it, summarize, and detect emotions grouped by category.",
)

video_input = gr.Interface(
    fn=process_video,
    inputs=gr.File(file_types=[".mp4", ".mov", ".avi"], label="Upload Video"),
    outputs=gr.Textbox(label="Emotion Output"),
    title="Video Emotion Classifier",
    description="Upload a video. The model will extract the audio, transcribe it, summarize, and detect emotions grouped by category.",
)

demo = gr.TabbedInterface(
    [text_input, image_input, audio_input, video_input],
    ["Text", "Image", "Audio", "Video"],
)

if __name__ == "__main__":
    demo.launch()