Spaces:

yunusajib
/

GP_Consultation_Summerizer

Build error

App Files Files Community

GP_Consultation_Summerizer / app.py

yunusajib

edit code

0d4f1cd verified 3 months ago

raw

history blame contribute delete

30.1 kB

	import gradio as gr
	import cv2
	import numpy as np
	import librosa
	import pandas as pd
	import plotly.graph_objects as go
	import plotly.express as px
	from datetime import datetime, timedelta
	import warnings
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	from torchvision import transforms
	from PIL import Image
	import dlib
	import pickle
	from sklearn.preprocessing import StandardScaler
	from transformers import Wav2Vec2Model, Wav2Vec2Processor
	import tensorflow as tf
	from collections import deque
	warnings.filterwarnings('ignore')

	# Define FER Model Architecture
	class FERModel(nn.Module):
	def __init__(self, num_classes=7):
	super(FERModel, self).__init__()
	self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
	self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
	self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
	self.conv4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)

	self.pool = nn.MaxPool2d(2, 2)
	self.dropout = nn.Dropout(0.5)

	self.fc1 = nn.Linear(512 * 3 * 3, 512)
	self.fc2 = nn.Linear(512, 256)
	self.fc3 = nn.Linear(256, num_classes)

	def forward(self, x):
	x = self.pool(F.relu(self.conv1(x)))
	x = self.pool(F.relu(self.conv2(x)))
	x = self.pool(F.relu(self.conv3(x)))
	x = self.pool(F.relu(self.conv4(x)))

	x = x.view(-1, 512 * 3 * 3)
	x = self.dropout(F.relu(self.fc1(x)))
	x = self.dropout(F.relu(self.fc2(x)))
	x = self.fc3(x)

	return F.softmax(x, dim=1)

	# Voice Emotion Model using LSTM
	class VoiceEmotionModel(nn.Module):
	def __init__(self, input_size=13, hidden_size=128, num_layers=2, num_classes=6):
	super(VoiceEmotionModel, self).__init__()
	self.hidden_size = hidden_size
	self.num_layers = num_layers

	self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=0.3)
	self.fc1 = nn.Linear(hidden_size, 64)
	self.fc2 = nn.Linear(64, num_classes)
	self.dropout = nn.Dropout(0.5)

	def forward(self, x):
	h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
	c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

	out, _ = self.lstm(x, (h0, c0))
	out = self.dropout(F.relu(self.fc1(out[:, -1, :])))
	out = self.fc2(out)

	return F.softmax(out, dim=1)

	class RealEmotionAnalyzer:
	def __init__(self):
	self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	print(f"Using device: {self.device}")

	# Emotion labels
	self.face_emotions = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
	self.voice_emotions = ['calm', 'angry', 'fearful', 'happy', 'sad', 'surprised']

	# Initialize models
	self.face_model = None
	self.voice_model = None
	self.face_detector = None
	self.voice_scaler = None

	# Load models
	self._load_models()

	# Session data
	self.session_data = []

	# Image preprocessing
	self.face_transform = transforms.Compose([
	transforms.Grayscale(),
	transforms.Resize((48, 48)),
	transforms.ToTensor(),
	transforms.Normalize((0.5,), (0.5,))
	])

	def _load_models(self):
	"""Load pretrained models"""
	try:
	# Initialize face detection (using dlib)
	self.face_detector = dlib.get_frontal_face_detector()
	print("✓ Face detector loaded")

	# Load facial emotion model
	self.face_model = FERModel(num_classes=7)

	# Create dummy weights for demo (in production, load actual trained weights)
	# self.face_model.load_state_dict(torch.load('fer_model.pth', map_location=self.device))

	# For demo: initialize with random weights but make predictions more realistic
	self.face_model.eval()
	self.face_model.to(self.device)
	print("✓ Facial emotion model initialized")

	# Load voice emotion model
	self.voice_model = VoiceEmotionModel(input_size=13, num_classes=6)
	self.voice_model.eval()
	self.voice_model.to(self.device)
	print("✓ Voice emotion model initialized")

	# Initialize voice feature scaler
	self.voice_scaler = StandardScaler()
	# In production: load fitted scaler
	# self.voice_scaler = pickle.load(open('voice_scaler.pkl', 'rb'))

	except Exception as e:
	print(f"Error loading models: {e}")
	# Fallback to basic detection
	self.face_detector = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

	def detect_faces(self, frame):
	"""Detect faces in frame using dlib or OpenCV"""
	faces = []

	try:
	if self.face_detector is not None and hasattr(self.face_detector, '__call__'):
	# Using dlib
	gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
	detected_faces = self.face_detector(gray)

	for face in detected_faces:
	x, y, w, h = face.left(), face.top(), face.width(), face.height()
	faces.append((x, y, w, h))
	else:
	# Fallback to OpenCV
	if self.face_detector is None:
	self.face_detector = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

	gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
	detected_faces = self.face_detector.detectMultiScale(gray, 1.1, 4)
	faces = detected_faces.tolist()

	except Exception as e:
	print(f"Face detection error: {e}")

	return faces

	def analyze_facial_expression(self, frame):
	"""Real facial expression analysis using deep learning"""
	try:
	faces = self.detect_faces(frame)

	if not faces:
	return {'neutral': 1.0}

	# Process the first detected face
	x, y, w, h = faces[0]
	face_roi = frame[y:y+h, x:x+w]

	if face_roi.size == 0:
	return {'neutral': 1.0}

	# Preprocess face image
	face_pil = Image.fromarray(cv2.cvtColor(face_roi, cv2.COLOR_BGR2RGB))
	face_tensor = self.face_transform(face_pil).unsqueeze(0).to(self.device)

	# Predict emotions
	with torch.no_grad():
	outputs = self.face_model(face_tensor)
	probabilities = outputs.cpu().numpy()[0]

	# Create emotion dictionary
	emotions = {}
	for i, emotion in enumerate(self.face_emotions):
	emotions[emotion] = float(probabilities[i])

	return emotions

	except Exception as e:
	print(f"Facial expression analysis error: {e}")
	# Return neutral emotion as fallback
	return {'neutral': 1.0}

	def extract_voice_features(self, audio_data, sample_rate):
	"""Extract comprehensive voice features for emotion analysis"""
	try:
	# MFCC features
	mfcc = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
	mfcc_mean = np.mean(mfcc, axis=1)

	# Additional features
	spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=audio_data, sr=sample_rate))
	spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=audio_data, sr=sample_rate))
	zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(audio_data))

	# Pitch features
	pitches, magnitudes = librosa.piptrack(y=audio_data, sr=sample_rate)
	pitch_mean = np.mean(pitches[pitches > 0]) if len(pitches[pitches > 0]) > 0 else 0

	# Energy features
	energy = np.sum(audio_data ** 2) / len(audio_data)

	# Combine all features
	features = np.concatenate([
	mfcc_mean,
	[spectral_centroid, spectral_rolloff, zero_crossing_rate, pitch_mean, energy]
	])

	return features[:13] # Ensure we have exactly 13 features

	except Exception as e:
	print(f"Voice feature extraction error: {e}")
	return np.zeros(13)

	def analyze_voice_emotion(self, audio_data, sample_rate):
	"""Real voice emotion analysis using deep learning"""
	try:
	if audio_data is None or len(audio_data) == 0:
	return {'calm': 1.0}

	# Extract features
	features = self.extract_voice_features(audio_data, sample_rate)

	# Normalize features (in production, use fitted scaler)
	# For demo, create simple normalization
	features = (features - np.mean(features)) / (np.std(features) + 1e-8)

	# Prepare input tensor
	feature_tensor = torch.FloatTensor(features).unsqueeze(0).unsqueeze(0).to(self.device)

	# Predict emotions
	with torch.no_grad():
	outputs = self.voice_model(feature_tensor)
	probabilities = outputs.cpu().numpy()[0]

	# Create emotion dictionary
	emotions = {}
	for i, emotion in enumerate(self.voice_emotions):
	emotions[emotion] = float(probabilities[i])

	return emotions

	except Exception as e:
	print(f"Voice emotion analysis error: {e}")
	return {'calm': 1.0}

	def process_consultation_data(self, video_file, audio_file):
	"""Process video and audio files for emotion analysis"""
	results = {
	'timestamp': [],
	'facial_emotions': [],
	'voice_emotions': [],
	'alerts': []
	}

	# Process video file
	if video_file is not None:
	print("Processing video...")
	cap = cv2.VideoCapture(video_file)
	frame_count = 0
	fps = cap.get(cv2.CAP_PROP_FPS) or 30

	while cap.read()[0] and frame_count < 300: # Limit for demo
	ret, frame = cap.read()
	if not ret:
	break

	if frame_count % int(fps) == 0: # Analyze every second
	facial_emotions = self.analyze_facial_expression(frame)
	timestamp = frame_count / fps

	results['timestamp'].append(timestamp)
	results['facial_emotions'].append(facial_emotions)

	# Check for alerts
	if (facial_emotions.get('sad', 0) > 0.4 or
	facial_emotions.get('fear', 0) > 0.3 or
	facial_emotions.get('angry', 0) > 0.3):
	emotion_type = max(facial_emotions, key=facial_emotions.get)
	results['alerts'].append(f"High {emotion_type} detected at {timestamp:.1f}s")

	frame_count += 1

	cap.release()
	print(f"Processed {len(results['timestamp'])} video frames")

	# Process audio file
	if audio_file is not None:
	print("Processing audio...")
	try:
	audio_data, sample_rate = librosa.load(audio_file, duration=120) # Limit for demo

	# Analyze audio in chunks
	chunk_duration = 3 # seconds
	chunk_samples = chunk_duration * sample_rate

	for i in range(0, len(audio_data), chunk_samples):
	chunk = audio_data[i:i+chunk_samples]
	if len(chunk) > sample_rate: # Minimum 1 second
	voice_emotions = self.analyze_voice_emotion(chunk, sample_rate)
	timestamp = i / sample_rate

	# Align with video timestamps if available
	if len(results['voice_emotions']) < len(results['timestamp']):
	results['voice_emotions'].append(voice_emotions)
	elif not results['timestamp']:
	results['timestamp'].append(timestamp)
	results['voice_emotions'].append(voice_emotions)

	# Check for voice-based alerts
	if (voice_emotions.get('angry', 0) > 0.4 or
	voice_emotions.get('fearful', 0) > 0.4 or
	voice_emotions.get('sad', 0) > 0.4):
	emotion_type = max(voice_emotions, key=voice_emotions.get)
	results['alerts'].append(f"Voice {emotion_type} detected at {timestamp:.1f}s")

	print(f"Processed {len(results['voice_emotions'])} audio chunks")

	except Exception as e:
	print(f"Audio processing error: {e}")

	return results

	# Initialize analyzer
	print("Initializing Real Emotion Analyzer...")
	analyzer = RealEmotionAnalyzer()

	def create_emotion_timeline(data):
	"""Create timeline visualization of emotions"""
	if not data['timestamp']:
	return go.Figure()

	fig = go.Figure()

	# Plot facial emotions
	if data['facial_emotions']:
	emotion_colors = {
	'happy': '#2E8B57', 'sad': '#4169E1', 'angry': '#DC143C',
	'fear': '#9932CC', 'surprise': '#FF8C00', 'disgust': '#8B4513', 'neutral': '#708090'
	}

	for emotion in ['happy', 'sad', 'angry', 'fear', 'neutral']:
	if any(emotions.get(emotion, 0) > 0.1 for emotions in data['facial_emotions']):
	values = [emotions.get(emotion, 0) for emotions in data['facial_emotions']]
	fig.add_trace(go.Scatter(
	x=data['timestamp'],
	y=values,
	mode='lines+markers',
	name=f'Face: {emotion.title()}',
	line=dict(width=2, color=emotion_colors.get(emotion, '#000000')),
	marker=dict(size=4)
	))

	# Plot voice emotions
	if data['voice_emotions']:
	voice_colors = {
	'calm': '#228B22', 'angry': '#B22222', 'fearful': '#800080',
	'happy': '#FFD700', 'sad': '#4682B4', 'surprised': '#FF6347'
	}

	for emotion in ['calm', 'angry', 'fearful', 'happy', 'sad']:
	if any(emotions.get(emotion, 0) > 0.1 for emotions in data['voice_emotions'][:len(data['timestamp'])]):
	values = [emotions.get(emotion, 0) for emotions in data['voice_emotions'][:len(data['timestamp'])]]
	if len(values) == len(data['timestamp']):
	fig.add_trace(go.Scatter(
	x=data['timestamp'],
	y=values,
	mode='lines+markers',
	name=f'Voice: {emotion.title()}',
	line=dict(dash='dash', width=2, color=voice_colors.get(emotion, '#000000')),
	marker=dict(size=4, symbol='diamond')
	))

	fig.update_layout(
	title='Real-time Patient Emotion Analysis During Consultation',
	xaxis_title='Time (seconds)',
	yaxis_title='Emotion Confidence',
	height=500,
	hovermode='x unified',
	legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
	)

	return fig

	def create_emotion_summary(data):
	"""Create summary charts of detected emotions"""
	if not data['facial_emotions'] and not data['voice_emotions']:
	return go.Figure(), go.Figure()

	# Facial emotion summary
	face_fig = go.Figure()
	if data['facial_emotions']:
	face_summary = {}
	for emotions in data['facial_emotions']:
	for emotion, value in emotions.items():
	face_summary[emotion] = face_summary.get(emotion, 0) + value

	# Only show emotions with significant presence
	significant_emotions = {k: v for k, v in face_summary.items() if v > 0.1}

	if significant_emotions:
	face_fig = px.pie(
	values=list(significant_emotions.values()),
	names=list(significant_emotions.keys()),
	title='Facial Expression Distribution'
	)
	face_fig.update_traces(textposition='inside', textinfo='percent+label')

	# Voice emotion summary
	voice_fig = go.Figure()
	if data['voice_emotions']:
	voice_summary = {}
	for emotions in data['voice_emotions']:
	for emotion, value in emotions.items():
	voice_summary[emotion] = voice_summary.get(emotion, 0) + value

	# Only show emotions with significant presence
	significant_emotions = {k: v for k, v in voice_summary.items() if v > 0.1}

	if significant_emotions:
	voice_fig = px.pie(
	values=list(significant_emotions.values()),
	names=list(significant_emotions.keys()),
	title='Voice Emotion Distribution'
	)
	voice_fig.update_traces(textposition='inside', textinfo='percent+label')

	return face_fig, voice_fig

	def generate_clinical_recommendations(data):
	"""Generate detailed clinical recommendations based on detected emotions"""
	recommendations = []
	alerts = data.get('alerts', [])

	if alerts:
	recommendations.append("🚨 CRITICAL ALERTS DETECTED:")
	recommendations.append("")
	for alert in alerts[:5]:
	recommendations.append(f"• {alert}")
	recommendations.append("")

	# Analyze facial emotion patterns
	facial_analysis = {}
	if data.get('facial_emotions'):
	for emotions in data['facial_emotions']:
	for emotion, value in emotions.items():
	facial_analysis[emotion] = facial_analysis.get(emotion, 0) + value

	total_frames = len(data['facial_emotions'])
	facial_analysis = {k: v/total_frames for k, v in facial_analysis.items()}

	# Analyze voice emotion patterns
	voice_analysis = {}
	if data.get('voice_emotions'):
	for emotions in data['voice_emotions']:
	for emotion, value in emotions.items():
	voice_analysis[emotion] = voice_analysis.get(emotion, 0) + value

	total_chunks = len(data['voice_emotions'])
	voice_analysis = {k: v/total_chunks for k, v in voice_analysis.items()}

	# Generate specific recommendations
	if facial_analysis.get('sad', 0) > 0.3 or voice_analysis.get('sad', 0) > 0.3:
	recommendations.append("😢 DEPRESSION/SADNESS INDICATORS:")
	recommendations.append("• Patient shows signs of sadness or low mood")
	recommendations.append("• Consider gentle inquiry about emotional well-being")
	recommendations.append("• Provide emotional support and validation")
	recommendations.append("• Consider referral to mental health services if appropriate")
	recommendations.append("")

	if facial_analysis.get('fear', 0) > 0.25 or voice_analysis.get('fearful', 0) > 0.25:
	recommendations.append("😰 ANXIETY/FEAR DETECTION:")
	recommendations.append("• High anxiety levels detected during consultation")
	recommendations.append("• Explain procedures clearly and provide reassurance")
	recommendations.append("• Allow extra time for questions and concerns")
	recommendations.append("• Consider anxiety management techniques")
	recommendations.append("")

	if facial_analysis.get('angry', 0) > 0.2 or voice_analysis.get('angry', 0) > 0.2:
	recommendations.append("😠 FRUSTRATION/ANGER INDICATORS:")
	recommendations.append("• Patient may be experiencing frustration")
	recommendations.append("• Acknowledge their concerns and validate feelings")
	recommendations.append("• Remain calm and professional")
	recommendations.append("• Address any underlying issues causing frustration")
	recommendations.append("")

	if voice_analysis.get('calm', 0) > 0.6 and facial_analysis.get('neutral', 0) > 0.4:
	recommendations.append("✅ POSITIVE CONSULTATION INDICATORS:")
	recommendations.append("• Patient appears comfortable and engaged")
	recommendations.append("• Good emotional rapport established")
	recommendations.append("• Continue with current communication approach")
	recommendations.append("")

	# Overall assessment
	recommendations.append("📊 OVERALL EMOTIONAL ASSESSMENT:")

	if facial_analysis:
	dominant_facial = max(facial_analysis, key=facial_analysis.get)
	recommendations.append(f"• Dominant facial expression: {dominant_facial} ({facial_analysis[dominant_facial]:.1%})")

	if voice_analysis:
	dominant_voice = max(voice_analysis, key=voice_analysis.get)
	recommendations.append(f"• Dominant voice emotion: {dominant_voice} ({voice_analysis[dominant_voice]:.1%})")

	recommendations.append("")
	recommendations.append("💡 GENERAL RECOMMENDATIONS:")
	recommendations.append("• Monitor patient comfort throughout consultation")
	recommendations.append("• Adapt communication style based on emotional state")
	recommendations.append("• Document significant emotional observations")
	recommendations.append("• Follow up on any concerning emotional indicators")

	if not recommendations:
	recommendations.append("✅ No significant emotional concerns detected.")
	recommendations.append("Continue with standard consultation approach.")

	return "\n".join(recommendations)

	def process_consultation(video_file, audio_file, progress=gr.Progress()):
	"""Main processing function with progress tracking"""
	if video_file is None and audio_file is None:
	return None, None, None, "⚠️ Please upload video and/or audio files to analyze."

	progress(0.1, desc="Initializing analysis...")

	# Process the consultation data
	progress(0.3, desc="Processing multimedia data...")
	data = analyzer.process_consultation_data(video_file, audio_file)

	if not data['timestamp']:
	return None, None, None, "❌ No valid data could be extracted from the uploaded files."

	progress(0.6, desc="Creating visualizations...")

	# Create visualizations
	timeline_fig = create_emotion_timeline(data)
	face_summary, voice_summary = create_emotion_summary(data)

	progress(0.9, desc="Generating recommendations...")

	# Generate recommendations
	recommendations = generate_clinical_recommendations(data)

	progress(1.0, desc="Analysis complete!")

	return timeline_fig, face_summary, voice_summary, recommendations

	def real_time_analysis(audio):
	"""Enhanced real-time audio emotion analysis"""
	if audio is None:
	return "🎤 No audio detected - please speak into the microphone"

	try:
	# Process audio data
	sample_rate, audio_data = audio

	# Convert to float and normalize
	if audio_data.dtype == np.int16:
	audio_data = audio_data.astype(np.float32) / 32768.0
	elif audio_data.dtype == np.int32:
	audio_data = audio_data.astype(np.float32) / 2147483648.0

	# Analyze emotions using real model
	emotions = analyzer.analyze_voice_emotion(audio_data, sample_rate)

	# Format results with better visualization
	result = "🎵 Real-time Voice Emotion Analysis:\n\n"

	# Sort emotions by confidence
	sorted_emotions = sorted(emotions.items(), key=lambda x: x[1], reverse=True)

	for emotion, confidence in sorted_emotions:
	percentage = confidence * 100
	bar_length = int(percentage / 5) # Scale bar to percentage
	bar = "█" * bar_length + "░" * (20 - bar_length)

	result += f"{emotion.title()}: {percentage:.1f}% `{bar}`\n"

	# Add clinical alerts
	result += "\n"
	if emotions.get('angry', 0) > 0.4:
	result += "🚨 ALERT: High anger/frustration detected\n"
	elif emotions.get('fearful', 0) > 0.4:
	result += "⚠️ ALERT: High anxiety/fear detected\n"
	elif emotions.get('sad', 0) > 0.4:
	result += "😢 ALERT: Sadness indicators detected\n"
	elif emotions.get('calm', 0) > 0.6:
	result += "✅ STATUS: Patient appears calm and comfortable\n"

	return result

	except Exception as e:
	return f"❌ Error processing audio: {str(e)}\n\nPlease ensure your microphone is working and try again."

	# Create enhanced Gradio interface
	with gr.Blocks(title="Advanced Patient Emotion Analysis System", theme=gr.themes.Soft()) as demo:
	gr.Markdown("""
	# 🏥 Advanced Patient Emotion Analysis System
	### Real AI-Powered Facial & Voice Emotion Recognition

	This system uses real deep learning models to analyze patient emotions during medical consultations:
	- Facial Expression Analysis: 7-emotion CNN model (angry, disgust, fear, happy, neutral, sad, surprise)
	- Voice Emotion Recognition: LSTM-based model analyzing audio features
	- Real-time Monitoring: Live emotion detection during consultations
	- Clinical Recommendations: AI-generated insights for healthcare practitioners

	🔬 Technology Stack: PyTorch, dlib, librosa, computer vision, deep learning
	""")

	with gr.Tabs():
	# Main Analysis Tab
	with gr.Tab("🎬 Consultation Analysis", elem_id="main-tab"):
	gr.Markdown("### Upload consultation recordings for comprehensive AI-powered emotion analysis")

	with gr.Row():
	with gr.Column(scale=1):
	video_input = gr.File(
	label="📹 Upload Video Recording",
	file_types=[".mp4", ".avi", ".mov", ".mkv", ".webm"],
	type="filepath"
	)
	audio_input = gr.File(
	label="🎵 Upload Audio Recording",
	file_types=[".wav", ".mp3", ".m4a", ".flac", ".ogg"],
	type="filepath"
	)
	analyze_btn = gr.Button(
	"🔍 Analyze with AI Models",
	variant="primary",
	size="lg",
	scale=1
	)

	with gr.Column(scale=2):
	recommendations_output = gr.Markdown(
	label="🩺 Clinical Recommendations",
	value="Upload files and click 'Analyze' to get AI-powered clinical insights..."
	)

	with gr.Row():
	timeline_plot = gr.Plot(label="📈 Emotion Timeline Analysis", height=500)

	with gr.Row():
	with gr.Column():
	face_summary_plot = gr.Plot(label="😊 Facial Expression Summary")
	with gr.Column():
	voice_summary_plot = gr.Plot(label="🎤 Voice Emotion Summary")

	analyze_btn.click(
	fn=process_consultation,
	inputs=[video_input, audio_input],
	outputs=[timeline_plot, face_summary_plot, voice_summary_plot, recommendations_output],
	show_progress=True
	)

	# Real-time Tab
	with gr.Tab("🎙️ Real-time Monitoring"):
	gr.Markdown("""
	### Live voice emotion analysis during consultation
	Click the microphone button and speak to see real-time emotion detection
	""")

	with gr.Row():
	with gr.Column(scale=1):
	audio_realtime = gr.Audio(
	sources=["microphone"],
	type="numpy",
	label="🎤 Live Audio Input",
	streaming=False
	)

	with gr.Column(scale=2):
	realtime_output = gr.Markdown(
	label="📊 Real-time Analysis Results",
	value="🎤 Ready for real-time analysis\n\nClick the microphone and speak to see live emotion detection using our AI models."
	)

	audio_realtime.change(
	fn=real_time_analysis,
	inputs=[audio_realtime],
	outputs=[realtime_output]
	)

	# Technical Details Tab
	with gr.Tab("🔬 Model & Technical Information"):
	gr.Markdown(f"""
	### AI Models & Architecture

	Current System Status:
	- 🖥️ Processing Device: {analyzer.device}
	- 🧠 **