Update app.py
a4749f1 verified
import gradio as gr
import librosa
import numpy as np
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from simple_salesforce import Salesforce
import os
from datetime import datetime
import logging
import webrtcvad
# Set up logging for usage metrics and debugging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
usage_metrics = {"total_assessments": 0} # Simple in-memory metric (to be expanded with Salesforce)
# Salesforce credentials (assumed secure via environment variables)
SF_USERNAME = os.getenv("SF_USERNAME")
SF_PASSWORD = os.getenv("SF_PASSWORD")
SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
SF_INSTANCE_URL = os.getenv("SF_INSTANCE_URL", "https://login.salesforce.com")
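# On Hugging Face Spaces these would typically be set as repository secrets, which the
# runtime exposes as environment variables (the variable names here are this app's own convention).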
# Initialize Salesforce
sf = None
try:
    if all([SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN]):
        sf = Salesforce(
            username=SF_USERNAME,
            password=SF_PASSWORD,
            security_token=SF_SECURITY_TOKEN,
            instance_url=SF_INSTANCE_URL
        )
        logger.info("Connected to Salesforce for user management")
    else:
        logger.warning("Salesforce credentials missing; user management disabled")
except Exception as e:
    logger.error(f"Salesforce connection failed: {str(e)}")
# Load Whisper model for speech-to-text
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
whisper_model.config.forced_decoder_ids = whisper_processor.get_decoder_prompt_ids(language="english", task="transcribe")
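# Note: newer transformers releases deprecate setting forced_decoder_ids on the config
# and prefer passing the options at generation time, e.g.
# whisper_model.generate(input_features, language="english", task="transcribe");
# the config approach above still works on older versions but may log a warning.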
# Initialize VAD
vad = webrtcvad.Vad(mode=2) # Moderate mode for balanced voice detection
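# webrtcvad accepts modes 0-3 (3 = most aggressive) and only processes 16-bit mono PCM
# at 8/16/32/48 kHz in frames of exactly 10, 20, or 30 ms; that constraint drives the
# fixed-size frame slicing and int16 conversion in extract_health_features below.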
def extract_health_features(audio, sr):
    """Extract health-related audio features (pitch, jitter, shimmer, energy)."""
    try:
        # Normalize audio to [-1, 1]
        peak = np.max(np.abs(audio))
        audio = audio / peak if peak != 0 else audio
        # Voice Activity Detection over fixed 30 ms frames
        frame_duration = 30  # ms
        frame_samples = int(sr * frame_duration / 1000)
        frames = [audio[i:i + frame_samples] for i in range(0, len(audio), frame_samples)]
        voiced_frames = [
            frame for frame in frames
            if len(frame) == frame_samples and vad.is_speech((frame * 32767).astype(np.int16).tobytes(), sr)
        ]
        if not voiced_frames:
            raise ValueError("No voiced segments detected")
        voiced_audio = np.concatenate(voiced_frames)
        # Pitch (F0) restricted to a typical adult range (75-300 Hz)
        pitches, magnitudes = librosa.piptrack(y=voiced_audio, sr=sr, fmin=75, fmax=300)
        valid_pitches = [p for p in pitches[magnitudes > 0] if 75 <= p <= 300]
        pitch = np.mean(valid_pitches) if valid_pitches else 0
        # Jitter approximated as the coefficient of variation of F0
        jitter = np.std(valid_pitches) / pitch if pitch and valid_pitches else 0
        if jitter > 0.1:  # Cap at a 0.1 ratio, i.e. 10% (higher values are likely noise)
            jitter = 0.1
            logger.warning("Jitter capped at 10% due to possible noise or distortion")
        # Shimmer approximated as the coefficient of variation of the RMS amplitude
        amplitudes = librosa.feature.rms(y=voiced_audio, frame_length=2048, hop_length=512)[0]
        shimmer = np.std(amplitudes) / np.mean(amplitudes) if np.mean(amplitudes) else 0
        if shimmer > 0.1:  # Cap at a 0.1 ratio, i.e. 10% (higher values are likely noise)
            shimmer = 0.1
            logger.warning("Shimmer capped at 10% due to possible noise or distortion")
        # Energy: mean RMS of the voiced audio (reuses the frames computed above)
        energy = np.mean(amplitudes)
        return {
            "pitch": pitch,
            "jitter": jitter * 100,    # as a percentage
            "shimmer": shimmer * 100,  # as a percentage
            "energy": energy
        }
    except Exception as e:
        logger.error(f"Feature extraction failed: {str(e)}")
        raise
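# Minimal local check of the extractor (assuming a hypothetical 16 kHz mono "sample.wav"):
#   y, sr = librosa.load("sample.wav", sr=16000)
#   print(extract_health_features(y, sr))
# Returns a dict with pitch in Hz, jitter and shimmer as percentages, and mean RMS energy.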
def transcribe_audio(audio):
    """Transcribe audio to text using Whisper."""
    try:
        inputs = whisper_processor(audio, sampling_rate=16000, return_tensors="pt")
        with torch.no_grad():
            generated_ids = whisper_model.generate(inputs["input_features"])
        transcription = whisper_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        logger.info(f"Transcription: {transcription}")
        return transcription
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        return ""
def analyze_symptoms(text):
    """Mock symptom-to-disease analysis (placeholder for symptom-2-disease-net)."""
    text = text.lower()
    feedback = []
    if "cough" in text or "difficulty breathing" in text:
        feedback.append("Based on your input, you may have a respiratory issue, such as bronchitis or asthma. Please consult a doctor.")
    elif any(k in text for k in ("stress", "tired", "fatigue")):  # "stress" also matches "stressed"
        feedback.append("Your description suggests possible stress or fatigue, potentially linked to anxiety or exhaustion. Consider seeking medical advice.")
    else:
        feedback.append("Your input didn't clearly indicate specific symptoms. Please describe any health concerns (e.g., cough, stress) and consult a healthcare provider for a thorough check.")
    return "\n".join(feedback)
def analyze_voice(audio_file=None):
    """Analyze voice for health indicators."""
    global usage_metrics
    usage_metrics["total_assessments"] += 1
    logger.info(f"Total assessments: {usage_metrics['total_assessments']}")
    try:
        # Load audio from file if provided
        if audio_file and os.path.exists(audio_file):
            audio, sr = librosa.load(audio_file, sr=16000)
        else:
            raise ValueError("No valid audio file provided for analysis")
        if len(audio) < sr:
            raise ValueError("Audio too short (minimum 1 second)")
        # Extract voice features
        features = extract_health_features(audio, sr)
        # Transcribe audio for symptom analysis
        transcription = transcribe_audio(audio)
        symptom_feedback = analyze_symptoms(transcription) if transcription else "No transcription available. Please record again with clear speech."
        # Analyze voice features for health indicators
        feedback = []
        respiratory_score = features["jitter"]
        mental_health_score = features["shimmer"]
        # Rule-based analysis with personalized feedback
        if respiratory_score > 1.0:
            feedback.append(f"Your voice indicates elevated jitter ({respiratory_score:.2f}%), which may suggest respiratory issues. Consult a doctor.")
        if mental_health_score > 5.0:
            feedback.append(f"Your voice shows elevated shimmer ({mental_health_score:.2f}%), possibly indicating stress or emotional strain. Consider a health check.")
        if features["energy"] < 0.01:
            feedback.append(f"Your vocal energy is low ({features['energy']:.4f}), which might point to fatigue. Seek medical advice if this persists.")
        if not feedback and not symptom_feedback.startswith("No transcription"):
            feedback.append("Your voice analysis shows no immediate health concerns based on current data.")
        # Combine voice and symptom feedback
        feedback.append("\n**Symptom Feedback (Based on Your Input)**:")
        feedback.append(symptom_feedback)
        feedback.append("\n**Voice Analysis Details**:")
        feedback.append(f"Pitch: {features['pitch']:.2f} Hz (average fundamental frequency)")
        feedback.append(f"Jitter: {respiratory_score:.2f}% (pitch variation, higher values may indicate respiratory issues)")
        feedback.append(f"Shimmer: {mental_health_score:.2f}% (amplitude variation, higher values may indicate stress)")
        feedback.append(f"Energy: {features['energy']:.4f} (vocal intensity, lower values may indicate fatigue)")
        feedback.append(f"Transcription: {transcription if transcription else 'None'}")
        feedback.append("\n**Disclaimer**: This is a preliminary analysis, not a medical diagnosis. Always consult a healthcare provider for professional evaluation.")
        feedback_str = "\n".join(feedback)
        # Store in Salesforce (with consent implied via credentials)
        if sf:
            store_in_salesforce(audio_file, feedback_str, respiratory_score, mental_health_score, features, transcription)
        # Clean up audio file for HIPAA/GDPR compliance
        if audio_file and os.path.exists(audio_file):
            try:
                os.remove(audio_file)
                logger.info(f"Deleted audio file: {audio_file} for compliance")
            except Exception as e:
                logger.error(f"Failed to delete audio file: {str(e)}")
        return feedback_str
    except Exception as e:
        logger.error(f"Audio processing failed: {str(e)}")
        return f"Error: {str(e)}"
def store_in_salesforce(audio_file, feedback, respiratory_score, mental_health_score, features, transcription):
    """Store assessment results in the HealthAssessment__c custom object."""
    try:
        sf.HealthAssessment__c.create({
            "AssessmentDate__c": datetime.utcnow().isoformat(),
            "Feedback__c": feedback,
            "RespiratoryScore__c": float(respiratory_score),
            "MentalHealthScore__c": float(mental_health_score),
            "AudioFileName__c": os.path.basename(audio_file) if audio_file else "user_recorded_audio",
            "Pitch__c": float(features["pitch"]),
            "Jitter__c": float(features["jitter"]),
            "Shimmer__c": float(features["shimmer"]),
            "Energy__c": float(features["energy"]),
            "Transcription__c": transcription
        })
        logger.info("Stored assessment in Salesforce")
    except Exception as e:
        logger.error(f"Salesforce storage failed: {str(e)}")
# Gradio interface with accessibility focus
iface = gr.Interface(
    fn=analyze_voice,
    inputs=gr.Audio(type="filepath", label="Record or Upload Your Voice (WAV, MP3, FLAC, 1+ sec)", format="wav"),
    outputs=gr.Textbox(label="Health Assessment Results", elem_id="health-results"),
    title="Smart Voicebot for Public Health",
    description="Record or upload your voice (minimum 1 second) to receive a preliminary health check. Speak clearly in English about your symptoms (e.g., 'I have a cough' or 'I feel stressed'). This tool is accessible via web and mobile.",
    theme="default",  # Basic theme; enhance for screen readers later
    allow_flagging="never"  # Prevent data retention without consent
)
if __name__ == "__main__":
    # The logging format already timestamps each record, so no hardcoded time is needed here
    logger.info("Starting Voice Health Analyzer")
    iface.launch(server_name="0.0.0.0", server_port=7860)