Update app.py
a4749f1 verified
import gradio as gr
import librosa
import numpy as np
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from simple_salesforce import Salesforce
import os
from datetime import datetime
import logging
import webrtcvad
# Set up logging for usage metrics and debugging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
usage_metrics = {"total_assessments": 0} # Simple in-memory metric (to be expanded with Salesforce)
# Salesforce credentials (assumed secure via environment variables)
SF_USERNAME = os.getenv("SF_USERNAME")
SF_PASSWORD = os.getenv("SF_PASSWORD")
SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
SF_INSTANCE_URL = os.getenv("SF_INSTANCE_URL", "https://login.salesforce.com")
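# On Hugging Face Spaces these would typically be set as repository secrets, which the
# runtime exposes as environment variables (the variable names here are this app's own convention).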
# Initialize Salesforce
sf = None
try:
    if all([SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN]):
        sf = Salesforce(
            username=SF_USERNAME,
            password=SF_PASSWORD,
            security_token=SF_SECURITY_TOKEN,
            instance_url=SF_INSTANCE_URL
        )
        logger.info("Connected to Salesforce for user management")
    else:
        logger.warning("Salesforce credentials missing; user management disabled")
except Exception as e:
    logger.error(f"Salesforce connection failed: {str(e)}")
# Load Whisper model for speech-to-text
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
whisper_model.config.forced_decoder_ids = whisper_processor.get_decoder_prompt_ids(language="english", task="transcribe")
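# Note: newer transformers releases deprecate setting forced_decoder_ids on the config
# and prefer passing the options at generation time, e.g.
# whisper_model.generate(input_features, language="english", task="transcribe");
# the config approach above still works on older versions but may log a warning.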
# Initialize VAD
vad = webrtcvad.Vad(mode=2) # Moderate mode for balanced voice detection
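# webrtcvad accepts modes 0-3 (3 = most aggressive) and only processes 16-bit mono PCM
# at 8/16/32/48 kHz in frames of exactly 10, 20, or 30 ms; that constraint drives the
# fixed-size frame slicing and int16 conversion in extract_health_features below.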
def extract_health_features(audio, sr):
    """Extract health-related audio features (pitch, jitter, shimmer, energy)."""
    try:
        # Normalize audio to [-1, 1]
        peak = np.max(np.abs(audio))
        audio = audio / peak if peak != 0 else audio
        # Voice Activity Detection over fixed 30 ms frames
        frame_duration = 30  # ms
        frame_samples = int(sr * frame_duration / 1000)
        frames = [audio[i:i + frame_samples] for i in range(0, len(audio), frame_samples)]
        voiced_frames = [
            frame for frame in frames
            if len(frame) == frame_samples and vad.is_speech((frame * 32767).astype(np.int16).tobytes(), sr)
        ]
        if not voiced_frames:
            raise ValueError("No voiced segments detected")
        voiced_audio = np.concatenate(voiced_frames)
        # Pitch (F0) restricted to a typical adult range (75-300 Hz)
        pitches, magnitudes = librosa.piptrack(y=voiced_audio, sr=sr, fmin=75, fmax=300)
        valid_pitches = [p for p in pitches[magnitudes > 0] if 75 <= p <= 300]
        pitch = np.mean(valid_pitches) if valid_pitches else 0
        # Jitter approximated as the coefficient of variation of F0
        jitter = np.std(valid_pitches) / pitch if pitch and valid_pitches else 0
        if jitter > 0.1:  # Cap at a 0.1 ratio, i.e. 10% (higher values are likely noise)
            jitter = 0.1
            logger.warning("Jitter capped at 10% due to possible noise or distortion")
        # Shimmer approximated as the coefficient of variation of the RMS amplitude
        amplitudes = librosa.feature.rms(y=voiced_audio, frame_length=2048, hop_length=512)[0]
        shimmer = np.std(amplitudes) / np.mean(amplitudes) if np.mean(amplitudes) else 0
        if shimmer > 0.1:  # Cap at a 0.1 ratio, i.e. 10% (higher values are likely noise)
            shimmer = 0.1
            logger.warning("Shimmer capped at 10% due to possible noise or distortion")
        # Energy: mean RMS of the voiced audio (reuses the frames computed above)
        energy = np.mean(amplitudes)
        return {
            "pitch": pitch,
            "jitter": jitter * 100,    # as a percentage
            "shimmer": shimmer * 100,  # as a percentage
            "energy": energy
        }
    except Exception as e:
        logger.error(f"Feature extraction failed: {str(e)}")
        raise
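# Minimal local check of the extractor (assuming a hypothetical 16 kHz mono "sample.wav"):
#   y, sr = librosa.load("sample.wav", sr=16000)
#   print(extract_health_features(y, sr))
# Returns a dict with pitch in Hz, jitter and shimmer as percentages, and mean RMS energy.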
def transcribe_audio(audio):
    """Transcribe audio to text using Whisper."""
    try:
        inputs = whisper_processor(audio, sampling_rate=16000, return_tensors="pt")
        with torch.no_grad():
            generated_ids = whisper_model.generate(inputs["input_features"])
        transcription = whisper_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        logger.info(f"Transcription: {transcription}")
        return transcription
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        return ""
def analyze_symptoms(text):
    """Mock symptom-to-disease analysis (placeholder for symptom-2-disease-net)."""
    text = text.lower()
    feedback = []
    if "cough" in text or "difficulty breathing" in text:
        feedback.append("Based on your input, you may have a respiratory issue, such as bronchitis or asthma. Please consult a doctor.")
    elif any(k in text for k in ("stress", "tired", "fatigue")):  # "stress" also matches "stressed"
        feedback.append("Your description suggests possible stress or fatigue, potentially linked to anxiety or exhaustion. Consider seeking medical advice.")
    else:
        feedback.append("Your input didn't clearly indicate specific symptoms. Please describe any health concerns (e.g., cough, stress) and consult a healthcare provider for a thorough check.")
    return "\n".join(feedback)
def analyze_voice(audio_file=None):
    """Analyze voice for health indicators."""
    global usage_metrics
    usage_metrics["total_assessments"] += 1
    logger.info(f"Total assessments: {usage_metrics['total_assessments']}")
    try:
        # Load audio from file if provided
        if audio_file and os.path.exists(audio_file):
            audio, sr = librosa.load(audio_file, sr=16000)
        else:
            raise ValueError("No valid audio file provided for analysis")
        if len(audio) < sr:
            raise ValueError("Audio too short (minimum 1 second)")
        # Extract voice features
        features = extract_health_features(audio, sr)
        # Transcribe audio for symptom analysis
        transcription = transcribe_audio(audio)
        symptom_feedback = analyze_symptoms(transcription) if transcription else "No transcription available. Please record again with clear speech."
        # Analyze voice features for health indicators
        feedback = []
        respiratory_score = features["jitter"]
        mental_health_score = features["shimmer"]
        # Rule-based analysis with personalized feedback
        if respiratory_score > 1.0:
            feedback.append(f"Your voice indicates elevated jitter ({respiratory_score:.2f}%), which may suggest respiratory issues. Consult a doctor.")
        if mental_health_score > 5.0:
            feedback.append(f"Your voice shows elevated shimmer ({mental_health_score:.2f}%), possibly indicating stress or emotional strain. Consider a health check.")
        if features["energy"] < 0.01:
            feedback.append(f"Your vocal energy is low ({features['energy']:.4f}), which might point to fatigue. Seek medical advice if this persists.")
        if not feedback and not symptom_feedback.startswith("No transcription"):
            feedback.append("Your voice analysis shows no immediate health concerns based on current data.")
        # Combine voice and symptom feedback
        feedback.append("\n**Symptom Feedback (Based on Your Input)**:")
        feedback.append(symptom_feedback)
        feedback.append("\n**Voice Analysis Details**:")
        feedback.append(f"Pitch: {features['pitch']:.2f} Hz (average fundamental frequency)")
        feedback.append(f"Jitter: {respiratory_score:.2f}% (pitch variation, higher values may indicate respiratory issues)")
        feedback.append(f"Shimmer: {mental_health_score:.2f}% (amplitude variation, higher values may indicate stress)")
        feedback.append(f"Energy: {features['energy']:.4f} (vocal intensity, lower values may indicate fatigue)")
        feedback.append(f"Transcription: {transcription if transcription else 'None'}")
        feedback.append("\n**Disclaimer**: This is a preliminary analysis, not a medical diagnosis. Always consult a healthcare provider for professional evaluation.")
        feedback_str = "\n".join(feedback)
        # Store in Salesforce (with consent implied via credentials)
        if sf:
            store_in_salesforce(audio_file, feedback_str, respiratory_score, mental_health_score, features, transcription)
        # Clean up audio file for HIPAA/GDPR compliance
        if audio_file and os.path.exists(audio_file):
            try:
                os.remove(audio_file)
                logger.info(f"Deleted audio file: {audio_file} for compliance")
            except Exception as e:
                logger.error(f"Failed to delete audio file: {str(e)}")
        return feedback_str
    except Exception as e:
        logger.error(f"Audio processing failed: {str(e)}")
        return f"Error: {str(e)}"
def store_in_salesforce(audio_file, feedback, respiratory_score, mental_health_score, features, transcription):
    """Store assessment results in the HealthAssessment__c custom object."""
    try:
        sf.HealthAssessment__c.create({
            "AssessmentDate__c": datetime.utcnow().isoformat(),
            "Feedback__c": feedback,
            "RespiratoryScore__c": float(respiratory_score),
            "MentalHealthScore__c": float(mental_health_score),
            "AudioFileName__c": os.path.basename(audio_file) if audio_file else "user_recorded_audio",
            "Pitch__c": float(features["pitch"]),
            "Jitter__c": float(features["jitter"]),
            "Shimmer__c": float(features["shimmer"]),
            "Energy__c": float(features["energy"]),
            "Transcription__c": transcription
        })
        logger.info("Stored assessment in Salesforce")
    except Exception as e:
        logger.error(f"Salesforce storage failed: {str(e)}")
# Gradio interface with accessibility focus
iface = gr.Interface(
    fn=analyze_voice,
    inputs=gr.Audio(type="filepath", label="Record or Upload Your Voice (WAV, MP3, FLAC, 1+ sec)", format="wav"),
    outputs=gr.Textbox(label="Health Assessment Results", elem_id="health-results"),
    title="Smart Voicebot for Public Health",
    description="Record or upload your voice (minimum 1 second) to receive a preliminary health check. Speak clearly in English about your symptoms (e.g., 'I have a cough' or 'I feel stressed'). This tool is accessible via web and mobile.",
    theme="default",  # Basic theme; enhance for screen readers later
    allow_flagging="never"  # Prevent data retention without consent
)
if __name__ == "__main__":
    # The logging format already timestamps each record, so no hardcoded time is needed here
    logger.info("Starting Voice Health Analyzer")
    iface.launch(server_name="0.0.0.0", server_port=7860)