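"""Voice Health Analyzer.

A Gradio app that takes a short voice recording, extracts acoustic features
(pitch, jitter, shimmer, energy) with librosa and webrtcvad, transcribes the
speech with Whisper, applies simple rule-based health heuristics, and
optionally stores results in Salesforce.
"""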
import gradio as gr
import librosa
import numpy as np
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from simple_salesforce import Salesforce
import os
from datetime import datetime
import logging
import webrtcvad

# Set up logging for usage metrics and debugging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
usage_metrics = {"total_assessments": 0}  # Simple in-memory metric (to be expanded with Salesforce)

# Salesforce credentials (assumed secure via environment variables)
SF_USERNAME = os.getenv("SF_USERNAME")
SF_PASSWORD = os.getenv("SF_PASSWORD")
SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
SF_INSTANCE_URL = os.getenv("SF_INSTANCE_URL", "https://login.salesforce.com")

# Initialize Salesforce
sf = None
try:
    if all([SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN]):
        sf = Salesforce(
            username=SF_USERNAME,
            password=SF_PASSWORD,
            security_token=SF_SECURITY_TOKEN,
            instance_url=SF_INSTANCE_URL
        )
        logger.info("Connected to Salesforce for user management")
    else:
        logger.warning("Salesforce credentials missing; user management disabled")
except Exception as e:
    logger.error(f"Salesforce connection failed: {str(e)}")

# Load Whisper model for speech-to-text
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
whisper_model.config.forced_decoder_ids = whisper_processor.get_decoder_prompt_ids(language="english", task="transcribe")
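# Note: forced_decoder_ids pins the decoder to English transcription so Whisper
# does not auto-detect the language on every request; newer transformers
# releases prefer passing language/task to generate() instead.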

# Initialize VAD
vad = webrtcvad.Vad(mode=2)  # Moderate mode for balanced voice detection
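# webrtcvad requires 16-bit mono PCM in 10/20/30 ms frames at 8/16/32/48 kHz;
# the 30 ms frames at 16 kHz used below (480 samples) satisfy this.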

def extract_health_features(audio, sr):
    """Extract health-related audio features."""
    try:
        # Normalize audio
        audio = audio / np.max(np.abs(audio)) if np.max(np.abs(audio)) != 0 else audio

        # Voice Activity Detection
        frame_duration = 30  # ms
        frame_samples = int(sr * frame_duration / 1000)
        frames = [audio[i:i + frame_samples] for i in range(0, len(audio), frame_samples)]
        voiced_frames = [
            frame for frame in frames
            if len(frame) == frame_samples
            # Scale by 32767 (not 32768) so a full-scale sample of 1.0 does not overflow int16
            and vad.is_speech((frame * 32767).astype(np.int16).tobytes(), sr)
        ]
        if not voiced_frames:
            raise ValueError("No voiced segments detected")
        voiced_audio = np.concatenate(voiced_frames)

        # Pitch (F0) restricted to a typical adult range (75-300 Hz)
        pitches, magnitudes = librosa.piptrack(y=voiced_audio, sr=sr, fmin=75, fmax=300)
        valid_pitches = [p for p in pitches[magnitudes > 0] if 75 <= p <= 300]
        pitch = np.mean(valid_pitches) if valid_pitches else 0
        # Coefficient of pitch variation, used here as a simple jitter proxy
        jitter = np.std(valid_pitches) / pitch if pitch and valid_pitches else 0
        if jitter > 0.10:  # Cap at a 0.10 ratio (10% after conversion); extreme values are likely noise
            jitter = 0.10
            logger.warning("Jitter capped at 10% due to possible noise or distortion")

        # Shimmer: coefficient of RMS amplitude variation
        amplitudes = librosa.feature.rms(y=voiced_audio, frame_length=2048, hop_length=512)[0]
        shimmer = np.std(amplitudes) / np.mean(amplitudes) if np.mean(amplitudes) else 0
        if shimmer > 0.10:  # Cap at a 0.10 ratio (10% after conversion); extreme values are likely noise
            shimmer = 0.10
            logger.warning("Shimmer capped at 10% due to possible noise or distortion")

        # Energy
        energy = np.mean(librosa.feature.rms(y=voiced_audio, frame_length=2048, hop_length=512)[0])

        return {
            "pitch": pitch,
            "jitter": jitter * 100,  # Convert to percentage
            "shimmer": shimmer * 100,  # Convert to percentage
            "energy": energy
        }
    except Exception as e:
        logger.error(f"Feature extraction failed: {str(e)}")
        raise

def transcribe_audio(audio):
    """Transcribe audio to text using Whisper."""
    try:
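        # Whisper expects 16 kHz mono float audio; the caller loads audio with
        # librosa at sr=16000, so the sampling_rate here matches.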
        inputs = whisper_processor(audio, sampling_rate=16000, return_tensors="pt")
        with torch.no_grad():
            generated_ids = whisper_model.generate(inputs["input_features"])
        transcription = whisper_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        logger.info(f"Transcription: {transcription}")
        return transcription
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        return ""

def analyze_symptoms(text):
    """Mock symptom-to-disease analysis (placeholder for symptom-2-disease-net)."""
    text = text.lower()
    feedback = []
    if "cough" in text or "difficulty breathing" in text:
        feedback.append("Based on your input, you may have a respiratory issue, such as bronchitis or asthma. Please consult a doctor.")
    elif "stressed" in text or "stress" in text or "tired" in text or "fatigue" in text:
        feedback.append("Your description suggests possible stress or fatigue, potentially linked to anxiety or exhaustion. Consider seeking medical advice.")
    else:
        feedback.append("Your input didn’t clearly indicate specific symptoms. Please describe any health concerns (e.g., cough, stress) and consult a healthcare provider for a thorough check.")
    return "\n".join(feedback)

def analyze_voice(audio_file=None):
    """Analyze voice for health indicators."""
    global usage_metrics
    usage_metrics["total_assessments"] += 1
    logger.info(f"Total assessments: {usage_metrics['total_assessments']}")

    try:
        # Load audio from file if provided
        if audio_file and os.path.exists(audio_file):
            audio, sr = librosa.load(audio_file, sr=16000)
        else:
            raise ValueError("No valid audio file provided for analysis")

        if len(audio) < sr:
            raise ValueError("Audio too short (minimum 1 second)")

        # Extract voice features
        features = extract_health_features(audio, sr)

        # Transcribe audio for symptom analysis
        transcription = transcribe_audio(audio)
        symptom_feedback = analyze_symptoms(transcription) if transcription else "No transcription available. Please record again with clear speech."

        # Analyze voice features for health indicators
        feedback = []
        respiratory_score = features["jitter"]
        mental_health_score = features["shimmer"]

        # Rule-based analysis with personalized feedback
        if respiratory_score > 1.0:
            feedback.append(f"Your voice indicates elevated jitter ({respiratory_score:.2f}%), which may suggest respiratory issues. Consult a doctor.")
        if mental_health_score > 5.0:
            feedback.append(f"Your voice shows elevated shimmer ({mental_health_score:.2f}%), possibly indicating stress or emotional strain. Consider a health check.")
        if features["energy"] < 0.01:
            feedback.append(f"Your vocal energy is low ({features['energy']:.4f}), which might point to fatigue. Seek medical advice if this persists.")

        if not feedback and not symptom_feedback.startswith("No transcription"):
            feedback.append("Your voice analysis shows no immediate health concerns based on current data.")

        # Combine voice and symptom feedback
        feedback.append("\n**Symptom Feedback (Based on Your Input)**:")
        feedback.append(symptom_feedback)
        feedback.append("\n**Voice Analysis Details**:")
        feedback.append(f"Pitch: {features['pitch']:.2f} Hz (average fundamental frequency)")
        feedback.append(f"Jitter: {respiratory_score:.2f}% (pitch variation, higher values may indicate respiratory issues)")
        feedback.append(f"Shimmer: {mental_health_score:.2f}% (amplitude variation, higher values may indicate stress)")
        feedback.append(f"Energy: {features['energy']:.4f} (vocal intensity, lower values may indicate fatigue)")
        feedback.append(f"Transcription: {transcription if transcription else 'None'}")
        feedback.append("\n**Disclaimer**: This is a preliminary analysis, not a medical diagnosis. Always consult a healthcare provider for professional evaluation.")

        feedback_str = "\n".join(feedback)

        # Store in Salesforce if configured (obtain user consent before persisting health data)
        if sf:
            store_in_salesforce(audio_file, feedback_str, respiratory_score, mental_health_score, features, transcription)

        # Clean up audio file for HIPAA/GDPR compliance
        if audio_file and os.path.exists(audio_file):
            try:
                os.remove(audio_file)
                logger.info(f"Deleted audio file: {audio_file} for compliance")
            except Exception as e:
                logger.error(f"Failed to delete audio file: {str(e)}")

        return feedback_str
    except Exception as e:
        logger.error(f"Audio processing failed: {str(e)}")
        return f"Error: {str(e)}"

def store_in_salesforce(audio_file, feedback, respiratory_score, mental_health_score, features, transcription):
    """Store results in Salesforce with encrypted data."""
    try:
        sf.HealthAssessment__c.create({
            "AssessmentDate__c": datetime.utcnow().isoformat(),
            "Feedback__c": feedback,
            "RespiratoryScore__c": float(respiratory_score),
            "MentalHealthScore__c": float(mental_health_score),
            "AudioFileName__c": os.path.basename(audio_file) if audio_file else "user_recorded_audio",
            "Pitch__c": float(features["pitch"]),
            "Jitter__c": float(features["jitter"]),
            "Shimmer__c": float(features["shimmer"]),
            "Energy__c": float(features["energy"]),
            "Transcription__c": transcription
        })
        logger.info("Stored assessment in Salesforce")
    except Exception as e:
        logger.error(f"Salesforce storage failed: {str(e)}")

# Gradio interface with accessibility focus
iface = gr.Interface(
    fn=analyze_voice,
    inputs=gr.Audio(type="filepath", label="Record or Upload Your Voice (WAV, MP3, FLAC, 1+ sec)", format="wav"),
    outputs=gr.Textbox(label="Health Assessment Results", elem_id="health-results"),
    title="Smart Voicebot for Public Health",
    description="Record or upload your voice (minimum 1 second) to receive a preliminary health check. Speak clearly in English about your symptoms (e.g., 'I have a cough' or 'I feel stressed'). This tool is accessible via web and mobile.",
    theme="default",  # Basic theme; enhance for screen readers later
    allow_flagging="never"  # Prevent data retention without consent
)
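# Once running, open http://localhost:7860 (or the host's port 7860), record or
# upload a clip of at least one second, and the textbox returns the combined
# voice-feature and symptom feedback.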

if __name__ == "__main__":
    logger.info("Starting Voice Health Analyzer at 12:34 PM IST, June 23, 2025")
    iface.launch(server_name="0.0.0.0", server_port=7860)