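"""Voice Health Analyzer.

A Gradio app that takes a short voice recording, extracts acoustic features
(pitch, jitter, shimmer, energy) with librosa and webrtcvad, transcribes the
speech with Whisper, applies simple rule-based health heuristics, and
optionally stores results in Salesforce.
"""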
import gradio as gr
import librosa
import numpy as np
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from simple_salesforce import Salesforce
import os
from datetime import datetime
import logging
import webrtcvad

# Set up logging for usage metrics and debugging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
usage_metrics = {"total_assessments": 0}  # Simple in-memory metric (to be expanded with Salesforce)

# Salesforce credentials (assumed secure via environment variables)
SF_USERNAME = os.getenv("SF_USERNAME")
SF_PASSWORD = os.getenv("SF_PASSWORD")
SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
SF_INSTANCE_URL = os.getenv("SF_INSTANCE_URL", "https://login.salesforce.com")

# Initialize Salesforce
sf = None
try:
    if all([SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN]):
        sf = Salesforce(
            username=SF_USERNAME,
            password=SF_PASSWORD,
            security_token=SF_SECURITY_TOKEN,
            instance_url=SF_INSTANCE_URL
        )
        logger.info("Connected to Salesforce for user management")
    else:
        logger.warning("Salesforce credentials missing; user management disabled")
except Exception as e:
    logger.error(f"Salesforce connection failed: {str(e)}")

# Load Whisper model for speech-to-text
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
whisper_model.config.forced_decoder_ids = whisper_processor.get_decoder_prompt_ids(language="english", task="transcribe")
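# Note: forced_decoder_ids pins the decoder to English transcription so Whisper
# does not auto-detect the language on every request; newer transformers
# releases prefer passing language/task to generate() instead.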

# Initialize VAD
vad = webrtcvad.Vad(mode=2)  # Moderate mode for balanced voice detection
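# webrtcvad requires 16-bit mono PCM in 10/20/30 ms frames at 8/16/32/48 kHz;
# the 30 ms frames at 16 kHz used below (480 samples) satisfy this.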

def extract_health_features(audio, sr):
    """Extract health-related audio features."""
    try:
        # Normalize audio
        audio = audio / np.max(np.abs(audio)) if np.max(np.abs(audio)) != 0 else audio

        # Voice Activity Detection
        frame_duration = 30  # ms
        frame_samples = int(sr * frame_duration / 1000)
        frames = [audio[i:i + frame_samples] for i in range(0, len(audio), frame_samples)]
        voiced_frames = [
            frame for frame in frames
            if len(frame) == frame_samples
            # Scale by 32767 (not 32768) so a full-scale sample of 1.0 does not overflow int16
            and vad.is_speech((frame * 32767).astype(np.int16).tobytes(), sr)
        ]
        if not voiced_frames:
            raise ValueError("No voiced segments detected")
        voiced_audio = np.concatenate(voiced_frames)

        # Pitch (F0) restricted to a typical adult range (75-300 Hz)
        pitches, magnitudes = librosa.piptrack(y=voiced_audio, sr=sr, fmin=75, fmax=300)
        valid_pitches = [p for p in pitches[magnitudes > 0] if 75 <= p <= 300]
        pitch = np.mean(valid_pitches) if valid_pitches else 0
        # Coefficient of pitch variation, used here as a simple jitter proxy
        jitter = np.std(valid_pitches) / pitch if pitch and valid_pitches else 0
        if jitter > 0.10:  # Cap at a 0.10 ratio (10% after conversion); extreme values are likely noise
            jitter = 0.10
            logger.warning("Jitter capped at 10% due to possible noise or distortion")

        # Shimmer: coefficient of RMS amplitude variation
        amplitudes = librosa.feature.rms(y=voiced_audio, frame_length=2048, hop_length=512)[0]
        shimmer = np.std(amplitudes) / np.mean(amplitudes) if np.mean(amplitudes) else 0
        if shimmer > 0.10:  # Cap at a 0.10 ratio (10% after conversion); extreme values are likely noise
            shimmer = 0.10
            logger.warning("Shimmer capped at 10% due to possible noise or distortion")

        # Energy
        energy = np.mean(librosa.feature.rms(y=voiced_audio, frame_length=2048, hop_length=512)[0])

        return {
            "pitch": pitch,
            "jitter": jitter * 100,  # Convert to percentage
            "shimmer": shimmer * 100,  # Convert to percentage
            "energy": energy
        }
    except Exception as e:
        logger.error(f"Feature extraction failed: {str(e)}")
        raise

def transcribe_audio(audio):
    """Transcribe audio to text using Whisper."""
    try:
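        # Whisper expects 16 kHz mono float audio; the caller loads audio with
        # librosa at sr=16000, so the sampling_rate here matches.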
        inputs = whisper_processor(audio, sampling_rate=16000, return_tensors="pt")
        with torch.no_grad():
            generated_ids = whisper_model.generate(inputs["input_features"])
        transcription = whisper_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        logger.info(f"Transcription: {transcription}")
        return transcription
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        return ""

def analyze_symptoms(text):
    """Mock symptom-to-disease analysis (placeholder for symptom-2-disease-net)."""
    text = text.lower()
    feedback = []
    if "cough" in text or "difficulty breathing" in text:
        feedback.append("Based on your input, you may have a respiratory issue, such as bronchitis or asthma. Please consult a doctor.")
    elif "stressed" in text or "stress" in text or "tired" in text or "fatigue" in text:
        feedback.append("Your description suggests possible stress or fatigue, potentially linked to anxiety or exhaustion. Consider seeking medical advice.")
    else:
        feedback.append("Your input didn’t clearly indicate specific symptoms. Please describe any health concerns (e.g., cough, stress) and consult a healthcare provider for a thorough check.")
    return "\n".join(feedback)

def analyze_voice(audio_file=None):
    """Analyze voice for health indicators."""
    global usage_metrics
    usage_metrics["total_assessments"] += 1
    logger.info(f"Total assessments: {usage_metrics['total_assessments']}")

    try:
        # Load audio from file if provided
        if audio_file and os.path.exists(audio_file):
            audio, sr = librosa.load(audio_file, sr=16000)
        else:
            raise ValueError("No valid audio file provided for analysis")

        if len(audio) < sr:
            raise ValueError("Audio too short (minimum 1 second)")

        # Extract voice features
        features = extract_health_features(audio, sr)

        # Transcribe audio for symptom analysis
        transcription = transcribe_audio(audio)
        symptom_feedback = analyze_symptoms(transcription) if transcription else "No transcription available. Please record again with clear speech."

        # Analyze voice features for health indicators
        feedback = []
        respiratory_score = features["jitter"]
        mental_health_score = features["shimmer"]

        # Rule-based analysis with personalized feedback
        if respiratory_score > 1.0:
            feedback.append(f"Your voice indicates elevated jitter ({respiratory_score:.2f}%), which may suggest respiratory issues. Consult a doctor.")
        if mental_health_score > 5.0:
            feedback.append(f"Your voice shows elevated shimmer ({mental_health_score:.2f}%), possibly indicating stress or emotional strain. Consider a health check.")
        if features["energy"] < 0.01:
            feedback.append(f"Your vocal energy is low ({features['energy']:.4f}), which might point to fatigue. Seek medical advice if this persists.")

        if not feedback and not symptom_feedback.startswith("No transcription"):
            feedback.append("Your voice analysis shows no immediate health concerns based on current data.")

        # Combine voice and symptom feedback
        feedback.append("\n**Symptom Feedback (Based on Your Input)**:")
        feedback.append(symptom_feedback)
        feedback.append("\n**Voice Analysis Details**:")
        feedback.append(f"Pitch: {features['pitch']:.2f} Hz (average fundamental frequency)")
        feedback.append(f"Jitter: {respiratory_score:.2f}% (pitch variation, higher values may indicate respiratory issues)")
        feedback.append(f"Shimmer: {mental_health_score:.2f}% (amplitude variation, higher values may indicate stress)")
        feedback.append(f"Energy: {features['energy']:.4f} (vocal intensity, lower values may indicate fatigue)")
        feedback.append(f"Transcription: {transcription if transcription else 'None'}")
        feedback.append("\n**Disclaimer**: This is a preliminary analysis, not a medical diagnosis. Always consult a healthcare provider for professional evaluation.")

        feedback_str = "\n".join(feedback)

        # Store in Salesforce if configured (obtain user consent before persisting health data)
        if sf:
            store_in_salesforce(audio_file, feedback_str, respiratory_score, mental_health_score, features, transcription)

        # Clean up audio file for HIPAA/GDPR compliance
        if audio_file and os.path.exists(audio_file):
            try:
                os.remove(audio_file)
                logger.info(f"Deleted audio file: {audio_file} for compliance")
            except Exception as e:
                logger.error(f"Failed to delete audio file: {str(e)}")

        return feedback_str
    except Exception as e:
        logger.error(f"Audio processing failed: {str(e)}")
        return f"Error: {str(e)}"

def store_in_salesforce(audio_file, feedback, respiratory_score, mental_health_score, features, transcription):
    """Store results in Salesforce with encrypted data."""
    try:
        sf.HealthAssessment__c.create({
            "AssessmentDate__c": datetime.utcnow().isoformat(),
            "Feedback__c": feedback,
            "RespiratoryScore__c": float(respiratory_score),
            "MentalHealthScore__c": float(mental_health_score),
            "AudioFileName__c": os.path.basename(audio_file) if audio_file else "user_recorded_audio",
            "Pitch__c": float(features["pitch"]),
            "Jitter__c": float(features["jitter"]),
            "Shimmer__c": float(features["shimmer"]),
            "Energy__c": float(features["energy"]),
            "Transcription__c": transcription
        })
        logger.info("Stored assessment in Salesforce")
    except Exception as e:
        logger.error(f"Salesforce storage failed: {str(e)}")

# Gradio interface with accessibility focus
iface = gr.Interface(
    fn=analyze_voice,
    inputs=gr.Audio(type="filepath", label="Record or Upload Your Voice (WAV, MP3, FLAC, 1+ sec)", format="wav"),
    outputs=gr.Textbox(label="Health Assessment Results", elem_id="health-results"),
    title="Smart Voicebot for Public Health",
    description="Record or upload your voice (minimum 1 second) to receive a preliminary health check. Speak clearly in English about your symptoms (e.g., 'I have a cough' or 'I feel stressed'). This tool is accessible via web and mobile.",
    theme="default",  # Basic theme; enhance for screen readers later
    allow_flagging="never"  # Prevent data retention without consent
)
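# Once running, open http://localhost:7860 (or the host's port 7860), record or
# upload a clip of at least one second, and the textbox returns the combined
# voice-feature and symptom feedback.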

if __name__ == "__main__":
    logger.info("Starting Voice Health Analyzer at 12:34 PM IST, June 23, 2025")
    iface.launch(server_name="0.0.0.0", server_port=7860)