import gradio as gr
import librosa
import numpy as np
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from simple_salesforce import Salesforce
import os
from datetime import datetime
import logging
import webrtcvad
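# Assumed runtime dependencies (e.g., via this Space's requirements.txt):
# gradio, librosa, numpy, torch, transformers, simple-salesforce, webrtcvad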
# Set up logging for usage metrics and debugging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
usage_metrics = {"total_assessments": 0} # Simple in-memory metric (to be expanded with Salesforce)
# Salesforce credentials (read from environment variables; never hard-coded)
SF_USERNAME = os.getenv("SF_USERNAME")
SF_PASSWORD = os.getenv("SF_PASSWORD")
SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
SF_INSTANCE_URL = os.getenv("SF_INSTANCE_URL", "https://login.salesforce.com")
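# Note: with username/password/token auth, simple_salesforce derives the API
# endpoint from the login response and ignores instance_url; pass domain="test"
# instead for sandbox orgs.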
# Initialize Salesforce
sf = None
try:
    if all([SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN]):
        sf = Salesforce(
            username=SF_USERNAME,
            password=SF_PASSWORD,
            security_token=SF_SECURITY_TOKEN,
            instance_url=SF_INSTANCE_URL
        )
        logger.info("Connected to Salesforce for user management")
    else:
        logger.warning("Salesforce credentials missing; user management disabled")
except Exception as e:
    logger.error(f"Salesforce connection failed: {str(e)}")
# Load Whisper model for speech-to-text
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
whisper_model.config.forced_decoder_ids = whisper_processor.get_decoder_prompt_ids(language="english", task="transcribe")
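# Forcing language="english", task="transcribe" keeps Whisper from auto-detecting
# another language (or translating) on short or noisy clips.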
# Initialize VAD
vad = webrtcvad.Vad(mode=2) # Moderate mode for balanced voice detection
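# webrtcvad only accepts 16-bit mono PCM at 8/16/32/48 kHz, in frames of exactly
# 10, 20, or 30 ms; that is why audio is sliced into frame_samples-sized chunks
# and converted to int16 before vad.is_speech() below.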
def extract_health_features(audio, sr):
    """Extract health-related audio features (pitch, jitter, shimmer, energy)."""
    try:
        # Normalize audio to [-1, 1]
        audio = audio / np.max(np.abs(audio)) if np.max(np.abs(audio)) != 0 else audio
        # Voice Activity Detection
        frame_duration = 30  # ms
        frame_samples = int(sr * frame_duration / 1000)
        frames = [audio[i:i + frame_samples] for i in range(0, len(audio), frame_samples)]
        voiced_frames = [
            frame for frame in frames
            if len(frame) == frame_samples and vad.is_speech((frame * 32767).astype(np.int16).tobytes(), sr)
        ]
        if not voiced_frames:
            raise ValueError("No voiced segments detected")
        voiced_audio = np.concatenate(voiced_frames)
        # Pitch (F0) within a validated range (75-300 Hz for adults)
        pitches, magnitudes = librosa.piptrack(y=voiced_audio, sr=sr, fmin=75, fmax=300)
        valid_pitches = [p for p in pitches[magnitudes > 0] if 75 <= p <= 300]
        pitch = np.mean(valid_pitches) if valid_pitches else 0
        jitter = np.std(valid_pitches) / pitch if pitch and valid_pitches else 0
        if jitter > 0.1:  # Cap extreme jitter at 10% (likely noise)
            jitter = 0.1
            logger.warning("Jitter capped at 10% due to possible noise or distortion")
        # Shimmer (amplitude variation across RMS frames)
        amplitudes = librosa.feature.rms(y=voiced_audio, frame_length=2048, hop_length=512)[0]
        shimmer = np.std(amplitudes) / np.mean(amplitudes) if np.mean(amplitudes) else 0
        if shimmer > 0.1:  # Cap extreme shimmer at 10% (likely noise)
            shimmer = 0.1
            logger.warning("Shimmer capped at 10% due to possible noise or distortion")
        # Energy (mean RMS of the voiced audio)
        energy = np.mean(librosa.feature.rms(y=voiced_audio, frame_length=2048, hop_length=512)[0])
        return {
            "pitch": pitch,
            "jitter": jitter * 100,  # Convert to percentage
            "shimmer": shimmer * 100,  # Convert to percentage
            "energy": energy
        }
    except Exception as e:
        logger.error(f"Feature extraction failed: {str(e)}")
        raise
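# Illustrative usage (the returned values below are made up, not measured):
#   features = extract_health_features(audio, 16000)
#   -> {"pitch": 142.3, "jitter": 1.8, "shimmer": 4.2, "energy": 0.035}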
def transcribe_audio(audio):
    """Transcribe audio to text using Whisper."""
    try:
        inputs = whisper_processor(audio, sampling_rate=16000, return_tensors="pt")
        with torch.no_grad():
            generated_ids = whisper_model.generate(inputs["input_features"])
        transcription = whisper_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        logger.info(f"Transcription: {transcription}")
        return transcription
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        return ""
def analyze_symptoms(text):
    """Mock symptom-to-disease analysis (placeholder for symptom-2-disease-net)."""
    text = text.lower()
    feedback = []
    if "cough" in text or "difficulty breathing" in text:
        feedback.append("Based on your input, you may have a respiratory issue, such as bronchitis or asthma. Please consult a doctor.")
    elif "stressed" in text or "stress" in text or "tired" in text or "fatigue" in text:
        feedback.append("Your description suggests possible stress or fatigue, potentially linked to anxiety or exhaustion. Consider seeking medical advice.")
    else:
        feedback.append("Your input didn't clearly indicate specific symptoms. Please describe any health concerns (e.g., cough, stress) and consult a healthcare provider for a thorough check.")
    return "\n".join(feedback)
def analyze_voice(audio_file=None):
    """Analyze voice for health indicators."""
    usage_metrics["total_assessments"] += 1
    logger.info(f"Total assessments: {usage_metrics['total_assessments']}")
    try:
        # Load audio from file if provided
        if audio_file and os.path.exists(audio_file):
            audio, sr = librosa.load(audio_file, sr=16000)
        else:
            raise ValueError("No valid audio file provided for analysis")
        if len(audio) < sr:
            raise ValueError("Audio too short (minimum 1 second)")
        # Extract voice features
        features = extract_health_features(audio, sr)
        # Transcribe audio for symptom analysis
        transcription = transcribe_audio(audio)
        symptom_feedback = analyze_symptoms(transcription) if transcription else "No transcription available. Please record again with clear speech."
        # Analyze voice features for health indicators
        feedback = []
        respiratory_score = features["jitter"]
        mental_health_score = features["shimmer"]
        # Rule-based analysis with personalized feedback
        if respiratory_score > 1.0:
            feedback.append(f"Your voice indicates elevated jitter ({respiratory_score:.2f}%), which may suggest respiratory issues. Consult a doctor.")
        if mental_health_score > 5.0:
            feedback.append(f"Your voice shows elevated shimmer ({mental_health_score:.2f}%), possibly indicating stress or emotional strain. Consider a health check.")
        if features["energy"] < 0.01:
            feedback.append(f"Your vocal energy is low ({features['energy']:.4f}), which might point to fatigue. Seek medical advice if this persists.")
        if not feedback and not symptom_feedback.startswith("No transcription"):
            feedback.append("Your voice analysis shows no immediate health concerns based on current data.")
        # Combine voice and symptom feedback
        feedback.append("\n**Symptom Feedback (Based on Your Input)**:")
        feedback.append(symptom_feedback)
        feedback.append("\n**Voice Analysis Details**:")
        feedback.append(f"Pitch: {features['pitch']:.2f} Hz (average fundamental frequency)")
        feedback.append(f"Jitter: {respiratory_score:.2f}% (pitch variation; higher values may indicate respiratory issues)")
        feedback.append(f"Shimmer: {mental_health_score:.2f}% (amplitude variation; higher values may indicate stress)")
        feedback.append(f"Energy: {features['energy']:.4f} (vocal intensity; lower values may indicate fatigue)")
        feedback.append(f"Transcription: {transcription if transcription else 'None'}")
        feedback.append("\n**Disclaimer**: This is a preliminary analysis, not a medical diagnosis. Always consult a healthcare provider for professional evaluation.")
        feedback_str = "\n".join(feedback)
        # Store in Salesforce (consent assumed when credentials are configured)
        if sf:
            store_in_salesforce(audio_file, feedback_str, respiratory_score, mental_health_score, features, transcription)
        # Delete the audio file for HIPAA/GDPR compliance
        if audio_file and os.path.exists(audio_file):
            try:
                os.remove(audio_file)
                logger.info(f"Deleted audio file: {audio_file} for compliance")
            except Exception as e:
                logger.error(f"Failed to delete audio file: {str(e)}")
        return feedback_str
    except Exception as e:
        logger.error(f"Audio processing failed: {str(e)}")
        return f"Error: {str(e)}"
def store_in_salesforce(audio_file, feedback, respiratory_score, mental_health_score, features, transcription):
    """Store assessment results in the HealthAssessment__c custom object."""
    try:
        sf.HealthAssessment__c.create({
            "AssessmentDate__c": datetime.utcnow().isoformat(),
            "Feedback__c": feedback,
            "RespiratoryScore__c": float(respiratory_score),
            "MentalHealthScore__c": float(mental_health_score),
            "AudioFileName__c": os.path.basename(audio_file) if audio_file else "user_recorded_audio",
            "Pitch__c": float(features["pitch"]),
            "Jitter__c": float(features["jitter"]),
            "Shimmer__c": float(features["shimmer"]),
            "Energy__c": float(features["energy"]),
            "Transcription__c": transcription
        })
        logger.info("Stored assessment in Salesforce")
    except Exception as e:
        logger.error(f"Salesforce storage failed: {str(e)}")
# Gradio interface with accessibility focus
iface = gr.Interface(
    fn=analyze_voice,
    inputs=gr.Audio(type="filepath", label="Record or Upload Your Voice (WAV, MP3, FLAC, 1+ sec)", format="wav"),
    outputs=gr.Textbox(label="Health Assessment Results", elem_id="health-results"),
    title="Smart Voicebot for Public Health",
    description="Record or upload your voice (minimum 1 second) to receive a preliminary health check. Speak clearly in English about your symptoms (e.g., 'I have a cough' or 'I feel stressed'). This tool is accessible via web and mobile.",
    theme="default",  # Basic theme; enhance for screen readers later
    allow_flagging="never"  # Prevent data retention without consent
)
if __name__ == "__main__":
    logger.info("Starting Voice Health Analyzer")
    iface.launch(server_name="0.0.0.0", server_port=7860)