import gradio as gr
import librosa
import numpy as np
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from simple_salesforce import Salesforce
import os
from datetime import datetime
import logging
import webrtcvad

# Set up logging for usage metrics and debugging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
usage_metrics = {"total_assessments": 0}  # Simple in-memory metric (to be expanded with Salesforce)

# Salesforce credentials (assumed secure via environment variables)
SF_USERNAME = os.getenv("SF_USERNAME")
SF_PASSWORD = os.getenv("SF_PASSWORD")
SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
SF_INSTANCE_URL = os.getenv("SF_INSTANCE_URL", "https://login.salesforce.com")

# Initialize Salesforce
sf = None
try:
    if all([SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN]):
        sf = Salesforce(
            username=SF_USERNAME,
            password=SF_PASSWORD,
            security_token=SF_SECURITY_TOKEN,
            instance_url=SF_INSTANCE_URL
        )
        logger.info("Connected to Salesforce for user management")
    else:
        logger.warning("Salesforce credentials missing; user management disabled")
except Exception as e:
    logger.error(f"Salesforce connection failed: {str(e)}")

# Load Whisper model for speech-to-text
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
whisper_model.config.forced_decoder_ids = whisper_processor.get_decoder_prompt_ids(language="english", task="transcribe")

# Initialize VAD
vad = webrtcvad.Vad(mode=2)  # Moderate mode for balanced voice detection


def extract_health_features(audio, sr):
    """Extract health-related audio features."""
    try:
        # Normalize audio
        audio = audio / np.max(np.abs(audio)) if np.max(np.abs(audio)) != 0 else audio

        # Voice Activity Detection
        frame_duration = 30  # ms
        frame_samples = int(sr * frame_duration / 1000)
        frames = [audio[i:i + frame_samples] for i in range(0, len(audio), frame_samples)]
        voiced_frames = [
            frame for frame in frames
            if len(frame) == frame_samples
            and vad.is_speech((frame * 32767).astype(np.int16).tobytes(), sr)
        ]
        if not voiced_frames:
            raise ValueError("No voiced segments detected")
        voiced_audio = np.concatenate(voiced_frames)

        # Pitch (F0) with validated range (75-300 Hz for adults)
        pitches, magnitudes = librosa.piptrack(y=voiced_audio, sr=sr, fmin=75, fmax=300)
        valid_pitches = [p for p in pitches[magnitudes > 0] if 75 <= p <= 300]
        pitch = np.mean(valid_pitches) if valid_pitches else 0

        # Jitter (relative pitch variation), expressed as a percentage
        jitter = (np.std(valid_pitches) / pitch) * 100 if pitch and valid_pitches else 0
        if jitter > 10:  # Cap extreme jitter (likely noise)
            jitter = 10
            logger.warning("Jitter capped at 10% due to possible noise or distortion")

        # Shimmer (relative amplitude variation), expressed as a percentage
        amplitudes = librosa.feature.rms(y=voiced_audio, frame_length=2048, hop_length=512)[0]
        shimmer = (np.std(amplitudes) / np.mean(amplitudes)) * 100 if np.mean(amplitudes) else 0
        if shimmer > 10:  # Cap extreme shimmer (likely noise)
            shimmer = 10
            logger.warning("Shimmer capped at 10% due to possible noise or distortion")

        # Energy (mean RMS of the voiced audio)
        energy = np.mean(librosa.feature.rms(y=voiced_audio, frame_length=2048, hop_length=512)[0])

        return {
            "pitch": pitch,
            "jitter": jitter,    # Percentage
            "shimmer": shimmer,  # Percentage
            "energy": energy
        }
    except Exception as e:
        logger.error(f"Feature extraction failed: {str(e)}")
        raise


def transcribe_audio(audio):
    """Transcribe audio to text using Whisper."""
    try:
        inputs = whisper_processor(
            audio,
            sampling_rate=16000,
            return_tensors="pt"
        )
        with torch.no_grad():
            generated_ids = whisper_model.generate(inputs["input_features"])
        transcription = whisper_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        logger.info(f"Transcription: {transcription}")
        return transcription
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        return ""


def analyze_symptoms(text):
    """Mock symptom-to-disease analysis (placeholder for symptom-2-disease-net)."""
    text = text.lower()
    feedback = []
    if "cough" in text or "difficulty breathing" in text:
        feedback.append("Based on your input, you may have a respiratory issue, such as bronchitis or asthma. Please consult a doctor.")
    elif "stressed" in text or "stress" in text or "tired" in text or "fatigue" in text:
        feedback.append("Your description suggests possible stress or fatigue, potentially linked to anxiety or exhaustion. Consider seeking medical advice.")
    else:
        feedback.append("Your input didn’t clearly indicate specific symptoms. Please describe any health concerns (e.g., cough, stress) and consult a healthcare provider for a thorough check.")
    return "\n".join(feedback)


def analyze_voice(audio_file=None):
    """Analyze voice for health indicators."""
    global usage_metrics
    usage_metrics["total_assessments"] += 1
    logger.info(f"Total assessments: {usage_metrics['total_assessments']}")
    try:
        # Load audio from file if provided
        if audio_file and os.path.exists(audio_file):
            audio, sr = librosa.load(audio_file, sr=16000)
        else:
            raise ValueError("No valid audio file provided for analysis")
        if len(audio) < sr:
            raise ValueError("Audio too short (minimum 1 second)")

        # Extract voice features
        features = extract_health_features(audio, sr)

        # Transcribe audio for symptom analysis
        transcription = transcribe_audio(audio)
        symptom_feedback = (
            analyze_symptoms(transcription)
            if transcription
            else "No transcription available. Please record again with clear speech."
        )

        # Analyze voice features for health indicators
        feedback = []
        respiratory_score = features["jitter"]
        mental_health_score = features["shimmer"]

        # Rule-based analysis with personalized feedback
        if respiratory_score > 1.0:
            feedback.append(f"Your voice indicates elevated jitter ({respiratory_score:.2f}%), which may suggest respiratory issues. Consult a doctor.")
        if mental_health_score > 5.0:
            feedback.append(f"Your voice shows elevated shimmer ({mental_health_score:.2f}%), possibly indicating stress or emotional strain. Consider a health check.")
        if features["energy"] < 0.01:
            feedback.append(
                f"Your vocal energy is low ({features['energy']:.4f}), which might point to fatigue. "
                "Seek medical advice if this persists."
            )
        if not feedback and not symptom_feedback.startswith("No transcription"):
            feedback.append("Your voice analysis shows no immediate health concerns based on current data.")

        # Combine voice and symptom feedback
        feedback.append("\n**Symptom Feedback (Based on Your Input)**:")
        feedback.append(symptom_feedback)
        feedback.append("\n**Voice Analysis Details**:")
        feedback.append(f"Pitch: {features['pitch']:.2f} Hz (average fundamental frequency)")
        feedback.append(f"Jitter: {respiratory_score:.2f}% (pitch variation, higher values may indicate respiratory issues)")
        feedback.append(f"Shimmer: {mental_health_score:.2f}% (amplitude variation, higher values may indicate stress)")
        feedback.append(f"Energy: {features['energy']:.4f} (vocal intensity, lower values may indicate fatigue)")
        feedback.append(f"Transcription: {transcription if transcription else 'None'}")
        feedback.append("\n**Disclaimer**: This is a preliminary analysis, not a medical diagnosis. Always consult a healthcare provider for professional evaluation.")
        feedback_str = "\n".join(feedback)

        # Store in Salesforce (with consent implied via credentials)
        if sf:
            store_in_salesforce(audio_file, feedback_str, respiratory_score, mental_health_score, features, transcription)

        # Clean up audio file for HIPAA/GDPR compliance
        if audio_file and os.path.exists(audio_file):
            try:
                os.remove(audio_file)
                logger.info(f"Deleted audio file: {audio_file} for compliance")
            except Exception as e:
                logger.error(f"Failed to delete audio file: {str(e)}")

        return feedback_str
    except Exception as e:
        logger.error(f"Audio processing failed: {str(e)}")
        return f"Error: {str(e)}"


def store_in_salesforce(audio_file, feedback, respiratory_score, mental_health_score, features, transcription):
    """Store assessment results in a Salesforce HealthAssessment__c record."""
    try:
        sf.HealthAssessment__c.create({
            "AssessmentDate__c": datetime.utcnow().isoformat(),
            "Feedback__c": feedback,
            "RespiratoryScore__c": float(respiratory_score),
            "MentalHealthScore__c": float(mental_health_score),
            "AudioFileName__c": os.path.basename(audio_file) if audio_file else "user_recorded_audio",
            "Pitch__c": float(features["pitch"]),
            "Jitter__c": float(features["jitter"]),
            "Shimmer__c": float(features["shimmer"]),
            "Energy__c": float(features["energy"]),
            "Transcription__c": transcription
        })
        logger.info("Stored assessment in Salesforce")
    except Exception as e:
        logger.error(f"Salesforce storage failed: {str(e)}")


# Gradio interface with accessibility focus
iface = gr.Interface(
    fn=analyze_voice,
    inputs=gr.Audio(type="filepath", label="Record or Upload Your Voice (WAV, MP3, FLAC, 1+ sec)", format="wav"),
    outputs=gr.Textbox(label="Health Assessment Results", elem_id="health-results"),
    title="Smart Voicebot for Public Health",
    description="Record or upload your voice (minimum 1 second) to receive a preliminary health check. Speak clearly in English about your symptoms (e.g., 'I have a cough' or 'I feel stressed'). This tool is accessible via web and mobile.",
    theme="default",  # Basic theme; enhance for screen readers later
    allow_flagging="never"  # Prevent data retention without consent
)

if __name__ == "__main__":
    logger.info("Starting Voice Health Analyzer")
    iface.launch(server_name="0.0.0.0", server_port=7860)
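
# --- Optional local smoke test (illustrative sketch, not part of the app) ---
# A minimal way to exercise analyze_voice() without launching the Gradio UI,
# assuming the `soundfile` package is available; the helper name `_smoke_test`
# and the temporary-file approach are illustrative choices, not fixed parts of
# this project. A pure synthetic tone will usually fail voice activity
# detection (expect the "No voiced segments detected" error path); point it at
# a real 1+ second speech recording to see the full feedback. Note that
# analyze_voice() deletes the file it is given, so pass a disposable copy.
# Uncomment to run.
#
# import tempfile
# import soundfile as sf_io
#
# def _smoke_test():
#     sr = 16000
#     t = np.linspace(0, 2.0, int(sr * 2.0), endpoint=False)
#     tone = 0.3 * np.sin(2 * np.pi * 150 * t)  # 2 s, 150 Hz tone as placeholder audio
#     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
#         sf_io.write(tmp.name, tone, sr)
#     print(analyze_voice(tmp.name))  # File is removed by analyze_voice() on success
#
# _smoke_test()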