File size: 13,840 Bytes
22d3f74
4e737d5
22d3f74
11aa9b0
 
4e737d5
11aa9b0
8abb11f
11aa9b0
4e737d5
e176a37
0cbb12f
4994554
0cbb12f
8abb11f
 
 
 
4994554
 
 
 
8abb11f
4994554
 
 
 
0cbb12f
4e737d5
8abb11f
 
 
 
4bf7be2
 
 
 
 
 
8abb11f
 
0cbb12f
8abb11f
 
 
 
4994554
4bf7be2
8abb11f
 
 
 
 
 
 
 
4e737d5
0cbb12f
22d3f74
8abb11f
 
 
 
 
 
 
0cbb12f
8abb11f
0cbb12f
 
8abb11f
0cbb12f
8abb11f
 
 
 
0cbb12f
 
8abb11f
0cbb12f
8abb11f
 
 
 
 
 
 
 
0cbb12f
8abb11f
 
0cbb12f
 
11aa9b0
8abb11f
 
 
 
4bf7be2
11aa9b0
8abb11f
4994554
8abb11f
 
4994554
 
 
 
 
11aa9b0
8abb11f
11aa9b0
8abb11f
11aa9b0
0cbb12f
11aa9b0
 
0cbb12f
8abb11f
 
 
 
 
 
 
 
 
 
 
 
 
 
0cbb12f
 
8abb11f
 
 
 
 
 
 
 
 
 
 
 
11aa9b0
4bf7be2
8abb11f
4bf7be2
8abb11f
4994554
8abb11f
 
4994554
8abb11f
0cbb12f
11aa9b0
 
 
8abb11f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e737d5
11aa9b0
 
 
 
 
e176a37
8abb11f
11aa9b0
 
 
8abb11f
 
 
11aa9b0
8abb11f
 
 
 
 
 
4e737d5
8abb11f
4ee6a10
0cbb12f
8abb11f
22d3f74
 
11aa9b0
8abb11f
 
 
 
 
 
 
11aa9b0
22d3f74
8abb11f
11aa9b0
8abb11f
 
 
11aa9b0
8abb11f
22d3f74
b453cec
8abb11f
 
 
 
 
11aa9b0
8abb11f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11aa9b0
 
8abb11f
 
 
 
 
11aa9b0
8abb11f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11aa9b0
0cbb12f
b453cec
8abb11f
 
 
 
11aa9b0
 
0cbb12f
11aa9b0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
import os
import subprocess
import threading
import time
from datetime import datetime, timezone

import gradio as gr
import librosa
import numpy as np  # used by the sounddevice callback in pi_listener
import requests
from transformers import pipeline

# πŸŽ™οΈ Load detection model
try:
    print("[INFO] Loading Hugging Face model...")
    # The current model includes 'screaming' as a label, but might misclassify
    # high-pitched screams as 'crying' due to its general non-verbal vocalization training.
    # For higher accuracy in distinguishing screams from crying, fine-tuning on a specific
    # dataset or exploring other specialized models would be recommended.
    classifier = pipeline(
        "audio-classification",
        model="padmalcom/wav2vec2-large-nonverbalvocalization-classification"
    )
    print(f"[INFO] Model labels: {classifier.model.config.id2label.values()}")
except Exception as e:
    print(f"[ERROR] Failed to load model: {e}")
    classifier = None

# === Audio Conversion ===
def convert_audio(input_path, output_path="input.wav"):
    """
    Convert an arbitrary audio file to 16 kHz mono 16-bit PCM WAV.

    The fixed output format matches what the Hugging Face model expects.

    Args:
        input_path: path of the source audio file.
        output_path: destination WAV path (overwritten if it exists).

    Returns:
        The output_path on success.

    Raises:
        RuntimeError: if ffmpeg is missing or the conversion fails.
    """
    ffmpeg_cmd = [
        "ffmpeg", "-i", input_path,
        "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
        output_path, "-y",
    ]
    try:
        # capture_output=True lets us surface ffmpeg's own diagnostics.
        completed = subprocess.run(ffmpeg_cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"[ERROR] ffmpeg conversion failed: {e.stderr.strip()}")
        raise RuntimeError(f"Audio conversion failed: {e.stderr.strip()}")
    except FileNotFoundError:
        print("[ERROR] ffmpeg command not found. Please ensure ffmpeg is installed and in your PATH.")
        raise RuntimeError("ffmpeg not found. Please install it.")
    except Exception as e:
        print(f"[ERROR] Unexpected error during audio conversion: {e}")
        raise RuntimeError(f"Unexpected audio conversion error: {e}")

    print(f"[DEBUG] Audio converted to WAV: {output_path}")
    for stream_name, text in (("stdout", completed.stdout), ("stderr", completed.stderr)):
        if text:
            print(f"[DEBUG] ffmpeg {stream_name}: {text.strip()}")
    return output_path

# === Scream Detection ===
def detect_scream(audio_path):
    """
    Classify an audio file with the loaded Hugging Face model.

    Args:
        audio_path: path to a WAV file (resampled to 16 kHz on load).

    Returns:
        A dict {"label": str, "score": float} where score is a percentage.
        On any failure a sentinel label ("model_not_loaded", "no_audio_data",
        "no_detection", "detection_error") is returned with score 0.0, so
        callers never need to catch exceptions here.
    """
    if classifier is None:
        return {"label": "model_not_loaded", "score": 0.0}

    try:
        # librosa resamples to 16 kHz to match the model's expected rate.
        samples, rate = librosa.load(audio_path, sr=16000)
        print(f"[DEBUG] Loaded audio: {len(samples)} samples at {rate} Hz")

        if len(samples) == 0:
            print("[WARNING] Empty audio file provided for detection.")
            return {"label": "no_audio_data", "score": 0.0}

        # The pipeline accepts a raw numpy array of samples.
        predictions = classifier(samples)
        print(f"[DEBUG] Model output: {predictions}")

        if not predictions:
            print("[WARNING] Model returned no detection results.")
            return {"label": "no_detection", "score": 0.0}

        # Highest-confidence prediction wins; label lowercased so callers
        # can compare consistently.
        best = max(predictions, key=lambda p: p["score"])
        return {"label": best["label"].lower(), "score": float(best["score"]) * 100}
    except Exception as e:
        print(f"[ERROR] Detection failed for {audio_path}: {e}")
        return {"label": "detection_error", "score": 0.0}

# === Send Alert to Salesforce ===
def send_salesforce_alert(audio_meta, detection):
    """
    POST an alert payload to the configured Salesforce endpoint.

    Configuration is read from the SF_ALERT_URL and SF_API_TOKEN
    environment variables.

    Args:
        audio_meta: dict with "filename", "alert_level" and "timestamp" keys.
        detection: dict with "label" and "score" (percentage) keys.

    Returns:
        The parsed JSON response body, or {"raw": <text>} when the endpoint
        replies 2xx with a non-JSON body.

    Raises:
        RuntimeError: on missing configuration, timeout, or HTTP/transport
        failure.
    """
    SF_URL = os.getenv("SF_ALERT_URL")
    SF_TOKEN = os.getenv("SF_API_TOKEN")

    if not SF_URL or not SF_TOKEN:
        print("[ERROR] Salesforce configuration (SF_ALERT_URL or SF_API_TOKEN) missing.")
        raise RuntimeError("Salesforce configuration missing. Cannot send alert.")

    headers = {
        "Authorization": f"Bearer {SF_TOKEN}",
        "Content-Type": "application/json"
    }
    payload = {
        "AudioName": audio_meta.get("filename", "unknown_audio"),
        "DetectedLabel": detection["label"],
        "Score": round(detection["score"], 2),  # rounded for cleaner CRM data
        "AlertLevel": audio_meta["alert_level"],
        "Timestamp": audio_meta["timestamp"],
    }

    print(f"[DEBUG] Sending payload to Salesforce: {payload}")
    try:
        resp = requests.post(SF_URL, json=payload, headers=headers, timeout=10)
        resp.raise_for_status()  # surface 4xx/5xx as exceptions
    except requests.exceptions.Timeout:
        print("[ERROR] Salesforce alert request timed out.")
        raise RuntimeError("Salesforce alert timed out.")
    except requests.exceptions.RequestException as e:
        print(f"[ERROR] Error sending Salesforce alert: {e}")
        # Attempt to print response content if available for more details.
        if hasattr(e, 'response') and e.response is not None:
            print(f"[ERROR] Salesforce response content: {e.response.text}")
        raise RuntimeError(f"Failed to send Salesforce alert: {e}")

    # FIX: a successful (2xx) response is not guaranteed to carry JSON.
    # Previously resp.json() could raise ValueError here and escape as an
    # "unexpected" error even though the alert was actually delivered.
    try:
        body = resp.json()
    except ValueError:
        body = {"raw": resp.text}
    print(f"[INFO] Salesforce alert sent successfully. Response: {body}")
    return body

# === Main Gradio Function ===
def process_uploaded(audio_file, system_state, high_thresh, med_thresh):
    """
    Gradio entry point: convert the uploaded audio, run scream detection,
    and raise a Salesforce alert when confidence crosses a threshold.

    Args:
        audio_file: filepath from gr.Audio, or None if nothing was provided.
        system_state: "Start" enables processing; anything else is a no-op.
        high_thresh: confidence (%) at or above which a scream is High-Risk.
        med_thresh: confidence (%) at or above which a scream is Medium-Risk.

    Returns:
        A human-readable status string for the Gradio text output.
    """
    if system_state != "Start":
        return "πŸ›‘ System is stopped. Change 'System State' to 'Start' to enable processing."

    if audio_file is None:
        return "Please upload an audio file or record one."

    print(f"[INFO] Processing uploaded audio: {audio_file}")

    try:
        # Convert audio to the WAV format the model expects.
        wav_path = convert_audio(audio_file)
    except RuntimeError as e:
        return f"❌ Audio conversion error: {e}"
    except Exception as e:
        return f"❌ An unexpected error occurred during audio conversion: {e}"

    # FIX: cleanup of the temporary WAV now lives in a finally block, so the
    # file is removed even if detection or alerting raises unexpectedly.
    try:
        detection = detect_scream(wav_path)
        label = detection["label"]
        score = detection["score"]

        # Classify risk. Substring match because the model may emit
        # variations such as "screaming" rather than a bare "scream".
        level = "None"
        if "scream" in label:
            if score >= high_thresh:
                level = "High-Risk"
            elif score >= med_thresh:
                level = "Medium-Risk"
        # NOTE: "crying" is a known near-miss classification for screams but is
        # deliberately NOT alertable (the old elif branch was a no-op that set
        # level to "None", its existing value). Add a real branch here if
        # crying should ever trigger alerts.

        alert_message = f"🟒 Detection: {label} ({score:.1f}%) β€” Alert Level: {level}"

        audio_meta = {
            "filename": os.path.basename(audio_file),
            # FIX: datetime.utcnow() is deprecated; use an aware UTC clock but
            # keep the exact "...Z" string format Salesforce already receives.
            "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
            "alert_level": level
        }

        # Only risk-bearing detections are forwarded to Salesforce.
        if level in ("High-Risk", "Medium-Risk"):
            try:
                sf_resp = send_salesforce_alert(audio_meta, detection)
                alert_message = (f"βœ… Detection: {label} ({score:.1f}%) β€” {level} β€” "
                                 f"Alert sent to Salesforce (ID: {sf_resp.get('id', 'N/A')})")
            except RuntimeError as e:
                alert_message = f"⚠️ Detection: {label} ({score:.1f}%) β€” {level} β€” Salesforce ERROR: {e}"
            except Exception as e:
                alert_message = f"⚠️ Detection: {label} ({score:.1f}%) β€” {level} β€” Unexpected Salesforce error: {e}"

        return alert_message
    finally:
        # Always remove the converted WAV, whatever happened above.
        if os.path.exists(wav_path):
            os.remove(wav_path)
            print(f"[DEBUG] Cleaned up {wav_path}")

# === Gradio UI ===
# Declarative interface wiring: the four inputs map positionally onto
# process_uploaded(audio_file, system_state, high_thresh, med_thresh).
# Slider defaults (80 / 50) match the thresholds quoted in the description.
iface = gr.Interface(
    fn=process_uploaded,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio (or Record)"),
        gr.Radio(["Start", "Stop"], label="System State", value="Start",
                 info="Set to 'Start' to enable audio processing and alerts."),
        gr.Slider(0, 100, value=80, step=1, label="High-Risk Threshold (%)",
                  info="Confidence score for High-Risk scream detection."),
        gr.Slider(0, 100, value=50, step=1, label="Medium-Risk Threshold (%)",
                  info="Confidence score for Medium-Risk scream detection.")
    ],
    outputs="text",
    title="πŸ“’ Emotion-Triggered Alarm System",
    description="""
🎧 Upload or record audio for real-time scream detection.
⚠️ Alerts are sent to Salesforce for High-Risk (confidence > 80%) and Medium-Risk (confidence 50-80%) detections.
The system aims to detect panic-indicating screams.
""",
    allow_flagging="never" # As per requirement
)

# === Optional Real-Time Listener (for Raspberry Pi or similar) ===
# Requires `sounddevice` (and a working PortAudio install) plus the
# SF_ALERT_URL / SF_API_TOKEN environment variables for alert delivery.
# Not started by default; see the __main__ block for the daemon-thread hookup.
def pi_listener(high_thresh=80, med_thresh=50, interval=1.0):
    """
    Continuously capture microphone audio and alert on detected screams.

    Args:
        high_thresh: confidence (%) at or above which an alert is High-Risk.
        med_thresh: confidence (%) at or above which an alert is Medium-Risk.
        interval: seconds the main loop sleeps between liveness checks.
    """
    try:
        # FIX: only sounddevice is optional here — numpy is already a
        # module-level import, so the old redundant local import is gone.
        import sounddevice as sd
    except ImportError:
        print("[ERROR] sounddevice not found. Real-time listener cannot be started.")
        print("Please install it: pip install sounddevice")
        return

    if classifier is None:
        print("[ERROR] Model not loaded. Real-time listener cannot operate.")
        return

    def callback(indata, frames, time_info, status):
        """Process one captured audio chunk; may fire a Salesforce alert."""
        if status:
            print(f"[WARNING] Sounddevice status: {status}")

        # Flatten to a mono float32 vector, the shape the classifier expects.
        wav = indata.squeeze()
        if wav.ndim > 1:
            wav = wav[:, 0]  # take first channel if still multi-channel
        wav = wav.astype(np.float32)

        if len(wav) == 0:
            return  # nothing captured in this block

        try:
            detection_results = classifier(wav)
            if not detection_results:
                return

            # Top prediction by confidence.
            top_prediction = sorted(detection_results, key=lambda x: x['score'], reverse=True)[0]
            lbl, sc = (top_prediction["label"].lower(), float(top_prediction["score"]) * 100)

            # Same thresholding scheme as process_uploaded().
            level = "None"
            if "scream" in lbl:
                if sc >= high_thresh:
                    level = "High-Risk"
                elif sc >= med_thresh:
                    level = "Medium-Risk"

            if level != "None":
                # FIX: datetime.utcnow() is deprecated; aware UTC clock with
                # the same "...Z" string format as before.
                timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
                audio_meta = {
                    "filename": f"live-stream-{timestamp}",
                    "timestamp": timestamp,
                    "alert_level": level
                }
                detection_info = {"label": lbl, "score": sc}

                try:
                    send_salesforce_alert(audio_meta, detection_info)
                    print(f"[{timestamp}] {level} scream detected ({sc:.1f}%) – alert sent.")
                except RuntimeError as e:
                    print(f"[{timestamp}] {level} scream detected ({sc:.1f}%) – Salesforce alert failed: {e}")
                except Exception as e:
                    print(f"[{timestamp}] {level} scream detected ({sc:.1f}%) – Unexpected error sending alert: {e}")

        except Exception as e:
            # Never let the audio callback crash the stream.
            print(f"[ERROR] Error in real-time detection callback: {e}")

    # Start audio stream.
    try:
        # blocksize=16000 at 16 kHz mono == one-second chunks per callback.
        with sd.InputStream(channels=1, samplerate=16000, callback=callback, blocksize=16000):
            print("πŸ”Š Real-time detection started...")
            while True:
                time.sleep(interval)  # keep the main thread alive
    except sd.PortAudioError as e:
        print(f"[ERROR] PortAudio error: {e}. Check your audio device setup.")
    except Exception as e:
        print(f"[ERROR] An unexpected error occurred in the real-time listener: {e}")

# === App Entry ===
if __name__ == "__main__":
    # Optional: enable real-time listener for Raspberry Pi or similar.
    # Uncomment the lines below to enable it.
    # Remember to install sounddevice and numpy: pip install sounddevice numpy
    # Also, ensure your system has PortAudio installed for sounddevice to work.
    # pi_thread = threading.Thread(target=pi_listener, daemon=True)
    # pi_thread.start()

    # Bind to all interfaces; PORT env var overrides the default Gradio port.
    iface.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))