Spaces:

Sajidahamed
/

AccentClassification

Sleeping

App Files Community

Sajidahamed committed 13 days ago

Commit

b883875

verified ·

1 Parent(s): 7624bc3

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -10

app.py CHANGED Viewed

@@ -3,25 +3,43 @@ import os
 import torchaudio
 from speechbrain.pretrained import EncoderClassifier
-def accent_detect(video_path):
-    # video_path is a string file path provided by Gradio
-    # Extract audio from the video using ffmpeg
     os.system(f"ffmpeg -y -i '{video_path}' -ar 16000 -ac 1 -vn audio.wav")
-    # Check if audio.wav was created and is of reasonable size
     if not os.path.exists("audio.wav") or os.path.getsize("audio.wav") < 1000:
-        return "Audio extraction failed. Please check your file."
-    # Load the accent classification model
     accent_model = EncoderClassifier.from_hparams(
         source="speechbrain/lang-id-commonlanguage_ecapa",
         savedir="tmp_accent_model"
     )
-    # Load audio
     signal, fs = torchaudio.load("audio.wav")
     if signal.shape[0] > 1:
         signal = signal[0].unsqueeze(0)
-    # Predict accent
     prediction = accent_model.classify_batch(signal)
     pred_label = prediction[3][0]
     pred_scores = prediction[1][0]
@@ -34,10 +52,13 @@ def accent_detect(video_path):
 demo = gr.Interface(
     fn=accent_detect,
-    inputs=gr.Video(label="Upload a Video File (MP4, WEBM, etc.)"),
     outputs="text",
     title="🗣️ English Accent Classifier (Gradio Demo)",
-    description="Upload a short video clip of English speech. This tool predicts the English accent and confidence."
 )
 if __name__ == "__main__":

 import torchaudio
 from speechbrain.pretrained import EncoderClassifier
+def accent_detect(video_link, video_file):
+    # Decide which input to use
+    video_path = None
+    # If a video file is uploaded, use it
+    if video_file is not None:
+        video_path = "uploaded_input.mp4"
+        with open(video_path, "wb") as f:
+            f.write(video_file.read())
+    # Else if a link is provided, try to download it
+    elif video_link and len(video_link.strip()) > 8:
+        # Use yt-dlp for YouTube or wget for direct link
+        if "youtube.com" in video_link or "youtu.be" in video_link:
+            os.system(f'yt-dlp -o input_video.mp4 "{video_link}"')
+        else:
+            os.system(f'wget -O input_video.mp4 "{video_link}"')
+        if os.path.exists("input_video.mp4") and os.path.getsize("input_video.mp4") > 0:
+            video_path = "input_video.mp4"
+        else:
+            return "Failed to download the video. Please check your link."
+    else:
+        return "Please upload a video file or provide a valid video link."
+    # Extract audio from video
     os.system(f"ffmpeg -y -i '{video_path}' -ar 16000 -ac 1 -vn audio.wav")
     if not os.path.exists("audio.wav") or os.path.getsize("audio.wav") < 1000:
+        return "Audio extraction failed. Please use a different video."
+    # Load model and classify accent
     accent_model = EncoderClassifier.from_hparams(
         source="speechbrain/lang-id-commonlanguage_ecapa",
         savedir="tmp_accent_model"
     )
     signal, fs = torchaudio.load("audio.wav")
     if signal.shape[0] > 1:
         signal = signal[0].unsqueeze(0)
     prediction = accent_model.classify_batch(signal)
     pred_label = prediction[3][0]
     pred_scores = prediction[1][0]
 demo = gr.Interface(
     fn=accent_detect,
+    inputs=[
+        gr.Textbox(label="YouTube or direct MP4 link (optional)", placeholder="https://youtube.com/yourvideo"),
+        gr.File(label="Or upload a video file (MP4, WEBM, etc.)"),
+    ],
     outputs="text",
     title="🗣️ English Accent Classifier (Gradio Demo)",
+    description="Paste a YouTube/direct MP4 link or upload a video file with English speech. The tool predicts the English accent and confidence."
 )
 if __name__ == "__main__":