Spaces:

podcasts-org
/

bgm

Sleeping

App Files Files Community

mrfakename committed on Oct 25

Commit

d5c9390

verified ·

1 Parent(s): 21d1159

Create app.py

Browse files

Files changed (1) hide show

app.py +48 -0

app.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import gradio as gr
+import numpy as np
+from transformers import pipeline
+import os
+# Hub repository id of the fine-tuned checkpoint served by this app.
+model_path = "podcasts-org/detect-background-music"
+# Build the audio-classification pipeline once at import time so every request
+# reuses the same loaded model. The access token comes from the HF_TOKEN
+# environment variable and is None when that variable is unset.
+classifier = pipeline(
+    task="audio-classification",
+    model=model_path,
+    token=os.environ.get("HF_TOKEN"),
+)
+def _to_float_mono(audio_array):
+    """Return *audio_array* as a mono float32 waveform.
+
+    Integer samples are rescaled by the range of their dtype: signed types
+    are divided by the magnitude of their minimum value, unsigned types are
+    re-centred around zero first. Any other dtype is cast to float32 without
+    rescaling. Arrays with more than one dimension are averaged over axis 1.
+    """
+    samples = np.asarray(audio_array)
+    if np.issubdtype(samples.dtype, np.integer):
+        info = np.iinfo(samples.dtype)
+        if info.min < 0:
+            # Signed PCM: int16 -> / 32768.0, int32 -> / 2147483648.0, and the
+            # same rule for int8 / int64.
+            samples = samples.astype(np.float32) / float(-info.min)
+        else:
+            # Unsigned PCM (e.g. uint8) is assumed to be offset binary, with
+            # silence at mid-range, so shift it before scaling.
+            half_range = (info.max + 1) / 2.0
+            samples = (samples.astype(np.float32) - half_range) / half_range
+    else:
+        samples = samples.astype(np.float32, copy=False)
+    # Downmix multi-channel audio to mono.
+    # Assumes channels lie along axis 1, i.e. (samples, channels) - TODO confirm.
+    if samples.ndim > 1:
+        samples = samples.mean(axis=1)
+    return samples
+def classify_audio(audio):
+    """Classify whether audio has background music or not.
+
+    Args:
+        audio: ``None``, or a ``(sample_rate, audio_array)`` tuple where
+            ``audio_array`` is a NumPy array of samples.
+
+    Returns:
+        A dict mapping each predicted label to its score, or the string
+        "Please provide an audio file" when no audio, or an empty clip, was
+        supplied.
+    """
+    if audio is None:
+        return "Please provide an audio file"
+    # audio is a tuple of (sample_rate, audio_array)
+    sample_rate, audio_array = audio
+    waveform = _to_float_mono(audio_array)
+    # An empty clip carries nothing to classify, so treat it like a missing input.
+    if waveform.size == 0:
+        return "Please provide an audio file"
+    # Pipeline expects dict with "array" and "sampling_rate" keys
+    predictions = classifier({"array": waveform, "sampling_rate": sample_rate})
+    # Convert list of dicts to single dict for Gradio Label component
+    return {pred["label"]: pred["score"] for pred in predictions}
+# Assemble the web UI: one audio input feeding classify_audio, one label output.
+_audio_input = gr.Audio(type="numpy", label="Upload Audio")
+_prediction_output = gr.Label(num_top_classes=2, label="Prediction")
+demo = gr.Interface(
+    title="Background Music Detection",
+    description="Upload an audio file to detect whether it contains background music (BGM) or not. Model: Whisper-base fine-tuned on podcasts-org/bgm dataset.",
+    fn=classify_audio,
+    inputs=_audio_input,
+    outputs=_prediction_output,
+    examples=None,
+)
+# Start the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()