Spaces:
Sleeping
Sleeping
File size: 1,996 Bytes
f6625cd 5ce022b f6625cd b5bcbc0 08926b6 923ae66 08926b6 923ae66 08926b6 f6625cd 5ce022b f6625cd 5ce022b f6625cd 6c04d8b f6625cd 5ce022b 923ae66 5ce022b 923ae66 f6625cd 923ae66 f6625cd 923ae66 f6625cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import gradio as gr
import time
import json
import numpy as np
from pathlib import Path
from utils_media import video_to_frame_audio, load_audio_16k
# Directory containing this script; data files are resolved relative to it so
# loading does not depend on the current working directory.
HERE = Path(__file__).parent

# NOTE: the "lables" spelling is kept as-is because other code in this file
# refers to these names.
lables_PATH = HERE / "labels.json"

# Class names in file order, taken from the "name" field of each entry under
# the top-level "labels" key. The file is decoded explicitly as UTF-8 so the
# result does not depend on the platform's default locale encoding.
lables = [
    entry["name"]
    for entry in json.loads(lables_PATH.read_text(encoding="utf-8"))["labels"]
]
def predict_vid(video):
    """Run the placeholder classifier on an uploaded video.

    The video is handed to ``video_to_frame_audio``; the decoded frame and
    audio are not used yet. Every label receives the same probability, and
    ``np.argmax`` resolves the tie to the first index, so the reported
    prediction is the first entry of ``lables``.

    Returns:
        A 3-tuple ``(prediction, probabilities, latency)`` where
        ``prediction`` is a label name, ``probabilities`` maps each label name
        to a Python float, and ``latency`` holds the elapsed wall-clock time
        in milliseconds under ``"t_total_ms"`` plus a ``"note"`` string.
    """
    started = time.time()

    # Decoded outputs are placeholders until a real model consumes them.
    _frame, _audio16k = video_to_frame_audio(video)

    n_labels = len(lables)
    uniform = np.ones(n_labels) / n_labels
    best_idx = int(np.argmax(uniform))
    prediction = lables[best_idx]

    elapsed_ms = int((time.time() - started) * 1000)
    latency = {"t_total_ms": elapsed_ms, "note": "decoded media"}
    scores = dict(zip(lables, map(float, uniform)))
    return prediction, scores, latency
def predict_aud_img(audio, image):
    """Run the placeholder classifier on an audio clip plus a still image.

    ``audio`` is passed to ``load_audio_16k``; ``image`` is accepted as-is.
    Neither result is used yet: every label receives the same probability,
    and ``np.argmax`` resolves the tie to the first index, so the reported
    prediction is the first entry of ``lables``.

    Args:
        audio: Audio input, forwarded unchanged to ``load_audio_16k``.
        image: Image input; currently only bound to a local name.

    Returns:
        A 3-tuple ``(prediction, probabilities, latency)`` where
        ``prediction`` is a label name, ``probabilities`` maps each label name
        to a Python float, and ``latency`` holds the elapsed wall-clock time
        in milliseconds under ``"t_total_ms"`` plus a ``"note"`` string.
    """
    started = time.time()

    # Loaded inputs are placeholders until a real model consumes them.
    _waveform = load_audio_16k(audio)
    _frame = image

    n_labels = len(lables)
    uniform = np.ones(n_labels) / n_labels
    best_idx = int(np.argmax(uniform))
    prediction = lables[best_idx]

    elapsed_ms = int((time.time() - started) * 1000)
    latency = {"t_total_ms": elapsed_ms, "note": "loaded media"}
    scores = dict(zip(lables, map(float, uniform)))
    return prediction, scores, latency
# Two-tab UI: one tab takes a single video, the other an image + audio pair.
# Each tab shows the predicted label, the per-label probabilities and the
# latency dict returned by its callback.
with gr.Blocks(title="Scene Mood Detection") as demo:
    gr.Markdown("# Scene Mood Classifier\nUpload a short **video** or an **image + audio** pair.")

    with gr.Tab("Video"):
        v = gr.Video(sources=["upload"], height=240)
        btn_v = gr.Button("Analyze")
        out_v1 = gr.Label(label="Prediction")
        out_v2 = gr.JSON(label="Probabilities")
        out_v3 = gr.JSON(label="Latency (ms)")
        btn_v.click(predict_vid, inputs=[v], outputs=[out_v1, out_v2, out_v3])

    with gr.Tab("Image + Audio"):
        img = gr.Image(type="pil", height=240)
        aud = gr.Audio(sources=["upload"], type="filepath")
        btn_ia = gr.Button("Analyze")
        out_i1 = gr.Label(label="Prediction")
        out_i2 = gr.JSON(label="Probabilities")
        out_i3 = gr.JSON(label="Latency (ms)")
        # Inputs are bound to the callback positionally, so their order must
        # follow predict_aud_img(audio, image): audio first, then the image.
        btn_ia.click(predict_aud_img, inputs=[aud, img], outputs=[out_i1, out_i2, out_i3])
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()