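"""Scene Mood Detection demo (Gradio).

Classifies the mood of a scene from either a short video or an image + audio
pair. The model is currently a placeholder that returns uniform probabilities.
"""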
import gradio as gr
import time
import json
import numpy as np
from pathlib import Path
from utils_media import video_to_frame_audio, load_audio_16k
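# Assumed contracts for the utils_media helpers (inferred from usage below,
# not shown in this file):
#   video_to_frame_audio(path) -> (frame, audio16k): one representative frame
#       plus the clip's audio track resampled to 16 kHz.
#   load_audio_16k(path) -> waveform resampled to 16 kHz.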

HERE = Path(__file__).parent
LABELS_PATH = HERE / "labels.json"

labels = [x["name"] for x in json.loads(LABELS_PATH.read_text())["labels"]]
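# Expected labels.json shape (inferred from the access pattern above; the
# label names here are illustrative only):
#   {"labels": [{"name": "calm"}, {"name": "tense"}, {"name": "joyful"}]}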


def predict_vid(video):
    t0 = time.time()
    # Decode a representative frame and 16 kHz audio from the uploaded clip.
    frame, audio16k = video_to_frame_audio(video)
    # Placeholder model: uniform probabilities until a real fusion model is plugged in.
    probs = np.ones(len(labels)) / len(labels)
    pred = labels[int(np.argmax(probs))]
    lat = {"t_total_ms": int((time.time() - t0) * 1000), "note": "decoded media"}
    return pred, {k: float(v) for k, v in zip(labels, probs)}, lat

def predict_aud_img(audio, image):
    t0 = time.time()
    # Load the uploaded audio as a 16 kHz waveform; the PIL image is used as-is.
    wave = load_audio_16k(audio)
    frame = image
    # Placeholder model: uniform probabilities until a real fusion model is plugged in.
    probs = np.ones(len(labels)) / len(labels)
    pred = labels[int(np.argmax(probs))]
    lat = {"t_total_ms": int((time.time() - t0) * 1000), "note": "loaded media"}
    return pred, {k: float(v) for k, v in zip(labels, probs)}, lat


with gr.Blocks(title="Scene Mood Detection") as demo:
    gr.Markdown("# Scene Mood Classifier\nUpload a short **video** or an **image + audio** pair.")
    with gr.Tab("Video"):
        v = gr.Video(sources=["upload"], height=240)
        btn_v = gr.Button("Analyze")
        out_v1 = gr.Label(label="Prediction")
        out_v2 = gr.JSON(label="Probabilities")
        out_v3 = gr.JSON(label="Latency (ms)")
        btn_v.click(predict_vid, inputs=[v], outputs=[out_v1, out_v2, out_v3])
    with gr.Tab("Image + Audio"):
        img = gr.Image(type="pil", height=240)
        aud = gr.Audio(sources=["upload"], type="filepath")
        btn_ia = gr.Button("Analyze")
        out_i1 = gr.Label(label="Prediction")
        out_i2 = gr.JSON(label="Probabilities")
        out_i3 = gr.JSON(label="Latency (ms)")
        # Input order must match the function signature: (audio, image).
        btn_ia.click(predict_aud_img, inputs=[aud, img], outputs=[out_i1, out_i2, out_i3])

if __name__ == "__main__":
    demo.launch()
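
# Local usage (assuming this file is saved as app.py, next to labels.json
# and utils_media.py):
#   python app.py
# Gradio serves the UI at http://127.0.0.1:7860 by default.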