from spaces import GPU  # Hugging Face ZeroGPU: allocates a GPU for each decorated call
import gradio as gr
import torch
import os
import time
from torchvision import models
from joblib import load

from extractor.visualise_vit_layer import VitGenerator
from relax_vqa import get_deep_feature, process_video_feature, process_patches, get_frame_patches, flow_to_rgb, merge_fragments, concatenate_features
from extractor.vf_extract import process_video_residual
from model_regression import Mlp, preprocess_data
from demo_test_gpu import evaluate_video_quality, load_model


@GPU
def run_relax_vqa(video_path, is_finetune, framerate, video_type):
    if not os.path.exists(video_path):
        return "❌ No video uploaded or the uploaded file has expired. Please upload again."

    # Inference configuration; the pretrained MLP head is loaded from save_path.
    config = {
        'is_finetune': is_finetune,
        'framerate': framerate,
        'video_type': video_type,
        'save_path': 'model/',
        'train_data_name': 'lsvq_train',
        'select_criteria': 'byrmse',
        'video_path': video_path,
        'video_name': os.path.splitext(os.path.basename(video_path))[0]
    }

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # ImageNet-pretrained ResNet-50; weights= replaces the deprecated pretrained=True
    # (IMAGENET1K_V1 is the same checkpoint pretrained=True used to load).
    resnet50 = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1).to(device)
    # ViT-B/16 feature extractor
    vit = VitGenerator('vit_base', 16, device, evaluate=True, random=False, verbose=False)
    model_mlp = load_model(config, device)

    try:
        score, runtime = evaluate_video_quality(config, resnet50, vit, model_mlp, device)
        return f"Predicted Quality Score: {score:.4f} (in {runtime:.2f}s)"
    except Exception as e:
        return f"❌ Error: {str(e)}"
    finally:
        # Clean up Gradio's temporary upload once scoring is done.
        if "gradio" in video_path and os.path.exists(video_path):
            os.remove(video_path)


def toggle_dataset_visibility(is_finetune):
    return gr.update(visible=is_finetune)


with gr.Blocks() as demo:
    gr.Markdown("# 🎬 ReLaX-VQA Online Demo")
    gr.Markdown(
        "Upload a short video and get the predicted perceptual quality score using the ReLaX-VQA model. "
        "You can try our demo test video (fps = 24, dataset = konvid_1k).\n\n"
        "⚙️ This demo is currently running on a Hugging Face ZeroGPU Space: dynamic resources (NVIDIA A100)."
    )

    with gr.Row():
        with gr.Column(scale=2):
            video_input = gr.Video(label="Upload a Video (e.g. mp4)")
            framerate_slider = gr.Slider(label="Source Video Framerate (fps)", minimum=1, maximum=60, step=1, value=24)
            is_finetune_checkbox = gr.Checkbox(label="Use Finetuning?", value=False)
            dataset_dropdown = gr.Dropdown(
                label="Source Video Dataset for Finetuning",
                choices=["konvid_1k", "youtube_ugc", "live_vqc", "cvd_2014"],
                value="konvid_1k",
                visible=False
            )
            run_button = gr.Button("Run Prediction")
        with gr.Column(scale=1):
            output_box = gr.Textbox(label="Predicted Quality Score", lines=5)

    # Show the dataset dropdown only when finetuning is enabled.
    is_finetune_checkbox.change(
        fn=toggle_dataset_visibility,
        inputs=is_finetune_checkbox,
        outputs=dataset_dropdown
    )
    run_button.click(
        fn=run_relax_vqa,
        inputs=[video_input, is_finetune_checkbox, framerate_slider, dataset_dropdown],
        outputs=output_box
    )

demo.launch()