#!/usr/bin/env python3
# NOVIC Gradio Space
"""Gradio demo app for NOVIC open vocabulary image classification.

Builds a Gradio Blocks UI in which the user picks a NOVIC model checkpoint
and supplies an image; the predicted labels are refreshed whenever either
input changes. Model loading and inference are delegated to ``app_novic``.
"""

# Imports
import os
import glob
from typing import Optional
import PIL.Image
import gradio as gr
import app_novic

# Sample images
IMAGE_EXTS = ('jpg', 'jpeg', 'png', 'webp')
SAMPLE_IMAGES = sorted(
    image_path
    for image_ext in IMAGE_EXTS
    for image_path in glob.glob(os.path.join('sample_images', f'*.{image_ext}'))
)

# Model checkpoints (display name -> checkpoint path relative to the 'checkpoints' directory)
MODEL_CHECKPOINTS = {
    'NOVIC SigLIP B/16 FT2': os.path.join('ovod_20240610_105233', 'ovod_chunk0899_20240612_005748.train'),
    'NOVIC SigLIP SO/14 FT2': os.path.join('ovod_20240626_001447', 'ovod_chunk0899_20240627_112729.train'),
    'NOVIC DFN-5B H/14-378 FT2': os.path.join('ovod_20240620_162925', 'ovod_chunk0899_20240621_202727.train'),
    'NOVIC DFN-5B H/14-378 FT0': os.path.join('ovod_20240628_142131', 'ovod_chunk0433_20240630_235415.train'),
}
DEFAULT_MODEL = 'NOVIC DFN-5B H/14-378 FT0'

# Header shown at the top of the page
# NOTE(review): The HTML tags below are a reconstruction (heading, paragraph, line breaks, links) around the
#               original text; confirm the markup against the intended page design.
HEADER_HTML = """
<h1>🖼️ NOVIC: Unconstrained Open Vocabulary Image Classification</h1>
<p>
Select an example image below OR Upload an image file OR Capture a camera image OR Copy-paste an image from your clipboard ⇒ The label predictions on the right will update automatically!<br>
Note that inference on GPU is naturally MUCH faster (real-time) than the CPU inference in this demo. CPU inference is also slightly numerically different than proper GPU inference.<br>
GitHub: <a href="https://github.com/pallgeuer/novic">https://github.com/pallgeuer/novic</a> | Paper: <a href="https://arxiv.org/abs/2407.11211">https://arxiv.org/abs/2407.11211</a>
</p>
"""


# Get a checkpoint path
def get_checkpoint(model: str) -> str:
    """Return the checkpoint path for a model display name.

    Args:
        model: A key of MODEL_CHECKPOINTS.

    Returns:
        The checkpoint path, prefixed with the 'checkpoints' directory.

    Raises:
        KeyError: If model is not a key of MODEL_CHECKPOINTS.
    """
    return os.path.join('checkpoints', MODEL_CHECKPOINTS[model])


# Ensure the default model is preloaded
app_novic.get_model(checkpoint=get_checkpoint(model=DEFAULT_MODEL))


# Classify an image
def classify_image(image: Optional[PIL.Image.Image], model: Optional[str]) -> dict[str, float]:
    """Classify an image with the selected model.

    Args:
        image: The input image, or None if no image is currently set.
        model: The selected model display name, or None if nothing is selected.

    Returns:
        An empty dict if either input is None, otherwise whatever
        app_novic.classify_image returns for the image and resolved checkpoint.
    """
    if image is None or model is None:
        return {}
    return app_novic.classify_image(image=image, checkpoint=get_checkpoint(model=model))


# Gradio UI
with gr.Blocks(
    theme=None,
    analytics_enabled=True,
    title="🖼️ NOVIC Demo",
    fill_width=False,
) as demo:

    gr.HTML(HEADER_HTML)

    with gr.Row(equal_height=True):

        with gr.Column(scale=1):
            input_model = gr.Dropdown(
                choices=list(MODEL_CHECKPOINTS),
                value=DEFAULT_MODEL,
                type='value',
                multiselect=False,
                allow_custom_value=False,
                filterable=False,
                label='NOVIC model',
                show_label=True,
                interactive=True,
            )
            input_image = gr.Image(
                height=400,
                image_mode='RGB',
                type='pil',
                label='Input image',
                show_label=True,
                interactive=True,
                show_fullscreen_button=True,
            )

        with gr.Column(scale=1):
            output_label = gr.Label(
                num_top_classes=3,
                label='Predicted label',
                show_label=True,
                scale=1,
                show_heading=True,
            )
            # NOTE(review): Button row assumed to sit beneath the label inside this column; confirm the intended nesting.
            with gr.Row(equal_height=True):
                gr.ClearButton(
                    components=[input_image],
                    value='Clear input image',
                    variant='secondary',
                    size='lg',
                    scale=1,
                )
                gr.DeepLinkButton(
                    variant='secondary',
                    size='lg',
                    scale=1,
                )

    gr.Examples(
        examples=SAMPLE_IMAGES,
        inputs=input_image,
        cache_examples=False,
        examples_per_page=100,
        label='Example images',
    )

    # Re-run classification whenever the image or the selected model changes
    gr.on(
        triggers=[input_image.change, input_model.change],
        fn=classify_image,
        inputs=[input_image, input_model],  # noqa
        outputs=output_label,
        api_name='classify',
        show_progress='full',
    )

# Run demo
if __name__ == '__main__':
    demo.launch()
# EOF