#!/usr/bin/env python3
# NOVIC Gradio Space
# Imports
import os
import glob
from typing import Optional
import PIL.Image
import gradio as gr
import app_novic
# Sample images
# File extensions (without the leading dot) that are globbed for below
IMAGE_EXTS = ('jpg', 'jpeg', 'png', 'webp')
# Paths of all files directly inside sample_images/ that match one of the extensions, sorted by path
SAMPLE_IMAGES = sorted(
    sample_path
    for pattern in (os.path.join('sample_images', f'*.{ext}') for ext in IMAGE_EXTS)
    for sample_path in glob.glob(pattern)
)
# Model checkpoints
# Maps each model name offered in the UI to its checkpoint file, given as <run directory>/<checkpoint file>
MODEL_CHECKPOINTS = {
    model_name: os.path.join(run_dir, train_file)
    for model_name, run_dir, train_file in (
        ('NOVIC SigLIP B/16 FT2', 'ovod_20240610_105233', 'ovod_chunk0899_20240612_005748.train'),
        ('NOVIC SigLIP SO/14 FT2', 'ovod_20240626_001447', 'ovod_chunk0899_20240627_112729.train'),
        ('NOVIC DFN-5B H/14-378 FT2', 'ovod_20240620_162925', 'ovod_chunk0899_20240621_202727.train'),
        ('NOVIC DFN-5B H/14-378 FT0', 'ovod_20240628_142131', 'ovod_chunk0433_20240630_235415.train'),
    )
}
# Model that is selected initially (must be a key of MODEL_CHECKPOINTS)
DEFAULT_MODEL = 'NOVIC DFN-5B H/14-378 FT0'
# Get a checkpoint path
def get_checkpoint(model: str) -> str:
    """Return the checkpoint path for a model name.

    The path is the model's MODEL_CHECKPOINTS entry prefixed with the
    'checkpoints' directory. Raises KeyError if the model name is not a key
    of MODEL_CHECKPOINTS.
    """
    relative_path = MODEL_CHECKPOINTS[model]
    return os.path.join('checkpoints', relative_path)
# Ensure the default model is preloaded
# Runs at import time, before the UI is built (presumably so the first request does not pay the load cost -- confirm in app_novic)
app_novic.get_model(
    checkpoint=get_checkpoint(DEFAULT_MODEL),
)
# Classify an image
def classify_image(image: Optional[PIL.Image.Image], model: Optional[str]) -> dict[str, float]:
    """Classify an image with the selected model.

    Returns an empty dict when either the image or the model is missing,
    otherwise the result of app_novic.classify_image for the image and the
    checkpoint path of the given model.
    """
    if image is not None and model is not None:
        checkpoint = get_checkpoint(model=model)
        return app_novic.classify_image(image=image, checkpoint=checkpoint)
    # Nothing to classify (e.g. the input image was cleared)
    return {}
# Gradio UI
with gr.Blocks(
    theme=None,
    analytics_enabled=True,
    title="🖼️ NOVIC Demo",
    fill_width=False,
) as demo:

    # Page header: title, usage instructions, performance note and project links
    # NOTE: The literal must be triple-quoted as it spans multiple lines, and the two links need separate anchors (the markup is a reconstruction, the visible text is unchanged)
    gr.HTML("""
        <h1>🖼️ NOVIC: Unconstrained Open Vocabulary Image Classification</h1>
        <p>
            <b>Select an example image below</b><br>
            <i>OR</i> <b>Upload an image file</b><br>
            <i>OR</i> <b>Capture a camera image</b><br>
            <i>OR</i> <b>Copy-paste an image from your clipboard</b> ⇒ The label predictions on the right will update automatically!
        </p>
        <p>
            Note that inference on GPU is naturally
            <i>MUCH</i> faster (real-time) than the CPU inference in this demo. CPU inference is also slightly numerically different than proper GPU inference.
        </p>
        <p>
            GitHub: <a href="https://github.com/pallgeuer/novic">https://github.com/pallgeuer/novic</a><br>
            Paper: <a href="https://arxiv.org/abs/2407.11211">https://arxiv.org/abs/2407.11211</a>
        </p>
    """)

    with gr.Row(equal_height=True):

        # Left column: model selection and input image
        with gr.Column(scale=1):
            input_model = gr.Dropdown(
                choices=list(MODEL_CHECKPOINTS),
                value=DEFAULT_MODEL,
                type='value',
                multiselect=False,
                allow_custom_value=False,
                filterable=False,
                label='NOVIC model',
                show_label=True,
                interactive=True,
            )
            input_image = gr.Image(
                height=400,
                image_mode='RGB',
                type='pil',  # classify_image() expects a PIL image
                label='Input image',
                show_label=True,
                interactive=True,
                show_fullscreen_button=True,
            )

        # Right column: predicted labels
        with gr.Column(scale=1):
            output_label = gr.Label(
                num_top_classes=3,
                label='Predicted label',
                show_label=True,
                scale=1,
                show_heading=True,
            )

    # Button row: clear the input image / deep link button
    with gr.Row(equal_height=True):
        gr.ClearButton(
            components=[input_image],
            value='Clear input image',
            variant='secondary',
            size='lg',
            scale=1,
        )
        gr.DeepLinkButton(
            variant='secondary',
            size='lg',
            scale=1,
        )

    # Example images that can be loaded into the input image component
    gr.Examples(
        examples=SAMPLE_IMAGES,
        inputs=input_image,
        cache_examples=False,
        examples_per_page=100,
        label='Example images',
    )

    # Re-run the classification whenever the input image or the selected model changes
    gr.on(
        triggers=[input_image.change, input_model.change],
        fn=classify_image,
        inputs=[input_image, input_model],  # noqa
        outputs=output_label,
        api_name='classify',
        show_progress='full',
    )
# Run demo
# Only launch the Gradio app when this file is executed as a script (not when it is imported)
if __name__ == '__main__':
    demo.launch()
# EOF