|
|
|
|
|
|
|
|
|
import os |
|
import glob |
|
from typing import Optional |
|
import PIL.Image |
|
import gradio as gr |
|
import app_novic |
|
|
|
|
|
IMAGE_EXTS = ('jpg', 'jpeg', 'png', 'webp') |
|
SAMPLE_IMAGES = sorted(image_path for image_ext in IMAGE_EXTS for image_path in glob.glob(os.path.join('sample_images', f'*.{image_ext}'))) |
|
|
|
|
|
MODEL_CHECKPOINTS = { |
|
'NOVIC SigLIP B/16 FT2': os.path.join('ovod_20240610_105233', 'ovod_chunk0899_20240612_005748.train'), |
|
'NOVIC SigLIP SO/14 FT2': os.path.join('ovod_20240626_001447', 'ovod_chunk0899_20240627_112729.train'), |
|
'NOVIC DFN-5B H/14-378 FT2': os.path.join('ovod_20240620_162925', 'ovod_chunk0899_20240621_202727.train'), |
|
'NOVIC DFN-5B H/14-378 FT0': os.path.join('ovod_20240628_142131', 'ovod_chunk0433_20240630_235415.train'), |
|
} |
|
DEFAULT_MODEL = 'NOVIC DFN-5B H/14-378 FT0' |
|
|
|
|
|
def get_checkpoint(model: str) -> str: |
|
return os.path.join('checkpoints', MODEL_CHECKPOINTS[model]) |
|
|
|
|
|
app_novic.get_model(checkpoint=get_checkpoint(model=DEFAULT_MODEL)) |
|
|
|
|
|
def classify_image(image: Optional[PIL.Image.Image], model: Optional[str]) -> dict[str, float]: |
|
if image is None or model is None: |
|
return {} |
|
return app_novic.classify_image(image=image, checkpoint=get_checkpoint(model=model)) |
|
|
|
|
|
with gr.Blocks( |
|
theme=None, |
|
analytics_enabled=True, |
|
title="🖼️ NOVIC Demo", |
|
fill_width=False, |
|
) as demo: |
|
|
|
gr.HTML("<h1 style='text-align: center; margin-bottom: 1rem'>🖼️ NOVIC: Unconstrained Open Vocabulary Image Classification</h1><div style='text-align: center'><i>Select</i> an example image below <b>OR</b> <i>Upload</i> an image file <b>OR</b> <i>Capture</i> a camera image <b>OR</b> <i>Copy-paste</i> an image from your clipboard ⇒ The label predictions on the right will update automatically!<br>Note that inference on GPU is naturally <i>MUCH</i> faster (real-time) than the CPU inference in this demo. CPU inference is also slightly numerically different than proper GPU inference.<br><span style=\"margin-right: 20px;\"><b>GitHub:</b> <a href=\"https://github.com/pallgeuer/novic\" target=\"_blank\">https://github.com/pallgeuer/novic</a></span><span><b>Paper:</b> <a href=\"https://arxiv.org/abs/2407.11211\" target=\"_blank\">https://arxiv.org/abs/2407.11211</a></span></div>") |
|
|
|
with gr.Row(equal_height=True): |
|
|
|
with gr.Column(scale=1): |
|
input_model = gr.Dropdown( |
|
choices=list(MODEL_CHECKPOINTS), |
|
value=DEFAULT_MODEL, |
|
type='value', |
|
multiselect=False, |
|
allow_custom_value=False, |
|
filterable=False, |
|
label='NOVIC model', |
|
show_label=True, |
|
interactive=True, |
|
) |
|
input_image = gr.Image( |
|
height=400, |
|
image_mode='RGB', |
|
type='pil', |
|
label='Input image', |
|
show_label=True, |
|
interactive=True, |
|
show_fullscreen_button=True, |
|
) |
|
|
|
with gr.Column(scale=1): |
|
output_label = gr.Label( |
|
num_top_classes=3, |
|
label='Predicted label', |
|
show_label=True, |
|
scale=1, |
|
show_heading=True, |
|
) |
|
|
|
with gr.Row(equal_height=True): |
|
gr.ClearButton( |
|
components=[input_image], |
|
value='Clear input image', |
|
variant='secondary', |
|
size='lg', |
|
scale=1, |
|
) |
|
gr.DeepLinkButton( |
|
variant='secondary', |
|
size='lg', |
|
scale=1, |
|
) |
|
|
|
gr.Examples( |
|
examples=SAMPLE_IMAGES, |
|
inputs=input_image, |
|
cache_examples=False, |
|
examples_per_page=100, |
|
label='Example images', |
|
) |
|
|
|
gr.on( |
|
triggers=[input_image.change, input_model.change], |
|
fn=classify_image, |
|
inputs=[input_image, input_model], |
|
outputs=output_label, |
|
api_name='classify', |
|
show_progress='full', |
|
) |
|
|
|
|
|
if __name__ == '__main__': |
|
demo.launch() |
|
|
|
|