import gradio as gr
from autodistill_clip import CLIP
from autodistill_metaclip import MetaCLIP
from autodistill.detection import CaptionOntology
from PIL import Image
import tempfile
clip_model = CLIP(None)
metaclip_model = MetaCLIP(None)
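# Note: both models are loaded once at startup without an ontology; each request
# below builds a CaptionOntology from the user's comma-separated labels and
# attaches it before predicting.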
# create the side-by-side interface
def clip_model_interface(image, text):
    # add a catch-all class so images that match none of the user's prompts are
    # not forced onto one of their labels
    text = text + ", something else"

    # strip whitespace so "cat, dog" and "cat,dog" yield the same prompts
    prompts = [t.strip() for t in text.split(",")]

    with tempfile.NamedTemporaryFile(suffix=".jpg") as temp:
        # Gradio passes the image as a numpy array; autodistill predicts from a
        # file path, so save it to a temporary JPEG first
        image = Image.fromarray(image.astype("uint8"), "RGB")
        image.save(temp.name)

        # map each prompt to itself as the class label
        ontology = CaptionOntology({p: p for p in prompts})
        clip_model.ontology = ontology

        predictions = clip_model.predict(temp.name)

        labels = [prompts[i] for i in predictions.class_id.tolist()]
        confidences = predictions.confidence.tolist()

        return dict(zip(labels, confidences))
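# Aside: CaptionOntology maps a prompt (dict key) to the class label returned
# for it (dict value). In this app prompt and label are identical, but they can
# differ; a purely illustrative sketch (the captions below are example values,
# not part of this app):
#
#     CaptionOntology({
#         "a photo of a cat": "cat",
#         "a photo of a dog": "dog",
#     })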
def metaclip_model_interface(image, text):
    # same preprocessing as the CLIP handler above
    text = text + ", something else"
    prompts = [t.strip() for t in text.split(",")]

    with tempfile.NamedTemporaryFile(suffix=".jpg") as temp:
        image = Image.fromarray(image.astype("uint8"), "RGB")
        image.save(temp.name)

        ontology = CaptionOntology({p: p for p in prompts})
        metaclip_model.ontology = ontology

        # confidence=0 keeps a score for every prompt rather than only the top match
        predictions = metaclip_model.predict(temp.name, confidence=0)

        labels = [prompts[i] for i in predictions.class_id.tolist()]
        confidences = predictions.confidence.tolist()

        return dict(zip(labels, confidences))
def combined_model_interface(input_image, input_text):
    # call each model's handler on the same inputs
    clip_output = clip_model_interface(input_image, input_text)
    metaclip_output = metaclip_model_interface(input_image, input_text)

    # return the results from both models as a tuple, one per output component
    return clip_output, metaclip_output
inputs = [
    gr.Image(label="Image"),
    gr.Textbox(label="Comma-separated labels"),
]

# use Gradio's current component API; the legacy gr.outputs module has been removed
outputs = [
    gr.Label(label="CLIP"),
    gr.Label(label="MetaCLIP"),
]
title = "CLIP vs MetaCLIP" | |
description = """ | |
CLIP is a zero-shot classification and embedding model developed by OpenAI. | |
MetaCLIP is a model that uses a CLIP architecture with an open dataset, developed by Meta AI. | |
Use this space to try out the models and see how they perform on your own images and text. | |
Note: Due to the way this space was implemented, CLIP will only return the top class. A fix is coming soon. | |
This project uses the following dependencies: | |
- [autodistill-clip](https://github.com/autodistill/autodistill-clip) | |
- [autodistill-metaclip](https://github.com/autodistill/autodistill-metaclip) | |
""" | |
gr.Interface(
    fn=combined_model_interface,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    allow_flagging="never",
).launch()
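# To run this app outside of Spaces you need, at minimum, gradio, autodistill,
# autodistill-clip, and autodistill-metaclip installed; exact versions are not
# pinned here and are left as an assumption, e.g.:
#
#     pip install gradio autodistill autodistill-clip autodistill-metaclip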