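# Gradio app that runs the same zero-shot classification prompt through CLIP
# and MetaCLIP side by side so their confidence scores can be compared.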
import gradio as gr
from autodistill_clip import CLIP
from autodistill_metaclip import MetaCLIP
from autodistill.detection import CaptionOntology
from PIL import Image
import tempfile
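
# Instantiate each model once at startup. The ontology is a placeholder here
# and is replaced per request before prediction.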
clip_model = CLIP(None)
metaclip_model = MetaCLIP(None)

# Build a prediction function for each model; the two are shown side by side.
def clip_model_interface(image, text):
    # Append a catch-all class so the model has a fallback label to choose.
    text = text + ", something else"
    labels = [t.strip() for t in text.split(",")]

    with tempfile.NamedTemporaryFile(suffix=".jpg") as temp:
        # Autodistill models predict from a file path, so save the uploaded image first.
        image = Image.fromarray(image.astype("uint8"), "RGB")
        image.save(temp.name)

        # Map each label to itself: the caption sent to the model is the class name.
        clip_model.ontology = CaptionOntology({t: t for t in labels})

        # Called with the default confidence threshold, so only the top class
        # is returned (see the note in the description below).
        predictions = clip_model.predict(temp.name)

        predicted_labels = [labels[i] for i in predictions.class_id.tolist()]
        confidences = predictions.confidence.tolist()

        return dict(zip(predicted_labels, confidences))

def metaclip_model_interface(image, text):
    # Same flow as the CLIP function above, with a fallback class appended.
    text = text + ", something else"
    labels = [t.strip() for t in text.split(",")]

    with tempfile.NamedTemporaryFile(suffix=".jpg") as temp:
        image = Image.fromarray(image.astype("uint8"), "RGB")
        image.save(temp.name)

        metaclip_model.ontology = CaptionOntology({t: t for t in labels})

        # confidence=0 keeps every class in the results rather than only the top one.
        predictions = metaclip_model.predict(temp.name, confidence=0)

        predicted_labels = [labels[i] for i in predictions.class_id.tolist()]
        confidences = predictions.confidence.tolist()

        return dict(zip(predicted_labels, confidences))

def combined_model_interface(input_image, input_text):
    # Run both models on the same image and prompt; Gradio unpacks the returned
    # tuple into the two Label outputs defined below.
    clip_output = clip_model_interface(input_image, input_text)
    metaclip_output = metaclip_model_interface(input_image, input_text)

    return clip_output, metaclip_output

inputs = [
    "image",
    "text"
]
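
# Each model function returns a {label: confidence} dict, which gr.Label
# renders as ranked confidence bars.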
outputs = [
    # gr.outputs.* was removed in recent Gradio releases; use the component
    # classes directly instead.
    gr.Label(label="CLIP"),
    gr.Label(label="MetaCLIP")
]

title = "CLIP vs MetaCLIP"
description = """
CLIP is a zero-shot classification and embedding model developed by OpenAI.
MetaCLIP, developed by Meta AI, uses the CLIP architecture and is trained on an openly curated dataset.

Use this space to try out the models and see how they perform on your own images and text.

Note: Due to the way this space was implemented, CLIP will only return the top class. A fix is coming soon.

This project uses the following dependencies:

- [autodistill-clip](https://github.com/autodistill/autodistill-clip)
- [autodistill-metaclip](https://github.com/autodistill/autodistill-metaclip)
"""

gr.Interface(
    fn=combined_model_interface,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    # Recent Gradio expects a string here; the old boolean form and the
    # layout= argument are deprecated.
    allow_flagging="never"
).launch()
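
# launch() starts a local Gradio server; run this file directly
# (e.g. `python app.py`, filename assumed) to serve the demo.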