import gradio as gr
from autodistill_clip import CLIP
from autodistill_metaclip import MetaCLIP
from autodistill.detection import CaptionOntology
from PIL import Image
import tempfile
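
# Load both models once at startup. Each is constructed without an ontology;
# the ontology is assigned per request inside the handlers below.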
clip_model = CLIP(None)
metaclip_model = MetaCLIP(None)
# Create the side-by-side interface: one prediction handler per model.
def clip_model_interface(image, text):
    # Append a fallback class so the model has an "other" option to choose.
    text = text + ", something else"
    classes = [t.strip() for t in text.split(",")]
    with tempfile.NamedTemporaryFile(suffix=".jpg") as temp:
        image = Image.fromarray(image.astype("uint8"), "RGB")
        image.save(temp.name)
        ontology = CaptionOntology({t: t for t in classes})
        clip_model.ontology = ontology
        predictions = clip_model.predict(temp.name)
        labels = [classes[i] for i in predictions.class_id.tolist()]
        confidences = predictions.confidence.tolist()
        return {k: v for k, v in zip(labels, confidences)}
def metaclip_model_interface(image, text):
    # Same flow as above, but with MetaCLIP and no confidence cutoff.
    text = text + ", something else"
    classes = [t.strip() for t in text.split(",")]
    with tempfile.NamedTemporaryFile(suffix=".jpg") as temp:
        image = Image.fromarray(image.astype("uint8"), "RGB")
        image.save(temp.name)
        ontology = CaptionOntology({t: t for t in classes})
        metaclip_model.ontology = ontology
        predictions = metaclip_model.predict(temp.name, confidence=0)
        labels = [classes[i] for i in predictions.class_id.tolist()]
        confidences = predictions.confidence.tolist()
        return {k: v for k, v in zip(labels, confidences)}
def combined_model_interface(input_image, input_text):
    # Call the first function
    clip_output = clip_model_interface(input_image, input_text)
    # Call the second function
    metaclip_output = metaclip_model_interface(input_image, input_text)
    # Return the results from both functions as a tuple
    return clip_output, metaclip_output
inputs = [
    "image",
    "text"
]
outputs = [
    gr.Label(label="CLIP"),
    gr.Label(label="MetaCLIP")
]
title = "CLIP vs MetaCLIP"
description = """
CLIP is a zero-shot classification and embedding model developed by OpenAI.
MetaCLIP is a model developed by Meta AI that uses the CLIP architecture with an open dataset.
Use this space to try out the models and see how they perform on your own images and text.
Note: Due to the way this space was implemented, CLIP will only return the top class. A fix is coming soon.
This project uses the following dependencies:
- [autodistill-clip](https://github.com/autodistill/autodistill-clip)
- [autodistill-metaclip](https://github.com/autodistill/autodistill-metaclip)
"""
gr.Interface(
    fn=combined_model_interface,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    allow_flagging="never"
).launch()
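
# To try the app locally (assuming gradio, autodistill-clip, and
# autodistill-metaclip are installed), run `python app.py` and open
# the local URL that Gradio prints.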