d-fine-demo / app.py
jagennath-hari's picture
update demo
26e8604
import gradio as gr
from transformers import AutoModel, AutoProcessor
from PIL import Image, ImageDraw, ImageFont
import logging
from datasets import load_dataset
# Module-level logger used by the demo class. Creating it before the root
# logger is configured is safe: handlers and effective level are resolved
# when a record is emitted, not when the logger object is created.
logger = logging.getLogger(__name__)
# Let INFO and above through the root logger (basicConfig does nothing if the
# root logger already has handlers installed).
logging.basicConfig(level=logging.INFO)
class DFineDemo:
    """Gradio demo comparing ground-truth annotations with D-FINE predictions.

    Construction eagerly loads the shared processor, every model variant and
    the whole ``test`` split of the pucks dataset, so no loading happens while
    serving a request.

    Attributes:
        processor: Pre-processor shared by all model variants.
        model_variants: Display name -> Hugging Face repository id.
        models: Display name -> loaded model.
        image_cache: Label -> ``{"input": image, "annotated": image}``.
        image_labels: Labels of ``image_cache`` in gallery order.
    """

    # Font used for the confidence labels; see ``_load_font`` for the fallback.
    _FONT_NAME = "DejaVuSans-Bold.ttf"
    _FONT_SIZE = 24

    # Markdown literals are kept at column 0 so their text does not depend on
    # the indentation of the surrounding code.
    _INTRO_MARKDOWN = """
## D-FINE Detection Demo
This demo compares annotated ground truth data (in **red**) and model predictions (in **blue**).
Use the **slider** to visually compare both views:
- The **left image** shows the annotated labels.
- The **right image** displays predictions from the selected D-FINE model, with each bounding box and its confidence score.
📂 **Training Dataset**: All D-FINE variants were trained on the [L&A Pucks Dataset](https://huggingface.co/datasets/Laudando-Associates-LLC/pucks) available on Hugging Face.
"""

    _CITATION_MARKDOWN = """
If you use **D-FINE** or its methods in your work, please cite the following BibTeX entry:
```latex
@misc{peng2024dfine,
title={D-FINE: Redefine Regression Task in DETRs as Fine-grained Distribution Refinement},
author={Yansong Peng and Hebei Li and Peixi Wu and Yueyi Zhang and Xiaoyan Sun and Feng Wu},
year={2024},
eprint={2410.13842},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
"""

    def __init__(self):
        """Load the processor, all model variants and the sample images."""
        self.processor = AutoProcessor.from_pretrained(
            "Laudando-Associates-LLC/d-fine", trust_remote_code=True
        )

        self.model_variants = {
            "D-FINE Nano": "Laudando-Associates-LLC/d-fine-nano",
            "D-FINE Small": "Laudando-Associates-LLC/d-fine-small",
            "D-FINE Medium": "Laudando-Associates-LLC/d-fine-medium",
            "D-FINE Large": "Laudando-Associates-LLC/d-fine-large",
            "D-FINE X-Large": "Laudando-Associates-LLC/d-fine-xlarge",
        }

        logger.info("Loading all D-FINE model variants into memory...")
        self.models = {
            name: AutoModel.from_pretrained(repo, trust_remote_code=True)
            for name, repo in self.model_variants.items()
        }

        dataset = load_dataset("Laudando-Associates-LLC/pucks", split="test")
        # Each example contributes its raw image and its pre-annotated twin.
        self.image_cache = {
            f"Test Image {i+1}": {
                "input": example["image"],
                "annotated": example["annotated_image"],
            }
            for i, example in enumerate(dataset)
        }
        self.image_labels = list(self.image_cache.keys())

    def _find_annotated(self, input_image):
        """Return the cached annotated image paired with ``input_image``.

        Cache entries are matched by image equality; when nothing matches,
        ``input_image`` itself is returned so the slider still has two sides.
        """
        for pair in self.image_cache.values():
            if pair["input"] == input_image:
                return pair["annotated"]
        return input_image

    @classmethod
    def _load_font(cls):
        """Return the label font, falling back to Pillow's built-in font.

        ``ImageFont.truetype`` raises ``OSError`` when the font file cannot be
        read; without the fallback a host lacking DejaVu would fail on every
        inference request.
        """
        try:
            return ImageFont.truetype(cls._FONT_NAME, size=cls._FONT_SIZE)
        except OSError:
            logger.warning(
                "Font %s is unavailable; using Pillow's default font.",
                cls._FONT_NAME,
            )
            return ImageFont.load_default()

    def run_inference(self, input_image, model_name, threshold):
        """Run the chosen model on ``input_image`` and build the comparison.

        Args:
            input_image: PIL image to run detection on (left untouched; the
                boxes are drawn on a copy).
            model_name: Key of ``self.models`` selecting the variant.
            threshold: Value forwarded to the model as ``conf_threshold``.

        Returns:
            A ``gr.update`` for the image slider whose value is the pair
            ``(annotated_image, predicted_image)``.

        Raises:
            KeyError: If ``model_name`` is not a loaded variant.
        """
        annotated = self._find_annotated(input_image)

        # Predict on a copy so the cached sample image is never drawn on.
        image = input_image.copy()
        inputs = self.processor(image)
        outputs = self.models[model_name](**inputs, conf_threshold=threshold)

        draw = ImageDraw.Draw(image)
        font = self._load_font()
        for result in outputs:
            for box, score in zip(result["boxes"], result["scores"]):
                x1, y1, x2, y2 = box.tolist()
                draw.rectangle([x1, y1, x2, y2], outline="blue", width=5)
                # Put the score just above the box, clamped to the top edge.
                draw.text((x1, max(0, y1 - 25)), f"{score:.2f}", fill="blue", font=font)

        # Return: (annotated_image, predicted_image)
        return gr.update(value=(annotated, image), slider_position=50, format="png", type="pil")

    def select_image(self, evt: gr.SelectData):
        """Return the input image of the gallery item that was clicked.

        Returns an empty ``gr.update()`` (no change) when the event carries no
        index.
        """
        if evt is None or evt.index is None:
            return gr.update()
        label = self.image_labels[evt.index]
        return self.image_cache[label]["input"]

    def launch(self):
        """Build the Gradio interface, wire its events and start serving."""
        with gr.Blocks(theme=gr.themes.Ocean()) as demo:
            gr.Markdown(self._INTRO_MARKDOWN)

            output = gr.ImageSlider(
                type="pil",
                label="Detected Output",
                height=500,
                width=880,
                slider_position=50,
                format="png",
            )

            # NOTE(review): the row is assumed to hold only the two controls,
            # with the button on its own line below - confirm the layout.
            with gr.Row():
                model_selector = gr.Radio(
                    choices=list(self.model_variants.keys()),
                    label="Choose D-FINE model",
                    value="D-FINE Nano",
                )
                threshold_slider = gr.Slider(
                    minimum=0.1,
                    maximum=0.955,
                    value=0.4,
                    step=0.05,
                    label="Confidence Threshold",
                )

            run_btn = gr.Button("Run Detection")

            # The state must hold the *input image* of the first sample:
            # run_inference receives this value directly and treats it as a
            # PIL image, so the cache entry dict itself would not work.
            selected_image = gr.State(
                value=self.image_cache[self.image_labels[0]]["input"]
            )

            gr.Markdown("### Select a sample image below:")
            gallery = gr.Gallery(
                value=[(pair["input"], label) for label, pair in self.image_cache.items()],
                label=None,
                show_label=False,
                columns=[3],
                object_fit="cover",
                height="auto",
                allow_preview=False,
            )

            # Clicking a thumbnail only updates the state; detection runs on
            # demand when the button is pressed.
            gallery.select(
                fn=self.select_image,
                inputs=[],
                outputs=selected_image,
            )
            run_btn.click(
                fn=self.run_inference,
                inputs=[selected_image, model_selector, threshold_slider],
                outputs=output,
            )

            gr.Markdown("### Citation")
            gr.Markdown(self._CITATION_MARKDOWN)

        demo.launch()
# Script entry point: constructing the demo loads the models and sample
# images, then launch() builds the UI and starts serving it.
if __name__ == "__main__":
    DFineDemo().launch()