|
import gradio as gr |
|
import cv2 |
|
import requests |
|
import os |
|
from collections import deque |
|
|
|
from ultralytics import YOLO |
|
|
|
# Sample assets hosted alongside the Hugging Face Space (two still images and
# one video); downloaded below and used as Gradio examples.
file_urls = [
    'https://huggingface.co/spaces/iamsuman/ripe-and-unripe-tomatoes-detection/resolve/main/samples/riped_tomato_93.jpeg?download=true',
    'https://huggingface.co/spaces/iamsuman/ripe-and-unripe-tomatoes-detection/resolve/main/samples/unriped_tomato_18.jpeg?download=true',
    'https://huggingface.co/spaces/iamsuman/ripe-and-unripe-tomatoes-detection/resolve/main/samples/tomatoes.mp4?download=true',
]
|
|
|
def download_file(url, save_name):
    """Download ``url`` to ``save_name`` unless the file already exists.

    Args:
        url: HTTP(S) URL to fetch.
        save_name: Local path the response body is written to.

    Raises:
        requests.HTTPError: if the server responds with an error status,
            instead of silently saving the error page as the asset.
    """
    if os.path.exists(save_name):
        return  # already cached from a previous run
    response = requests.get(url, timeout=60)  # bounded wait instead of hanging forever
    response.raise_for_status()
    # Context manager guarantees the handle is closed even if the write fails
    # (the original used open(...).write(...) and leaked the handle).
    with open(save_name, 'wb') as f:
        f.write(response.content)
|
|
|
# Fetch each sample asset once at startup: the video gets a fixed name,
# images are numbered by their position in file_urls (matching `path` below).
for i, url in enumerate(file_urls):
    # Use the loop variable directly instead of re-indexing file_urls[i];
    # "video.mp4" needs no f-string (it has no placeholders).
    if 'mp4' in url:
        download_file(url, "video.mp4")
    else:
        download_file(url, f"image_{i}.jpg")
|
|
|
# Fine-tuned YOLO weights; 'best.pt' must be present in the working directory.
model = YOLO('best.pt')
# Gradio example inputs for the image and video tabs (files downloaded above).
path = [['image_0.jpg'], ['image_1.jpg']]
video_path = [['video.mp4']]
|
|
|
|
|
|
|
|
|
def show_preds_image(image_path):
    """Run the YOLO model on one image and draw labelled detection boxes.

    Args:
        image_path: Filesystem path to the input image (Gradio "filepath").

    Returns:
        The annotated image as an RGB numpy array for the gr.Image output.
    """
    image = cv2.imread(image_path)
    outputs = model.predict(source=image_path)
    results = outputs[0].cpu().numpy()

    names = model.model.names
    boxes = results.boxes

    for box, conf, cls in zip(boxes.xyxy, boxes.conf, boxes.cls):
        x1, y1, x2, y2 = map(int, box)
        class_name = names[int(cls)]

        # Red (BGR order) for ripe tomatoes, green for unripe.
        if class_name.lower() == 'ripe':
            color = (0, 0, 255)
        else:
            color = (0, 255, 0)

        cv2.rectangle(
            image,
            (x1, y1),
            (x2, y2),
            color=color,
            thickness=2,
            lineType=cv2.LINE_AA
        )

        # Class name plus confidence, just above the box.
        label = f"{class_name.capitalize()}: {conf:.2f}"
        cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    0.9, color, 2, cv2.LINE_AA)

    # OpenCV works in BGR; Gradio expects RGB.
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
|
|
|
|
# Single-image detection tab: takes a file path, returns the annotated frame.
inputs_image = [gr.components.Image(type="filepath", label="Input Image")]
outputs_image = [gr.components.Image(type="numpy", label="Output Image")]

interface_image = gr.Interface(
    fn=show_preds_image,
    inputs=inputs_image,
    outputs=outputs_image,
    title="Ripe And Unripe Tomatoes Detection",
    examples=path,
    cache_examples=False,
)
|
|
|
def show_preds_video_batch_centered(video_path, batch_size=16, iou_threshold=0.5):
    """Stream annotated video frames while counting unique tomatoes.

    Frames are buffered and run through the model in batches of ``batch_size``.
    A simple IoU matcher assigns a stable id to each detection so the same
    tomato is not counted twice; per-frame counts and running totals are drawn
    centered near the top of every frame.

    Args:
        video_path: Path to the input video (Gradio "Video" component value).
        batch_size: Number of frames accumulated before each model call.
        iou_threshold: Minimum IoU for a detection to be treated as an object
            already seen (and so reuse its id).

    Yields:
        Annotated frames as RGB numpy arrays, one per input frame.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error: Could not open video.")
        return  # ends the generator without yielding anything

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # for centering text
    names = model.model.names

    # obj_id -> (class_name, last_seen_box); grows by one entry per new object.
    unique_objects = {}
    next_id = 0
    total_ripe, total_unripe = 0, 0

    frame_buffer = deque()

    def compute_iou(box1, box2):
        # Standard intersection-over-union of two (x1, y1, x2, y2) boxes.
        xA = max(box1[0], box2[0])
        yA = max(box1[1], box2[1])
        xB = min(box1[2], box2[2])
        yB = min(box1[3], box2[3])

        inter_area = max(0, xB - xA) * max(0, yB - yA)
        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union_area = box1_area + box2_area - inter_area
        return inter_area / union_area if union_area > 0 else 0

    def match_or_register_object(cls_name, box):
        # Reuse the id of the first stored object whose box overlaps `box`
        # above the threshold (refreshing its class and box); otherwise
        # register a new id and bump the matching running total.
        nonlocal next_id, total_ripe, total_unripe

        for obj_id, (existing_cls, existing_box) in unique_objects.items():
            if compute_iou(existing_box, box) > iou_threshold:
                unique_objects[obj_id] = (cls_name, box)
                return obj_id

        unique_objects[next_id] = (cls_name, box)
        if cls_name.lower() == "ripe":
            total_ripe += 1
        else:
            total_unripe += 1
        next_id += 1
        return next_id - 1

    def process_batch(frames, results):
        # Annotate each frame of the batch in place and yield it in RGB.
        for frame, output in zip(frames, results):
            current_ripe, current_unripe = set(), set()

            if output.boxes:
                boxes = output.boxes
                cls_ids = boxes.cls.cpu().numpy().astype(int)

                for box, cls_id in zip(boxes.xyxy, cls_ids):
                    x1, y1, x2, y2 = map(int, box)
                    class_name = names[cls_id]

                    obj_id = match_or_register_object(class_name, (x1, y1, x2, y2))

                    # Red (BGR) for ripe, green for unripe.
                    if class_name.lower() == "ripe":
                        current_ripe.add(obj_id)
                        color = (0, 0, 255)
                    else:
                        current_unripe.add(obj_id)
                        color = (0, 255, 0)

                    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                    cv2.putText(frame, f"{class_name.capitalize()} ID:{obj_id}",
                                (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Counts for this frame only, horizontally centered near the top.
            current_text = f"Current β Ripe: {len(current_ripe)} | Unripe: {len(current_unripe)}"
            (text_w, _), _ = cv2.getTextSize(current_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
            text_x = (frame_width - text_w) // 2
            cv2.putText(frame, current_text, (text_x, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

            # Running totals across the whole video, on a second line.
            total_text = f"Total Seen β Ripe: {total_ripe} | Unripe: {total_unripe}"
            (text_w, _), _ = cv2.getTextSize(total_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
            text_x = (frame_width - text_w) // 2
            cv2.putText(frame, total_text, (text_x, 80),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (200, 200, 0), 2)

            yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_buffer.append(frame)

        if len(frame_buffer) == batch_size:
            # persist=True asks the tracker to keep its state across calls —
            # NOTE(review): the ByteTrack ids are not actually used below; the
            # IoU matcher above does the id assignment.
            results = model.track(source=list(frame_buffer), persist=True, tracker="bytetrack.yaml", verbose=False)
            yield from process_batch(frame_buffer, results)
            frame_buffer.clear()

    # Flush the final, possibly partial, batch.
    if frame_buffer:
        results = model.track(source=list(frame_buffer), persist=True, tracker="bytetrack.yaml", verbose=False)
        yield from process_batch(frame_buffer, results)

    cap.release()
    print(f"Final Totals β Ripe: {total_ripe}, Unripe: {total_unripe}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Video detection tab: the generator function streams annotated frames, so
# the output component is a single image that updates per frame.
inputs_video = [gr.components.Video(label="Input Video")]
outputs_video = [gr.components.Image(type="numpy", label="Output Image")]

interface_video = gr.Interface(
    fn=show_preds_video_batch_centered,
    inputs=inputs_video,
    outputs=outputs_video,
    title="Ripe And Unripe Tomatoes Detection",
    examples=video_path,
    cache_examples=False,
)
|
|
|
# Combine both interfaces into a two-tab app and launch it with a public
# share link; queue() is enabled before launch.
app = gr.TabbedInterface(
    [interface_image, interface_video],
    tab_names=['Image inference', 'Video inference'],
)
app.queue().launch(share=True)