|
import xml.etree.ElementTree as ET |
|
import gradio as gr |
|
import PIL.Image as Image |
|
import numpy as np |
|
import cv2 |
|
from ultralytics import ASSETS, YOLOv10 |
|
from exiftool import ExifToolHelper |
|
from geopy.distance import geodesic |
|
import folium |
|
import base64 |
|
import supervision as sv |
|
import os |
|
|
|
|
|
# Frame dimensions in pixels (width x height).
IMAGE_WIDTH, IMAGE_HEIGHT = 4000, 3000

# YOLOv10 detector, loaded once at import time from the local weights file.
model = YOLOv10("weights/yolov10m-e100-b16-full-best.pt")

# Directory that receives uploaded images; created at import time if absent.
UPLOAD_DIR = "uploads"
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
|
|
|
|
|
|
def calculate_ground_distance(altitude, fov_deg, image_dimension, pixel_distance):
    """Convert a pixel offset along one image axis into a ground distance.

    The full ground footprint along the axis is ``2 * altitude * tan(fov / 2)``;
    the result is the fraction ``pixel_distance / image_dimension`` of it.

    Args:
        altitude: Camera height above the ground (result uses the same unit).
        fov_deg: Field of view along the axis, in degrees.
        image_dimension: Image size along the axis, in pixels.
        pixel_distance: Signed pixel offset along the axis.

    Returns:
        The signed ground distance corresponding to ``pixel_distance``.
    """
    half_fov_rad = np.radians(fov_deg) / 2
    footprint = 2 * altitude * np.tan(half_fov_rad)
    fraction_of_frame = pixel_distance / image_dimension
    return footprint * fraction_of_frame
|
|
|
|
|
def get_gps_coordinates(lat, lon, north_offset, east_offset):
    """Shift a GPS position by metric offsets and return the new (lat, lon).

    The point is first moved ``north_offset`` metres along bearing 0 and the
    result is then moved ``east_offset`` metres along bearing 90, both using
    geodesic distances.

    Args:
        lat: Starting latitude.
        lon: Starting longitude.
        north_offset: Distance in metres to travel along bearing 0.
        east_offset: Distance in metres to travel along bearing 90.

    Returns:
        Tuple ``(latitude, longitude)`` of the shifted point.
    """
    start = (lat, lon)
    after_north_leg = geodesic(meters=north_offset).destination(start, 0)
    after_east_leg = geodesic(meters=east_offset).destination(after_north_leg, 90)
    return after_east_leg.latitude, after_east_leg.longitude
|
|
|
def extract_xmp_metadata(xmp_data):
    """Extract DJI flight parameters from an XMP packet.

    Every ``rdf:Description`` element in the packet is inspected, so the
    values are found even when they do not sit on the first description.

    Args:
        xmp_data: The XMP packet as ``str`` or ``bytes`` (anything accepted by
            ``ET.fromstring``).

    Returns:
        Tuple ``(relative_altitude, gimbal_yaw_degree, gimbal_pitch_degree)``
        of floats. An attribute that is absent from the packet (including the
        case where the packet has no ``rdf:Description`` at all) is reported
        as ``0.0``.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xmp_data`` is not well-formed XML.
        ValueError: If an attribute is present but not a valid number.
    """
    rdf_ns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
    dji_ns = 'http://www.dji.com/drone-dji/1.0/'

    root = ET.fromstring(xmp_data)
    descriptions = root.findall(f'.//{{{rdf_ns}}}Description')

    def read_attribute(name):
        # Attributes are namespace-qualified, hence the "{uri}local" key form.
        key = f'{{{dji_ns}}}{name}'
        for description in descriptions:
            value = description.get(key)
            if value is not None:
                return float(value)
        return 0.0

    relative_altitude = read_attribute('RelativeAltitude')
    gimbal_yaw_degree = read_attribute('GimbalYawDegree')
    gimbal_pitch_degree = read_attribute('GimbalPitchDegree')

    return relative_altitude, gimbal_yaw_degree, gimbal_pitch_degree
|
|
|
def save_image_with_metadata(img, img_path):
    """Write ``img`` to ``img_path``, carrying over its EXIF block if present.

    Args:
        img: Image object exposing ``format``, ``info`` and ``save`` (a PIL
            image in this application).
        img_path: Destination path.

    The file is encoded in the image's own format, or as JPEG when the image
    reports no format. When ``img.info`` holds an ``"exif"`` entry it is
    passed to ``save`` so the metadata is not dropped by the re-encode.
    """
    save_options = {'format': img.format or 'JPEG'}

    exif_data = img.info.get('exif')
    if exif_data:
        save_options['exif'] = exif_data

    img.save(img_path, **save_options)
|
|
|
|
|
def _building_status(class_id):
    """Return the display label for a detector class id (class 1 is 'Damaged')."""
    return 'Damaged' if class_id == 1 else 'Undamaged'


def _read_flight_parameters(img):
    """Return (relative altitude, gimbal yaw, gimbal pitch) for ``img``.

    Values are parsed from the image's XMP packet when it has one; otherwise
    the fixed fallback values 60.0 / 30.0 / -90.0 are returned.
    """
    xmp_data = img.info.get("xmp")
    if not xmp_data:
        print("XMP data not found in the image.")
        return 60.0, 30.0, -90.0

    relative_altitude, gimbal_yaw_degree, gimbal_pitch_degree = extract_xmp_metadata(xmp_data)
    print("Extracted XMP Metadata:")
    print(f"Relative Altitude: {relative_altitude}")
    print(f"Gimbal Yaw Degree: {gimbal_yaw_degree}")
    print(f"Gimbal Pitch Degree: {gimbal_pitch_degree}")
    return relative_altitude, gimbal_yaw_degree, gimbal_pitch_degree


def _read_camera_metadata(img_path):
    """Read the ExifTool tags needed for geolocation from the file at ``img_path``."""
    wanted_tags = {
        "Composite:FOV",
        "Composite:GPSLatitude",
        "Composite:GPSLongitude",
    }
    metadata = {}
    with ExifToolHelper() as et:
        for entry in et.get_metadata(img_path):
            metadata.update({k: v for k, v in entry.items() if k in wanted_tags})
    return metadata


def _locate_buildings(detections, camera_gps, altitude, yaw_deg, fov_horizontal,
                      image_width, image_height):
    """Return a ``(lat, lon, class_id)`` tuple for every detection box centre."""
    yaw_rad = np.radians(yaw_deg)

    # With square pixels both axes share one metres-per-pixel scale, so the
    # vertical FOV follows from the tangent relation rather than from scaling
    # the angle linearly by the aspect ratio.
    # NOTE(review): assumes Composite:FOV is the horizontal FOV -- confirm.
    fov_vertical = np.degrees(
        2 * np.arctan(np.tan(np.radians(fov_horizontal) / 2) * image_height / image_width)
    )

    locations = []
    for index, (x_min, y_min, x_max, y_max) in enumerate(detections.xyxy):
        class_id = int(detections.class_id[index])

        # Offsets from the image centre; image y grows downwards, so flip it.
        pixel_dx = (x_min + x_max) / 2 - image_width / 2
        pixel_dy = image_height / 2 - (y_min + y_max) / 2

        ground_x = calculate_ground_distance(altitude, fov_horizontal, image_width, pixel_dx)
        ground_y = calculate_ground_distance(altitude, fov_vertical, image_height, pixel_dy)

        # NOTE(review): this is a counter-clockwise rotation by the yaw angle.
        # If GimbalYawDegree is a compass heading (clockwise from north) the
        # sine terms would need the opposite sign -- verify against real data.
        east_offset = ground_x * np.cos(yaw_rad) - ground_y * np.sin(yaw_rad)
        north_offset = ground_x * np.sin(yaw_rad) + ground_y * np.cos(yaw_rad)

        lat, lon = get_gps_coordinates(camera_gps[0], camera_gps[1], north_offset, east_offset)
        locations.append((lat, lon, class_id))
    return locations


def _build_map_iframe(camera_gps, building_locations):
    """Render a folium map with one marker per building as an ``<iframe>`` string."""
    m = folium.Map(
        location=camera_gps,
        zoom_start=18,
        tiles='Esri.WorldImagery'
    )

    for number, (lat, lon, class_id) in enumerate(building_locations, start=1):
        folium.Marker(
            location=(lat, lon),
            popup=f'Building {number}: {_building_status(class_id)}',
            icon=folium.Icon(color='red' if class_id == 1 else 'green', icon='home')
        ).add_to(m)

    # Render in memory: avoids a shared temp file and the locale-dependent
    # decoding that reading it back in text mode would involve.
    map_html = m.get_root().render()
    encoded_html = base64.b64encode(map_html.encode('utf-8')).decode('utf-8')
    data_url = f"data:text/html;base64,{encoded_html}"
    return f'<iframe src="{data_url}" width="100%" height="600" style="border:none;"></iframe>'


def _build_table_html(building_locations):
    """Render the per-building table (number, status, coordinates) as HTML."""
    parts = [
        "<table style='width: 100%; border-collapse: collapse;'>",
        "<tr><th style='border: 1px solid black;'>Building Number</th><th style='border: 1px solid black;'>Building Type</th><th style='border: 1px solid black;'>Location (Lat, Lon)</th></tr>",
    ]
    parts.extend(
        f"<tr><td style='border: 1px solid black;'>{number}</td><td style='border: 1px solid black;'>{_building_status(class_id)}</td><td style='border: 1px solid black;'>{lat}, {lon}</td></tr>"
        for number, (lat, lon, class_id) in enumerate(building_locations, start=1)
    )
    parts.append("</table>")
    return "".join(parts)


def predict_image(img, conf_threshold, iou_threshold):
    """Detect buildings in an uploaded image and place them on a map.

    Args:
        img: The uploaded PIL image (``None`` when nothing was uploaded).
        conf_threshold: Confidence threshold passed to the detector.
        iou_threshold: IoU threshold passed to the detector.

    Returns:
        A 4-tuple ``(annotated_image, map_iframe_html, summary, table_html)``:
        the PIL image with detections drawn, an ``<iframe>`` embedding the
        folium map, a one-line damaged/undamaged count, and an HTML table
        listing every building with its coordinates.

    Raises:
        gr.Error: If no image was supplied, or the saved image lacks the GPS
            position or field-of-view tags needed for geolocation.
    """
    if img is None:
        raise gr.Error("Please upload an image before running inference.")

    relative_altitude, gimbal_yaw_degree, _ = _read_flight_parameters(img)

    # ExifTool reads from disk, so persist the upload once, keeping its EXIF
    # block. JPEG cannot store every mode, hence the RGB conversion.
    img_path = os.path.join(UPLOAD_DIR, 'uploaded_image.jpg')
    rgb_img = img if img.mode == "RGB" else img.convert("RGB")
    exif_data = img.info.get("exif")
    if exif_data:
        rgb_img.save(img_path, exif=exif_data)
    else:
        rgb_img.save(img_path)

    metadata = _read_camera_metadata(img_path)
    required_tags = ("Composite:GPSLatitude", "Composite:GPSLongitude", "Composite:FOV")
    missing_tags = [tag for tag in required_tags if tag not in metadata]
    if missing_tags:
        raise gr.Error(
            "The image is missing metadata required for geolocation: "
            + ", ".join(missing_tags)
        )

    camera_gps = (metadata["Composite:GPSLatitude"], metadata["Composite:GPSLongitude"])
    fov_horizontal = float(metadata["Composite:FOV"])

    # Box coordinates are measured against the actual frame, so use its real
    # size instead of assuming a fixed resolution.
    image_width, image_height = rgb_img.size

    img_cv2 = cv2.cvtColor(np.array(rgb_img), cv2.COLOR_RGB2BGR)
    results = model.predict(
        source=img_cv2,
        conf=conf_threshold,
        iou=iou_threshold,
        show_labels=True,
        show_conf=True,
        imgsz=640,
    )

    # A single source image is passed in, so only the first result is used.
    result = results[0]
    detections = sv.Detections.from_ultralytics(result)
    # plot() is converted BGR -> RGB by reversing the channel axis.
    annotated = Image.fromarray(result.plot()[..., ::-1])

    building_locations = _locate_buildings(
        detections,
        camera_gps,
        float(relative_altitude),
        float(gimbal_yaw_degree),
        fov_horizontal,
        image_width,
        image_height,
    )

    damaged_count = sum(1 for _, _, class_id in building_locations if class_id == 1)
    undamaged_count = len(building_locations) - damaged_count
    summary = f"Damaged Buildings: {damaged_count}, Undamaged Buildings: {undamaged_count}"

    map_iframe = _build_map_iframe(camera_gps, building_locations)
    table_html = _build_table_html(building_locations)

    return annotated, map_iframe, summary, table_html
|
|
|
# HTML blurb passed to the interface as its description.
description_with_logo = """
<p>Upload images for inference and view detected building locations on the map.</p>
<p>For test images, visit <a href="https://drive.google.com/drive/folders/15_WULrRqvPDuhWqC8hmA6LhBtH98X0dV?usp=drive_link" target="_blank">this Google Drive folder</a>.</p>
"""

# Gradio front end: one image plus two threshold sliders go into
# predict_image; its four return values fill the four output components.
iface = gr.Interface(
    title="Custom trained Yolov10 Model on Rescuenet Dataset",
    description=description_with_logo,
    fn=predict_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence threshold"),
        gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU threshold"),
    ],
    outputs=[
        gr.Image(type="pil", label="Annotated Image"),
        gr.HTML(label="Map"),
        gr.HTML(label="Summary"),
        gr.HTML(label="Building Information"),
    ],
)

# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|
|