# seat-depth-analyser / seat_depth_analysis.py
import cv2
import numpy as np
import torch
from segment_anything import sam_model_registry, SamPredictor
from ultralytics import YOLO
import mediapipe as mp
import json
from datetime import datetime
import time
import os
def process_seat_depth_analysis(image_path, eye_to_ear_cm=7.0, sam_checkpoint="sam_vit_b_01ec64.pth"):
"""
Main function to process seat depth analysis
Args:
image_path: Path to the input image
eye_to_ear_cm: Real-world eye to ear distance for scaling (default 7.0 cm)
sam_checkpoint: Path to SAM model checkpoint
Returns:
tuple: (output_json, pose_image, seat_band_image, final_image)
"""
start_time = time.time()
def put_text_safe(image, text, org, font, font_scale, color, thickness):
text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
text_width, text_height = text_size
x, y = org
h, w = image.shape[:2]
# Adjust X if text goes out on the right
if x + text_width > w:
x = w - text_width - 5 # 5 pixel padding from right
# Adjust X if text goes out on the left
if x < 0:
x = 5 # 5 pixel padding from left
# Adjust Y if text goes above image
if y - text_height < 0:
y = text_height + 5 # push down
# Adjust Y if text goes below image
if y > h:
y = h - 5
cv2.putText(image, text, (x, y), font, font_scale, color, thickness)
# === Load image ===
image_bgr = cv2.imread(image_path)
if image_bgr is None:
raise ValueError(f"Could not load image from {image_path}")
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
h, w = image_rgb.shape[:2]
    # === Run MediaPipe Pose Detection ===
    mp_pose = mp.solutions.pose
    pose = mp_pose.Pose(static_image_mode=True)
    results = pose.process(image_rgb)
    pose.close()  # release the MediaPipe graph; the results object stays valid
if not results.pose_landmarks:
raise ValueError("No pose detected in the image")
landmarks = results.pose_landmarks.landmark
    # === Get knee, eye, ear, and hip landmarks ===
    left_knee = landmarks[25]
    right_knee = landmarks[26]
    left_eye = landmarks[2]
    right_eye = landmarks[5]
    left_ear = landmarks[7]
    right_ear = landmarks[8]
    left_hip = landmarks[23]
    right_hip = landmarks[24]
# Convert to pixel coordinates
left_knee_px = (int(left_knee.x * w), int(left_knee.y * h))
right_knee_px = (int(right_knee.x * w), int(right_knee.y * h))
left_eye_px = (int(left_eye.x * w), int(left_eye.y * h))
right_eye_px = (int(right_eye.x * w), int(right_eye.y * h))
left_ear_px = (int(left_ear.x * w), int(left_ear.y * h))
right_ear_px = (int(right_ear.x * w), int(right_ear.y * h))
left_hip_px = (int(left_hip.x * w), int(left_hip.y * h))
right_hip_px = (int(right_hip.x * w), int(right_hip.y * h))
# === Determine Facing Direction ===
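    # Heuristic: a seated person's knees project forward of the face, so the
    # side on which the mean knee x lies relative to the mean eye x tells us
    # which way the person is facing in the image.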
avg_knee_x = (left_knee_px[0] + right_knee_px[0]) / 2
avg_eye_x = (left_eye_px[0] + right_eye_px[0]) / 2
facing_direction = "right" if avg_knee_x > avg_eye_x else "left"
# === Create Pose Overlay (Image 1) ===
pose_image = image_rgb.copy()
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_drawing.draw_landmarks(
pose_image,
results.pose_landmarks,
mp_pose.POSE_CONNECTIONS,
landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
)
    # === Step 1: Detect Chair with YOLOv8 ===
    yolo_model = YOLO("yolov8n.pt")  # ultralytics downloads the weights on first use
    yolo_results = yolo_model(image_rgb)
    # === Step 2: Get Chair Box ===
    chair_box = None
    chair_confidence = 0.0
    for result in yolo_results:
        for box, cls, conf in zip(result.boxes.xyxy, result.boxes.cls, result.boxes.conf):
            if int(cls.item()) == 56:  # COCO class 56 = chair
                chair_box = box.cpu().numpy().astype(int)
                chair_confidence = float(conf.item())
                break
        if chair_box is not None:
            break  # the inner break alone would not exit both loops
if chair_box is None:
raise ValueError("No chair detected in the image")
    x1, y1, x2, y2 = chair_box
    chair_height = y2 - y1
    # Trim the top quarter of the chair box (mostly backrest) so the SAM
    # prompt focuses on the seat pan.
    adjusted_y1 = y1 + int(0.25 * chair_height)
    input_box = np.array([x1, adjusted_y1, x2, y2])
# === Step 3: Load SAM ===
model_type = "vit_b"
device = "cuda" if torch.cuda.is_available() else "cpu"
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)
# === Step 4: Predict Mask from Bounding Box ===
predictor.set_image(image_rgb)
masks, scores, _ = predictor.predict(box=input_box[None, :], multimask_output=True)
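    # multimask_output=True yields three candidate masks; keep the one SAM
    # scores highest.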
best_mask = masks[np.argmax(scores)]
# === Step 5: Largest Component Only ===
    def get_largest_connected_component(mask):
        # Keep only the largest 8-connected blob; SAM masks can include small
        # disconnected fragments. Label 0 is the background, so the area
        # lookup skips stats[0].
        num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask.astype(np.uint8), connectivity=8)
        if num_labels <= 1:
            return mask
        largest_label = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
        return labels == largest_label
cleaned_mask = get_largest_connected_component(best_mask)
# === Step 6: Estimate Seat Front ===
knee_y = int((left_knee_px[1] + right_knee_px[1]) / 2)
band_thickness = chair_height // 2
y_min = max(0, knee_y - band_thickness)
y_max = min(h, knee_y + band_thickness)
band = cleaned_mask[y_min:y_max, :]
chair_pixels_x = np.where(band)[1]
if chair_pixels_x.size == 0:
raise ValueError("No chair pixels detected at knee level")
seat_front_x = chair_pixels_x.max() if facing_direction == "right" else chair_pixels_x.min()
seat_front_y = knee_y
# === Create Seat Front Band Visualization (Image 2) ===
seat_band_image = image_rgb.copy()
cv2.line(seat_band_image, (0, y_min), (w, y_min), (0, 255, 0), 2)
cv2.line(seat_band_image, (0, y_max), (w, y_max), (0, 255, 0), 2)
cv2.circle(seat_band_image, (seat_front_x, seat_front_y), 8, (0, 0, 255), -1)
put_text_safe(seat_band_image, "Seat Front", (seat_front_x + 10, seat_front_y - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
# === Calculate Back of Knee Position ===
def euclidean_distance(p1, p2):
return np.linalg.norm(np.array(p1) - np.array(p2))
# Calculate thigh length for proportional offset
if facing_direction == "right":
hip_pt = right_hip_px
knee_pt_original = right_knee_px
else:
hip_pt = left_hip_px
knee_pt_original = left_knee_px
thigh_length_px = euclidean_distance(hip_pt, knee_pt_original)
# Back of knee is typically 12-15% of thigh length behind knee center
back_of_knee_offset = thigh_length_px * 0.13 # 13% of thigh length
# Apply offset in the backward direction
if facing_direction == "right":
knee_pt = (int(knee_pt_original[0] - back_of_knee_offset), knee_pt_original[1])
else:
knee_pt = (int(knee_pt_original[0] + back_of_knee_offset), knee_pt_original[1])
# === Calculate Measurements ===
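    # Knee clearance is the horizontal pixel gap between the seat front and
    # the estimated back of the knee.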
clearance_px = abs(seat_front_x - knee_pt[0])
# Check visibility and calculate eye-to-ear distance
visibility_warnings = []
if facing_direction == "right" and (right_eye.visibility < 0.5 or right_ear.visibility < 0.5):
visibility_warnings.append("Right eye or ear not clearly visible. Scaling may be inaccurate.")
elif facing_direction == "left" and (left_eye.visibility < 0.5 or left_ear.visibility < 0.5):
visibility_warnings.append("Left eye or ear not clearly visible. Scaling may be inaccurate.")
if facing_direction == "right":
eye_coord = right_eye_px
ear_coord = right_ear_px
else:
eye_coord = left_eye_px
ear_coord = left_ear_px
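    # The eye-to-ear span is roughly constant across adults (~7 cm by
    # default), so it serves as an in-image ruler for pixel-to-cm scaling.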
eye_to_ear_px = euclidean_distance(eye_coord, ear_coord)
pixels_per_cm = eye_to_ear_px / eye_to_ear_cm
clearance_cm = clearance_px / pixels_per_cm
# Determine if back of knee is behind seat front
if facing_direction == "right":
knee_behind_seat = knee_pt[0] < seat_front_x
else:
knee_behind_seat = knee_pt[0] > seat_front_x
    # === Classification ===
    if clearance_cm < 2:
        category = "Too Deep"
        reasoning = f"Clearance of {clearance_cm:.2f}cm is less than the 2cm minimum"
    elif knee_behind_seat:
        category = "Too Deep"
        reasoning = "Back of knee is behind seat front"
    elif clearance_cm <= 6:
        category = "Optimal"
        reasoning = f"Clearance of {clearance_cm:.2f}cm falls within the optimal range (2-6cm)"
    else:
        category = "Too Short"
        reasoning = f"Clearance of {clearance_cm:.2f}cm exceeds the 6cm optimal maximum"
    # === Create Final Visualization (Image 3) ===
    # Drawing happens on the RGB copy, so the color tuples below are (R, G, B).
    final_image = image_rgb.copy()
    # Mark the seat front (blue) and the estimated back of knee (red)
    cv2.circle(final_image, (seat_front_x, seat_front_y), 8, (0, 0, 255), -1)
    cv2.circle(final_image, knee_pt, 8, (255, 0, 0), -1)
# Height at which the line floats
line_y = min(seat_front_y, knee_pt[1]) - 30
# Draw horizontal line (floating)
cv2.line(final_image, (min(seat_front_x, knee_pt[0]), line_y),
(max(seat_front_x, knee_pt[0]), line_y),
(255, 255, 0), 2)
# Add arrow tips
cv2.arrowedLine(final_image,
(min(seat_front_x, knee_pt[0]) + 20, line_y),
(min(seat_front_x, knee_pt[0]), line_y),
(255, 255, 0), 2, tipLength=0.4)
cv2.arrowedLine(final_image,
(max(seat_front_x, knee_pt[0]) - 20, line_y),
(max(seat_front_x, knee_pt[0]), line_y),
(255, 255, 0), 2, tipLength=0.4)
# Put clearance text above the line
put_text_safe(final_image, f"Knee clearance: {clearance_cm:.1f} cm",
(min(seat_front_x, knee_pt[0]) + 10, line_y - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
# Draw eye-to-ear line
cv2.line(final_image, eye_coord, ear_coord, (0, 255, 0), 2)
put_text_safe(final_image, f"{eye_to_ear_cm:.1f}cm",
(eye_coord[0], eye_coord[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
# === Generate JSON Output ===
processing_time = int((time.time() - start_time) * 1000)
output_json = {
"frame_id": os.path.basename(image_path),
"timestamp": datetime.now().isoformat(),
"pose_detection": {
"pose_detected": True,
"facing_direction": facing_direction,
"landmarks_visibility": {
"left_eye": float(left_eye.visibility),
"right_eye": float(right_eye.visibility),
"left_ear": float(left_ear.visibility),
"right_ear": float(right_ear.visibility),
"left_knee": float(left_knee.visibility),
"right_knee": float(right_knee.visibility),
"left_hip": float(left_hip.visibility),
"right_hip": float(right_hip.visibility)
}
},
"chair_detection": {
"chair_detected": True,
"chair_bbox": chair_box.tolist(),
"chair_confidence": chair_confidence
},
"measurements": {
"eye_to_ear_distance_px": float(eye_to_ear_px),
"eye_to_ear_distance_cm": float(eye_to_ear_cm),
"pixels_per_cm": float(pixels_per_cm),
"seat_front_position": [int(seat_front_x), int(seat_front_y)],
"back_of_knee_position": [int(knee_pt[0]), int(knee_pt[1])],
"knee_clearance_px": float(clearance_px),
"knee_clearance_cm": float(clearance_cm),
"thigh_length_px": float(thigh_length_px),
"back_of_knee_offset_applied": float(back_of_knee_offset)
},
"classification": {
"category": category,
"knee_behind_seat": bool(knee_behind_seat),
"reasoning": reasoning
},
"debug_info": {
"band_y_range": [int(y_min), int(y_max)],
"chair_pixels_detected": int(chair_pixels_x.size),
"segmentation_success": True,
"scaling_method": "eye_to_ear_reference"
},
"warnings": visibility_warnings,
"processing_time_ms": processing_time
}
return output_json, pose_image, seat_band_image, final_image
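

# Example usage: a minimal sketch, assuming the SAM checkpoint sits next to
# this script (the input image filename below is hypothetical).
if __name__ == "__main__":
    output_json, pose_img, band_img, final_img = process_seat_depth_analysis(
        "seated_person.jpg",  # hypothetical input image
        eye_to_ear_cm=7.0,
        sam_checkpoint="sam_vit_b_01ec64.pth",
    )
    print(json.dumps(output_json, indent=2))
    # The returned images are RGB; convert back to BGR before saving with OpenCV.
    cv2.imwrite("seat_depth_result.jpg", cv2.cvtColor(final_img, cv2.COLOR_RGB2BGR))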