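# Seat depth analysis: estimates knee clearance from a side-view photo of a
# seated person, using MediaPipe Pose for landmarks, YOLOv8 for chair
# detection, and SAM for seat segmentation.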
import cv2
import numpy as np
import torch
from segment_anything import sam_model_registry, SamPredictor
from ultralytics import YOLO
import mediapipe as mp
import json
from datetime import datetime
import time
import os

def process_seat_depth_analysis(image_path, eye_to_ear_cm=7.0, sam_checkpoint="sam_vit_b_01ec64.pth"):
    """
    Main function to process seat depth analysis
    
    Args:
        image_path: Path to the input image
        eye_to_ear_cm: Real-world eye to ear distance for scaling (default 7.0 cm)
        sam_checkpoint: Path to SAM model checkpoint
    
    Returns:
        tuple: (output_json, pose_image, seat_band_image, final_image)
    """
    start_time = time.time()

    def put_text_safe(image, text, org, font, font_scale, color, thickness):
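        # Helper: draw text clamped so it never falls outside the image bounds.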
        text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
        text_width, text_height = text_size
        x, y = org
        h, w = image.shape[:2]

        # Adjust X if text goes out on the right
        if x + text_width > w:
            x = w - text_width - 5  # 5 pixel padding from right

        # Adjust X if text goes out on the left
        if x < 0:
            x = 5  # 5 pixel padding from left

        # Adjust Y if text goes above image
        if y - text_height < 0:
            y = text_height + 5  # push down

        # Adjust Y if text goes below image
        if y > h:
            y = h - 5

        cv2.putText(image, text, (x, y), font, font_scale, color, thickness)

        
    # === Load image ===
    image_bgr = cv2.imread(image_path)
    if image_bgr is None:
        raise ValueError(f"Could not load image from {image_path}")
    
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    h, w = image_rgb.shape[:2]

    # === Run MediaPipe Pose Detection ===
    mp_pose = mp.solutions.pose
    with mp_pose.Pose(static_image_mode=True) as pose:
        results = pose.process(image_rgb)

    if not results.pose_landmarks:
        raise ValueError("No pose detected in the image")

    landmarks = results.pose_landmarks.landmark

    # === Extract key landmarks (eyes, ears, hips, knees) ===
    left_knee = landmarks[25]
    right_knee = landmarks[26]
    left_eye = landmarks[2]
    right_eye = landmarks[5]
    right_ear = landmarks[8]
    left_ear = landmarks[7]
    left_hip = landmarks[23]
    right_hip = landmarks[24]

    # Convert to pixel coordinates
    left_knee_px = (int(left_knee.x * w), int(left_knee.y * h))
    right_knee_px = (int(right_knee.x * w), int(right_knee.y * h))
    left_eye_px = (int(left_eye.x * w), int(left_eye.y * h))
    right_eye_px = (int(right_eye.x * w), int(right_eye.y * h))
    left_ear_px = (int(left_ear.x * w), int(left_ear.y * h))
    right_ear_px = (int(right_ear.x * w), int(right_ear.y * h))
    left_hip_px = (int(left_hip.x * w), int(left_hip.y * h))
    right_hip_px = (int(right_hip.x * w), int(right_hip.y * h))

    # === Determine Facing Direction ===
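    # For a seated person in side profile the knees sit forward of the head,
    # so comparing mean knee x to mean eye x reveals the facing direction.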
    avg_knee_x = (left_knee_px[0] + right_knee_px[0]) / 2
    avg_eye_x = (left_eye_px[0] + right_eye_px[0]) / 2
    facing_direction = "right" if avg_knee_x > avg_eye_x else "left"

    # === Create Pose Overlay (Image 1) ===
    pose_image = image_rgb.copy()
    mp_drawing = mp.solutions.drawing_utils
    mp_drawing_styles = mp.solutions.drawing_styles

    mp_drawing.draw_landmarks(
        pose_image,
        results.pose_landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
    )

    # === Step 1: Detect Chair with YOLOv8 ===
    # Ultralytics treats raw numpy inputs as BGR, so feed the original BGR
    # frame rather than the RGB copy.
    yolo_model = YOLO("yolov8n.pt")
    yolo_results = yolo_model(image_bgr)

    # === Step 2: Get Chair Box ===
    # Keep the highest-confidence chair; the original inner-loop break did not
    # exit the outer loop, so a later detection could silently overwrite it.
    chair_box = None
    chair_confidence = 0.0
    for result in yolo_results:
        for box, cls, conf in zip(result.boxes.xyxy, result.boxes.cls, result.boxes.conf):
            if int(cls.item()) == 56 and float(conf.item()) > chair_confidence:  # COCO class 56 = chair
                chair_box = box.cpu().numpy().astype(int)
                chair_confidence = float(conf.item())

    if chair_box is None:
        raise ValueError("No chair detected in the image")

    x1, y1, x2, y2 = chair_box
    chair_height = y2 - y1
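    # Trim the top 25% of the chair box so the SAM box prompt focuses on the
    # seat pan rather than the backrest.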
    adjusted_y1 = y1 + int(0.25 * chair_height)
    input_box = np.array([x1, adjusted_y1, x2, y2])

    # === Step 3: Load SAM ===
    model_type = "vit_b"
    device = "cuda" if torch.cuda.is_available() else "cpu"

    sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
    sam.to(device=device)
    predictor = SamPredictor(sam)

    # === Step 4: Predict Mask from Bounding Box ===
    predictor.set_image(image_rgb)
    masks, scores, _ = predictor.predict(box=input_box[None, :], multimask_output=True)
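    # multimask_output=True returns three candidate masks; keep the one SAM
    # scores highest.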
    best_mask = masks[np.argmax(scores)]

    # === Step 5: Largest Component Only ===
    def get_largest_connected_component(mask):
        num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask.astype(np.uint8), connectivity=8)
        if num_labels <= 1:
            return mask
        largest_label = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
        return labels == largest_label

    cleaned_mask = get_largest_connected_component(best_mask)

    # === Step 6: Estimate Seat Front ===
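    # Scan a horizontal band of the seat mask centred on knee height; the
    # extreme mask x-coordinate in the facing direction approximates the
    # front edge of the seat pan.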
    knee_y = int((left_knee_px[1] + right_knee_px[1]) / 2)
    band_thickness = chair_height // 2
    y_min = max(0, knee_y - band_thickness)
    y_max = min(h, knee_y + band_thickness)
    band = cleaned_mask[y_min:y_max, :]
    chair_pixels_x = np.where(band)[1]

    if chair_pixels_x.size == 0:
        raise ValueError("No chair pixels detected at knee level")

    seat_front_x = chair_pixels_x.max() if facing_direction == "right" else chair_pixels_x.min()
    seat_front_y = knee_y

    # === Create Seat Front Band Visualization (Image 2) ===
    seat_band_image = image_rgb.copy()
    cv2.line(seat_band_image, (0, y_min), (w, y_min), (0, 255, 0), 2)
    cv2.line(seat_band_image, (0, y_max), (w, y_max), (0, 255, 0), 2)
    cv2.circle(seat_band_image, (seat_front_x, seat_front_y), 8, (0, 0, 255), -1)
    put_text_safe(seat_band_image, "Seat Front", (seat_front_x + 10, seat_front_y - 10),
              cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

    # === Calculate Back of Knee Position ===
    def euclidean_distance(p1, p2):
        return np.linalg.norm(np.array(p1) - np.array(p2))

    # Calculate thigh length for proportional offset
    if facing_direction == "right":
        hip_pt = right_hip_px
        knee_pt_original = right_knee_px
    else:
        hip_pt = left_hip_px
        knee_pt_original = left_knee_px

    thigh_length_px = euclidean_distance(hip_pt, knee_pt_original)

    # Back of knee is typically 12-15% of thigh length behind knee center
    back_of_knee_offset = thigh_length_px * 0.13  # 13% of thigh length

    # Apply offset in the backward direction
    if facing_direction == "right":
        knee_pt = (int(knee_pt_original[0] - back_of_knee_offset), knee_pt_original[1])
    else:
        knee_pt = (int(knee_pt_original[0] + back_of_knee_offset), knee_pt_original[1])

    # === Calculate Measurements ===
    clearance_px = abs(seat_front_x - knee_pt[0])

    # Check visibility and calculate eye-to-ear distance
    visibility_warnings = []
    if facing_direction == "right" and (right_eye.visibility < 0.5 or right_ear.visibility < 0.5):
        visibility_warnings.append("Right eye or ear not clearly visible. Scaling may be inaccurate.")
    elif facing_direction == "left" and (left_eye.visibility < 0.5 or left_ear.visibility < 0.5):
        visibility_warnings.append("Left eye or ear not clearly visible. Scaling may be inaccurate.")

    if facing_direction == "right":
        eye_coord = right_eye_px
        ear_coord = right_ear_px
    else:
        eye_coord = left_eye_px
        ear_coord = left_ear_px

    eye_to_ear_px = euclidean_distance(eye_coord, ear_coord)
    if eye_to_ear_px == 0:
        raise ValueError("Eye and ear landmarks coincide; cannot establish pixel scale")
    pixels_per_cm = eye_to_ear_px / eye_to_ear_cm
    clearance_cm = clearance_px / pixels_per_cm

    # Determine if back of knee is behind seat front
    if facing_direction == "right":
        knee_behind_seat = knee_pt[0] < seat_front_x
    else:
        knee_behind_seat = knee_pt[0] > seat_front_x

    # === Classification ===
    # Ergonomic guideline: 2-6 cm of clearance between the seat front and the
    # back of the knee is treated as optimal.
    if clearance_cm < 2:
        category = "Too Deep"
        reasoning = f"Clearance of {clearance_cm:.2f}cm is less than 2cm minimum"
    elif knee_behind_seat:
        category = "Too Deep"
        reasoning = "Back of knee is behind seat front"
    elif clearance_cm <= 6:
        category = "Optimal"
        reasoning = f"Clearance of {clearance_cm:.2f}cm falls within optimal range (2-6cm)"
    else:
        category = "Too Short"
        reasoning = f"Clearance of {clearance_cm:.2f}cm exceeds 6cm optimal maximum"

    # === Create Final Visualization (Image 3) ===
    final_image = image_rgb.copy()

    # Draw seat front and knee
    cv2.circle(final_image, (seat_front_x, seat_front_y), 8, (0, 0, 255), -1)
    cv2.circle(final_image, knee_pt, 8, (255, 0, 0), -1)

    # Height at which the annotation line floats (clamped inside the frame)
    line_y = max(20, min(seat_front_y, knee_pt[1]) - 30)

    # Draw horizontal line (floating)
    cv2.line(final_image, (min(seat_front_x, knee_pt[0]), line_y), 
                   (max(seat_front_x, knee_pt[0]), line_y), 
             (255, 255, 0), 2)

    # Add arrow tips
    cv2.arrowedLine(final_image,
                    (min(seat_front_x, knee_pt[0]) + 20, line_y),
                    (min(seat_front_x, knee_pt[0]), line_y),
                    (255, 255, 0), 2, tipLength=0.4)

    cv2.arrowedLine(final_image,
                    (max(seat_front_x, knee_pt[0]) - 20, line_y),
                    (max(seat_front_x, knee_pt[0]), line_y),
                    (255, 255, 0), 2, tipLength=0.4)

    # Put clearance text above the line
    put_text_safe(final_image, f"Knee clearance: {clearance_cm:.1f} cm", 
              (min(seat_front_x, knee_pt[0]) + 10, line_y - 10),
              cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)

    # Draw eye-to-ear line
    cv2.line(final_image, eye_coord, ear_coord, (0, 255, 0), 2)
    put_text_safe(final_image, f"{eye_to_ear_cm:.1f}cm", 
              (eye_coord[0], eye_coord[1] - 10), 
              cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # === Generate JSON Output ===
    processing_time = int((time.time() - start_time) * 1000)
    
    output_json = {
        "frame_id": os.path.basename(image_path),
        "timestamp": datetime.now().isoformat(),
        "pose_detection": {
            "pose_detected": True,
            "facing_direction": facing_direction,
            "landmarks_visibility": {
                "left_eye": float(left_eye.visibility),
                "right_eye": float(right_eye.visibility),
                "left_ear": float(left_ear.visibility),
                "right_ear": float(right_ear.visibility),
                "left_knee": float(left_knee.visibility),
                "right_knee": float(right_knee.visibility),
                "left_hip": float(left_hip.visibility),
                "right_hip": float(right_hip.visibility)
            }
        },
        "chair_detection": {
            "chair_detected": True,
            "chair_bbox": chair_box.tolist(),
            "chair_confidence": chair_confidence
        },
        "measurements": {
            "eye_to_ear_distance_px": float(eye_to_ear_px),
            "eye_to_ear_distance_cm": float(eye_to_ear_cm),
            "pixels_per_cm": float(pixels_per_cm),
            "seat_front_position": [int(seat_front_x), int(seat_front_y)],
            "back_of_knee_position": [int(knee_pt[0]), int(knee_pt[1])],
            "knee_clearance_px": float(clearance_px),
            "knee_clearance_cm": float(clearance_cm),
            "thigh_length_px": float(thigh_length_px),
            "back_of_knee_offset_applied": float(back_of_knee_offset)
        },
        "classification": {
            "category": category,
            "knee_behind_seat": bool(knee_behind_seat),
            "reasoning": reasoning
        },
        "debug_info": {
            "band_y_range": [int(y_min), int(y_max)],
            "chair_pixels_detected": int(chair_pixels_x.size),
            "segmentation_success": True,
            "scaling_method": "eye_to_ear_reference"
        },
        "warnings": visibility_warnings,
        "processing_time_ms": processing_time
    }

    return output_json, pose_image, seat_band_image, final_image
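

# --- Example usage (illustrative sketch; file names below are assumptions) ---
if __name__ == "__main__":
    # Assumed inputs: a side-view photo of a seated person and the SAM vit_b
    # checkpoint already referenced in the function's default argument.
    result, pose_img, band_img, final_img = process_seat_depth_analysis(
        "sitting_side_view.jpg",  # hypothetical image path
        eye_to_ear_cm=7.0,
        sam_checkpoint="sam_vit_b_01ec64.pth",
    )

    with open("seat_depth_result.json", "w") as f:
        json.dump(result, f, indent=2)

    # Returned images are in RGB order; convert back to BGR for cv2.imwrite.
    for name, img in [("pose.png", pose_img),
                      ("seat_band.png", band_img),
                      ("final.png", final_img)]:
        cv2.imwrite(name, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))

    print(f"{result['classification']['category']}: "
          f"{result['measurements']['knee_clearance_cm']:.1f} cm clearance")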