File size: 20,313 Bytes
400a879
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7867cd
400a879
b7867cd
 
 
 
 
 
 
 
 
400a879
 
b7867cd
 
400a879
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
from PIL import Image
from insightface.app import FaceAnalysis
import numpy as np
import os
from pathlib import Path
import time
import argparse
import cv2

class FaceInference:
    """Face-detection inference wrapper around insightface's ``FaceAnalysis``.

    Only the ``detection`` module is loaded.  For every selected face the
    public methods return a square bounding box, a binary mask and a copy of
    the image in which everything outside that box is zeroed.
    """

    def __init__(self, det_thresh=0.5, det_size=(640, 640), ctx_id=0):
        """Create and prepare the detector.

        Args:
            det_thresh: Detection threshold forwarded to ``prepare``.
            det_size: Detection input size forwarded to ``prepare``.
            ctx_id: Device id.  ``-1`` requests the CPU execution provider
                only; any other value is passed as the CUDA ``device_id``
                (with the CPU provider listed after CUDA).
        """
        if ctx_id == -1:
            providers = ['CPUExecutionProvider']
            provider_options = [{}]
        else:
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
            provider_options = [{"device_id": str(ctx_id)}, {}]

        self.face_analysis = FaceAnalysis(
            allowed_modules=['detection'],
            providers=providers,
            provider_options=provider_options,
        )
        self.face_analysis.prepare(ctx_id=ctx_id, det_thresh=det_thresh, det_size=det_size)

    def _make_square_bbox(self, x1, y1, x2, y2, image_width, image_height):
        """Expand a rectangular bbox to a square one around the same centre.

        The square's side is the longer side of the input box.  A square
        that sticks out of the image is shifted back inside.  If the side is
        longer than an image dimension the final clamp shortens it along that
        axis, so in that case the result is no longer square.

        Args:
            x1, y1, x2, y2: Corners of the original bbox.
            image_width, image_height: Image size used for clamping.

        Returns:
            tuple: ``(new_x1, new_y1, new_x2, new_y2)``.
        """
        center_x = (x1 + x2) / 2
        center_y = (y1 + y2) / 2
        square_size = max(x2 - x1, y2 - y1)
        half_size = square_size / 2

        new_x1 = center_x - half_size
        new_y1 = center_y - half_size
        new_x2 = center_x + half_size
        new_y2 = center_y + half_size

        # Shift the square back into the image, keeping its side length.
        if new_x1 < 0:
            new_x1 = 0
            new_x2 = square_size
        if new_y1 < 0:
            new_y1 = 0
            new_y2 = square_size
        if new_x2 > image_width:
            new_x2 = image_width
            new_x1 = image_width - square_size
        if new_y2 > image_height:
            new_y2 = image_height
            new_y1 = image_height - square_size

        # Final clamp for squares that are larger than the image itself.
        new_x1 = max(0, new_x1)
        new_y1 = max(0, new_y1)
        new_x2 = min(image_width, new_x2)
        new_y2 = min(image_height, new_y2)

        return new_x1, new_y1, new_x2, new_y2

    @staticmethod
    def _empty_result(image_shape):
        """Return a result dict that reports no faces for an image of ``image_shape``."""
        return {
            'faces': [],
            'bboxes': [],
            'masks': [],
            'masked_images': [],
            'image_shape': image_shape,
        }

    @staticmethod
    def _select_faces(faces, n):
        """Keep the ``n`` largest faces (all if ``n`` is None or <= 0), ordered left to right."""
        if n is not None and n > 0:
            faces = sorted(
                faces,
                key=lambda f: (f['bbox'][2] - f['bbox'][0]) * (f['bbox'][3] - f['bbox'][1]),
                reverse=True,
            )[:n]
        return sorted(faces, key=lambda f: f['bbox'][0])

    @staticmethod
    def _mask_region(image, x1, y1, x2, y2):
        """Return ``(mask, masked_image)`` for the pixel rectangle ``[x1, x2) x [y1, y2)``.

        ``mask`` is an H x W uint8 array holding 1 inside the rectangle and 0
        elsewhere; ``masked_image`` equals ``image`` inside the rectangle and
        is zero elsewhere.
        """
        rows = slice(int(y1), int(y2))
        cols = slice(int(x1), int(x2))
        mask = np.zeros(image.shape[:2], dtype=np.uint8)
        mask[rows, cols] = 1
        # Copy only the kept region into a zeroed image; this yields the same
        # pixels as AND-ing the image with itself under the 0/1 mask.
        masked_image = np.zeros_like(image)
        masked_image[rows, cols] = image[rows, cols]
        return mask, masked_image

    def _build_result(self, faces, image):
        """Turn already selected/ordered faces into the public result dict."""
        height, width = image.shape[:2]
        bboxes = []
        masks = []
        masked_images = []
        for face in faces:
            bbox = face['bbox']
            sq_x1, sq_y1, sq_x2, sq_y2 = self._make_square_bbox(
                bbox[0], bbox[1], bbox[2], bbox[3], width, height
            )
            mask, masked_image = self._mask_region(image, sq_x1, sq_y1, sq_x2, sq_y2)
            bboxes.append([sq_x1, sq_y1, sq_x2 - sq_x1, sq_y2 - sq_y1])
            masks.append(mask)
            masked_images.append(masked_image)
        return {
            'faces': faces,
            'bboxes': bboxes,
            'masks': masks,
            'masked_images': masked_images,
            'image_shape': image.shape,
        }

    def _center_fallback(self, image):
        """Build a result whose single bbox is a centred square.

        The square's side is half of the shorter image side (integer
        division), so it always lies inside the image and needs no clamping.
        """
        height, width = image.shape[:2]
        side = min(width, height) // 2
        x1 = width // 2 - side // 2
        y1 = height // 2 - side // 2
        mask, masked_image = self._mask_region(image, x1, y1, x1 + side, y1 + side)
        return {
            'faces': [],
            'bboxes': [[x1, y1, side, side]],
            'masks': [mask],
            'masked_images': [masked_image],
            'image_shape': image.shape,
        }

    def infer_from_array(self, image_array, n=None):
        """Run face detection on an in-memory image.

        Args:
            image_array: numpy array of shape ``[H, W, 3]`` with values in
                0-255.  Non-uint8 input is clipped to 0-255 and cast to
                uint8 before detection.
            n: Keep only the ``n`` largest faces by bbox area; ``None`` (or a
                value <= 0) keeps every detected face.

        Returns:
            dict: ``{}`` when the input is ``None``, has the wrong shape or is
            not array-like.  Otherwise a dict with keys:
                'faces': selected detections, ordered left to right,
                'bboxes': one ``[x, y, width, height]`` square box per face,
                'masks': one H x W uint8 mask per face (1 inside the box),
                'masked_images': one image per face, zero outside the box,
                'image_shape': shape of the (uint8) input image.
            When no face is detected, or detection raises, all four lists
            are empty; unlike ``infer`` there is no centre-region fallback.
        """
        if image_array is None:
            print("错误:输入图像数组为空")
            return {}

        try:
            if len(image_array.shape) != 3 or image_array.shape[2] != 3:
                print(f"错误:图像数组形状不正确,期望[H, W, 3],实际{image_array.shape}")
                return {}

            if image_array.dtype != np.uint8:
                # Clip first so out-of-range values saturate instead of wrapping.
                image_array = np.clip(image_array, 0, 255).astype(np.uint8)

            faces = self.face_analysis.get(image_array)
            if not faces:
                return self._empty_result(image_array.shape)

            return self._build_result(self._select_faces(faces, n), image_array)

        except Exception as e:
            print(f"处理图像数组时出错: {str(e)}")
            # Best effort: still report the image shape when the input has one.
            shape = getattr(image_array, 'shape', None)
            if shape is None:
                return {}
            return self._empty_result(shape)

    def infer(self, image_path, n=None):
        """Run face detection on an image file or an already loaded image.

        Args:
            image_path: Path of the image file (``str`` or ``os.PathLike``),
                or a numpy array holding the image in the layout
                ``cv2.imread`` would return.
            n: Keep only the ``n`` largest faces by bbox area; ``None`` (or a
                value <= 0) keeps every detected face.

        Returns:
            dict: ``{}`` when the image cannot be read.  Otherwise a dict with
            keys:
                'faces': selected detections, ordered left to right,
                'bboxes': one ``[x, y, width, height]`` square box per face,
                'masks': one H x W uint8 mask per face (1 inside the box),
                'masked_images': one image per face, zero outside the box,
                'image_shape': shape of the image.
            When no face is detected, or processing raises after the image
            was loaded, 'faces' is empty and the other lists hold a single
            entry for a centred square whose side is half of the shorter
            image side.
        """
        is_array = isinstance(image_path, np.ndarray)
        # Label used in error messages; avoids dumping a whole array to stdout.
        source = "<ndarray>" if is_array else image_path
        image = None
        try:
            image = image_path if is_array else cv2.imread(os.fspath(image_path))
            if image is None:
                print(f"错误:无法读取图像 {image_path}")
                return {}

            faces = self.face_analysis.get(image)
            if not faces:
                short_name = source if is_array else os.path.basename(image_path)
                print(f"警告:图像 {short_name} 中未检测到人脸,使用中心区域作为默认方形bbox")
                return self._center_fallback(image)

            return self._build_result(self._select_faces(faces, n), image)

        except Exception as e:
            print(f"处理图像 {source} 时出错: {str(e)}")
            # Fall back to the centre square whenever an image was loaded.
            if image is not None:
                return self._center_fallback(image)
            return {}


class FaceProcessor:
    """Detect faces with insightface and derive square boxes, masks and masked images."""

    def __init__(self, det_thresh=0.5, det_size=(640, 640)):
        """Load the detection-only ``FaceAnalysis`` model and prepare it with ``ctx_id=0``.

        Args:
            det_thresh: Detection threshold forwarded to ``prepare``.
            det_size: Detection input size forwarded to ``prepare``.
        """
        detector = FaceAnalysis(allowed_modules=['detection'])
        detector.prepare(ctx_id=0, det_thresh=det_thresh, det_size=det_size)
        self.face_analysis = detector

    def _make_square_bbox(self, x1, y1, x2, y2, image_width, image_height):
        """Grow a rectangular bbox into a square one sharing the same centre.

        The side of the square is the longer side of the input box.  Along
        each axis the square is shifted back inside the image if it sticks
        out, and finally clamped to ``[0, limit]``.

        Args:
            x1, y1, x2, y2: Corners of the original bbox.
            image_width, image_height: Image size used as the clamping limits.

        Returns:
            tuple: ``(new_x1, new_y1, new_x2, new_y2)``.
        """
        side = max(x2 - x1, y2 - y1)
        half = side / 2

        def fit(start, stop, limit):
            # Centre the square on this axis, then push it back into range.
            middle = (start + stop) / 2
            lo, hi = middle - half, middle + half
            if lo < 0:
                lo, hi = 0, side
            if hi > limit:
                lo, hi = limit - side, limit
            return max(0, lo), min(limit, hi)

        left, right = fit(x1, x2, image_width)
        top, bottom = fit(y1, y2, image_height)
        return left, top, right, bottom

    def get_face_bbox_and_mask(self, image):
        """Detect faces in ``image`` and build per-face outputs, left to right.

        Args:
            image: Image array handed to the detector; its first two
                dimensions are used as height and width.

        Returns:
            tuple: ``(bboxes, masks, masked_images)`` with one entry per face:
            a ``[x, y, width, height]`` box, a uint8 mask that is 1 inside
            the box, and the image AND-ed with that mask.  ``(None, None,
            None)`` when no face is detected.
        """
        detections = self.face_analysis.get(image)
        if not detections:
            print("警告:图像中未检测到人脸。")
            return None, None, None

        img_h, img_w = image.shape[:2]
        bboxes, masks, masked_images = [], [], []

        # Visit the faces ordered by the left edge of their detector bbox.
        for det in sorted(detections, key=lambda d: d['bbox'][0]):
            raw = det['bbox']
            left, top, right, bottom = self._make_square_bbox(
                raw[0], raw[1], raw[2], raw[3], img_w, img_h
            )

            region = np.zeros(image.shape[:2], dtype=np.uint8)
            region[int(top):int(bottom), int(left):int(right)] = 1

            snapshot = image.copy()
            kept = cv2.bitwise_and(snapshot, snapshot, mask=region)

            bboxes.append([left, top, right - left, bottom - top])
            masks.append(region)
            masked_images.append(kept)

        return bboxes, masks, masked_images

def main():
    """Batch CLI: detect faces in every image of a directory and save the results.

    For each supported image in ``--input_dir`` that contains at least one
    face this writes ``<stem>_bbox.npy`` (all square boxes of the image) to
    ``--bbox_output_dir`` and, per face, ``<stem>_face<i>_mask.png`` to
    ``--mask_output_dir`` and ``<stem>_face<i>_masked.png`` to
    ``--masked_image_output_dir``.  Unreadable images and images without a
    detected face are skipped with a message.

    Exits through ``parser.error`` when ``--input_dir`` is not an existing
    directory.
    """
    parser = argparse.ArgumentParser(description='Process images to detect faces and save bbox, mask, and masked images.')
    parser.add_argument('--input_dir', type=str, default="./data/bbox_test_input", help='Directory containing input images.')
    parser.add_argument('--bbox_output_dir', type=str, default="./temp/bbox", help='Directory to save bbox npy files.')
    parser.add_argument('--mask_output_dir', type=str, default="./temp/mask", help='Directory to save mask images.')
    parser.add_argument('--masked_image_output_dir', type=str, default="./temp/masked_images", help='Directory to save masked images.')
    args = parser.parse_args()

    # Fail early with a usage error instead of a traceback from os.listdir.
    if not os.path.isdir(args.input_dir):
        parser.error(f"input directory does not exist: {args.input_dir}")

    # Create the output directories.
    os.makedirs(args.bbox_output_dir, exist_ok=True)
    os.makedirs(args.mask_output_dir, exist_ok=True)
    os.makedirs(args.masked_image_output_dir, exist_ok=True)

    # Collect the supported images; sorted so runs are reproducible
    # (os.listdir returns entries in arbitrary order).
    supported_formats = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
    image_files = sorted(
        name for name in os.listdir(args.input_dir)
        if Path(name).suffix.lower() in supported_formats
    )

    if not image_files:
        print(f"警告:在目录 {args.input_dir} 中未找到支持的图像文件")
        return

    # Load the detector only once there is something to process.
    face_processor = FaceProcessor()

    for image_file in image_files:
        image_path = os.path.join(args.input_dir, image_file)

        image = cv2.imread(image_path)
        if image is None:
            print(f"  错误:无法读取图像 {image_path}")
            continue

        bboxes, masks, masked_images = face_processor.get_face_bbox_and_mask(image)
        if bboxes is None:
            print("  跳过:未检测到人脸")
            continue

        # Base file name without extension, shared by all outputs of this image.
        base_name = Path(image_file).stem

        bbox_file = os.path.join(args.bbox_output_dir, f"{base_name}_bbox.npy")
        np.save(bbox_file, np.array(bboxes))

        for face_no, (mask, masked_image) in enumerate(zip(masks, masked_images), start=1):
            # NOTE(review): FaceProcessor masks hold 0/1, so the saved PNG
            # looks almost black in an image viewer; confirm that consumers
            # expect 0/1 rather than 0/255 before changing this.
            mask_file = os.path.join(args.mask_output_dir, f"{base_name}_face{face_no}_mask.png")
            cv2.imwrite(mask_file, mask)

            masked_image_file = os.path.join(args.masked_image_output_dir, f"{base_name}_face{face_no}_masked.png")
            cv2.imwrite(masked_image_file, masked_image)

            print(f"  已保存人脸{face_no}的mask: {mask_file}")
            print(f"  已保存人脸{face_no}的masked图像: {masked_image_file}")

    print("\n处理完成!")
    print(f"bbox文件保存在: {args.bbox_output_dir}")
    print(f"mask文件保存在: {args.mask_output_dir}")
    print(f"masked图像保存在: {args.masked_image_output_dir}")


# Run the batch face-processing CLI only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()