Spaces:
Runtime error
Runtime error
| """Anchor utils modified from https://github.com/biubug6/Pytorch_Retinaface""" | |
| import math | |
| import tensorflow as tf | |
| import numpy as np | |
| from itertools import product as product | |
###############################################################################
#   Tensorflow / Numpy Priors                                                 #
###############################################################################
def prior_box(image_sizes, min_sizes, steps, clip=False):
    """Build the prior (anchor) boxes of every feature-map level with NumPy.

    Args:
        image_sizes: indexable pair; element 0 divides the y values and
            element 1 divides the x values (i.e. (height, width)).
        min_sizes: per-level sequences of anchor side lengths;
            ``min_sizes[k]`` is used together with ``steps[k]``.
        steps: stride of each feature-map level.
        clip: when truthy, clamp every output value to [0, 1].

    Returns:
        Array of shape [num_priors, 4]; each row is (cx, cy, w, h) with
        x values divided by ``image_sizes[1]`` and y values divided by
        ``image_sizes[0]``.
    """
    img_h, img_w = image_sizes[0], image_sizes[1]
    rows = []
    for level, stride in enumerate(steps):
        # Number of grid cells along each axis for this stride.
        n_rows = math.ceil(img_h / stride)
        n_cols = math.ceil(img_w / stride)
        for row, col in product(range(n_rows), range(n_cols)):
            # Anchor centre sits in the middle of the grid cell.
            center_x = (col + 0.5) * stride / img_w
            center_y = (row + 0.5) * stride / img_h
            rows.extend(
                (center_x, center_y, size / img_w, size / img_h)
                for size in min_sizes[level])
    priors = np.asarray(rows).reshape([-1, 4])
    return np.clip(priors, 0, 1) if clip else priors
def prior_box_tf(image_sizes, min_sizes, steps, clip=False):
    """Build the prior (anchor) boxes of every feature-map level with TF ops.

    Args:
        image_sizes: pair whose element 0 divides the y values and whose
            element 1 divides the x values (i.e. (height, width)); it is
            converted to a float32 tensor.
        min_sizes: per-level anchor side lengths; ``min_sizes[k]`` is used
            together with ``steps[k]``.
        steps: stride of each feature-map level.
        clip: when truthy, clamp every output value to [0, 1].

    Returns:
        Tensor whose rows are (cx, cy, w, h), all levels concatenated
        along axis 0.
    """
    image_sizes = tf.cast(tf.convert_to_tensor(image_sizes), tf.float32)
    img_h, img_w = image_sizes[0], image_sizes[1]

    # One (rows, cols) pair per stride: ceil(image_size / stride).
    strides = tf.reshape(tf.cast(steps, tf.float32), [-1, 1])
    feature_maps = tf.math.ceil(tf.reshape(image_sizes, [1, 2]) / strides)

    per_level = []
    for k in range(len(min_sizes)):
        grid_x, grid_y = _meshgrid_tf(tf.range(feature_maps[k][1]),
                                      tf.range(feature_maps[k][0]))
        num_cells = tf.shape(grid_x)[0] * tf.shape(grid_x)[1]

        # Cell centres, one row per cell, then one copy per anchor size.
        centers = tf.stack([(grid_x + 0.5) * steps[k] / img_w,
                            (grid_y + 0.5) * steps[k] / img_h], axis=-1)
        centers = tf.reshape(centers, [-1, 2])
        centers = tf.repeat(
            centers, repeats=tf.shape(min_sizes[k])[0], axis=0)

        # Anchor sizes of this level, tiled once per cell.
        sizes = tf.stack([min_sizes[k] / img_w, min_sizes[k] / img_h], 1)
        sizes = tf.repeat(sizes[tf.newaxis], repeats=num_cells, axis=0)
        sizes = tf.reshape(sizes, [-1, 2])

        per_level.append(tf.concat([centers, sizes], 1))

    priors = tf.concat(per_level, axis=0)
    return tf.clip_by_value(priors, 0, 1) if clip else priors
def _meshgrid_tf(x, y):
    """Return 2-D coordinate grids built from two 1-D tensors.

    Workaround for the tf.meshgrid() issue:
    https://github.com/tensorflow/tensorflow/issues/34470

    Args:
        x: 1-D tensor laid out along the columns of the grid.
        y: 1-D tensor laid out along the rows of the grid.

    Returns:
        (grid_x, grid_y), both of shape [len(y), len(x)].
    """
    num_rows = tf.shape(y)[0]
    num_cols = tf.shape(x)[0]
    target_shape = [num_rows, num_cols]
    x_as_row = tf.reshape(x, [1, -1])
    y_as_col = tf.reshape(y, [-1, 1])
    return (tf.broadcast_to(x_as_row, target_shape),
            tf.broadcast_to(y_as_col, target_shape))
###############################################################################
#   Tensorflow Encoding                                                       #
###############################################################################
def encode_tf(labels, priors, match_thresh, ignore_thresh,
              variances=[0.1, 0.2]):
    """Match ground-truth labels to priors and encode training targets.

    Args:
        labels: 2-D tensor, one row per object. Columns 0-3 are the box,
            the last column is the landmark-valid flag and everything in
            between is the landmark coordinates (assumed to be 10 values,
            as `_encode_landm` reshapes them to [*, 5, 2]).
        priors: prior boxes in (cx, cy, w, h) form; cast to float32.
        match_thresh: overlap above which a prior is positive.
        ignore_thresh: overlap above which (and below `match_thresh`) a
            prior is ignored. Must not exceed `match_thresh`.
        variances: pair of scaling factors forwarded to the encoders.

    Returns:
        Tensor with one row per prior: encoded box, encoded landmarks,
        landmark-valid flag and confidence label
        (1: positive, 0: negative, -1: ignore), concatenated on axis 1.
    """
    assert ignore_thresh <= match_thresh
    priors = tf.cast(priors, tf.float32)
    gt_boxes = labels[:, :4]
    gt_landm = labels[:, 4:-1]
    gt_landm_flag = labels[:, -1]  # 1: with landm, 0: w/o landm.

    # Jaccard index between every object and every prior.
    iou = _jaccard(gt_boxes, _point_form(priors))

    # (Bipartite matching)
    # [num_objects] best prior for each ground truth.
    top_prior_iou, top_prior_idx = tf.math.top_k(iou, k=1)
    top_prior_iou = top_prior_iou[:, 0]
    top_prior_idx = top_prior_idx[:, 0]

    # [num_priors] best ground truth for each prior.
    top_truth_iou, top_truth_idx = tf.math.top_k(tf.transpose(iou), k=1)
    top_truth_iou = top_truth_iou[:, 0]
    top_truth_idx = top_truth_idx[:, 0]

    def _has_more(i, truth_idx, truth_iou):
        # Iterate once per ground-truth object.
        return tf.less(i, tf.shape(top_prior_idx)[0])

    def _force_match(i, truth_idx, truth_iou):
        # One-hot mask selecting the best prior of object i.
        hit = tf.one_hot(top_prior_idx[i], tf.shape(truth_idx)[0])
        hit_int = tf.cast(hit, tf.int32)
        forced_idx = truth_idx * (1 - hit_int) + hit_int * i
        hit_float = tf.cast(hit, tf.float32)
        # Overlap 2 is above any real IoU, so the forced match is kept.
        forced_iou = truth_iou * (1 - hit_float) + hit_float * 2
        # Only objects whose best overlap clears match_thresh are forced.
        return tf.cond(top_prior_iou[i] > match_thresh,
                       lambda: (i + 1, forced_idx, forced_iou),
                       lambda: (i + 1, truth_idx, truth_iou))

    # Ensure every well-overlapping object owns its best prior.
    _, top_truth_idx, top_truth_iou = tf.while_loop(
        _has_more, _force_match,
        [tf.constant(0), top_truth_idx, top_truth_iou])

    matched_boxes = tf.gather(gt_boxes, top_truth_idx)   # [num_priors, 4]
    matched_landm = tf.gather(gt_landm, top_truth_idx)   # [num_priors, 10]
    matched_flag = tf.gather(gt_landm_flag, top_truth_idx)  # [num_priors]

    loc_t = _encode_bbox(matched_boxes, priors, variances)
    landm_t = _encode_landm(matched_landm, priors, variances)
    landm_valid_t = tf.cast(matched_flag > 0, tf.float32)

    # NOTE(review): both comparisons are strict, so an overlap exactly
    # equal to match_thresh ends up negative (0) rather than positive or
    # ignored -- confirm this is intended.
    conf_t = tf.cast(top_truth_iou > match_thresh, tf.float32)
    in_ignore_band = tf.logical_and(top_truth_iou < match_thresh,
                                    top_truth_iou > ignore_thresh)
    # 1: pos, 0: neg, -1: ignore
    conf_t = tf.where(in_ignore_band, tf.ones_like(conf_t) * -1, conf_t)

    return tf.concat([loc_t, landm_t, landm_valid_t[..., tf.newaxis],
                      conf_t[..., tf.newaxis]], axis=1)
def _encode_bbox(matched, priors, variances):
    """Encode matched ground-truth boxes as regression targets.

    Args:
        matched: (tensor) ground-truth box of each prior in point form
            (xmin, ymin, xmax, ymax), Shape: [num_priors, 4].
        priors: (tensor) prior boxes in center-offset form
            (cx, cy, w, h), Shape: [num_priors, 4].
        variances: (list[float]) variances of the prior boxes;
            element 0 scales the centre offset, element 1 the log-size.
    Return:
        encoded boxes (tensor), Shape: [num_priors, 4]
    """
    prior_cxcy = priors[:, :2]
    prior_wh = priors[:, 2:]
    box_min = matched[:, :2]
    box_max = matched[:, 2:]

    # Offset between the box centre and the prior centre, scaled by
    # the prior size and the first variance.
    offset = (box_min + box_max) / 2 - prior_cxcy
    offset = offset / (variances[0] * prior_wh)

    # Log-ratio of box size to prior size, scaled by the second variance.
    size_ratio = (box_max - box_min) / prior_wh
    log_size = tf.math.log(size_ratio) / variances[1]

    # Target for smooth_l1_loss.
    return tf.concat([offset, log_size], 1)  # [num_priors, 4]
def _encode_landm(matched, priors, variances):
    """Encode matched ground-truth landmarks as regression targets.

    Args:
        matched: (tensor) landmark coordinates of each prior, five
            (x, y) points per row, Shape: [num_priors, 10].
        priors: (tensor) prior boxes in center-offset form
            (cx, cy, w, h), Shape: [num_priors, 4].
        variances: (list[float]) variances of the prior boxes; only
            element 0 is used.
    Return:
        encoded landm (tensor), Shape: [num_priors, 10]
    """
    num_rows = tf.shape(matched)[0]
    # View the landmarks as 5 points and give each point its own copy
    # of the prior so the arithmetic lines up element-wise.
    points = tf.reshape(matched, [num_rows, 5, 2])
    tiled_priors = tf.broadcast_to(
        tf.expand_dims(priors, 1), [num_rows, 5, 4])

    # Offset of every point from the prior centre, scaled by the
    # prior size and the first variance.
    offset = points[:, :, :2] - tiled_priors[:, :, :2]
    offset = offset / (variances[0] * tiled_priors[:, :, 2:])

    # Flatten back to one row per prior; target for smooth_l1_loss.
    return tf.reshape(offset, [tf.shape(offset)[0], -1])
def _point_form(boxes):
    """Convert center-size boxes to (xmin, ymin, xmax, ymax) form.

    Used so that prior boxes can be compared with point-form ground
    truth data.

    Args:
        boxes: (tensor) center-size boxes (cx, cy, w, h), one per row.
    Return:
        boxes: (tensor) the same boxes as xmin, ymin, xmax, ymax.
    """
    centers = boxes[:, :2]
    half_sizes = boxes[:, 2:] / 2
    return tf.concat((centers - half_sizes, centers + half_sizes), axis=1)
def _intersect(box_a, box_b):
    """Compute the pairwise intersection area of two sets of boxes.

    Both corner tensors are expanded to [A, B, 2]:
        [A, 2] -> [A, 1, 2] -> [A, B, 2]
        [B, 2] -> [1, B, 2] -> [A, B, 2]
    and the overlap rectangle is taken element-wise.

    Args:
        box_a: (tensor) bounding boxes, Shape: [A, 4].
        box_b: (tensor) bounding boxes, Shape: [B, 4].
    Return:
        (tensor) intersection area, Shape: [A, B].
    """
    num_a = tf.shape(box_a)[0]
    num_b = tf.shape(box_b)[0]
    pair_shape = [num_a, num_b, 2]

    def _pairwise(corner_a, corner_b):
        # Expand both corner sets to the common [A, B, 2] shape.
        return (tf.broadcast_to(tf.expand_dims(corner_a, 1), pair_shape),
                tf.broadcast_to(tf.expand_dims(corner_b, 0), pair_shape))

    lower_right = tf.minimum(*_pairwise(box_a[:, 2:], box_b[:, 2:]))
    upper_left = tf.maximum(*_pairwise(box_a[:, :2], box_b[:, :2]))

    # Negative extents mean the boxes do not overlap; clamp them to 0.
    extent = lower_right - upper_left
    overlap = tf.maximum(extent, tf.zeros_like(extent))
    return overlap[:, :, 0] * overlap[:, :, 1]
def _jaccard(box_a, box_b):
    """Compute the jaccard overlap of two sets of boxes.

    The jaccard overlap is simply the intersection over union of two
    boxes. Here we operate on ground truth boxes and default boxes.
    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)

    Pairs whose union is zero (both boxes have zero area) get an
    overlap of 0 instead of NaN.

    Args:
        box_a: (tensor) Ground truth bounding boxes in point form,
            Shape: [num_objects, 4]
        box_b: (tensor) Prior boxes in point form,
            Shape: [num_priors, 4]
    Return:
        jaccard overlap: (tensor) Shape: [num_objects, num_priors]
    """
    inter = _intersect(box_a, box_b)  # [A, B]
    area_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
    area_b = (box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1])
    # [A, 1] + [1, B] broadcasts to the [A, B] shape of `inter`.
    union = tf.expand_dims(area_a, 1) + tf.expand_dims(area_b, 0) - inter
    # divide_no_nan yields 0 where union == 0, so degenerate pairs cannot
    # inject NaN into the matching that consumes this matrix.
    return tf.math.divide_no_nan(inter, union)  # [A, B]
###############################################################################
#   Tensorflow Decoding                                                       #
###############################################################################
def decode_tf(labels, priors, variances=[0.1, 0.2]):
    """Decode encoded rows back to boxes and landmarks.

    Args:
        labels: 2-D tensor; columns 0-3 are the encoded box, 4-13 the
            encoded landmarks, 14 the landmark-valid flag and 15 the
            confidence.
        priors: prior boxes in (cx, cy, w, h) form, one per row.
        variances: pair of scaling factors forwarded to the decoders.

    Returns:
        Tensor with decoded box, decoded landmarks, landmark-valid flag
        and confidence concatenated on axis 1.
    """
    boxes = _decode_bbox(labels[:, :4], priors, variances)
    points = _decode_landm(labels[:, 4:14], priors, variances)
    # The flag and confidence pass through unchanged, as column vectors.
    landm_valid = tf.expand_dims(labels[:, 14], axis=-1)
    conf = tf.expand_dims(labels[:, 15], axis=-1)
    return tf.concat([boxes, points, landm_valid, conf], axis=1)
def _decode_bbox(pre, priors, variances=[0.1, 0.2]):
    """Decode box predictions, undoing the offset-regression encoding.

    Args:
        pre (tensor): location predictions for loc layers,
            Shape: [num_priors, 4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors, 4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded boxes in (xmin, ymin, xmax, ymax) form
    """
    prior_cxcy, prior_wh = priors[:, :2], priors[:, 2:]
    # Undo the centre-offset scaling, then the log-size scaling.
    centers = prior_cxcy + pre[:, :2] * variances[0] * prior_wh
    half_sides = prior_wh * tf.math.exp(pre[:, 2:] * variances[1]) / 2
    top_left = centers - half_sides
    bottom_right = centers + half_sides
    return tf.concat([top_left, bottom_right], axis=1)
def _decode_landm(pre, priors, variances=[0.1, 0.2]):
    """Decode landmark predictions, undoing the offset-regression encoding.

    Args:
        pre (tensor): landm predictions for loc layers,
            Shape: [num_priors, 10]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors, 4].
        variances: (list[float]) Variances of priorboxes; only element 0
            is used.
    Return:
        decoded landm predictions, five (x, y) points per row
    """
    anchor_xy = priors[:, :2]
    anchor_wh = priors[:, 2:]
    # Columns (0-1), (2-3), ..., (8-9) each hold one (x, y) point.
    points = [
        anchor_xy + pre[:, start:start + 2] * variances[0] * anchor_wh
        for start in range(0, 10, 2)
    ]
    return tf.concat(points, axis=1)