Spaces:
Running
Running
# Copyright 2017 The TensorFlow Authors. All Rights Reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# ============================================================================== | |
"""Class Head. | |
Contains Class prediction head classes for different meta architectures. | |
All the class prediction heads have a predict function that receives the | |
`features` as the first argument and returns class predictions with background. | |
""" | |
import functools | |
import tensorflow.compat.v1 as tf | |
import tf_slim as slim | |
from object_detection.predictors.heads import head | |
class MaskRCNNClassHead(head.Head): | |
"""Mask RCNN class prediction head. | |
Please refer to Mask RCNN paper: | |
https://arxiv.org/abs/1703.06870 | |
""" | |
def __init__(self, | |
is_training, | |
num_class_slots, | |
fc_hyperparams_fn, | |
use_dropout, | |
dropout_keep_prob, | |
scope='ClassPredictor'): | |
"""Constructor. | |
Args: | |
is_training: Indicates whether the BoxPredictor is in training mode. | |
num_class_slots: number of class slots. Note that num_class_slots may or | |
may not include an implicit background category. | |
fc_hyperparams_fn: A function to generate tf-slim arg_scope with | |
hyperparameters for fully connected ops. | |
use_dropout: Option to use dropout or not. Note that a single dropout | |
op is applied here prior to both box and class predictions, which stands | |
in contrast to the ConvolutionalBoxPredictor below. | |
dropout_keep_prob: Keep probability for dropout. | |
This is only used if use_dropout is True. | |
scope: Scope name for the convolution operation. | |
""" | |
super(MaskRCNNClassHead, self).__init__() | |
self._is_training = is_training | |
self._num_class_slots = num_class_slots | |
self._fc_hyperparams_fn = fc_hyperparams_fn | |
self._use_dropout = use_dropout | |
self._dropout_keep_prob = dropout_keep_prob | |
self._scope = scope | |
def predict(self, features, num_predictions_per_location=1): | |
"""Predicts boxes and class scores. | |
Args: | |
features: A float tensor of shape [batch_size, height, width, channels] | |
containing features for a batch of images. | |
num_predictions_per_location: Int containing number of predictions per | |
location. | |
Returns: | |
class_predictions_with_background: A float tensor of shape | |
[batch_size, 1, num_class_slots] representing the class predictions for | |
the proposals. | |
Raises: | |
ValueError: If num_predictions_per_location is not 1. | |
""" | |
if num_predictions_per_location != 1: | |
raise ValueError('Only num_predictions_per_location=1 is supported') | |
spatial_averaged_roi_pooled_features = tf.reduce_mean( | |
features, [1, 2], keep_dims=True, name='AvgPool') | |
flattened_roi_pooled_features = slim.flatten( | |
spatial_averaged_roi_pooled_features) | |
if self._use_dropout: | |
flattened_roi_pooled_features = slim.dropout( | |
flattened_roi_pooled_features, | |
keep_prob=self._dropout_keep_prob, | |
is_training=self._is_training) | |
with slim.arg_scope(self._fc_hyperparams_fn()): | |
class_predictions_with_background = slim.fully_connected( | |
flattened_roi_pooled_features, | |
self._num_class_slots, | |
reuse=tf.AUTO_REUSE, | |
activation_fn=None, | |
scope=self._scope) | |
class_predictions_with_background = tf.reshape( | |
class_predictions_with_background, | |
[-1, 1, self._num_class_slots]) | |
return class_predictions_with_background | |
class ConvolutionalClassHead(head.Head): | |
"""Convolutional class prediction head.""" | |
def __init__(self, | |
is_training, | |
num_class_slots, | |
use_dropout, | |
dropout_keep_prob, | |
kernel_size, | |
apply_sigmoid_to_scores=False, | |
class_prediction_bias_init=0.0, | |
use_depthwise=False, | |
scope='ClassPredictor'): | |
"""Constructor. | |
Args: | |
is_training: Indicates whether the BoxPredictor is in training mode. | |
num_class_slots: number of class slots. Note that num_class_slots may or | |
may not include an implicit background category. | |
use_dropout: Option to use dropout or not. Note that a single dropout | |
op is applied here prior to both box and class predictions, which stands | |
in contrast to the ConvolutionalBoxPredictor below. | |
dropout_keep_prob: Keep probability for dropout. | |
This is only used if use_dropout is True. | |
kernel_size: Size of final convolution kernel. If the | |
spatial resolution of the feature map is smaller than the kernel size, | |
then the kernel size is automatically set to be | |
min(feature_width, feature_height). | |
apply_sigmoid_to_scores: if True, apply the sigmoid on the output | |
class_predictions. | |
class_prediction_bias_init: constant value to initialize bias of the last | |
conv2d layer before class prediction. | |
use_depthwise: Whether to use depthwise convolutions for prediction | |
steps. Default is False. | |
scope: Scope name for the convolution operation. | |
Raises: | |
ValueError: if min_depth > max_depth. | |
ValueError: if use_depthwise is True and kernel_size is 1. | |
""" | |
if use_depthwise and (kernel_size == 1): | |
raise ValueError('Should not use 1x1 kernel when using depthwise conv') | |
super(ConvolutionalClassHead, self).__init__() | |
self._is_training = is_training | |
self._num_class_slots = num_class_slots | |
self._use_dropout = use_dropout | |
self._dropout_keep_prob = dropout_keep_prob | |
self._kernel_size = kernel_size | |
self._apply_sigmoid_to_scores = apply_sigmoid_to_scores | |
self._class_prediction_bias_init = class_prediction_bias_init | |
self._use_depthwise = use_depthwise | |
self._scope = scope | |
def predict(self, features, num_predictions_per_location): | |
"""Predicts boxes. | |
Args: | |
features: A float tensor of shape [batch_size, height, width, channels] | |
containing image features. | |
num_predictions_per_location: Number of box predictions to be made per | |
spatial location. | |
Returns: | |
class_predictions_with_background: A float tensors of shape | |
[batch_size, num_anchors, num_class_slots] representing the class | |
predictions for the proposals. | |
""" | |
net = features | |
if self._use_dropout: | |
net = slim.dropout(net, keep_prob=self._dropout_keep_prob) | |
if self._use_depthwise: | |
depthwise_scope = self._scope + '_depthwise' | |
class_predictions_with_background = slim.separable_conv2d( | |
net, None, [self._kernel_size, self._kernel_size], | |
padding='SAME', depth_multiplier=1, stride=1, | |
rate=1, scope=depthwise_scope) | |
class_predictions_with_background = slim.conv2d( | |
class_predictions_with_background, | |
num_predictions_per_location * self._num_class_slots, [1, 1], | |
activation_fn=None, | |
normalizer_fn=None, | |
normalizer_params=None, | |
scope=self._scope) | |
else: | |
class_predictions_with_background = slim.conv2d( | |
net, | |
num_predictions_per_location * self._num_class_slots, | |
[self._kernel_size, self._kernel_size], | |
activation_fn=None, | |
normalizer_fn=None, | |
normalizer_params=None, | |
scope=self._scope, | |
biases_initializer=tf.constant_initializer( | |
self._class_prediction_bias_init)) | |
if self._apply_sigmoid_to_scores: | |
class_predictions_with_background = tf.sigmoid( | |
class_predictions_with_background) | |
batch_size = features.get_shape().as_list()[0] | |
if batch_size is None: | |
batch_size = tf.shape(features)[0] | |
class_predictions_with_background = tf.reshape( | |
class_predictions_with_background, | |
[batch_size, -1, self._num_class_slots]) | |
return class_predictions_with_background | |
# TODO(alirezafathi): See if possible to unify Weight Shared with regular | |
# convolutional class head. | |
class WeightSharedConvolutionalClassHead(head.Head): | |
"""Weight shared convolutional class prediction head. | |
This head allows sharing the same set of parameters (weights) when called more | |
then once on different feature maps. | |
""" | |
def __init__(self, | |
num_class_slots, | |
kernel_size=3, | |
class_prediction_bias_init=0.0, | |
use_dropout=False, | |
dropout_keep_prob=0.8, | |
use_depthwise=False, | |
score_converter_fn=tf.identity, | |
return_flat_predictions=True, | |
scope='ClassPredictor'): | |
"""Constructor. | |
Args: | |
num_class_slots: number of class slots. Note that num_class_slots may or | |
may not include an implicit background category. | |
kernel_size: Size of final convolution kernel. | |
class_prediction_bias_init: constant value to initialize bias of the last | |
conv2d layer before class prediction. | |
use_dropout: Whether to apply dropout to class prediction head. | |
dropout_keep_prob: Probability of keeping activiations. | |
use_depthwise: Whether to use depthwise convolutions for prediction | |
steps. Default is False. | |
score_converter_fn: Callable elementwise nonlinearity (that takes tensors | |
as inputs and returns tensors). | |
return_flat_predictions: If true, returns flattened prediction tensor | |
of shape [batch, height * width * num_predictions_per_location, | |
box_coder]. Otherwise returns the prediction tensor before reshaping, | |
whose shape is [batch, height, width, num_predictions_per_location * | |
num_class_slots]. | |
scope: Scope name for the convolution operation. | |
Raises: | |
ValueError: if use_depthwise is True and kernel_size is 1. | |
""" | |
if use_depthwise and (kernel_size == 1): | |
raise ValueError('Should not use 1x1 kernel when using depthwise conv') | |
super(WeightSharedConvolutionalClassHead, self).__init__() | |
self._num_class_slots = num_class_slots | |
self._kernel_size = kernel_size | |
self._class_prediction_bias_init = class_prediction_bias_init | |
self._use_dropout = use_dropout | |
self._dropout_keep_prob = dropout_keep_prob | |
self._use_depthwise = use_depthwise | |
self._score_converter_fn = score_converter_fn | |
self._return_flat_predictions = return_flat_predictions | |
self._scope = scope | |
def predict(self, features, num_predictions_per_location): | |
"""Predicts boxes. | |
Args: | |
features: A float tensor of shape [batch_size, height, width, channels] | |
containing image features. | |
num_predictions_per_location: Number of box predictions to be made per | |
spatial location. | |
Returns: | |
class_predictions_with_background: A tensor of shape | |
[batch_size, num_anchors, num_class_slots] representing the class | |
predictions for the proposals, or a tensor of shape [batch, height, | |
width, num_predictions_per_location * num_class_slots] representing | |
class predictions before reshaping if self._return_flat_predictions is | |
False. | |
""" | |
class_predictions_net = features | |
if self._use_dropout: | |
class_predictions_net = slim.dropout( | |
class_predictions_net, keep_prob=self._dropout_keep_prob) | |
if self._use_depthwise: | |
conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1) | |
else: | |
conv_op = slim.conv2d | |
class_predictions_with_background = conv_op( | |
class_predictions_net, | |
num_predictions_per_location * self._num_class_slots, | |
[self._kernel_size, self._kernel_size], | |
activation_fn=None, stride=1, padding='SAME', | |
normalizer_fn=None, | |
biases_initializer=tf.constant_initializer( | |
self._class_prediction_bias_init), | |
scope=self._scope) | |
batch_size = features.get_shape().as_list()[0] | |
if batch_size is None: | |
batch_size = tf.shape(features)[0] | |
class_predictions_with_background = self._score_converter_fn( | |
class_predictions_with_background) | |
if self._return_flat_predictions: | |
class_predictions_with_background = tf.reshape( | |
class_predictions_with_background, | |
[batch_size, -1, self._num_class_slots]) | |
return class_predictions_with_background | |