Spaces:

NCTCMumbai
/

NCTC

Running

App Files Files Community

NCTC / models /research /object_detection /predictors /heads /class_head.py

NCTCMumbai

Upload 2571 files

0b8359d almost 2 years ago

raw

history blame contribute delete

12.8 kB

	# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================

	"""Class Head.

	Contains Class prediction head classes for different meta architectures.
	All the class prediction heads have a predict function that receives the
	`features` as the first argument and returns class predictions with background.
	"""
	import functools
	import tensorflow.compat.v1 as tf
	import tf_slim as slim

	from object_detection.predictors.heads import head


	class MaskRCNNClassHead(head.Head):
	  """Mask RCNN class prediction head.

	  Spatially averages the incoming features and feeds them through a single
	  fully connected layer to produce one set of class scores per input.

	  Please refer to Mask RCNN paper:
	  https://arxiv.org/abs/1703.06870
	  """

	  def __init__(self,
	               is_training,
	               num_class_slots,
	               fc_hyperparams_fn,
	               use_dropout,
	               dropout_keep_prob,
	               scope='ClassPredictor'):
	    """Constructor.

	    Args:
	      is_training: Indicates whether the head is in training mode. It is
	        forwarded to the dropout op.
	      num_class_slots: number of class slots. Note that num_class_slots may or
	        may not include an implicit background category.
	      fc_hyperparams_fn: A function to generate tf-slim arg_scope with
	        hyperparameters for fully connected ops.
	      use_dropout: Whether to apply dropout to the flattened, spatially
	        averaged features before the fully connected layer.
	      dropout_keep_prob: Keep probability for dropout.
	        This is only used if use_dropout is True.
	      scope: Scope name for the fully connected operation.
	    """
	    super(MaskRCNNClassHead, self).__init__()
	    self._is_training = is_training
	    self._num_class_slots = num_class_slots
	    self._fc_hyperparams_fn = fc_hyperparams_fn
	    self._use_dropout = use_dropout
	    self._dropout_keep_prob = dropout_keep_prob
	    self._scope = scope

	  def predict(self, features, num_predictions_per_location=1):
	    """Predicts class scores.

	    Args:
	      features: A float tensor of shape [batch_size, height, width, channels]
	        containing features for a batch of images.
	      num_predictions_per_location: Int containing number of predictions per
	        location. Only 1 is supported.

	    Returns:
	      class_predictions_with_background: A float tensor of shape
	        [batch_size, 1, num_class_slots] representing the class predictions for
	        the proposals.

	    Raises:
	      ValueError: If num_predictions_per_location is not 1.
	    """
	    if num_predictions_per_location != 1:
	      raise ValueError('Only num_predictions_per_location=1 is supported')
	    # `keepdims` replaces the deprecated `keep_dims` alias; the reduced axes
	    # are kept so the result can be flattened per example below.
	    spatial_averaged_roi_pooled_features = tf.reduce_mean(
	        features, [1, 2], keepdims=True, name='AvgPool')
	    flattened_roi_pooled_features = slim.flatten(
	        spatial_averaged_roi_pooled_features)
	    if self._use_dropout:
	      flattened_roi_pooled_features = slim.dropout(
	          flattened_roi_pooled_features,
	          keep_prob=self._dropout_keep_prob,
	          is_training=self._is_training)

	    with slim.arg_scope(self._fc_hyperparams_fn()):
	      # AUTO_REUSE lets repeated calls under the same scope share variables.
	      class_predictions_with_background = slim.fully_connected(
	          flattened_roi_pooled_features,
	          self._num_class_slots,
	          reuse=tf.AUTO_REUSE,
	          activation_fn=None,
	          scope=self._scope)
	    class_predictions_with_background = tf.reshape(
	        class_predictions_with_background,
	        [-1, 1, self._num_class_slots])
	    return class_predictions_with_background


	class ConvolutionalClassHead(head.Head):
	  """Convolutional class prediction head."""

	  def __init__(self,
	               is_training,
	               num_class_slots,
	               use_dropout,
	               dropout_keep_prob,
	               kernel_size,
	               apply_sigmoid_to_scores=False,
	               class_prediction_bias_init=0.0,
	               use_depthwise=False,
	               scope='ClassPredictor'):
	    """Constructor.

	    Args:
	      is_training: Indicates whether the head is in training mode. It is
	        forwarded to the dropout op.
	      num_class_slots: number of class slots. Note that num_class_slots may or
	        may not include an implicit background category.
	      use_dropout: Whether to apply dropout to the input features before the
	        prediction convolution.
	      dropout_keep_prob: Keep probability for dropout.
	        This is only used if use_dropout is True.
	      kernel_size: Size of final convolution kernel. This head uses the value
	        as given; it does not clamp it to the feature map size.
	      apply_sigmoid_to_scores: if True, apply the sigmoid on the output
	        class_predictions.
	      class_prediction_bias_init: constant value to initialize bias of the last
	        conv2d layer before class prediction.
	      use_depthwise: Whether to use depthwise convolutions for prediction
	        steps. Default is False.
	      scope: Scope name for the convolution operation.

	    Raises:
	      ValueError: if use_depthwise is True and kernel_size is 1.
	    """
	    if use_depthwise and (kernel_size == 1):
	      raise ValueError('Should not use 1x1 kernel when using depthwise conv')

	    super(ConvolutionalClassHead, self).__init__()
	    self._is_training = is_training
	    self._num_class_slots = num_class_slots
	    self._use_dropout = use_dropout
	    self._dropout_keep_prob = dropout_keep_prob
	    self._kernel_size = kernel_size
	    self._apply_sigmoid_to_scores = apply_sigmoid_to_scores
	    self._class_prediction_bias_init = class_prediction_bias_init
	    self._use_depthwise = use_depthwise
	    self._scope = scope

	  def predict(self, features, num_predictions_per_location):
	    """Predicts class scores.

	    Args:
	      features: A float tensor of shape [batch_size, height, width, channels]
	        containing image features.
	      num_predictions_per_location: Number of box predictions to be made per
	        spatial location.

	    Returns:
	      class_predictions_with_background: A float tensors of shape
	        [batch_size, num_anchors, num_class_slots] representing the class
	        predictions for the proposals.
	    """
	    net = features
	    if self._use_dropout:
	      # Pass is_training explicitly: slim.dropout defaults to training mode,
	      # so without it dropout would stay active at inference unless an
	      # enclosing arg_scope happened to override the default.
	      net = slim.dropout(
	          net,
	          keep_prob=self._dropout_keep_prob,
	          is_training=self._is_training)

	    num_outputs = num_predictions_per_location * self._num_class_slots
	    # The configured bias init applies to the final prediction conv in both
	    # the depthwise and the regular branch.
	    biases_initializer = tf.constant_initializer(
	        self._class_prediction_bias_init)

	    if self._use_depthwise:
	      # Depthwise-only convolution (num_outputs=None) followed by a 1x1
	      # convolution that produces the class scores.
	      depthwise_scope = self._scope + '_depthwise'
	      class_predictions_with_background = slim.separable_conv2d(
	          net, None, [self._kernel_size, self._kernel_size],
	          padding='SAME', depth_multiplier=1, stride=1,
	          rate=1, scope=depthwise_scope)
	      class_predictions_with_background = slim.conv2d(
	          class_predictions_with_background,
	          num_outputs, [1, 1],
	          activation_fn=None,
	          normalizer_fn=None,
	          normalizer_params=None,
	          scope=self._scope,
	          biases_initializer=biases_initializer)
	    else:
	      class_predictions_with_background = slim.conv2d(
	          net,
	          num_outputs,
	          [self._kernel_size, self._kernel_size],
	          activation_fn=None,
	          normalizer_fn=None,
	          normalizer_params=None,
	          scope=self._scope,
	          biases_initializer=biases_initializer)
	    if self._apply_sigmoid_to_scores:
	      class_predictions_with_background = tf.sigmoid(
	          class_predictions_with_background)
	    # Prefer the static batch size; fall back to the dynamic one when it is
	    # unknown at graph construction time.
	    batch_size = features.get_shape().as_list()[0]
	    if batch_size is None:
	      batch_size = tf.shape(features)[0]
	    class_predictions_with_background = tf.reshape(
	        class_predictions_with_background,
	        [batch_size, -1, self._num_class_slots])
	    return class_predictions_with_background


	# TODO(alirezafathi): See if possible to unify Weight Shared with regular
	# convolutional class head.
	class WeightSharedConvolutionalClassHead(head.Head):
	  """Weight shared convolutional class prediction head.

	  This head allows sharing the same set of parameters (weights) when called
	  more than once on different feature maps.
	  """

	  def __init__(self,
	               num_class_slots,
	               kernel_size=3,
	               class_prediction_bias_init=0.0,
	               use_dropout=False,
	               dropout_keep_prob=0.8,
	               use_depthwise=False,
	               score_converter_fn=tf.identity,
	               return_flat_predictions=True,
	               scope='ClassPredictor'):
	    """Constructor.

	    Args:
	      num_class_slots: number of class slots. Note that num_class_slots may or
	        may not include an implicit background category.
	      kernel_size: Size of final convolution kernel.
	      class_prediction_bias_init: constant value to initialize bias of the last
	        conv2d layer before class prediction.
	      use_dropout: Whether to apply dropout to class prediction head.
	      dropout_keep_prob: Probability of keeping activations.
	      use_depthwise: Whether to use depthwise convolutions for prediction
	        steps. Default is False.
	      score_converter_fn: Callable elementwise nonlinearity (that takes tensors
	        as inputs and returns tensors).
	      return_flat_predictions: If true, returns flattened prediction tensor
	        of shape [batch, height * width * num_predictions_per_location,
	        num_class_slots]. Otherwise returns the prediction tensor before
	        reshaping, whose shape is [batch, height, width,
	        num_predictions_per_location * num_class_slots].
	      scope: Scope name for the convolution operation.

	    Raises:
	      ValueError: if use_depthwise is True and kernel_size is 1.
	    """
	    if use_depthwise and kernel_size == 1:
	      raise ValueError('Should not use 1x1 kernel when using depthwise conv')

	    super(WeightSharedConvolutionalClassHead, self).__init__()
	    self._scope = scope
	    self._num_class_slots = num_class_slots
	    self._kernel_size = kernel_size
	    self._use_depthwise = use_depthwise
	    self._use_dropout = use_dropout
	    self._dropout_keep_prob = dropout_keep_prob
	    self._class_prediction_bias_init = class_prediction_bias_init
	    self._score_converter_fn = score_converter_fn
	    self._return_flat_predictions = return_flat_predictions

	  def predict(self, features, num_predictions_per_location):
	    """Predicts class scores.

	    Args:
	      features: A float tensor of shape [batch_size, height, width, channels]
	        containing image features.
	      num_predictions_per_location: Number of box predictions to be made per
	        spatial location.

	    Returns:
	      class_predictions_with_background: A tensor of shape
	        [batch_size, num_anchors, num_class_slots] representing the class
	        predictions for the proposals, or a tensor of shape [batch, height,
	        width, num_predictions_per_location * num_class_slots] representing
	        class predictions before reshaping if self._return_flat_predictions is
	        False.
	    """
	    net = features
	    if self._use_dropout:
	      # No is_training is passed here; presumably it is supplied by an
	      # enclosing slim arg_scope -- confirm against the caller.
	      net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

	    # Pick the convolution flavour once; both accept the same arguments.
	    conv_fn = (
	        functools.partial(slim.separable_conv2d, depth_multiplier=1)
	        if self._use_depthwise else slim.conv2d)
	    output_depth = num_predictions_per_location * self._num_class_slots
	    kernel_shape = [self._kernel_size, self._kernel_size]
	    scores = conv_fn(
	        net,
	        output_depth,
	        kernel_shape,
	        activation_fn=None,
	        stride=1,
	        padding='SAME',
	        normalizer_fn=None,
	        biases_initializer=tf.constant_initializer(
	            self._class_prediction_bias_init),
	        scope=self._scope)

	    # Use the static batch size when known, otherwise the dynamic one.
	    static_batch_size = features.get_shape().as_list()[0]
	    batch_size = (
	        tf.shape(features)[0]
	        if static_batch_size is None else static_batch_size)

	    scores = self._score_converter_fn(scores)
	    if not self._return_flat_predictions:
	      return scores
	    return tf.reshape(scores, [batch_size, -1, self._num_class_slots])