# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library functions for ContextRCNN."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow.compat.v1 as tf
import tf_slim as slim

# The negative value used in padding the invalid weights.
_NEGATIVE_PADDING_VALUE = -100000


def filter_weight_value(weights, values, valid_mask):
  """Filters weights and values based on valid_mask.

  _NEGATIVE_PADDING_VALUE will be added to invalid elements in the weights to
  avoid their contribution in softmax. 0 will be set for the invalid elements
  in the values.

  Args:
    weights: A float Tensor of shape [batch_size, input_size, context_size].
    values: A float Tensor of shape [batch_size, context_size,
      projected_dimension].
    valid_mask: A boolean Tensor of shape [batch_size, context_size]. True
      means valid and False means invalid.

  Returns:
    weights: A float Tensor of shape [batch_size, input_size, context_size].
    values: A float Tensor of shape [batch_size, context_size,
      projected_dimension].

  Raises:
    ValueError: If the shapes of `weights`, `values`, and `valid_mask` are not
      compatible.
  """
  w_batch_size, _, w_context_size = weights.shape
  v_batch_size, v_context_size, _ = values.shape
  m_batch_size, m_context_size = valid_mask.shape
  if w_batch_size != v_batch_size or v_batch_size != m_batch_size:
    raise ValueError("Please make sure the first dimension of the input"
                     " tensors are the same.")

  if w_context_size != v_context_size:
    raise ValueError("Please make sure the third dimension of weights matches"
                     " the second dimension of values.")

  if w_context_size != m_context_size:
    raise ValueError("Please make sure the third dimension of the weights"
                     " matches the second dimension of the valid_mask.")

  valid_mask = valid_mask[..., tf.newaxis]

  # Force the invalid weights to be very negative so they won't contribute to
  # the softmax.
  weights += tf.transpose(
      tf.cast(tf.math.logical_not(valid_mask), weights.dtype) *
      _NEGATIVE_PADDING_VALUE,
      perm=[0, 2, 1])

  # Force the invalid values to be 0.
  values *= tf.cast(valid_mask, values.dtype)

  return weights, values


def compute_valid_mask(num_valid_elements, num_elements):
  """Computes mask of valid entries within padded context feature.

  Args:
    num_valid_elements: An int32 Tensor of shape [batch_size].
    num_elements: An int32 Tensor, the total (padded) number of elements.

  Returns:
    A boolean Tensor of shape [batch_size, num_elements]. True means valid and
      False means invalid.
  """
  batch_size = num_valid_elements.shape[0]
  element_idxs = tf.range(num_elements, dtype=tf.int32)
  batch_element_idxs = tf.tile(element_idxs[tf.newaxis, ...], [batch_size, 1])
  num_valid_elements = num_valid_elements[..., tf.newaxis]
  valid_mask = tf.less(batch_element_idxs, num_valid_elements)
  return valid_mask

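
# Illustrative example (added for clarity; not executed by the library): for a
# batch of one image whose padded context has three elements, of which only
# the first two are valid, the helpers above behave as sketched below.
#
#   valid_mask = compute_valid_mask(tf.constant([2]), 3)
#   # -> [[True, True, False]]
#   weights, values = filter_weight_value(weights, values, valid_mask)
#   # The last column of `weights` has _NEGATIVE_PADDING_VALUE added to it, so
#   # its softmax probability is ~0, and the last row of `values` is zeroed.
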
def project_features(features, projection_dimension, is_training, normalize):
  """Projects features to another feature space.

  Args:
    features: A float Tensor of shape [batch_size, features_size,
      num_features].
    projection_dimension: An int32 Tensor.
    is_training: A boolean Tensor (affecting batch normalization).
    normalize: A boolean Tensor. If true, the output features will be l2
      normalized on the last dimension.

  Returns:
    A float Tensor of shape [batch_size, features_size, projection_dimension].
  """
  # TODO(guanhangwu) Figure out a better way of specifying the batch norm
  # params.
  batch_norm_params = {
      "is_training": is_training,
      "decay": 0.97,
      "epsilon": 0.001,
      "center": True,
      "scale": True
  }

  batch_size, _, num_features = features.shape
  features = tf.reshape(features, [-1, num_features])
  projected_features = slim.fully_connected(
      features,
      num_outputs=projection_dimension,
      activation_fn=tf.nn.relu6,
      normalizer_fn=slim.batch_norm,
      normalizer_params=batch_norm_params)

  projected_features = tf.reshape(projected_features,
                                  [batch_size, -1, projection_dimension])

  if normalize:
    projected_features = tf.math.l2_normalize(projected_features, axis=-1)

  return projected_features


def attention_block(input_features, context_features, bottleneck_dimension,
                    output_dimension, attention_temperature, valid_mask,
                    is_training):
  """Generic attention block.

  Args:
    input_features: A float Tensor of shape [batch_size, input_size,
      num_input_features].
    context_features: A float Tensor of shape [batch_size, context_size,
      num_context_features].
    bottleneck_dimension: An int32 Tensor representing the bottleneck
      dimension for intermediate projections.
    output_dimension: An int32 Tensor representing the last dimension of the
      output feature.
    attention_temperature: A float Tensor. It controls the temperature of the
      softmax for weights calculation. The formula for calculation is as
      follows:
        weights = exp(weights / temperature) / sum(exp(weights / temperature))
    valid_mask: A boolean Tensor of shape [batch_size, context_size].
    is_training: A boolean Tensor (affecting batch normalization).

  Returns:
    A float Tensor of shape [batch_size, input_size, output_dimension].
  """
  with tf.variable_scope("AttentionBlock"):
    queries = project_features(
        input_features, bottleneck_dimension, is_training, normalize=True)
    keys = project_features(
        context_features, bottleneck_dimension, is_training, normalize=True)
    values = project_features(
        context_features, bottleneck_dimension, is_training, normalize=True)

    weights = tf.matmul(queries, keys, transpose_b=True)

    weights, values = filter_weight_value(weights, values, valid_mask)

    weights = tf.nn.softmax(weights / attention_temperature)

    features = tf.matmul(weights, values)
    output_features = project_features(
        features, output_dimension, is_training, normalize=False)
    return output_features

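
# Note (added for clarity; not in the original comments): attention_block
# implements single-head dot-product attention with a temperature term. With
# projected queries Q, keys K and values V, and invalid context entries masked
# out by filter_weight_value, it computes
#   A = softmax(Q K^T / attention_temperature)
#   output = project_features(A V, output_dimension)
# so each input feature becomes a weighted sum of the projected context
# features, re-projected to output_dimension.
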
def compute_box_context_attention(box_features, context_features,
                                  valid_context_size, bottleneck_dimension,
                                  attention_temperature, is_training):
  """Computes the attention feature from the context given a batch of boxes.

  Args:
    box_features: A float Tensor of shape [batch_size, max_num_proposals,
      height, width, channels]. It is pooled features from first stage
      proposals.
    context_features: A float Tensor of shape [batch_size, context_size,
      num_context_features].
    valid_context_size: An int32 Tensor of shape [batch_size].
    bottleneck_dimension: An int32 Tensor representing the bottleneck
      dimension for intermediate projections.
    attention_temperature: A float Tensor. It controls the temperature of the
      softmax for weights calculation. The formula for calculation is as
      follows:
        weights = exp(weights / temperature) / sum(exp(weights / temperature))
    is_training: A boolean Tensor (affecting batch normalization).

  Returns:
    A float Tensor of shape [batch_size, max_num_proposals, 1, 1, channels].
  """
  _, context_size, _ = context_features.shape
  valid_mask = compute_valid_mask(valid_context_size, context_size)

  channels = box_features.shape[-1]
  # Average pools over height and width dimension so that the shape of
  # box_features becomes [batch_size, max_num_proposals, channels].
  box_features = tf.reduce_mean(box_features, [2, 3])

  output_features = attention_block(box_features, context_features,
                                    bottleneck_dimension, channels.value,
                                    attention_temperature, valid_mask,
                                    is_training)

  # Expands the dimension back to match with the original feature map.
  output_features = output_features[:, :, tf.newaxis, tf.newaxis, :]

  return output_features
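

# Minimal usage sketch (illustrative only; the shapes and hyperparameters
# below are assumptions, not part of the library). Assumes TensorFlow 1.x
# graph mode with tf_slim installed, matching the tf.compat.v1 import above.
if __name__ == "__main__":
  # 2 images, 8 proposals with 7x7x16 pooled features, and a padded context
  # of 10 elements per image (only 6 are valid for the second image).
  box_features = tf.random.uniform([2, 8, 7, 7, 16])
  context_features = tf.random.uniform([2, 10, 32])
  valid_context_size = tf.constant([10, 6], dtype=tf.int32)

  attention = compute_box_context_attention(
      box_features,
      context_features,
      valid_context_size,
      bottleneck_dimension=4,
      attention_temperature=0.5,
      is_training=False)

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Expected output shape: (2, 8, 1, 1, 16).
    print(sess.run(attention).shape)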