Spaces:

NCTCMumbai
/

NCTC

Running

App Files Files Community

NCTC / models /official /nlp /modeling /layers /self_attention_mask.py

NCTCMumbai

Upload 2571 files

0b8359d almost 2 years ago

raw

history blame contribute delete

2.33 kB

	# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================
	"""Keras layer that creates a self-attention mask."""

	from __future__ import absolute_import
	from __future__ import division
	# from __future__ import google_type_annotations
	from __future__ import print_function

	import tensorflow as tf
	from official.modeling import tf_utils


	@tf.keras.utils.register_keras_serializable(package='Text')
	class SelfAttentionMask(tf.keras.layers.Layer):
	  """Expands a 2D `to` mask into a 3D self-attention mask.

	  The layer is called with an indexable `inputs` holding two tensors:
	    inputs[0]: from_tensor, a 2D or 3D Tensor of shape
	      [batch_size, from_seq_length, ...]. Only its shape and dtype are read;
	      its values never reach the output.
	    inputs[1]: to_mask, a 2D Tensor of shape [batch_size, to_seq_length]
	      (typically int32).

	  Returns:
	    Tensor of shape [batch_size, from_seq_length, to_seq_length] whose dtype
	    is the dtype of from_tensor. Every `from` position of a batch entry
	    receives a copy of that entry's `to_mask` row.
	  """

	  def call(self, inputs):
	    """Builds the attention mask for `inputs = (from_tensor, to_mask)`.

	    Args:
	      inputs: Indexable pair; element 0 is `from_tensor` and element 1 is
	        `to_mask`, as described in the class docstring.

	    Returns:
	      The product of a ones tensor shaped [batch_size, from_seq_length, 1]
	      and `to_mask` reshaped to [batch_size, 1, to_seq_length], i.e. a mask
	      of shape [batch_size, from_seq_length, to_seq_length].
	    """
	    query_source = inputs[0]
	    key_mask = inputs[1]

	    # NOTE(review): `get_shape_list` is presumably where a rank mismatch is
	    # rejected -- confirm against `official.modeling.tf_utils`.
	    query_dims = tf_utils.get_shape_list(query_source, expected_rank=[2, 3])
	    num_examples = query_dims[0]
	    num_queries = query_dims[1]

	    key_dims = tf_utils.get_shape_list(key_mask, expected_rank=2)
	    num_keys = key_dims[1]

	    # Lay the mask out as [batch_size, 1, to_seq_length], then switch it to
	    # the dtype of `from_tensor` so the multiplication below is well typed.
	    key_row = tf.reshape(key_mask, [num_examples, 1, num_keys])
	    key_row = tf.cast(key_row, dtype=query_source.dtype)

	    # `from_tensor` is not assumed to be a mask (though it may be one).
	    # Attending *from* padding tokens is harmless; only attending *to*
	    # padding tokens has to be blocked, so the `from` side is all ones.
	    #
	    # `ones_column` = [batch_size, from_seq_length, 1]
	    ones_column = tf.ones(
	        shape=[num_examples, num_queries, 1], dtype=query_source.dtype)

	    # Broadcasting over the two singleton axes yields the full 3D mask.
	    return ones_column * key_row