# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Quantization related ops for LSTM."""

from __future__ import absolute_import
from __future__ import division

import tensorflow.compat.v1 as tf
from tensorflow.contrib import framework as contrib_framework
from tensorflow.contrib import layers as contrib_layers
from tensorflow.python.training import moving_averages


def _quant_var(
    name,
    initializer_val,
    vars_collection=tf.GraphKeys.MOVING_AVERAGE_VARIABLES,
):
  """Creates a variable for storing the min/max quantization range."""
  return contrib_framework.model_variable(
      name,
      shape=[],
      initializer=tf.constant_initializer(initializer_val),
      collections=[vars_collection],
      trainable=False)
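

# Editor's sketch, not part of the original module: how the helper above is
# typically used. The returned scalar variables hold the quantization bounds
# and are non-trainable; they are only updated through the EMA assignments in
# the ops below. The function name here is illustrative.
def _example_quant_var_usage():
  with tf.variable_scope('example_quant'):
    min_var = _quant_var('min', 0.0)
    max_var = _quant_var('max', 6.0)
  return min_var, max_var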


def quantizable_concat(inputs,
                       axis,
                       is_training,
                       is_quantized=True,
                       default_min=0,
                       default_max=6,
                       ema_decay=0.999,
                       scope='quantized_concat'):
  """Concat replacement with quantization option.

  Allows concat inputs to share the same min/max ranges, from
  experimental/gazelle/synthetic/model/tpu/utils.py.

  Args:
    inputs: list of tensors to concatenate.
    axis: dimension along which to concatenate.
    is_training: true if the graph is a training graph.
    is_quantized: flag to enable/disable quantization.
    default_min: default min value for fake quant op.
    default_max: default max value for fake quant op.
    ema_decay: the moving average decay for the quantization variables.
    scope: Optional scope for variable_scope.

  Returns:
    Tensor resulting from the concatenation of the input tensors.
  """
  if is_quantized:
    with tf.variable_scope(scope):
      tf.logging.info('inputs: {}'.format(inputs))
      for t in inputs:
        tf.logging.info(t)

      min_var = _quant_var('min', default_min)
      max_var = _quant_var('max', default_max)
      if not is_training:
        # If we are building an eval graph, just use the values stored in the
        # variables.
        quant_inputs = [
            tf.fake_quant_with_min_max_vars(t, min_var, max_var) for t in inputs
        ]
        tf.logging.info('min_val: {}'.format(min_var))
        tf.logging.info('max_val: {}'.format(max_var))
      else:
        concat_tensors = tf.concat(inputs, axis=axis)
        tf.logging.info('concat_tensors: {}'.format(concat_tensors))
        # TFLite requires that 0.0 is always in the [min; max] range.
        range_min = tf.minimum(
            tf.reduce_min(concat_tensors), 0.0, name='SafeQuantRangeMin')
        range_max = tf.maximum(
            tf.reduce_max(concat_tensors), 0.0, name='SafeQuantRangeMax')
        # In the training graph, keep track of the moving averages of the min
        # and max of the elements of the input tensor.
        min_val = moving_averages.assign_moving_average(
            min_var,
            range_min,
            ema_decay,
            name='AssignMinEma')
        max_val = moving_averages.assign_moving_average(
            max_var,
            range_max,
            ema_decay,
            name='AssignMaxEma')
        tf.logging.info('min_val: {}'.format(min_val))
        tf.logging.info('max_val: {}'.format(max_val))
        quant_inputs = [
            tf.fake_quant_with_min_max_vars(t, min_val, max_val) for t in inputs
        ]
      tf.logging.info('quant_inputs: {}'.format(quant_inputs))
      outputs = tf.concat(quant_inputs, axis=axis)
      tf.logging.info('outputs: {}'.format(outputs))
  else:
    outputs = tf.concat(inputs, axis=axis)
  return outputs
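

# Editor's sketch, not part of the original module: quantizable_concat applied
# to two feature maps so that both share a single [min, max] range. The tensor
# shapes and scope name are illustrative assumptions.
def _example_quantizable_concat():
  feat_a = tf.placeholder(tf.float32, [None, 16, 16, 32])
  feat_b = tf.placeholder(tf.float32, [None, 16, 16, 64])
  # During training the shared range is tracked with EMAs; at eval time the
  # stored min/max variables are used directly.
  return quantizable_concat([feat_a, feat_b], axis=3, is_training=True,
                            scope='shared_range_concat')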


def quantizable_separable_conv2d(inputs,
                                 num_outputs,
                                 kernel_size,
                                 is_quantized=True,
                                 depth_multiplier=1,
                                 stride=1,
                                 activation_fn=tf.nn.relu6,
                                 normalizer_fn=None,
                                 weights_initializer=None,
                                 pointwise_initializer=None,
                                 scope=None):
  """Quantization friendly backward compatible separable conv2d.

  This op has the same API as separable_conv2d. The main difference is that an
  additional BiasAdd is manually inserted after the depthwise conv, so that
  the depthwise bias does not have a name conflict with the pointwise bias.
  The motivation for this op is that the quantization script needs a BiasAdd
  in order to recognize the op, and a native call to separable_conv2d does not
  create one for the depthwise conv.

  Args:
    inputs: A tensor of size [batch_size, height, width, channels].
    num_outputs: The number of pointwise convolution output filters. If it is
      None, the pointwise convolution stage is skipped.
    kernel_size: A list of length 2: [kernel_height, kernel_width] of the
      filters. Can be an int if both values are the same.
    is_quantized: flag to enable/disable quantization.
    depth_multiplier: The number of depthwise convolution output channels for
      each input channel. The total number of depthwise convolution output
      channels will be equal to num_filters_in * depth_multiplier.
    stride: A list of length 2: [stride_height, stride_width], specifying the
      depthwise convolution stride. Can be an int if both strides are the same.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of biases.
    weights_initializer: An initializer for the depthwise weights.
    pointwise_initializer: An initializer for the pointwise weights.
    scope: Optional scope for variable_scope.

  Returns:
    Tensor resulting from the separable convolution.
  """
  if is_quantized:
    # Per the docstring, the stride belongs to the depthwise stage; the 1x1
    # pointwise conv below always uses stride 1.
    outputs = contrib_layers.separable_conv2d(
        inputs,
        None,
        kernel_size,
        depth_multiplier=depth_multiplier,
        stride=stride,
        activation_fn=None,
        normalizer_fn=None,
        biases_initializer=None,
        weights_initializer=weights_initializer,
        pointwise_initializer=None,
        scope=scope)
    # Manually inserted BiasAdd for the depthwise stage, scoped so that it
    # cannot collide with the pointwise bias below.
    outputs = contrib_layers.bias_add(
        outputs, trainable=True, scope='%s_bias' % scope)
    # Skip the pointwise stage when num_outputs is None, as the docstring
    # promises (mirroring the unquantized path).
    if num_outputs is not None:
      outputs = contrib_layers.conv2d(
          outputs,
          num_outputs, [1, 1],
          activation_fn=activation_fn,
          stride=1,
          normalizer_fn=normalizer_fn,
          weights_initializer=pointwise_initializer,
          scope=scope)
  else:
    outputs = contrib_layers.separable_conv2d(
        inputs,
        num_outputs,
        kernel_size,
        depth_multiplier=depth_multiplier,
        stride=stride,
        activation_fn=activation_fn,
        normalizer_fn=normalizer_fn,
        weights_initializer=weights_initializer,
        pointwise_initializer=pointwise_initializer,
        scope=scope)
  return outputs
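

# Editor's sketch, not part of the original module: a 3x3 separable conv with
# 64 pointwise output filters. With is_quantized=True, the depthwise stage
# gets its own BiasAdd so the quantization rewriter can match it separately
# from the pointwise bias. Shapes and the scope name are illustrative.
def _example_quantizable_separable_conv2d():
  images = tf.placeholder(tf.float32, [None, 32, 32, 3])
  return quantizable_separable_conv2d(
      images, num_outputs=64, kernel_size=3, stride=2,
      scope='sep_conv_example')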


def quantize_op(inputs,
                is_training=True,
                is_quantized=True,
                default_min=0,
                default_max=6,
                ema_decay=0.999,
                scope='quant'):
  """Inserts a fake quantization op after inputs.

  Args:
    inputs: A tensor of size [batch_size, height, width, channels].
    is_training: true if the graph is a training graph.
    is_quantized: flag to enable/disable quantization.
    default_min: default min value for fake quant op.
    default_max: default max value for fake quant op.
    ema_decay: the moving average decay for the quantization variables.
    scope: Optional scope for variable_scope.

  Returns:
    Tensor resulting from quantizing the input tensors.
  """
  if not is_quantized:
    return inputs

  with tf.variable_scope(scope):
    min_var = _quant_var('min', default_min)
    max_var = _quant_var('max', default_max)
    if not is_training:
      # Just use the variables from the checkpoint.
      return tf.fake_quant_with_min_max_vars(inputs, min_var, max_var)
    # While training, collect EMAs of the ranges seen and store them in
    # min_var and max_var.
    # TFLite requires that 0.0 is always in the [min; max] range.
    range_min = tf.minimum(
        tf.reduce_min(inputs), 0.0, name='SafeQuantRangeMin')
    # Lower-bound range_max to prevent the range from collapsing to a point.
    range_max = tf.maximum(
        tf.reduce_max(inputs), 1e-5, name='SafeQuantRangeMax')
    min_val = moving_averages.assign_moving_average(
        min_var, range_min, ema_decay, name='AssignMinEma')
    max_val = moving_averages.assign_moving_average(
        max_var, range_max, ema_decay, name='AssignMaxEma')
    return tf.fake_quant_with_min_max_vars(inputs, min_val, max_val)
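

# Editor's sketch, not part of the original module: inserting a fake-quant op
# after an activation. During training the observed range is folded into the
# min/max variables via EMAs; at eval time the stored range is reused. Names
# and shapes are illustrative.
def _example_quantize_op():
  acts = tf.placeholder(tf.float32, [None, 8, 8, 128])
  return quantize_op(acts, is_training=True, scope='act_quant')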


def fixed_quantize_op(inputs, is_quantized=True,
                      fixed_min=0.0, fixed_max=6.0, scope='quant'):
  """Inserts a fake quantization op with a fixed range after inputs.

  Args:
    inputs: A tensor of size [batch_size, height, width, channels].
    is_quantized: flag to enable/disable quantization.
    fixed_min: fixed min value for fake quant op.
    fixed_max: fixed max value for fake quant op.
    scope: Optional scope for variable_scope.

  Returns:
    Tensor resulting from quantizing the input tensors.
  """
  if not is_quantized:
    return inputs

  with tf.variable_scope(scope):
    # Just use the fixed quantization range.
    return tf.fake_quant_with_min_max_args(inputs, fixed_min, fixed_max)
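

# Editor's sketch, not part of the original module: a fixed [0, 6] range
# matches a ReLU6 activation exactly, so no range variables or EMA updates
# are needed. Names and shapes are illustrative.
def _example_fixed_quantize_op():
  acts = tf.placeholder(tf.float32, [None, 8, 8, 128])
  return fixed_quantize_op(acts, fixed_min=0.0, fixed_max=6.0,
                           scope='relu6_quant')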