# Lint as: python2, python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common utility for object detection tf.train.SequenceExamples."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow.compat.v1 as tf
def context_float_feature(ndarray):
  """Converts a numpy float array to a context float feature.

  Args:
    ndarray: A numpy float array.

  Returns:
    A context float feature.
  """
  feature = tf.train.Feature()
  for val in ndarray:
    feature.float_list.value.append(val)
  return feature


def context_int64_feature(ndarray):
  """Converts a numpy int64 array to a context int64 feature.

  Args:
    ndarray: A numpy int64 array.

  Returns:
    A context int64 feature.
  """
  feature = tf.train.Feature()
  for val in ndarray:
    feature.int64_list.value.append(val)
  return feature


def context_bytes_feature(ndarray):
  """Converts a numpy bytes array to a context bytes feature.

  Args:
    ndarray: A numpy bytes array.

  Returns:
    A context bytes feature.
  """
  feature = tf.train.Feature()
  for val in ndarray:
    if isinstance(val, np.ndarray):
      val = val.tolist()
    feature.bytes_list.value.append(tf.compat.as_bytes(val))
  return feature


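# Example (a minimal sketch with made-up values): each context_* builder above
# returns a single tf.train.Feature holding clip-level metadata, e.g.
#
#   channels = context_int64_feature(np.array([3]))
#   assert list(channels.int64_list.value) == [3]

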
def sequence_float_feature(ndarray):
  """Converts a numpy float array to a sequence float feature.

  Args:
    ndarray: A numpy float array.

  Returns:
    A sequence float feature.
  """
  feature_list = tf.train.FeatureList()
  for row in ndarray:
    feature = feature_list.feature.add()
    if row.size:
      feature.float_list.value[:] = row
  return feature_list


def sequence_int64_feature(ndarray):
  """Converts a numpy int64 array to a sequence int64 feature.

  Args:
    ndarray: A numpy int64 array.

  Returns:
    A sequence int64 feature.
  """
  feature_list = tf.train.FeatureList()
  for row in ndarray:
    feature = feature_list.feature.add()
    if row.size:
      feature.int64_list.value[:] = row
  return feature_list


def sequence_bytes_feature(ndarray):
  """Converts a numpy bytes array to a sequence bytes feature.

  Args:
    ndarray: A numpy bytes array.

  Returns:
    A sequence bytes feature.
  """
  feature_list = tf.train.FeatureList()
  for row in ndarray:
    if isinstance(row, np.ndarray):
      row = row.tolist()
    feature = feature_list.feature.add()
    if row:
      row = [tf.compat.as_bytes(val) for val in row]
      feature.bytes_list.value[:] = row
  return feature_list


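# Example (a minimal sketch with made-up values): each sequence_* builder
# above adds one Feature per frame (row), so ragged per-frame data is
# preserved. With two frames where only the first carries a label:
#
#   labels = [np.array([b'cat']), np.array([], dtype=np.bytes_)]
#   label_list = sequence_bytes_feature(labels)
#   assert len(label_list.feature) == 2
#   assert list(label_list.feature[0].bytes_list.value) == [b'cat']
#   assert not label_list.feature[1].bytes_list.value

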
def boxes_to_box_components(bboxes):
  """Converts a list of numpy arrays (boxes) to box components.

  Args:
    bboxes: A list of numpy arrays, one [num_boxes_i, 4] float array per
      frame, holding boxes in [ymin, xmin, ymax, xmax] order.

  Returns:
    Lists of the ymin, xmin, ymax and xmax box components, each with one
    entry per frame.
  """
  ymin_list = []
  xmin_list = []
  ymax_list = []
  xmax_list = []
  for bbox in bboxes:
    bbox = np.array(bbox).astype(np.float32)
    ymin, xmin, ymax, xmax = np.split(bbox, 4, axis=1)
    ymin_list.append(np.reshape(ymin, [-1]))
    xmin_list.append(np.reshape(xmin, [-1]))
    ymax_list.append(np.reshape(ymax, [-1]))
    xmax_list.append(np.reshape(xmax, [-1]))
  return ymin_list, xmin_list, ymax_list, xmax_list


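# Example (a minimal sketch with made-up values): one frame with two boxes in
# [ymin, xmin, ymax, xmax] order is transposed into per-coordinate lists:
#
#   boxes = [np.array([[0.0, 0.1, 0.5, 0.6],
#                      [0.2, 0.3, 0.7, 0.8]], dtype=np.float32)]
#   ymin, xmin, ymax, xmax = boxes_to_box_components(boxes)
#   # ymin[0] == array([0.0, 0.2]), xmin[0] == array([0.1, 0.3]), etc.

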
def make_sequence_example(dataset_name,
                          video_id,
                          encoded_images,
                          image_height,
                          image_width,
                          image_format=None,
                          image_source_ids=None,
                          timestamps=None,
                          is_annotated=None,
                          bboxes=None,
                          label_strings=None,
                          detection_bboxes=None,
                          detection_classes=None,
                          detection_scores=None):
"""Constructs tf.SequenceExamples.
Args:
dataset_name: String with dataset name.
video_id: String with video id.
encoded_images: A [num_frames] list (or numpy array) of encoded image
frames.
image_height: Height of the images.
image_width: Width of the images.
image_format: Format of encoded images.
image_source_ids: (Optional) A [num_frames] list of unique string ids for
each image.
timestamps: (Optional) A [num_frames] list (or numpy array) array with image
timestamps.
is_annotated: (Optional) A [num_frames] list (or numpy array) array
in which each element indicates whether the frame has been annotated
(1) or not (0).
bboxes: (Optional) A list (with num_frames elements) of [num_boxes_i, 4]
numpy float32 arrays holding boxes for each frame.
label_strings: (Optional) A list (with num_frames_elements) of [num_boxes_i]
numpy string arrays holding object string labels for each frame.
detection_bboxes: (Optional) A list (with num_frames elements) of
[num_boxes_i, 4] numpy float32 arrays holding prediction boxes for each
frame.
detection_classes: (Optional) A list (with num_frames_elements) of
[num_boxes_i] numpy int64 arrays holding predicted classes for each frame.
detection_scores: (Optional) A list (with num_frames_elements) of
[num_boxes_i] numpy float32 arrays holding predicted object scores for
each frame.
Returns:
A tf.train.SequenceExample.
"""
  num_frames = len(encoded_images)
  image_encoded = np.expand_dims(encoded_images, axis=-1)
  if timestamps is None:
    timestamps = np.arange(num_frames)
  image_timestamps = np.expand_dims(timestamps, axis=-1)

  # Context fields.
  context_dict = {
      'example/dataset_name': context_bytes_feature([dataset_name]),
      'clip/start/timestamp': context_int64_feature([image_timestamps[0][0]]),
      'clip/end/timestamp': context_int64_feature([image_timestamps[-1][0]]),
      'clip/frames': context_int64_feature([num_frames]),
      'image/channels': context_int64_feature([3]),
      'image/height': context_int64_feature([image_height]),
      'image/width': context_int64_feature([image_width]),
      'clip/media_id': context_bytes_feature([video_id])
  }

  # Sequence fields.
  feature_list = {
      'image/encoded': sequence_bytes_feature(image_encoded),
      'image/timestamp': sequence_int64_feature(image_timestamps),
  }

  # Add optional fields.
  if image_format is not None:
    context_dict['image/format'] = context_bytes_feature([image_format])
  if image_source_ids is not None:
    feature_list['image/source_id'] = sequence_bytes_feature(image_source_ids)
  if bboxes is not None:
    bbox_ymin, bbox_xmin, bbox_ymax, bbox_xmax = boxes_to_box_components(
        bboxes)
    feature_list['region/bbox/xmin'] = sequence_float_feature(bbox_xmin)
    feature_list['region/bbox/xmax'] = sequence_float_feature(bbox_xmax)
    feature_list['region/bbox/ymin'] = sequence_float_feature(bbox_ymin)
    feature_list['region/bbox/ymax'] = sequence_float_feature(bbox_ymax)
    if is_annotated is None:
      is_annotated = np.ones(num_frames, dtype=np.int64)
    is_annotated = np.expand_dims(is_annotated, axis=-1)
    feature_list['region/is_annotated'] = sequence_int64_feature(is_annotated)
  if label_strings is not None:
    feature_list['region/label/string'] = sequence_bytes_feature(
        label_strings)
  if detection_bboxes is not None:
    det_bbox_ymin, det_bbox_xmin, det_bbox_ymax, det_bbox_xmax = (
        boxes_to_box_components(detection_bboxes))
    feature_list['predicted/region/bbox/xmin'] = sequence_float_feature(
        det_bbox_xmin)
    feature_list['predicted/region/bbox/xmax'] = sequence_float_feature(
        det_bbox_xmax)
    feature_list['predicted/region/bbox/ymin'] = sequence_float_feature(
        det_bbox_ymin)
    feature_list['predicted/region/bbox/ymax'] = sequence_float_feature(
        det_bbox_ymax)
  if detection_classes is not None:
    feature_list['predicted/region/label/index'] = sequence_int64_feature(
        detection_classes)
  if detection_scores is not None:
    feature_list['predicted/region/label/confidence'] = sequence_float_feature(
        detection_scores)

  context = tf.train.Features(feature=context_dict)
  feature_lists = tf.train.FeatureLists(feature_list=feature_list)
  sequence_example = tf.train.SequenceExample(
      context=context,
      feature_lists=feature_lists)
  return sequence_example
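

# Usage sketch (illustrative; the names and values below are made up, and the
# encoded image strings are placeholders rather than real JPEG bytes):
# serialize a two-frame clip with one groundtruth box in the first frame and
# none in the second.
if __name__ == '__main__':
  example = make_sequence_example(
      dataset_name='example_dataset',
      video_id='video_0',
      encoded_images=[b'fake_jpeg_frame_0', b'fake_jpeg_frame_1'],
      image_height=480,
      image_width=640,
      image_format='JPEG',
      timestamps=[0, 1],
      is_annotated=[1, 0],
      bboxes=[
          np.array([[0.1, 0.2, 0.5, 0.6]], dtype=np.float32),  # One box.
          np.zeros([0, 4], dtype=np.float32),  # No boxes in frame 1.
      ],
      label_strings=[np.array([b'animal']), np.array([], dtype=np.bytes_)])
  serialized = example.SerializeToString()  # Ready for a TFRecord writer.
  print(example.context.feature['clip/frames'].int64_list.value)  # [2]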