# Lint as: python2, python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common utility for object detection tf.train.SequenceExamples."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow.compat.v1 as tf
def context_float_feature(ndarray):
  """Converts a numpy float array to a context float feature.

  Args:
    ndarray: A numpy float array.

  Returns:
    A context float feature (tf.train.Feature with a populated float_list).
  """
  feature = tf.train.Feature()
  # extend copies every value into the proto's repeated float field.
  feature.float_list.value.extend(ndarray)
  return feature
def context_int64_feature(ndarray):
  """Converts a numpy array to a context int64 feature.

  Args:
    ndarray: A numpy int64 array.

  Returns:
    A context int64 feature (tf.train.Feature with a populated int64_list).
  """
  feature = tf.train.Feature()
  # extend copies every value into the proto's repeated int64 field.
  feature.int64_list.value.extend(ndarray)
  return feature
def context_bytes_feature(ndarray):
  """Converts a numpy bytes array to a context bytes feature.

  Args:
    ndarray: A numpy bytes array.

  Returns:
    A context bytes feature (tf.train.Feature with a populated bytes_list).
  """
  feature = tf.train.Feature()
  values = feature.bytes_list.value
  for item in ndarray:
    # Nested numpy arrays are converted to plain Python objects first so
    # tf.compat.as_bytes receives a str/bytes value.
    if isinstance(item, np.ndarray):
      item = item.tolist()
    values.append(tf.compat.as_bytes(item))
  return feature
def sequence_float_feature(ndarray):
  """Converts a numpy float array to a sequence float feature.

  Args:
    ndarray: A numpy float array.

  Returns:
    A sequence float feature (tf.train.FeatureList, one entry per row).
  """
  feature_list = tf.train.FeatureList()
  for row in ndarray:
    entry = feature_list.feature.add()
    # Rows with no elements produce an empty (but present) feature entry.
    if row.size:
      entry.float_list.value.extend(row)
  return feature_list
def sequence_int64_feature(ndarray):
  """Converts a numpy int64 array to a sequence int64 feature.

  Args:
    ndarray: A numpy int64 array.

  Returns:
    A sequence int64 feature (tf.train.FeatureList, one entry per row).
  """
  feature_list = tf.train.FeatureList()
  for row in ndarray:
    entry = feature_list.feature.add()
    # Rows with no elements produce an empty (but present) feature entry.
    if row.size:
      entry.int64_list.value.extend(row)
  return feature_list
def sequence_bytes_feature(ndarray):
  """Converts a bytes float array to a sequence bytes feature.

  Args:
    ndarray: A numpy bytes array.

  Returns:
    A sequence bytes feature (tf.train.FeatureList, one entry per row).
  """
  feature_list = tf.train.FeatureList()
  for row in ndarray:
    # Nested numpy arrays are flattened to plain lists before encoding.
    if isinstance(row, np.ndarray):
      row = row.tolist()
    entry = feature_list.feature.add()
    # Empty rows still produce an empty (but present) feature entry.
    if row:
      entry.bytes_list.value.extend(tf.compat.as_bytes(item) for item in row)
  return feature_list
def boxes_to_box_components(bboxes):
  """Converts a list of numpy arrays (boxes) to box components.

  Args:
    bboxes: A list (one element per frame) of [num_boxes_i, 4] numpy arrays
      holding [ymin, xmin, ymax, xmax] boxes. A frame with no boxes may be
      given as an empty list or an empty array; a single box may also be
      given as a flat length-4 array.

  Returns:
    A (ymin_list, xmin_list, ymax_list, xmax_list) tuple, each a list of
    [num_boxes_i] float32 numpy arrays.
  """
  ymin_list = []
  xmin_list = []
  ymax_list = []
  xmax_list = []
  for bbox in bboxes:
    # Reshape to (-1, 4) so empty inputs (np.array([]) is rank 1, which
    # would make np.split(..., axis=1) raise) and flat single boxes are
    # handled; well-formed [n, 4] inputs are unchanged.
    bbox = np.array(bbox, dtype=np.float32).reshape(-1, 4)
    ymin, xmin, ymax, xmax = np.split(bbox, 4, axis=1)
    ymin_list.append(np.reshape(ymin, [-1]))
    xmin_list.append(np.reshape(xmin, [-1]))
    ymax_list.append(np.reshape(ymax, [-1]))
    xmax_list.append(np.reshape(xmax, [-1]))
  return ymin_list, xmin_list, ymax_list, xmax_list
def make_sequence_example(dataset_name,
                          video_id,
                          encoded_images,
                          image_height,
                          image_width,
                          image_format=None,
                          image_source_ids=None,
                          timestamps=None,
                          is_annotated=None,
                          bboxes=None,
                          label_strings=None,
                          detection_bboxes=None,
                          detection_classes=None,
                          detection_scores=None):
  """Constructs tf.SequenceExamples.

  Args:
    dataset_name: String with dataset name.
    video_id: String with video id.
    encoded_images: A [num_frames] list (or numpy array) of encoded image
      frames.
    image_height: Height of the images.
    image_width: Width of the images.
    image_format: Format of encoded images.
    image_source_ids: (Optional) A [num_frames] list of unique string ids for
      each image.
    timestamps: (Optional) A [num_frames] list (or numpy array) array with
      image timestamps.
    is_annotated: (Optional) A [num_frames] list (or numpy array) array in
      which each element indicates whether the frame has been annotated (1)
      or not (0).
    bboxes: (Optional) A list (with num_frames elements) of [num_boxes_i, 4]
      numpy float32 arrays holding boxes for each frame.
    label_strings: (Optional) A list (with num_frames_elements) of
      [num_boxes_i] numpy string arrays holding object string labels for each
      frame.
    detection_bboxes: (Optional) A list (with num_frames elements) of
      [num_boxes_i, 4] numpy float32 arrays holding prediction boxes for each
      frame.
    detection_classes: (Optional) A list (with num_frames_elements) of
      [num_boxes_i] numpy int64 arrays holding predicted classes for each
      frame.
    detection_scores: (Optional) A list (with num_frames_elements) of
      [num_boxes_i] numpy float32 arrays holding predicted object scores for
      each frame.

  Returns:
    A tf.train.SequenceExample.
  """
  num_frames = len(encoded_images)
  # Sequence features expect one row per frame, so add a trailing axis.
  image_encoded = np.expand_dims(encoded_images, axis=-1)
  if timestamps is None:
    # Default to frame indices when no explicit timestamps were given.
    timestamps = np.arange(num_frames)
  image_timestamps = np.expand_dims(timestamps, axis=-1)

  # Per-clip (context) fields.
  context_dict = {
      'example/dataset_name': context_bytes_feature([dataset_name]),
      'clip/start/timestamp': context_int64_feature([image_timestamps[0][0]]),
      'clip/end/timestamp': context_int64_feature([image_timestamps[-1][0]]),
      'clip/frames': context_int64_feature([num_frames]),
      'image/channels': context_int64_feature([3]),
      'image/height': context_int64_feature([image_height]),
      'image/width': context_int64_feature([image_width]),
      'clip/media_id': context_bytes_feature([video_id]),
  }

  # Per-frame (sequence) fields.
  feature_list = {
      'image/encoded': sequence_bytes_feature(image_encoded),
      'image/timestamp': sequence_int64_feature(image_timestamps),
  }

  # Optional fields, added only when the caller supplied them.
  if image_format is not None:
    context_dict['image/format'] = context_bytes_feature([image_format])
  if image_source_ids is not None:
    feature_list['image/source_id'] = sequence_bytes_feature(image_source_ids)
  if bboxes is not None:
    ymins, xmins, ymaxs, xmaxs = boxes_to_box_components(bboxes)
    feature_list['region/bbox/xmin'] = sequence_float_feature(xmins)
    feature_list['region/bbox/xmax'] = sequence_float_feature(xmaxs)
    feature_list['region/bbox/ymin'] = sequence_float_feature(ymins)
    feature_list['region/bbox/ymax'] = sequence_float_feature(ymaxs)
    # Ground-truth boxes imply every frame is annotated unless told otherwise.
    if is_annotated is None:
      is_annotated = np.ones(num_frames, dtype=np.int64)
    is_annotated = np.expand_dims(is_annotated, axis=-1)
    feature_list['region/is_annotated'] = sequence_int64_feature(is_annotated)
  if label_strings is not None:
    feature_list['region/label/string'] = sequence_bytes_feature(label_strings)
  if detection_bboxes is not None:
    det_ymins, det_xmins, det_ymaxs, det_xmaxs = boxes_to_box_components(
        detection_bboxes)
    feature_list['predicted/region/bbox/xmin'] = sequence_float_feature(
        det_xmins)
    feature_list['predicted/region/bbox/xmax'] = sequence_float_feature(
        det_xmaxs)
    feature_list['predicted/region/bbox/ymin'] = sequence_float_feature(
        det_ymins)
    feature_list['predicted/region/bbox/ymax'] = sequence_float_feature(
        det_ymaxs)
  if detection_classes is not None:
    feature_list['predicted/region/label/index'] = sequence_int64_feature(
        detection_classes)
  if detection_scores is not None:
    feature_list['predicted/region/label/confidence'] = sequence_float_feature(
        detection_scores)

  return tf.train.SequenceExample(
      context=tf.train.Features(feature=context_dict),
      feature_lists=tf.train.FeatureLists(feature_list=feature_list))