Spaces:

NCTCMumbai
/

NCTC

Running

App Files Files Community

NCTC / models /research /attention_ocr /python /datasets /fsns.py

NCTCMumbai

Upload 2571 files

0b8359d almost 2 years ago

raw

history blame contribute delete

6.22 kB

	# -*- coding: utf-8 -*-
	# Copyright 2017 The TensorFlow Authors All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================

	"""Configuration to read FSNS dataset https://goo.gl/3Ldm8v."""

	import os
	import re
	import tensorflow as tf
	from tensorflow.contrib import slim
	import logging

	DEFAULT_DATASET_DIR = os.path.join(os.path.dirname(__file__), 'data', 'fsns')

	# Fallback dataset description. get_split() uses it only when the caller does
	# not supply a configuration of its own.
	DEFAULT_CONFIG = {
	  'name': 'FSNS',
	  # One entry per split: 'size' and a file 'pattern' that get_split() joins
	  # onto the dataset directory.
	  'splits': {
	    'train': dict(size=1044868, pattern='train/train*'),
	    'test': dict(size=20404, pattern='test/test*'),
	    'validation': dict(size=16150, pattern='validation/validation*'),
	  },
	  # File name of the charset definition, looked up inside the dataset dir.
	  'charset_filename': 'charset_size=134.txt',
	  'image_shape': (150, 600, 3),
	  'num_of_views': 4,
	  'max_sequence_length': 37,
	  'null_code': 133,
	  # Human readable description of every item produced by the decoder.
	  'items_to_descriptions': dict(
	    image='A [150 x 600 x 3] color image.',
	    label='Characters codes.',
	    text='A unicode string.',
	    length='A length of the encoded text.',
	    num_of_views='A number of different views stored within the image.',
	  ),
	}


	def read_charset(filename, null_character=u'\u2591'):
	  """Reads a charset definition from a tab separated text file.

	  Every line is expected to have the form `<code>\t<character>`, where
	  `<code>` is a decimal integer. Lines which do not follow that format are
	  reported with a warning and skipped.

	  Args:
	    filename: a path to the charset file.
	    null_character: a unicode character used to replace the '<nul>'
	      placeholder found in the file. The default value is a light shade
	      block '░'.

	  Returns:
	    a dictionary with keys equal to character codes (int) and values equal to
	    the corresponding characters.
	  """
	  pattern = re.compile(r'(\d+)\t(.+)')
	  charset = {}
	  with tf.gfile.GFile(filename) as f:
	    for i, line in enumerate(f):
	      # Drop the line terminator before matching: '.' matches '\r', so with
	      # CRLF line endings the carriage return would otherwise become part of
	      # the character and the '<nul>' comparison below would never succeed.
	      # Only CR/LF are stripped, a character that is itself a space is kept.
	      m = pattern.match(line.rstrip('\r\n'))
	      if m is None:
	        logging.warning('incorrect charset file. line #%d: %s', i, line)
	        continue
	      code = int(m.group(1))
	      char = m.group(2)
	      if char == '<nul>':
	        char = null_character
	      charset[code] = char
	  return charset


	class _NumOfViewsHandler(slim.tfexample_decoder.ItemHandler):
	  """Item handler which derives the number of views stored in an image.

	  The value is computed from two width features of the example as
	  `num_of_views * original_width / width`, converted to int64.
	  """

	  def __init__(self, width_key, original_width_key, num_of_views):
	    """Registers both width keys with the base handler.

	    Args:
	      width_key: feature key of the width used as the divisor.
	      original_width_key: feature key of the width used in the numerator.
	      num_of_views: multiplier applied to the original width.
	    """
	    required_keys = [width_key, original_width_key]
	    super(_NumOfViewsHandler, self).__init__(required_keys)
	    self._num_of_views = num_of_views
	    self._original_width_key = original_width_key
	    self._width_key = width_key

	  def tensors_to_item(self, keys_to_tensors):
	    """Returns `num_of_views * original_width / width` as an int64 tensor."""
	    original_width = keys_to_tensors[self._original_width_key]
	    width = keys_to_tensors[self._width_key]
	    scaled_width = self._num_of_views * original_width
	    return tf.to_int64(scaled_width / width)


	def get_split(split_name, dataset_dir=None, config=None):
	  """Builds the slim `Dataset` describing one split of the FSNS dataset.

	  Args:
	    split_name: Name of the split to load; it has to be one of the keys of
	      `config['splits']`.
	    dataset_dir: The base directory which holds the charset file and the
	      split files. DEFAULT_DATASET_DIR is used when it is empty or None.
	    config: A dictionary with dataset configuration. DEFAULT_CONFIG is used
	      when it is empty or None.

	  Returns:
	    A `slim.dataset.Dataset` which, in addition to the standard fields,
	    carries `charset`, `num_char_classes`, `num_of_views`,
	    `max_sequence_length` and `null_code`.

	  Raises:
	    ValueError: if `split_name` is not listed in `config['splits']`.
	  """
	  dataset_dir = dataset_dir or DEFAULT_DATASET_DIR
	  config = config or DEFAULT_CONFIG

	  splits = config['splits']
	  if split_name not in splits:
	    raise ValueError('split name %s was not recognized.' % split_name)
	  split = splits[split_name]

	  logging.info('Using %s dataset split_name=%s dataset_dir=%s', config['name'],
	               split_name, dataset_dir)

	  tfexample = slim.tfexample_decoder

	  # Ignores the 'image/height' feature.
	  default_width = tf.zeros([1], dtype=tf.int64)
	  max_length = config['max_sequence_length']
	  keys_to_features = {
	    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
	    'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
	    'image/width': tf.FixedLenFeature(
	        [1], tf.int64, default_value=default_width),
	    'image/orig_width': tf.FixedLenFeature(
	        [1], tf.int64, default_value=default_width),
	    'image/class': tf.FixedLenFeature([max_length], tf.int64),
	    'image/unpadded_class': tf.VarLenFeature(tf.int64),
	    'image/text': tf.FixedLenFeature([1], tf.string, default_value=''),
	  }

	  # Maps the item names exposed by the dataset onto the parsed features.
	  image_handler = tfexample.Image(
	      shape=config['image_shape'],
	      image_key='image/encoded',
	      format_key='image/format')
	  label_handler = tfexample.Tensor(tensor_key='image/class')
	  text_handler = tfexample.Tensor(tensor_key='image/text')
	  views_handler = _NumOfViewsHandler(
	      width_key='image/width',
	      original_width_key='image/orig_width',
	      num_of_views=config['num_of_views'])
	  items_to_handlers = {
	    'image': image_handler,
	    'label': label_handler,
	    'text': text_handler,
	    'num_of_views': views_handler,
	  }
	  decoder = tfexample.TFExampleDecoder(keys_to_features, items_to_handlers)

	  charset = read_charset(os.path.join(dataset_dir, config['charset_filename']))
	  file_pattern = os.path.join(dataset_dir, split['pattern'])

	  return slim.dataset.Dataset(
	      data_sources=file_pattern,
	      reader=tf.TFRecordReader,
	      decoder=decoder,
	      num_samples=split['size'],
	      items_to_descriptions=config['items_to_descriptions'],
	      # additional parameters for convenience.
	      charset=charset,
	      num_char_classes=len(charset),
	      num_of_views=config['num_of_views'],
	      max_sequence_length=config['max_sequence_length'],
	      null_code=config['null_code'])