Spaces:

NCTCMumbai
/

NCTC

Running

App Files Files Community

NCTC / models /research /fivo /experimental /data.py

NCTCMumbai

Upload 2571 files

0b8359d almost 2 years ago

raw

history blame contribute delete

7.36 kB

	# Copyright 2018 The TensorFlow Authors All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================

	"""Datasets."""

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function

	import numpy as np
	import tensorflow as tf

	import models


	def make_long_chain_dataset(
	        state_size=1,
	        num_obs=5,
	        steps_per_obs=3,
	        variance=1.,
	        observation_variance=1.,
	        batch_size=4,
	        num_samples=1,
	        observation_type=models.STANDARD_OBSERVATION,
	        transition_type=models.STANDARD_TRANSITION,
	        fixed_observation=None,
	        dtype="float32"):
	    """Creates a long chain data generating process.

	    Creates a tf.data.Dataset that provides batches of data from a long
	    chain: a Gaussian random walk over latent states in which one
	    observation is emitted every `steps_per_obs` transitions.

	    Args:
	      state_size: The dimension of the state space of the process.
	      num_obs: The number of observations in the chain.
	      steps_per_obs: The number of steps between each observation.
	      variance: The variance of the normal distributions used at each
	        timestep.
	      observation_variance: The variance of the normal distribution each
	        observation is drawn from. Unused when `fixed_observation` is set.
	      batch_size: The number of trajectories to include in each batch.
	      num_samples: The number of replicas of each trajectory to include in
	        each batch.
	      observation_type: How the mean of an observation is derived from the
	        current state. One of models.STANDARD_OBSERVATION (the state),
	        models.SQUARED_OBSERVATION (elementwise square) or
	        models.ABS_OBSERVATION (elementwise absolute value). Unused when
	        `fixed_observation` is set.
	      transition_type: How the mean of the next state is derived from the
	        current state. One of models.STANDARD_TRANSITION (the state) or
	        models.ROUND_TRANSITION (the state rounded elementwise).
	      fixed_observation: If not None, every observation is this constant
	        value (broadcast to `state_size`) instead of being sampled.
	      dtype: The datatype of the states and observations.
	    Returns:
	      dataset: A tf.data.Dataset that can be iterated over. Each element is
	        a (states, observations) pair with shapes
	        [batch_size * num_samples, num_obs * steps_per_obs + 1, state_size]
	        and [batch_size * num_samples, num_obs, state_size].
	    Raises:
	      ValueError: If `transition_type` is not recognised, or if
	        `observation_type` is not recognised while observations are sampled.
	    """
	    # Fail fast on unsupported modes. Without these checks the generator
	    # would hit an unbound (or stale) `loc` on its first step.
	    if transition_type not in (models.ROUND_TRANSITION,
	                               models.STANDARD_TRANSITION):
	        raise ValueError("Unknown transition_type: %s" % (transition_type,))
	    if fixed_observation is None and observation_type not in (
	            models.SQUARED_OBSERVATION,
	            models.ABS_OBSERVATION,
	            models.STANDARD_OBSERVATION):
	        raise ValueError("Unknown observation_type: %s" % (observation_type,))

	    num_timesteps = num_obs * steps_per_obs
	    # Standard deviations are loop invariant, so compute them once.
	    state_scale = np.sqrt(variance)
	    observation_scale = np.sqrt(observation_variance)

	    def data_generator():
	        """An infinite generator of latents and observations from the model."""
	        while True:
	            states = []
	            observations = []
	            # z0 ~ Normal(0, sqrt(variance)).
	            states.append(
	                np.random.normal(size=[state_size],
	                                 scale=state_scale).astype(dtype))
	            # Start at 1 because z0 has already been generated, and go to
	            # num_timesteps + 1 so that the num_timesteps-th step is included.
	            # `range` (not `xrange`) keeps this working on Python 3.
	            for t in range(1, num_timesteps + 1):
	                if transition_type == models.ROUND_TRANSITION:
	                    loc = np.round(states[-1])
	                else:  # models.STANDARD_TRANSITION, validated above.
	                    loc = states[-1]
	                new_state = np.random.normal(size=[state_size],
	                                             loc=loc,
	                                             scale=state_scale)
	                states.append(new_state.astype(dtype))
	                # Emit an observation after every steps_per_obs transitions.
	                if t % steps_per_obs == 0:
	                    if fixed_observation is None:
	                        if observation_type == models.SQUARED_OBSERVATION:
	                            loc = np.square(states[-1])
	                        elif observation_type == models.ABS_OBSERVATION:
	                            loc = np.abs(states[-1])
	                        else:  # models.STANDARD_OBSERVATION, validated above.
	                            loc = states[-1]
	                        new_obs = np.random.normal(
	                            size=[state_size],
	                            loc=loc,
	                            scale=observation_scale).astype(dtype)
	                    else:
	                        # Cast so fixed observations share the dtype of sampled
	                        # ones.
	                        new_obs = (np.ones([state_size]) *
	                                   fixed_observation).astype(dtype)

	                    observations.append(new_obs)
	            yield states, observations

	    dataset = tf.data.Dataset.from_generator(
	        data_generator,
	        output_types=(tf.as_dtype(dtype), tf.as_dtype(dtype)),
	        output_shapes=([num_timesteps + 1, state_size], [num_obs, state_size]))
	    dataset = dataset.repeat().batch(batch_size)

	    def tile_batch(state, observation):
	        """Repeats the whole batch num_samples times along the batch axis."""
	        state = tf.tile(state, [num_samples, 1, 1])
	        observation = tf.tile(observation, [num_samples, 1, 1])
	        return state, observation

	    dataset = dataset.map(tile_batch, num_parallel_calls=12).prefetch(1024)
	    return dataset


	def make_dataset(bs=None,
	                 state_size=1,
	                 num_timesteps=10,
	                 variance=1.,
	                 prior_type="unimodal",
	                 bimodal_prior_weight=0.5,
	                 bimodal_prior_mean=1,
	                 transition_type=models.STANDARD_TRANSITION,
	                 fixed_observation=None,
	                 batch_size=4,
	                 num_samples=1,
	                 dtype='float32'):
	    """Creates a data generating process.

	    Creates a tf.data.Dataset that provides batches of data. Each trajectory
	    is a Gaussian random walk whose t-th transition mean is offset by
	    `bs[t]`; the final state of the walk is used as the observation.

	    Args:
	      bs: The parameters of the data generating process, indexable by
	        timestep. If None, new bs are randomly generated.
	      state_size: The dimension of the state space of the process.
	      num_timesteps: The length of the state sequences in the process.
	      variance: The variance of the normal distributions used at each
	        timestep.
	      prior_type: The distribution of the initial state. "unimodal" and
	        "nonlinear" both use a zero-mean normal; "bimodal" uses a normal
	        whose mean is -bimodal_prior_mean with probability
	        bimodal_prior_weight and +bimodal_prior_mean otherwise.
	      bimodal_prior_weight: The probability of the negative mode of the
	        bimodal prior.
	      bimodal_prior_mean: The magnitude of the means of the bimodal prior.
	      transition_type: How the mean of the next state is derived from the
	        current state before bs[t] is added. One of
	        models.STANDARD_TRANSITION (the state) or models.ROUND_TRANSITION
	        (the state rounded elementwise).
	      fixed_observation: If not None, the observation is this constant value
	        instead of the final state.
	      batch_size: The number of trajectories to include in each batch.
	      num_samples: The number of replicas of each trajectory to include in
	        each batch.
	      dtype: The datatype of the states and observations.
	    Returns:
	      bs: The true bs used to generate the data, as a numpy array.
	      dataset: A tf.data.Dataset that can be iterated over. Each element is
	        a (states, observation) pair with shapes
	        [batch_size * num_samples, num_timesteps, state_size] and
	        [batch_size * num_samples, state_size].
	    Raises:
	      ValueError: If `prior_type` or `transition_type` is not recognised.
	    """
	    # Fail fast on unsupported modes. Previously an unknown prior left
	    # `states` empty (IndexError) and an unknown transition reused a stale
	    # `loc`.
	    if prior_type not in ("unimodal", "nonlinear", "bimodal"):
	        raise ValueError("Unknown prior_type: %s" % (prior_type,))
	    if transition_type not in (models.ROUND_TRANSITION,
	                               models.STANDARD_TRANSITION):
	        raise ValueError("Unknown transition_type: %s" % (transition_type,))

	    if bs is None:
	        # `range` (not `xrange`) keeps this working on Python 3.
	        bs = [np.random.uniform(size=[state_size]).astype(dtype)
	              for _ in range(num_timesteps)]
	    # Flatten for logging only when there is one value per timestep; an
	    # unconditional reshape(num_timesteps) raises when state_size > 1.
	    bs_to_log = np.array(bs)
	    if bs_to_log.size == num_timesteps:
	        bs_to_log = bs_to_log.reshape(num_timesteps)
	    tf.logging.info("data generating processs bs: %s", bs_to_log)

	    # The standard deviation is loop invariant, so compute it once.
	    scale = np.sqrt(variance)

	    def data_generator():
	        """An infinite generator of latents and observations from the model."""
	        while True:
	            states = []
	            if prior_type == "unimodal" or prior_type == "nonlinear":
	                # Prior is Normal(0, sqrt(variance)).
	                states.append(
	                    np.random.normal(size=[state_size],
	                                     scale=scale).astype(dtype))
	            else:  # "bimodal", validated above.
	                if np.random.uniform() > bimodal_prior_weight:
	                    loc = bimodal_prior_mean
	                else:
	                    loc = -bimodal_prior_mean
	                states.append(
	                    np.random.normal(size=[state_size],
	                                     loc=loc,
	                                     scale=scale).astype(dtype))

	            for t in range(num_timesteps):
	                if transition_type == models.ROUND_TRANSITION:
	                    loc = np.round(states[-1])
	                else:  # models.STANDARD_TRANSITION, validated above.
	                    loc = states[-1]
	                # Build a new array rather than using `+=`: under the standard
	                # transition `loc` aliases states[-1], so an in-place add
	                # would shift the stored latent state by bs[t].
	                loc = loc + bs[t]
	                new_state = np.random.normal(size=[state_size],
	                                             loc=loc,
	                                             scale=scale).astype(dtype)
	                states.append(new_state)

	            if fixed_observation is None:
	                observation = states[-1]
	            else:
	                observation = np.ones_like(states[-1]) * fixed_observation
	            # The final state is only exposed through the observation; the
	            # latents returned are the first num_timesteps states.
	            yield np.array(states[:-1]), observation

	    dataset = tf.data.Dataset.from_generator(
	        data_generator,
	        output_types=(tf.as_dtype(dtype), tf.as_dtype(dtype)),
	        output_shapes=([num_timesteps, state_size], [state_size]))
	    dataset = dataset.repeat().batch(batch_size)

	    def tile_batch(state, observation):
	        """Repeats the whole batch num_samples times along the batch axis."""
	        state = tf.tile(state, [num_samples, 1, 1])
	        observation = tf.tile(observation, [num_samples, 1])
	        return state, observation

	    dataset = dataset.map(tile_batch, num_parallel_calls=12).prefetch(1024)
	    return np.array(bs), dataset