from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
r"""Run grid search. | |
Look at launch_tuning.sh for details on how to tune at scale. | |
Usage example: | |
Tune with one worker on the local machine. | |
CONFIG="agent=c(algorithm='pg')," | |
CONFIG+="env=c(task_cycle=['reverse-tune', 'remove-tune'])" | |
HPARAM_SPACE_TYPE="pg" | |
OUT_DIR="/tmp/bf_pg_tune" | |
MAX_NPE=5000000 | |
NUM_REPETITIONS=50 | |
rm -rf $OUT_DIR | |
mkdir $OUT_DIR | |
bazel run -c opt single_task:tune -- \ | |
--alsologtostderr \ | |
--config="$CONFIG" \ | |
--max_npe="$MAX_NPE" \ | |
--num_repetitions="$NUM_REPETITIONS" \ | |
--logdir="$OUT_DIR" \ | |
--summary_interval=1 \ | |
--model_v=0 \ | |
--hparam_space="$HPARAM_SPACE_TYPE" \ | |
--tuner_id=0 \ | |
--num_tuners=1 \ | |
2>&1 >"$OUT_DIR/tuner_0.log" | |
learning/brain/tensorboard/tensorboard.sh --port 12345 --logdir "$OUT_DIR" | |
""" | |

import ast
import os

from absl import app
from absl import flags
from absl import logging
import numpy as np
from six.moves import xrange
import tensorflow as tf

from single_task import defaults  # brain coder
from single_task import run as run_lib  # brain coder

FLAGS = flags.FLAGS

flags.DEFINE_integer(
    'tuner_id', 0,
    'The unique ID for this tuning worker.')
flags.DEFINE_integer(
    'num_tuners', 1,
    'Total number of tuning workers.')
flags.DEFINE_string(
    'hparam_space', 'default',
    'String name which denotes the hparam space to tune over. This is '
    'algorithm dependent.')
flags.DEFINE_string(
    'fixed_hparams', '',
    'HParams string. Used to fix hparams during tuning.')
flags.DEFINE_float(
    'success_rate_objective_weight', 1.0,
    'How much to weight success rate vs num programs seen. By default, only '
    'success rate is optimized (this is the setting used in the paper).')


def parse_hparams_string(hparams_str):
  """Parses a comma-separated 'name=value' string into a dict of hparams."""
  hparams = {}
  for term in hparams_str.split(','):
    if not term:
      continue
    name, value = term.split('=')
    hparams[name.strip()] = ast.literal_eval(value)
  return hparams
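
# Illustrative example (hypothetical hparam names); values must be valid
# Python literals:
#   parse_hparams_string("lr=0.001,batch_size=64,grad_clip=0")
#   => {'lr': 0.001, 'batch_size': 64, 'grad_clip': 0}
# String values need quotes, e.g. "optimizer='rmsprop'".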


def int_to_multibase(n, bases):
  """Converts n to mixed-radix digits; bases[0] is the least significant."""
  digits = [0] * len(bases)
  for i, b in enumerate(bases):
    n, d = divmod(n, b)
    digits[i] = d
  return digits
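
# For example (illustrative): with bases [3, 2], index 4 maps to digits [1, 1]
# because 4 = 1 + 1 * 3; indices 0..5 enumerate every digit combination once.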


def hparams_for_index(index, tuning_space):
  """Returns the HParams at position `index` in the grid `tuning_space`."""
  keys = sorted(tuning_space.keys())
  indices = int_to_multibase(index, [len(tuning_space[k]) for k in keys])
  return tf.contrib.training.HParams(
      **{k: tuning_space[k][i] for k, i in zip(keys, indices)})
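
# Illustrative: if tuning_space were {'lr': [0.1, 0.01], 'entropy': [0.0, 0.5]}
# (hypothetical keys), index 3 would yield HParams(entropy=0.5, lr=0.01), and
# indices 0..3 would cover all four combinations exactly once.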


def run_tuner_loop(ns):
  """Run tuning loop for this worker."""
  is_chief = FLAGS.task_id == 0
  tuning_space = ns.define_tuner_hparam_space(
      hparam_space_type=FLAGS.hparam_space)
  fixed_hparams = parse_hparams_string(FLAGS.fixed_hparams)
  for name, value in fixed_hparams.items():
    tuning_space[name] = [value]
  tuning_space_size = np.prod([len(values) for values in tuning_space.values()])
  # Compute this worker's starting trial from the base quotient before adding
  # the extra trial, so that the slices assigned to the tuners are disjoint.
  num_local_trials, remainder = divmod(tuning_space_size, FLAGS.num_tuners)
  starting_trial_id = (
      num_local_trials * FLAGS.tuner_id + min(remainder, FLAGS.tuner_id))
  if FLAGS.tuner_id < remainder:
    num_local_trials += 1
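  # Worked example (illustrative): with 10 grid points and 4 tuners the
  # quotient is 2 with remainder 2, so tuners 0 and 1 run 3 trials starting at
  # 0 and 3, and tuners 2 and 3 run 2 trials starting at 6 and 8 -- every grid
  # point is visited exactly once.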
  logging.info('tuning_space_size: %d', tuning_space_size)
  logging.info('num_local_trials: %d', num_local_trials)
  logging.info('starting_trial_id: %d', starting_trial_id)
  for local_trial_index in xrange(num_local_trials):
    trial_config = defaults.default_config_with_updates(FLAGS.config)
    global_trial_index = local_trial_index + starting_trial_id
    trial_name = 'trial_' + str(global_trial_index)
    trial_dir = os.path.join(FLAGS.logdir, trial_name)
    hparams = hparams_for_index(global_trial_index, tuning_space)
    ns.write_hparams_to_config(
        trial_config, hparams, hparam_space_type=FLAGS.hparam_space)
    results_list = ns.run_training(
        config=trial_config, tuner=None, logdir=trial_dir, is_chief=is_chief,
        trial_name=trial_name)
    if not is_chief:
      # Only chief worker needs to write tuning results to disk.
      continue
    objective, metrics = compute_tuning_objective(
        results_list, hparams, trial_name, num_trials=tuning_space_size)
    logging.info('metrics:\n%s', metrics)
    logging.info('objective: %s', objective)
    logging.info('programs_seen_fraction: %s',
                 metrics['programs_seen_fraction'])
    logging.info('success_rate: %s', metrics['success_rate'])
    logging.info('success_rate_objective_weight: %s',
                 FLAGS.success_rate_objective_weight)
    tuning_results_file = os.path.join(trial_dir, 'tuning_results.txt')
    with tf.gfile.FastGFile(tuning_results_file, 'a') as writer:
      writer.write(str(metrics) + '\n')
    logging.info('Trial %s complete.', trial_name)
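
# Selecting the winning grid point afterwards (illustrative sketch, not part of
# this script): each chief worker appends str(metrics), which includes the
# 'objective' key, to <trial_dir>/tuning_results.txt, so one way to pick the
# best trial is roughly:
#   records = []
#   for path in tf.gfile.Glob(
#       os.path.join(FLAGS.logdir, 'trial_*', 'tuning_results.txt')):
#     with tf.gfile.GFile(path) as f:
#       records.extend(ast.literal_eval(line) for line in f if line.strip())
#   best = max(records, key=lambda m: m['objective'])
# (Trials with no successful runs contain nan fields and would need guarding.)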


def compute_tuning_objective(results_list, hparams, trial_name, num_trials):
  """Compute tuning objective and metrics given results and trial information.

  Args:
    results_list: List of results dicts read from disk. These are written by
        workers.
    hparams: tf.contrib.training.HParams instance containing the hparams used
        in this trial (only the hparams which are being tuned).
    trial_name: Name of this trial. Used to create a trial directory.
    num_trials: Total number of trials that need to be run. This is saved in
        the metrics dict for future reference.

  Returns:
    objective: The objective computed for this trial. Choose the hparams for
        the trial with the largest objective value.
    metrics: Information about this trial. A dict.
  """
  found_solution = [r['found_solution'] for r in results_list]
  successful_program_counts = [
      r['npe'] for r in results_list if r['found_solution']]
  success_rate = sum(found_solution) / float(len(results_list))
  max_programs = FLAGS.max_npe  # Per run.
  all_program_counts = [
      r['npe'] if r['found_solution'] else max_programs
      for r in results_list]
  programs_seen_fraction = (
      float(sum(all_program_counts))
      / (max_programs * len(all_program_counts)))

  # min/max/avg stats are over successful runs.
  metrics = {
      'num_runs': len(results_list),
      'num_succeeded': sum(found_solution),
      'success_rate': success_rate,
      'programs_seen_fraction': programs_seen_fraction,
      'avg_programs': np.mean(successful_program_counts),
      'max_possible_programs_per_run': max_programs,
      'global_step': sum([r['num_batches'] for r in results_list]),
      'hparams': hparams.values(),
      'trial_name': trial_name,
      'num_trials': num_trials}

  # Report stats per task.
  tasks = [r['task'] for r in results_list]
  for task in set(tasks):
    task_list = [r for r in results_list if r['task'] == task]
    found_solution = [r['found_solution'] for r in task_list]
    successful_rewards = [
        r['best_reward'] for r in task_list
        if r['found_solution']]
    successful_num_batches = [
        r['num_batches']
        for r in task_list if r['found_solution']]
    successful_program_counts = [
        r['npe'] for r in task_list if r['found_solution']]
    metrics_append = {
        task + '__num_runs': len(task_list),
        task + '__num_succeeded': sum(found_solution),
        task + '__success_rate': (
            sum(found_solution) / float(len(task_list)))}
    metrics.update(metrics_append)
    if any(found_solution):
      metrics_append = {
          task + '__min_reward': min(successful_rewards),
          task + '__max_reward': max(successful_rewards),
          task + '__avg_reward': np.mean(successful_rewards),
          task + '__min_programs': min(successful_program_counts),
          task + '__max_programs': max(successful_program_counts),
          task + '__avg_programs': np.mean(successful_program_counts),
          task + '__min_batches': min(successful_num_batches),
          task + '__max_batches': max(successful_num_batches),
          task + '__avg_batches': np.mean(successful_num_batches)}
      metrics.update(metrics_append)

  # Objective will be maximized.
  # Maximize success rate, minimize num programs seen.
  # Max objective is always 1.
  weight = FLAGS.success_rate_objective_weight
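  # Worked example (illustrative): with weight 0.75, success_rate 0.5, and
  # programs_seen_fraction 0.8, the objective is
  # 0.75 * 0.5 + 0.25 * (1 - 0.8) = 0.425.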
  objective = (
      weight * success_rate
      + (1 - weight) * (1 - programs_seen_fraction))
  metrics['objective'] = objective

  return objective, metrics


def main(argv):
  del argv  # Unused.

  logging.set_verbosity(FLAGS.log_level)

  if not FLAGS.logdir:
    raise ValueError('logdir flag must be provided.')
  if FLAGS.num_workers <= 0:
    raise ValueError('num_workers flag must be greater than 0.')
  if FLAGS.task_id < 0:
    raise ValueError('task_id flag must be greater than or equal to 0.')
  if FLAGS.task_id >= FLAGS.num_workers:
    raise ValueError(
        'task_id flag must be strictly less than num_workers flag.')
  if FLAGS.num_tuners <= 0:
    raise ValueError('num_tuners flag must be greater than 0.')
  if FLAGS.tuner_id < 0:
    raise ValueError('tuner_id flag must be greater than or equal to 0.')
  if FLAGS.tuner_id >= FLAGS.num_tuners:
    raise ValueError(
        'tuner_id flag must be strictly less than num_tuners flag.')

  ns, _ = run_lib.get_namespace(FLAGS.config)
  run_tuner_loop(ns)


if __name__ == '__main__':
  app.run(main)