Spaces:

OpenDILabCommunity
/

gomoku

Sleeping

App Files Files Community

gomoku / DI-engine /dizoo /classic_control /mountain_car /envs /mtcar_env.py

zjowowen

init space

3dfe8fb over 1 year ago

raw

history blame contribute delete

4.68 kB

	from typing import Any, List, Union, Optional
	import gym
	import numpy as np
	from ding.envs import BaseEnv, BaseEnvTimestep
	from ding.torch_utils import to_ndarray, to_list
	from ding.utils import ENV_REGISTRY


	@ENV_REGISTRY.register('mountain_car')
	class MountainCarEnv(BaseEnv):
	    """
	    Implementation of DI-engine's version of the Mountain Car deterministic MDP.

	    Important references that contributed to the creation of this env:
	    > Source code of OpenAI's mountain car gym : https://is.gd/y1FkMT
	    > Gym documentation of mountain car : https://is.gd/29S0dt
	    > Based off DI-engine existing implementation of cartpole_env.py
	    > DI-engine's env creation conventions : https://is.gd/ZHLISj

	    Only __init__, step, seed and reset are mandatory and important.
	    The other methods are generally for convenience.
	    """

	    # NOTE: ``EasyDict`` is not imported in this module, so the annotation is
	    # written as a string (forward reference) to avoid a NameError when the
	    # class body is executed.
	    def __init__(self, cfg: 'EasyDict') -> None:
	        """
	        Store the config and declare the observation / action / reward spaces.

	        The underlying gym environment is NOT created here; it is lazily
	        instantiated on the first call to ``reset``.

	        Arguments:
	            - cfg: Environment configuration; it is only stored on the instance here.
	        """
	        self._cfg = cfg
	        self._init_flag = False
	        self._replay_path = None
	        # True once the current ``self._env`` has been wrapped by RecordVideo.
	        self._video_wrapped = False

	        # Following specifications from https://is.gd/29S0dt
	        # Bounds are created directly as float32 so they match the space dtype.
	        self._observation_space = gym.spaces.Box(
	            low=np.array([-1.2, -0.07], dtype=np.float32),
	            high=np.array([0.6, 0.07], dtype=np.float32),
	            shape=(2, ),
	            dtype=np.float32
	        )
	        self._action_space = gym.spaces.Discrete(3, start=0)
	        self._reward_space = gym.spaces.Box(low=-1, high=0.0, shape=(1, ), dtype=np.float32)

	    def seed(self, seed: int, dynamic_seed: bool = True) -> None:
	        """
	        Record the seed settings and seed numpy's global random generator.

	        Arguments:
	            - seed: Base seed value.
	            - dynamic_seed: If True, ``reset`` adds a random offset to the base \
	                seed on every call; otherwise the base seed is reused as-is.
	        """
	        self._seed = seed
	        self._dynamic_seed = dynamic_seed
	        np.random.seed(self._seed)

	    def reset(self) -> np.ndarray:
	        """
	        Start a new episode and return the first observation.

	        Creates the gym environment on first use, attaches the video recorder
	        if a replay path was set, applies the seed (if ``seed`` was called),
	        and zeroes the episode return accumulator.

	        Returns:
	            - obs: The initial observation as a float32 numpy array.
	        """
	        # Instantiate environment if not already done so
	        if not self._init_flag:
	            self._env = gym.make('MountainCar-v0')
	            self._init_flag = True
	            # A fresh env has no recorder attached yet.
	            self._video_wrapped = False

	        # Attach the video recorder at most once per env instance. Wrapping on
	        # every reset would stack one RecordVideo wrapper per episode.
	        if self._replay_path is not None and not self._video_wrapped:
	            self._env = gym.wrappers.RecordVideo(
	                self._env,
	                video_folder=self._replay_path,
	                episode_trigger=lambda episode_id: True,
	                name_prefix='rl-video-{}'.format(id(self))
	            )
	            self._video_wrapped = True

	        # Set the seeds for randomization (only when ``seed`` has been called).
	        if hasattr(self, '_seed'):
	            if hasattr(self, '_dynamic_seed') and self._dynamic_seed:
	                # Random offset drawn from numpy's global generator.
	                episode_seed = self._seed + 100 * np.random.randint(1, 1000)
	            else:
	                episode_seed = self._seed
	            self._env.seed(episode_seed)
	            self._action_space.seed(episode_seed)

	        # Get first observation from original environment
	        obs = self._env.reset()

	        # Convert to numpy array as output
	        obs = to_ndarray(obs).astype(np.float32)

	        # Init final reward : cumulative sum of the real rewards obtained by a whole episode,
	        # used to evaluate the agent performance on this environment, not used for training.
	        self._eval_episode_return = 0.
	        return obs

	    def step(self, action: np.ndarray) -> BaseEnvTimestep:
	        """
	        Apply one action to the environment.

	        Arguments:
	            - action: The action as a numpy array; it is squeezed before being \
	                passed to the underlying env.

	        Returns:
	            - timestep: ``BaseEnvTimestep(obs, rew, done, info)`` where ``obs`` is a \
	                float32 array, ``rew`` is a float32 array of shape (1, ), and \
	                ``info['eval_episode_return']`` is set when ``done`` is true.
	        """
	        # Making sure that input action is of numpy ndarray
	        assert isinstance(action, np.ndarray), type(action)

	        # Extract action as int, 0-dim array
	        action = action.squeeze()

	        # Take a step of faith into the unknown!
	        obs, rew, done, info = self._env.step(action)

	        # Accumulate reward
	        self._eval_episode_return += rew

	        # Save final cumulative reward when done.
	        if done:
	            info['eval_episode_return'] = self._eval_episode_return

	        # Making sure we conform to di-engine conventions; the observation is
	        # cast to float32 so that it has the same dtype as the one from reset().
	        obs = to_ndarray(obs).astype(np.float32)
	        rew = to_ndarray([rew]).astype(np.float32)

	        return BaseEnvTimestep(obs, rew, done, info)

	    def close(self) -> None:
	        """
	        Close the underlying environment if it was ever created.
	        """
	        # If init flag is False, then reset() was never run, no point closing.
	        if self._init_flag:
	            self._env.close()
	        self._init_flag = False

	    def enable_save_replay(self, replay_path: Optional[str] = None) -> None:
	        """
	        Set the folder used for replay videos; takes effect on the next ``reset``.

	        Arguments:
	            - replay_path: Target folder; defaults to ``'./video'`` when None.
	        """
	        if replay_path is None:
	            replay_path = './video'
	        self._replay_path = replay_path

	    def random_action(self) -> np.ndarray:
	        """
	        Sample an action from the action space.

	        Returns:
	            - action: The sampled action wrapped in an int64 array of one element.
	        """
	        random_action = self.action_space.sample()
	        random_action = to_ndarray([random_action], dtype=np.int64)
	        return random_action

	    @property
	    def observation_space(self) -> gym.spaces.Space:
	        """The observation space declared in ``__init__``."""
	        return self._observation_space

	    @property
	    def action_space(self) -> gym.spaces.Space:
	        """The action space declared in ``__init__``."""
	        return self._action_space

	    @property
	    def reward_space(self) -> gym.spaces.Space:
	        """The reward space declared in ``__init__``."""
	        return self._reward_space

	    def __repr__(self) -> str:
	        return "DI-engine Mountain Car Env"