"""Seed-determinism checks for AEC-style multi-agent environments.

The helpers here roll an environment forward, fingerprint the sequence of
(agent, observation, reward) it produces, and compare fingerprints between
runs that were seeded identically.
"""
import hashlib
import pickle
import random
import warnings

import numpy as np

from pettingzoo.utils import parallel_to_aec


def hash(val):
    """Return the hex md5 digest of the pickled form of ``val``.

    NOTE: this intentionally keeps its original name even though it shadows
    the builtin ``hash`` inside this module.
    """
    hasher = hashlib.md5()
    hasher.update(pickle.dumps(val))
    return hasher.hexdigest()


def _perturb_global_rngs(draws):
    """Advance the global ``random`` and ``np.random`` generators ``draws`` times."""
    for _ in range(draws):
        random.randint(0, 1000)
        np.random.normal(size=100)


def calc_hash(new_env, rand_issue, max_env_iters):
    """Roll ``new_env`` through three episodes and fingerprint what it emits.

    Args:
        new_env: environment exposing ``reset``, ``agent_iter``, ``last``,
            ``step`` and ``action_space(agent)``.
        rand_issue: after every reset the global ``random`` / ``np.random``
            generators are advanced ``rand_issue + 1`` times, so two calls
            with different values leave the global RNG state different.
        max_env_iters: iteration budget handed to ``new_env.agent_iter``.

    Returns:
        Hex digest over the flat sequence of agent, observation hash and
        reward recorded at every step.
    """
    trace = []
    # Private generator so masked-action choices do not depend on global state.
    sampler = random.Random(42)
    for _ in range(3):
        new_env.reset()
        _perturb_global_rngs(rand_issue + 1)
        for agent in new_env.agent_iter(max_env_iters):
            obs, rew, done, _info = new_env.last()
            if done:
                action = None
            elif isinstance(obs, dict) and 'action_mask' in obs:
                # Pick among the indices the mask marks as non-zero.
                action = sampler.choice(np.flatnonzero(obs['action_mask']))
            else:
                action = new_env.action_space(agent).sample()
            new_env.step(action)
            # The observation recorded is the one read *before* stepping.
            trace.extend((agent, hash_obsevation(obs), float(rew)))

    return hash(tuple(trace))


def seed_action_spaces(env):
    """Seed each agent's action space with ``42 + index`` in ``possible_agents`` order.

    Does nothing when ``env`` has no ``possible_agents`` attribute.
    """
    if not hasattr(env, 'possible_agents'):
        return
    for offset, agent in enumerate(env.possible_agents):
        env.action_space(agent).seed(42 + offset)


def check_environment_deterministic(env1, env2, num_cycles):
    """Report whether two already-seeded environments behave identically.

    Args:
        env1: first seeded environment.
        env2: second seeded environment.
        num_cycles: number of cycles to run; the step budget is
            ``num_cycles`` times the number of ``possible_agents`` of
            ``env1`` (at least one).

    Returns:
        ``True`` when both environments yield the same fingerprint.
    """
    # Seed the action spaces so that sampled actions are deterministic.
    seed_action_spaces(env1)
    seed_action_spaces(env2)

    num_agents = max(1, len(getattr(env1, 'possible_agents', [])))
    max_env_iters = num_cycles * num_agents

    # Each environment gets a different ``rand_issue`` so the global RNG
    # state differs between the two rollouts.
    hashes = [
        calc_hash(env, index, max_env_iters)
        for index, env in enumerate((env1, env2))
    ]
    return all(digest == hashes[0] for digest in hashes)


def hash_obsevation(obs):
    """Return a digest for a single observation.

    Objects exposing ``tobytes()`` are hashed from those bytes; anything else
    is hashed through its pickled form. If the observation cannot be pickled
    a warning is emitted and ``0`` is returned.

    The misspelled function name is kept as-is for compatibility with
    existing callers.
    """
    try:
        raw = obs.tobytes()
    except AttributeError:
        pass
    else:
        return hash(raw)

    try:
        return hash(obs)
    except (TypeError, AttributeError, pickle.PicklingError):
        # pickle signals unpicklable objects with PicklingError (and, on some
        # Python versions, AttributeError for locally defined objects), not
        # only TypeError; all of them take the same warn-and-fallback path.
        warnings.warn("Observation not an int or an Numpy array")
        return 0


def test_environment_reset_deterministic(env1, num_cycles):
    """Assert that re-seeding and resetting ``env1`` reproduces its behavior.

    The environment is seeded with 42, reset and fingerprinted twice, with a
    different amount of global-RNG perturbation each time.

    Raises:
        AssertionError: if the two fingerprints differ.
    """
    # NOTE(review): unlike check_environment_deterministic, ``num_cycles`` is
    # passed straight through as the step budget here (not multiplied by the
    # number of agents) -- confirm this is intended.
    seed_action_spaces(env1)
    env1.seed(42)
    env1.reset()
    first = calc_hash(env1, 1, num_cycles)

    seed_action_spaces(env1)
    env1.seed(42)
    env1.reset()
    second = calc_hash(env1, 2, num_cycles)

    assert first == second, "environments kept state after seed(42) and reset()"


def seed_test(env_constructor, num_cycles=10, test_kept_state=True):
    """Check that environments built by ``env_constructor`` honor their seed.

    Args:
        env_constructor: zero-argument callable returning a new environment.
        num_cycles: number of cycles to run in each check.
        test_kept_state: when true, also verify that a single environment
            reproduces itself after being re-seeded and reset.

    Raises:
        AssertionError: if any determinism check fails.
    """
    env1 = env_constructor()
    if test_kept_state:
        test_environment_reset_deterministic(env1, num_cycles)
    env2 = env_constructor()

    base_seed = 42
    env1.seed(base_seed)
    env2.seed(base_seed)

    assert check_environment_deterministic(env1, env2, num_cycles), (
        "The environment gives different results on multiple runs when "
        "initialized with the same seed. This is usually a sign that you are "
        "using np.random or random modules directly, which uses a global "
        "random state."
    )


def parallel_seed_test(parallel_env_fn, num_cycles=10, test_kept_state=True):
    """Run ``seed_test`` on a parallel environment wrapped by ``parallel_to_aec``.

    Args:
        parallel_env_fn: zero-argument callable returning a parallel
            environment.
        num_cycles: forwarded to ``seed_test``.
        test_kept_state: forwarded to ``seed_test``.
    """
    def aec_env_fn():
        return parallel_to_aec(parallel_env_fn())

    seed_test(aec_env_fn, num_cycles, test_kept_state)