## Model parameters residual_channels = 64 residual_blocks = 6 se_ratio = 8 vit_input_channels = 320 # input dimension to ViT transformer_input_dim = 1024 model_embedding_size = 512 transformer_depth = 12 attention_heads = 8 mlp_dim = 2048 dim_head = 64 # k_q_v dims, risky to tune? dropout = 0. emb_dropout = 0. similarity_weight_init = 10. similarity_bias_init = -5. ## Training parameters learning_rate_init = 0.005 players_per_batch = 36 games_per_player = 10 v_players_per_batch = 40 v_games_per_player = 10 num_validate = 10