# Page-extraction residue (not part of the config): Spaces: Running;
# File size: 925 Bytes; a1f27d5. The line-number gutter (1-45) is omitted.
# Model architecture settings.
model:
  # Vocabulary sizes; the source and target sides use the same value here.
  src_vocab_size: 37000
  tgt_vocab_size: 37000

  # Maximum sequence lengths for the source and target sides.
  # NOTE(review): presumably counted in tokens — confirm against the data loader.
  src_max_len: 128
  tgt_max_len: 128

  # Network sizing.
  # NOTE(review): names suggest a Transformer encoder-decoder — confirm
  # against the model code that consumes these keys.
  d_model: 512
  d_ff: 2048
  num_heads: 8
  num_encoder_layers: 6
  num_decoder_layers: 6

  dropout: 0.1
# Training-loop, evaluation-cadence and optimizer settings.
training:
  seed: 42
  batch_size: 144
  epochs: 24
  lr_factor: 1.0
  num_workers: 8
  quick_val_size: 1024  # 1024 examples for quick eval
  quick_eval_every: 1000  # steps
  full_eval_every: 10000  # steps
  warmup_steps: 4000
  weight_decay: 0.01
  # Keep the explicit ".0" in the mantissa: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `1e-9` as the string "1e-9" instead of a float.
  adam_eps: 1.0e-9
  adam_beta1: 0.9
  adam_beta2: 0.98
  label_smoothing: 0.1
  max_grad_norm: 1.0
# Experiment output locations, checkpointing and logging cadence.
experiment:
  # Output directories.
  # NOTE(review): checkpoint_dir and log_dir are presumably resolved relative
  # to base_dir — confirm against the code that builds these paths.
  base_dir: 'experiments'
  checkpoint_dir: 'checkpoints'
  log_dir: 'logs'

  # Checkpoint cadence and retention.
  save_every_steps: 10000  # steps
  keep_last_n: 10  # keep last n step checkpoints

  log_every: 100  # log every N batches
# Dataset selection and preprocessing settings.
data:
  # Dataset identifier and the language-pair subset to use.
  dataset_name: 'wmt14'
  subset: 'de-en'

  # Source and target languages.
  lang_src: 'en'
  lang_tgt: 'de'

  tokenization_strategy: 'joint'  # "joint" or "separate"

  # NOTE(review): presumably the share of data held out for validation —
  # confirm which split it is taken from.
  validation_fraction: 0.05
# (stray "|" left by page extraction; not part of the config)