## IO
save_data: data
overwrite: true
seed: 1234
report_every: 100
valid_metrics: ["BLEU"]
tensorboard: true
tensorboard_log_dir: tensorboard
### Vocab
src_vocab: pt.eole.vocab
tgt_vocab: en.eole.vocab
src_vocab_size: 20000
tgt_vocab_size: 20000
vocab_size_multiple: 8
share_vocab: false
n_sample: 0
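# Padding the vocabulary to a multiple of 8 keeps the embedding and output
# projection sizes aligned for fp16 tensor cores; 20000 is already divisible
# by 8, so vocab_size_multiple is a no-op here but a sensible safeguard.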
data:
    corpus_1:
        # path_src: hf://quickmt/quickmt-train.pt-en/pt
        # path_tgt: hf://quickmt/quickmt-train.pt-en/en
        # path_sco: hf://quickmt/quickmt-train.pt-en/sco
        path_src: train.pt
        path_tgt: train.en
    valid:
        path_src: dev.pt
        path_tgt: dev.en
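# Each named entry under data (corpus_1, corpus_2, ...) is a parallel training
# corpus; the reserved "valid" entry is held out for validation only. The
# commented hf:// lines appear to point at the Hugging Face copy of the same
# corpus, usable in place of the local train.pt/train.en files.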
transforms: [sentencepiece, filtertoolong]
transforms_configs:
    sentencepiece:
        src_subword_model: "pt.spm.model"
        tgt_subword_model: "en.spm.model"
    filtertoolong:
        src_seq_length: 256
        tgt_seq_length: 256
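# Transforms apply in the order listed: sentencepiece first tokenizes both
# sides with the given subword models, then filtertoolong drops any pair
# whose source or target exceeds 256 subword tokens.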
training:
    # Run configuration
    model_path: quickmt-pt-en-eole-model
    #train_from: model
    keep_checkpoint: 4
    train_steps: 100000
    save_checkpoint_steps: 5000
    valid_steps: 5000
    # Train on a single GPU
    world_size: 1
    gpu_ranks: [0]
    # Batching
    batch_type: "tokens"
    batch_size: 8000
    valid_batch_size: 4096
    batch_size_multiple: 8
    accum_count: [10]
    accum_steps: [0]
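    # With batch_type "tokens", each forward pass packs roughly 8000 tokens,
    # and accum_count 10 accumulates gradients over 10 such batches before
    # every optimizer step, so one update sees on the order of 80,000 tokens.
    # accum_steps: [0] applies that accumulation schedule from step 0 onward.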
    # Optimizer & Compute
    compute_dtype: "fp16"
    optim: "adamw"
    #use_amp: False
    learning_rate: 2.0
    warmup_steps: 4000
    decay_method: "noam"
    adam_beta2: 0.998
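    # Under the "noam" schedule, learning_rate is a scale factor rather than
    # a literal step size; the effective rate is roughly
    #   lr(step) = 2.0 * hidden_size^-0.5 * min(step^-0.5, step * 4000^-1.5)
    # i.e. linear warmup for 4000 steps, then decay proportional to step^-0.5.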
    # Data loading
    bucket_size: 128000
    num_workers: 4
    prefetch_factor: 32
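    # bucket_size is the pool of examples sorted by length before batching;
    # a larger pool yields batches with less padding at the cost of extra
    # loader memory. num_workers and prefetch_factor tune the torch DataLoader.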
    # Hyperparams
    dropout_steps: [0]
    dropout: [0.1]
    attention_dropout: [0.1]
    max_grad_norm: 0
    label_smoothing: 0.1
    average_decay: 0.0001
    param_init_method: xavier_uniform
    normalization: "tokens"
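    # Note: max_grad_norm: 0 disables gradient clipping entirely, and
    # average_decay: 0.0001 maintains an exponential moving average of the
    # weights (typically the averaged parameters are the ones saved for
    # inference); both are worth revisiting if training becomes unstable.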
model:
    architecture: "transformer"
    share_embeddings: false
    share_decoder_embeddings: false
    hidden_size: 1024
    encoder:
        layers: 8
    decoder:
        layers: 2
    heads: 8
    transformer_ff: 4096
    embeddings:
        word_vec_size: 1024
        position_encoding_type: "SinusoidalInterleaved"
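    # The 8-layer encoder / 2-layer decoder split is a deliberate deep-encoder,
    # shallow-decoder shape: the encoder runs in parallel at inference time,
    # while the thin decoder keeps autoregressive decoding fast. heads and
    # transformer_ff set here apply to both encoder and decoder unless
    # overridden, and word_vec_size matches hidden_size so embeddings feed
    # the first layer without an extra projection.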