# Parallel corpora. Each entry names a source-side file (path_src) and a
# target-side file (path_tgt).
# NOTE(review): every path is absolute and sits under one user's home
# directory, so this file will not work unchanged on another machine.
data:
  # Reads the train.src / train.tgt pair.
  corpus_1:
    path_src: /Users/laabidn1/multiguide/data/toy_experiment/toy_experiment/train.src
    path_tgt: /Users/laabidn1/multiguide/data/toy_experiment/toy_experiment/train.tgt
  # NOTE(review): points at exactly the same train.src / train.tgt files as
  # corpus_1, so anything measured on this entry is measured on the training
  # data. Presumably deliberate for the toy experiment -- confirm, or point it
  # at a held-out set.
  valid:
    path_src: /Users/laabidn1/multiguide/data/toy_experiment/toy_experiment/train.src
    path_tgt: /Users/laabidn1/multiguide/data/toy_experiment/toy_experiment/train.tgt

# Vocabulary and sequence-length settings.
# save_data is the directory that also holds the corpus and vocab files above
# and below.
save_data: /Users/laabidn1/multiguide/data/toy_experiment/toy_experiment
# src_vocab and tgt_vocab name the same vocab.txt, in line with share_vocab.
src_vocab: /Users/laabidn1/multiguide/data/toy_experiment/toy_experiment/vocab.txt
tgt_vocab: /Users/laabidn1/multiguide/data/toy_experiment/toy_experiment/vocab.txt
share_vocab: true
# Same limit on both sides.
# NOTE(review): in OpenNMT-py 2.x these limits are presumably only enforced
# when the filtertoolong transform is enabled, and no transforms key is
# visible in this section -- confirm.
src_seq_length: 200
tgt_seq_length: 200
# Presumably lets existing files under save_data be replaced -- confirm.
overwrite: true

# Checkpointing and reproducibility.
# save_model ends in a file-name prefix (onmt_model), not a directory.
save_model: /Users/laabidn1/multiguide/experiments/toy_experiment/toy_experiment/onmt/onmt_model
# NOTE(review): equal to train_steps (1000) in this file, so presumably only a
# single checkpoint is written and keep_checkpoint: 10 is never reached --
# confirm.
save_checkpoint_steps: 1000
keep_checkpoint: 10
# Fixed random seed.
seed: 3435

# Step counts for this run. The trailing comment on each line holds a second
# value, presumably the setting used for a full-length run (TODO confirm).
train_steps: 1000  # 20000
valid_steps: 100  # 1000
warmup_steps: 100  # 2000
report_every: 100  # 100

# Model architecture: transformer on both the encoder and the decoder side.
decoder_type: transformer
encoder_type: transformer
# Embedding size and rnn_size are both 128.
# NOTE(review): the key is called rnn_size although both sides are
# transformers; presumably it sets the model's hidden size -- confirm against
# the installed OpenNMT-py version.
word_vec_size: 128
rnn_size: 128
# NOTE(review): 16 layers is deep for a 128-wide model on a toy corpus --
# confirm this is intended.
layers: 16
transformer_ff: 1024
# 128 / 8 = 16 dimensions per head, assuming heads split rnn_size evenly.
heads: 8

# Training parameters
accum_count: 1
# Adam optimizer with the noam decay method; the warmup length presumably
# comes from warmup_steps elsewhere in this file -- confirm.
optim: adam
adam_beta1: 0.9
adam_beta2: 0.998
decay_method: noam
learning_rate: 1.0
# Presumably 0.0 disables gradient clipping -- confirm.
max_grad_norm: 0.0
# Batches are counted in sentences: 32 sentences per batch.
# This was batch_type: tokens, which makes batch_size a token budget. A budget
# of 32 tokens is exceeded by a single sentence longer than 32 tokens, while
# src_seq_length / tgt_seq_length allow up to 200. To batch by tokens instead,
# set batch_type: tokens and raise batch_size to a real token budget
# (the OpenNMT-py transformer example uses 4096).
batch_size: 32
batch_type: sents
valid_batch_size: 32
# Loss normalization stays per token; presumably independent of batch_type --
# confirm.
normalization: tokens
dropout: 0.1
label_smoothing: 0.0
max_generator_batches: 32
# param_init 0.0 together with param_init_glorot: presumably Glorot
# initialization is used in place of uniform initialization -- confirm.
param_init: 0.0
param_init_glorot: true
position_encoding: true
# NOTE(review): one process and no gpu_ranks key visible in this section, so
# this presumably trains on CPU -- confirm.
world_size: 1