# Top-level entry for the TransformerOpt algorithm configuration.
# NOTE(review): `_target_` looks like a Hydra-style instantiation key holding
# the dotted import path of the class to build — confirm against the loader.
_target_: bbo.algorithms.TransformerOpt
# Label for this entry; how it is consumed is not visible in this file.
name: 'TransformerOpt'

# Transformer model hyperparameters.
# NOTE(review): the per-key meanings below are inferred from the key names
# only — confirm against the code that consumes this mapping.
transformer_config:
    d_model: 64  # presumably the model (embedding) width
    n_head: 4  # presumably the number of attention heads
    n_hidden: 256  # presumably the feed-forward hidden size
    dropout: 0.1
    n_layer: 4
    # Explicit null: no value is configured here; what the consumer does in
    # that case is not visible in this file.
    pos_embedding: null

# Pre-training settings.
# NOTE(review): the per-key meanings below are inferred from the key names
# only — confirm against the code that consumes this mapping.
pretrain_config:
    epochs: 5000
    bs: 256  # presumably the batch size
    # Two-element list; presumably [min, max] sequence length — confirm
    # whether the upper bound is inclusive.
    seq_len_range: [20, 100]
    shifting: 0.0
    eval_intervals: 20
    # Explicit null: no path is configured here.
    save_path: null
    optim_config:
        # Written with a decimal point on purpose: YAML 1.1 loaders (e.g.
        # plain PyYAML) read a bare `3e-4` as the string '3e-4', while
        # `3.0e-4` resolves to the float 0.0003 under every loader.
        lr: 3.0e-4
        warmup_steps: 100
        weight_decay: 0.0