# Global run settings.
# NOTE(review): presumably seeds the run's random number generators —
# confirm which libraries consume it.
random_seed: 13
num_gpus_per_node: 2
# NOTE(review): not visible here whether this is per GPU or summed across
# GPUs — confirm against the data-loading code.
batch_size: 64
# Resume settings: one nested `resume` flag plus one flag per piece of
# state (model, optimizer, scheduler, RNG state).
# NOTE(review): presumably the per-component flags only take effect when the
# nested `resume` flag is true — confirm against the loader.
# Booleans use the canonical lowercase spelling so that every YAML loader
# resolves them as booleans.
resume:
    resume: false
    resume_model: true
    resume_optimizer: true
    resume_scheduler: true
    resume_rng_state: true
# Checkpoint output directory, save cadence, and retention settings.
checkpointing:
    directory: "Checkpoints"
    steps_interval: 10
    # NOTE(review): presumably disabled when negative, like the logging
    # intervals — confirm.
    seconds_interval: -1
    num_checkpoints_to_keep: 1000
    # 86400 seconds = 24 hours.
    keep_checkpoint_every_num_seconds: 86400
# Log level and logging cadence.
logging:
    level: "INFO"
    steps_interval: -1  # disabled when negative
    # Ignored when `steps_interval` is enabled (i.e. not negative).
    seconds_interval: 2
# Optimizer, mixed-precision, and scheduler settings for the discriminator
# and the paraphraser.
optimization:
    fp16: false
    # Letter "O" followed by the digit 1.
    # NOTE(review): presumably a mixed-precision optimization level that only
    # matters when `fp16` is true — confirm.
    fp16_opt_level: O1
    discriminator_optimizer_name: AdamW
    # Learning rates carry an explicit mantissa dot: YAML 1.1 loaders such as
    # PyYAML resolve a bare `1e-5` as a string rather than a float.
    discriminator_learning_rate: 1.0e-5
    paraphraser_optimizer_name: AdamW
    paraphraser_learning_rate: 1.0e-5
    gradient_accumulation_steps: 4
    weight_decay: 0.01
    # NOTE(review): presumably a negative value disables gradient clipping —
    # confirm.
    max_gradient_norm: -1.0
    scheduler:
        discriminator_scheduler_name: WarmupLinear
        paraphraser_scheduler_name: WarmupLinear
validation:
    # NOTE(review): the original note on this key read "after every epoch or
    # none". Presumably this is the number of steps between validation runs,
    # with per-epoch or disabled validation as alternatives — confirm how
    # those modes are selected.
    steps_interval: 10
# Training length, given either as epochs or as update steps.
total_num:
    epochs: -1  # not positive, so `update_steps` below applies
    update_steps: 300  # disabled when `total_num.epochs` > 0