# Training stage selector. Accepted values: stage1, stage2, or alt/alternating.
stage: stage1

# Batch and problem size.
bsz: 64
nb_nodes: 50  # presumably nodes per problem instance; confirm in data loader
dim_input_nodes: 2  # presumably per-node input features (e.g. 2-D coords); confirm

# Model dimensions and depth.
# NOTE(review): names suggest attention-based encoders/decoder; confirm in model code.
dim_emb: 128
dim_ff: 512
nb_heads: 8
nb_layers_action_encoder: 2
nb_layers_state_encoder: 2
nb_layers_decoder: 3
# Canonical lowercase boolean, read the same way by YAML 1.1 and 1.2 loaders.
use_stage1_action_encoding: true

# Optimization
# Learning rates carry an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
# resolve a bare `2e-5` as the string "2e-5", whereas `2.0e-5` is always a float.
model_lr_stage1: 2.0e-5
model_lr_stage2: 2.0e-5
# Epoch counts per training mode (stage1, stage2, alternating).
nb_epochs_stage1: 50
nb_epochs_stage2: 200
nb_epochs_alt: 5  # NOTE(review): unit (epochs per alternation vs. total) not visible here
nb_batch_per_epoch: 300
nb_batch_eval: 20
k_promising: 8  # NOTE(review): meaning not visible in this file; confirm in consumer
gamma: 0.99  # presumably a discount/decay factor; confirm in consumer

# Data / IO
# String values are quoted explicitly so their type never depends on the loader.
aug: "mix"  # augmentation mode name; valid options are defined by the consumer
aug_num: 8
test_aug_num: 16
# Relative paths; the base directory they resolve against is set by the consumer.
data_path: "../INViT_data/"
save_dir: "../INViT_ckpt/tsp_pretrain"

# Checkpoints
# Stage 1 checkpoint; must be set when stage=stage2 so Stage 1 can be frozen.
stage1_ckpt: ""
# Optional warm-start checkpoint for stage=stage1 (e.g., a pretrain run).
stage1_init_ckpt: ""
# Optional warm-start checkpoint for stage=stage2.
stage2_init_ckpt: ""
