# Looks like a Hydra defaults list: each entry selects one option (right-hand
# side) from a config group (left-hand side) to compose with this file.
# NOTE(review): there is no `_self_` entry, so whether the keys below override
# the selected group configs depends on the Hydra version in use — confirm.
defaults:
  - environment: tsp
  - networks: tsp
  - memory: tsp
  - validation: config_validate
  - slowrl: config_slowrl_light

# Descriptive tags; per the author's note they are used to build the
# subdirectory of the experiment.
algo_name: "memo" # purely descriptive
env_name: "tsp" # purely descriptive
subdir_tag: "" # purely descriptive

# Seed value for the run.
seed: 0
# NOTE(review): -1 is presumably a sentinel meaning "use every available
# device" — confirm against the code that reads this key.
num_devices: -1

# Run-size settings. The "other values noted" remarks preserve alternative
# values the author left commented out next to each key.
num_steps: 4520000 # other values noted: 4600000, 20000000
batch_size: 64 # check that it is divisible by num_devices if needed
pop_size: 1
num_starting_positions: 10 # other values noted: 20, 100
num_jit_steps: 1 # other values noted: 10

init_mem_mha_scale: 0.1
# `budget` is also reused by optimizer.num_gradient_accumulation_steps through
# the ${budget} interpolation, so changing it here changes that value as well.
budget: 200 #0 #100

# Loss selection and its weighting parameters.
loss:
  # Options listed by the author: "max_improvement",
  # "LRMI" (log-rectified max improvement loss), "POMO".
  type: "LRMI"
  sp_spec: false
  weight_offset: 0.01 # other value noted: 1e-7
  weight_scale: 10

# Checkpoint cadence and save/restore options.
# Booleans are written in canonical lowercase form (`true` / `false`).
checkpoint_freq: 1000
checkpointing:
  directory: "checkpoints/"
  checkpoint_fname_load: "checkpoint"
  checkpoint_fname_save: "checkpoint"
  save_checkpoint: true
  keep_best_checkpoint: false
  overwrite_checkpoints: true
  # Path to a checkpoint to be loaded (False -> do not load anything).
  restore_path: "data/v1_models/tsp/pomo"
  # Whether to load the encoder from the checkpoint.
  restore_encoder: true
  # Whether to load the decoder(s) from the checkpoint.
  restore_decoder: true
  # Whether to clone the decoders.
  allow_cloned_across_population: true
  # Whether to load the optimizer from the checkpoint.
  restore_optimizer: false

# Settings for the policy used during rollouts.
rollout:
  policy:
    # NOTE(review): presumably the sampling temperature applied to the
    # policy's action distribution — confirm against the rollout code.
    temperature: 1

# Per-component optimizer hyperparameters: learning rate (`lr`) and
# `l2_regularization` for the encoder, decoder and memory sections.
# Scientific-notation floats carry an explicit decimal point (`1.0e-4`) so
# they load as numbers under YAML 1.1 resolvers too, where a bare `1e-4` is
# read as a string; the numeric values are unchanged.
optimizer:
  encoder:
    lr: 1.0e-4
    l2_regularization: 1.0e-6
  decoder:
    lr: 1.0e-4
    l2_regularization: 0
  memory:
    lr: 4.0e-3
    l2_regularization: 0
  # Tied to the top-level `budget` key via interpolation.
  # Other values noted by the author: 200, 1.
  num_gradient_accumulation_steps: ${budget}

# NOTE(review): presumably the interval between validation runs, in the same
# unit as checkpoint_freq — confirm against the training loop.
validation_freq: 1000

# Logging configuration with a `terminal` section and a `neptune` section.
logger:
  terminal:
    label: ""
    # NOTE(review): presumably the minimum delay between terminal log lines;
    # the unit is not visible here — confirm.
    time_delta: 1
  neptune:
    name: "MEM(mlp, 6/8 inp.) - end traj / len"
    # NOTE(review): "PROJECT" looks like a placeholder for the real Neptune
    # project name — confirm before launching a tracked run.
    project: "PROJECT"
