# Run identification and runtime settings.
experiment_name: dail_eval_p2m
# No log directory is configured by default.
logdir: null
# Interpolates logdir, so this is null whenever logdir is null.
# NOTE(review): presumably read as a truthy switch once a logdir is
# supplied - confirm against the script that loads this file.
evaluate_only: ${logdir}
device: cuda:0
debug: false

# Morphology names and maze layout. These three values are interpolated
# into the environment ids and dataset paths defined in this file.
source_morph: point
target_morph: maze2d
maze_type: medium

# Root directory for this experiment (presumably where results are
# written - confirm).
# NOTE(review): the last path component repeats experiment_name as a
# literal instead of interpolating it, so overriding experiment_name does
# not change this path.
root_dir: ./dail/results/dail_eval_p2m

# Environment ids are built as <morph>-<maze_type>-v1; with the defaults in
# this file they resolve to point-medium-v1 and maze2d-medium-v1.
source_env_id: ${source_morph}-${maze_type}-v1
target_env_id: ${target_morph}-${maze_type}-v1
# Dataset files sit under ./datasets/<morph>/ and are named after the
# environment id.
source_dataset: ./datasets/${source_morph}/${source_env_id}.hdf5
target_dataset: ./datasets/${target_morph}/${target_env_id}.hdf5

# Integer ids that tag the source and target domains.
source_domain_id: 0
target_domain_id: 1
# NOTE(review): equals the number of domain ids above; presumably the
# width of a one-hot domain encoding - confirm in the training script.
domain_dim: 2
# NOTE(review): presumably enables task conditioning - confirm.
task_cond: true

# Training schedule and optimizer settings.
num_epoch: 30
max_dataset_size: 2000000
adversarial_coef: 0.5
batch_size: 256
# Per-stage epoch counts.
num_epoch_bc: 30
num_epoch_dynamics: 10
# Follows num_epoch through interpolation.
num_epoch_gama: ${num_epoch}
num_epoch_adapt: 30
inference_task_ids:
  - 7
# Base learning rates; several lr_* keys in this file interpolate them.
# Written with an explicit mantissa dot so that YAML 1.1 loaders (for
# example PyYAML) resolve them as floats rather than strings.
lr: 1.0e-4
disc_lr: 1.0e-5

# Settings for the evaluation pass.
# Block style is used to match the other nested mappings in this file.
evaluate_args:
  visualize_episodes: false
  evaluate: true
  processes: 2
  # Separate value from the top-level max_dataset_size key.
  max_dataset_size: 100000
  num_visualize_episodes: 2
  num_eval_episodes: 100
  skip_frames: 5
  fps: 20

# Per-domain switches for reversing observations and actions. Only the
# target observations are reversed in this configuration.
# NOTE(review): the exact meaning of "reverse" is defined by the consuming
# script - confirm there.
reverse_source_observations: false
reverse_source_actions: false
reverse_target_observations: true
reverse_target_actions: false

# Separate train/evaluate toggles for the alignment and adaptation stages;
# all four are switched on here.
train_alignment: true
evaluate_alignment: true
train_adaptation: true
evaluate_adaptation: true

# Checkpoint handling. Loading a pretrained model is off by default.
load_pretrained_model: false
# Checkpoint path built from both environment ids and the two
# reverse_source_* flags.
# NOTE(review): the reverse_target_* flags are not part of the file name,
# so runs that differ only in those flags share one path - confirm this is
# intended.
pretrained: saved_models/${source_env_id}_${target_env_id}_${reverse_source_observations}_${reverse_source_actions}.pt

# Source-policy training toggle and its checkpoint path, which resolves to
# the same file as `pretrained`.
train_source_policy: true
source_policy_trained_path: ${pretrained}

# Dynamics-model training toggle and its checkpoint path, which also
# resolves to the same file as `pretrained`.
train_dynamics_model: true
dynamics_trained_path: ${pretrained}

# Toggle for the stage whose epoch count is num_epoch_gama.
train_gama: true

# Settings for the bc stage (presumably behavioral cloning - confirm).
bc:
  # Follows the top-level num_epoch_bc through interpolation.
  num_epoch: ${num_epoch_bc}
  batch_size: 256
  # Explicit mantissa dot so that YAML 1.1 loaders (for example PyYAML)
  # resolve this as a float rather than a string.
  lr: 1.0e-3

# Shared network defaults interpolated by entries under `models`:
# h is used as a layer width and act as an activation name.
h: 128
act: leaky_relu

# Per-model learning rates; each one is interpolated by the matching entry
# under `models`. Literal values carry an explicit mantissa dot so that
# YAML 1.1 loaders (for example PyYAML) resolve them as floats rather than
# strings.
lr_source_policy: ${lr}
lr_target_policy: 1.0e-5

lr_dynamics_model: ${lr}
lr_discriminator: ${disc_lr}

lr_auto: 1.0e-4  # auto means autoencoder (inv_state_map)
lr_state_map: ${lr}
lr_action_map: ${lr}

# Network specifications, one entry per model. Every entry has the same
# four keys:
#   lr          - learning rate, interpolated from a top-level lr_* key
#   in_dim      - input size; -1 is filled in by the python script
#   hid_dims    - one width per layer
#   activations - one activation per layer, same length as hid_dims
# NOTE(review): a trailing -1 in hid_dims is presumably also filled in by
# the python script, and a null activation presumably means "no activation"
# on that layer - confirm both against the model-building code.
models:
  source_policy:  # π_y : s_y -> a_y
    lr: ${lr_source_policy}
    in_dim: -1  # -1 is configured in python script
    hid_dims:
      - 300
      - 200
      - -1
    activations:
      - ${act}
      - ${act}
      - null

  target_policy:  # π_x : s_x -> a_x
    lr: ${lr_target_policy}
    in_dim: -1
    hid_dims:
      - 300
      - 200
      - -1
    activations:
      - ${act}
      - ${act}
      - null

  state_map:  # f : s_x -> s_y
    lr: ${lr_state_map}
    in_dim: -1
    hid_dims:
      - ${h}
      - ${h}
      - -1
    activations:
      - ${act}
      - ${act}
      - null

  action_map:  # g : a_y -> a_x
    lr: ${lr_action_map}
    in_dim: -1
    hid_dims:
      - ${h}
      - ${h}
      - -1
    activations:
      - ${act}
      - ${act}
      - null

  inv_state_map:  # f^-1 : s_y -> s_x
    lr: ${lr_auto}
    in_dim: -1
    hid_dims:
      - 200
      - 200
      - -1
    activations:
      - ${act}
      - ${act}
      - null

  dynamics_model:  # P_x : (s_x, a_x) -> s'_x
    lr: ${lr_dynamics_model}
    in_dim: -1
    hid_dims:
      - ${h}
      - ${h}
      - ${h}
      - -1
    activations:
      - ${act}
      - ${act}
      - ${act}
      - null

  # Unlike the other entries, the last width here is a literal 1.
  discriminator:  # D : (s_y, a_y, s'_y) -> [0, 1]
    lr: ${lr_discriminator}
    in_dim: -1
    hid_dims:
      - ${h}
      - ${h}
      - 1
    activations:
      - ${act}
      - ${act}
      - null

# Link to an upstream parameter file (reacher_params.py in ermongroup/dail).
# NOTE(review): presumably the reference these hyperparameters were adapted
# from - confirm.
original_params_url: https://github.com/ermongroup/dail/blob/main/saved_params/reacher_params.py
