# Locations of the train / validation data splits (relative paths).
# NOTE(review): the inline comments say "None to disable", but a bare `None`
# in YAML is the plain string "None", not null (null is spelled `null` or `~`).
# Confirm how the consumer detects the disabled case before relying on it.
dataset:
  data_path: '../../data/json_2.1.1/train'
  # presumably "id" = in-distribution (the valid_seen split) — TODO confirm
  eval_id_data_path: '../../data/json_2.1.1/valid_seen'  # None to disable
  # presumably "ood" = out-of-distribution (the valid_unseen split) — TODO confirm
  eval_ood_data_path: '../../data/json_2.1.1/valid_unseen'  # None to disable

# Path to the PDDL domain file (relative path).
pddl:
  domain: '../../data/alfred.pddl'

# Environment selection and game-generation settings.
env:
  type: 'AlfredTWEnv' # 'AlfredTWEnv' or 'AlfredThorEnv' or 'AlfredHybrid'
  regen_game_files: False # check if game is solvable and save to game.tw-pddl file
  domain_randomization: False # shuffle print order and object id nums
  task_types: [1, 2, 3, 4, 5, 6]  # task type ids
  # presumably the step budget given to the expert before it times out — TODO confirm
  expert_timeout_steps: 150
  expert_type: "handcoded"
  # presumably the probability of using human-annotated goal descriptions;
  # 0.0 as set here would mean never — TODO confirm
  goal_desc_human_anns_prob: 0.0
  training_size: 0  # 0 means all

  # NOTE(review): looks like this section only applies when type is
  # 'AlfredHybrid' — confirm against the consumer.
  hybrid:
    start_eps: 100000
    thor_prob: 0.5
    eval_mode: "tw" # "tw" or "thor"

# Controller settings.
controller:
  type: 'oracle'
  debug: False
  load_receps: True # load receptacle locations from precomputed dict (if available)

# Path to the pretrained Mask R-CNN detector weights (relative path).
# NOTE(review): the filename is spelled "mcrnn" rather than "mrcnn"; confirm it
# matches the actual file on disk before changing anything.
mask_rcnn:
  pretrained_model_path: './detector/data/400_scenes/mcrnn_alfred_004.pth'

# General settings: seeding, device, training method, training/evaluation
# schedule, checkpointing and model size.
general:
  random_seed: 42
  use_cuda: False  # keep this False when running on a machine without CUDA
  visdom: False
  task: 'alfred'
  # presumably selects which of the top-level `rl` / `dagger` sections is
  # used — TODO confirm
  training_method: 'dagger'  # 'dqn' or 'dagger'
  save_path: '.'
  observation_pool_capacity: 3  # 0 means not using any observation
  hide_init_receptacles: False

  training:
    batch_size: 10
    max_episode: 50000
    smoothing_eps: 0.1
    optimizer:
      learning_rate: 0.001
      clip_grad_norm: 5

  evaluate:
    run_eval: False
    batch_size: 40
    # NOTE(review): separate from the top-level `env.type`; presumably the
    # environment type used during evaluation — confirm.
    env:
      type: "AlfredTWEnv"

  checkpoint:
    report_frequency: 1000  # in episodes
    experiment_tag: 'test'
    load_pretrained: False  # at test time, enable this so that the agent loads your pretrained model
    # placeholder value; presumably only read when load_pretrained is True — TODO confirm
    load_from_tag: 'not loading anything'

  model:
    encoder_layers: 1
    decoder_layers: 1
    encoder_conv_num: 5
    block_hidden_dim: 64
    n_heads: 1
    dropout: 0.1
    block_dropout: 0.1
    recurrent: True

# Reinforcement-learning agent settings.
# presumably used when general.training_method is 'dqn' — TODO confirm
rl:
  action_space: "admissible" # "admissible", "generation", "beam_search_choice" ("exhaustive" not working)
  max_target_length: 20
  beam_width: 10  # 1 means greedy
  generate_top_k: 3

  training:
    max_nb_steps_per_episode: 50  # after this many steps, a game is terminated
    learn_start_from_this_episode: 0
    # NOTE(review): the comment below says "epochs" while neighbouring keys
    # count episodes — confirm the actual unit.
    target_net_update_frequency: 500  # sync target net with online net per this many epochs

  replay:
    accumulate_reward_from_final: True
    count_reward_lambda: 0.0  # 0 to disable
    novel_object_reward_lambda: 0.0 # 0 to disable
    discount_gamma_game_reward: 0.9
    discount_gamma_count_reward: 0.5
    discount_gamma_novel_object_reward: 0.5
    replay_memory_capacity: 500000  # adjust this depending on your RAM size
    replay_memory_priority_fraction: 0.5
    update_per_k_game_steps: 5
    replay_batch_size: 64
    multi_step: 3
    replay_sample_history_length: 4
    replay_sample_update_from: 2

  epsilon_greedy:
    noisy_net: False  # if this is true, then epsilon greedy is disabled
    epsilon_anneal_episodes: 1000  # -1 if not annealing
    epsilon_anneal_from: 0.3
    epsilon_anneal_to: 0.1

# DAgger agent settings.
# presumably used when general.training_method is 'dagger' — TODO confirm
dagger:
  action_space: "generation" # "admissible", "generation", ("exhaustive" not working)
  max_target_length: 20
  beam_width: 10  # 1 means greedy
  generate_top_k: 5
  unstick_by_beam_search: False

  training:
    max_nb_steps_per_episode: 50  # after this many steps, a game is terminated

  # Annealing schedule for the "assist" fraction: from 1.0 to 0.01 over
  # 50000 episodes, per the values below.
  # presumably the fraction of steps that follow the expert — TODO confirm
  fraction_assist:
    fraction_assist_anneal_episodes: 50000
    fraction_assist_anneal_from: 1.0
    fraction_assist_anneal_to: 0.01

  # Annealing schedule for the "random" fraction.
  # presumably the fraction of steps taken at random — TODO confirm
  fraction_random:
    fraction_random_anneal_episodes: 0  # disabled by default
    fraction_random_anneal_from: 0.0
    fraction_random_anneal_to: 0.0

  replay:
    replay_memory_capacity: 500000
    update_per_k_game_steps: 5
    replay_batch_size: 64
    replay_sample_history_length: 4
    replay_sample_update_from: 2

# Settings for the vision-based DAgger variant: which image feature extractor
# to use and how large its features are.
vision_dagger:
  model_type: "resnet" # "resnet" whole image features or "maskrcnn" top k box features
  resnet_fc_dim: 64
  # presumably only relevant when model_type is "maskrcnn" — TODO confirm
  maskrcnn_top_k_boxes: 10 # top k box features
  use_exploration_frame_feats: False # save feats from exploration (memory intensive!)