# Environment reward settings. Each entry is a mapping that stores the setting
# under `value`, optionally accompanied by a human-readable `desc`.
env_reward_wrong:
  desc: reward for choosing incorrect arm
  value: 0
# NOTE(review): no desc given; by analogy with env_reward_wrong this is
# presumably the reward for choosing the correct arm — confirm.
env_reward_correct:
  value: 10
# NOTE(review): the three negative rewards below carry no desc. Their names
# suggest penalties for moving backward, hitting a wall, and hitting the back
# wall — confirm against the environment implementation.
env_reward_backward:
  value: -1
env_reward_wall:
  value: -0.1
env_reward_backwall:
  value: -1
# Left/right ranges are two-element [low, high]-style lists with identical
# bounds on both sides.
# NOTE(review): what is being counted and whether the bounds are inclusive is
# not stated in this file — confirm against the environment.
env_left_range:
  value: [1, 15]
env_right_range:
  value: [1, 15]
# env_max_steps (320) is set larger than env_seq_len (300).
# NOTE(review): the units of both values and how they interact are defined by
# the environment, not here — confirm before changing one without the other.
env_seq_len:
  value: 300
env_max_steps:
  value: 320
# Observation and wall-behaviour flags for the environment.
# Booleans are written in canonical lowercase (`true`/`false`), the one
# spelling that every YAML 1.1 and 1.2 schema resolves to a boolean.
env_pixel_output:
  value: true
# NOTE(review): presumably the pixel observation size; the axis order
# (height/width) is not shown in this file — confirm.
env_pixel_output_shape:
  value: [60, 60]
env_decaying_walls:
  value: false
# NOTE(review): presumably only consulted when env_decaying_walls is true —
# confirm against the environment.
env_decaying_rate_walls:
  value: 0.1
env_flag_end_wall:
  value: true
env_encode_obs_using_autoencoder:
  value: true

# Validation environments. The folded (`>`) desc below must not carry trailing
# whitespace: inside a block scalar it becomes part of the string.
valid_envs:
  desc: >
    Contains key-value pairs. The key is the name of the validation environment, and the value is a
    mapping (dict) of keyword arguments to TonesEnv. If a kwarg is not specified here, the validation
    environments default to the values from the training environment config.
  value:
    tower1_15: {num_interval_on_left: [1, 15], num_interval_on_right: [1, 15]}
    # Disabled validation environments; remove the leading "#" to enable one.
#    len50: {"seq_len": 50}
#    len100: {"seq_len": 100}
#    len200: {"seq_len": 200}
#    tower1_5: {num_interval_on_left: [1, 5], num_interval_on_right: [1, 5]}
#    tower6_10: {num_interval_on_left: [6, 10], num_interval_on_right: [6, 10]}
#    tower11_15: {num_interval_on_left: [11, 15], num_interval_on_right: [11, 15]}


# Encoder settings.
encoder_type:
  value: dense
encoder_latent_size:
  value: 3
# NOTE(review): presumably a p-norm penalty on encoder activations, with p
# taken from `..._norm_p` and the loss scale from `..._weight` — confirm
# against the training code.
encoder_activation_penalty_norm_p:
  value: 1
encoder_activation_penalty_weight:
  value: 0.001

# Memory module settings.
memory_type:
  value: sith
# NOTE(review): the desc mentions rnn/lstm while memory_type is `sith`; whether
# this size applies to the sith memory is not shown in this file — confirm.
memory_hidden_size:
  desc: hidden size of rnn/lstm
  value: 100
# Activation penalty on the memory module; the weight is 0.0 here.
# NOTE(review): presumably a zero weight disables the penalty — confirm.
memory_activation_penalty_norm_p:
  value: 1
memory_activation_penalty_weight:
  value: 0.0

# Optional feature toggles, both set to false here. Written as canonical
# lowercase booleans so every YAML schema resolves them to booleans.
# NOTE(review): the exact effect of each toggle is defined in the model code,
# not in this file — confirm before enabling.
add_z_skip:
  value: false
add_outer:
  value: false

# Reinforcement-learning method selector.
# NOTE(review): presumably decides which of the A2C / DQN sections below is
# used — confirm against the training code.
rl_method:
  value: a2c

# A2C: weight penalties for the actor and the critic (both weights are 0.0).
actor_weight_penalty_norm_p:
  value: 1
actor_weight_penalty_weight:
  value: 0.0
critic_weight_penalty_norm_p:
  value: 1
critic_weight_penalty_weight:
  value: 0.0

# DQN: weight penalty (weight is 0.0).
dqn_weight_penalty_norm_p:
  value: 1
dqn_weight_penalty_weight:
  value: 0.0

# Optimizer step size.
learning_rate:
  value: 0.0001

# NOTE(review): if gamma is the RL discount factor, 0.0 means only the
# immediate reward contributes to the return — confirm this is intentional.
gamma:
  value: 0.0
# Logging toggles: videos and aggregate ratemaps are on, individual ratemaps
# are off. Written as canonical lowercase booleans (`true`/`false`).
log_videos:
  value: true
log_individual_ratemaps:
  value: false
log_aggregate_ratemaps:
  value: true

# Logging frequencies.
# NOTE(review): only log_stats_freq states its unit (backprop steps); the
# other frequencies presumably use the same unit — confirm.
log_video_freq:
  value: 10000
log_checkpoint_freq:
  value: 1000
log_ratemap_freq:
  value: 10000
log_stats_freq:
  desc: Frequency of validation pass, in backprop steps
  value: 1000

# -1 falls in the "0 or less" range described below, i.e. no step limit.
training_step_limit:
  desc: Training step at which to stop training and terminate. Values of 0 or less = unlimited.
  value: -1
