# Hydra defaults list: selects one option from each config group to compose
# into this config. Order is significant to composition, so keep it stable.
defaults:
    - agent: dqn
    # env options noted by the author: gym_Breakout-ram-v0, dog_stand, small_gridworld
    - env: small_gridworld
    - tloss: base
    - adloss: base
    - rdynamics: none
    - l2embed: none
# Experiment label. It is not derived from anything else and must be set by
# hand for each experiment; it becomes a path component of save_dir.
experiment: "action_dist"

# Left unset (null) by default.
# NOTE(review): the accepted values are defined by the consumer — confirm.
surf_loss: null

# reward learning
# NOTE(review): notes below that say "presumably" are inferred from key names
# only; confirm them against the code that reads this config.
# Presumably the length of each trajectory segment shown in a query — confirm.
segment: 50
activation: tanh
num_seed_steps: 1000
num_unsup_steps: 5000
num_interact: 5000
reward_lr: 0.0003
reward_batch: 128 # for human queries
reward_update: 200  # number of epochs for updating reward model
reward_model_train_batch_size: 32 # batch size for training the reward model
# Integer selector; the mapping from value to behavior lives in the consumer.
feed_type: 0
reset_update: 100
topK: 5
# Presumably the number of reward models in the ensemble — confirm.
ensemble_size: 3
max_feedback: 1400
large_batch: 10
label_margin: 0.0
# Teacher settings. Presumably these parameterize a simulated teacher
# (e.g. mistake / skip / equal-preference rates) — TODO confirm.
# NOTE(review): teacher_beta is negative; check whether -1 is a sentinel.
teacher_beta: -1
teacher_gamma: 1
teacher_eps_mistake: 0
teacher_eps_skip: 0
teacher_eps_equal: 0

# scheduling
# Integer selector; the mapping from value to schedule lives in the consumer.
reward_schedule: 0

# Number of training steps, written as a plain integer. The bare exponent
# form (1e6) loads as a float under OmegaConf and as a string under YAML 1.1
# loaders such as PyYAML; either type would also reach
# replay_buffer_capacity through the interpolation below.
num_train_steps: 1000000
# Capacity tracks num_train_steps via interpolation, so the two stay equal
# unless this key is overridden explicitly.
replay_buffer_capacity: ${num_train_steps}

# evaluation config
# NOTE(review): eval_frequency (10) is three orders of magnitude smaller than
# log_frequency (10000). The units of both are not visible in this file —
# confirm that evaluating this often is intended.
eval_frequency: 10
num_eval_episodes: 10
# Set to cpu here; presumably handed to the ML framework as the compute
# device — confirm.
device: cpu

# logger
log_frequency: 10000
# Presumably toggles saving TensorBoard logs — confirm.
log_save_tb: true
# Presumably toggles Weights & Biases logging — confirm.
wandb: false
# video recorder
save_video: false

# setups
seed: 1

gradient_update: 1
# Output directory, laid out as ./data/exp/<experiment>/<env.name>/<timestamp>/.
# The timestamp comes from the `now` resolver with a strftime-style pattern.
# hydra.run.dir points at this value, so this is also the run's working dir.
save_dir: ./data/exp/${experiment}/${env.name}/${now:%Y-%m-%d_%H-%M-%S}/
# Hydra configuration
hydra:
    job:
        # Name the job after the environment's name string. save_dir reads
        # `${env.name}`, so `env` is a mapping; interpolating `${env}` here
        # would yield the whole env config node rather than a string.
        name: ${env.name}
    run:
        # Run inside the experiment's save directory.
        dir: ${save_dir}