# Hydra defaults list: picks the `sac` option from the `agent` config group.
defaults:
    - agent: sac

# Experiment name; it must be specified manually.
experiment: PEBBLE_human

# reward learning
# NOTE(review): the exact meaning and units of the keys in this section are
# defined by the training code that reads them, which is not visible from this
# file — confirm there before changing any value.
segment: 50
activation: tanh
# NOTE(review): the three counts below are presumably measured in environment
# steps — confirm against the training loop.
num_seed_steps: 1000
num_unsup_steps: 9000
num_interact: 5000
reward_lr: 0.0003
# Also interpolated into hydra.run.dir (the `_b${reward_batch}` suffix), so
# changing it changes the output directory name.
reward_batch: 20
reward_update: 200
# NOTE(review): integer selector; the mapping from value to behaviour lives in
# the consuming code — confirm what 0 selects.
feed_type: 0
reset_update: 100
topK: 5
ensemble_size: 3
# Also interpolated into hydra.run.dir (the `_n${max_feedback}` part), so
# changing it changes the output directory name.
max_feedback: 200
large_batch: 10
label_margin: 0.0

# scheduling
# NOTE(review): integer selector; what 0 selects is defined by the consuming
# code — confirm there.
reward_schedule: 0

# Number of training steps, written in canonical float form on purpose:
# strict YAML 1.1 loaders (e.g. PyYAML) read a bare `1e6` as the *string*
# "1e6", whereas `1.0e+6` is a float under both YAML 1.1 and YAML 1.2.
num_train_steps: 1.0e+6
# Resolved by interpolation, so it always has the same value (and type) as
# num_train_steps.
replay_buffer_capacity: ${num_train_steps}

# evaluation config
# NOTE(review): eval_frequency is presumably counted in environment steps —
# confirm against the training loop.
eval_frequency: 10000
num_eval_episodes: 10
# Compute device name. It sits under the evaluation heading but is not
# evaluation-specific by name.
# NOTE(review): presumably requires a CUDA-capable GPU as written — confirm
# how the consuming code uses this value.
device: cuda

# logger
# NOTE(review): log_frequency is presumably counted in environment steps —
# confirm against the logger code.
log_frequency: 10000
# NOTE(review): the name suggests a TensorBoard output toggle — confirm.
log_save_tb: false

# video recorder
# Video saving is turned off in this configuration.
save_video: false

# setups
# Also interpolated into hydra.run.dir (the `seed${seed}` part), so each seed
# writes to its own output directory.
seed: 1

# Environment
# Environment identifier; also used as a path component of hydra.run.dir.
env: walker_walk

# Data locations.
# NOTE(review): these are machine-specific absolute paths and will not exist
# on other hosts — override them per machine (e.g. with Hydra command-line
# overrides) rather than relying on these defaults.
# NOTE(review): the directory names suggest recorded transitions and
# preference labels respectively — confirm against the code that reads them.
input_path: /mnt/hdd/workspace/PbRL/PbRL/sac_records/transitions/
label_path: /mnt/hdd/workspace/PbRL/PbRL/sac_records/labels/

# hydra configuration
hydra:
    run:
        # Per-run output directory, relative to the launch directory. The
        # path is assembled from the env, seed, max_feedback and reward_batch
        # values of this config, so runs that differ in any of those four
        # settings land in different directories.
        dir: ./exp_pebble_human/${env}/seed${seed}_n${max_feedback}_b${reward_batch}/
