# Coarse hyperparameter sweep: TsallisKLInAC on Hopper, medexp dataset.

# Config schema version (presumably checked by the loader; confirm).
config_version = 1
# Device string for the run; this sweep is set to CPU.
device="cpu"

# Experiment identity: agent, environment, dataset, and the name of the
# evaluation metric.
# NOTE(review): "medexp" presumably means the medium-expert dataset; confirm.
agent_name="TsallisKLInAC"
env_name="Hopper"
dataset="medexp"
evaluation_criteria="return"

# Continuous-control task with an 11-dim state and a 3-dim action.
discrete_control=false
state_dim=11
action_dim=3

# Run length and logging cadence: max_steps is 100x log_interval.
# Both are presumably counted in training steps; confirm.
max_steps=1_000_000
log_interval=10_000
# NOTE(review): the unit of timeout is not stated here; presumably the
# per-episode step limit. Confirm against the consumer.
timeout=1000

# Hidden-layer size (presumably units per layer) and minibatch size.
hidden_units=256
batch_size=256

# Polyak averaging coefficient, presumably for the target-network update.
# NOTE(review): confirm which side of the average 0.995 weights.
polyak=0.995

# Presumably the discount factor.
gamma=0.99
# NOTE(review): presumably the Tsallis entropic index used by the agent; confirm.
q=2

# Target network is enabled with an update frequency of 1 (unit unconfirmed).
# NOTE(review): this flag is written as 1 while discrete_control uses false;
# confirm the consumer accepts both forms before normalizing either one.
use_target_network=1
target_network_update_freq=1

# Fixed at a single value here; tau has no top-level value and is set only
# through the [sweep] table below.
alpha=0.1

# New protocol
# Active sweep grid: 3 learning rates x 5 tau values x 10 runs.
# That is 150 combinations if the runner expands the full Cartesian
# product; confirm against the sweep runner.
[sweep]
learning_rate=[3e-5, 1e-4, 3e-4]
tau=[1.0, 0.5, 0.33, 0.1, 0.01]
# NOTE(review): presumably repetition indices (possibly used as seeds); confirm.
run=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

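# Old protocol (inactive; kept commented out for reference).
# Its learning rates are 2^-20, 2^-18, ..., 2^-10, and it swept alpha,
# which the active config fixes at a single top-level value.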
# [sweep]
# learning_rate=[9.5367431640625e-7, 3.814697265625e-6, 1.52587890625e-5, 6.103515625e-5, 0.000244140625, 0.0009765625]
# tau=[0.0001, 0.001, 0.01, 0.1]
# alpha=[0.5, 0.75, 0.9, 0.95, 1.0]
# run=[0, 1, 2, 3]