#
# Initial hyperparameter sweep of the TsallisInAC agent on the Ant medexp dataset.
#
# Gives us a baseline understanding of how the algorithm behaves in this setting.
#

# Fixed settings shared by every run of the sweep; only the keys listed under
# [sweep] vary between runs.

# Version tag of this config layout (presumably checked by the loader -- confirm).
config_version = 1
# Compute device string handed to the training code.
device="cpu"

# Experiment identity: which agent is trained, on which environment/dataset,
# and which metric is reported.
agent_name="TsallisInAC"
env_name="Ant"
dataset="medexp"
evaluation_criteria="return"

# Continuous-control task. state_dim/action_dim are presumably the sizes of the
# observation and action vectors and must match the Ant variant in use
# -- TODO confirm against the environment wrapper.
discrete_control=false
state_dim=27
action_dim=8

# Run length and logging cadence. Units are presumably training updates
# (verify against the training loop); 20_000 / 1_000 gives 20 log points.
max_steps=20_000
log_interval=1_000
# Presumably the per-episode step limit used during evaluation -- confirm.
timeout=500

# Network width and minibatch size.
hidden_units=256
batch_size=256

# Presumably the soft (Polyak) averaging coefficient for target-network
# updates -- confirm how the agent consumes it.
polyak=0.995

# Discount factor.
gamma=0.99

# NOTE(review): this flag is written as the integer 1, whereas
# discrete_control above uses a boolean literal; confirm the loader accepts
# both forms before normalising.
use_target_network=1
# Presumably the number of updates between target-network syncs -- confirm.
target_network_update_freq=1

# Swept hyperparameters: each key lists the candidate values to try.
# NOTE(review): if the runner expands these as a full Cartesian product, this
# is 5 * 4 * 4 = 80 runs -- confirm the expansion rule.
[sweep]
# Successive powers of 4: 4^-7, 4^-6, 4^-5, 4^-4, 4^-3.
learning_rate=[6.103515625e-5, 0.000244140625, 0.0009765625, 0.00390625, 0.015625]
# Decade-spaced from 1e-4 to 1e-1. Presumably the agent's entropy/regularisation
# temperature (separate from `polyak`) -- confirm against the agent code.
tau=[0.0001, 0.001, 0.01, 0.1]
# Four run indices per setting; presumably independent seeds/repetitions.
run=[0, 1, 2, 3]