
import copy

# Registry of named experiment configurations; each entry is a nested dict
# with agent / algo / env / model / optim / runner / sampler sections.
configs = {}

# Baseline variant, registered below as "td3_1M_serial".
config = {
    "agent": {
        "model_kwargs": None,
        "q_model_kwargs": None,
    },
    "algo": {
        "discount": 0.99,
        "batch_size": 100,
        "replay_ratio": 100,
        "target_update_tau": 0.005,
        "target_update_interval": 2,
        "policy_update_interval": 2,
        "learning_rate": 1e-3,
        "q_learning_rate": 1e-3,
    },
    "env": {"id": "Hopper-v3"},
    # No separate "eval_env" entry ({"id": "Hopper-v3"}): train script uses "env".
    "model": {},
    "optim": {},
    "runner": {
        "n_steps": 1e6,
        "log_interval_steps": 1e4,
    },
    "sampler": {
        "batch_T": 1,
        "batch_B": 1,
        "max_decorrelation_steps": 0,
        "eval_n_envs": 6,
        "eval_max_steps": int(51e3),
        "eval_max_trajectories": 50,
    },
}
configs["td3_1M_serial"] = config

# Second variant: an independent deep copy of the baseline with
# "bootstrap_timelimit" switched on in the algo section.
config = copy.deepcopy(config)
config["algo"].update(bootstrap_timelimit=True)
configs["td3_serial_bstl"] = config

# Third variant: an independent deep copy of "td3_serial_bstl" with different
# sampler batch dimensions and an extra "updates_per_sync" algo setting.
# The module-level name `config` is left bound to this last entry.
config = copy.deepcopy(configs["td3_serial_bstl"])
config["sampler"].update(batch_T=5, batch_B=3)
config["algo"]["updates_per_sync"] = 1
configs["async_cpu"] = config
