# Environment selection.
env:
    # Environment identifier.
    _target_: "Ant-v4"
    # NOTE(review): presumably marks the environment as Gym-backed — confirm against the loader.
    gym: true
    # Whether the action space is discrete.
    discrete: false

# Settings used when generating expert trajectories.
expert_hyperparams:
    # Number of state-action pairs to collect.
    num_sa_pairs: 1000
    # Max steps per trajectory (left unset here).
    horizon: null
    # Whether to render a GIF of expert trajectories.
    render_gif: false

# Weights & Biases (wandb) logging settings.
wandb:
    project: ant_online
    job_type: ant_online_10
    tags:
        - HCC_Project
    # Wandb mode; one of: disabled, online.
    mode: online
    # No explicit run name is set here.
    run_name: null

# Algorithm selection.
algo:
    # NOTE(review): presumably switches to a toy/debug variant — confirm with the consumer.
    toy: false
    # Name of the algorithm to run.
    algo: "TD0_Buffer"

# Inner-step count.
# NOTE(review): `training_hyperparams.inner_steps` in this file carries the
# same value (10) — confirm which of the two the code actually reads.
inner_steps:
    steps: 10

# Hyperparameters for the training loop.
training_hyperparams:
    epochs: 3000  # Number of training epochs
    horizon: null  # Max steps per trajectory
    gamma: 0.99  # Discount factor
    # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
    # resolve a bare `2e-4` to the string "2e-4" rather than a float.
    learning_rate: 2.0e-4  # Learning rate for the optimizer
    num_sa_pairs: 1000
    buffer_size: 10
    # NOTE(review): the top-level `inner_steps.steps` key holds the same
    # value (10) — confirm which of the two is actually read.
    inner_steps: 10
    num_traj: 1
    batch_size: 128
    step_size: 1.0

# Optimizer and learning-rate scheduler selection.
Optimizer:
    opt: 'AdamW'
    scheduler: 'exp'
    # Explicit mantissa dot so YAML 1.1 loaders (e.g. PyYAML) resolve this to
    # a float; a bare `2e-4` would be loaded as the string "2e-4".
    lr: 2.0e-4

# Expert network: `_target_` is the dotted path to the `Expert` class.
# NOTE(review): presumably instantiated Hydra-style from `_target_` — confirm.
expert_net:
    _target_: src.agent_networks.gail_networks.Expert

# Value network: `_target_` is the dotted path to the `ValueNetwork` class.
# NOTE(review): presumably instantiated Hydra-style from `_target_` — confirm.
value_net:
    _target_: src.agent_networks.gail_networks.ValueNetwork