# Hyperparameters grouped under ad_policy_dict.
# NOTE(review): the "TD3 exploration" section of this file suggests these are
# TD3-style settings -- confirm against the code that consumes this mapping.
ad_policy_dict:
  # Disabled keys, kept for reference only; they are not set by this file.
  # discrete_action_dim: 8
  # parameter_action_dim: 64
  max_action: 1.0
  discount: 0.99
  tau: 0.005
  policy_noise: 0.1
  noise_clip: 0.5
  policy_freq: 2

# Embedding / VAE settings.
# Booleans use the canonical lowercase spelling so every YAML loader reads them
# as booleans.
ad_policy_cpu_only: false
pretrain_tmax: 40000
embed_lr: 0.0003
# A perturbation_range of 1.0 may be too large.
# NOTE(review): the value set below is 10.0, which is larger still -- confirm
# that this is the intended setting.
perturbation_range: 10.0
vae_batch_size: 128
hyar_batch_size: 256
# VAE load/save switches; all are disabled in this configuration.
vae_load_buffer: false
vae_save_model: false
vae_load_model: false
# Empty string: no checkpoint path is configured here.
checkpoint_path_vae_model: ""
vae_update_episode_interval: 1000
recon_loss_c_weight: 5.0
training_vae_steps: 10000
vae_get_c_rate_batch_size: 5000

# TD3 exploration
# NOTE(review): presumably the exploration noise is annealed from
# expl_noise_initial down to expl_noise over epsilon_steps steps -- confirm
# against the code that reads these keys.
epsilon_steps: 200000
expl_noise_initial: 1.0
expl_noise: 0.05

# Component selection.
# NOTE(review): mac and learner look like registry keys resolved elsewhere in
# the project -- confirm that both names are registered there.
# agent_output_type is explicitly null (no value set).
agent_output_type: null
mac: "attack_mac"
learner: "ad_learner"

# Presumably the identifier of this algorithm configuration -- confirm usage.
name: "hyar"
