# Run identification and rollout sizing.
# These are top-level values; the nested `mamujoco` mapping later in this file
# repeats several of these keys (e.g. num_env_steps, episode_length,
# hidden_size) with different values.
env_name: macpo
algorithm_name: macpo
experiment_name: check
seed: 0
# Relative path; it resolves against the process working directory.
run_dir: ./runs
# NOTE(review): presumably the total environment-step budget — confirm in the trainer.
num_env_steps: 100000000
episode_length: 8
n_rollout_threads: 1
n_eval_rollout_threads: 1
hidden_size: 512
# Capitalised `True`/`False` are resolved as booleans by YAML 1.1 loaders
# (e.g. PyYAML) and by the YAML 1.2 core schema.
use_render: False
recurrent_N: 1

# Checkpoint, evaluation and logging cadence.
# NOTE(review): the unit of the *_interval values (episodes vs. updates) is not
# visible in this file — confirm against the runner.
save_interval: 1
# Evaluation is switched off here, so eval_interval / eval_episodes presumably
# have no effect unless this flag is enabled — TODO confirm.
use_eval: False
eval_interval: 25
log_interval: 25
eval_episodes: 10000

# Cost-constraint / safety settings.
# NOTE(review): exact semantics of each coefficient are defined by the
# consuming code, not by this file — confirm before tuning.
cost_limit: 25
# Keep the decimal point in `1.e-8`: YAML 1.1 loaders such as PyYAML only
# resolve exponent notation as a float when a `.` is present; `1e-8` would
# load as a string there.
EPS: 1.e-8
# NOTE(review): the nested `mamujoco` mapping sets safety_gamma to 0.2.
safety_gamma: 0.09
step_fraction: 0.5
g_step_dir_coef: 0.1
b_step_dir_coef: 0.1
fraction_coef: 0.1

# Discounting and value-target settings.
# NOTE(review): gamma / gae_lambda are presumably the reward discount and GAE
# lambda — confirm in the buffer implementation.
gamma: 0.96
gae_lambda: 0.95
use_gae: True
# NOTE(review): use_popart and use_valuenorm are both enabled. Some trainers
# treat these as mutually exclusive — confirm that the consumer accepts both.
use_popart: True
use_valuenorm: True
use_proper_time_limits: False

# Policy-update / optimization settings.
# NOTE(review): the meaning of each key is defined by the consuming trainer;
# the notes below cover only what this file itself establishes.
target_kl: 0.016
searching_steps: 10
conjugate_gradient_iters: 10
accept_ratio: 0.5
clip_param: 0.2
learning_iters: 5
num_mini_batch: 1
# Intentionally unset. The original was a bare `data_chunk_length:` with
# trailing whitespace, which YAML already loads as null; it is spelled out here
# so the absence of a value is explicit and lint-clean.
# NOTE(review): both recurrent-policy flags below are False, so this is
# presumably unused — set an integer here if a recurrent policy is enabled.
data_chunk_length: null
value_loss_coef: 1
entropy_coef: 0.0
max_grad_norm: 10
huber_delta: 10.0
use_recurrent_policy: False
use_naive_recurrent_policy: False
use_max_grad_norm: True
use_clipped_value_loss: True
use_huber_loss: True
use_value_active_masks: False
use_policy_active_masks: False

# Optimizer settings (separate learning rates for actor and critic).
# The `N.e-M` spelling keeps a decimal point so that YAML 1.1 loaders such as
# PyYAML resolve these as floats; `9e-5` would load as a string there.
actor_lr: 9.e-5
critic_lr: 5.e-3
# NOTE(review): presumably the optimizer's epsilon term — confirm in the policy code.
opti_eps: 1.e-5
weight_decay: 0.0

# Weight-initialisation settings.
# NOTE(review): `gain` and `actor_gain` hold the same value here; which layers
# each one applies to is decided by the network code — confirm there.
gain: 0.01
actor_gain: 0.01
use_orthogonal: True

# Network input / architecture settings.
use_feature_normalization: True
use_ReLU: True
stacked_frames: 1
# The nested `mamujoco` mapping sets layer_N to 1.
layer_N: 2

# NOTE(review): presumably coefficients for the action-distribution standard
# deviation — the formula lives in the consuming code; confirm there.
# Note that std_x_coef loads as an int while std_y_coef loads as a float.
std_x_coef: 1
std_y_coef: 0.5

# Nested mapping keyed by `mamujoco`. Every key below also exists at the top
# level of this file with a different value.
# NOTE(review): presumably these replace the top-level defaults when the
# mamujoco environment is selected — YAML itself performs no such merge, so
# confirm how the loader applies this section.
mamujoco: 
  layer_N: 1
  num_env_steps: 10000000
  episode_length: 1000
  n_rollout_threads: 10
  n_eval_rollout_threads: 10
  hidden_size: 128
  gamma: 0.99
  safety_gamma: 0.2
  target_kl: 0.01
  learning_iters: 15 # Number of SGD Iterations
  entropy_coef: 0.01