{
  "algorithm": "SAC",
  "algorithm_kwargs": {
    "batch_size": 256,
    "max_path_length": 1000,
    "min_num_steps_before_training": 10000,
    "num_epochs": 2500,
    "num_eval_steps_per_epoch": 5000,
    "num_expl_steps_per_train_loop": 1000,
    "num_trains_per_train_loop": 1000
  },
  "annealing": false,
  "bo": false,
  "domain": "ant",
  "entropy_based_exp": {
    "alpha": 0.0,
    "alpha_2": 0.1,
    "beta": 0.0,
    "nor": 3.0,
    "redq": false,
    "seed": 4,
    "should_use": false,
    "sigma": 0.0,
    "use_aleatoric": true,
    "use_automatic_z_tuning": false,
    "use_quantile_cdf": false,
    "version": 11250,
    "z": 0.0
  },
  "layer_size": 256,
  "log_dir": "./data/master/num_expl_steps_per_train_loop_1000_num_trains_per_train_loop_1000_beta_UB_4.66_delta_23.53_alpha_0.0_alpha_2_0.1_beta_0.0_sigma_0.0_nor_3.0_z_0.0_ee_False_version_11250/ant/seed_4",
  "optimistic_exp": {
    "beta_UB": 4.66,
    "delta": 23.53,
    "redq": false,
    "seed": 4,
    "should_use": true,
    "use_aleatoric": true,
    "version": 11250
  },
  "redq": false,
  "replay_buffer_size": 1000000,
  "seed": 4,
  "trainer_kwargs": {
    "discount": 0.99,
    "policy_lr": 0.0003,
    "qf_lr": 0.0003,
    "redq": false,
    "reward_scale": 1,
    "soft_target_tau": 0.005,
    "target_update_period": 1,
    "train_num": 20,
    "use_aleatoric": true,
    "use_automatic_entropy_tuning": true
  },
  "use_aleatoric": true,
  "version": "normal"
}