{
  "algorithm": "SAC",
  "algorithm_kwargs": {
    "batch_size": 256,
    "max_path_length": 1000,
    "min_num_steps_before_training": 10000,
    "num_epochs": 2500,
    "num_eval_steps_per_epoch": 5000,
    "num_expl_steps_per_train_loop": 1000,
    "num_trains_per_train_loop": 1000
  },
  "annealing": false,
  "bo": false,
  "domain": "ant",
  "entropy_based_exp": {
    "alpha": 0.05,
    "alpha_2": 0.1,
    "beta": 3.2,
    "nor": 3.0,
    "redq": false,
    "seed": 4,
    "should_use": true,
    "sigma": 0.0,
    "use_aleatoric": true,
    "use_automatic_z_tuning": false,
    "use_quantile_cdf": false,
    "version": 11253,
    "z": 0.5
  },
  "layer_size": 256,
  "log_dir": "./data/master/num_expl_steps_per_train_loop_1000_num_trains_per_train_loop_1000_beta_UB_0.0_delta_0.0_alpha_0.05_alpha_2_0.1_beta_3.2_sigma_0.0_nor_3.0_z_0.5_ee_True_version_11253/ant/seed_4",
  "optimistic_exp": {
    "beta_UB": 0.0,
    "delta": 0.0,
    "redq": false,
    "seed": 4,
    "should_use": false,
    "use_aleatoric": true,
    "version": 11253
  },
  "redq": false,
  "replay_buffer_size": 1000000,
  "seed": 4,
  "trainer_kwargs": {
    "discount": 0.99,
    "policy_lr": 0.0003,
    "qf_lr": 0.0003,
    "redq": false,
    "reward_scale": 1,
    "soft_target_tau": 0.005,
    "target_update_period": 1,
    "train_num": 20,
    "use_aleatoric": true,
    "use_automatic_entropy_tuning": true
  },
  "use_aleatoric": true,
  "version": "normal"
}