Expert(Sto) Return Avg: 508.37, std: 26.25
array([400.663, 544.379, 492.753, 500.658, 509.99 , 479.142, 475.925,
       498.168, 458.713, 514.004, 486.982, 480.986, 513.472, 521.008,
       499.279, 497.899, 534.434, 517.233, 516.258, 531.769, 521.51 ,
       532.416, 481.682, 507.024, 517.895, 488.073, 501.998, 478.044,
       496.556, 542.399, 536.651, 534.666, 492.342, 523.378, 539.616,
       543.993, 507.339, 503.385, 509.173, 476.083, 528.084, 529.575,
       521.889, 477.878, 545.717, 509.977, 465.511, 533.155, 545.124,
       489.439, 523.699, 515.764, 510.062, 522.139, 543.113, 481.121,
       508.362, 485.259, 535.849, 466.225, 525.522, 517.159, 517.424,
       529.648])
Expert(Det) Return Avg: 540.24, std: 17.32
array([527.351, 536.122, 576.935, 539.66 , 534.543, 544.018, 502.448,
       533.537, 558.749, 525.585, 554.195, 539.763, 541.998, 538.371,
       539.658, 546.712, 523.658, 545.319, 529.137, 547.361, 559.929,
       562.6  , 564.191, 541.01 , 513.862, 531.927, 540.976, 539.677,
       514.982, 510.545, 559.988, 553.349, 550.757, 518.249, 547.848,
       523.775, 547.638, 554.916, 528.086, 527.761, 501.607, 558.253,
       580.486, 540.196, 533.495, 509.816, 515.896, 559.562, 520.874,
       540.445, 556.881, 525.457, 550.133, 532.53 , 542.51 , 554.769,
       560.988, 559.794, 533.073, 555.744, 563.51 , 529.498, 554.61 ,
       518.229])
{'seed': 1, 'cuda': -1, 'env': {'env_name': 'HalfCheetah-v3', 'T': 100, 'state_indices': 'all'}, 'sac': {'epochs': 2000, 'log_step_interval': 500, 'update_every': 5, 'update_num': 1, 'random_explore_episodes': 10, 'batch_size': 100, 'lr': 0.001, 'alpha': 0.2, 'automatic_alpha_tuning': False, 'buffer_size': 1000000, 'num_test_episodes': 10}, 'expert': {'samples_episode': 64}, 'reward': {'use_bn': False, 'residual': False, 'hid_act': 'relu', 'hidden_sizes': [64, 64], 'clamp_magnitude': 10, 'lr': 0.0001, 'weight_decay': 0.001, 'gradient_step': 1, 'momentum': 0.9}}