Expert(Sto) Return Avg: 238.89, std: 2.99
array([236.307, 240.192, 244.063, 241.24 , 239.228, 235.418, 240.537,
       239.523, 235.252, 238.929, 236.688, 241.143, 235.749, 240.074,
       237.407, 246.129, 239.006, 236.974, 244.553, 239.047, 235.288,
       239.24 , 237.78 , 238.696, 237.958, 240.269, 236.951, 236.481,
       235.252, 236.209, 237.237, 237.011, 242.026, 242.402, 233.101,
       239.007, 241.503, 242.895, 239.285, 235.802, 238.638, 239.649,
       241.596, 236.94 , 235.817, 237.412, 230.689, 243.157, 238.654,
       236.898, 241.406, 248.204, 237.09 , 238.131, 239.212, 240.5  ,
       239.45 , 243.485, 236.55 , 240.356, 239.786, 238.929, 238.304,
       236.504])
Expert(Det) Return Avg: 238.14, std: 1.04
array([236.089, 238.725, 239.386, 237.02 , 237.454, 237.754, 239.03 ,
       240.407, 237.787, 236.701, 237.381, 239.351, 239.223, 238.821,
       237.17 , 237.459, 239.205, 237.154, 237.563, 237.631, 238.833,
       240.826, 239.231, 238.316, 236.955, 237.109, 239.095, 239.098,
       240.081, 237.356, 239.195, 239.248, 237.522, 236.804, 239.005,
       237.31 , 238.745, 239.136, 239.149, 237.009, 238.548, 239.224,
       237.684, 238.843, 237.059, 236.774, 237.347, 239.239, 237.054,
       238.816, 237.942, 239.055, 237.248, 237.12 , 238.476, 237.105,
       237.298, 238.676, 237.522, 238.682, 236.347, 237.484, 238.565,
       237.323])
ordereddict([('seed', 1), ('cuda', -1), ('env', ordereddict([('env_name', 'Hopper-v4'), ('T', 100), ('state_indices', 'all')])), ('sac', ordereddict([('epochs', 10000), ('log_step_interval', 500), ('update_every', 5), ('update_num', 1), ('random_explore_episodes', 10), ('batch_size', 100), ('lr', 0.001), ('alpha', 0.2), ('automatic_alpha_tuning', False), ('buffer_size', 1000000), ('num_test_episodes', 10)])), ('expert', ordereddict([('samples_episode', 64)])), ('reward', ordereddict([('use_bn', False), ('residual', False), ('hid_act', 'relu'), ('hidden_sizes', [64, 64]), ('clamp_magnitude', 10), ('lr', 0.0001), ('weight_decay', 0.001), ('gradient_step', 1), ('momentum', 0.9)]))])