Expert(Sto) Return Avg: 321.25, std: 15.33
array([313.575, 317.255, 356.548, 302.103, 319.463, 301.596, 332.554,
       306.173, 321.981, 322.865, 314.49 , 300.576, 336.631, 306.902,
       316.959, 320.277, 336.377, 346.645, 345.398, 327.563, 296.361,
       330.564, 324.697, 341.218, 316.346, 344.95 , 326.337, 338.429,
       304.08 , 346.279, 329.304, 321.976, 351.645, 324.837, 324.417,
       288.012, 329.184, 308.936, 332.812, 271.137, 302.197, 325.705,
       305.821, 314.311, 314.284, 321.314, 308.99 , 330.372, 322.932,
       315.324, 321.342, 318.61 , 318.053, 306.056, 328.854, 325.344,
       325.728, 317.884, 345.876, 316.862, 325.897, 316.138, 327.487,
       307.057])
Expert(Det) Return Avg: 339.36, std: 10.90
array([354.369, 329.729, 326.279, 344.062, 355.834, 339.01 , 342.934,
       336.913, 345.626, 352.682, 334.286, 343.319, 339.096, 334.788,
       338.105, 338.611, 330.896, 323.5  , 338.518, 339.222, 329.946,
       338.44 , 329.172, 354.566, 328.373, 336.298, 332.939, 335.516,
       317.715, 344.486, 335.816, 331.952, 328.011, 352.065, 343.652,
       340.461, 333.321, 330.158, 340.216, 356.05 , 332.9  , 330.446,
       333.566, 304.853, 333.415, 342.103, 340.328, 338.665, 338.395,
       351.964, 351.549, 362.983, 345.641, 346.81 , 344.945, 356.774,
       356.704, 357.751, 328.308, 329.323, 322.132, 359.561, 336.002,
       346.708])
ordereddict([('seed', 1), ('cuda', -1), ('env', ordereddict([('env_name', 'HalfCheetah-v4'), ('T', 100), ('state_indices', 'all'), ('delayed', 1)])), ('sac', ordereddict([('epochs', 10000), ('log_step_interval', 500), ('update_every', 5), ('update_num', 1), ('random_explore_episodes', 10), ('batch_size', 100), ('lr', 0.001), ('alpha', 0.2), ('automatic_alpha_tuning', False), ('buffer_size', 1000000), ('num_test_episodes', 10)])), ('expert', ordereddict([('samples_episode', 64)])), ('reward', ordereddict([('use_bn', False), ('residual', False), ('hid_act', 'relu'), ('hidden_sizes', [64, 64]), ('clamp_magnitude', 10), ('lr', 0.0001), ('weight_decay', 0.001), ('gradient_step', 1), ('momentum', 0.9)]))])