Expert(Sto) Return Avg: -23.35, std: 17.00
array([ -0.912, -30.109,  -4.747, -25.454, -23.691, -40.825, -42.481,
        -6.014, -52.437,  -6.856, -49.102, -18.827, -37.753, -20.911,
         8.248, -25.988, -10.251, -35.77 , -45.722, -27.39 , -26.129,
       -19.331, -30.823, -19.148,  -0.713,  13.017,  -9.066, -15.077,
       -10.948, -28.404, -39.768, -48.254, -33.306,  -9.266, -16.846,
       -36.812, -23.343, -28.198, -52.785,  -9.654, -13.82 , -36.316,
       -26.024,  -7.028, -52.54 ,  -5.581, -50.217, -37.129,   3.42 ,
       -29.424, -44.89 , -19.651,  -0.043, -14.886, -56.48 ,   1.632,
       -24.074, -31.734, -21.645,  -0.784, -28.611, -40.003,   5.076,
       -21.794])
Expert(Det) Return Avg: 81.11, std: 3.26
array([82.674, 81.835, 83.591, 81.088, 80.912, 81.233, 78.951, 80.54 ,
       84.417, 78.312, 79.425, 83.192, 86.354, 81.156, 83.275, 82.374,
       78.482, 84.338, 80.421, 83.041, 81.057, 80.417, 81.918, 78.5  ,
       80.962, 83.029, 82.956, 86.206, 80.53 , 81.344, 83.689, 81.328,
       78.542, 83.719, 81.311, 80.011, 79.533, 82.015, 77.602, 81.11 ,
       79.375, 82.305, 80.122, 83.715, 81.862, 60.497, 85.602, 79.893,
       81.315, 80.241, 82.055, 80.552, 83.565, 80.235, 80.447, 80.231,
       81.953, 84.545, 81.664, 77.904, 82.903, 79.759, 78.545, 80.256])
ordereddict([('seed', 1), ('cuda', -1), ('env', ordereddict([('env_name', 'Ant-v4'), ('T', 100), ('state_indices', 'all')])), ('sac', ordereddict([('epochs', 10000), ('log_step_interval', 500), ('update_every', 5), ('update_num', 1), ('random_explore_episodes', 10), ('batch_size', 100), ('lr', 0.001), ('alpha', 0.2), ('automatic_alpha_tuning', False), ('buffer_size', 1000000), ('num_test_episodes', 10)])), ('expert', ordereddict([('samples_episode', 64)])), ('reward', ordereddict([('use_bn', False), ('residual', False), ('hid_act', 'relu'), ('hidden_sizes', [64, 64]), ('clamp_magnitude', 10), ('lr', 0.0001), ('weight_decay', 0.001), ('gradient_step', 1), ('momentum', 0.9)]))])