Expert(Sto) Return Avg: 188.01, std: 9.89
array([198.422, 192.986, 189.506, 152.885, 187.417, 196.862, 171.564,
       167.763, 198.504, 179.387, 193.467, 190.278, 192.312, 194.425,
       191.228, 190.671, 188.106, 164.848, 187.616, 190.087, 188.119,
       152.371, 191.31 , 182.821, 190.778, 190.952, 193.638, 187.91 ,
       193.879, 169.818, 195.159, 187.852, 188.175, 194.36 , 177.046,
       189.103, 193.435, 174.231, 188.985, 177.31 , 195.34 , 181.101,
       192.912, 192.988, 189.584, 191.425, 192.876, 197.04 , 190.617,
       197.116, 189.891, 195.769, 193.265, 201.716, 195.796, 189.772,
       195.521, 193.416, 188.41 , 190.85 , 188.305, 189.808, 176.116,
       197.492])
Expert(Det) Return Avg: 182.21, std: 12.07
array([168.637, 177.804, 176.35 , 196.31 , 166.585, 170.172, 193.783,
       176.946, 172.356, 172.685, 192.415, 169.307, 177.56 , 167.667,
       163.893, 189.628, 178.88 , 193.113, 192.806, 196.148, 160.749,
       191.612, 170.289, 196.339, 170.578, 194.555, 195.46 , 193.648,
       193.554, 168.374, 169.639, 163.654, 177.541, 192.419, 193.506,
       192.313, 173.984, 175.935, 167.369, 197.654, 195.978, 195.188,
       195.913, 189.188, 192.888, 193.975, 197.149, 168.7  , 173.65 ,
       194.229, 163.276, 179.978, 177.841, 163.698, 174.509, 168.503,
       196.932, 180.783, 179.582, 166.825, 193.07 , 197.299, 195.048,
       195.333])
ordereddict([('seed', 1), ('cuda', -1), ('env', ordereddict([('env_name', 'Walker2d-v4'), ('T', 100), ('state_indices', 'all')])), ('sac', ordereddict([('epochs', 10000), ('log_step_interval', 500), ('update_every', 5), ('update_num', 1), ('random_explore_episodes', 10), ('batch_size', 100), ('lr', 0.001), ('alpha', 0.2), ('automatic_alpha_tuning', False), ('buffer_size', 1000000), ('num_test_episodes', 10)])), ('expert', ordereddict([('samples_episode', 64)])), ('reward', ordereddict([('use_bn', False), ('residual', False), ('hid_act', 'relu'), ('hidden_sizes', [64, 64]), ('clamp_magnitude', 10), ('lr', 0.0001), ('weight_decay', 0.001), ('gradient_step', 1), ('momentum', 0.9)]))])