Expert(Sto) Return Avg: 131.19, std: 8.56
array([136.887, 124.498, 140.96 , 115.372, 138.728, 135.68 , 132.474,
       130.878, 133.047, 143.655, 124.246, 135.844, 127.443, 125.392,
       123.962, 137.846, 134.785, 141.838, 131.588, 126.386, 132.713,
       130.45 , 144.508, 130.894, 128.807, 128.02 , 129.539, 129.967,
       118.974, 118.827, 131.617, 117.698, 124.009, 129.829, 147.594,
       147.115, 126.137, 127.664, 134.665, 122.206, 117.648, 144.598,
       138.151, 140.091, 139.526, 144.959, 123.584, 119.73 , 126.466,
       131.612, 134.617, 126.928, 119.924, 131.722, 134.357, 134.946,
       122.923, 114.415, 121.577, 149.39 , 120.406, 133.094, 144.945,
       137.924])
Expert(Det) Return Avg: 148.40, std: 2.39
array([147.427, 151.18 , 150.466, 149.625, 149.43 , 150.421, 151.629,
       149.009, 145.007, 147.601, 146.361, 146.131, 147.376, 149.239,
       150.828, 148.344, 150.109, 146.582, 149.113, 145.386, 149.578,
       147.789, 147.728, 148.957, 148.412, 147.269, 152.888, 149.388,
       150.323, 149.948, 146.84 , 149.47 , 145.037, 148.931, 145.552,
       151.394, 152.052, 143.721, 146.247, 150.664, 149.12 , 144.051,
       147.54 , 144.708, 152.757, 149.783, 146.959, 145.082, 150.769,
       145.188, 144.518, 152.417, 149.892, 151.092, 145.772, 150.404,
       151.401, 146.574, 150.805, 147.108, 150.836, 145.846, 145.262,
       146.433])
ordereddict([('seed', 1), ('cuda', -1), ('env', ordereddict([('env_name', 'Walker2d-v4'), ('T', 100), ('state_indices', 'all')])), ('sac', ordereddict([('epochs', 10000), ('log_step_interval', 500), ('update_every', 5), ('update_num', 1), ('random_explore_episodes', 10), ('batch_size', 100), ('lr', 0.001), ('alpha', 0.2), ('automatic_alpha_tuning', False), ('buffer_size', 1000000), ('num_test_episodes', 10)])), ('expert', ordereddict([('samples_episode', 64)])), ('reward', ordereddict([('use_bn', False), ('residual', False), ('hid_act', 'relu'), ('hidden_sizes', [64, 64]), ('clamp_magnitude', 10), ('lr', 0.0001), ('weight_decay', 0.001), ('gradient_step', 1), ('momentum', 0.9)]))])