Expert(Sto) Return Avg: 29.49, std: 1.87
array([28.854, 30.601, 30.97 , 26.907, 30.339, 31.781, 27.941, 28.799,
       31.784, 32.358, 31.001, 31.204, 28.   , 30.129, 30.174, 28.676,
       28.328, 29.705, 31.349, 28.606, 31.621, 32.813, 31.67 , 29.078,
       31.024, 26.896, 24.084, 26.849, 28.264, 30.863, 30.995, 29.787,
       28.457, 29.204, 29.452, 27.647, 27.937, 32.636, 29.788, 27.592,
       31.134, 29.19 , 32.842, 26.919, 26.564, 28.418, 29.248, 32.688,
       29.979, 27.436, 28.466, 30.041, 24.949, 29.221, 30.772, 27.184,
       27.929, 31.424, 29.494, 30.541, 31.182, 28.817, 29.164, 29.301])
Expert(Det) Return Avg: 30.41, std: 1.44
array([28.631, 29.973, 30.507, 32.2  , 29.015, 31.825, 30.937, 28.875,
       29.07 , 31.638, 29.648, 33.025, 30.51 , 33.147, 33.131, 32.761,
       28.758, 28.104, 27.191, 31.493, 28.457, 29.603, 32.607, 29.268,
       32.6  , 29.463, 29.5  , 29.994, 30.369, 31.537, 29.35 , 29.212,
       29.898, 29.553, 30.989, 29.386, 29.78 , 28.495, 31.026, 29.737,
       30.634, 28.517, 31.719, 31.269, 31.112, 28.438, 29.883, 31.757,
       30.77 , 32.021, 30.699, 28.336, 30.538, 31.928, 30.027, 28.801,
       29.173, 32.216, 31.239, 31.353, 32.804, 31.222, 29.194, 31.487])
ordereddict([('seed', 1), ('cuda', -1), ('env', ordereddict([('env_name', 'Swimmer-v4'), ('T', 100), ('state_indices', 'all')])), ('sac', ordereddict([('epochs', 10000), ('log_step_interval', 500), ('update_every', 5), ('update_num', 1), ('random_explore_episodes', 10), ('batch_size', 100), ('lr', 0.001), ('alpha', 0.2), ('automatic_alpha_tuning', False), ('buffer_size', 1000000), ('num_test_episodes', 10)])), ('expert', ordereddict([('samples_episode', 64)])), ('reward', ordereddict([('use_bn', False), ('residual', False), ('hid_act', 'relu'), ('hidden_sizes', [64, 64]), ('clamp_magnitude', 10), ('lr', 0.0001), ('weight_decay', 0.001), ('gradient_step', 1), ('momentum', 0.9)]))])