Expert(Sto) Return Avg: 28.72, std: 2.13
array([25.721, 29.071, 30.897, 28.91 , 30.404, 31.187, 28.412, 30.551,
       27.14 , 27.021, 27.35 , 31.186, 29.129, 28.939, 28.686, 28.877,
       29.628, 31.105, 30.758, 26.072, 20.287, 31.794, 30.114, 27.238,
       32.222, 29.066, 27.461, 28.311, 30.101, 30.254, 28.421, 32.157,
       25.829, 23.273, 31.479, 28.948, 24.334, 30.419, 29.602, 27.937,
       27.668, 26.793, 27.781, 27.603, 30.937, 30.196, 29.044, 28.765,
       26.704, 28.958, 25.944, 27.91 , 29.418, 27.154, 31.12 , 29.962,
       27.952, 28.371, 29.404, 29.36 , 28.22 , 30.126, 31.134, 27.298])
Expert(Det) Return Avg: 33.20, std: 0.95
array([32.951, 32.68 , 32.589, 31.942, 33.968, 31.008, 33.533, 34.07 ,
       34.145, 33.997, 34.018, 32.757, 34.167, 31.641, 33.59 , 34.185,
       33.791, 33.065, 33.191, 31.03 , 34.548, 31.977, 32.292, 32.588,
       31.226, 34.044, 33.993, 33.141, 34.978, 34.216, 33.331, 33.551,
       33.503, 34.22 , 33.189, 33.258, 34.127, 32.495, 34.534, 32.644,
       33.248, 33.014, 34.538, 32.17 , 34.404, 32.29 , 31.721, 31.531,
       33.165, 32.97 , 32.575, 33.037, 33.416, 32.905, 32.915, 33.647,
       33.39 , 31.963, 34.296, 34.65 , 33.235, 32.158, 34.443, 32.94 ])
{'seed': 1, 'cuda': -1, 'env': {'env_name': 'Swimmer-v3', 'T': 100, 'state_indices': 'all'}, 'sac': {'epochs': 2000, 'log_step_interval': 500, 'update_every': 5, 'update_num': 1, 'random_explore_episodes': 10, 'batch_size': 100, 'lr': 0.001, 'alpha': 0.2, 'automatic_alpha_tuning': False, 'buffer_size': 1000000, 'num_test_episodes': 10}, 'expert': {'training_episode': 50, 'eval_episode': 1, 'samples_episode': 64}, 'reward': {'use_bn': False, 'residual': False, 'hid_act': 'relu', 'hidden_sizes': [64, 64], 'clamp_magnitude': 10, 'lr': 0.0001, 'weight_decay': 0.001, 'gradient_step': 1, 'momentum': 0.9}}