Expert(Sto) Return Avg: 198.13, std: 21.23
array([185.814, 207.438, 215.856, 209.645, 206.846, 216.252, 205.591,
       129.215, 177.219, 204.958, 191.32 , 161.017, 167.981, 213.291,
       226.912, 220.095, 208.827, 238.001, 190.705, 169.163, 187.226,
       217.465, 200.158, 218.517, 206.094, 190.596, 184.574, 201.913,
       225.184, 189.492, 192.857, 199.919, 156.528, 210.235, 210.359,
       196.694, 210.357, 174.123, 216.245, 155.861, 218.686, 172.122,
       195.403, 212.667, 165.288, 209.921, 174.241, 206.09 , 202.862,
       203.81 , 211.745, 168.285, 221.906, 170.462, 214.908, 230.49 ,
       211.494, 193.49 , 163.561, 215.105, 209.695, 203.863, 199.342,
       214.252])
Expert(Det) Return Avg: 204.79, std: 7.50
array([201.332, 209.389, 203.604, 205.888, 203.53 , 209.077, 210.886,
       213.232, 212.563, 201.484, 215.847, 194.672, 197.082, 201.746,
       190.529, 210.189, 187.46 , 207.779, 210.233, 197.006, 213.065,
       200.339, 201.874, 194.352, 206.969, 194.635, 210.908, 212.234,
       194.779, 204.563, 208.696, 215.051, 194.683, 214.952, 206.237,
       204.999, 204.388, 214.326, 216.205, 211.879, 192.286, 201.302,
       199.817, 194.906, 217.112, 194.857, 198.699, 213.382, 205.814,
       207.817, 211.441, 196.3  , 212.928, 189.845, 202.368, 199.987,
       204.479, 202.862, 205.215, 214.472, 209.227, 200.793, 214.508,
       207.506])
{'seed': 1, 'cuda': -1, 'env': {'env_name': 'Walker2d-v3', 'T': 100, 'state_indices': 'all'}, 'sac': {'epochs': 2000, 'log_step_interval': 500, 'update_every': 5, 'update_num': 1, 'random_explore_episodes': 10, 'batch_size': 100, 'lr': 0.001, 'alpha': 0.2, 'automatic_alpha_tuning': False, 'buffer_size': 1000000, 'num_test_episodes': 10}, 'expert': {'samples_episode': 64}, 'reward': {'use_bn': False, 'residual': False, 'hid_act': 'relu', 'hidden_sizes': [64, 64], 'clamp_magnitude': 10, 'path': None}}