Expert(Sto) Return Avg: 299.79, std: 26.53
array([340.237, 297.458, 262.133, 321.399, 369.564, 320.675, 288.996,
       300.563, 276.598, 301.726, 284.435, 290.013, 289.678, 350.007,
       288.996, 323.604, 277.053, 272.752, 311.625, 278.051, 274.893,
       278.017, 271.707, 318.376, 290.464, 273.502, 282.919, 306.03 ,
       308.287, 316.584, 309.348, 291.811, 307.853, 319.319, 292.08 ,
       249.106, 287.742, 308.399, 333.665, 334.705, 281.875, 371.8  ,
       275.423, 269.513, 267.804, 306.056, 313.956, 268.958, 318.865,
       284.428, 233.736, 327.609, 290.468, 316.074, 329.167, 307.16 ,
       336.742, 282.254, 321.916, 327.383, 274.964, 308.36 , 298.797,
       273.066])
Expert(Det) Return Avg: 353.07, std: 27.97
array([317.789, 346.435, 340.724, 365.598, 325.373, 337.042, 369.705,
       387.343, 350.011, 362.314, 403.798, 260.067, 378.45 , 385.662,
       326.308, 339.212, 315.012, 338.909, 380.906, 349.094, 368.99 ,
       360.179, 388.184, 376.897, 348.558, 327.085, 327.891, 326.012,
       360.747, 369.959, 387.629, 400.16 , 329.747, 374.396, 394.767,
       302.394, 348.584, 389.023, 339.982, 334.884, 291.187, 336.533,
       343.857, 304.221, 386.943, 333.254, 333.34 , 373.647, 342.114,
       356.036, 345.933, 365.655, 365.81 , 352.727, 358.263, 355.527,
       362.542, 354.91 , 341.429, 332.403, 369.97 , 403.203, 357.401,
       393.864])
ordereddict([('seed', 1), ('cuda', -1), ('env', ordereddict([('env_name', 'Ant-v4'), ('T', 100), ('state_indices', 'all')])), ('sac', ordereddict([('epochs', 10000), ('log_step_interval', 500), ('update_every', 5), ('update_num', 1), ('random_explore_episodes', 10), ('batch_size', 100), ('lr', 0.001), ('alpha', 0.2), ('automatic_alpha_tuning', False), ('buffer_size', 1000000), ('num_test_episodes', 10)])), ('expert', ordereddict([('samples_episode', 64)])), ('reward', ordereddict([('use_bn', False), ('residual', False), ('hid_act', 'relu'), ('hidden_sizes', [64, 64]), ('clamp_magnitude', 10), ('lr', 0.0001), ('weight_decay', 0.001), ('gradient_step', 1), ('momentum', 0.9)]))])