Expert(Sto) Return Avg: 678.41, std: 24.19
array([664.179, 674.768, 680.428, 654.549, 670.532, 656.351, 675.553,
       655.006, 618.616, 704.189, 688.91 , 687.286, 680.319, 644.811,
       682.429, 698.795, 679.711, 671.479, 675.137, 721.022, 684.696,
       694.065, 703.582, 673.545, 667.775, 685.06 , 698.528, 713.494,
       674.06 , 686.613, 730.431, 642.779, 679.42 , 663.256, 686.676,
       699.344, 681.422, 703.009, 624.38 , 638.937, 678.997, 667.046,
       622.61 , 724.238, 651.933, 682.394, 673.552, 698.771, 668.275,
       711.995, 690.388, 690.507, 635.541, 701.708, 688.978, 679.744,
       663.236, 694.38 , 688.283, 701.152, 635.97 , 712.778, 683.171,
       657.355])
Expert(Det) Return Avg: 717.93, std: 19.94
array([725.891, 763.215, 717.235, 729.338, 725.468, 726.761, 720.567,
       683.903, 721.025, 715.58 , 699.896, 731.136, 750.13 , 707.954,
       750.795, 704.693, 731.547, 738.762, 754.786, 685.905, 750.711,
       713.617, 712.19 , 703.502, 704.414, 701.953, 702.622, 712.524,
       735.664, 695.062, 713.141, 720.258, 729.165, 696.751, 742.361,
       737.394, 722.222, 714.317, 717.061, 683.371, 702.246, 711.921,
       700.934, 704.719, 738.338, 729.713, 753.261, 707.292, 744.938,
       721.157, 714.796, 686.074, 702.65 , 677.679, 684.966, 720.89 ,
       750.676, 715.447, 701.813, 737.311, 739.958, 706.943, 700.705,
       700.034])
ordereddict([('seed', 1), ('cuda', -1), ('env', ordereddict([('env_name', 'HalfCheetah-v4'), ('T', 100), ('state_indices', 'all')])), ('sac', ordereddict([('epochs', 10000), ('log_step_interval', 500), ('update_every', 5), ('update_num', 1), ('random_explore_episodes', 10), ('batch_size', 100), ('lr', 0.001), ('alpha', 0.2), ('automatic_alpha_tuning', False), ('buffer_size', 1000000), ('num_test_episodes', 10)])), ('expert', ordereddict([('samples_episode', 64)])), ('reward', ordereddict([('use_bn', False), ('residual', False), ('hid_act', 'relu'), ('hidden_sizes', [64, 64]), ('clamp_magnitude', 10), ('lr', 0.0001), ('weight_decay', 0.001), ('gradient_step', 1), ('momentum', 0.9)]))])