138.24724978334052
episode: 0 training return: tensor(105.6300, device='cuda:0')
episode: 1 training return: tensor(106.3510, device='cuda:0')
episode: 2 training return: tensor(102.8602, device='cuda:0')
episode: 3 training return: tensor(107.7648, device='cuda:0')
epoch: 1 test_true_pfm: 133.48615768241285 sim_pfm: 106.57802305246005
episode: 4 training return: tensor(100.9053, device='cuda:0')
episode: 5 training return: tensor(106.1220, device='cuda:0')
episode: 6 training return: tensor(106.6914, device='cuda:0')
episode: 7 training return: tensor(103.1407, device='cuda:0')
epoch: 2 test_true_pfm: 134.10371564362157 sim_pfm: 106.40849706512526
episode: 8 training return: tensor(101.3832, device='cuda:0')
episode: 9 training return: tensor(106.3555, device='cuda:0')
episode: 10 training return: tensor(105.2032, device='cuda:0')
episode: 11 training return: tensor(105.9592, device='cuda:0')
epoch: 3 test_true_pfm: 135.10961553795613 sim_pfm: 107.29660508455709
episode: 12 training return: tensor(104.5634, device='cuda:0')
episode: 13 training return: tensor(103.7692, device='cuda:0')
episode: 14 training return: tensor(112.0605, device='cuda:0')
episode: 15 training return: tensor(105.5114, device='cuda:0')
epoch: 4 test_true_pfm: 136.20608518176698 sim_pfm: 106.44798999861814
episode: 16 training return: tensor(107.1139, device='cuda:0')
episode: 17 training return: tensor(106.5432, device='cuda:0')
episode: 18 training return: tensor(105.0808, device='cuda:0')
episode: 19 training return: tensor(110.3184, device='cuda:0')
epoch: 5 test_true_pfm: 135.11386374573573 sim_pfm: 107.17173141535605
episode: 20 training return: tensor(107.1079, device='cuda:0')
episode: 21 training return: tensor(109.5762, device='cuda:0')
episode: 22 training return: tensor(104.8880, device='cuda:0')
episode: 23 training return: tensor(104.0295, device='cuda:0')
epoch: 6 test_true_pfm: 134.04289390978596 sim_pfm: 103.41182569526136
episode: 24 training return: tensor(105.8843, device='cuda:0')
episode: 25 training return: tensor(109.8638, device='cuda:0')
episode: 26 training return: tensor(104.9171, device='cuda:0')
episode: 27 training return: tensor(102.0831, device='cuda:0')
epoch: 7 test_true_pfm: 134.08165397662495 sim_pfm: 106.45393439042383
episode: 28 training return: tensor(105.4001, device='cuda:0')
episode: 29 training return: tensor(104.5330, device='cuda:0')
episode: 30 training return: tensor(101.1355, device='cuda:0')
episode: 31 training return: tensor(105.9275, device='cuda:0')
epoch: 8 test_true_pfm: 132.41310353671435 sim_pfm: 105.01640299963765
episode: 32 training return: tensor(105.5962, device='cuda:0')
episode: 33 training return: tensor(105.8313, device='cuda:0')
episode: 34 training return: tensor(108.3881, device='cuda:0')
episode: 35 training return: tensor(99.7591, device='cuda:0')
epoch: 9 test_true_pfm: 133.47330523603443 sim_pfm: 103.46246724022785
episode: 36 training return: tensor(104.2841, device='cuda:0')
episode: 37 training return: tensor(105.1310, device='cuda:0')
episode: 38 training return: tensor(103.0658, device='cuda:0')
episode: 39 training return: tensor(100.2106, device='cuda:0')
epoch: 10 test_true_pfm: 132.7846373075239 sim_pfm: 104.91889851525193
episode: 40 training return: tensor(105.9888, device='cuda:0')
episode: 41 training return: tensor(103.9516, device='cuda:0')
episode: 42 training return: tensor(104.3704, device='cuda:0')
episode: 43 training return: tensor(107.5763, device='cuda:0')
