3547.7469955728716
episode: 0 training return: tensor(-316.7202, device='cuda:0')
episode: 1 training return: tensor(-293.3904, device='cuda:0')
episode: 2 training return: tensor(-292.7188, device='cuda:0')
episode: 3 training return: tensor(-177.6054, device='cuda:0')
epoch: 1 test_true_pfm: 3892.5206121360548
episode: 4 training return: tensor(-261.7608, device='cuda:0')
episode: 5 training return: tensor(-168.2868, device='cuda:0')
episode: 6 training return: tensor(-223.8471, device='cuda:0')
episode: 7 training return: tensor(-258.2920, device='cuda:0')
epoch: 2 test_true_pfm: 7521.524883238802
episode: 8 training return: tensor(-314.2995, device='cuda:0')
episode: 9 training return: tensor(-262.9318, device='cuda:0')
episode: 10 training return: tensor(-270.2752, device='cuda:0')
episode: 11 training return: tensor(-262.3123, device='cuda:0')
epoch: 3 test_true_pfm: 8513.136707440943
episode: 12 training return: tensor(-263.0050, device='cuda:0')
episode: 13 training return: tensor(-272.6286, device='cuda:0')
episode: 14 training return: tensor(-259.7676, device='cuda:0')
episode: 15 training return: tensor(-261.0503, device='cuda:0')
epoch: 4 test_true_pfm: 8483.346782098486
episode: 16 training return: tensor(-260.2880, device='cuda:0')
episode: 17 training return: tensor(-262.9626, device='cuda:0')
episode: 18 training return: tensor(-318.6982, device='cuda:0')
episode: 19 training return: tensor(-267.4032, device='cuda:0')
epoch: 5 test_true_pfm: 6031.886210431781
episode: 20 training return: tensor(-317.9271, device='cuda:0')
episode: 21 training return: tensor(-267.1815, device='cuda:0')
episode: 22 training return: tensor(-249.2781, device='cuda:0')
episode: 23 training return: tensor(-314.0691, device='cuda:0')
epoch: 6 test_true_pfm: 5196.893303179459
episode: 24 training return: tensor(-239.1883, device='cuda:0')
episode: 25 training return: tensor(-306.0605, device='cuda:0')
episode: 26 training return: tensor(-275.8032, device='cuda:0')
episode: 27 training return: tensor(-310.8374, device='cuda:0')
epoch: 7 test_true_pfm: 6702.952155138832
episode: 28 training return: tensor(-278.6533, device='cuda:0')
episode: 29 training return: tensor(-197.9618, device='cuda:0')
episode: 30 training return: tensor(-262.4081, device='cuda:0')
episode: 31 training return: tensor(-270.9036, device='cuda:0')
epoch: 8 test_true_pfm: 8723.48986814278
episode: 32 training return: tensor(-315.1885, device='cuda:0')
episode: 33 training return: tensor(-261.1665, device='cuda:0')
episode: 34 training return: tensor(-260.2581, device='cuda:0')
episode: 35 training return: tensor(-307.7480, device='cuda:0')
epoch: 9 test_true_pfm: 4622.312208819779
