137.78686964010507
episode: 0 training return: tensor(105.0278, device='cuda:0')
episode: 1 training return: tensor(103.3732, device='cuda:0')
episode: 2 training return: tensor(107.5054, device='cuda:0')
episode: 3 training return: tensor(107.0795, device='cuda:0')
epoch: 1 test_true_pfm: 122.78438300535072 sim_pfm: 99.68649806440226
episode: 4 training return: tensor(100.2130, device='cuda:0')
episode: 5 training return: tensor(105.6433, device='cuda:0')
episode: 6 training return: tensor(106.3667, device='cuda:0')
episode: 7 training return: tensor(105.6103, device='cuda:0')
epoch: 2 test_true_pfm: 116.17410992091354 sim_pfm: 100.64768192750053
episode: 8 training return: tensor(107.8229, device='cuda:0')
episode: 9 training return: tensor(102.1682, device='cuda:0')
episode: 10 training return: tensor(102.0387, device='cuda:0')
episode: 11 training return: tensor(103.4006, device='cuda:0')
epoch: 3 test_true_pfm: 114.89763639627165 sim_pfm: 96.43936259403127
episode: 12 training return: tensor(105.4110, device='cuda:0')
episode: 13 training return: tensor(110.4408, device='cuda:0')
