4848.2003111114345
episode: 0 training return: tensor(-925.6822, device='cuda:0')
episode: 1 training return: tensor(-999.9990, device='cuda:0')
episode: 2 training return: tensor(-999.9991, device='cuda:0')
episode: 3 training return: tensor(-474.4514, device='cuda:0')
epoch: 1 test_true_pfm: 6007.595334484436 sim_pfm: -388.07984665746335
episode: 4 training return: tensor(-857.8235, device='cuda:0')
episode: 5 training return: tensor(-467.4006, device='cuda:0')
episode: 6 training return: tensor(-999.1536, device='cuda:0')
episode: 7 training return: tensor(-996.0417, device='cuda:0')
epoch: 2 test_true_pfm: 6762.744864883767 sim_pfm: -325.1623702719323
episode: 8 training return: tensor(-871.5560, device='cuda:0')
episode: 9 training return: tensor(-376.8216, device='cuda:0')
episode: 10 training return: tensor(-731.6616, device='cuda:0')
episode: 11 training return: tensor(-540.2244, device='cuda:0')
epoch: 3 test_true_pfm: 7379.533477272388 sim_pfm: -566.6503796861119
episode: 12 training return: tensor(-409.6816, device='cuda:0')
episode: 13 training return: tensor(-974.1447, device='cuda:0')
episode: 14 training return: tensor(-630.0140, device='cuda:0')
episode: 15 training return: tensor(-1000., device='cuda:0')
epoch: 4 test_true_pfm: 7972.632587850433 sim_pfm: -329.5856063645915
episode: 16 training return: tensor(-195.1577, device='cuda:0')
episode: 17 training return: tensor(-574.4995, device='cuda:0')
episode: 18 training return: tensor(-919.2033, device='cuda:0')
episode: 19 training return: tensor(-191.1108, device='cuda:0')
epoch: 5 test_true_pfm: 3690.0738225792375 sim_pfm: -480.2194952869613
episode: 20 training return: tensor(-792.3126, device='cuda:0')
episode: 21 training return: tensor(-991.8558, device='cuda:0')
episode: 22 training return: tensor(-839.7666, device='cuda:0')
episode: 23 training return: tensor(-394.1888, device='cuda:0')
epoch: 6 test_true_pfm: 3341.5319191437197 sim_pfm: -197.3682219753197
episode: 24 training return: tensor(-962.1411, device='cuda:0')
episode: 25 training return: tensor(-999.8353, device='cuda:0')
episode: 26 training return: tensor(-998.5007, device='cuda:0')
episode: 27 training return: tensor(-947.7735, device='cuda:0')
epoch: 7 test_true_pfm: 3263.015137201808 sim_pfm: -770.7713135486507
episode: 28 training return: tensor(-596.7696, device='cuda:0')
episode: 29 training return: tensor(-882.9705, device='cuda:0')
