epoch: 0 training_loss 37.89949327468872 test_loss: 19.605665588378905
epoch: 1 training_loss 16.518753576278687 test_loss: 14.054580688476562
epoch: 2 training_loss 12.431538124084472 test_loss: 11.056790924072265
epoch: 3 training_loss 10.317011842727661 test_loss: 9.938077545166015
epoch: 4 training_loss 8.99241222858429 test_loss: 8.481138610839844
epoch: 5 training_loss 7.791296973228454 test_loss: 7.654242706298828
epoch: 6 training_loss 7.124262886047363 test_loss: 7.246121978759765
epoch: 7 training_loss 6.235845055580139 test_loss: 6.244119262695312
epoch: 8 training_loss 5.690398254394531 test_loss: 5.6115570068359375
epoch: 9 training_loss 5.142407214641571 test_loss: 4.8348041534423825
epoch: 10 training_loss 4.899416599273682 test_loss: 4.746855926513672
epoch: 11 training_loss 4.5414904117584225 test_loss: 4.465702056884766
epoch: 12 training_loss 4.220159010887146 test_loss: 4.2206989288330075
epoch: 13 training_loss 4.274030458927155 test_loss: 3.97961311340332
epoch: 14 training_loss 3.984395117759705 test_loss: 3.8090694427490233
epoch: 15 training_loss 3.7590818047523498 test_loss: 3.5254337310791017
epoch: 16 training_loss 3.754456329345703 test_loss: 3.7335086822509767
epoch: 17 training_loss 3.474013307094574 test_loss: 3.440644073486328
epoch: 18 training_loss 3.3815713214874266 test_loss: 3.2340972900390623
epoch: 19 training_loss 3.2695675039291383 test_loss: 3.303671646118164
epoch: 20 training_loss 3.1772249913215638 test_loss: 3.3838031768798826
epoch: 21 training_loss 2.966872375011444 test_loss: 3.1441886901855467
epoch: 22 training_loss 3.025470640659332 test_loss: 2.9470338821411133
epoch: 23 training_loss 2.90580904006958 test_loss: 2.9038555145263674
epoch: 24 training_loss 2.889833015203476 test_loss: 2.7122121810913087
epoch: 25 training_loss 2.773686935901642 test_loss: 2.869289016723633
epoch: 26 training_loss 2.7030001497268676 test_loss: 2.5762508392333983
epoch: 27 training_loss 2.729840372800827 test_loss: 2.7097036361694338
epoch: 28 training_loss 2.6431161296367645 test_loss: 2.2869171142578124
epoch: 29 training_loss 2.527018632888794 test_loss: 2.7634225845336915
epoch: 30 training_loss 2.4741189289093017 test_loss: 2.455088233947754
epoch: 31 training_loss 2.4896085250377653 test_loss: 2.4925336837768555
epoch: 32 training_loss 2.46482950091362 test_loss: 2.292959785461426
epoch: 33 training_loss 2.3351487743854524 test_loss: 2.388816261291504
epoch: 34 training_loss 2.315794441699982 test_loss: 2.2026342391967773
epoch: 35 training_loss 2.330775045156479 test_loss: 2.2692630767822264
epoch: 36 training_loss 2.253105001449585 test_loss: 2.1857889175415037
epoch: 37 training_loss 2.320696028470993 test_loss: 2.1878129959106447
epoch: 38 training_loss 2.1380049777030945 test_loss: 2.253851127624512
epoch: 39 training_loss 2.1687214505672454 test_loss: 2.3393545150756836
epoch: 40 training_loss 2.1722848522663116 test_loss: 2.095554733276367
epoch: 41 training_loss 2.1311119318008425 test_loss: 1.9844736099243163
epoch: 42 training_loss 2.146028674840927 test_loss: 2.241992378234863
epoch: 43 training_loss 2.102972640991211 test_loss: 1.8233478546142579
epoch: 44 training_loss 2.0915616869926454 test_loss: 2.0374740600585937
epoch: 45 training_loss 2.0430420660972595 test_loss: 2.0590091705322267
epoch: 46 training_loss 1.9783249592781067 test_loss: 1.9834722518920898
epoch: 47 training_loss 1.9805260014533996 test_loss: 2.2475360870361327
epoch: 48 training_loss 1.9583288419246674 test_loss: 1.8066783905029298
epoch: 49 training_loss 1.878531255722046 test_loss: 1.846183967590332
epoch: 50 training_loss 1.8910248744487763 test_loss: 1.749842071533203
epoch: 51 training_loss 1.9219647669792175 test_loss: 1.9342666625976563
epoch: 52 training_loss 1.8125700664520263 test_loss: 1.8507698059082032
epoch: 53 training_loss 1.8529163098335266 test_loss: 1.842762565612793
epoch: 54 training_loss 1.8217035615444184 test_loss: 1.948868751525879
epoch: 55 training_loss 1.768904676437378 test_loss: 1.788550567626953
epoch: 56 training_loss 1.7856931352615357 test_loss: 1.7960729598999023
epoch: 57 training_loss 1.803951598405838 test_loss: 1.6320045471191407
epoch: 58 training_loss 1.7553680503368378 test_loss: 1.8979877471923827
epoch: 59 training_loss 1.7976289737224578 test_loss: 1.7067167282104492
epoch: 60 training_loss 1.6700254702568054 test_loss: 1.8210046768188477
epoch: 61 training_loss 1.7007238316535949 test_loss: 1.8620183944702149
epoch: 62 training_loss 1.705735387802124 test_loss: 1.756810188293457
epoch: 63 training_loss 1.639929221868515 test_loss: 1.7814146041870118
epoch: 64 training_loss 1.6490786480903625 test_loss: 1.759440803527832
epoch: 65 training_loss 1.6264476692676544 test_loss: 1.8611520767211913
epoch: 66 training_loss 1.6437711024284363 test_loss: 1.882676124572754
epoch: 67 training_loss 1.6760468888282776 test_loss: 1.6139598846435548
epoch: 68 training_loss 1.6302000594139099 test_loss: 1.6853483200073243
epoch: 69 training_loss 1.575511736869812 test_loss: 1.5115474700927733
epoch: 70 training_loss 1.6013815343379973 test_loss: 1.5895645141601562
epoch: 71 training_loss 1.5767284125089644 test_loss: 1.3984919548034669
epoch: 72 training_loss 1.5759847366809845 test_loss: 1.4162350654602052
epoch: 73 training_loss 1.576932578086853 test_loss: 1.6205759048461914
epoch: 74 training_loss 1.5411283361911774 test_loss: 1.6070337295532227
epoch: 75 training_loss 1.5181078284978866 test_loss: 1.4982050895690917
epoch: 76 training_loss 1.5329688256978988 test_loss: 1.5501179695129395
epoch: 77 training_loss 1.4945071697235108 test_loss: 1.5588801383972168
epoch: 78 training_loss 1.5405108559131622 test_loss: 1.3255927085876464
epoch: 79 training_loss 1.486966656446457 test_loss: 1.5272932052612305
2635.135436700447
episode: 0 training return: tensor(-999.9144, device='cuda:0')
episode: 1 training return: tensor(-997.1943, device='cuda:0')
episode: 2 training return: tensor(-999.9802, device='cuda:0')
episode: 3 training return: tensor(-999.9886, device='cuda:0')
epoch: 1 test_true_pfm: 3217.816483844889
episode: 4 training return: tensor(-999.0963, device='cuda:0')
episode: 5 training return: tensor(-999.9663, device='cuda:0')
episode: 6 training return: tensor(-999.9921, device='cuda:0')
episode: 7 training return: tensor(-991.0261, device='cuda:0')
epoch: 2 test_true_pfm: 2004.1410738385712
episode: 8 training return: tensor(-999.9872, device='cuda:0')
episode: 9 training return: tensor(-999.9919, device='cuda:0')
episode: 10 training return: tensor(-996.0817, device='cuda:0')
episode: 11 training return: tensor(-993.2427, device='cuda:0')
epoch: 3 test_true_pfm: -265.67496249256243
episode: 12 training return: tensor(-924.0803, device='cuda:0')
episode: 13 training return: tensor(-999.9633, device='cuda:0')
episode: 14 training return: tensor(-999.9417, device='cuda:0')
episode: 15 training return: tensor(-977.5451, device='cuda:0')
epoch: 4 test_true_pfm: 3638.469397275548
episode: 16 training return: tensor(-999.9521, device='cuda:0')
episode: 17 training return: tensor(-999.9833, device='cuda:0')
episode: 18 training return: tensor(-996.1462, device='cuda:0')
episode: 19 training return: tensor(-998.4870, device='cuda:0')
epoch: 5 test_true_pfm: 884.8276306243079
episode: 20 training return: tensor(-999.9769, device='cuda:0')
episode: 21 training return: tensor(-999.9899, device='cuda:0')
episode: 22 training return: tensor(-999.7485, device='cuda:0')
episode: 23 training return: tensor(-999.9693, device='cuda:0')
epoch: 6 test_true_pfm: 2337.2561965191517
episode: 24 training return: tensor(-990.9936, device='cuda:0')
episode: 25 training return: tensor(-999.9649, device='cuda:0')
episode: 26 training return: tensor(-999.9749, device='cuda:0')
episode: 27 training return: tensor(-994.3420, device='cuda:0')
epoch: 7 test_true_pfm: 1352.7127789913384
episode: 28 training return: tensor(-994.2102, device='cuda:0')
episode: 29 training return: tensor(-997.2526, device='cuda:0')
episode: 30 training return: tensor(-999.5171, device='cuda:0')
episode: 31 training return: tensor(-999.9716, device='cuda:0')
epoch: 8 test_true_pfm: -182.22541896485583
episode: 32 training return: tensor(-999.9621, device='cuda:0')
episode: 33 training return: tensor(-999.9604, device='cuda:0')
episode: 34 training return: tensor(-999.9748, device='cuda:0')
episode: 35 training return: tensor(-999.9656, device='cuda:0')
epoch: 9 test_true_pfm: 147.57183871176616
episode: 36 training return: tensor(-999.9898, device='cuda:0')
episode: 37 training return: tensor(-999.8953, device='cuda:0')
episode: 38 training return: tensor(-999.9824, device='cuda:0')
episode: 39 training return: tensor(-989.8054, device='cuda:0')
epoch: 10 test_true_pfm: 844.8187678421851
episode: 40 training return: tensor(-999.8077, device='cuda:0')
episode: 41 training return: tensor(-999.8285, device='cuda:0')
episode: 42 training return: tensor(-953.3524, device='cuda:0')
episode: 43 training return: tensor(-946.2724, device='cuda:0')
epoch: 11 test_true_pfm: 1576.6217371917412
episode: 44 training return: tensor(-999.1175, device='cuda:0')
episode: 45 training return: tensor(-991.9116, device='cuda:0')
episode: 46 training return: tensor(-999.9642, device='cuda:0')
episode: 47 training return: tensor(-997.1450, device='cuda:0')
epoch: 12 test_true_pfm: 1568.0369615242969
episode: 48 training return: tensor(-999.7916, device='cuda:0')
episode: 49 training return: tensor(-999.9600, device='cuda:0')
episode: 50 training return: tensor(-999.1093, device='cuda:0')
episode: 51 training return: tensor(-990.0705, device='cuda:0')
epoch: 13 test_true_pfm: 43.636704907191614
episode: 52 training return: tensor(-999.1979, device='cuda:0')
episode: 53 training return: tensor(-973.6646, device='cuda:0')
episode: 54 training return: tensor(-998.8286, device='cuda:0')
episode: 55 training return: tensor(-999.9890, device='cuda:0')
epoch: 14 test_true_pfm: 1160.747786877801
episode: 56 training return: tensor(-921.2500, device='cuda:0')
episode: 57 training return: tensor(-999.9908, device='cuda:0')
episode: 58 training return: tensor(-955.2690, device='cuda:0')
episode: 59 training return: tensor(-999.8171, device='cuda:0')
epoch: 15 test_true_pfm: 1452.9294012456128
episode: 60 training return: tensor(-999.9703, device='cuda:0')
episode: 61 training return: tensor(-999.7738, device='cuda:0')
episode: 62 training return: tensor(-984.4858, device='cuda:0')
episode: 63 training return: tensor(-999.9833, device='cuda:0')
epoch: 16 test_true_pfm: 21.583350159552
episode: 64 training return: tensor(-999.9567, device='cuda:0')
episode: 65 training return: tensor(-999.5660, device='cuda:0')
episode: 66 training return: tensor(-999.9904, device='cuda:0')
episode: 67 training return: tensor(-999.9695, device='cuda:0')
epoch: 17 test_true_pfm: 1140.949853594372
episode: 68 training return: tensor(-971.4515, device='cuda:0')
episode: 69 training return: tensor(-999.9390, device='cuda:0')
episode: 70 training return: tensor(-999.9695, device='cuda:0')
episode: 71 training return: tensor(-999.9111, device='cuda:0')
epoch: 18 test_true_pfm: 339.73365089437135
episode: 72 training return: tensor(-963.4362, device='cuda:0')
episode: 73 training return: tensor(-999.9914, device='cuda:0')
episode: 74 training return: tensor(-999.9908, device='cuda:0')
episode: 75 training return: tensor(-999.8989, device='cuda:0')
epoch: 19 test_true_pfm: 939.6509612622662
episode: 76 training return: tensor(-999.9711, device='cuda:0')
episode: 77 training return: tensor(-999.9849, device='cuda:0')
episode: 78 training return: tensor(-999.9886, device='cuda:0')
episode: 79 training return: tensor(-999.7549, device='cuda:0')
epoch: 20 test_true_pfm: 701.6128020173937
episode: 80 training return: tensor(-997.7496, device='cuda:0')
episode: 81 training return: tensor(-999.5044, device='cuda:0')
episode: 82 training return: tensor(-976.2654, device='cuda:0')
episode: 83 training return: tensor(-999.9346, device='cuda:0')
epoch: 21 test_true_pfm: 653.1421452534742
episode: 84 training return: tensor(-999.7161, device='cuda:0')
episode: 85 training return: tensor(-999.9103, device='cuda:0')
episode: 86 training return: tensor(-999.6619, device='cuda:0')
episode: 87 training return: tensor(-992.3441, device='cuda:0')
epoch: 22 test_true_pfm: -61.51840913575629
episode: 88 training return: tensor(-965.0430, device='cuda:0')
episode: 89 training return: tensor(-999.5710, device='cuda:0')
episode: 90 training return: tensor(-999.8163, device='cuda:0')
episode: 91 training return: tensor(-999.9900, device='cuda:0')
epoch: 23 test_true_pfm: -19.827588096114614
episode: 92 training return: tensor(-996.6411, device='cuda:0')
episode: 93 training return: tensor(-994.9675, device='cuda:0')
episode: 94 training return: tensor(-999.4576, device='cuda:0')
episode: 95 training return: tensor(-999.9649, device='cuda:0')
epoch: 24 test_true_pfm: 1405.155538003697
episode: 96 training return: tensor(-999.4717, device='cuda:0')
episode: 97 training return: tensor(-852.1152, device='cuda:0')
episode: 98 training return: tensor(-990.7927, device='cuda:0')
episode: 99 training return: tensor(-953.6306, device='cuda:0')
epoch: 25 test_true_pfm: 1355.9368497348228
episode: 100 training return: tensor(-994.7680, device='cuda:0')
episode: 101 training return: tensor(-972.5564, device='cuda:0')
episode: 102 training return: tensor(-999.9711, device='cuda:0')
episode: 103 training return: tensor(-999.9117, device='cuda:0')
epoch: 26 test_true_pfm: 614.291955756954
episode: 104 training return: tensor(-999.9570, device='cuda:0')
episode: 105 training return: tensor(-973.4756, device='cuda:0')
episode: 106 training return: tensor(-999.8865, device='cuda:0')
episode: 107 training return: tensor(-979.9880, device='cuda:0')
epoch: 27 test_true_pfm: 1940.5549695712562
episode: 108 training return: tensor(-999.9440, device='cuda:0')
episode: 109 training return: tensor(-999.9883, device='cuda:0')
episode: 110 training return: tensor(-998.5385, device='cuda:0')
episode: 111 training return: tensor(-999.9581, device='cuda:0')
epoch: 28 test_true_pfm: 710.2952659059071
episode: 112 training return: tensor(-999.9741, device='cuda:0')
episode: 113 training return: tensor(-999.8646, device='cuda:0')
episode: 114 training return: tensor(-989.5867, device='cuda:0')
episode: 115 training return: tensor(-999.9647, device='cuda:0')
epoch: 29 test_true_pfm: 22.71721671278713
episode: 116 training return: tensor(-993.3553, device='cuda:0')
episode: 117 training return: tensor(-999.9818, device='cuda:0')
episode: 118 training return: tensor(-999.9772, device='cuda:0')
episode: 119 training return: tensor(-986.8776, device='cuda:0')
epoch: 30 test_true_pfm: 1051.9785237951708
episode: 120 training return: tensor(-999.9901, device='cuda:0')
episode: 121 training return: tensor(-999.9920, device='cuda:0')
episode: 122 training return: tensor(-999.9846, device='cuda:0')
episode: 123 training return: tensor(-999.9474, device='cuda:0')
epoch: 31 test_true_pfm: 1384.0277637916233
episode: 124 training return: tensor(-999.9210, device='cuda:0')
episode: 125 training return: tensor(-999.9011, device='cuda:0')
episode: 126 training return: tensor(-999.7679, device='cuda:0')
episode: 127 training return: tensor(-995.9627, device='cuda:0')
epoch: 32 test_true_pfm: 571.7584835330878
episode: 128 training return: tensor(-983.5531, device='cuda:0')
episode: 129 training return: tensor(-996.5161, device='cuda:0')
episode: 130 training return: tensor(-999.9905, device='cuda:0')
episode: 131 training return: tensor(-999.9863, device='cuda:0')
epoch: 33 test_true_pfm: 744.8532949638707
episode: 132 training return: tensor(-999.9680, device='cuda:0')
episode: 133 training return: tensor(-998.7288, device='cuda:0')
episode: 134 training return: tensor(-999.9915, device='cuda:0')
episode: 135 training return: tensor(-999.9710, device='cuda:0')
epoch: 34 test_true_pfm: 3442.709593639392
episode: 136 training return: tensor(-999.9828, device='cuda:0')
episode: 137 training return: tensor(-999.7739, device='cuda:0')
episode: 138 training return: tensor(-998.3463, device='cuda:0')
episode: 139 training return: tensor(-999.9916, device='cuda:0')
epoch: 35 test_true_pfm: 1211.2821716567814
episode: 140 training return: tensor(-992.8334, device='cuda:0')
episode: 141 training return: tensor(-999.6321, device='cuda:0')
episode: 142 training return: tensor(-999.9905, device='cuda:0')
episode: 143 training return: tensor(-999.8694, device='cuda:0')
epoch: 36 test_true_pfm: 734.7092879729802
episode: 144 training return: tensor(-999.9628, device='cuda:0')
episode: 145 training return: tensor(-999.5405, device='cuda:0')
episode: 146 training return: tensor(-999.9841, device='cuda:0')
episode: 147 training return: tensor(-991.8076, device='cuda:0')
epoch: 37 test_true_pfm: 2658.7266616755637
episode: 148 training return: tensor(-999.9905, device='cuda:0')
episode: 149 training return: tensor(-983.4234, device='cuda:0')
episode: 150 training return: tensor(-999.9831, device='cuda:0')
episode: 151 training return: tensor(-974.0520, device='cuda:0')
epoch: 38 test_true_pfm: -34.96613942346527
episode: 152 training return: tensor(-999.5079, device='cuda:0')
episode: 153 training return: tensor(-997.6833, device='cuda:0')
episode: 154 training return: tensor(-981.1458, device='cuda:0')
episode: 155 training return: tensor(-984.8465, device='cuda:0')
epoch: 39 test_true_pfm: 630.5170753518099
episode: 156 training return: tensor(-999.9020, device='cuda:0')
episode: 157 training return: tensor(-981.2889, device='cuda:0')
episode: 158 training return: tensor(-971.4285, device='cuda:0')
episode: 159 training return: tensor(-997.9229, device='cuda:0')
epoch: 40 test_true_pfm: 362.52100330743696
episode: 160 training return: tensor(-997.4078, device='cuda:0')
episode: 161 training return: tensor(-998.0842, device='cuda:0')
episode: 162 training return: tensor(-999.9909, device='cuda:0')
episode: 163 training return: tensor(-993.4811, device='cuda:0')
epoch: 41 test_true_pfm: 1324.2075273851972
episode: 164 training return: tensor(-999.9249, device='cuda:0')
episode: 165 training return: tensor(-996.0818, device='cuda:0')
episode: 166 training return: tensor(-999.8909, device='cuda:0')
episode: 167 training return: tensor(-999.8613, device='cuda:0')
epoch: 42 test_true_pfm: 884.9099409303032
episode: 168 training return: tensor(-999.9314, device='cuda:0')
episode: 169 training return: tensor(-999.9611, device='cuda:0')
episode: 170 training return: tensor(-996.9891, device='cuda:0')
episode: 171 training return: tensor(-999.1130, device='cuda:0')
epoch: 43 test_true_pfm: 874.0690064017589
episode: 172 training return: tensor(-999.9514, device='cuda:0')
episode: 173 training return: tensor(-970.3612, device='cuda:0')
episode: 174 training return: tensor(-999.3335, device='cuda:0')
episode: 175 training return: tensor(-999.5149, device='cuda:0')
epoch: 44 test_true_pfm: 1431.290823914689
episode: 176 training return: tensor(-999.7662, device='cuda:0')
episode: 177 training return: tensor(-999.9809, device='cuda:0')
episode: 178 training return: tensor(-999.9775, device='cuda:0')
episode: 179 training return: tensor(-999.9694, device='cuda:0')
epoch: 45 test_true_pfm: 162.98537799796287
episode: 180 training return: tensor(-991.3414, device='cuda:0')
episode: 181 training return: tensor(-969.7208, device='cuda:0')
episode: 182 training return: tensor(-993.5000, device='cuda:0')
episode: 183 training return: tensor(-999.9406, device='cuda:0')
epoch: 46 test_true_pfm: 69.60362257307334
episode: 184 training return: tensor(-996.1235, device='cuda:0')
episode: 185 training return: tensor(-999.8936, device='cuda:0')
episode: 186 training return: tensor(-887.9118, device='cuda:0')
episode: 187 training return: tensor(-998.9194, device='cuda:0')
epoch: 47 test_true_pfm: 1254.545462422191
episode: 188 training return: tensor(-999.9077, device='cuda:0')
episode: 189 training return: tensor(-999.9894, device='cuda:0')
episode: 190 training return: tensor(-999.9734, device='cuda:0')
episode: 191 training return: tensor(-999.9879, device='cuda:0')
epoch: 48 test_true_pfm: 793.0848398508484
episode: 192 training return: tensor(-990.4741, device='cuda:0')
episode: 193 training return: tensor(-987.1838, device='cuda:0')
episode: 194 training return: tensor(-990.7870, device='cuda:0')
episode: 195 training return: tensor(-999.9915, device='cuda:0')
epoch: 49 test_true_pfm: 1236.526040441768
episode: 196 training return: tensor(-996.8916, device='cuda:0')
episode: 197 training return: tensor(-999.9771, device='cuda:0')
episode: 198 training return: tensor(-999.9403, device='cuda:0')
episode: 199 training return: tensor(-988.3391, device='cuda:0')
epoch: 50 test_true_pfm: 2126.0904337303423
episode: 200 training return: tensor(-999.8456, device='cuda:0')
episode: 201 training return: tensor(-999.9772, device='cuda:0')
episode: 202 training return: tensor(-999.9901, device='cuda:0')
episode: 203 training return: tensor(-999.2366, device='cuda:0')
epoch: 51 test_true_pfm: 3417.279627962262
episode: 204 training return: tensor(-996.1165, device='cuda:0')
episode: 205 training return: tensor(-951.7358, device='cuda:0')
episode: 206 training return: tensor(-999.9824, device='cuda:0')
episode: 207 training return: tensor(-998.3959, device='cuda:0')
epoch: 52 test_true_pfm: 1374.9646487199482
episode: 208 training return: tensor(-999.9142, device='cuda:0')
episode: 209 training return: tensor(-999.9485, device='cuda:0')
episode: 210 training return: tensor(-988.2943, device='cuda:0')
episode: 211 training return: tensor(-999.9473, device='cuda:0')
epoch: 53 test_true_pfm: 40.84906707661353
episode: 212 training return: tensor(-999.7065, device='cuda:0')
episode: 213 training return: tensor(-999.9175, device='cuda:0')
episode: 214 training return: tensor(-996.5323, device='cuda:0')
episode: 215 training return: tensor(-999.4988, device='cuda:0')
epoch: 54 test_true_pfm: 717.0553636391197
episode: 216 training return: tensor(-999.9895, device='cuda:0')
episode: 217 training return: tensor(-999.5461, device='cuda:0')
episode: 218 training return: tensor(-992.7190, device='cuda:0')
episode: 219 training return: tensor(-999.8839, device='cuda:0')
epoch: 55 test_true_pfm: 1901.7569511722922
episode: 220 training return: tensor(-999.8487, device='cuda:0')
episode: 221 training return: tensor(-999.9773, device='cuda:0')
episode: 222 training return: tensor(-999.9827, device='cuda:0')
episode: 223 training return: tensor(-999.0950, device='cuda:0')
epoch: 56 test_true_pfm: 1764.517019455366
episode: 224 training return: tensor(-999.9086, device='cuda:0')
episode: 225 training return: tensor(-999.7106, device='cuda:0')
episode: 226 training return: tensor(-991.0303, device='cuda:0')
episode: 227 training return: tensor(-999.8722, device='cuda:0')
epoch: 57 test_true_pfm: 1116.2726196642732
episode: 228 training return: tensor(-998.0540, device='cuda:0')
episode: 229 training return: tensor(-997.6947, device='cuda:0')
episode: 230 training return: tensor(-999.9569, device='cuda:0')
episode: 231 training return: tensor(-999.9723, device='cuda:0')
epoch: 58 test_true_pfm: 2333.3824199899786
episode: 232 training return: tensor(-997.3028, device='cuda:0')
episode: 233 training return: tensor(-999.7271, device='cuda:0')
episode: 234 training return: tensor(-999.7906, device='cuda:0')
episode: 235 training return: tensor(-999.9539, device='cuda:0')
epoch: 59 test_true_pfm: 560.7758248517417
episode: 236 training return: tensor(-999.3410, device='cuda:0')
episode: 237 training return: tensor(-999.3746, device='cuda:0')
episode: 238 training return: tensor(-999.9727, device='cuda:0')
episode: 239 training return: tensor(-999.8782, device='cuda:0')
epoch: 60 test_true_pfm: -357.46108720192007
episode: 240 training return: tensor(-999.9526, device='cuda:0')
episode: 241 training return: tensor(-996.4932, device='cuda:0')
episode: 242 training return: tensor(-998.0302, device='cuda:0')
episode: 243 training return: tensor(-998.7076, device='cuda:0')
epoch: 61 test_true_pfm: 693.9443500507346
episode: 244 training return: tensor(-986.7705, device='cuda:0')
episode: 245 training return: tensor(-999.6943, device='cuda:0')
episode: 246 training return: tensor(-999.8509, device='cuda:0')
episode: 247 training return: tensor(-999.9670, device='cuda:0')
epoch: 62 test_true_pfm: 2142.928230018914
episode: 248 training return: tensor(-999.9565, device='cuda:0')
episode: 249 training return: tensor(-999.9908, device='cuda:0')
episode: 250 training return: tensor(-999.9021, device='cuda:0')
episode: 251 training return: tensor(-999.5875, device='cuda:0')
epoch: 63 test_true_pfm: 1967.0119118197708
episode: 252 training return: tensor(-978.5043, device='cuda:0')
episode: 253 training return: tensor(-997.0665, device='cuda:0')
episode: 254 training return: tensor(-999.9562, device='cuda:0')
episode: 255 training return: tensor(-927.7706, device='cuda:0')
epoch: 64 test_true_pfm: 2170.5929558012213
episode: 256 training return: tensor(-994.6613, device='cuda:0')
episode: 257 training return: tensor(-988.8901, device='cuda:0')
episode: 258 training return: tensor(-999.8781, device='cuda:0')
episode: 259 training return: tensor(-992.7046, device='cuda:0')
epoch: 65 test_true_pfm: 949.9653748941456
episode: 260 training return: tensor(-990.0936, device='cuda:0')
episode: 261 training return: tensor(-998.7134, device='cuda:0')
episode: 262 training return: tensor(-999.9911, device='cuda:0')
episode: 263 training return: tensor(-999.9904, device='cuda:0')
epoch: 66 test_true_pfm: 1209.3738316739611
episode: 264 training return: tensor(-996.7549, device='cuda:0')
episode: 265 training return: tensor(-999.9147, device='cuda:0')
episode: 266 training return: tensor(-999.9730, device='cuda:0')
episode: 267 training return: tensor(-999.9709, device='cuda:0')
epoch: 67 test_true_pfm: 1571.856842557363
episode: 268 training return: tensor(-999.6367, device='cuda:0')
episode: 269 training return: tensor(-999.9628, device='cuda:0')
episode: 270 training return: tensor(-999.9103, device='cuda:0')
episode: 271 training return: tensor(-979.5110, device='cuda:0')
epoch: 68 test_true_pfm: 1989.9811001177407
episode: 272 training return: tensor(-999.9802, device='cuda:0')
episode: 273 training return: tensor(-999.9543, device='cuda:0')
episode: 274 training return: tensor(-999.8495, device='cuda:0')
episode: 275 training return: tensor(-999.8878, device='cuda:0')
epoch: 69 test_true_pfm: 1080.8580787550497
episode: 276 training return: tensor(-999.2831, device='cuda:0')
episode: 277 training return: tensor(-999.7910, device='cuda:0')
episode: 278 training return: tensor(-999.7698, device='cuda:0')
episode: 279 training return: tensor(-995.7965, device='cuda:0')
epoch: 70 test_true_pfm: 2182.576366636938
episode: 280 training return: tensor(-999.9554, device='cuda:0')
episode: 281 training return: tensor(-975.0778, device='cuda:0')
episode: 282 training return: tensor(-999.9176, device='cuda:0')
episode: 283 training return: tensor(-999.3258, device='cuda:0')
epoch: 71 test_true_pfm: -254.2964134762888
episode: 284 training return: tensor(-999.9885, device='cuda:0')
episode: 285 training return: tensor(-999.6643, device='cuda:0')
episode: 286 training return: tensor(-999.9474, device='cuda:0')
episode: 287 training return: tensor(-996.2819, device='cuda:0')
epoch: 72 test_true_pfm: 1606.8175775369964
episode: 288 training return: tensor(-995.4864, device='cuda:0')
episode: 289 training return: tensor(-999.9864, device='cuda:0')
episode: 290 training return: tensor(-999.9526, device='cuda:0')
episode: 291 training return: tensor(-961.1397, device='cuda:0')
epoch: 73 test_true_pfm: 2811.01685260424
episode: 292 training return: tensor(-999.3970, device='cuda:0')
episode: 293 training return: tensor(-988.1255, device='cuda:0')
episode: 294 training return: tensor(-999.8719, device='cuda:0')
episode: 295 training return: tensor(-999.9565, device='cuda:0')
epoch: 74 test_true_pfm: 144.72111165353977
