epoch: 0 training_loss 0.4233407074213028 test_loss: 0.33046257495880127
epoch: 1 training_loss 0.3056741800904274 test_loss: 0.2815053701400757
epoch: 2 training_loss 0.26973387226462364 test_loss: 0.26090593338012696
epoch: 3 training_loss 0.25681129679083825 test_loss: 0.25511913299560546
epoch: 4 training_loss 0.2488134151697159 test_loss: 0.24144549369812013
epoch: 5 training_loss 0.2355788554251194 test_loss: 0.22510960102081298
epoch: 6 training_loss 0.22632156148552895 test_loss: 0.21761865615844728
epoch: 7 training_loss 0.2316144122183323 test_loss: 0.2430433750152588
epoch: 8 training_loss 0.22813247881829737 test_loss: 0.22009611129760742
epoch: 9 training_loss 0.22013069301843644 test_loss: 0.21256117820739745
epoch: 10 training_loss 0.23101121038198472 test_loss: 0.1976509690284729
epoch: 11 training_loss 0.22010152623057366 test_loss: 0.19800785779953003
epoch: 12 training_loss 0.22203500352799893 test_loss: 0.21698832511901855
epoch: 13 training_loss 0.21433034732937814 test_loss: 0.21390690803527831
epoch: 14 training_loss 0.21384961888194085 test_loss: 0.2183851957321167
epoch: 15 training_loss 0.2348131614923477 test_loss: 0.23706369400024413
epoch: 16 training_loss 0.21522548019886018 test_loss: 0.20212035179138182
epoch: 17 training_loss 0.21445717573165893 test_loss: 0.21424753665924073
epoch: 18 training_loss 0.20613466538488864 test_loss: 0.19902058839797973
epoch: 19 training_loss 0.20259679682552814 test_loss: 0.22623560428619385
epoch: 20 training_loss 0.2012443856149912 test_loss: 0.1841817617416382
epoch: 21 training_loss 0.20389840833842754 test_loss: 0.1891417145729065
epoch: 22 training_loss 0.1986280556023121 test_loss: 0.18694318532943727
epoch: 23 training_loss 0.20766735926270485 test_loss: 0.1904582977294922
epoch: 24 training_loss 0.20988835662603378 test_loss: 0.22085824012756347
epoch: 25 training_loss 0.20732446394860746 test_loss: 0.1956256628036499
epoch: 26 training_loss 0.21178993068635463 test_loss: 0.21740524768829345
epoch: 27 training_loss 0.19987219847738744 test_loss: 0.19421766996383666
epoch: 28 training_loss 0.19830255024135113 test_loss: 0.23299219608306884
epoch: 29 training_loss 0.1939112452417612 test_loss: 0.20384843349456788
epoch: 30 training_loss 0.19786652207374572 test_loss: 0.20386152267456054
epoch: 31 training_loss 0.1949701814353466 test_loss: 0.2003889799118042
epoch: 32 training_loss 0.2002650498598814 test_loss: 0.22112774848937988
epoch: 33 training_loss 0.2060784536600113 test_loss: 0.19126579761505128
epoch: 34 training_loss 0.1997404818981886 test_loss: 0.22403328418731688
epoch: 35 training_loss 0.20511212147772312 test_loss: 0.18812921047210693
epoch: 36 training_loss 0.19998880542814731 test_loss: 0.17416071891784668
epoch: 37 training_loss 0.19590907268226146 test_loss: 0.21926088333129884
epoch: 38 training_loss 0.21271257005631924 test_loss: 0.20107629299163818
epoch: 39 training_loss 0.19724250867962836 test_loss: 0.2008732557296753
epoch: 40 training_loss 0.2006932320445776 test_loss: 0.18736311197280883
epoch: 41 training_loss 0.20219779685139655 test_loss: 0.1789769411087036
epoch: 42 training_loss 0.19446477629244327 test_loss: 0.1962943196296692
epoch: 43 training_loss 0.19170374728739262 test_loss: 0.1863599419593811
epoch: 44 training_loss 0.1950526873022318 test_loss: 0.20214993953704835
epoch: 45 training_loss 0.18574512749910355 test_loss: 0.19673290252685546
epoch: 46 training_loss 0.19088418677449226 test_loss: 0.20702288150787354
epoch: 47 training_loss 0.19178905487060546 test_loss: 0.2062976121902466
epoch: 48 training_loss 0.1936748116463423 test_loss: 0.21641385555267334
epoch: 49 training_loss 0.19771305233240127 test_loss: 0.1983189821243286
epoch: 50 training_loss 0.19354056395590305 test_loss: 0.18719854354858398
epoch: 51 training_loss 0.19600578680634498 test_loss: 0.19692318439483641
epoch: 52 training_loss 0.18877685032784938 test_loss: 0.19627797603607178
epoch: 53 training_loss 0.19363000728189944 test_loss: 0.16167607307434081
epoch: 54 training_loss 0.18725341998040676 test_loss: 0.1559901475906372
epoch: 55 training_loss 0.194961576461792 test_loss: 0.18908244371414185
epoch: 56 training_loss 0.19448665738105775 test_loss: 0.17402085065841674
epoch: 57 training_loss 0.19090656720101834 test_loss: 0.2030643939971924
epoch: 58 training_loss 0.1897069028764963 test_loss: 0.1848026394844055
epoch: 59 training_loss 0.18980625994503497 test_loss: 0.18716598749160768
epoch: 60 training_loss 0.19866764441132545 test_loss: 0.19227700233459472
epoch: 61 training_loss 0.1970494570583105 test_loss: 0.1968194365501404
epoch: 62 training_loss 0.18677732162177563 test_loss: 0.22537503242492676
epoch: 63 training_loss 0.18814742110669613 test_loss: 0.1786901593208313
epoch: 64 training_loss 0.18660592034459114 test_loss: 0.1816290259361267
epoch: 65 training_loss 0.18883026793599128 test_loss: 0.1648875117301941
epoch: 66 training_loss 0.18925746716558933 test_loss: 0.18864451646804808
epoch: 67 training_loss 0.18841993175446986 test_loss: 0.19067304134368895
epoch: 68 training_loss 0.18881164416670798 test_loss: 0.18462259769439698
epoch: 69 training_loss 0.19123808108270168 test_loss: 0.19849865436553954
epoch: 70 training_loss 0.1759571222215891 test_loss: 0.20569438934326173
epoch: 71 training_loss 0.18732937924563886 test_loss: 0.17854708433151245
epoch: 72 training_loss 0.18397903449833394 test_loss: 0.20344314575195313
epoch: 73 training_loss 0.19248508490622043 test_loss: 0.19737620353698732
epoch: 74 training_loss 0.19069756016135217 test_loss: 0.16876235008239746
epoch: 75 training_loss 0.19338053971529007 test_loss: 0.19575120210647584
epoch: 76 training_loss 0.18713278539478778 test_loss: 0.21585075855255126
epoch: 77 training_loss 0.19014157481491567 test_loss: 0.19099773168563844
epoch: 78 training_loss 0.1910201258957386 test_loss: 0.1763300657272339
epoch: 79 training_loss 0.199441399872303 test_loss: 0.24834535121917725
epoch: 80 training_loss 0.1931993079930544 test_loss: 0.20387189388275145
epoch: 81 training_loss 0.1860267809778452 test_loss: 0.1741209387779236
epoch: 82 training_loss 0.18697168856859206 test_loss: 0.19495785236358643
epoch: 83 training_loss 0.18393404453992843 test_loss: 0.17971936464309693
epoch: 84 training_loss 0.18543685242533683 test_loss: 0.20918631553649902
epoch: 85 training_loss 0.1879900112748146 test_loss: 0.20000243186950684
epoch: 86 training_loss 0.1843476928025484 test_loss: 0.18029193878173827
epoch: 87 training_loss 0.18093269526958466 test_loss: 0.20842416286468507
epoch: 88 training_loss 0.18828586235642433 test_loss: 0.18541994094848632
epoch: 89 training_loss 0.18930754706263542 test_loss: 0.17235428094863892
epoch: 90 training_loss 0.19260193474590778 test_loss: 0.1746760845184326
epoch: 91 training_loss 0.18242791153490542 test_loss: 0.20453081130981446
epoch: 92 training_loss 0.18571544259786607 test_loss: 0.20264787673950196
epoch: 93 training_loss 0.1878742950409651 test_loss: 0.18371248245239258
epoch: 94 training_loss 0.18306356757879258 test_loss: 0.17214858531951904
epoch: 95 training_loss 0.1871179834008217 test_loss: 0.19180550575256347
epoch: 96 training_loss 0.18127857774496078 test_loss: 0.18838708400726317
epoch: 97 training_loss 0.1849177622050047 test_loss: 0.19101762771606445
epoch: 98 training_loss 0.18784100256860256 test_loss: 0.1926662802696228
epoch: 99 training_loss 0.19460817515850068 test_loss: 0.18674767017364502
epoch: 100 training_loss 0.1875529955327511 test_loss: 0.1878516435623169
epoch: 101 training_loss 0.19203959830105305 test_loss: 0.17110960483551024
epoch: 102 training_loss 0.19463659562170504 test_loss: 0.1727336287498474
epoch: 103 training_loss 0.1841627659648657 test_loss: 0.17570626735687256
epoch: 104 training_loss 0.19193809695541858 test_loss: 0.1912951111793518
epoch: 105 training_loss 0.19038482800126075 test_loss: 0.18080068826675416
epoch: 106 training_loss 0.17183972135186196 test_loss: 0.1825717568397522
epoch: 107 training_loss 0.19531882293522357 test_loss: 0.18540596961975098
epoch: 108 training_loss 0.18629670590162278 test_loss: 0.1984561562538147
epoch: 109 training_loss 0.18204963117837905 test_loss: 0.2021345853805542
epoch: 110 training_loss 0.19105576075613498 test_loss: 0.18667863607406615
epoch: 111 training_loss 0.17814356438815593 test_loss: 0.20336928367614746
epoch: 112 training_loss 0.18135136723518372 test_loss: 0.1939000129699707
epoch: 113 training_loss 0.17056001506745816 test_loss: 0.1631963849067688
epoch: 114 training_loss 0.18948785342276098 test_loss: 0.18321467638015748
epoch: 115 training_loss 0.186552392616868 test_loss: 0.18910739421844483
epoch: 116 training_loss 0.1788735405355692 test_loss: 0.20885848999023438
epoch: 117 training_loss 0.18561880856752397 test_loss: 0.18091076612472534
epoch: 118 training_loss 0.1912584588676691 test_loss: 0.19727364778518677
epoch: 119 training_loss 0.18180426329374313 test_loss: 0.1857306957244873
epoch: 0 training_loss 44.381299076080325 test_loss: 23.399794006347655
epoch: 1 training_loss 18.90461100578308 test_loss: 16.318870544433594
epoch: 2 training_loss 14.788456640243531 test_loss: 13.313856506347657
epoch: 3 training_loss 12.520936183929443 test_loss: 12.014775848388672
epoch: 4 training_loss 10.861501960754394 test_loss: 10.584861755371094
epoch: 5 training_loss 10.181271591186523 test_loss: 10.001397705078125
epoch: 6 training_loss 9.579241237640382 test_loss: 9.425350952148438
epoch: 7 training_loss 8.82851674079895 test_loss: 8.146334838867187
epoch: 8 training_loss 8.301430320739746 test_loss: 8.479947662353515
epoch: 9 training_loss 8.03878475189209 test_loss: 8.25166015625
epoch: 10 training_loss 7.6889937162399296 test_loss: 7.642448425292969
epoch: 11 training_loss 7.232068185806274 test_loss: 7.219479370117187
epoch: 12 training_loss 7.016258130073547 test_loss: 6.955200958251953
epoch: 13 training_loss 7.009201378822326 test_loss: 6.6493675231933596
epoch: 14 training_loss 6.55820475101471 test_loss: 5.882141494750977
epoch: 15 training_loss 6.180210990905762 test_loss: 6.354584503173828
epoch: 16 training_loss 6.168981456756592 test_loss: 6.531028747558594
epoch: 17 training_loss 6.004908573627472 test_loss: 5.5385009765625
epoch: 18 training_loss 5.912210278511047 test_loss: 5.733367538452148
epoch: 19 training_loss 5.721437029838562 test_loss: 5.617660140991211
epoch: 20 training_loss 5.599958143234253 test_loss: 5.321665191650391
epoch: 21 training_loss 5.470072383880615 test_loss: 5.111124038696289
epoch: 22 training_loss 5.254220154285431 test_loss: 4.951445007324219
epoch: 23 training_loss 5.162095110416413 test_loss: 5.1262248992919925
epoch: 24 training_loss 5.115424838066101 test_loss: 5.136738586425781
epoch: 25 training_loss 5.0398051953315735 test_loss: 5.021091079711914
epoch: 26 training_loss 4.949465725421906 test_loss: 5.415222549438477
epoch: 27 training_loss 5.025843257904053 test_loss: 4.44470329284668
epoch: 28 training_loss 4.788207442760467 test_loss: 5.144643402099609
epoch: 29 training_loss 4.612962365150452 test_loss: 4.465848159790039
epoch: 30 training_loss 4.59060183763504 test_loss: 4.320093154907227
epoch: 31 training_loss 4.402821369171143 test_loss: 4.623794174194336
epoch: 32 training_loss 4.367040364742279 test_loss: 4.545414733886719
epoch: 33 training_loss 4.460381529331207 test_loss: 4.500505447387695
epoch: 34 training_loss 4.320516281127929 test_loss: 4.0478660583496096
epoch: 35 training_loss 4.30602187871933 test_loss: 4.209321594238281
epoch: 36 training_loss 4.204648630619049 test_loss: 3.9925891876220705
epoch: 37 training_loss 4.08884752035141 test_loss: 4.002481079101562
epoch: 38 training_loss 4.0356478071212765 test_loss: 3.9193492889404298
epoch: 39 training_loss 4.037894175052643 test_loss: 3.5372753143310547
epoch: 40 training_loss 3.988066518306732 test_loss: 4.029782104492187
epoch: 41 training_loss 3.828618412017822 test_loss: 3.5763477325439452
epoch: 42 training_loss 3.796943130493164 test_loss: 3.3505435943603517
epoch: 43 training_loss 3.8385865354537962 test_loss: 3.788427734375
epoch: 44 training_loss 3.6124812364578247 test_loss: 3.5900821685791016
epoch: 45 training_loss 3.7621777296066283 test_loss: 3.7690807342529298
epoch: 46 training_loss 4.0274947428703305 test_loss: 3.4250839233398436
epoch: 47 training_loss 3.720345742702484 test_loss: 3.5730995178222655
epoch: 48 training_loss 3.5981845688819885 test_loss: 3.5108306884765623
epoch: 49 training_loss 3.5545932245254517 test_loss: 3.233549880981445
epoch: 50 training_loss 3.5627515077590943 test_loss: 3.3108287811279298
epoch: 51 training_loss 3.6351134967803955 test_loss: 3.2050621032714846
epoch: 52 training_loss 3.8315823698043823 test_loss: 3.2997211456298827
epoch: 53 training_loss 3.4620697498321533 test_loss: 3.5729503631591797
epoch: 54 training_loss 3.4330897903442383 test_loss: 3.1464412689208983
epoch: 55 training_loss 3.446372318267822 test_loss: 3.2821514129638674
epoch: 56 training_loss 3.2746796226501464 test_loss: 2.7809452056884765
epoch: 57 training_loss 3.053819169998169 test_loss: 3.1999156951904295
epoch: 58 training_loss 3.2478943181037905 test_loss: 2.9762964248657227
epoch: 59 training_loss 3.099006922245026 test_loss: 2.8659812927246096
epoch: 60 training_loss 2.9842009329795838 test_loss: 3.275995635986328
epoch: 61 training_loss 2.9403253245353698 test_loss: 2.825162887573242
epoch: 62 training_loss 2.925668318271637 test_loss: 2.6564207077026367
epoch: 63 training_loss 2.960192265510559 test_loss: 2.7888616561889648
epoch: 64 training_loss 2.8638366770744326 test_loss: 2.77109375
epoch: 65 training_loss 2.863756477832794 test_loss: 2.610533332824707
epoch: 66 training_loss 2.745137621164322 test_loss: 2.576813888549805
epoch: 67 training_loss 3.0752101874351503 test_loss: 2.901948165893555
epoch: 68 training_loss 2.8147896838188173 test_loss: 2.9043893814086914
epoch: 69 training_loss 2.900851981639862 test_loss: 2.566189002990723
epoch: 70 training_loss 2.806905802488327 test_loss: 2.7939428329467773
epoch: 71 training_loss 2.754241919517517 test_loss: 2.58974609375
epoch: 72 training_loss 2.803882929086685 test_loss: 3.9622554779052734
epoch: 73 training_loss 2.9213905334472656 test_loss: 2.7527450561523437
epoch: 74 training_loss 2.650700612068176 test_loss: 2.6545761108398436
epoch: 75 training_loss 2.5496034383773805 test_loss: 2.4406759262084963
epoch: 76 training_loss 2.6388630044460295 test_loss: 2.6633819580078124
epoch: 77 training_loss 2.644961392879486 test_loss: 2.5614944458007813
epoch: 78 training_loss 2.6475702250003814 test_loss: 2.7220222473144533
epoch: 79 training_loss 2.776093212366104 test_loss: 2.499393653869629
epoch: 80 training_loss 2.692910302877426 test_loss: 2.4042146682739256
epoch: 81 training_loss 2.866449055671692 test_loss: 2.4281606674194336
epoch: 82 training_loss 2.4745085859298706 test_loss: 2.4936065673828125
epoch: 83 training_loss 2.520137528181076 test_loss: 2.8058929443359375
epoch: 84 training_loss 2.632617439031601 test_loss: 2.5458784103393555
epoch: 85 training_loss 2.5785426247119902 test_loss: 2.4900550842285156
epoch: 86 training_loss 2.486225801706314 test_loss: 2.567803382873535
epoch: 87 training_loss 2.5542491829395293 test_loss: 2.5632558822631837
epoch: 88 training_loss 2.57223783493042 test_loss: 2.0987695693969726
epoch: 89 training_loss 2.4813644433021547 test_loss: 2.591464042663574
epoch: 90 training_loss 2.443473768234253 test_loss: 2.4097145080566404
epoch: 91 training_loss 2.450264996290207 test_loss: 2.628831481933594
epoch: 92 training_loss 2.5568854916095733 test_loss: 2.3540275573730467
epoch: 93 training_loss 2.3672127175331115 test_loss: 2.3492637634277345
epoch: 94 training_loss 2.372830022573471 test_loss: 2.172380828857422
epoch: 95 training_loss 2.3490673732757568 test_loss: 2.5935251235961916
epoch: 96 training_loss 2.409547742605209 test_loss: 2.367831802368164
epoch: 97 training_loss 2.2555903589725492 test_loss: 2.3891069412231447
epoch: 98 training_loss 2.5107296502590177 test_loss: 2.505946159362793
epoch: 99 training_loss 2.361740725040436 test_loss: 2.2216100692749023
epoch: 100 training_loss 2.265825035572052 test_loss: 2.2680503845214846
epoch: 101 training_loss 2.498256450891495 test_loss: 2.6005170822143553
epoch: 102 training_loss 2.245743054151535 test_loss: 2.1851335525512696
epoch: 103 training_loss 2.1857411813735963 test_loss: 2.1046062469482423
epoch: 104 training_loss 2.271378594636917 test_loss: 2.69348087310791
epoch: 105 training_loss 2.2838120555877683 test_loss: 2.020126152038574
epoch: 106 training_loss 2.2822462809085846 test_loss: 2.4593017578125
epoch: 107 training_loss 2.1145421862602234 test_loss: 2.13978271484375
epoch: 108 training_loss 2.200744014978409 test_loss: 2.2004865646362304
epoch: 109 training_loss 2.2122764003276827 test_loss: 2.037919807434082
epoch: 110 training_loss 2.2551865446567536 test_loss: 2.0586395263671875
epoch: 111 training_loss 2.1624855279922484 test_loss: 2.301503562927246
epoch: 112 training_loss 2.1686694300174714 test_loss: 2.147345542907715
epoch: 113 training_loss 2.150925977230072 test_loss: 2.0048139572143553
epoch: 114 training_loss 2.195576583147049 test_loss: 2.3233455657958983
epoch: 115 training_loss 2.1895755457878114 test_loss: 1.875767707824707
epoch: 116 training_loss 2.160421518087387 test_loss: 2.2829477310180666
epoch: 117 training_loss 2.2295490419864654 test_loss: 2.2426544189453126
epoch: 118 training_loss 2.0523670601844786 test_loss: 2.3746658325195313
epoch: 119 training_loss 2.115832163095474 test_loss: 2.2523433685302736
83.94878141531878
episode: 0 training return: tensor(-72.9284, device='cuda:0')
episode: 1 training return: tensor(-122.4802, device='cuda:0')
episode: 2 training return: tensor(-58.5434, device='cuda:0')
episode: 3 training return: tensor(-45.5102, device='cuda:0')
epoch: 1 test_true_pfm: 68.64624022609068 sim_pfm: -64.74339523570379
episode: 4 training return: tensor(-64.2779, device='cuda:0')
episode: 5 training return: tensor(-109.4437, device='cuda:0')
episode: 6 training return: tensor(-68.0436, device='cuda:0')
episode: 7 training return: tensor(-97.8525, device='cuda:0')
epoch: 2 test_true_pfm: 71.46928889832408 sim_pfm: -31.918739147949964
episode: 8 training return: tensor(-116.7156, device='cuda:0')
episode: 9 training return: tensor(-55.9458, device='cuda:0')
episode: 10 training return: tensor(-61.7966, device='cuda:0')
episode: 11 training return: tensor(-127.7551, device='cuda:0')
epoch: 3 test_true_pfm: 66.64807297739769 sim_pfm: -117.40903673747088
episode: 12 training return: tensor(-117.5448, device='cuda:0')
episode: 13 training return: tensor(-76.6423, device='cuda:0')
episode: 14 training return: tensor(2.1016, device='cuda:0')
episode: 15 training return: tensor(-113.6472, device='cuda:0')
epoch: 4 test_true_pfm: 64.94907734163516 sim_pfm: -34.41887945669587
episode: 16 training return: tensor(4.2747, device='cuda:0')
episode: 17 training return: tensor(-53.2474, device='cuda:0')
episode: 18 training return: tensor(-63.1075, device='cuda:0')
episode: 19 training return: tensor(-153.9055, device='cuda:0')
epoch: 5 test_true_pfm: 68.67047454626629 sim_pfm: -79.96986420088214
episode: 20 training return: tensor(-164.0181, device='cuda:0')
episode: 21 training return: tensor(-58.7459, device='cuda:0')
episode: 22 training return: tensor(-179.8838, device='cuda:0')
episode: 23 training return: tensor(-134.2988, device='cuda:0')
epoch: 6 test_true_pfm: 73.95455212110167 sim_pfm: -140.96954474818193
episode: 24 training return: tensor(-116.3951, device='cuda:0')
episode: 25 training return: tensor(-72.8245, device='cuda:0')
episode: 26 training return: tensor(-69.0159, device='cuda:0')
episode: 27 training return: tensor(-128.5697, device='cuda:0')
epoch: 7 test_true_pfm: 54.84310674117815 sim_pfm: -108.80409107409068
episode: 28 training return: tensor(-49.2239, device='cuda:0')
episode: 29 training return: tensor(-123.5424, device='cuda:0')
episode: 30 training return: tensor(13.5426, device='cuda:0')
episode: 31 training return: tensor(-65.9118, device='cuda:0')
epoch: 8 test_true_pfm: 75.47175336800532 sim_pfm: -50.98651557805715
episode: 32 training return: tensor(-66.6124, device='cuda:0')
episode: 33 training return: tensor(-60.9793, device='cuda:0')
episode: 34 training return: tensor(-74.5478, device='cuda:0')
episode: 35 training return: tensor(9.1435, device='cuda:0')
epoch: 9 test_true_pfm: 51.37544675312654 sim_pfm: -23.230328710761388
episode: 36 training return: tensor(-128.6512, device='cuda:0')
episode: 37 training return: tensor(-43.5603, device='cuda:0')
episode: 38 training return: tensor(-112.3546, device='cuda:0')
episode: 39 training return: tensor(-15.2599, device='cuda:0')
epoch: 10 test_true_pfm: 59.34041020477919 sim_pfm: -54.78318622522638
episode: 40 training return: tensor(-2.7975, device='cuda:0')
episode: 41 training return: tensor(-13.1128, device='cuda:0')
episode: 42 training return: tensor(-64.8354, device='cuda:0')
episode: 43 training return: tensor(5.8167, device='cuda:0')
epoch: 11 test_true_pfm: 63.78520724331796 sim_pfm: 3.0437688171456103
episode: 44 training return: tensor(-125.8895, device='cuda:0')
episode: 45 training return: tensor(-128.4033, device='cuda:0')
episode: 46 training return: tensor(-125.1770, device='cuda:0')
episode: 47 training return: tensor(-116.9960, device='cuda:0')
epoch: 12 test_true_pfm: 54.860445341322496 sim_pfm: -46.98386247068411
episode: 48 training return: tensor(-52.5196, device='cuda:0')
episode: 49 training return: tensor(-127.3416, device='cuda:0')
episode: 50 training return: tensor(-132.5016, device='cuda:0')
episode: 51 training return: tensor(6.9367, device='cuda:0')
epoch: 13 test_true_pfm: 69.03294411334672 sim_pfm: -63.480737063102424
episode: 52 training return: tensor(-72.2033, device='cuda:0')
episode: 53 training return: tensor(-145.9335, device='cuda:0')
episode: 54 training return: tensor(-117.0526, device='cuda:0')
episode: 55 training return: tensor(-119.7143, device='cuda:0')
epoch: 14 test_true_pfm: 51.21809086846857 sim_pfm: -15.449863354954868
episode: 56 training return: tensor(-111.3159, device='cuda:0')
episode: 57 training return: tensor(29.8194, device='cuda:0')
episode: 58 training return: tensor(-61.9004, device='cuda:0')
episode: 59 training return: tensor(41.8961, device='cuda:0')
epoch: 15 test_true_pfm: 54.863017357100844 sim_pfm: -47.87874415603001
episode: 60 training return: tensor(43.1361, device='cuda:0')
episode: 61 training return: tensor(14.4498, device='cuda:0')
episode: 62 training return: tensor(-68.3881, device='cuda:0')
episode: 63 training return: tensor(-131.0509, device='cuda:0')
epoch: 16 test_true_pfm: 49.45491129396007 sim_pfm: -20.29256488122046
episode: 64 training return: tensor(27.1182, device='cuda:0')
episode: 65 training return: tensor(-133.7502, device='cuda:0')
episode: 66 training return: tensor(-141.6124, device='cuda:0')
episode: 67 training return: tensor(12.5207, device='cuda:0')
epoch: 17 test_true_pfm: 68.44169917436011 sim_pfm: -45.017356479080625
episode: 68 training return: tensor(-64.6415, device='cuda:0')
episode: 69 training return: tensor(-118.0867, device='cuda:0')
episode: 70 training return: tensor(-2.3992, device='cuda:0')
episode: 71 training return: tensor(-63.7764, device='cuda:0')
epoch: 18 test_true_pfm: 62.51264301892218 sim_pfm: -30.102011931448942
episode: 72 training return: tensor(2.7322, device='cuda:0')
episode: 73 training return: tensor(-112.4941, device='cuda:0')
episode: 74 training return: tensor(-116.8724, device='cuda:0')
episode: 75 training return: tensor(18.5386, device='cuda:0')
epoch: 19 test_true_pfm: 60.88443829070643 sim_pfm: -44.403029572358356
episode: 76 training return: tensor(26.5869, device='cuda:0')
episode: 77 training return: tensor(-126.5247, device='cuda:0')
episode: 78 training return: tensor(-79.5626, device='cuda:0')
episode: 79 training return: tensor(-120.8275, device='cuda:0')
epoch: 20 test_true_pfm: 60.23263953460355 sim_pfm: 24.224383083643623
episode: 80 training return: tensor(-140.6149, device='cuda:0')
episode: 81 training return: tensor(-138.7300, device='cuda:0')
episode: 82 training return: tensor(-56.9978, device='cuda:0')
episode: 83 training return: tensor(-45.0334, device='cuda:0')
epoch: 21 test_true_pfm: 47.0301558129873 sim_pfm: -23.22273556275177
episode: 84 training return: tensor(14.0551, device='cuda:0')
episode: 85 training return: tensor(-55.2383, device='cuda:0')
episode: 86 training return: tensor(-45.7871, device='cuda:0')
episode: 87 training return: tensor(-131.5828, device='cuda:0')
epoch: 22 test_true_pfm: 60.33064163659361 sim_pfm: 4.572985801950563
episode: 88 training return: tensor(18.0342, device='cuda:0')
episode: 89 training return: tensor(42.1159, device='cuda:0')
episode: 90 training return: tensor(-64.4184, device='cuda:0')
episode: 91 training return: tensor(-142.7740, device='cuda:0')
epoch: 23 test_true_pfm: 59.30159117005046 sim_pfm: -40.296730764204405
episode: 92 training return: tensor(-57.2073, device='cuda:0')
episode: 93 training return: tensor(5.3306, device='cuda:0')
episode: 94 training return: tensor(-93.8520, device='cuda:0')
episode: 95 training return: tensor(30.7578, device='cuda:0')
epoch: 24 test_true_pfm: 58.77746965833295 sim_pfm: -5.55674599444028
episode: 96 training return: tensor(12.7737, device='cuda:0')
episode: 97 training return: tensor(33.1077, device='cuda:0')
episode: 98 training return: tensor(-137.7244, device='cuda:0')
episode: 99 training return: tensor(51.5351, device='cuda:0')
epoch: 25 test_true_pfm: 56.36977755930629 sim_pfm: -34.31578605439281
episode: 100 training return: tensor(-136.1136, device='cuda:0')
episode: 101 training return: tensor(12.7921, device='cuda:0')
episode: 102 training return: tensor(-50.4128, device='cuda:0')
episode: 103 training return: tensor(-119.8517, device='cuda:0')
epoch: 26 test_true_pfm: 51.304774315018804 sim_pfm: 36.20707488133339
episode: 104 training return: tensor(49.3302, device='cuda:0')
episode: 105 training return: tensor(43.2429, device='cuda:0')
episode: 106 training return: tensor(31.9642, device='cuda:0')
episode: 107 training return: tensor(39.5168, device='cuda:0')
epoch: 27 test_true_pfm: 69.85372082929413 sim_pfm: -50.62715932495193
episode: 108 training return: tensor(5.9701, device='cuda:0')
episode: 109 training return: tensor(33.3459, device='cuda:0')
episode: 110 training return: tensor(47.9319, device='cuda:0')
episode: 111 training return: tensor(-132.8775, device='cuda:0')
epoch: 28 test_true_pfm: 59.55353312509963 sim_pfm: 14.031505379686132
episode: 112 training return: tensor(32.1200, device='cuda:0')
episode: 113 training return: tensor(46.7653, device='cuda:0')
episode: 114 training return: tensor(4.4169, device='cuda:0')
episode: 115 training return: tensor(28.1589, device='cuda:0')
epoch: 29 test_true_pfm: 60.8159225072411 sim_pfm: -41.39796857653418
episode: 116 training return: tensor(-117.8255, device='cuda:0')
episode: 117 training return: tensor(-131.8228, device='cuda:0')
episode: 118 training return: tensor(33.6753, device='cuda:0')
episode: 119 training return: tensor(-123.7466, device='cuda:0')
epoch: 30 test_true_pfm: 49.74391654078282 sim_pfm: -99.07922776355409
episode: 120 training return: tensor(-137.9664, device='cuda:0')
episode: 121 training return: tensor(13.6937, device='cuda:0')
episode: 122 training return: tensor(7.8373, device='cuda:0')
episode: 123 training return: tensor(-128.5419, device='cuda:0')
epoch: 31 test_true_pfm: 60.72639827332538 sim_pfm: -18.20548973439145
episode: 124 training return: tensor(-80.1013, device='cuda:0')
episode: 125 training return: tensor(-10.0253, device='cuda:0')
episode: 126 training return: tensor(18.0624, device='cuda:0')
episode: 127 training return: tensor(-132.0542, device='cuda:0')
epoch: 32 test_true_pfm: 44.46459255323678 sim_pfm: -36.3183349005878
episode: 128 training return: tensor(-113.9657, device='cuda:0')
episode: 129 training return: tensor(-110.2869, device='cuda:0')
episode: 130 training return: tensor(47.0737, device='cuda:0')
episode: 131 training return: tensor(-126.6287, device='cuda:0')
epoch: 33 test_true_pfm: 69.01315448875395 sim_pfm: -30.34979761892464
episode: 132 training return: tensor(44.8633, device='cuda:0')
episode: 133 training return: tensor(46.5348, device='cuda:0')
episode: 134 training return: tensor(40.1602, device='cuda:0')
episode: 135 training return: tensor(-118.9843, device='cuda:0')
epoch: 34 test_true_pfm: 47.20712760769316 sim_pfm: 20.903803349332883
episode: 136 training return: tensor(23.9952, device='cuda:0')
episode: 137 training return: tensor(-146.4061, device='cuda:0')
episode: 138 training return: tensor(46.4782, device='cuda:0')
episode: 139 training return: tensor(44.4068, device='cuda:0')
epoch: 35 test_true_pfm: 53.23477623758083 sim_pfm: -19.221893800079123
episode: 140 training return: tensor(14.1561, device='cuda:0')
episode: 141 training return: tensor(13.8311, device='cuda:0')
episode: 142 training return: tensor(47.5380, device='cuda:0')
episode: 143 training return: tensor(35.1066, device='cuda:0')
epoch: 36 test_true_pfm: 46.80479865793763 sim_pfm: -23.224508891999722
episode: 144 training return: tensor(29.3288, device='cuda:0')
episode: 145 training return: tensor(21.4861, device='cuda:0')
episode: 146 training return: tensor(36.6715, device='cuda:0')
episode: 147 training return: tensor(15.7353, device='cuda:0')
epoch: 37 test_true_pfm: 55.44409415141414 sim_pfm: -26.854294605506585
episode: 148 training return: tensor(-109.8896, device='cuda:0')
episode: 149 training return: tensor(23.7590, device='cuda:0')
episode: 150 training return: tensor(47.7873, device='cuda:0')
episode: 151 training return: tensor(-111.5213, device='cuda:0')
epoch: 38 test_true_pfm: 52.120081456564854 sim_pfm: -34.19321066769771
episode: 152 training return: tensor(-101.4199, device='cuda:0')
episode: 153 training return: tensor(27.2273, device='cuda:0')
episode: 154 training return: tensor(-4.5941, device='cuda:0')
episode: 155 training return: tensor(-124.5412, device='cuda:0')
epoch: 39 test_true_pfm: 44.35394743626532 sim_pfm: -32.309538745926695
episode: 156 training return: tensor(38.7928, device='cuda:0')
episode: 157 training return: tensor(-122.8508, device='cuda:0')
episode: 158 training return: tensor(-51.8362, device='cuda:0')
episode: 159 training return: tensor(48.5936, device='cuda:0')
epoch: 40 test_true_pfm: 47.645120973233475 sim_pfm: -3.470525057154009
episode: 160 training return: tensor(-119.9341, device='cuda:0')
episode: 161 training return: tensor(-128.8543, device='cuda:0')
episode: 162 training return: tensor(-125.0459, device='cuda:0')
episode: 163 training return: tensor(42.6349, device='cuda:0')
epoch: 41 test_true_pfm: 52.70204390562825 sim_pfm: 11.42070659136516
episode: 164 training return: tensor(28.7326, device='cuda:0')
episode: 165 training return: tensor(44.3544, device='cuda:0')
episode: 166 training return: tensor(47.7904, device='cuda:0')
episode: 167 training return: tensor(-109.0842, device='cuda:0')
epoch: 42 test_true_pfm: 52.54698507895195 sim_pfm: 0.45695930239744487
episode: 168 training return: tensor(-50.5757, device='cuda:0')
episode: 169 training return: tensor(30.5502, device='cuda:0')
episode: 170 training return: tensor(-120.4322, device='cuda:0')
episode: 171 training return: tensor(-65.6645, device='cuda:0')
epoch: 43 test_true_pfm: 56.144220545448675 sim_pfm: -28.343701151409185
episode: 172 training return: tensor(-118.5578, device='cuda:0')
episode: 173 training return: tensor(29.2474, device='cuda:0')
episode: 174 training return: tensor(27.4318, device='cuda:0')
episode: 175 training return: tensor(-61.4873, device='cuda:0')
epoch: 44 test_true_pfm: 55.624909999848214 sim_pfm: -1.6143553245870863
episode: 176 training return: tensor(53.5363, device='cuda:0')
episode: 177 training return: tensor(-129.8724, device='cuda:0')
episode: 178 training return: tensor(42.5604, device='cuda:0')
episode: 179 training return: tensor(49.6490, device='cuda:0')
epoch: 45 test_true_pfm: 55.32346349496636 sim_pfm: -36.69672485891497
episode: 180 training return: tensor(18.0284, device='cuda:0')
episode: 181 training return: tensor(40.5951, device='cuda:0')
episode: 182 training return: tensor(-111.0385, device='cuda:0')
episode: 183 training return: tensor(-122.8671, device='cuda:0')
epoch: 46 test_true_pfm: 47.71908322980893 sim_pfm: 22.061369515582918
episode: 184 training return: tensor(48.3659, device='cuda:0')
episode: 185 training return: tensor(17.5188, device='cuda:0')
episode: 186 training return: tensor(26.0130, device='cuda:0')
episode: 187 training return: tensor(28.3821, device='cuda:0')
epoch: 47 test_true_pfm: 43.51591495662869 sim_pfm: -20.940167510381436
episode: 188 training return: tensor(38.6087, device='cuda:0')
episode: 189 training return: tensor(35.7100, device='cuda:0')
episode: 190 training return: tensor(30.8505, device='cuda:0')
episode: 191 training return: tensor(49.7638, device='cuda:0')
epoch: 48 test_true_pfm: 47.75461148517151 sim_pfm: -39.3436290125188
episode: 192 training return: tensor(16.9418, device='cuda:0')
episode: 193 training return: tensor(-111.6503, device='cuda:0')
episode: 194 training return: tensor(30.3975, device='cuda:0')
episode: 195 training return: tensor(15.8286, device='cuda:0')
epoch: 49 test_true_pfm: 76.8239964165783 sim_pfm: -5.758318283152766
episode: 196 training return: tensor(39.9449, device='cuda:0')
episode: 197 training return: tensor(28.8977, device='cuda:0')
episode: 198 training return: tensor(-111.7965, device='cuda:0')
episode: 199 training return: tensor(-73.8824, device='cuda:0')
epoch: 50 test_true_pfm: 55.32837123797706 sim_pfm: 2.8596280267636756
episode: 200 training return: tensor(1.5339, device='cuda:0')
episode: 201 training return: tensor(-84.8306, device='cuda:0')
episode: 202 training return: tensor(31.2392, device='cuda:0')
episode: 203 training return: tensor(-107.6595, device='cuda:0')
epoch: 51 test_true_pfm: 52.31283856633551 sim_pfm: -16.6907489141915
episode: 204 training return: tensor(29.4134, device='cuda:0')
episode: 205 training return: tensor(4.4327, device='cuda:0')
episode: 206 training return: tensor(-102.5968, device='cuda:0')
episode: 207 training return: tensor(25.3749, device='cuda:0')
epoch: 52 test_true_pfm: 48.18143586855848 sim_pfm: -0.7395618110662326
episode: 208 training return: tensor(-49.0934, device='cuda:0')
episode: 209 training return: tensor(28.5618, device='cuda:0')
episode: 210 training return: tensor(34.2760, device='cuda:0')
episode: 211 training return: tensor(43.2495, device='cuda:0')
epoch: 53 test_true_pfm: 51.535976991887516 sim_pfm: -27.426737638923804
episode: 212 training return: tensor(22.5572, device='cuda:0')
episode: 213 training return: tensor(-2.8887, device='cuda:0')
episode: 214 training return: tensor(-1.0840, device='cuda:0')
episode: 215 training return: tensor(23.0000, device='cuda:0')
epoch: 54 test_true_pfm: 53.79903454265909 sim_pfm: 22.308083426742815
episode: 216 training return: tensor(-6.4018, device='cuda:0')
episode: 217 training return: tensor(-69.2669, device='cuda:0')
episode: 218 training return: tensor(-97.9926, device='cuda:0')
episode: 219 training return: tensor(-27.2437, device='cuda:0')
epoch: 55 test_true_pfm: 52.8219593934643 sim_pfm: 2.8971584476355927
episode: 220 training return: tensor(-66.3797, device='cuda:0')
episode: 221 training return: tensor(29.8359, device='cuda:0')
episode: 222 training return: tensor(30.3855, device='cuda:0')
episode: 223 training return: tensor(-116.3719, device='cuda:0')
epoch: 56 test_true_pfm: 49.719587359648024 sim_pfm: -38.462068455480036
episode: 224 training return: tensor(-113.2958, device='cuda:0')
episode: 225 training return: tensor(17.9367, device='cuda:0')
episode: 226 training return: tensor(24.4078, device='cuda:0')
episode: 227 training return: tensor(22.3148, device='cuda:0')
epoch: 57 test_true_pfm: 44.1431707139926 sim_pfm: -12.140851577406284
episode: 228 training return: tensor(-96.4867, device='cuda:0')
episode: 229 training return: tensor(-110.6529, device='cuda:0')
episode: 230 training return: tensor(44.8647, device='cuda:0')
episode: 231 training return: tensor(-101.5298, device='cuda:0')
epoch: 58 test_true_pfm: 65.53323863783936 sim_pfm: -43.93471268161666
episode: 232 training return: tensor(-91.5806, device='cuda:0')
episode: 233 training return: tensor(-108.1328, device='cuda:0')
episode: 234 training return: tensor(14.1329, device='cuda:0')
episode: 235 training return: tensor(-104.8881, device='cuda:0')
epoch: 59 test_true_pfm: 47.16782001628768 sim_pfm: 23.938826457294635
episode: 236 training return: tensor(-99.4104, device='cuda:0')
episode: 237 training return: tensor(-119.2016, device='cuda:0')
episode: 238 training return: tensor(20.2797, device='cuda:0')
episode: 239 training return: tensor(16.8027, device='cuda:0')
epoch: 60 test_true_pfm: 46.992371143339696 sim_pfm: -4.640472082118504
episode: 240 training return: tensor(42.0023, device='cuda:0')
episode: 241 training return: tensor(41.9715, device='cuda:0')
episode: 242 training return: tensor(35.4540, device='cuda:0')
episode: 243 training return: tensor(27.1109, device='cuda:0')
epoch: 61 test_true_pfm: 51.47505745948189 sim_pfm: 37.02918099274393
episode: 244 training return: tensor(33.6775, device='cuda:0')
episode: 245 training return: tensor(24.2766, device='cuda:0')
episode: 246 training return: tensor(19.1011, device='cuda:0')
episode: 247 training return: tensor(-80.2037, device='cuda:0')
epoch: 62 test_true_pfm: 56.50632543947635 sim_pfm: -6.260538632026874
episode: 248 training return: tensor(29.1946, device='cuda:0')
episode: 249 training return: tensor(-6.0597, device='cuda:0')
episode: 250 training return: tensor(11.8659, device='cuda:0')
episode: 251 training return: tensor(39.0423, device='cuda:0')
epoch: 63 test_true_pfm: 58.55405855885128 sim_pfm: 21.483967127348297
episode: 252 training return: tensor(20.7237, device='cuda:0')
episode: 253 training return: tensor(28.5178, device='cuda:0')
episode: 254 training return: tensor(-101.7620, device='cuda:0')
episode: 255 training return: tensor(56.5128, device='cuda:0')
epoch: 64 test_true_pfm: 54.198913498961076 sim_pfm: -19.426032437430695
episode: 256 training return: tensor(21.1809, device='cuda:0')
episode: 257 training return: tensor(32.2483, device='cuda:0')
episode: 258 training return: tensor(15.4053, device='cuda:0')
episode: 259 training return: tensor(-94.1244, device='cuda:0')
epoch: 65 test_true_pfm: 52.26041622186074 sim_pfm: -19.68810805818066
episode: 260 training return: tensor(53.3534, device='cuda:0')
episode: 261 training return: tensor(32.7117, device='cuda:0')
episode: 262 training return: tensor(45.0178, device='cuda:0')
episode: 263 training return: tensor(10.5517, device='cuda:0')
epoch: 66 test_true_pfm: 48.14578310823228 sim_pfm: -29.781825143052266
episode: 264 training return: tensor(-106.7292, device='cuda:0')
episode: 265 training return: tensor(20.1258, device='cuda:0')
episode: 266 training return: tensor(29.3347, device='cuda:0')
episode: 267 training return: tensor(-120.2913, device='cuda:0')
epoch: 67 test_true_pfm: 47.15327961966359 sim_pfm: -48.870366879570064
episode: 268 training return: tensor(19.5390, device='cuda:0')
episode: 269 training return: tensor(-67.7339, device='cuda:0')
episode: 270 training return: tensor(39.6378, device='cuda:0')
episode: 271 training return: tensor(-121.0803, device='cuda:0')
epoch: 68 test_true_pfm: 48.36693447962892 sim_pfm: 13.34780510836281
episode: 272 training return: tensor(-113.9021, device='cuda:0')
episode: 273 training return: tensor(36.8869, device='cuda:0')
episode: 274 training return: tensor(16.4611, device='cuda:0')
episode: 275 training return: tensor(30.4340, device='cuda:0')
epoch: 69 test_true_pfm: 61.19950743698875 sim_pfm: 7.725839494669344
episode: 276 training return: tensor(40.8746, device='cuda:0')
episode: 277 training return: tensor(-13.1690, device='cuda:0')
episode: 278 training return: tensor(-64.3802, device='cuda:0')
episode: 279 training return: tensor(-9.3710, device='cuda:0')
epoch: 70 test_true_pfm: 54.75692307669161 sim_pfm: 8.313323491771007
episode: 280 training return: tensor(49.8831, device='cuda:0')
episode: 281 training return: tensor(29.5655, device='cuda:0')
episode: 282 training return: tensor(-106.5112, device='cuda:0')
episode: 283 training return: tensor(38.3869, device='cuda:0')
epoch: 71 test_true_pfm: 47.917955144124754 sim_pfm: -18.047284635540564
episode: 284 training return: tensor(34.7544, device='cuda:0')
episode: 285 training return: tensor(43.7949, device='cuda:0')
episode: 286 training return: tensor(22.9424, device='cuda:0')
episode: 287 training return: tensor(39.8418, device='cuda:0')
epoch: 72 test_true_pfm: 51.56527707219719 sim_pfm: 32.437760196207094
episode: 288 training return: tensor(-104.2558, device='cuda:0')
episode: 289 training return: tensor(32.9490, device='cuda:0')
episode: 290 training return: tensor(21.7227, device='cuda:0')
episode: 291 training return: tensor(1.0464, device='cuda:0')
epoch: 73 test_true_pfm: 46.007793386565496 sim_pfm: -40.592040631646526
episode: 292 training return: tensor(-41.1990, device='cuda:0')
episode: 293 training return: tensor(58.4382, device='cuda:0')
episode: 294 training return: tensor(-3.1704, device='cuda:0')
episode: 295 training return: tensor(4.0792, device='cuda:0')
epoch: 74 test_true_pfm: 44.457357991795924 sim_pfm: -67.68580082067055
episode: 296 training return: tensor(19.0196, device='cuda:0')
episode: 297 training return: tensor(42.4181, device='cuda:0')
episode: 298 training return: tensor(29.0659, device='cuda:0')
episode: 299 training return: tensor(31.8622, device='cuda:0')
epoch: 75 test_true_pfm: 41.73228297495228 sim_pfm: -20.28898807108053
episode: 300 training return: tensor(7.2262, device='cuda:0')
episode: 301 training return: tensor(-80.9514, device='cuda:0')
episode: 302 training return: tensor(-117.8749, device='cuda:0')
episode: 303 training return: tensor(44.7619, device='cuda:0')
epoch: 76 test_true_pfm: 54.002958993969756 sim_pfm: -24.691796076111494
episode: 304 training return: tensor(-126.9872, device='cuda:0')
episode: 305 training return: tensor(20.5692, device='cuda:0')
episode: 306 training return: tensor(-112.3132, device='cuda:0')
episode: 307 training return: tensor(27.6703, device='cuda:0')
epoch: 77 test_true_pfm: 50.57772614084847 sim_pfm: -49.815281741094076
episode: 308 training return: tensor(23.7519, device='cuda:0')
episode: 309 training return: tensor(15.5095, device='cuda:0')
episode: 310 training return: tensor(24.9337, device='cuda:0')
episode: 311 training return: tensor(-119.3248, device='cuda:0')
epoch: 78 test_true_pfm: 47.15814181496793 sim_pfm: 5.551453803502954
episode: 312 training return: tensor(33.0635, device='cuda:0')
episode: 313 training return: tensor(28.4439, device='cuda:0')
episode: 314 training return: tensor(34.7876, device='cuda:0')
episode: 315 training return: tensor(25.5120, device='cuda:0')
epoch: 79 test_true_pfm: 46.30013736942489 sim_pfm: 19.18298726839712
episode: 316 training return: tensor(-3.5799, device='cuda:0')
episode: 317 training return: tensor(24.2117, device='cuda:0')
episode: 318 training return: tensor(43.5617, device='cuda:0')
episode: 319 training return: tensor(24.1756, device='cuda:0')
epoch: 80 test_true_pfm: 48.1313040877658 sim_pfm: 29.809890683472624
episode: 320 training return: tensor(-123.9020, device='cuda:0')
episode: 321 training return: tensor(32.8405, device='cuda:0')
episode: 322 training return: tensor(46.4908, device='cuda:0')
episode: 323 training return: tensor(40.7758, device='cuda:0')
epoch: 81 test_true_pfm: 44.4290563397928 sim_pfm: -51.123416345316215
episode: 324 training return: tensor(-112.6961, device='cuda:0')
episode: 325 training return: tensor(-103.6712, device='cuda:0')
episode: 326 training return: tensor(29.5125, device='cuda:0')
episode: 327 training return: tensor(46.5679, device='cuda:0')
epoch: 82 test_true_pfm: 48.19478752051943 sim_pfm: 39.840074660500974
episode: 328 training return: tensor(-107.1076, device='cuda:0')
episode: 329 training return: tensor(24.2214, device='cuda:0')
episode: 330 training return: tensor(-89.2279, device='cuda:0')
episode: 331 training return: tensor(22.6947, device='cuda:0')
epoch: 83 test_true_pfm: 51.931690560918256 sim_pfm: -67.76710879371967
episode: 332 training return: tensor(16.7737, device='cuda:0')
episode: 333 training return: tensor(-115.5392, device='cuda:0')
episode: 334 training return: tensor(39.9204, device='cuda:0')
episode: 335 training return: tensor(-110.8683, device='cuda:0')
epoch: 84 test_true_pfm: 65.35218619440415 sim_pfm: -17.22911870482494
episode: 336 training return: tensor(28.4122, device='cuda:0')
episode: 337 training return: tensor(49.0597, device='cuda:0')
episode: 338 training return: tensor(17.6536, device='cuda:0')
episode: 339 training return: tensor(24.3427, device='cuda:0')
epoch: 85 test_true_pfm: 43.596130844481934 sim_pfm: -26.690529703197534
episode: 340 training return: tensor(30.8425, device='cuda:0')
episode: 341 training return: tensor(-125.3113, device='cuda:0')
episode: 342 training return: tensor(46.2543, device='cuda:0')
episode: 343 training return: tensor(-93.7892, device='cuda:0')
epoch: 86 test_true_pfm: 47.11922849815141 sim_pfm: -54.4563160765043
episode: 344 training return: tensor(-103.2101, device='cuda:0')
episode: 345 training return: tensor(-1.5391, device='cuda:0')
episode: 346 training return: tensor(27.2573, device='cuda:0')
episode: 347 training return: tensor(29.1085, device='cuda:0')
epoch: 87 test_true_pfm: 45.81737561825288 sim_pfm: -11.563934218586656
episode: 348 training return: tensor(-118.4400, device='cuda:0')
episode: 349 training return: tensor(-105.0179, device='cuda:0')
episode: 350 training return: tensor(-124.7782, device='cuda:0')
episode: 351 training return: tensor(31.7902, device='cuda:0')
epoch: 88 test_true_pfm: 48.37772196697843 sim_pfm: 0.4408871498540975
episode: 352 training return: tensor(27.7902, device='cuda:0')
episode: 353 training return: tensor(15.8447, device='cuda:0')
episode: 354 training return: tensor(14.3223, device='cuda:0')
episode: 355 training return: tensor(-112.4522, device='cuda:0')
epoch: 89 test_true_pfm: 43.45289799218809 sim_pfm: -13.935598426056094
episode: 356 training return: tensor(-107.8332, device='cuda:0')
episode: 357 training return: tensor(42.6409, device='cuda:0')
episode: 358 training return: tensor(22.7253, device='cuda:0')
episode: 359 training return: tensor(30.7119, device='cuda:0')
epoch: 90 test_true_pfm: 52.09201913570157 sim_pfm: 2.681899203360081
episode: 360 training return: tensor(18.0909, device='cuda:0')
episode: 361 training return: tensor(-102.6775, device='cuda:0')
episode: 362 training return: tensor(19.6447, device='cuda:0')
episode: 363 training return: tensor(34.7838, device='cuda:0')
epoch: 91 test_true_pfm: 52.16703857654621 sim_pfm: 5.0450191610318145
episode: 364 training return: tensor(-118.0509, device='cuda:0')
episode: 365 training return: tensor(-112.9339, device='cuda:0')
episode: 366 training return: tensor(17.6273, device='cuda:0')
episode: 367 training return: tensor(12.8161, device='cuda:0')
epoch: 92 test_true_pfm: 48.57653362967633 sim_pfm: 31.126116088428535
episode: 368 training return: tensor(-107.2507, device='cuda:0')
episode: 369 training return: tensor(-145.4010, device='cuda:0')
episode: 370 training return: tensor(53.5798, device='cuda:0')
episode: 371 training return: tensor(38.6127, device='cuda:0')
epoch: 93 test_true_pfm: 51.63281222420842 sim_pfm: -19.579432660038584
episode: 372 training return: tensor(4.6252, device='cuda:0')
episode: 373 training return: tensor(38.1285, device='cuda:0')
episode: 374 training return: tensor(-3.6211, device='cuda:0')
episode: 375 training return: tensor(-112.5361, device='cuda:0')
epoch: 94 test_true_pfm: 44.31851446970917 sim_pfm: 38.46027301407885
episode: 376 training return: tensor(27.9325, device='cuda:0')
episode: 377 training return: tensor(4.7776, device='cuda:0')
episode: 378 training return: tensor(46.5798, device='cuda:0')
episode: 379 training return: tensor(63.4016, device='cuda:0')
epoch: 95 test_true_pfm: 50.640680293219894 sim_pfm: -18.784898827812867
episode: 380 training return: tensor(37.6637, device='cuda:0')
episode: 381 training return: tensor(27.1986, device='cuda:0')
episode: 382 training return: tensor(29.9370, device='cuda:0')
episode: 383 training return: tensor(11.7377, device='cuda:0')
epoch: 96 test_true_pfm: 59.74853017256021 sim_pfm: -68.8161535256484
episode: 384 training return: tensor(-107.1116, device='cuda:0')
episode: 385 training return: tensor(-113.3626, device='cuda:0')
episode: 386 training return: tensor(-115.3652, device='cuda:0')
episode: 387 training return: tensor(35.6535, device='cuda:0')
epoch: 97 test_true_pfm: 57.21415284795906 sim_pfm: -41.64525164552615
episode: 388 training return: tensor(-65.1864, device='cuda:0')
episode: 389 training return: tensor(-108.5748, device='cuda:0')
episode: 390 training return: tensor(14.3502, device='cuda:0')
episode: 391 training return: tensor(50.3109, device='cuda:0')
epoch: 98 test_true_pfm: 47.873859342148876 sim_pfm: -6.624762794427807
episode: 392 training return: tensor(39.9730, device='cuda:0')
episode: 393 training return: tensor(-111.2556, device='cuda:0')
episode: 394 training return: tensor(28.9534, device='cuda:0')
episode: 395 training return: tensor(-59.8327, device='cuda:0')
epoch: 99 test_true_pfm: 51.67177533945555 sim_pfm: 3.015069309750106
episode: 396 training return: tensor(-122.7728, device='cuda:0')
episode: 397 training return: tensor(-70.9851, device='cuda:0')
episode: 398 training return: tensor(33.6359, device='cuda:0')
episode: 399 training return: tensor(-108.3893, device='cuda:0')
epoch: 100 test_true_pfm: 52.808216518924816 sim_pfm: 39.58265370659065
episode: 400 training return: tensor(41.0553, device='cuda:0')
episode: 401 training return: tensor(43.3585, device='cuda:0')
episode: 402 training return: tensor(45.2359, device='cuda:0')
episode: 403 training return: tensor(47.4395, device='cuda:0')
epoch: 101 test_true_pfm: 52.468701720873455 sim_pfm: -72.32143139582476
episode: 404 training return: tensor(-119.4315, device='cuda:0')
episode: 405 training return: tensor(27.6698, device='cuda:0')
episode: 406 training return: tensor(24.8356, device='cuda:0')
episode: 407 training return: tensor(0.2370, device='cuda:0')
epoch: 102 test_true_pfm: 56.847975526232595 sim_pfm: -28.553666953346692
episode: 408 training return: tensor(-55.4976, device='cuda:0')
episode: 409 training return: tensor(-74.8884, device='cuda:0')
episode: 410 training return: tensor(-126.3570, device='cuda:0')
episode: 411 training return: tensor(36.8740, device='cuda:0')
epoch: 103 test_true_pfm: 47.022035503921536 sim_pfm: 9.238431620737538
episode: 412 training return: tensor(29.3117, device='cuda:0')
episode: 413 training return: tensor(29.8397, device='cuda:0')
episode: 414 training return: tensor(28.1646, device='cuda:0')
episode: 415 training return: tensor(-120.8182, device='cuda:0')
epoch: 104 test_true_pfm: 48.13115562201902 sim_pfm: -62.4020064425189
episode: 416 training return: tensor(17.7597, device='cuda:0')
episode: 417 training return: tensor(-111.0739, device='cuda:0')
episode: 418 training return: tensor(32.6869, device='cuda:0')
episode: 419 training return: tensor(49.3600, device='cuda:0')
epoch: 105 test_true_pfm: 61.84284776520076 sim_pfm: -20.843813760066404
episode: 420 training return: tensor(29.1819, device='cuda:0')
episode: 421 training return: tensor(34.7204, device='cuda:0')
episode: 422 training return: tensor(28.0430, device='cuda:0')
episode: 423 training return: tensor(21.1098, device='cuda:0')
epoch: 106 test_true_pfm: 43.78387800768374 sim_pfm: 32.93377935031894
episode: 424 training return: tensor(53.6536, device='cuda:0')
episode: 425 training return: tensor(6.2348, device='cuda:0')
episode: 426 training return: tensor(-86.0981, device='cuda:0')
episode: 427 training return: tensor(4.6795, device='cuda:0')
epoch: 107 test_true_pfm: 47.52114313118615 sim_pfm: -32.33662138096988
episode: 428 training return: tensor(-75.3721, device='cuda:0')
episode: 429 training return: tensor(33.9955, device='cuda:0')
episode: 430 training return: tensor(-120.4061, device='cuda:0')
episode: 431 training return: tensor(-113.5178, device='cuda:0')
epoch: 108 test_true_pfm: 53.49873246264442 sim_pfm: -27.814874104905176
episode: 432 training return: tensor(50.4301, device='cuda:0')
episode: 433 training return: tensor(37.4050, device='cuda:0')
episode: 434 training return: tensor(-88.9517, device='cuda:0')
episode: 435 training return: tensor(29.8810, device='cuda:0')
epoch: 109 test_true_pfm: 42.917897211313054 sim_pfm: 35.45145378976595
episode: 436 training return: tensor(41.4449, device='cuda:0')
episode: 437 training return: tensor(-48.3719, device='cuda:0')
episode: 438 training return: tensor(34.4718, device='cuda:0')
episode: 439 training return: tensor(-100.0621, device='cuda:0')
epoch: 110 test_true_pfm: 50.68486539774226 sim_pfm: -35.01679461041931
episode: 440 training return: tensor(20.5829, device='cuda:0')
episode: 441 training return: tensor(-96.8637, device='cuda:0')
episode: 442 training return: tensor(28.8149, device='cuda:0')
episode: 443 training return: tensor(-116.5106, device='cuda:0')
epoch: 111 test_true_pfm: 52.49136953220508 sim_pfm: -19.741609156795313
episode: 444 training return: tensor(19.0503, device='cuda:0')
episode: 445 training return: tensor(23.8035, device='cuda:0')
episode: 446 training return: tensor(8.5362, device='cuda:0')
episode: 447 training return: tensor(26.2531, device='cuda:0')
epoch: 112 test_true_pfm: 56.301165746871604 sim_pfm: -28.451824869669508
episode: 448 training return: tensor(14.7234, device='cuda:0')
episode: 449 training return: tensor(-110.9077, device='cuda:0')
episode: 450 training return: tensor(13.1654, device='cuda:0')
episode: 451 training return: tensor(50.4406, device='cuda:0')
epoch: 113 test_true_pfm: 59.56258220081146 sim_pfm: -43.49828047947958
episode: 452 training return: tensor(-131.4131, device='cuda:0')
episode: 453 training return: tensor(25.5528, device='cuda:0')
episode: 454 training return: tensor(-107.3613, device='cuda:0')
episode: 455 training return: tensor(25.1085, device='cuda:0')
epoch: 114 test_true_pfm: 51.16342167340623 sim_pfm: 41.68123842513887
episode: 456 training return: tensor(21.9893, device='cuda:0')
episode: 457 training return: tensor(44.5919, device='cuda:0')
episode: 458 training return: tensor(45.4117, device='cuda:0')
episode: 459 training return: tensor(-1.2021, device='cuda:0')
epoch: 115 test_true_pfm: 55.30845668937782 sim_pfm: 3.8080285411910153
episode: 460 training return: tensor(26.0704, device='cuda:0')
episode: 461 training return: tensor(-102.2743, device='cuda:0')
episode: 462 training return: tensor(26.4449, device='cuda:0')
episode: 463 training return: tensor(45.2922, device='cuda:0')
epoch: 116 test_true_pfm: 48.18405449169519 sim_pfm: 3.179726922028931
episode: 464 training return: tensor(32.1408, device='cuda:0')
episode: 465 training return: tensor(-100.5865, device='cuda:0')
episode: 466 training return: tensor(7.7034, device='cuda:0')
episode: 467 training return: tensor(23.7753, device='cuda:0')
epoch: 117 test_true_pfm: 55.00213100510307 sim_pfm: -64.57114348623436
episode: 468 training return: tensor(-4.5888, device='cuda:0')
episode: 469 training return: tensor(11.6008, device='cuda:0')
episode: 470 training return: tensor(41.1373, device='cuda:0')
episode: 471 training return: tensor(34.8992, device='cuda:0')
epoch: 118 test_true_pfm: 44.0774412276191 sim_pfm: -5.6977843669301365
episode: 472 training return: tensor(-76.2241, device='cuda:0')
episode: 473 training return: tensor(33.8196, device='cuda:0')
episode: 474 training return: tensor(-107.3020, device='cuda:0')
episode: 475 training return: tensor(25.2344, device='cuda:0')
epoch: 119 test_true_pfm: 51.90706739336821 sim_pfm: 20.411165450973204
episode: 476 training return: tensor(37.0387, device='cuda:0')
episode: 477 training return: tensor(5.4997, device='cuda:0')
episode: 478 training return: tensor(-109.8235, device='cuda:0')
episode: 479 training return: tensor(11.8521, device='cuda:0')
epoch: 120 test_true_pfm: 60.850003203652705 sim_pfm: 10.888242743775482
episode: 480 training return: tensor(22.6470, device='cuda:0')
episode: 481 training return: tensor(20.0784, device='cuda:0')
episode: 482 training return: tensor(-108.7192, device='cuda:0')
episode: 483 training return: tensor(21.8924, device='cuda:0')
epoch: 121 test_true_pfm: 43.20948339335314 sim_pfm: 6.703374927159166
episode: 484 training return: tensor(48.5176, device='cuda:0')
episode: 485 training return: tensor(-72.3745, device='cuda:0')
episode: 486 training return: tensor(18.0398, device='cuda:0')
episode: 487 training return: tensor(46.9848, device='cuda:0')
epoch: 122 test_true_pfm: 44.873849817255845 sim_pfm: -1.6785327682853677
episode: 488 training return: tensor(30.1574, device='cuda:0')
episode: 489 training return: tensor(6.4416, device='cuda:0')
episode: 490 training return: tensor(-58.7789, device='cuda:0')
episode: 491 training return: tensor(39.6984, device='cuda:0')
epoch: 123 test_true_pfm: 44.41968264327952 sim_pfm: 8.856897371273954
episode: 492 training return: tensor(35.0200, device='cuda:0')
episode: 493 training return: tensor(-100.5617, device='cuda:0')
episode: 494 training return: tensor(22.9075, device='cuda:0')
episode: 495 training return: tensor(3.7597, device='cuda:0')
epoch: 124 test_true_pfm: 62.671508898335105 sim_pfm: -7.559410476626363
episode: 496 training return: tensor(-110.9687, device='cuda:0')
episode: 497 training return: tensor(33.3899, device='cuda:0')
episode: 498 training return: tensor(-106.9481, device='cuda:0')
episode: 499 training return: tensor(25.6628, device='cuda:0')
epoch: 125 test_true_pfm: 43.91082279423288 sim_pfm: 9.210013149963924
episode: 500 training return: tensor(34.1431, device='cuda:0')
episode: 501 training return: tensor(47.8322, device='cuda:0')
episode: 502 training return: tensor(-3.3544, device='cuda:0')
episode: 503 training return: tensor(-104.3505, device='cuda:0')
epoch: 126 test_true_pfm: 56.78453742270153 sim_pfm: 7.854607105976902
episode: 504 training return: tensor(41.5284, device='cuda:0')
episode: 505 training return: tensor(28.4002, device='cuda:0')
episode: 506 training return: tensor(2.4214, device='cuda:0')
episode: 507 training return: tensor(-108.5477, device='cuda:0')
epoch: 127 test_true_pfm: 65.27286316575305 sim_pfm: -31.068486094358377
episode: 508 training return: tensor(45.9998, device='cuda:0')
episode: 509 training return: tensor(44.6017, device='cuda:0')
episode: 510 training return: tensor(-102.5080, device='cuda:0')
episode: 511 training return: tensor(40.0029, device='cuda:0')
epoch: 128 test_true_pfm: 48.61512937500159 sim_pfm: -37.697155735769776
episode: 512 training return: tensor(-78.8858, device='cuda:0')
episode: 513 training return: tensor(-105.3775, device='cuda:0')
episode: 514 training return: tensor(35.7440, device='cuda:0')
episode: 515 training return: tensor(13.5788, device='cuda:0')
epoch: 129 test_true_pfm: 51.8460071081456 sim_pfm: 14.743473520193948
episode: 516 training return: tensor(52.2901, device='cuda:0')
episode: 517 training return: tensor(21.1767, device='cuda:0')
episode: 518 training return: tensor(-73.1638, device='cuda:0')
episode: 519 training return: tensor(-68.7040, device='cuda:0')
epoch: 130 test_true_pfm: 51.333005882918634 sim_pfm: 0.40304485538508744
episode: 520 training return: tensor(-25.1366, device='cuda:0')
episode: 521 training return: tensor(-95.0221, device='cuda:0')
episode: 522 training return: tensor(11.9478, device='cuda:0')
episode: 523 training return: tensor(-114.3217, device='cuda:0')
epoch: 131 test_true_pfm: 61.100242283836884 sim_pfm: -16.15271510644816
episode: 524 training return: tensor(27.5993, device='cuda:0')
episode: 525 training return: tensor(46.9326, device='cuda:0')
episode: 526 training return: tensor(26.9523, device='cuda:0')
episode: 527 training return: tensor(-104.9054, device='cuda:0')
epoch: 132 test_true_pfm: 49.17634953321525 sim_pfm: 2.6271622205560563
episode: 528 training return: tensor(-111.6627, device='cuda:0')
episode: 529 training return: tensor(49.3636, device='cuda:0')
episode: 530 training return: tensor(53.7603, device='cuda:0')
episode: 531 training return: tensor(49.4594, device='cuda:0')
epoch: 133 test_true_pfm: 51.07948969198628 sim_pfm: 27.743790975713637
episode: 532 training return: tensor(-58.2129, device='cuda:0')
episode: 533 training return: tensor(-144.4704, device='cuda:0')
episode: 534 training return: tensor(32.0574, device='cuda:0')
episode: 535 training return: tensor(15.6756, device='cuda:0')
epoch: 134 test_true_pfm: 47.40995166194121 sim_pfm: -11.146839393116533
episode: 536 training return: tensor(32.8435, device='cuda:0')
episode: 537 training return: tensor(44.0601, device='cuda:0')
episode: 538 training return: tensor(10.1983, device='cuda:0')
episode: 539 training return: tensor(58.0181, device='cuda:0')
epoch: 135 test_true_pfm: 48.533251702941826 sim_pfm: 8.32167291767546
episode: 540 training return: tensor(37.7144, device='cuda:0')
episode: 541 training return: tensor(-100.9294, device='cuda:0')
episode: 542 training return: tensor(11.6868, device='cuda:0')
episode: 543 training return: tensor(17.3766, device='cuda:0')
epoch: 136 test_true_pfm: 55.79747260867025 sim_pfm: -53.253662419843025
episode: 544 training return: tensor(-113.9471, device='cuda:0')
episode: 545 training return: tensor(20.6080, device='cuda:0')
episode: 546 training return: tensor(7.9222, device='cuda:0')
episode: 547 training return: tensor(-105.1376, device='cuda:0')
epoch: 137 test_true_pfm: 51.67981732783477 sim_pfm: -75.30990829708753
episode: 548 training return: tensor(38.5316, device='cuda:0')
episode: 549 training return: tensor(9.4107, device='cuda:0')
episode: 550 training return: tensor(27.3423, device='cuda:0')
episode: 551 training return: tensor(-94.5159, device='cuda:0')
epoch: 138 test_true_pfm: 53.62538798534265 sim_pfm: -5.972127676475793
episode: 552 training return: tensor(-61.5986, device='cuda:0')
episode: 553 training return: tensor(-104.2174, device='cuda:0')
episode: 554 training return: tensor(-104.0188, device='cuda:0')
episode: 555 training return: tensor(3.3547, device='cuda:0')
epoch: 139 test_true_pfm: 47.59184293360145 sim_pfm: 14.27364504080615
episode: 556 training return: tensor(-98.0669, device='cuda:0')
episode: 557 training return: tensor(28.7385, device='cuda:0')
episode: 558 training return: tensor(-114.7539, device='cuda:0')
episode: 559 training return: tensor(30.7320, device='cuda:0')
epoch: 140 test_true_pfm: 47.97070725145731 sim_pfm: -28.102169358805988
episode: 560 training return: tensor(32.2541, device='cuda:0')
episode: 561 training return: tensor(37.8401, device='cuda:0')
episode: 562 training return: tensor(31.2939, device='cuda:0')
episode: 563 training return: tensor(-120.7556, device='cuda:0')
epoch: 141 test_true_pfm: 48.44926350007659 sim_pfm: -69.4636313836323
episode: 564 training return: tensor(-103.6506, device='cuda:0')
episode: 565 training return: tensor(-100.5222, device='cuda:0')
episode: 566 training return: tensor(-88.6305, device='cuda:0')
episode: 567 training return: tensor(-21.7080, device='cuda:0')
epoch: 142 test_true_pfm: 52.267179197948664 sim_pfm: -50.48902087992174
episode: 568 training return: tensor(-112.4220, device='cuda:0')
episode: 569 training return: tensor(10.6543, device='cuda:0')
episode: 570 training return: tensor(-117.0862, device='cuda:0')
episode: 571 training return: tensor(34.1601, device='cuda:0')
epoch: 143 test_true_pfm: 46.68318531245711 sim_pfm: 16.735818620328793
episode: 572 training return: tensor(-118.2816, device='cuda:0')
episode: 573 training return: tensor(-124.3416, device='cuda:0')
episode: 574 training return: tensor(-116.0858, device='cuda:0')
episode: 575 training return: tensor(49.9823, device='cuda:0')
epoch: 144 test_true_pfm: 52.44856090445793 sim_pfm: 10.85015899806749
episode: 576 training return: tensor(41.0332, device='cuda:0')
episode: 577 training return: tensor(11.5822, device='cuda:0')
episode: 578 training return: tensor(32.6557, device='cuda:0')
episode: 579 training return: tensor(-99.6204, device='cuda:0')
epoch: 145 test_true_pfm: 48.32391763393499 sim_pfm: 13.539916280226317
episode: 580 training return: tensor(-132.3211, device='cuda:0')
episode: 581 training return: tensor(18.3016, device='cuda:0')
episode: 582 training return: tensor(-63.4303, device='cuda:0')
episode: 583 training return: tensor(53.7310, device='cuda:0')
epoch: 146 test_true_pfm: 45.2460047445709 sim_pfm: 35.09010629169643
episode: 584 training return: tensor(50.8646, device='cuda:0')
episode: 585 training return: tensor(32.4925, device='cuda:0')
episode: 586 training return: tensor(43.2522, device='cuda:0')
episode: 587 training return: tensor(47.0167, device='cuda:0')
epoch: 147 test_true_pfm: 48.22345787278031 sim_pfm: -85.73318340977421
episode: 588 training return: tensor(-173.0006, device='cuda:0')
episode: 589 training return: tensor(34.7388, device='cuda:0')
episode: 590 training return: tensor(33.0689, device='cuda:0')
episode: 591 training return: tensor(47.9212, device='cuda:0')
epoch: 148 test_true_pfm: 63.14230072219931 sim_pfm: -26.88232191645657
episode: 592 training return: tensor(37.1456, device='cuda:0')
episode: 593 training return: tensor(-62.2226, device='cuda:0')
episode: 594 training return: tensor(-116.7456, device='cuda:0')
episode: 595 training return: tensor(46.3497, device='cuda:0')
epoch: 149 test_true_pfm: 46.297330808420114 sim_pfm: 38.85834147600108
episode: 596 training return: tensor(46.9344, device='cuda:0')
episode: 597 training return: tensor(-114.6475, device='cuda:0')
episode: 598 training return: tensor(-67.0063, device='cuda:0')
episode: 599 training return: tensor(34.6905, device='cuda:0')
epoch: 150 test_true_pfm: 56.106683109802006 sim_pfm: -48.419518916530066
