epoch: 0 training_loss 0.46909609526395796 test_loss: 0.30391993522644045
epoch: 1 training_loss 0.29975983768701553 test_loss: 0.2974399089813232
epoch: 2 training_loss 0.26338092938065527 test_loss: 0.24935603141784668
epoch: 3 training_loss 0.23797679841518402 test_loss: 0.22641243934631347
epoch: 4 training_loss 0.22621845088899137 test_loss: 0.2287057876586914
epoch: 5 training_loss 0.22901033326983453 test_loss: 0.2105499267578125
epoch: 6 training_loss 0.2198975369334221 test_loss: 0.21682934761047362
epoch: 7 training_loss 0.2210538513958454 test_loss: 0.2587116003036499
epoch: 8 training_loss 0.22789012685418128 test_loss: 0.21323022842407227
epoch: 9 training_loss 0.22524490118026733 test_loss: 0.24616892337799073
epoch: 10 training_loss 0.2185404419898987 test_loss: 0.23474204540252686
epoch: 11 training_loss 0.22087447509169578 test_loss: 0.20345990657806395
epoch: 12 training_loss 0.20829017147421836 test_loss: 0.22017951011657716
epoch: 13 training_loss 0.2156358255445957 test_loss: 0.2356621503829956
epoch: 14 training_loss 0.2047691461443901 test_loss: 0.24190924167633057
epoch: 15 training_loss 0.21059981741011144 test_loss: 0.23405916690826417
epoch: 16 training_loss 0.2156106385588646 test_loss: 0.18712517023086547
epoch: 17 training_loss 0.21875688195228576 test_loss: 0.19274570941925048
epoch: 18 training_loss 0.2078018306940794 test_loss: 0.2179579257965088
epoch: 19 training_loss 0.21544103018939495 test_loss: 0.21036617755889891
epoch: 20 training_loss 0.20345651879906654 test_loss: 0.2345363140106201
epoch: 21 training_loss 0.21260036781430244 test_loss: 0.2221585988998413
epoch: 22 training_loss 0.21877023823559283 test_loss: 0.20740029811859131
epoch: 23 training_loss 0.19548684827983379 test_loss: 0.18451684713363647
epoch: 24 training_loss 0.19962015211582185 test_loss: 0.22508084774017334
epoch: 25 training_loss 0.2092550040781498 test_loss: 0.2218557119369507
epoch: 26 training_loss 0.20560937374830246 test_loss: 0.22830593585968018
epoch: 27 training_loss 0.20787597626447676 test_loss: 0.2136892557144165
epoch: 28 training_loss 0.19037198945879935 test_loss: 0.18554092645645143
epoch: 29 training_loss 0.20872071854770183 test_loss: 0.1883746027946472
epoch: 30 training_loss 0.20268927603960038 test_loss: 0.20060129165649415
epoch: 31 training_loss 0.20553948402404784 test_loss: 0.21747565269470215
epoch: 32 training_loss 0.20071885213255883 test_loss: 0.2037799119949341
epoch: 33 training_loss 0.19737470746040345 test_loss: 0.20813477039337158
epoch: 34 training_loss 0.21071466661989688 test_loss: 0.20258092880249023
epoch: 35 training_loss 0.19871675804257394 test_loss: 0.1823310971260071
epoch: 36 training_loss 0.19680483996868134 test_loss: 0.21402647495269775
epoch: 37 training_loss 0.20729260846972467 test_loss: 0.18328245878219604
epoch: 38 training_loss 0.1900593113899231 test_loss: 0.21195027828216553
epoch: 39 training_loss 0.2038041715323925 test_loss: 0.20644333362579345
epoch: 40 training_loss 0.19609659723937511 test_loss: 0.2138364791870117
epoch: 41 training_loss 0.1953358944505453 test_loss: 0.2127523422241211
epoch: 42 training_loss 0.1975924501568079 test_loss: 0.19045571088790894
epoch: 43 training_loss 0.19132948614656925 test_loss: 0.20631368160247804
epoch: 44 training_loss 0.19187316060066223 test_loss: 0.1689408540725708
epoch: 45 training_loss 0.19398722887039185 test_loss: 0.19893479347229004
epoch: 46 training_loss 0.18939949125051497 test_loss: 0.19842036962509155
epoch: 47 training_loss 0.19385980144143106 test_loss: 0.23365316390991211
epoch: 48 training_loss 0.19011362999677658 test_loss: 0.20425758361816407
epoch: 49 training_loss 0.18778963789343833 test_loss: 0.21390466690063475
epoch: 50 training_loss 0.19749337635934353 test_loss: 0.18468234539031983
epoch: 51 training_loss 0.1910347546637058 test_loss: 0.20036237239837645
epoch: 52 training_loss 0.1819849994778633 test_loss: 0.20800273418426513
epoch: 53 training_loss 0.1953784539550543 test_loss: 0.20461173057556153
epoch: 54 training_loss 0.19311545722186566 test_loss: 0.1994330883026123
epoch: 55 training_loss 0.19025664903223516 test_loss: 0.18455088138580322
epoch: 56 training_loss 0.19056023970246316 test_loss: 0.17868140935897828
epoch: 57 training_loss 0.19072416551411153 test_loss: 0.212821888923645
epoch: 58 training_loss 0.18906857393682003 test_loss: 0.20488786697387695
epoch: 59 training_loss 0.20195753775537015 test_loss: 0.19926848411560058
epoch: 60 training_loss 0.1872815316915512 test_loss: 0.17362161874771118
epoch: 61 training_loss 0.18777332298457622 test_loss: 0.20248091220855713
epoch: 62 training_loss 0.17930515594780444 test_loss: 0.20936968326568603
epoch: 63 training_loss 0.18733047731220723 test_loss: 0.1799396514892578
epoch: 64 training_loss 0.18516563110053538 test_loss: 0.181756329536438
epoch: 65 training_loss 0.187655114158988 test_loss: 0.2012943983078003
epoch: 66 training_loss 0.19527229502797128 test_loss: 0.23780834674835205
epoch: 67 training_loss 0.18811587542295455 test_loss: 0.1991321086883545
epoch: 68 training_loss 0.18689759880304335 test_loss: 0.20312838554382323
epoch: 69 training_loss 0.1916699842363596 test_loss: 0.19483119249343872
epoch: 70 training_loss 0.19124186307191848 test_loss: 0.20865561962127685
epoch: 71 training_loss 0.18583200491964816 test_loss: 0.20942091941833496
epoch: 72 training_loss 0.19190433397889137 test_loss: 0.17958058118820192
epoch: 73 training_loss 0.18532349050045013 test_loss: 0.2005472421646118
epoch: 74 training_loss 0.2033603310585022 test_loss: 0.2028334617614746
epoch: 75 training_loss 0.1933228014409542 test_loss: 0.20415749549865722
epoch: 76 training_loss 0.18345411852002144 test_loss: 0.19702186584472656
epoch: 77 training_loss 0.1912733093649149 test_loss: 0.18770679235458373
epoch: 78 training_loss 0.19144960150122642 test_loss: 0.21100034713745117
epoch: 79 training_loss 0.1812739236652851 test_loss: 0.19554197788238525
epoch: 80 training_loss 0.19167831264436244 test_loss: 0.17688814401626587
epoch: 81 training_loss 0.19025394603610038 test_loss: 0.19172220230102538
epoch: 82 training_loss 0.18839580550789833 test_loss: 0.20884995460510253
epoch: 83 training_loss 0.1913783633708954 test_loss: 0.17836897373199462
epoch: 84 training_loss 0.18851341292262078 test_loss: 0.1937925934791565
epoch: 85 training_loss 0.1941364488005638 test_loss: 0.18504750728607178
epoch: 86 training_loss 0.18546400167047977 test_loss: 0.19805167913436889
epoch: 87 training_loss 0.1768943316489458 test_loss: 0.1774998426437378
epoch: 88 training_loss 0.18034762918949127 test_loss: 0.20655345916748047
epoch: 89 training_loss 0.18250985838472844 test_loss: 0.1727190852165222
epoch: 90 training_loss 0.17966893158853053 test_loss: 0.185980486869812
epoch: 91 training_loss 0.18027066245675086 test_loss: 0.1880500078201294
epoch: 92 training_loss 0.18334007807075978 test_loss: 0.18718328475952148
epoch: 93 training_loss 0.18661899648606778 test_loss: 0.19846093654632568
epoch: 94 training_loss 0.18062835931777954 test_loss: 0.1981687903404236
epoch: 95 training_loss 0.19134695254266262 test_loss: 0.1929520010948181
epoch: 96 training_loss 0.1774827928841114 test_loss: 0.19285541772842407
epoch: 97 training_loss 0.1936448060721159 test_loss: 0.19957789182662963
epoch: 98 training_loss 0.1803609324246645 test_loss: 0.17891055345535278
epoch: 99 training_loss 0.17918958388268946 test_loss: 0.1668989896774292
epoch: 100 training_loss 0.18409016355872154 test_loss: 0.20798583030700685
epoch: 101 training_loss 0.1893279641121626 test_loss: 0.22176892757415773
epoch: 102 training_loss 0.1857201950997114 test_loss: 0.18130674362182617
epoch: 103 training_loss 0.18650937564671038 test_loss: 0.17065852880477905
epoch: 104 training_loss 0.18682028211653232 test_loss: 0.17834680080413817
epoch: 105 training_loss 0.18109882101416588 test_loss: 0.19858452081680297
epoch: 106 training_loss 0.18448637910187243 test_loss: 0.17517757415771484
epoch: 107 training_loss 0.18724402368068696 test_loss: 0.18298380374908446
epoch: 108 training_loss 0.1823378974944353 test_loss: 0.17706689834594727
epoch: 109 training_loss 0.19089548744261264 test_loss: 0.19903532266616822
epoch: 110 training_loss 0.18145049251616002 test_loss: 0.17685710191726683
epoch: 111 training_loss 0.1790762584656477 test_loss: 0.19401469230651855
epoch: 112 training_loss 0.18648385599255562 test_loss: 0.17942147254943847
epoch: 113 training_loss 0.1880157359689474 test_loss: 0.179240620136261
epoch: 114 training_loss 0.18244517035782337 test_loss: 0.187924325466156
epoch: 115 training_loss 0.17790443167090417 test_loss: 0.18343466520309448
epoch: 116 training_loss 0.1883854318410158 test_loss: 0.21635117530822753
epoch: 117 training_loss 0.17931415259838104 test_loss: 0.16778721809387206
epoch: 118 training_loss 0.18185958929359913 test_loss: 0.18955765962600707
epoch: 119 training_loss 0.18251605048775674 test_loss: 0.1951033353805542
epoch: 120 training_loss 0.18733951531350612 test_loss: 0.1947665572166443
epoch: 121 training_loss 0.17875806421041487 test_loss: 0.19585448503494263
epoch: 122 training_loss 0.18282714389264584 test_loss: 0.20481832027435304
epoch: 123 training_loss 0.18559407323598862 test_loss: 0.19534506797790527
epoch: 124 training_loss 0.17936346158385277 test_loss: 0.21622147560119628
epoch: 125 training_loss 0.17645166195929052 test_loss: 0.19431569576263427
epoch: 126 training_loss 0.18895998857915403 test_loss: 0.18746423721313477
epoch: 127 training_loss 0.18291585423052312 test_loss: 0.16387087106704712
epoch: 128 training_loss 0.17906269617378712 test_loss: 0.19426307678222657
epoch: 129 training_loss 0.17859958335757256 test_loss: 0.2502628803253174
epoch: 130 training_loss 0.18536387972533702 test_loss: 0.19278351068496705
epoch: 131 training_loss 0.18065091118216514 test_loss: 0.16752392053604126
epoch: 132 training_loss 0.1767029855400324 test_loss: 0.2048933744430542
epoch: 133 training_loss 0.174779180213809 test_loss: 0.18903737068176268
epoch: 134 training_loss 0.1868227235227823 test_loss: 0.18220759630203248
epoch: 135 training_loss 0.18586347065865993 test_loss: 0.19671313762664794
epoch: 136 training_loss 0.183987138569355 test_loss: 0.20086045265197755
epoch: 137 training_loss 0.17935357123613357 test_loss: 0.19686027765274047
epoch: 138 training_loss 0.17573669321835042 test_loss: 0.16191977262496948
epoch: 139 training_loss 0.17825831092894076 test_loss: 0.174263334274292
epoch: 140 training_loss 0.17772056117653848 test_loss: 0.18148493766784668
epoch: 141 training_loss 0.18033864356577398 test_loss: 0.2167675256729126
epoch: 142 training_loss 0.18231351777911187 test_loss: 0.18756500482559205
epoch: 143 training_loss 0.18438156388700008 test_loss: 0.16758456230163574
epoch: 144 training_loss 0.1826379518210888 test_loss: 0.17845845222473145
epoch: 145 training_loss 0.18458977997303008 test_loss: 0.1714656710624695
epoch: 146 training_loss 0.17881354592740537 test_loss: 0.1964891791343689
epoch: 147 training_loss 0.17998024702072143 test_loss: 0.18340733051300048
epoch: 148 training_loss 0.17629746958613396 test_loss: 0.1831180453300476
epoch: 149 training_loss 0.17508988715708257 test_loss: 0.21075904369354248
epoch: 0 training_loss 48.07635419845581 test_loss: 30.324874877929688
epoch: 1 training_loss 22.466237716674804 test_loss: 18.615725708007812
epoch: 2 training_loss 16.779049530029297 test_loss: 14.945111083984376
epoch: 3 training_loss 13.824370126724244 test_loss: 12.937490844726563
epoch: 4 training_loss 12.00616283416748 test_loss: 11.119741821289063
epoch: 5 training_loss 11.079805612564087 test_loss: 10.554498291015625
epoch: 6 training_loss 10.549143114089965 test_loss: 9.831398010253906
epoch: 7 training_loss 9.883924169540405 test_loss: 9.639883422851563
epoch: 8 training_loss 9.576224570274354 test_loss: 9.2450927734375
epoch: 9 training_loss 9.288394494056702 test_loss: 9.084178924560547
epoch: 10 training_loss 8.85706196784973 test_loss: 8.700336456298828
epoch: 11 training_loss 8.683672318458557 test_loss: 8.584082794189452
epoch: 12 training_loss 8.399387159347533 test_loss: 8.304106903076171
epoch: 13 training_loss 8.09603096485138 test_loss: 8.196418762207031
epoch: 14 training_loss 7.951754846572876 test_loss: 7.7552024841308596
epoch: 15 training_loss 7.638775401115417 test_loss: 7.883867645263672
epoch: 16 training_loss 7.646310868263245 test_loss: 7.450245666503906
epoch: 17 training_loss 7.423552174568176 test_loss: 6.963387298583984
epoch: 18 training_loss 7.252782354354858 test_loss: 6.894419860839844
epoch: 19 training_loss 7.207208871841431 test_loss: 7.114047241210938
epoch: 20 training_loss 6.935086016654968 test_loss: 6.487113189697266
epoch: 21 training_loss 6.859971470832825 test_loss: 6.548551940917969
epoch: 22 training_loss 6.503910241127014 test_loss: 6.737409973144532
epoch: 23 training_loss 6.551486554145813 test_loss: 6.960289764404297
epoch: 24 training_loss 6.614087629318237 test_loss: 6.385034561157227
epoch: 25 training_loss 6.099988942146301 test_loss: 6.176922988891602
epoch: 26 training_loss 5.931864242553711 test_loss: 6.112409591674805
epoch: 27 training_loss 5.916519365310669 test_loss: 5.908563613891602
epoch: 28 training_loss 5.7497345113754275 test_loss: 5.934388732910156
epoch: 29 training_loss 5.700101509094238 test_loss: 5.886343383789063
epoch: 30 training_loss 5.486117897033691 test_loss: 5.636343765258789
epoch: 31 training_loss 5.422565369606018 test_loss: 5.040262985229492
epoch: 32 training_loss 5.418450403213501 test_loss: 5.911314392089844
epoch: 33 training_loss 5.406448383331298 test_loss: 5.911758041381836
epoch: 34 training_loss 5.278897094726562 test_loss: 4.831439208984375
epoch: 35 training_loss 5.223109946250916 test_loss: 5.281063842773437
epoch: 36 training_loss 5.196849155426025 test_loss: 5.701246643066407
epoch: 37 training_loss 5.099667613506317 test_loss: 4.861966705322265
epoch: 38 training_loss 5.216425333023071 test_loss: 4.882187271118164
epoch: 39 training_loss 4.859733371734619 test_loss: 5.357754516601562
epoch: 40 training_loss 4.853067667484283 test_loss: 5.338286972045898
epoch: 41 training_loss 4.736899476051331 test_loss: 4.786368942260742
epoch: 42 training_loss 4.668640775680542 test_loss: 5.083878707885742
epoch: 43 training_loss 4.76319105386734 test_loss: 4.562908935546875
epoch: 44 training_loss 4.538529725074768 test_loss: 4.549910354614258
epoch: 45 training_loss 4.337955014705658 test_loss: 4.701134872436524
epoch: 46 training_loss 4.505672285556793 test_loss: 4.278616333007813
epoch: 47 training_loss 4.302861685752869 test_loss: 4.380123901367187
epoch: 48 training_loss 4.380719149112702 test_loss: 4.33142204284668
epoch: 49 training_loss 4.154835367202759 test_loss: 4.278092956542968
epoch: 50 training_loss 4.104219126701355 test_loss: 4.01605110168457
epoch: 51 training_loss 4.138823161125183 test_loss: 4.621935653686523
epoch: 52 training_loss 4.1306660056114195 test_loss: 4.246169662475586
epoch: 53 training_loss 4.266146867275238 test_loss: 4.378436279296875
epoch: 54 training_loss 4.095014019012451 test_loss: 4.443763732910156
epoch: 55 training_loss 4.008326699733734 test_loss: 4.205404663085938
epoch: 56 training_loss 3.946303434371948 test_loss: 3.899354934692383
epoch: 57 training_loss 4.039962515830994 test_loss: 3.614106369018555
epoch: 58 training_loss 3.8383541464805604 test_loss: 3.939132308959961
epoch: 59 training_loss 3.9030667042732237 test_loss: 3.6128955841064454
epoch: 60 training_loss 3.8271752071380614 test_loss: 3.9888229370117188
epoch: 61 training_loss 3.7687184953689576 test_loss: 3.619599151611328
epoch: 62 training_loss 3.908972210884094 test_loss: 4.0551612854003904
epoch: 63 training_loss 3.72529328584671 test_loss: 3.3836105346679686
epoch: 64 training_loss 3.722400896549225 test_loss: 3.7609825134277344
epoch: 65 training_loss 3.5895827627182006 test_loss: 3.960356903076172
epoch: 66 training_loss 3.7637454056739807 test_loss: 3.5010753631591798
epoch: 67 training_loss 3.6804395508766174 test_loss: 3.6085845947265627
epoch: 68 training_loss 3.482030894756317 test_loss: 3.516633987426758
epoch: 69 training_loss 3.702281539440155 test_loss: 3.710126495361328
epoch: 70 training_loss 3.669516999721527 test_loss: 3.5001220703125
epoch: 71 training_loss 3.4894137620925902 test_loss: 4.239731979370117
epoch: 72 training_loss 3.624077920913696 test_loss: 3.4257366180419924
epoch: 73 training_loss 3.4211212491989134 test_loss: 3.426274871826172
epoch: 74 training_loss 3.4925205445289613 test_loss: 3.27264289855957
epoch: 75 training_loss 3.4904293251037597 test_loss: 4.060146331787109
epoch: 76 training_loss 3.4192929458618164 test_loss: 3.2287574768066407
epoch: 77 training_loss 3.4852431058883666 test_loss: 3.1400665283203124
epoch: 78 training_loss 3.4333902060985566 test_loss: 3.4422916412353515
epoch: 79 training_loss 3.3622812676429747 test_loss: 3.4568897247314454
epoch: 80 training_loss 3.6574278235435487 test_loss: 3.327310562133789
epoch: 81 training_loss 3.1975189518928526 test_loss: 3.5071975708007814
epoch: 82 training_loss 3.2053175711631776 test_loss: 3.2424293518066407
epoch: 83 training_loss 3.2854355549812317 test_loss: 3.317219543457031
epoch: 84 training_loss 3.3650584983825684 test_loss: 3.944477081298828
epoch: 85 training_loss 3.217061538696289 test_loss: 3.0052358627319338
epoch: 86 training_loss 3.1962576484680176 test_loss: 3.2709400177001955
epoch: 87 training_loss 3.2962174892425535 test_loss: 3.749500274658203
epoch: 88 training_loss 3.0874638533592225 test_loss: 3.034262847900391
epoch: 89 training_loss 3.1111851072311403 test_loss: 2.9370439529418944
epoch: 90 training_loss 3.08051008939743 test_loss: 3.527873229980469
epoch: 91 training_loss 3.2282704663276673 test_loss: 3.4415847778320314
epoch: 92 training_loss 3.0553612875938416 test_loss: 3.2282859802246096
epoch: 93 training_loss 3.0673020935058593 test_loss: 3.3406700134277343
epoch: 94 training_loss 3.2024947810173034 test_loss: 3.272167205810547
epoch: 95 training_loss 3.1470153665542604 test_loss: 2.995469665527344
epoch: 96 training_loss 3.1004722607135773 test_loss: 3.0214553833007813
epoch: 97 training_loss 3.0115745067596436 test_loss: 2.9668569564819336
epoch: 98 training_loss 2.989442250728607 test_loss: 2.73669376373291
epoch: 99 training_loss 3.119725660085678 test_loss: 3.0832021713256834
epoch: 100 training_loss 3.1677046811580656 test_loss: 3.2411502838134765
epoch: 101 training_loss 2.8867832088470458 test_loss: 2.7834415435791016
epoch: 102 training_loss 2.955927120447159 test_loss: 2.875353240966797
epoch: 103 training_loss 2.977702763080597 test_loss: 2.832587242126465
epoch: 104 training_loss 3.059030817747116 test_loss: 2.985114860534668
epoch: 105 training_loss 3.0457523250579834 test_loss: 2.96317195892334
epoch: 106 training_loss 2.963559869527817 test_loss: 2.8425487518310546
epoch: 107 training_loss 2.9478123354911805 test_loss: 3.0536788940429687
epoch: 108 training_loss 3.0687398767471312 test_loss: 2.917799186706543
epoch: 109 training_loss 3.0019786310195924 test_loss: 2.8252805709838866
epoch: 110 training_loss 2.8556567656993868 test_loss: 2.824379730224609
epoch: 111 training_loss 2.819070584774017 test_loss: 2.7682886123657227
epoch: 112 training_loss 2.761736445426941 test_loss: 3.323806381225586
epoch: 113 training_loss 2.846120014190674 test_loss: 2.9634593963623046
epoch: 114 training_loss 2.9062307047843934 test_loss: 2.9119319915771484
epoch: 115 training_loss 2.7757985484600067 test_loss: 2.627566337585449
epoch: 116 training_loss 2.7922938120365144 test_loss: 2.9976730346679688
epoch: 117 training_loss 2.738101694583893 test_loss: 3.059012603759766
epoch: 118 training_loss 2.872720625400543 test_loss: 3.433085632324219
epoch: 119 training_loss 2.7166275346279143 test_loss: 2.9512964248657227
epoch: 120 training_loss 2.6862045192718504 test_loss: 2.8803768157958984
epoch: 121 training_loss 2.8831738662719726 test_loss: 2.749617004394531
epoch: 122 training_loss 2.7423769807815552 test_loss: 3.323908233642578
epoch: 123 training_loss 2.7253712463378905 test_loss: 2.777402877807617
epoch: 124 training_loss 2.6521373951435088 test_loss: 2.548600196838379
epoch: 125 training_loss 2.5963072562217713 test_loss: 2.937712287902832
epoch: 126 training_loss 2.638113000392914 test_loss: 2.470899200439453
epoch: 127 training_loss 2.6746830344200134 test_loss: 2.7423946380615236
epoch: 128 training_loss 2.6424925351142883 test_loss: 2.502592849731445
epoch: 129 training_loss 2.808407666683197 test_loss: 2.8831626892089846
epoch: 130 training_loss 2.4954567778110506 test_loss: 2.4760908126831054
epoch: 131 training_loss 2.659213579893112 test_loss: 2.782542037963867
epoch: 132 training_loss 2.3992724525928497 test_loss: 2.4214641571044924
epoch: 133 training_loss 2.5632055294513703 test_loss: 2.721586799621582
epoch: 134 training_loss 2.51065106511116 test_loss: 2.4664434432983398
epoch: 135 training_loss 2.497102987766266 test_loss: 2.7027040481567384
epoch: 136 training_loss 2.349175143241882 test_loss: 2.3935964584350584
epoch: 137 training_loss 2.5331492125988007 test_loss: 2.636081886291504
epoch: 138 training_loss 2.375692343711853 test_loss: 2.7677286148071287
epoch: 139 training_loss 2.6356569159030916 test_loss: 2.635554885864258
epoch: 140 training_loss 2.5897616922855375 test_loss: 2.5096649169921874
epoch: 141 training_loss 2.2679481744766234 test_loss: 2.262306785583496
epoch: 142 training_loss 2.2969401121139525 test_loss: 2.8133399963378904
epoch: 143 training_loss 2.546685434579849 test_loss: 2.8667640686035156
epoch: 144 training_loss 2.2794795644283297 test_loss: 2.3054494857788086
epoch: 145 training_loss 2.421614284515381 test_loss: 2.5550567626953127
epoch: 146 training_loss 2.288186753988266 test_loss: 2.079109954833984
epoch: 147 training_loss 2.5198199093341827 test_loss: 2.3108293533325197
epoch: 148 training_loss 2.2267979311943056 test_loss: 2.8139877319335938
epoch: 149 training_loss 2.2542256259918214 test_loss: 2.4436342239379885
45.7444190694104
episode: 0 training return: tensor(-211.1290, device='cuda:0')
episode: 1 training return: tensor(-77.8445, device='cuda:0')
episode: 2 training return: tensor(-204.2969, device='cuda:0')
episode: 3 training return: tensor(-204.5450, device='cuda:0')
epoch: 1 test_true_pfm: 46.05403915083165 sim_pfm: -170.44286436127732
episode: 4 training return: tensor(-202.0171, device='cuda:0')
episode: 5 training return: tensor(-201.8832, device='cuda:0')
episode: 6 training return: tensor(-204.2760, device='cuda:0')
episode: 7 training return: tensor(-204.1378, device='cuda:0')
epoch: 2 test_true_pfm: 48.10045510853092 sim_pfm: -165.6177493235562
episode: 8 training return: tensor(-203.6662, device='cuda:0')
episode: 9 training return: tensor(-212.6086, device='cuda:0')
episode: 10 training return: tensor(-205.0811, device='cuda:0')
episode: 11 training return: tensor(-205.1025, device='cuda:0')
epoch: 3 test_true_pfm: 54.70251969444574 sim_pfm: -198.15540377422585
episode: 12 training return: tensor(-202.0138, device='cuda:0')
episode: 13 training return: tensor(-212.3596, device='cuda:0')
episode: 14 training return: tensor(-204.7259, device='cuda:0')
episode: 15 training return: tensor(-203.8393, device='cuda:0')
epoch: 4 test_true_pfm: 48.76077281265266 sim_pfm: -201.5868961501401
episode: 16 training return: tensor(-202.4904, device='cuda:0')
episode: 17 training return: tensor(-212.2093, device='cuda:0')
episode: 18 training return: tensor(-205.1602, device='cuda:0')
episode: 19 training return: tensor(-187.7771, device='cuda:0')
epoch: 5 test_true_pfm: 49.21199926400668 sim_pfm: -203.73170377407223
episode: 20 training return: tensor(-217.8015, device='cuda:0')
episode: 21 training return: tensor(-211.3435, device='cuda:0')
episode: 22 training return: tensor(-177.5852, device='cuda:0')
episode: 23 training return: tensor(-200.7561, device='cuda:0')
epoch: 6 test_true_pfm: 45.05777639576151 sim_pfm: -210.12729833761696
episode: 24 training return: tensor(-201.3229, device='cuda:0')
episode: 25 training return: tensor(-212.6423, device='cuda:0')
episode: 26 training return: tensor(-203.9521, device='cuda:0')
episode: 27 training return: tensor(-179.5979, device='cuda:0')
epoch: 7 test_true_pfm: 47.093032186509205 sim_pfm: -203.41239173189388
episode: 28 training return: tensor(-205.0845, device='cuda:0')
episode: 29 training return: tensor(-211.6807, device='cuda:0')
episode: 30 training return: tensor(-201.0181, device='cuda:0')
episode: 31 training return: tensor(-206.0257, device='cuda:0')
epoch: 8 test_true_pfm: 46.6898833036938 sim_pfm: -213.10772691185122
episode: 32 training return: tensor(-214.9395, device='cuda:0')
episode: 33 training return: tensor(-188.5953, device='cuda:0')
episode: 34 training return: tensor(-205.1184, device='cuda:0')
episode: 35 training return: tensor(-205.0236, device='cuda:0')
epoch: 9 test_true_pfm: 42.12779801012275 sim_pfm: -203.43339203125214
episode: 36 training return: tensor(-214.1771, device='cuda:0')
episode: 37 training return: tensor(-206.3649, device='cuda:0')
episode: 38 training return: tensor(-202.6769, device='cuda:0')
episode: 39 training return: tensor(-205.1633, device='cuda:0')
epoch: 10 test_true_pfm: 42.12508311003671 sim_pfm: -207.41547428024933
episode: 40 training return: tensor(-204.1894, device='cuda:0')
episode: 41 training return: tensor(-212.2864, device='cuda:0')
episode: 42 training return: tensor(-204.3561, device='cuda:0')
episode: 43 training return: tensor(-213.0112, device='cuda:0')
epoch: 11 test_true_pfm: 43.92785176880302 sim_pfm: -207.4176387789019
episode: 44 training return: tensor(-214.1164, device='cuda:0')
episode: 45 training return: tensor(-202.7806, device='cuda:0')
episode: 46 training return: tensor(-212.5425, device='cuda:0')
episode: 47 training return: tensor(-213.4316, device='cuda:0')
epoch: 12 test_true_pfm: 48.154959674629524 sim_pfm: -204.79648011508397
episode: 48 training return: tensor(-201.5507, device='cuda:0')
episode: 49 training return: tensor(-201.3695, device='cuda:0')
episode: 50 training return: tensor(-203.3378, device='cuda:0')
episode: 51 training return: tensor(-203.1942, device='cuda:0')
epoch: 13 test_true_pfm: 47.53439139485579 sim_pfm: -201.2985636846628
episode: 52 training return: tensor(-211.8219, device='cuda:0')
episode: 53 training return: tensor(-211.8731, device='cuda:0')
episode: 54 training return: tensor(-202.4678, device='cuda:0')
episode: 55 training return: tensor(-173.5523, device='cuda:0')
epoch: 14 test_true_pfm: 48.101194835721124 sim_pfm: -209.79851199173717
episode: 56 training return: tensor(-207.3534, device='cuda:0')
episode: 57 training return: tensor(-213.1415, device='cuda:0')
episode: 58 training return: tensor(-202.0476, device='cuda:0')
episode: 59 training return: tensor(-199.0805, device='cuda:0')
epoch: 15 test_true_pfm: 47.64084123881817 sim_pfm: -198.46208744465258
episode: 60 training return: tensor(-217.8728, device='cuda:0')
episode: 61 training return: tensor(-201.3478, device='cuda:0')
episode: 62 training return: tensor(-196.9869, device='cuda:0')
episode: 63 training return: tensor(-197.0501, device='cuda:0')
epoch: 16 test_true_pfm: 48.5653750396334 sim_pfm: -202.70068927918328
episode: 64 training return: tensor(-207.4974, device='cuda:0')
episode: 65 training return: tensor(-197.9045, device='cuda:0')
episode: 66 training return: tensor(-203.6042, device='cuda:0')
episode: 67 training return: tensor(-202.4233, device='cuda:0')
epoch: 17 test_true_pfm: 44.037880505135114 sim_pfm: -196.86262945921627
episode: 68 training return: tensor(-211.2770, device='cuda:0')
episode: 69 training return: tensor(-208.0466, device='cuda:0')
episode: 70 training return: tensor(-202.9439, device='cuda:0')
episode: 71 training return: tensor(-205.3957, device='cuda:0')
epoch: 18 test_true_pfm: 44.740587341334994 sim_pfm: -212.3455067130504
episode: 72 training return: tensor(-214.4174, device='cuda:0')
episode: 73 training return: tensor(-200.6630, device='cuda:0')
episode: 74 training return: tensor(-200.3745, device='cuda:0')
episode: 75 training return: tensor(-204.3790, device='cuda:0')
epoch: 19 test_true_pfm: 46.86724321332444 sim_pfm: -204.31500259097666
episode: 76 training return: tensor(-177.8659, device='cuda:0')
episode: 77 training return: tensor(-181.0605, device='cuda:0')
episode: 78 training return: tensor(-220.6546, device='cuda:0')
episode: 79 training return: tensor(-204.9324, device='cuda:0')
epoch: 20 test_true_pfm: 46.19285233842599 sim_pfm: -198.33329120760317
episode: 80 training return: tensor(-207.9540, device='cuda:0')
episode: 81 training return: tensor(-201.1400, device='cuda:0')
episode: 82 training return: tensor(-201.4363, device='cuda:0')
episode: 83 training return: tensor(-217.0069, device='cuda:0')
epoch: 21 test_true_pfm: 45.72498081132681 sim_pfm: -212.91708042634417
episode: 84 training return: tensor(-215.6122, device='cuda:0')
episode: 85 training return: tensor(-211.4155, device='cuda:0')
episode: 86 training return: tensor(-207.5786, device='cuda:0')
episode: 87 training return: tensor(-213.9331, device='cuda:0')
epoch: 22 test_true_pfm: 44.260171503317466 sim_pfm: -210.43718186226906
episode: 88 training return: tensor(-211.5804, device='cuda:0')
episode: 89 training return: tensor(-210.9079, device='cuda:0')
episode: 90 training return: tensor(-208.8879, device='cuda:0')
episode: 91 training return: tensor(-213.7530, device='cuda:0')
epoch: 23 test_true_pfm: 46.030726123207444 sim_pfm: -195.91650654059487
episode: 92 training return: tensor(-201.8148, device='cuda:0')
episode: 93 training return: tensor(-199.3420, device='cuda:0')
episode: 94 training return: tensor(-209.5785, device='cuda:0')
episode: 95 training return: tensor(-200.7286, device='cuda:0')
epoch: 24 test_true_pfm: 44.12910648291233 sim_pfm: -203.7983313477598
episode: 96 training return: tensor(-199.4220, device='cuda:0')
episode: 97 training return: tensor(-193.8573, device='cuda:0')
episode: 98 training return: tensor(-202.3004, device='cuda:0')
episode: 99 training return: tensor(-199.3905, device='cuda:0')
epoch: 25 test_true_pfm: 46.22338349075487 sim_pfm: -201.66606829029624
episode: 100 training return: tensor(-201.4799, device='cuda:0')
episode: 101 training return: tensor(-198.2448, device='cuda:0')
episode: 102 training return: tensor(-212.0496, device='cuda:0')
episode: 103 training return: tensor(-171.4760, device='cuda:0')
epoch: 26 test_true_pfm: 45.2791185816714 sim_pfm: -203.82136282345746
episode: 104 training return: tensor(-213.6577, device='cuda:0')
episode: 105 training return: tensor(-202.5755, device='cuda:0')
episode: 106 training return: tensor(-199.8411, device='cuda:0')
episode: 107 training return: tensor(-202.3233, device='cuda:0')
epoch: 27 test_true_pfm: 46.56216857038132 sim_pfm: -206.74982415481935
episode: 108 training return: tensor(-194.4384, device='cuda:0')
episode: 109 training return: tensor(-199.7752, device='cuda:0')
episode: 110 training return: tensor(-200.1581, device='cuda:0')
episode: 111 training return: tensor(-199.8846, device='cuda:0')
epoch: 28 test_true_pfm: 44.188289576349675 sim_pfm: -197.92151609756985
episode: 112 training return: tensor(-198.0536, device='cuda:0')
episode: 113 training return: tensor(-199.8745, device='cuda:0')
episode: 114 training return: tensor(-185.6140, device='cuda:0')
episode: 115 training return: tensor(-194.9081, device='cuda:0')
epoch: 29 test_true_pfm: 48.6411432822165 sim_pfm: -196.75087604474976
episode: 116 training return: tensor(-199.4279, device='cuda:0')
episode: 117 training return: tensor(-213.7595, device='cuda:0')
episode: 118 training return: tensor(-201.2260, device='cuda:0')
episode: 119 training return: tensor(-203.5526, device='cuda:0')
epoch: 30 test_true_pfm: 44.41903319531341 sim_pfm: -208.54386890246533
episode: 120 training return: tensor(-197.0294, device='cuda:0')
episode: 121 training return: tensor(-204.8318, device='cuda:0')
episode: 122 training return: tensor(-198.4956, device='cuda:0')
episode: 123 training return: tensor(-203.5330, device='cuda:0')
epoch: 31 test_true_pfm: 43.55715491293287 sim_pfm: -203.57117900625454
episode: 124 training return: tensor(-205.6321, device='cuda:0')
episode: 125 training return: tensor(-202.0120, device='cuda:0')
episode: 126 training return: tensor(-204.2451, device='cuda:0')
episode: 127 training return: tensor(-182.4380, device='cuda:0')
epoch: 32 test_true_pfm: 42.52217515391715 sim_pfm: -184.470973381144
episode: 128 training return: tensor(-202.1442, device='cuda:0')
episode: 129 training return: tensor(-202.0736, device='cuda:0')
episode: 130 training return: tensor(-216.3426, device='cuda:0')
episode: 131 training return: tensor(-201.1328, device='cuda:0')
epoch: 33 test_true_pfm: 43.75497210090316 sim_pfm: -205.88931615314215
episode: 132 training return: tensor(-203.1833, device='cuda:0')
episode: 133 training return: tensor(-213.6166, device='cuda:0')
episode: 134 training return: tensor(-214.6609, device='cuda:0')
episode: 135 training return: tensor(-200.4265, device='cuda:0')
epoch: 34 test_true_pfm: 46.69321203632818 sim_pfm: -207.04767874922837
episode: 136 training return: tensor(-203.0975, device='cuda:0')
episode: 137 training return: tensor(-197.6319, device='cuda:0')
episode: 138 training return: tensor(-210.4712, device='cuda:0')
episode: 139 training return: tensor(-203.1992, device='cuda:0')
epoch: 35 test_true_pfm: 42.38013758901588 sim_pfm: -204.7392917117104
episode: 140 training return: tensor(-201.1741, device='cuda:0')
episode: 141 training return: tensor(-212.7603, device='cuda:0')
episode: 142 training return: tensor(-204.8524, device='cuda:0')
episode: 143 training return: tensor(-189.5425, device='cuda:0')
epoch: 36 test_true_pfm: 42.53530328337813 sim_pfm: -203.74529434300493
episode: 144 training return: tensor(-202.3054, device='cuda:0')
episode: 145 training return: tensor(-202.3807, device='cuda:0')
episode: 146 training return: tensor(-206.4721, device='cuda:0')
episode: 147 training return: tensor(-215.4524, device='cuda:0')
epoch: 37 test_true_pfm: 44.64468583777735 sim_pfm: -210.48713783747516
episode: 148 training return: tensor(-217.1228, device='cuda:0')
episode: 149 training return: tensor(-201.2042, device='cuda:0')
episode: 150 training return: tensor(-205.9548, device='cuda:0')
episode: 151 training return: tensor(-206.4140, device='cuda:0')
epoch: 38 test_true_pfm: 47.252021017077226 sim_pfm: -204.78067178577186
episode: 152 training return: tensor(-205.7672, device='cuda:0')
episode: 153 training return: tensor(-200.9536, device='cuda:0')
episode: 154 training return: tensor(-209.5529, device='cuda:0')
episode: 155 training return: tensor(-166.0939, device='cuda:0')
epoch: 39 test_true_pfm: 45.87516627229145 sim_pfm: -197.73668768949574
episode: 156 training return: tensor(-204.3022, device='cuda:0')
episode: 157 training return: tensor(-178.0262, device='cuda:0')
episode: 158 training return: tensor(-186.5916, device='cuda:0')
episode: 159 training return: tensor(-197.0265, device='cuda:0')
epoch: 40 test_true_pfm: 45.951262386248374 sim_pfm: -199.7910374401312
episode: 160 training return: tensor(-198.9675, device='cuda:0')
episode: 161 training return: tensor(-204.0738, device='cuda:0')
episode: 162 training return: tensor(-204.0252, device='cuda:0')
episode: 163 training return: tensor(-204.1390, device='cuda:0')
epoch: 41 test_true_pfm: 45.345930554214625 sim_pfm: -202.86994112898827
episode: 164 training return: tensor(-205.1590, device='cuda:0')
episode: 165 training return: tensor(-205.9916, device='cuda:0')
episode: 166 training return: tensor(-207.5424, device='cuda:0')
episode: 167 training return: tensor(-213.2459, device='cuda:0')
epoch: 42 test_true_pfm: 48.60825673647347 sim_pfm: -204.42177222719184
episode: 168 training return: tensor(-188.9736, device='cuda:0')
episode: 169 training return: tensor(-200.2246, device='cuda:0')
episode: 170 training return: tensor(-202.7230, device='cuda:0')
episode: 171 training return: tensor(-178.3569, device='cuda:0')
epoch: 43 test_true_pfm: 47.26472907882904 sim_pfm: -198.56533482170198
episode: 172 training return: tensor(-204.3635, device='cuda:0')
episode: 173 training return: tensor(-200.9553, device='cuda:0')
episode: 174 training return: tensor(-194.0754, device='cuda:0')
episode: 175 training return: tensor(-200.2753, device='cuda:0')
epoch: 44 test_true_pfm: 47.04573659802918 sim_pfm: -200.5548419911065
episode: 176 training return: tensor(-199.3240, device='cuda:0')
episode: 177 training return: tensor(-199.8634, device='cuda:0')
episode: 178 training return: tensor(-199.9198, device='cuda:0')
episode: 179 training return: tensor(-199.8946, device='cuda:0')
epoch: 45 test_true_pfm: 42.449494403930046 sim_pfm: -202.01744951401488
episode: 180 training return: tensor(-207.9966, device='cuda:0')
episode: 181 training return: tensor(-197.3630, device='cuda:0')
episode: 182 training return: tensor(-200.5765, device='cuda:0')
episode: 183 training return: tensor(-198.3398, device='cuda:0')
epoch: 46 test_true_pfm: 45.91420235080936 sim_pfm: -201.06325958357192
episode: 184 training return: tensor(-198.5682, device='cuda:0')
episode: 185 training return: tensor(-198.0471, device='cuda:0')
episode: 186 training return: tensor(-196.8260, device='cuda:0')
episode: 187 training return: tensor(-208.9621, device='cuda:0')
epoch: 47 test_true_pfm: 46.02139028034542 sim_pfm: -203.26789435679675
episode: 188 training return: tensor(-209.8965, device='cuda:0')
episode: 189 training return: tensor(-200.6155, device='cuda:0')
episode: 190 training return: tensor(-197.4694, device='cuda:0')
episode: 191 training return: tensor(-210.1322, device='cuda:0')
epoch: 48 test_true_pfm: 46.816957408323695 sim_pfm: -202.54031143262984
episode: 192 training return: tensor(-199.4961, device='cuda:0')
episode: 193 training return: tensor(-208.5512, device='cuda:0')
episode: 194 training return: tensor(-206.6008, device='cuda:0')
episode: 195 training return: tensor(-197.0858, device='cuda:0')
epoch: 49 test_true_pfm: 43.89742829830088 sim_pfm: -202.98367128639947
episode: 196 training return: tensor(-206.8765, device='cuda:0')
episode: 197 training return: tensor(-197.2034, device='cuda:0')
episode: 198 training return: tensor(-207.6759, device='cuda:0')
episode: 199 training return: tensor(-197.3699, device='cuda:0')
epoch: 50 test_true_pfm: 44.60166905251787 sim_pfm: -204.6463428086601
episode: 200 training return: tensor(-199.6588, device='cuda:0')
episode: 201 training return: tensor(-208.9924, device='cuda:0')
episode: 202 training return: tensor(-200.0639, device='cuda:0')
episode: 203 training return: tensor(-210.0652, device='cuda:0')
epoch: 51 test_true_pfm: 41.27053835308131 sim_pfm: -202.14774602283723
episode: 204 training return: tensor(-55.5584, device='cuda:0')
episode: 205 training return: tensor(-200.4881, device='cuda:0')
episode: 206 training return: tensor(-205.4505, device='cuda:0')
episode: 207 training return: tensor(-201.8129, device='cuda:0')
epoch: 52 test_true_pfm: 43.8017656238209 sim_pfm: -200.38887417577206
episode: 208 training return: tensor(-211.5884, device='cuda:0')
episode: 209 training return: tensor(-206.9024, device='cuda:0')
episode: 210 training return: tensor(-208.2466, device='cuda:0')
episode: 211 training return: tensor(-200.0530, device='cuda:0')
epoch: 53 test_true_pfm: 41.67692025133843 sim_pfm: -200.00239051932004
episode: 212 training return: tensor(-199.4013, device='cuda:0')
episode: 213 training return: tensor(-201.1649, device='cuda:0')
episode: 214 training return: tensor(-195.4866, device='cuda:0')
episode: 215 training return: tensor(-199.0903, device='cuda:0')
epoch: 54 test_true_pfm: 42.241484575520936 sim_pfm: -200.15717364302836
episode: 216 training return: tensor(-195.9456, device='cuda:0')
episode: 217 training return: tensor(-204.9927, device='cuda:0')
episode: 218 training return: tensor(-201.2409, device='cuda:0')
episode: 219 training return: tensor(-198.6489, device='cuda:0')
epoch: 55 test_true_pfm: 44.8551836030672 sim_pfm: -201.53206391062122
episode: 220 training return: tensor(-208.8900, device='cuda:0')
episode: 221 training return: tensor(-206.9144, device='cuda:0')
episode: 222 training return: tensor(-204.6052, device='cuda:0')
episode: 223 training return: tensor(-210.2750, device='cuda:0')
epoch: 56 test_true_pfm: 42.49738895446011 sim_pfm: -200.04843663503416
episode: 224 training return: tensor(-209.2520, device='cuda:0')
episode: 225 training return: tensor(-198.4444, device='cuda:0')
episode: 226 training return: tensor(-200.7471, device='cuda:0')
episode: 227 training return: tensor(-198.3998, device='cuda:0')
epoch: 57 test_true_pfm: 48.07441985006463 sim_pfm: -203.11977606983857
episode: 228 training return: tensor(-198.2257, device='cuda:0')
episode: 229 training return: tensor(-208.4232, device='cuda:0')
episode: 230 training return: tensor(-208.9732, device='cuda:0')
episode: 231 training return: tensor(-198.2718, device='cuda:0')
epoch: 58 test_true_pfm: 46.231376477092326 sim_pfm: -207.93498930044007
episode: 232 training return: tensor(-211.8161, device='cuda:0')
episode: 233 training return: tensor(-200.2672, device='cuda:0')
episode: 234 training return: tensor(-201.4613, device='cuda:0')
episode: 235 training return: tensor(-197.5040, device='cuda:0')
epoch: 59 test_true_pfm: 46.097470261112356 sim_pfm: -195.61709839442045
episode: 236 training return: tensor(-208.6519, device='cuda:0')
episode: 237 training return: tensor(-195.3396, device='cuda:0')
episode: 238 training return: tensor(-188.0952, device='cuda:0')
episode: 239 training return: tensor(-198.9894, device='cuda:0')
epoch: 60 test_true_pfm: 44.95034452834001 sim_pfm: -202.19441357516916
episode: 240 training return: tensor(-210.5463, device='cuda:0')
episode: 241 training return: tensor(-196.3847, device='cuda:0')
episode: 242 training return: tensor(-208.0381, device='cuda:0')
episode: 243 training return: tensor(-201.9144, device='cuda:0')
epoch: 61 test_true_pfm: 47.69539386049987 sim_pfm: -201.02649940168484
episode: 244 training return: tensor(-196.1933, device='cuda:0')
episode: 245 training return: tensor(-209.9967, device='cuda:0')
episode: 246 training return: tensor(-200.2282, device='cuda:0')
episode: 247 training return: tensor(-200.6091, device='cuda:0')
epoch: 62 test_true_pfm: 43.53700257562243 sim_pfm: -197.14690784360283
episode: 248 training return: tensor(-208.7580, device='cuda:0')
episode: 249 training return: tensor(-198.7772, device='cuda:0')
episode: 250 training return: tensor(-199.7510, device='cuda:0')
episode: 251 training return: tensor(-199.3262, device='cuda:0')
epoch: 63 test_true_pfm: 44.96630654923207 sim_pfm: -202.93724534206558
episode: 252 training return: tensor(-198.5696, device='cuda:0')
episode: 253 training return: tensor(-199.4922, device='cuda:0')
episode: 254 training return: tensor(-198.6328, device='cuda:0')
episode: 255 training return: tensor(-200.2051, device='cuda:0')
epoch: 64 test_true_pfm: 43.15412247250064 sim_pfm: -200.59837849171018
episode: 256 training return: tensor(-198.3208, device='cuda:0')
episode: 257 training return: tensor(-198.8073, device='cuda:0')
episode: 258 training return: tensor(-209.6046, device='cuda:0')
episode: 259 training return: tensor(-209.6697, device='cuda:0')
epoch: 65 test_true_pfm: 44.93797842668607 sim_pfm: -202.37047651797766
episode: 260 training return: tensor(-199.0536, device='cuda:0')
episode: 261 training return: tensor(-198.4728, device='cuda:0')
episode: 262 training return: tensor(-197.5072, device='cuda:0')
episode: 263 training return: tensor(-196.7846, device='cuda:0')
epoch: 66 test_true_pfm: 45.15459888146982 sim_pfm: -200.25974891881924
episode: 264 training return: tensor(-198.8963, device='cuda:0')
episode: 265 training return: tensor(-198.7136, device='cuda:0')
episode: 266 training return: tensor(-208.8006, device='cuda:0')
episode: 267 training return: tensor(-210.8626, device='cuda:0')
epoch: 67 test_true_pfm: 43.49421796559396 sim_pfm: -201.58866354443597
episode: 268 training return: tensor(-199.6591, device='cuda:0')
episode: 269 training return: tensor(-208.7480, device='cuda:0')
episode: 270 training return: tensor(-197.4885, device='cuda:0')
episode: 271 training return: tensor(-199.4799, device='cuda:0')
epoch: 68 test_true_pfm: 45.73769896969154 sim_pfm: -201.35382900850382
episode: 272 training return: tensor(-198.4322, device='cuda:0')
episode: 273 training return: tensor(-200.0807, device='cuda:0')
episode: 274 training return: tensor(-206.5471, device='cuda:0')
episode: 275 training return: tensor(-200.0296, device='cuda:0')
epoch: 69 test_true_pfm: 45.26020115936098 sim_pfm: -201.7339410964516
episode: 276 training return: tensor(-200.4529, device='cuda:0')
episode: 277 training return: tensor(-197.8194, device='cuda:0')
episode: 278 training return: tensor(-198.6740, device='cuda:0')
episode: 279 training return: tensor(-197.7572, device='cuda:0')
epoch: 70 test_true_pfm: 41.915941785057136 sim_pfm: -198.24994883011678
episode: 280 training return: tensor(-207.7158, device='cuda:0')
episode: 281 training return: tensor(-198.5282, device='cuda:0')
episode: 282 training return: tensor(-199.1449, device='cuda:0')
episode: 283 training return: tensor(-200.4705, device='cuda:0')
epoch: 71 test_true_pfm: 47.600997802806 sim_pfm: -200.28145997497487
episode: 284 training return: tensor(-199.8409, device='cuda:0')
episode: 285 training return: tensor(-196.6983, device='cuda:0')
episode: 286 training return: tensor(-207.7564, device='cuda:0')
episode: 287 training return: tensor(-201.2604, device='cuda:0')
epoch: 72 test_true_pfm: 46.573520395098214 sim_pfm: -204.5832322625618
episode: 288 training return: tensor(-197.8208, device='cuda:0')
episode: 289 training return: tensor(-199.2817, device='cuda:0')
episode: 290 training return: tensor(-207.6349, device='cuda:0')
episode: 291 training return: tensor(-198.7796, device='cuda:0')
epoch: 73 test_true_pfm: 46.670981101601896 sim_pfm: -200.20582450869261
episode: 292 training return: tensor(-198.2098, device='cuda:0')
episode: 293 training return: tensor(-197.1018, device='cuda:0')
episode: 294 training return: tensor(-208.8232, device='cuda:0')
episode: 295 training return: tensor(-198.5821, device='cuda:0')
epoch: 74 test_true_pfm: 42.99385882726104 sim_pfm: -201.7797626910964
episode: 296 training return: tensor(-207.2857, device='cuda:0')
episode: 297 training return: tensor(-199.0256, device='cuda:0')
episode: 298 training return: tensor(-185.1930, device='cuda:0')
episode: 299 training return: tensor(-207.0110, device='cuda:0')
epoch: 75 test_true_pfm: 44.13466587089347 sim_pfm: -201.117687410675
episode: 300 training return: tensor(-199.1166, device='cuda:0')
episode: 301 training return: tensor(-197.6561, device='cuda:0')
episode: 302 training return: tensor(-199.5943, device='cuda:0')
episode: 303 training return: tensor(-199.3117, device='cuda:0')
epoch: 76 test_true_pfm: 45.405141210299284 sim_pfm: -204.2530263462686
episode: 304 training return: tensor(-201.3193, device='cuda:0')
episode: 305 training return: tensor(-199.9590, device='cuda:0')
episode: 306 training return: tensor(-198.5341, device='cuda:0')
episode: 307 training return: tensor(-201.1732, device='cuda:0')
epoch: 77 test_true_pfm: 46.69979099269585 sim_pfm: -202.90184429682557
episode: 308 training return: tensor(-208.4304, device='cuda:0')
episode: 309 training return: tensor(-201.3131, device='cuda:0')
episode: 310 training return: tensor(-198.3174, device='cuda:0')
episode: 311 training return: tensor(-209.1350, device='cuda:0')
epoch: 78 test_true_pfm: 48.47877773780057 sim_pfm: -196.89243920154405
episode: 312 training return: tensor(-196.2357, device='cuda:0')
episode: 313 training return: tensor(-188.6053, device='cuda:0')
episode: 314 training return: tensor(-199.3938, device='cuda:0')
episode: 315 training return: tensor(-187.0123, device='cuda:0')
epoch: 79 test_true_pfm: 46.40786365061918 sim_pfm: -198.84646215501706
episode: 316 training return: tensor(-199.7487, device='cuda:0')
episode: 317 training return: tensor(-196.9357, device='cuda:0')
episode: 318 training return: tensor(-200.2399, device='cuda:0')
episode: 319 training return: tensor(-197.6061, device='cuda:0')
epoch: 80 test_true_pfm: 43.07834789841466 sim_pfm: -202.91329774606857
episode: 320 training return: tensor(-200.3040, device='cuda:0')
episode: 321 training return: tensor(-209.5626, device='cuda:0')
episode: 322 training return: tensor(-199.0648, device='cuda:0')
episode: 323 training return: tensor(-199.9287, device='cuda:0')
epoch: 81 test_true_pfm: 44.67854731947305 sim_pfm: -202.70744649855186
episode: 324 training return: tensor(-207.5445, device='cuda:0')
episode: 325 training return: tensor(-199.2219, device='cuda:0')
episode: 326 training return: tensor(-208.8410, device='cuda:0')
episode: 327 training return: tensor(-198.3232, device='cuda:0')
epoch: 82 test_true_pfm: 44.82001877226162 sim_pfm: -202.11009552665055
episode: 328 training return: tensor(-208.9924, device='cuda:0')
episode: 329 training return: tensor(-198.0502, device='cuda:0')
episode: 330 training return: tensor(-207.7164, device='cuda:0')
episode: 331 training return: tensor(-198.9004, device='cuda:0')
epoch: 83 test_true_pfm: 44.70275571291758 sim_pfm: -203.0939982996264
episode: 332 training return: tensor(-197.8778, device='cuda:0')
episode: 333 training return: tensor(-208.8182, device='cuda:0')
episode: 334 training return: tensor(-210.3911, device='cuda:0')
episode: 335 training return: tensor(-199.4254, device='cuda:0')
epoch: 84 test_true_pfm: 46.50548766727651 sim_pfm: -202.26655759221757
episode: 336 training return: tensor(-198.6954, device='cuda:0')
episode: 337 training return: tensor(-197.6842, device='cuda:0')
episode: 338 training return: tensor(-196.3010, device='cuda:0')
episode: 339 training return: tensor(-198.1072, device='cuda:0')
epoch: 85 test_true_pfm: 47.634839004783984 sim_pfm: -201.47681007502834
episode: 340 training return: tensor(-198.5932, device='cuda:0')
episode: 341 training return: tensor(-207.2784, device='cuda:0')
episode: 342 training return: tensor(-195.6762, device='cuda:0')
episode: 343 training return: tensor(-207.1763, device='cuda:0')
epoch: 86 test_true_pfm: 46.87028369166876 sim_pfm: -201.1499526789179
episode: 344 training return: tensor(-197.6889, device='cuda:0')
episode: 345 training return: tensor(-210.9558, device='cuda:0')
episode: 346 training return: tensor(-206.9964, device='cuda:0')
episode: 347 training return: tensor(-200.2782, device='cuda:0')
epoch: 87 test_true_pfm: 44.44804343178839 sim_pfm: -201.9664417911321
episode: 348 training return: tensor(-207.5820, device='cuda:0')
episode: 349 training return: tensor(-198.3135, device='cuda:0')
episode: 350 training return: tensor(-200.4588, device='cuda:0')
episode: 351 training return: tensor(-208.0452, device='cuda:0')
epoch: 88 test_true_pfm: 47.92289577921399 sim_pfm: -200.6424716267502
episode: 352 training return: tensor(-200.6616, device='cuda:0')
episode: 353 training return: tensor(-209.4113, device='cuda:0')
episode: 354 training return: tensor(-199.1310, device='cuda:0')
episode: 355 training return: tensor(-199.3506, device='cuda:0')
epoch: 89 test_true_pfm: 46.19062438814439 sim_pfm: -202.54040382160457
episode: 356 training return: tensor(-208.2942, device='cuda:0')
episode: 357 training return: tensor(-199.0211, device='cuda:0')
episode: 358 training return: tensor(-199.4322, device='cuda:0')
episode: 359 training return: tensor(-201.1294, device='cuda:0')
epoch: 90 test_true_pfm: 47.95207682265892 sim_pfm: -203.06480914771674
episode: 360 training return: tensor(-196.8661, device='cuda:0')
episode: 361 training return: tensor(-207.8244, device='cuda:0')
episode: 362 training return: tensor(-209.0242, device='cuda:0')
episode: 363 training return: tensor(-197.3148, device='cuda:0')
epoch: 91 test_true_pfm: 42.54048759382224 sim_pfm: -202.891191841173
episode: 364 training return: tensor(-199.5537, device='cuda:0')
episode: 365 training return: tensor(-210.2888, device='cuda:0')
episode: 366 training return: tensor(-210.4692, device='cuda:0')
episode: 367 training return: tensor(-197.5129, device='cuda:0')
epoch: 92 test_true_pfm: 43.44259036035862 sim_pfm: -202.6221476089908
episode: 368 training return: tensor(-201.3325, device='cuda:0')
episode: 369 training return: tensor(-209.9405, device='cuda:0')
episode: 370 training return: tensor(-201.1641, device='cuda:0')
episode: 371 training return: tensor(-198.1451, device='cuda:0')
epoch: 93 test_true_pfm: 48.53210961135081 sim_pfm: -194.36400676246268
episode: 372 training return: tensor(-199.0583, device='cuda:0')
episode: 373 training return: tensor(-209.5682, device='cuda:0')
episode: 374 training return: tensor(-198.7532, device='cuda:0')
episode: 375 training return: tensor(-198.4148, device='cuda:0')
epoch: 94 test_true_pfm: 45.59106084772702 sim_pfm: -201.75155510209734
episode: 376 training return: tensor(-208.8290, device='cuda:0')
episode: 377 training return: tensor(-209.4604, device='cuda:0')
episode: 378 training return: tensor(-209.0848, device='cuda:0')
episode: 379 training return: tensor(-198.5728, device='cuda:0')
epoch: 95 test_true_pfm: 45.05334542327664 sim_pfm: -200.49880662583746
episode: 380 training return: tensor(-208.3082, device='cuda:0')
episode: 381 training return: tensor(-195.7578, device='cuda:0')
episode: 382 training return: tensor(-199.8480, device='cuda:0')
episode: 383 training return: tensor(-209.6152, device='cuda:0')
epoch: 96 test_true_pfm: 46.16390163988606 sim_pfm: -204.08043934195302
episode: 384 training return: tensor(-199.6767, device='cuda:0')
episode: 385 training return: tensor(-199.4170, device='cuda:0')
episode: 386 training return: tensor(-199.8504, device='cuda:0')
episode: 387 training return: tensor(-199.2934, device='cuda:0')
epoch: 97 test_true_pfm: 43.03517462000786 sim_pfm: -199.54606362665072
episode: 388 training return: tensor(-200.2407, device='cuda:0')
episode: 389 training return: tensor(-207.5363, device='cuda:0')
episode: 390 training return: tensor(-198.7966, device='cuda:0')
episode: 391 training return: tensor(-207.6365, device='cuda:0')
epoch: 98 test_true_pfm: 43.690721099533455 sim_pfm: -200.48656074670143
episode: 392 training return: tensor(-206.4596, device='cuda:0')
episode: 393 training return: tensor(-198.8662, device='cuda:0')
episode: 394 training return: tensor(-211.0310, device='cuda:0')
episode: 395 training return: tensor(-197.8263, device='cuda:0')
epoch: 99 test_true_pfm: 46.62213224442755 sim_pfm: -200.13747701116372
episode: 396 training return: tensor(-207.3636, device='cuda:0')
episode: 397 training return: tensor(-209.2326, device='cuda:0')
episode: 398 training return: tensor(-207.9563, device='cuda:0')
episode: 399 training return: tensor(-198.2049, device='cuda:0')
epoch: 100 test_true_pfm: 48.200395736104284 sim_pfm: -201.19250551016302
episode: 400 training return: tensor(-198.6058, device='cuda:0')
episode: 401 training return: tensor(-195.9340, device='cuda:0')
episode: 402 training return: tensor(-196.7771, device='cuda:0')
episode: 403 training return: tensor(-199.6697, device='cuda:0')
epoch: 101 test_true_pfm: 42.922100320718556 sim_pfm: -197.56681702731294
episode: 404 training return: tensor(-200.1077, device='cuda:0')
episode: 405 training return: tensor(-199.3288, device='cuda:0')
episode: 406 training return: tensor(-200.6232, device='cuda:0')
episode: 407 training return: tensor(-201.2076, device='cuda:0')
epoch: 102 test_true_pfm: 46.513319813483065 sim_pfm: -201.18659539599903
episode: 408 training return: tensor(-197.5995, device='cuda:0')
episode: 409 training return: tensor(-209.2591, device='cuda:0')
episode: 410 training return: tensor(-200.1915, device='cuda:0')
episode: 411 training return: tensor(-199.5338, device='cuda:0')
epoch: 103 test_true_pfm: 47.87342146244089 sim_pfm: -201.1285964173265
episode: 412 training return: tensor(-198.0208, device='cuda:0')
episode: 413 training return: tensor(-209.0253, device='cuda:0')
episode: 414 training return: tensor(-184.9762, device='cuda:0')
episode: 415 training return: tensor(-197.6656, device='cuda:0')
epoch: 104 test_true_pfm: 45.97066608664623 sim_pfm: -194.6013442061143
episode: 416 training return: tensor(-184.8159, device='cuda:0')
episode: 417 training return: tensor(-208.5923, device='cuda:0')
episode: 418 training return: tensor(-198.7804, device='cuda:0')
episode: 419 training return: tensor(-196.5260, device='cuda:0')
epoch: 105 test_true_pfm: 43.1166439135299 sim_pfm: -204.14165987946325
episode: 420 training return: tensor(-199.2480, device='cuda:0')
episode: 421 training return: tensor(-185.9488, device='cuda:0')
episode: 422 training return: tensor(-198.7135, device='cuda:0')
episode: 423 training return: tensor(-199.2025, device='cuda:0')
epoch: 106 test_true_pfm: 47.11626854531566 sim_pfm: -202.25175169833238
episode: 424 training return: tensor(-208.9049, device='cuda:0')
episode: 425 training return: tensor(-186.7665, device='cuda:0')
episode: 426 training return: tensor(-200.3319, device='cuda:0')
episode: 427 training return: tensor(-198.9308, device='cuda:0')
epoch: 107 test_true_pfm: 44.174411906497326 sim_pfm: -201.78858801865718
episode: 428 training return: tensor(-209.6234, device='cuda:0')
episode: 429 training return: tensor(-207.3843, device='cuda:0')
episode: 430 training return: tensor(-185.2464, device='cuda:0')
episode: 431 training return: tensor(-197.0530, device='cuda:0')
epoch: 108 test_true_pfm: 43.972370701317814 sim_pfm: -199.5312874797848
episode: 432 training return: tensor(-199.3352, device='cuda:0')
episode: 433 training return: tensor(-199.4543, device='cuda:0')
episode: 434 training return: tensor(-200.2373, device='cuda:0')
episode: 435 training return: tensor(-199.2608, device='cuda:0')
epoch: 109 test_true_pfm: 44.61048358602962 sim_pfm: -201.54130633752794
episode: 436 training return: tensor(-178.2149, device='cuda:0')
episode: 437 training return: tensor(-200.2704, device='cuda:0')
episode: 438 training return: tensor(-200.1463, device='cuda:0')
episode: 439 training return: tensor(-207.5244, device='cuda:0')
epoch: 110 test_true_pfm: 43.44007035905929 sim_pfm: -201.11367413189728
episode: 440 training return: tensor(-198.4869, device='cuda:0')
episode: 441 training return: tensor(-197.7898, device='cuda:0')
episode: 442 training return: tensor(-200.2932, device='cuda:0')
episode: 443 training return: tensor(-207.5556, device='cuda:0')
epoch: 111 test_true_pfm: 47.03022614090865 sim_pfm: -201.2970992198796
episode: 444 training return: tensor(-200.4687, device='cuda:0')
episode: 445 training return: tensor(-202.3375, device='cuda:0')
episode: 446 training return: tensor(-207.6676, device='cuda:0')
episode: 447 training return: tensor(-199.8040, device='cuda:0')
epoch: 112 test_true_pfm: 46.03239591602503 sim_pfm: -207.7581857443205
episode: 448 training return: tensor(-207.7389, device='cuda:0')
episode: 449 training return: tensor(-198.1322, device='cuda:0')
episode: 450 training return: tensor(-200.2882, device='cuda:0')
episode: 451 training return: tensor(-198.7276, device='cuda:0')
epoch: 113 test_true_pfm: 47.908118659845435 sim_pfm: -200.62548627222424
episode: 452 training return: tensor(-199.2553, device='cuda:0')
episode: 453 training return: tensor(-208.7316, device='cuda:0')
episode: 454 training return: tensor(-199.1897, device='cuda:0')
episode: 455 training return: tensor(-199.3707, device='cuda:0')
epoch: 114 test_true_pfm: 46.443447280052716 sim_pfm: -204.96364013902493
episode: 456 training return: tensor(-199.8515, device='cuda:0')
episode: 457 training return: tensor(-195.8747, device='cuda:0')
episode: 458 training return: tensor(-199.2426, device='cuda:0')
episode: 459 training return: tensor(-199.2652, device='cuda:0')
epoch: 115 test_true_pfm: 46.11435581908843 sim_pfm: -202.36899441513233
episode: 460 training return: tensor(-200.1188, device='cuda:0')
episode: 461 training return: tensor(-197.7500, device='cuda:0')
episode: 462 training return: tensor(-197.8042, device='cuda:0')
episode: 463 training return: tensor(-200.0024, device='cuda:0')
epoch: 116 test_true_pfm: 48.40248137562957 sim_pfm: -201.0594300343073
episode: 464 training return: tensor(-199.4124, device='cuda:0')
episode: 465 training return: tensor(-198.9409, device='cuda:0')
episode: 466 training return: tensor(-180.2172, device='cuda:0')
episode: 467 training return: tensor(-195.9905, device='cuda:0')
epoch: 117 test_true_pfm: 46.46214000914917 sim_pfm: -205.19178046075976
episode: 468 training return: tensor(-199.4866, device='cuda:0')
episode: 469 training return: tensor(-184.9088, device='cuda:0')
episode: 470 training return: tensor(-198.4725, device='cuda:0')
episode: 471 training return: tensor(-208.7042, device='cuda:0')
epoch: 118 test_true_pfm: 48.819361597133934 sim_pfm: -200.8494520252105
episode: 472 training return: tensor(-198.3949, device='cuda:0')
episode: 473 training return: tensor(-200.3936, device='cuda:0')
episode: 474 training return: tensor(-199.2413, device='cuda:0')
episode: 475 training return: tensor(-200.2355, device='cuda:0')
epoch: 119 test_true_pfm: 48.49080122470006 sim_pfm: -201.39230299280024
episode: 476 training return: tensor(-198.4414, device='cuda:0')
episode: 477 training return: tensor(-206.2205, device='cuda:0')
episode: 478 training return: tensor(-199.0398, device='cuda:0')
episode: 479 training return: tensor(-199.6699, device='cuda:0')
epoch: 120 test_true_pfm: 45.11353703933482 sim_pfm: -197.69565322820563
episode: 480 training return: tensor(-198.3286, device='cuda:0')
episode: 481 training return: tensor(-209.1572, device='cuda:0')
episode: 482 training return: tensor(-198.1813, device='cuda:0')
episode: 483 training return: tensor(-207.1307, device='cuda:0')
epoch: 121 test_true_pfm: 46.63571840754827 sim_pfm: -200.79079125870484
episode: 484 training return: tensor(-209.7262, device='cuda:0')
episode: 485 training return: tensor(-199.1514, device='cuda:0')
episode: 486 training return: tensor(-207.3646, device='cuda:0')
episode: 487 training return: tensor(-207.0717, device='cuda:0')
epoch: 122 test_true_pfm: 46.98126070800227 sim_pfm: -199.16918693510814
episode: 488 training return: tensor(-198.2078, device='cuda:0')
episode: 489 training return: tensor(-197.4769, device='cuda:0')
episode: 490 training return: tensor(-199.1637, device='cuda:0')
episode: 491 training return: tensor(-198.3596, device='cuda:0')
epoch: 123 test_true_pfm: 46.33541323383336 sim_pfm: -202.410025118466
episode: 492 training return: tensor(-198.6958, device='cuda:0')
episode: 493 training return: tensor(-199.6002, device='cuda:0')
episode: 494 training return: tensor(-199.8906, device='cuda:0')
episode: 495 training return: tensor(-201.2927, device='cuda:0')
epoch: 124 test_true_pfm: 42.56632862886532 sim_pfm: -205.26884553157726
episode: 496 training return: tensor(-207.6564, device='cuda:0')
episode: 497 training return: tensor(-198.9492, device='cuda:0')
episode: 498 training return: tensor(-198.8620, device='cuda:0')
episode: 499 training return: tensor(-198.8699, device='cuda:0')
epoch: 125 test_true_pfm: 46.963275690997406 sim_pfm: -198.41717842191574
episode: 500 training return: tensor(-206.8434, device='cuda:0')
episode: 501 training return: tensor(-200.9941, device='cuda:0')
episode: 502 training return: tensor(-208.9134, device='cuda:0')
episode: 503 training return: tensor(-199.7594, device='cuda:0')
epoch: 126 test_true_pfm: 46.24662594998584 sim_pfm: -204.03591681089603
episode: 504 training return: tensor(-207.7737, device='cuda:0')
episode: 505 training return: tensor(-208.1037, device='cuda:0')
episode: 506 training return: tensor(-209.7507, device='cuda:0')
episode: 507 training return: tensor(-209.0751, device='cuda:0')
epoch: 127 test_true_pfm: 43.224770746093235 sim_pfm: -198.62732129818178
episode: 508 training return: tensor(-208.8719, device='cuda:0')
episode: 509 training return: tensor(-207.8912, device='cuda:0')
episode: 510 training return: tensor(-187.4235, device='cuda:0')
episode: 511 training return: tensor(-198.3269, device='cuda:0')
epoch: 128 test_true_pfm: 45.62021362215568 sim_pfm: -200.07527323896065
episode: 512 training return: tensor(-198.9899, device='cuda:0')
episode: 513 training return: tensor(-209.7853, device='cuda:0')
episode: 514 training return: tensor(-198.2197, device='cuda:0')
episode: 515 training return: tensor(-200.1563, device='cuda:0')
epoch: 129 test_true_pfm: 44.273193302732274 sim_pfm: -201.94155060199554
episode: 516 training return: tensor(-208.3764, device='cuda:0')
episode: 517 training return: tensor(-208.8403, device='cuda:0')
episode: 518 training return: tensor(-212.3248, device='cuda:0')
episode: 519 training return: tensor(-199.2794, device='cuda:0')
epoch: 130 test_true_pfm: 47.58527326108773 sim_pfm: -199.421906317072
episode: 520 training return: tensor(-198.3473, device='cuda:0')
episode: 521 training return: tensor(-201.2073, device='cuda:0')
episode: 522 training return: tensor(-209.6438, device='cuda:0')
episode: 523 training return: tensor(-209.1481, device='cuda:0')
epoch: 131 test_true_pfm: 43.14372790221889 sim_pfm: -203.81930164776858
episode: 524 training return: tensor(-209.3923, device='cuda:0')
episode: 525 training return: tensor(-205.8520, device='cuda:0')
episode: 526 training return: tensor(-198.9375, device='cuda:0')
episode: 527 training return: tensor(-199.0479, device='cuda:0')
epoch: 132 test_true_pfm: 47.78436785575754 sim_pfm: -201.78782472438178
episode: 528 training return: tensor(-199.7522, device='cuda:0')
episode: 529 training return: tensor(-199.1000, device='cuda:0')
episode: 530 training return: tensor(-199.3801, device='cuda:0')
