['--learn', 'behavior', '--env', 'HalfCheetah-v2', '--traj', 'medium', '--seed', '5']
epoch: 0 training_loss 0.31763816490769387 test_loss: 0.20073938369750977
epoch: 1 training_loss 0.17264075629413128 test_loss: 0.15180085897445678
epoch: 2 training_loss 0.14227547071874141 test_loss: 0.1308242678642273
epoch: 3 training_loss 0.13429004821926355 test_loss: 0.13961098194122315
epoch: 4 training_loss 0.13463526759296657 test_loss: 0.14608609676361084
epoch: 5 training_loss 0.12822954006493092 test_loss: 0.1153834581375122
epoch: 6 training_loss 0.11819875944405794 test_loss: 0.14003149271011353
epoch: 7 training_loss 0.12701745327562095 test_loss: 0.128667151927948
epoch: 8 training_loss 0.13308539628982544 test_loss: 0.12889788150787354
epoch: 9 training_loss 0.12231985241174698 test_loss: 0.13206197023391725
epoch: 10 training_loss 0.12286379400640726 test_loss: 0.1004411220550537
epoch: 11 training_loss 0.11684317857027054 test_loss: 0.10312962532043457
epoch: 12 training_loss 0.11410786628723145 test_loss: 0.1387676477432251
epoch: 13 training_loss 0.12410138715058565 test_loss: 0.10470027923583984
epoch: 14 training_loss 0.10392183434218168 test_loss: 0.11713205575942993
epoch: 15 training_loss 0.1079505181312561 test_loss: 0.11210868358612061
epoch: 16 training_loss 0.11924426302313805 test_loss: 0.11997644901275635
epoch: 17 training_loss 0.11306406363844872 test_loss: 0.11258865594863891
epoch: 18 training_loss 0.10617672994732857 test_loss: 0.1282659649848938
epoch: 19 training_loss 0.11189882172271609 test_loss: 0.09960335493087769
epoch: 20 training_loss 0.1115264768153429 test_loss: 0.09883034229278564
epoch: 21 training_loss 0.11542213622480631 test_loss: 0.10452018976211548
epoch: 22 training_loss 0.11743757616728544 test_loss: 0.11432808637619019
epoch: 23 training_loss 0.10413544911891222 test_loss: 0.10342484712600708
epoch: 24 training_loss 0.12550416653975843 test_loss: 0.1133266806602478
epoch: 25 training_loss 0.10977835189551115 test_loss: 0.10835797786712646
epoch: 26 training_loss 0.11682188473641872 test_loss: 0.11165231466293335
epoch: 27 training_loss 0.10597459010779858 test_loss: 0.12013815641403199
epoch: 28 training_loss 0.10701699040830136 test_loss: 0.10241791009902954
epoch: 29 training_loss 0.10178505374118686 test_loss: 0.13021045923233032
epoch: 30 training_loss 0.11483331620693207 test_loss: 0.10977150201797485
epoch: 31 training_loss 0.10925528155639767 test_loss: 0.12469801902770997
epoch: 32 training_loss 0.10805953010916709 test_loss: 0.11973905563354492
epoch: 33 training_loss 0.10996934775263072 test_loss: 0.09886135458946228
epoch: 34 training_loss 0.10612223979085683 test_loss: 0.11310402154922486
epoch: 35 training_loss 0.11091150283813477 test_loss: 0.11060729026794433
epoch: 36 training_loss 0.10718238685280085 test_loss: 0.12082152366638184
epoch: 37 training_loss 0.09881994493305683 test_loss: 0.10358010530471802
epoch: 38 training_loss 0.10797106876969337 test_loss: 0.11354858875274658
epoch: 39 training_loss 0.10330520927906037 test_loss: 0.11107071638107299
epoch: 40 training_loss 0.10492466621100903 test_loss: 0.11856213808059693
epoch: 41 training_loss 0.10860683852806688 test_loss: 0.1076966643333435
epoch: 42 training_loss 0.10330837218090892 test_loss: 0.11312594413757324
epoch: 43 training_loss 0.10651528310030699 test_loss: 0.10052387714385987
epoch: 44 training_loss 0.10249431267380714 test_loss: 0.10727906227111816
epoch: 45 training_loss 0.10334579091519118 test_loss: 0.11810444593429566
epoch: 46 training_loss 0.10751339286565781 test_loss: 0.13019264936447145
epoch: 47 training_loss 0.10679239582270383 test_loss: 0.1037934422492981
epoch: 48 training_loss 0.10914446350187063 test_loss: 0.10778374671936035
epoch: 49 training_loss 0.10843276869505644 test_loss: 0.10726318359375
epoch: 50 training_loss 0.10689936246722936 test_loss: 0.11203429698944092
epoch: 51 training_loss 0.10672818625345826 test_loss: 0.09302828907966613
epoch: 52 training_loss 0.11023874478414655 test_loss: 0.10703781843185425
epoch: 53 training_loss 0.10738253645598889 test_loss: 0.11014392375946044
epoch: 54 training_loss 0.10672100797295571 test_loss: 0.11070058345794678
epoch: 55 training_loss 0.10874934405088425 test_loss: 0.10621824264526367
epoch: 56 training_loss 0.10497607216238976 test_loss: 0.12319170236587525
epoch: 57 training_loss 0.11402911182492971 test_loss: 0.12576563358306886
epoch: 58 training_loss 0.10999192897230387 test_loss: 0.10953202247619628
epoch: 59 training_loss 0.10795069854706525 test_loss: 0.11361080408096313
epoch: 60 training_loss 0.10218370812013745 test_loss: 0.1312812089920044
epoch: 61 training_loss 0.10542923346161842 test_loss: 0.10530133247375488
epoch: 62 training_loss 0.10823450025171041 test_loss: 0.0841285228729248
epoch: 63 training_loss 0.10429974116384982 test_loss: 0.10251474380493164
epoch: 64 training_loss 0.10353473952040076 test_loss: 0.09643572568893433
epoch: 65 training_loss 0.10116982858628035 test_loss: 0.09901151061058044
epoch: 66 training_loss 0.11205978609621525 test_loss: 0.09831486940383911
epoch: 67 training_loss 0.10367692153900862 test_loss: 0.1020543098449707
epoch: 68 training_loss 0.10920101471245289 test_loss: 0.11075628995895385
epoch: 69 training_loss 0.10785476848483086 test_loss: 0.11571310758590699
epoch: 70 training_loss 0.11105023900046945 test_loss: 0.10811771154403686
epoch: 71 training_loss 0.10695567848160863 test_loss: 0.13488517999649047
epoch: 72 training_loss 0.09755868576467037 test_loss: 0.10665955543518066
epoch: 73 training_loss 0.09352362036705017 test_loss: 0.11070730686187744
epoch: 74 training_loss 0.10342062715440989 test_loss: 0.10179156064987183
epoch: 75 training_loss 0.09793969973921776 test_loss: 0.10509327650070191
epoch: 76 training_loss 0.10248162413947284 test_loss: 0.1074715256690979
epoch: 77 training_loss 0.11339335583150387 test_loss: 0.1080740213394165
epoch: 78 training_loss 0.10139167161658406 test_loss: 0.09086505770683288
epoch: 79 training_loss 0.10831979505717754 test_loss: 0.0884761393070221
epoch: 80 training_loss 0.10872358394786716 test_loss: 0.0974802553653717
epoch: 81 training_loss 0.10374906808137893 test_loss: 0.11398708820343018
epoch: 82 training_loss 0.10026262929663061 test_loss: 0.11482270956039428
epoch: 83 training_loss 0.10601610664278269 test_loss: 0.09869309663772582
epoch: 84 training_loss 0.10602402213960886 test_loss: 0.09719301462173462
epoch: 85 training_loss 0.10614524528384209 test_loss: 0.12044764757156372
epoch: 86 training_loss 0.1089565565995872 test_loss: 0.1144174575805664
epoch: 87 training_loss 0.10163381319493055 test_loss: 0.11448593139648437
epoch: 88 training_loss 0.10686618087813259 test_loss: 0.10638372898101807
epoch: 89 training_loss 0.10253735817968845 test_loss: 0.10791114568710328
epoch: 90 training_loss 0.10139901768416167 test_loss: 0.1152044653892517
epoch: 91 training_loss 0.10613510742783547 test_loss: 0.10925440788269043
epoch: 92 training_loss 0.10371882004663348 test_loss: 0.10554965734481811
epoch: 93 training_loss 0.10895586539059877 test_loss: 0.11879466772079468
epoch: 94 training_loss 0.11277265883982182 test_loss: 0.10866315364837646
epoch: 95 training_loss 0.10939244318753481 test_loss: 0.1047437310218811
epoch: 96 training_loss 0.11141929823905229 test_loss: 0.11539452075958252
epoch: 97 training_loss 0.10428713355213404 test_loss: 0.11825089454650879
epoch: 98 training_loss 0.10368971720337868 test_loss: 0.11964162588119506
epoch: 99 training_loss 0.1011464349925518 test_loss: 0.11059309244155884
epoch: 100 training_loss 0.10976306594908238 test_loss: 0.10362880229949951
epoch: 101 training_loss 0.10245820472016931 test_loss: 0.11275917291641235
epoch: 102 training_loss 0.10841945070773364 test_loss: 0.09967843294143677
epoch: 103 training_loss 0.10578469490632415 test_loss: 0.12242716550827026
epoch: 104 training_loss 0.099774802736938 test_loss: 0.10862358808517455
epoch: 105 training_loss 0.1041414961963892 test_loss: 0.13321722745895387
epoch: 106 training_loss 0.10318955447524786 test_loss: 0.09947715401649475
epoch: 107 training_loss 0.10388876624405384 test_loss: 0.10701706409454345
epoch: 108 training_loss 0.10658059306442738 test_loss: 0.09939072728157043
epoch: 109 training_loss 0.10506247848272324 test_loss: 0.11776819229125976
epoch: 110 training_loss 0.10213117614388466 test_loss: 0.09760302305221558
epoch: 111 training_loss 0.10647860888391733 test_loss: 0.11030017137527466
epoch: 112 training_loss 0.1045663501881063 test_loss: 0.10014307498931885
epoch: 113 training_loss 0.1116694495640695 test_loss: 0.11003276109695434
epoch: 114 training_loss 0.09711933676153421 test_loss: 0.10395324230194092
epoch: 115 training_loss 0.10122028384357691 test_loss: 0.13360222578048705
epoch: 116 training_loss 0.10257192818447948 test_loss: 0.1110486388206482
epoch: 117 training_loss 0.10470530908554793 test_loss: 0.11354053020477295
epoch: 118 training_loss 0.1026552875339985 test_loss: 0.10297027826309205
epoch: 119 training_loss 0.09836797166615724 test_loss: 0.09305898547172546
epoch: 120 training_loss 0.10283305212855338 test_loss: 0.10036158561706543
epoch: 121 training_loss 0.10625408843159675 test_loss: 0.11322286128997802
epoch: 122 training_loss 0.10560674220323563 test_loss: 0.08869538307189942
epoch: 123 training_loss 0.11030557222664356 test_loss: 0.10518149137496949
epoch: 124 training_loss 0.09520361255854368 test_loss: 0.11830013990402222
epoch: 125 training_loss 0.10352520145475864 test_loss: 0.10170398950576783
epoch: 126 training_loss 0.10509158361703158 test_loss: 0.10071519613265992
epoch: 127 training_loss 0.10395502772182226 test_loss: 0.0971062958240509
epoch: 128 training_loss 0.10375593645498156 test_loss: 0.10820754766464233
epoch: 129 training_loss 0.10339358577504754 test_loss: 0.1353761672973633
epoch: 130 training_loss 0.10889309648424388 test_loss: 0.09448602199554443
epoch: 131 training_loss 0.11041114289313554 test_loss: 0.09571194648742676
epoch: 132 training_loss 0.10894356578588486 test_loss: 0.11071064472198486
epoch: 133 training_loss 0.10617092188447713 test_loss: 0.12078744173049927
epoch: 134 training_loss 0.10343917176127433 test_loss: 0.09579251408576965
epoch: 135 training_loss 0.10281631972640753 test_loss: 0.10403655767440796
epoch: 136 training_loss 0.10069147191941738 test_loss: 0.11583929061889649
epoch: 137 training_loss 0.09710023174062371 test_loss: 0.09303807020187378
epoch: 138 training_loss 0.10805479224771261 test_loss: 0.11335346698760987
epoch: 139 training_loss 0.1096437363512814 test_loss: 0.11013731956481934
epoch: 140 training_loss 0.10003505313768983 test_loss: 0.10335179567337036
epoch: 141 training_loss 0.11112380230799318 test_loss: 0.10519371032714844
epoch: 142 training_loss 0.09518994309008122 test_loss: 0.10848296880722046
epoch: 143 training_loss 0.10757537592202425 test_loss: 0.10997816324234008
epoch: 144 training_loss 0.10046152882277966 test_loss: 0.11734870672225953
epoch: 145 training_loss 0.10132553899660707 test_loss: 0.11099438667297364
epoch: 146 training_loss 0.0990896487981081 test_loss: 0.09667407274246216
epoch: 147 training_loss 0.10814871203154325 test_loss: 0.09672409892082215
epoch: 148 training_loss 0.1059670645929873 test_loss: 0.08163039088249206
epoch: 149 training_loss 0.1056796626187861 test_loss: 0.10640913248062134
epoch: 0 training_loss 49.04700595855713 test_loss: 24.249166870117186
epoch: 1 training_loss 18.39391800880432 test_loss: 14.999183654785156
epoch: 2 training_loss 13.24521523475647 test_loss: 11.555469512939453
epoch: 3 training_loss 10.604878149032594 test_loss: 9.579443359375
epoch: 4 training_loss 8.799644212722779 test_loss: 8.298991394042968
epoch: 5 training_loss 7.513269248008728 test_loss: 7.284773254394532
epoch: 6 training_loss 6.707641434669495 test_loss: 6.362500762939453
epoch: 7 training_loss 5.894515838623047 test_loss: 5.613155364990234
epoch: 8 training_loss 5.456404600143433 test_loss: 5.092734146118164
epoch: 9 training_loss 5.052616286277771 test_loss: 4.9394287109375
epoch: 10 training_loss 4.794757745265961 test_loss: 4.585175704956055
epoch: 11 training_loss 4.432114551067352 test_loss: 4.371836853027344
epoch: 12 training_loss 4.2060323596000675 test_loss: 4.113210296630859
epoch: 13 training_loss 4.064234149456024 test_loss: 3.9113487243652343
epoch: 14 training_loss 3.839342248439789 test_loss: 3.644412612915039
epoch: 15 training_loss 3.6616670536994933 test_loss: 3.5512130737304686
epoch: 16 training_loss 3.5420479702949526 test_loss: 3.456575393676758
epoch: 17 training_loss 3.453343107700348 test_loss: 3.514689636230469
epoch: 18 training_loss 3.3785960555076597 test_loss: 3.2522308349609377
epoch: 19 training_loss 3.2605502128601076 test_loss: 3.19281120300293
epoch: 20 training_loss 3.168996102809906 test_loss: 2.96303653717041
epoch: 21 training_loss 3.037376503944397 test_loss: 3.0718080520629885
epoch: 22 training_loss 2.9709770846366883 test_loss: 2.9088863372802733
epoch: 23 training_loss 3.0335435819625856 test_loss: 2.906601905822754
epoch: 24 training_loss 2.7793148350715637 test_loss: 2.87518367767334
epoch: 25 training_loss 2.7997570633888245 test_loss: 2.77968807220459
epoch: 26 training_loss 2.7445907187461853 test_loss: 2.672929382324219
epoch: 27 training_loss 2.7151753282546998 test_loss: 2.7369625091552736
epoch: 28 training_loss 2.6575716519355774 test_loss: 2.559944725036621
epoch: 29 training_loss 2.6739840650558473 test_loss: 2.615427780151367
epoch: 30 training_loss 2.5603470635414123 test_loss: 2.588534927368164
epoch: 31 training_loss 2.52030033826828 test_loss: 2.3577644348144533
epoch: 32 training_loss 2.481298615932465 test_loss: 2.378673553466797
epoch: 33 training_loss 2.504765183925629 test_loss: 2.4023962020874023
epoch: 34 training_loss 2.378501658439636 test_loss: 2.409739685058594
epoch: 35 training_loss 2.4079871821403502 test_loss: 2.2849864959716797
epoch: 36 training_loss 2.3451732766628264 test_loss: 2.2136873245239257
epoch: 37 training_loss 2.3094443213939666 test_loss: 2.2983377456665037
epoch: 38 training_loss 2.310858545303345 test_loss: 2.2497936248779298
epoch: 39 training_loss 2.2579228830337525 test_loss: 2.3067129135131834
epoch: 40 training_loss 2.251517142057419 test_loss: 2.1752349853515627
epoch: 41 training_loss 2.1859193837642668 test_loss: 2.2138208389282226
epoch: 42 training_loss 2.213774071931839 test_loss: 2.2162805557250977
epoch: 43 training_loss 2.1651603782176974 test_loss: 2.159672737121582
epoch: 44 training_loss 2.149047371149063 test_loss: 2.0771249771118163
epoch: 45 training_loss 2.176660977602005 test_loss: 2.154322052001953
epoch: 46 training_loss 2.0930161869525907 test_loss: 2.1666980743408204
epoch: 47 training_loss 2.1061751568317413 test_loss: 2.1403457641601564
epoch: 48 training_loss 2.0356180655956266 test_loss: 1.979537582397461
epoch: 49 training_loss 2.036184108257294 test_loss: 1.9841583251953125
epoch: 50 training_loss 2.02802831530571 test_loss: 2.038969802856445
epoch: 51 training_loss 2.0141550374031065 test_loss: 1.955172348022461
epoch: 52 training_loss 1.9515970075130462 test_loss: 1.9776762008666993
epoch: 53 training_loss 1.988374457359314 test_loss: 1.9771270751953125
epoch: 54 training_loss 1.974036009311676 test_loss: 1.9776992797851562
epoch: 55 training_loss 1.9088015830516816 test_loss: 1.9059242248535155
epoch: 56 training_loss 2.0119860792160034 test_loss: 1.962153434753418
epoch: 57 training_loss 1.9135595679283142 test_loss: 1.9148809432983398
epoch: 58 training_loss 1.8934045481681823 test_loss: 1.9468931198120116
epoch: 59 training_loss 1.9046788370609284 test_loss: 1.8561788558959962
epoch: 60 training_loss 1.910998958349228 test_loss: 1.8175678253173828
epoch: 61 training_loss 1.8863469231128693 test_loss: 1.9068607330322265
epoch: 62 training_loss 1.8416567254066467 test_loss: 1.86785831451416
epoch: 63 training_loss 1.8330272018909455 test_loss: 1.8496545791625976
epoch: 64 training_loss 1.838475351333618 test_loss: 1.8459480285644532
epoch: 65 training_loss 1.8490773332118988 test_loss: 1.778610610961914
epoch: 66 training_loss 1.8148272275924682 test_loss: 1.7809534072875977
epoch: 67 training_loss 1.8293240821361543 test_loss: 1.7895061492919921
epoch: 68 training_loss 1.774239091873169 test_loss: 1.7233186721801759
epoch: 69 training_loss 1.7814886045455933 test_loss: 1.787738037109375
epoch: 70 training_loss 1.7513239932060243 test_loss: 1.7840492248535156
epoch: 71 training_loss 1.7905466067790985 test_loss: 1.7616281509399414
epoch: 72 training_loss 1.7593664598464966 test_loss: 1.730949592590332
epoch: 73 training_loss 1.7490628898143767 test_loss: 1.6892990112304687
epoch: 74 training_loss 1.7391250932216644 test_loss: 1.7445728302001953
epoch: 75 training_loss 1.7684908866882325 test_loss: 1.7095882415771484
epoch: 76 training_loss 1.7493420600891114 test_loss: 1.708315658569336
epoch: 77 training_loss 1.724427704811096 test_loss: 1.6744888305664063
epoch: 78 training_loss 1.7028395867347716 test_loss: 1.7759422302246093
epoch: 79 training_loss 1.7307351446151733 test_loss: 1.686337661743164
epoch: 80 training_loss 1.7266860353946685 test_loss: 1.7114946365356445
epoch: 81 training_loss 1.7111096179485321 test_loss: 1.657170867919922
epoch: 82 training_loss 1.7212976908683777 test_loss: 1.6625871658325195
epoch: 83 training_loss 1.693232923746109 test_loss: 1.6743906021118165
epoch: 84 training_loss 1.670469455718994 test_loss: 1.7166589736938476
epoch: 85 training_loss 1.676764075756073 test_loss: 1.6826566696166991
epoch: 86 training_loss 1.6778687167167663 test_loss: 1.6426647186279297
epoch: 87 training_loss 1.6732753717899322 test_loss: 1.676543617248535
epoch: 88 training_loss 1.6399925649166107 test_loss: 1.672715950012207
epoch: 89 training_loss 1.6300986349582671 test_loss: 1.6034141540527345
epoch: 90 training_loss 1.664800397157669 test_loss: 1.65003662109375
epoch: 91 training_loss 1.6298963165283202 test_loss: 1.6568273544311523
epoch: 92 training_loss 1.6147350299358367 test_loss: 1.655136489868164
epoch: 93 training_loss 1.6300130116939544 test_loss: 1.612654685974121
epoch: 94 training_loss 1.6206895983219147 test_loss: 1.6457773208618165
epoch: 95 training_loss 1.611325011253357 test_loss: 1.57955322265625
epoch: 96 training_loss 1.629123225212097 test_loss: 1.6075908660888671
epoch: 97 training_loss 1.6045080256462096 test_loss: 1.595108985900879
epoch: 98 training_loss 1.60307497382164 test_loss: 1.5707308769226074
epoch: 99 training_loss 1.6086488020420076 test_loss: 1.6126907348632813
epoch: 100 training_loss 1.5908297085762024 test_loss: 1.5842453002929688
epoch: 101 training_loss 1.6216469645500182 test_loss: 1.5788549423217773
epoch: 102 training_loss 1.588555895090103 test_loss: 1.5739451408386231
epoch: 103 training_loss 1.5711385893821717 test_loss: 1.6779891967773437
epoch: 104 training_loss 1.5594612288475036 test_loss: 1.5996026039123534
epoch: 105 training_loss 1.5666725969314574 test_loss: 1.5596986770629884
epoch: 106 training_loss 1.5839782428741456 test_loss: 1.597018051147461
epoch: 107 training_loss 1.5867692267894744 test_loss: 1.5542577743530273
epoch: 108 training_loss 1.5865215575695037 test_loss: 1.5856245040893555
epoch: 109 training_loss 1.5871290922164918 test_loss: 1.52351655960083
epoch: 110 training_loss 1.5726973950862884 test_loss: 1.5494416236877442
epoch: 111 training_loss 1.5572207057476044 test_loss: 1.544374370574951
epoch: 112 training_loss 1.5416821205615998 test_loss: 1.5047574996948243
epoch: 113 training_loss 1.5456459784507752 test_loss: 1.5857574462890625
epoch: 114 training_loss 1.5258152782917023 test_loss: 1.5615453720092773
epoch: 115 training_loss 1.513608944416046 test_loss: 1.5808713912963868
epoch: 116 training_loss 1.5296353125572204 test_loss: 1.5221309661865234
epoch: 117 training_loss 1.5475253522396089 test_loss: 1.5314386367797852
epoch: 118 training_loss 1.5392114925384521 test_loss: 1.5260710716247559
epoch: 119 training_loss 1.5254030299186707 test_loss: 1.5556453704833983
epoch: 120 training_loss 1.528702368736267 test_loss: 1.5181864738464355
epoch: 121 training_loss 1.5365028190612793 test_loss: 1.5493003845214843
epoch: 122 training_loss 1.538251620531082 test_loss: 1.5237721443176269
epoch: 123 training_loss 1.5115611708164216 test_loss: 1.5031093597412108
epoch: 124 training_loss 1.5206490540504456 test_loss: 1.4978964805603028
epoch: 125 training_loss 1.5104584097862244 test_loss: 1.4892142295837403
epoch: 126 training_loss 1.491382166147232 test_loss: 1.4964092254638672
epoch: 127 training_loss 1.490392061471939 test_loss: 1.4917831420898438
epoch: 128 training_loss 1.4979745769500732 test_loss: 1.4918376922607421
epoch: 129 training_loss 1.5197780847549438 test_loss: 1.5073469161987305
epoch: 130 training_loss 1.5066477298736571 test_loss: 1.4978663444519043
epoch: 131 training_loss 1.5101811802387237 test_loss: 1.5362298965454102
epoch: 132 training_loss 1.5138418102264404 test_loss: 1.4671365737915039
epoch: 133 training_loss 1.5257877326011657 test_loss: 1.4989893913269043
epoch: 134 training_loss 1.4983897602558136 test_loss: 1.5020073890686034
epoch: 135 training_loss 1.4952936363220215 test_loss: 1.5025707244873048
epoch: 136 training_loss 1.5006364715099334 test_loss: 1.4781121253967284
epoch: 137 training_loss 1.4868838536739348 test_loss: 1.5078890800476075
epoch: 138 training_loss 1.4757024431228638 test_loss: 1.4945280075073242
epoch: 139 training_loss 1.496822978258133 test_loss: 1.4809938430786134
epoch: 140 training_loss 1.5033286249637603 test_loss: 1.4511534690856933
epoch: 141 training_loss 1.4809611570835113 test_loss: 1.462526798248291
epoch: 142 training_loss 1.4685380589962005 test_loss: 1.4453911781311035
epoch: 143 training_loss 1.4680396127700805 test_loss: 1.4849538803100586
epoch: 144 training_loss 1.463035020828247 test_loss: 1.5007933616638183
epoch: 145 training_loss 1.481025719642639 test_loss: 1.5044913291931152
epoch: 146 training_loss 1.4582099580764771 test_loss: 1.4469584465026855
epoch: 147 training_loss 1.4619506549835206 test_loss: 1.4875469207763672
epoch: 148 training_loss 1.461570122241974 test_loss: 1.4431491851806642
epoch: 149 training_loss 1.4746635270118713 test_loss: 1.5318719863891601
5076.729692048314
episode: 0 training return: tensor(-201.3668, device='cuda:0')
episode: 1 training return: tensor(-142.1541, device='cuda:0')
episode: 2 training return: tensor(-32.7641, device='cuda:0')
episode: 3 training return: tensor(-27.6990, device='cuda:0')
epoch: 1 test_true_pfm: 5134.061697395053 sim_pfm: -39.55968773982022
episode: 4 training return: tensor(-115.2948, device='cuda:0')
episode: 5 training return: tensor(-67.4126, device='cuda:0')
episode: 6 training return: tensor(-22.2441, device='cuda:0')
episode: 7 training return: tensor(-116.3183, device='cuda:0')
epoch: 2 test_true_pfm: 5004.155577256907 sim_pfm: -30.64952269922166
episode: 8 training return: tensor(40.6761, device='cuda:0')
episode: 9 training return: tensor(-199.3657, device='cuda:0')
episode: 10 training return: tensor(29.7414, device='cuda:0')
episode: 11 training return: tensor(-147.6441, device='cuda:0')
epoch: 3 test_true_pfm: 5143.690217645035 sim_pfm: -28.768149561617367
episode: 12 training return: tensor(-71.2049, device='cuda:0')
episode: 13 training return: tensor(-35.5274, device='cuda:0')
episode: 14 training return: tensor(-104.7685, device='cuda:0')
episode: 15 training return: tensor(-21.4608, device='cuda:0')
epoch: 4 test_true_pfm: 5197.693468057237 sim_pfm: -9.825447451061336
episode: 16 training return: tensor(50.0475, device='cuda:0')
episode: 17 training return: tensor(-8.9613, device='cuda:0')
episode: 18 training return: tensor(23.0108, device='cuda:0')
episode: 19 training return: tensor(-47.8638, device='cuda:0')
epoch: 5 test_true_pfm: 5210.11626038505 sim_pfm: -44.55568204736725
episode: 20 training return: tensor(-34.0001, device='cuda:0')
episode: 21 training return: tensor(-74.2524, device='cuda:0')
episode: 22 training return: tensor(-80.3336, device='cuda:0')
episode: 23 training return: tensor(-46.3746, device='cuda:0')
epoch: 6 test_true_pfm: 5074.021400198049 sim_pfm: -66.17560897471655
episode: 24 training return: tensor(-29.0879, device='cuda:0')
episode: 25 training return: tensor(-92.6264, device='cuda:0')
episode: 26 training return: tensor(-76.7599, device='cuda:0')
episode: 27 training return: tensor(-131.5509, device='cuda:0')
epoch: 7 test_true_pfm: 5090.668927298977 sim_pfm: 56.55373699303406
episode: 28 training return: tensor(-47.5376, device='cuda:0')
episode: 29 training return: tensor(-11.0850, device='cuda:0')
episode: 30 training return: tensor(-91.4603, device='cuda:0')
episode: 31 training return: tensor(-79.8640, device='cuda:0')
epoch: 8 test_true_pfm: 5118.640596010843 sim_pfm: -32.70949717626596
episode: 32 training return: tensor(-14.4235, device='cuda:0')
episode: 33 training return: tensor(-83.3218, device='cuda:0')
episode: 34 training return: tensor(-2.5266, device='cuda:0')
episode: 35 training return: tensor(-8.2833, device='cuda:0')
epoch: 9 test_true_pfm: 5192.774557672322 sim_pfm: 104.30131881818913
episode: 36 training return: tensor(-72.2061, device='cuda:0')
episode: 37 training return: tensor(-11.6654, device='cuda:0')
episode: 38 training return: tensor(-45.5567, device='cuda:0')
episode: 39 training return: tensor(-26.9961, device='cuda:0')
epoch: 10 test_true_pfm: 5107.369546692621 sim_pfm: 10.057040985877393
episode: 40 training return: tensor(-82.7356, device='cuda:0')
episode: 41 training return: tensor(4.5242, device='cuda:0')
episode: 42 training return: tensor(-22.1124, device='cuda:0')
episode: 43 training return: tensor(-69.3811, device='cuda:0')
epoch: 11 test_true_pfm: 5001.754731159576 sim_pfm: 64.97660627525572
episode: 44 training return: tensor(-48.3278, device='cuda:0')
episode: 45 training return: tensor(-87.7273, device='cuda:0')
episode: 46 training return: tensor(3.9296, device='cuda:0')
episode: 47 training return: tensor(17.4891, device='cuda:0')
epoch: 12 test_true_pfm: 5283.682490625098 sim_pfm: 23.615791097613208
episode: 48 training return: tensor(-23.9126, device='cuda:0')
episode: 49 training return: tensor(-12.7264, device='cuda:0')
episode: 50 training return: tensor(-12.4716, device='cuda:0')
episode: 51 training return: tensor(-197.8399, device='cuda:0')
epoch: 13 test_true_pfm: 5220.813600568639 sim_pfm: 54.23641819840608
episode: 52 training return: tensor(105.8619, device='cuda:0')
episode: 53 training return: tensor(140.9088, device='cuda:0')
episode: 54 training return: tensor(140.4017, device='cuda:0')
episode: 55 training return: tensor(-41.5522, device='cuda:0')
epoch: 14 test_true_pfm: 5260.727391494903 sim_pfm: 115.40748893316292
episode: 56 training return: tensor(94.9904, device='cuda:0')
episode: 57 training return: tensor(-59.7370, device='cuda:0')
episode: 58 training return: tensor(-2.1774, device='cuda:0')
episode: 59 training return: tensor(-61.2037, device='cuda:0')
epoch: 15 test_true_pfm: 5218.740797083534 sim_pfm: 94.41562644896719
episode: 60 training return: tensor(83.2621, device='cuda:0')
episode: 61 training return: tensor(-48.3385, device='cuda:0')
episode: 62 training return: tensor(-134.1460, device='cuda:0')
episode: 63 training return: tensor(48.9620, device='cuda:0')
epoch: 16 test_true_pfm: 5151.802530364684 sim_pfm: 8.907606440596282
episode: 64 training return: tensor(9.1676, device='cuda:0')
episode: 65 training return: tensor(-23.4574, device='cuda:0')
episode: 66 training return: tensor(174.0712, device='cuda:0')
episode: 67 training return: tensor(-49.9424, device='cuda:0')
epoch: 17 test_true_pfm: 5349.526596583894 sim_pfm: 153.61303923612772
episode: 68 training return: tensor(127.1575, device='cuda:0')
episode: 69 training return: tensor(99.0119, device='cuda:0')
episode: 70 training return: tensor(42.4389, device='cuda:0')
episode: 71 training return: tensor(120.3249, device='cuda:0')
epoch: 18 test_true_pfm: 5293.595016008978 sim_pfm: 32.38834163702753
episode: 72 training return: tensor(-75.4681, device='cuda:0')
episode: 73 training return: tensor(12.7135, device='cuda:0')
episode: 74 training return: tensor(-47.1103, device='cuda:0')
episode: 75 training return: tensor(81.7017, device='cuda:0')
epoch: 19 test_true_pfm: 5183.0135395921825 sim_pfm: 80.6790812490508
episode: 76 training return: tensor(-87.8892, device='cuda:0')
episode: 77 training return: tensor(37.8085, device='cuda:0')
episode: 78 training return: tensor(-19.9318, device='cuda:0')
episode: 79 training return: tensor(93.9624, device='cuda:0')
epoch: 20 test_true_pfm: 5196.849271740736 sim_pfm: 79.65421414372395
episode: 80 training return: tensor(-192.6976, device='cuda:0')
episode: 81 training return: tensor(-93.9611, device='cuda:0')
episode: 82 training return: tensor(113.3516, device='cuda:0')
episode: 83 training return: tensor(39.2370, device='cuda:0')
epoch: 21 test_true_pfm: 5141.776070571325 sim_pfm: 80.2691672341122
episode: 84 training return: tensor(11.9359, device='cuda:0')
episode: 85 training return: tensor(68.5992, device='cuda:0')
episode: 86 training return: tensor(9.3630, device='cuda:0')
episode: 87 training return: tensor(-8.6340, device='cuda:0')
epoch: 22 test_true_pfm: 5375.037027769577 sim_pfm: 107.5653737289055
episode: 88 training return: tensor(-44.4678, device='cuda:0')
episode: 89 training return: tensor(-27.2858, device='cuda:0')
episode: 90 training return: tensor(-28.7717, device='cuda:0')
episode: 91 training return: tensor(3.6769, device='cuda:0')
epoch: 23 test_true_pfm: 5301.881758871106 sim_pfm: 110.61854678876504
episode: 92 training return: tensor(-49.3066, device='cuda:0')
episode: 93 training return: tensor(-35.2491, device='cuda:0')
episode: 94 training return: tensor(26.9235, device='cuda:0')
episode: 95 training return: tensor(15.5467, device='cuda:0')
epoch: 24 test_true_pfm: 5262.801869419094 sim_pfm: 99.93541235979258
episode: 96 training return: tensor(-137.9582, device='cuda:0')
episode: 97 training return: tensor(84.3098, device='cuda:0')
episode: 98 training return: tensor(163.7434, device='cuda:0')
episode: 99 training return: tensor(83.7554, device='cuda:0')
epoch: 25 test_true_pfm: 5284.620458844659 sim_pfm: 142.98686173121678
episode: 100 training return: tensor(-8.1674, device='cuda:0')
episode: 101 training return: tensor(6.2226, device='cuda:0')
episode: 102 training return: tensor(20.0399, device='cuda:0')
episode: 103 training return: tensor(122.7096, device='cuda:0')
epoch: 26 test_true_pfm: 5298.716324563623 sim_pfm: 114.0686555360541
episode: 104 training return: tensor(92.7093, device='cuda:0')
episode: 105 training return: tensor(-14.9060, device='cuda:0')
episode: 106 training return: tensor(47.2419, device='cuda:0')
episode: 107 training return: tensor(43.7840, device='cuda:0')
epoch: 27 test_true_pfm: 5332.071004657421 sim_pfm: 127.29786842316389
episode: 108 training return: tensor(23.4617, device='cuda:0')
episode: 109 training return: tensor(133.8307, device='cuda:0')
episode: 110 training return: tensor(32.0861, device='cuda:0')
episode: 111 training return: tensor(-122.7329, device='cuda:0')
epoch: 28 test_true_pfm: 5332.4964599570385 sim_pfm: 105.17145618320986
episode: 112 training return: tensor(69.3916, device='cuda:0')
episode: 113 training return: tensor(49.8931, device='cuda:0')
episode: 114 training return: tensor(149.6929, device='cuda:0')
episode: 115 training return: tensor(69.2309, device='cuda:0')
epoch: 29 test_true_pfm: 5323.699240762628 sim_pfm: 107.38239019109945
episode: 116 training return: tensor(70.8577, device='cuda:0')
episode: 117 training return: tensor(-26.4746, device='cuda:0')
episode: 118 training return: tensor(-53.2033, device='cuda:0')
episode: 119 training return: tensor(-10.8786, device='cuda:0')
epoch: 30 test_true_pfm: 5366.670438077938 sim_pfm: 154.70946479666358
episode: 120 training return: tensor(14.3110, device='cuda:0')
episode: 121 training return: tensor(109.1972, device='cuda:0')
episode: 122 training return: tensor(67.4823, device='cuda:0')
episode: 123 training return: tensor(101.5434, device='cuda:0')
epoch: 31 test_true_pfm: 5349.026785567788 sim_pfm: 128.7328016742055
episode: 124 training return: tensor(139.2261, device='cuda:0')
episode: 125 training return: tensor(122.0633, device='cuda:0')
episode: 126 training return: tensor(72.1264, device='cuda:0')
episode: 127 training return: tensor(20.2762, device='cuda:0')
epoch: 32 test_true_pfm: 5382.8456674320005 sim_pfm: 215.97674490483283
episode: 128 training return: tensor(70.1496, device='cuda:0')
episode: 129 training return: tensor(-54.0404, device='cuda:0')
episode: 130 training return: tensor(99.6319, device='cuda:0')
episode: 131 training return: tensor(72.1117, device='cuda:0')
epoch: 33 test_true_pfm: 5322.679936400241 sim_pfm: 183.3346380443642
episode: 132 training return: tensor(98.5813, device='cuda:0')
episode: 133 training return: tensor(-83.7204, device='cuda:0')
episode: 134 training return: tensor(50.0948, device='cuda:0')
episode: 135 training return: tensor(54.2703, device='cuda:0')
epoch: 34 test_true_pfm: 5329.541679012007 sim_pfm: 201.20799744778196
episode: 136 training return: tensor(106.5448, device='cuda:0')
episode: 137 training return: tensor(168.3823, device='cuda:0')
episode: 138 training return: tensor(95.1096, device='cuda:0')
episode: 139 training return: tensor(134.6629, device='cuda:0')
epoch: 35 test_true_pfm: 5343.067488243444 sim_pfm: 125.97833985092196
episode: 140 training return: tensor(45.4178, device='cuda:0')
episode: 141 training return: tensor(47.0614, device='cuda:0')
episode: 142 training return: tensor(117.7909, device='cuda:0')
episode: 143 training return: tensor(181.7878, device='cuda:0')
epoch: 36 test_true_pfm: 5364.61431385791 sim_pfm: 127.65510236858002
episode: 144 training return: tensor(43.5374, device='cuda:0')
episode: 145 training return: tensor(43.4078, device='cuda:0')
episode: 146 training return: tensor(182.8685, device='cuda:0')
episode: 147 training return: tensor(19.9668, device='cuda:0')
epoch: 37 test_true_pfm: 5433.365037938592 sim_pfm: 236.12039749335963
episode: 148 training return: tensor(162.8044, device='cuda:0')
episode: 149 training return: tensor(46.8431, device='cuda:0')
episode: 150 training return: tensor(109.6469, device='cuda:0')
episode: 151 training return: tensor(84.7966, device='cuda:0')
epoch: 38 test_true_pfm: 5465.756620487311 sim_pfm: 199.57782696185555
episode: 152 training return: tensor(218.4848, device='cuda:0')
episode: 153 training return: tensor(73.2652, device='cuda:0')
episode: 154 training return: tensor(137.5353, device='cuda:0')
episode: 155 training return: tensor(229.2218, device='cuda:0')
epoch: 39 test_true_pfm: 5384.439156164924 sim_pfm: 251.61212066947095
episode: 156 training return: tensor(184.1704, device='cuda:0')
episode: 157 training return: tensor(46.4084, device='cuda:0')
episode: 158 training return: tensor(92.5317, device='cuda:0')
episode: 159 training return: tensor(-5.7622, device='cuda:0')
epoch: 40 test_true_pfm: 5412.087261910022 sim_pfm: 194.37982911422537
episode: 160 training return: tensor(105.9371, device='cuda:0')
episode: 161 training return: tensor(158.2516, device='cuda:0')
episode: 162 training return: tensor(130.5786, device='cuda:0')
episode: 163 training return: tensor(33.7435, device='cuda:0')
epoch: 41 test_true_pfm: 5519.173400476478 sim_pfm: 205.25670159925357
episode: 164 training return: tensor(177.1996, device='cuda:0')
episode: 165 training return: tensor(154.9524, device='cuda:0')
episode: 166 training return: tensor(84.7356, device='cuda:0')
episode: 167 training return: tensor(102.9428, device='cuda:0')
epoch: 42 test_true_pfm: 5482.203851925323 sim_pfm: 247.3740118710363
episode: 168 training return: tensor(96.6814, device='cuda:0')
episode: 169 training return: tensor(83.7554, device='cuda:0')
episode: 170 training return: tensor(82.7164, device='cuda:0')
episode: 171 training return: tensor(75.2666, device='cuda:0')
epoch: 43 test_true_pfm: 5444.209524827926 sim_pfm: 216.33447425021828
episode: 172 training return: tensor(204.5243, device='cuda:0')
episode: 173 training return: tensor(111.5904, device='cuda:0')
episode: 174 training return: tensor(151.7192, device='cuda:0')
episode: 175 training return: tensor(87.9469, device='cuda:0')
epoch: 44 test_true_pfm: 5453.63885948464 sim_pfm: 204.54945009042663
episode: 176 training return: tensor(202.3831, device='cuda:0')
episode: 177 training return: tensor(128.6549, device='cuda:0')
episode: 178 training return: tensor(84.8064, device='cuda:0')
episode: 179 training return: tensor(45.6414, device='cuda:0')
epoch: 45 test_true_pfm: 5295.309442964251 sim_pfm: 278.90621107449016
episode: 180 training return: tensor(108.6498, device='cuda:0')
episode: 181 training return: tensor(72.4025, device='cuda:0')
episode: 182 training return: tensor(85.7182, device='cuda:0')
episode: 183 training return: tensor(118.5445, device='cuda:0')
epoch: 46 test_true_pfm: 5487.518261656936 sim_pfm: 193.01789514689395
episode: 184 training return: tensor(175.7581, device='cuda:0')
episode: 185 training return: tensor(-35.7888, device='cuda:0')
episode: 186 training return: tensor(90.5555, device='cuda:0')
episode: 187 training return: tensor(96.4248, device='cuda:0')
epoch: 47 test_true_pfm: 5455.980088846546 sim_pfm: 203.25080975397336
episode: 188 training return: tensor(256.7653, device='cuda:0')
episode: 189 training return: tensor(109.2216, device='cuda:0')
episode: 190 training return: tensor(129.7317, device='cuda:0')
episode: 191 training return: tensor(107.1945, device='cuda:0')
epoch: 48 test_true_pfm: 5487.070767537701 sim_pfm: 224.80292661457983
episode: 192 training return: tensor(-10.6810, device='cuda:0')
episode: 193 training return: tensor(163.7671, device='cuda:0')
episode: 194 training return: tensor(198.8091, device='cuda:0')
episode: 195 training return: tensor(164.1571, device='cuda:0')
epoch: 49 test_true_pfm: 5424.279091918827 sim_pfm: 246.46978532643212
episode: 196 training return: tensor(141.7752, device='cuda:0')
episode: 197 training return: tensor(81.4030, device='cuda:0')
episode: 198 training return: tensor(58.7402, device='cuda:0')
episode: 199 training return: tensor(251.9161, device='cuda:0')
epoch: 50 test_true_pfm: 5455.868415082486 sim_pfm: 215.63553411790053
episode: 200 training return: tensor(112.8132, device='cuda:0')
episode: 201 training return: tensor(101.3560, device='cuda:0')
episode: 202 training return: tensor(161.0684, device='cuda:0')
episode: 203 training return: tensor(273.5131, device='cuda:0')
epoch: 51 test_true_pfm: 5474.278369814136 sim_pfm: 164.72527376626385
episode: 204 training return: tensor(124.1414, device='cuda:0')
episode: 205 training return: tensor(168.7397, device='cuda:0')
episode: 206 training return: tensor(-76.0626, device='cuda:0')
episode: 207 training return: tensor(75.9062, device='cuda:0')
epoch: 52 test_true_pfm: 5412.663404847641 sim_pfm: 227.72296480245618
episode: 208 training return: tensor(154.0966, device='cuda:0')
episode: 209 training return: tensor(171.3595, device='cuda:0')
episode: 210 training return: tensor(89.1965, device='cuda:0')
episode: 211 training return: tensor(77.6564, device='cuda:0')
epoch: 53 test_true_pfm: 5460.906135839047 sim_pfm: 333.430786411569
episode: 212 training return: tensor(134.7470, device='cuda:0')
episode: 213 training return: tensor(66.5669, device='cuda:0')
episode: 214 training return: tensor(228.0145, device='cuda:0')
episode: 215 training return: tensor(199.8073, device='cuda:0')
epoch: 54 test_true_pfm: 5466.709018148381 sim_pfm: 307.5316965498205
episode: 216 training return: tensor(160.4108, device='cuda:0')
episode: 217 training return: tensor(183.1442, device='cuda:0')
episode: 218 training return: tensor(153.3219, device='cuda:0')
episode: 219 training return: tensor(219.4075, device='cuda:0')
epoch: 55 test_true_pfm: 5516.301633862783 sim_pfm: 303.6852708486452
episode: 220 training return: tensor(134.1450, device='cuda:0')
episode: 221 training return: tensor(167.7875, device='cuda:0')
episode: 222 training return: tensor(186.5622, device='cuda:0')
episode: 223 training return: tensor(170.8697, device='cuda:0')
epoch: 56 test_true_pfm: 5358.554545814935 sim_pfm: 237.37042670433098
episode: 224 training return: tensor(89.3017, device='cuda:0')
episode: 225 training return: tensor(148.9513, device='cuda:0')
episode: 226 training return: tensor(241.5550, device='cuda:0')
episode: 227 training return: tensor(97.7855, device='cuda:0')
epoch: 57 test_true_pfm: 5498.996723842875 sim_pfm: 275.4631136719836
episode: 228 training return: tensor(202.5980, device='cuda:0')
episode: 229 training return: tensor(152.7176, device='cuda:0')
episode: 230 training return: tensor(134.9415, device='cuda:0')
episode: 231 training return: tensor(30.4400, device='cuda:0')
epoch: 58 test_true_pfm: 5430.339983085768 sim_pfm: 251.9353928353327
episode: 232 training return: tensor(103.7575, device='cuda:0')
episode: 233 training return: tensor(141.0966, device='cuda:0')
episode: 234 training return: tensor(209.2535, device='cuda:0')
episode: 235 training return: tensor(139.6248, device='cuda:0')
epoch: 59 test_true_pfm: 5610.180218565795 sim_pfm: 251.27486537597724
episode: 236 training return: tensor(206.9291, device='cuda:0')
episode: 237 training return: tensor(173.0824, device='cuda:0')
episode: 238 training return: tensor(162.5927, device='cuda:0')
episode: 239 training return: tensor(251.9659, device='cuda:0')
epoch: 60 test_true_pfm: 5461.187316414463 sim_pfm: 260.53935726255685
episode: 240 training return: tensor(184.6451, device='cuda:0')
episode: 241 training return: tensor(56.7147, device='cuda:0')
episode: 242 training return: tensor(208.5418, device='cuda:0')
episode: 243 training return: tensor(145.7018, device='cuda:0')
epoch: 61 test_true_pfm: 5402.692685159163 sim_pfm: 211.52337141881193
episode: 244 training return: tensor(86.1830, device='cuda:0')
episode: 245 training return: tensor(196.0841, device='cuda:0')
episode: 246 training return: tensor(155.9737, device='cuda:0')
episode: 247 training return: tensor(217.6601, device='cuda:0')
epoch: 62 test_true_pfm: 5435.310464947691 sim_pfm: 262.2704851531501
episode: 248 training return: tensor(131.7580, device='cuda:0')
episode: 249 training return: tensor(150.9707, device='cuda:0')
episode: 250 training return: tensor(52.9247, device='cuda:0')
episode: 251 training return: tensor(180.1758, device='cuda:0')
epoch: 63 test_true_pfm: 5476.376617870216 sim_pfm: 287.352915575301
episode: 252 training return: tensor(194.9800, device='cuda:0')
episode: 253 training return: tensor(159.8045, device='cuda:0')
episode: 254 training return: tensor(198.1647, device='cuda:0')
episode: 255 training return: tensor(176.1280, device='cuda:0')
epoch: 64 test_true_pfm: 5567.59169378249 sim_pfm: 332.12375641951803
episode: 256 training return: tensor(61.6617, device='cuda:0')
episode: 257 training return: tensor(195.3497, device='cuda:0')
episode: 258 training return: tensor(74.9023, device='cuda:0')
episode: 259 training return: tensor(141.2309, device='cuda:0')
epoch: 65 test_true_pfm: 5462.076438386706 sim_pfm: 320.61695973809884
episode: 260 training return: tensor(90.4047, device='cuda:0')
episode: 261 training return: tensor(216.8589, device='cuda:0')
episode: 262 training return: tensor(266.4427, device='cuda:0')
episode: 263 training return: tensor(227.9688, device='cuda:0')
epoch: 66 test_true_pfm: 5547.6102011152 sim_pfm: 294.93913647090085
episode: 264 training return: tensor(267.7876, device='cuda:0')
episode: 265 training return: tensor(165.7939, device='cuda:0')
episode: 266 training return: tensor(135.2728, device='cuda:0')
episode: 267 training return: tensor(189.1089, device='cuda:0')
epoch: 67 test_true_pfm: 5424.025755002159 sim_pfm: 270.8514005736797
episode: 268 training return: tensor(173.1981, device='cuda:0')
episode: 269 training return: tensor(198.0062, device='cuda:0')
episode: 270 training return: tensor(122.0818, device='cuda:0')
episode: 271 training return: tensor(233.6978, device='cuda:0')
epoch: 68 test_true_pfm: 5481.821480854246 sim_pfm: 196.91700302160461
episode: 272 training return: tensor(173.8309, device='cuda:0')
episode: 273 training return: tensor(173.3072, device='cuda:0')
episode: 274 training return: tensor(100.2115, device='cuda:0')
episode: 275 training return: tensor(158.8650, device='cuda:0')
epoch: 69 test_true_pfm: 5485.167531706925 sim_pfm: 285.81631480839377
episode: 276 training return: tensor(173.5757, device='cuda:0')
episode: 277 training return: tensor(251.7677, device='cuda:0')
episode: 278 training return: tensor(205.0508, device='cuda:0')
episode: 279 training return: tensor(95.3521, device='cuda:0')
epoch: 70 test_true_pfm: 5553.365951642293 sim_pfm: 311.92451506636763
episode: 280 training return: tensor(245.6186, device='cuda:0')
episode: 281 training return: tensor(172.4016, device='cuda:0')
episode: 282 training return: tensor(301.8481, device='cuda:0')
episode: 283 training return: tensor(245.7648, device='cuda:0')
epoch: 71 test_true_pfm: 5557.875097264166 sim_pfm: 258.619889645682
episode: 284 training return: tensor(322.5204, device='cuda:0')
episode: 285 training return: tensor(159.2168, device='cuda:0')
episode: 286 training return: tensor(230.3805, device='cuda:0')
episode: 287 training return: tensor(108.3340, device='cuda:0')
epoch: 72 test_true_pfm: 5577.534105163902 sim_pfm: 286.83972042558406
episode: 288 training return: tensor(80.2508, device='cuda:0')
episode: 289 training return: tensor(38.6112, device='cuda:0')
episode: 290 training return: tensor(198.5372, device='cuda:0')
episode: 291 training return: tensor(116.9502, device='cuda:0')
epoch: 73 test_true_pfm: 5544.45879816055 sim_pfm: 278.8674505594633
episode: 292 training return: tensor(87.6841, device='cuda:0')
episode: 293 training return: tensor(238.6594, device='cuda:0')
episode: 294 training return: tensor(187.8120, device='cuda:0')
episode: 295 training return: tensor(212.8478, device='cuda:0')
epoch: 74 test_true_pfm: 5584.793481447333 sim_pfm: 301.6337792631627
episode: 296 training return: tensor(89.2719, device='cuda:0')
episode: 297 training return: tensor(172.2949, device='cuda:0')
episode: 298 training return: tensor(111.3413, device='cuda:0')
episode: 299 training return: tensor(218.0919, device='cuda:0')
epoch: 75 test_true_pfm: 5560.827118226487 sim_pfm: 312.75805074046366
episode: 300 training return: tensor(130.2825, device='cuda:0')
episode: 301 training return: tensor(152.2932, device='cuda:0')
episode: 302 training return: tensor(74.8046, device='cuda:0')
episode: 303 training return: tensor(166.2636, device='cuda:0')
epoch: 76 test_true_pfm: 5418.188463832187 sim_pfm: 270.9009098392562
episode: 304 training return: tensor(201.1506, device='cuda:0')
episode: 305 training return: tensor(218.1437, device='cuda:0')
episode: 306 training return: tensor(31.4270, device='cuda:0')
episode: 307 training return: tensor(187.1516, device='cuda:0')
epoch: 77 test_true_pfm: 5551.399972557988 sim_pfm: 242.74101137043908
episode: 308 training return: tensor(49.7609, device='cuda:0')
episode: 309 training return: tensor(314.6836, device='cuda:0')
episode: 310 training return: tensor(224.9651, device='cuda:0')
episode: 311 training return: tensor(279.3379, device='cuda:0')
epoch: 78 test_true_pfm: 5608.533755111425 sim_pfm: 358.5036156766
episode: 312 training return: tensor(74.1726, device='cuda:0')
episode: 313 training return: tensor(179.5252, device='cuda:0')
episode: 314 training return: tensor(240.2125, device='cuda:0')
episode: 315 training return: tensor(305.7033, device='cuda:0')
epoch: 79 test_true_pfm: 5606.687948405362 sim_pfm: 325.23802831411984
episode: 316 training return: tensor(171.5008, device='cuda:0')
episode: 317 training return: tensor(228.4206, device='cuda:0')
episode: 318 training return: tensor(272.8335, device='cuda:0')
episode: 319 training return: tensor(201.5773, device='cuda:0')
epoch: 80 test_true_pfm: 5568.130253230235 sim_pfm: 329.9512575168046
episode: 320 training return: tensor(251.9956, device='cuda:0')
episode: 321 training return: tensor(208.8060, device='cuda:0')
episode: 322 training return: tensor(196.9886, device='cuda:0')
episode: 323 training return: tensor(256.6421, device='cuda:0')
epoch: 81 test_true_pfm: 5527.176095462376 sim_pfm: 337.7897649028455
episode: 324 training return: tensor(147.5297, device='cuda:0')
episode: 325 training return: tensor(205.5645, device='cuda:0')
episode: 326 training return: tensor(156.2846, device='cuda:0')
episode: 327 training return: tensor(219.1449, device='cuda:0')
epoch: 82 test_true_pfm: 5535.073019012053 sim_pfm: 356.9600306604213
episode: 328 training return: tensor(205.5782, device='cuda:0')
episode: 329 training return: tensor(124.1040, device='cuda:0')
episode: 330 training return: tensor(63.4661, device='cuda:0')
episode: 331 training return: tensor(10.3325, device='cuda:0')
epoch: 83 test_true_pfm: 5528.093556940643 sim_pfm: 280.09551879598683
episode: 332 training return: tensor(268.0620, device='cuda:0')
episode: 333 training return: tensor(253.4167, device='cuda:0')
episode: 334 training return: tensor(199.8221, device='cuda:0')
episode: 335 training return: tensor(268.6731, device='cuda:0')
epoch: 84 test_true_pfm: 5451.924852641875 sim_pfm: 329.4890104964531
episode: 336 training return: tensor(107.5267, device='cuda:0')
episode: 337 training return: tensor(101.8209, device='cuda:0')
episode: 338 training return: tensor(200.5652, device='cuda:0')
episode: 339 training return: tensor(198.4866, device='cuda:0')
epoch: 85 test_true_pfm: 5574.831343490343 sim_pfm: 297.5787176552888
episode: 340 training return: tensor(153.4323, device='cuda:0')
episode: 341 training return: tensor(248.8142, device='cuda:0')
episode: 342 training return: tensor(229.8819, device='cuda:0')
episode: 343 training return: tensor(127.6026, device='cuda:0')
epoch: 86 test_true_pfm: 5575.82408814485 sim_pfm: 213.08570187393343
episode: 344 training return: tensor(71.3340, device='cuda:0')
episode: 345 training return: tensor(124.9224, device='cuda:0')
episode: 346 training return: tensor(143.2831, device='cuda:0')
episode: 347 training return: tensor(118.2661, device='cuda:0')
epoch: 87 test_true_pfm: 5573.037037889179 sim_pfm: 256.96494611150894
episode: 348 training return: tensor(98.7283, device='cuda:0')
episode: 349 training return: tensor(190.7106, device='cuda:0')
episode: 350 training return: tensor(103.1680, device='cuda:0')
episode: 351 training return: tensor(223.0809, device='cuda:0')
epoch: 88 test_true_pfm: 5549.70951530266 sim_pfm: 312.06658621912356
episode: 352 training return: tensor(252.4626, device='cuda:0')
episode: 353 training return: tensor(222.0191, device='cuda:0')
episode: 354 training return: tensor(201.1002, device='cuda:0')
episode: 355 training return: tensor(208.3391, device='cuda:0')
epoch: 89 test_true_pfm: 5551.165228230934 sim_pfm: 307.02112407872727
episode: 356 training return: tensor(81.1973, device='cuda:0')
episode: 357 training return: tensor(131.9066, device='cuda:0')
episode: 358 training return: tensor(210.5188, device='cuda:0')
episode: 359 training return: tensor(257.3248, device='cuda:0')
epoch: 90 test_true_pfm: 5593.7038658614865 sim_pfm: 318.30643341172254
episode: 360 training return: tensor(135.6330, device='cuda:0')
episode: 361 training return: tensor(100.5518, device='cuda:0')
episode: 362 training return: tensor(243.7989, device='cuda:0')
episode: 363 training return: tensor(124.9228, device='cuda:0')
epoch: 91 test_true_pfm: 5519.333490721442 sim_pfm: 315.3576321429767
episode: 364 training return: tensor(160.2273, device='cuda:0')
episode: 365 training return: tensor(140.7999, device='cuda:0')
episode: 366 training return: tensor(202.7190, device='cuda:0')
episode: 367 training return: tensor(231.9809, device='cuda:0')
epoch: 92 test_true_pfm: 5614.503821043009 sim_pfm: 268.7150183003396
episode: 368 training return: tensor(218.4735, device='cuda:0')
episode: 369 training return: tensor(294.1031, device='cuda:0')
episode: 370 training return: tensor(215.8640, device='cuda:0')
episode: 371 training return: tensor(197.3934, device='cuda:0')
epoch: 93 test_true_pfm: 5514.536335484566 sim_pfm: 297.3933818420046
episode: 372 training return: tensor(336.8713, device='cuda:0')
episode: 373 training return: tensor(317.4895, device='cuda:0')
episode: 374 training return: tensor(200.2340, device='cuda:0')
episode: 375 training return: tensor(273.4714, device='cuda:0')
epoch: 94 test_true_pfm: 5589.621645416769 sim_pfm: 275.3257521284492
episode: 376 training return: tensor(208.2471, device='cuda:0')
episode: 377 training return: tensor(323.0587, device='cuda:0')
episode: 378 training return: tensor(228.4570, device='cuda:0')
episode: 379 training return: tensor(178.4998, device='cuda:0')
epoch: 95 test_true_pfm: 5646.588929983013 sim_pfm: 281.69172946420923
episode: 380 training return: tensor(232.5710, device='cuda:0')
episode: 381 training return: tensor(250.3922, device='cuda:0')
episode: 382 training return: tensor(242.8405, device='cuda:0')
episode: 383 training return: tensor(261.0152, device='cuda:0')
epoch: 96 test_true_pfm: 5587.803215024437 sim_pfm: 340.2238260668043
episode: 384 training return: tensor(228.3231, device='cuda:0')
episode: 385 training return: tensor(270.3471, device='cuda:0')
episode: 386 training return: tensor(290.3056, device='cuda:0')
episode: 387 training return: tensor(132.5809, device='cuda:0')
epoch: 97 test_true_pfm: 5532.693024492863 sim_pfm: 318.9134005377612
episode: 388 training return: tensor(197.6278, device='cuda:0')
episode: 389 training return: tensor(181.4062, device='cuda:0')
episode: 390 training return: tensor(209.8274, device='cuda:0')
episode: 391 training return: tensor(195.6976, device='cuda:0')
epoch: 98 test_true_pfm: 5609.558888967776 sim_pfm: 299.61541480468196
episode: 392 training return: tensor(174.8579, device='cuda:0')
episode: 393 training return: tensor(167.4027, device='cuda:0')
episode: 394 training return: tensor(130.0240, device='cuda:0')
episode: 395 training return: tensor(187.5049, device='cuda:0')
epoch: 99 test_true_pfm: 5581.556438733433 sim_pfm: 344.13725105345173
episode: 396 training return: tensor(137.0373, device='cuda:0')
episode: 397 training return: tensor(132.5872, device='cuda:0')
episode: 398 training return: tensor(248.5880, device='cuda:0')
episode: 399 training return: tensor(154.7125, device='cuda:0')
epoch: 100 test_true_pfm: 5607.067735622129 sim_pfm: 334.29237933268695
episode: 400 training return: tensor(244.4582, device='cuda:0')
episode: 401 training return: tensor(155.8610, device='cuda:0')
episode: 402 training return: tensor(269.2223, device='cuda:0')
episode: 403 training return: tensor(288.6410, device='cuda:0')
epoch: 101 test_true_pfm: 5578.601746280897 sim_pfm: 316.6477872587566
episode: 404 training return: tensor(278.0802, device='cuda:0')
episode: 405 training return: tensor(233.6168, device='cuda:0')
episode: 406 training return: tensor(215.4270, device='cuda:0')
episode: 407 training return: tensor(239.7901, device='cuda:0')
epoch: 102 test_true_pfm: 5584.676549603329 sim_pfm: 295.7497378326564
episode: 408 training return: tensor(294.1285, device='cuda:0')
episode: 409 training return: tensor(322.3048, device='cuda:0')
episode: 410 training return: tensor(119.8290, device='cuda:0')
episode: 411 training return: tensor(101.6958, device='cuda:0')
epoch: 103 test_true_pfm: 5618.590229039066 sim_pfm: 298.57633283928345
episode: 412 training return: tensor(181.5154, device='cuda:0')
episode: 413 training return: tensor(178.7870, device='cuda:0')
episode: 414 training return: tensor(184.6442, device='cuda:0')
episode: 415 training return: tensor(299.9931, device='cuda:0')
epoch: 104 test_true_pfm: 5553.408989600694 sim_pfm: 312.0413089195499
episode: 416 training return: tensor(226.3596, device='cuda:0')
episode: 417 training return: tensor(127.4940, device='cuda:0')
episode: 418 training return: tensor(164.1797, device='cuda:0')
episode: 419 training return: tensor(92.4973, device='cuda:0')
epoch: 105 test_true_pfm: 5560.624067487133 sim_pfm: 326.1926547483017
episode: 420 training return: tensor(136.7418, device='cuda:0')
episode: 421 training return: tensor(216.3075, device='cuda:0')
episode: 422 training return: tensor(228.9156, device='cuda:0')
episode: 423 training return: tensor(231.4358, device='cuda:0')
epoch: 106 test_true_pfm: 5617.470289825728 sim_pfm: 333.4597576777257
episode: 424 training return: tensor(188.3918, device='cuda:0')
episode: 425 training return: tensor(189.6801, device='cuda:0')
episode: 426 training return: tensor(237.4598, device='cuda:0')
episode: 427 training return: tensor(300.3893, device='cuda:0')
epoch: 107 test_true_pfm: 5629.260408306066 sim_pfm: 320.56035247873905
episode: 428 training return: tensor(120.0814, device='cuda:0')
episode: 429 training return: tensor(255.3446, device='cuda:0')
episode: 430 training return: tensor(286.7135, device='cuda:0')
episode: 431 training return: tensor(182.5463, device='cuda:0')
epoch: 108 test_true_pfm: 5668.446258387864 sim_pfm: 326.5462872057299
episode: 432 training return: tensor(227.6953, device='cuda:0')
episode: 433 training return: tensor(229.5468, device='cuda:0')
episode: 434 training return: tensor(161.8661, device='cuda:0')
episode: 435 training return: tensor(236.6711, device='cuda:0')
epoch: 109 test_true_pfm: 5517.869732756247 sim_pfm: 300.6975958929688
episode: 436 training return: tensor(92.7232, device='cuda:0')
episode: 437 training return: tensor(218.2935, device='cuda:0')
episode: 438 training return: tensor(220.0938, device='cuda:0')
episode: 439 training return: tensor(294.9604, device='cuda:0')
epoch: 110 test_true_pfm: 5626.874211192616 sim_pfm: 281.04242836870253
episode: 440 training return: tensor(230.1191, device='cuda:0')
episode: 441 training return: tensor(268.1231, device='cuda:0')
episode: 442 training return: tensor(151.8064, device='cuda:0')
episode: 443 training return: tensor(120.4204, device='cuda:0')
epoch: 111 test_true_pfm: 5576.909926844996 sim_pfm: 280.57596123209805
episode: 444 training return: tensor(99.9255, device='cuda:0')
episode: 445 training return: tensor(180.7683, device='cuda:0')
episode: 446 training return: tensor(292.1264, device='cuda:0')
episode: 447 training return: tensor(237.0701, device='cuda:0')
epoch: 112 test_true_pfm: 5616.338116577442 sim_pfm: 346.3580899045434
episode: 448 training return: tensor(172.7923, device='cuda:0')
episode: 449 training return: tensor(199.1650, device='cuda:0')
episode: 450 training return: tensor(281.9093, device='cuda:0')
episode: 451 training return: tensor(149.3210, device='cuda:0')
epoch: 113 test_true_pfm: 5508.590510575603 sim_pfm: 319.13912709711195
episode: 452 training return: tensor(253.6099, device='cuda:0')
episode: 453 training return: tensor(242.0918, device='cuda:0')
episode: 454 training return: tensor(228.6071, device='cuda:0')
episode: 455 training return: tensor(238.2253, device='cuda:0')
epoch: 114 test_true_pfm: 5682.916651896088 sim_pfm: 278.3471728140721
episode: 456 training return: tensor(308.8714, device='cuda:0')
episode: 457 training return: tensor(277.4059, device='cuda:0')
episode: 458 training return: tensor(201.1575, device='cuda:0')
episode: 459 training return: tensor(234.3843, device='cuda:0')
epoch: 115 test_true_pfm: 5610.525091894757 sim_pfm: 384.8223150641813
episode: 460 training return: tensor(290.6523, device='cuda:0')
episode: 461 training return: tensor(263.8353, device='cuda:0')
episode: 462 training return: tensor(138.1984, device='cuda:0')
episode: 463 training return: tensor(252.4265, device='cuda:0')
epoch: 116 test_true_pfm: 5576.199807808545 sim_pfm: 309.7975121005923
episode: 464 training return: tensor(220.8895, device='cuda:0')
episode: 465 training return: tensor(140.2238, device='cuda:0')
episode: 466 training return: tensor(173.6209, device='cuda:0')
episode: 467 training return: tensor(376.0381, device='cuda:0')
epoch: 117 test_true_pfm: 5653.886149204943 sim_pfm: 304.93674452166306
episode: 468 training return: tensor(210.4559, device='cuda:0')
episode: 469 training return: tensor(258.2212, device='cuda:0')
episode: 470 training return: tensor(165.3628, device='cuda:0')
episode: 471 training return: tensor(208.9357, device='cuda:0')
epoch: 118 test_true_pfm: 5677.146888884701 sim_pfm: 380.03629856267554
episode: 472 training return: tensor(276.2226, device='cuda:0')
episode: 473 training return: tensor(145.8510, device='cuda:0')
episode: 474 training return: tensor(341.9782, device='cuda:0')
episode: 475 training return: tensor(277.1261, device='cuda:0')
epoch: 119 test_true_pfm: 5626.682143797251 sim_pfm: 367.9248350176883
episode: 476 training return: tensor(150.8013, device='cuda:0')
episode: 477 training return: tensor(299.7039, device='cuda:0')
episode: 478 training return: tensor(167.9942, device='cuda:0')
episode: 479 training return: tensor(254.9719, device='cuda:0')
epoch: 120 test_true_pfm: 5595.164831546087 sim_pfm: 328.68707504528965
episode: 480 training return: tensor(266.4943, device='cuda:0')
episode: 481 training return: tensor(306.0480, device='cuda:0')
episode: 482 training return: tensor(246.5307, device='cuda:0')
episode: 483 training return: tensor(318.8381, device='cuda:0')
epoch: 121 test_true_pfm: 5716.011594950545 sim_pfm: 353.1784035238573
episode: 484 training return: tensor(174.3918, device='cuda:0')
episode: 485 training return: tensor(313.2289, device='cuda:0')
episode: 486 training return: tensor(152.4600, device='cuda:0')
episode: 487 training return: tensor(216.8992, device='cuda:0')
epoch: 122 test_true_pfm: 5661.937304900294 sim_pfm: 316.49941328179557
episode: 488 training return: tensor(263.7217, device='cuda:0')
episode: 489 training return: tensor(224.1628, device='cuda:0')
episode: 490 training return: tensor(364.0238, device='cuda:0')
episode: 491 training return: tensor(110.7529, device='cuda:0')
epoch: 123 test_true_pfm: 5664.606359743758 sim_pfm: 325.76328431205667
episode: 492 training return: tensor(118.2779, device='cuda:0')
episode: 493 training return: tensor(281.8047, device='cuda:0')
episode: 494 training return: tensor(266.9583, device='cuda:0')
episode: 495 training return: tensor(214.3990, device='cuda:0')
epoch: 124 test_true_pfm: 5649.524560926174 sim_pfm: 374.7495709734115
episode: 496 training return: tensor(287.1110, device='cuda:0')
episode: 497 training return: tensor(218.5175, device='cuda:0')
episode: 498 training return: tensor(213.4864, device='cuda:0')
episode: 499 training return: tensor(169.7027, device='cuda:0')
epoch: 125 test_true_pfm: 5603.723496153775 sim_pfm: 285.8322066008889
episode: 500 training return: tensor(244.1568, device='cuda:0')
episode: 501 training return: tensor(299.1174, device='cuda:0')
episode: 502 training return: tensor(186.7768, device='cuda:0')
episode: 503 training return: tensor(269.8114, device='cuda:0')
epoch: 126 test_true_pfm: 5659.445134942459 sim_pfm: 321.3928119651973
episode: 504 training return: tensor(240.5644, device='cuda:0')
episode: 505 training return: tensor(346.5025, device='cuda:0')
episode: 506 training return: tensor(119.8530, device='cuda:0')
episode: 507 training return: tensor(272.3993, device='cuda:0')
epoch: 127 test_true_pfm: 5606.782337437241 sim_pfm: 394.4833897499193
episode: 508 training return: tensor(241.3470, device='cuda:0')
episode: 509 training return: tensor(265.9468, device='cuda:0')
episode: 510 training return: tensor(335.2724, device='cuda:0')
episode: 511 training return: tensor(95.8112, device='cuda:0')
epoch: 128 test_true_pfm: 5645.061574826235 sim_pfm: 375.6894018613966
episode: 512 training return: tensor(280.8224, device='cuda:0')
episode: 513 training return: tensor(307.1315, device='cuda:0')
episode: 514 training return: tensor(257.3511, device='cuda:0')
episode: 515 training return: tensor(163.7680, device='cuda:0')
epoch: 129 test_true_pfm: 5665.03907168335 sim_pfm: 317.36884383687476
episode: 516 training return: tensor(199.4990, device='cuda:0')
episode: 517 training return: tensor(107.8765, device='cuda:0')
episode: 518 training return: tensor(295.9383, device='cuda:0')
episode: 519 training return: tensor(215.3772, device='cuda:0')
epoch: 130 test_true_pfm: 5606.931318925864 sim_pfm: 408.8378833862953
episode: 520 training return: tensor(237.4100, device='cuda:0')
episode: 521 training return: tensor(246.1694, device='cuda:0')
episode: 522 training return: tensor(297.2105, device='cuda:0')
episode: 523 training return: tensor(366.9785, device='cuda:0')
epoch: 131 test_true_pfm: 5694.098741664883 sim_pfm: 366.43027934090543
episode: 524 training return: tensor(275.3347, device='cuda:0')
episode: 525 training return: tensor(243.5217, device='cuda:0')
episode: 526 training return: tensor(154.3190, device='cuda:0')
episode: 527 training return: tensor(287.6987, device='cuda:0')
epoch: 132 test_true_pfm: 5605.435765278996 sim_pfm: 336.87712843318394
episode: 528 training return: tensor(292.9258, device='cuda:0')
episode: 529 training return: tensor(313.7890, device='cuda:0')
episode: 530 training return: tensor(176.0650, device='cuda:0')
episode: 531 training return: tensor(340.2393, device='cuda:0')
epoch: 133 test_true_pfm: 5576.124602822257 sim_pfm: 382.15495387959527
episode: 532 training return: tensor(319.5293, device='cuda:0')
episode: 533 training return: tensor(193.6623, device='cuda:0')
episode: 534 training return: tensor(298.8562, device='cuda:0')
episode: 535 training return: tensor(245.8340, device='cuda:0')
epoch: 134 test_true_pfm: 5593.409015622121 sim_pfm: 281.88542824046453
episode: 536 training return: tensor(259.5162, device='cuda:0')
episode: 537 training return: tensor(239.0931, device='cuda:0')
episode: 538 training return: tensor(227.5475, device='cuda:0')
episode: 539 training return: tensor(279.1384, device='cuda:0')
epoch: 135 test_true_pfm: 5570.92774588128 sim_pfm: 350.09509355327464
episode: 540 training return: tensor(210.0600, device='cuda:0')
episode: 541 training return: tensor(299.2133, device='cuda:0')
episode: 542 training return: tensor(257.8738, device='cuda:0')
episode: 543 training return: tensor(279.4396, device='cuda:0')
epoch: 136 test_true_pfm: 5573.806072444747 sim_pfm: 352.5695878038726
episode: 544 training return: tensor(298.5929, device='cuda:0')
episode: 545 training return: tensor(277.8867, device='cuda:0')
episode: 546 training return: tensor(254.5499, device='cuda:0')
episode: 547 training return: tensor(401.5524, device='cuda:0')
epoch: 137 test_true_pfm: 5648.020233299635 sim_pfm: 347.23453651939053
episode: 548 training return: tensor(280.7744, device='cuda:0')
episode: 549 training return: tensor(135.6425, device='cuda:0')
episode: 550 training return: tensor(245.0170, device='cuda:0')
episode: 551 training return: tensor(270.0853, device='cuda:0')
epoch: 138 test_true_pfm: 5647.281374842433 sim_pfm: 389.59061087243026
episode: 552 training return: tensor(159.4193, device='cuda:0')
episode: 553 training return: tensor(279.4115, device='cuda:0')
episode: 554 training return: tensor(244.4604, device='cuda:0')
episode: 555 training return: tensor(292.3779, device='cuda:0')
epoch: 139 test_true_pfm: 5672.924583099503 sim_pfm: 312.0554096836907
episode: 556 training return: tensor(277.5322, device='cuda:0')
episode: 557 training return: tensor(229.7928, device='cuda:0')
episode: 558 training return: tensor(318.4703, device='cuda:0')
episode: 559 training return: tensor(238.3440, device='cuda:0')
epoch: 140 test_true_pfm: 5631.940813012341 sim_pfm: 354.19118976519286
episode: 560 training return: tensor(225.8031, device='cuda:0')
episode: 561 training return: tensor(287.9557, device='cuda:0')
episode: 562 training return: tensor(317.6288, device='cuda:0')
episode: 563 training return: tensor(217.6961, device='cuda:0')
epoch: 141 test_true_pfm: 5617.282266770679 sim_pfm: 356.0486702840232
episode: 564 training return: tensor(261.1614, device='cuda:0')
episode: 565 training return: tensor(266.4925, device='cuda:0')
episode: 566 training return: tensor(298.5849, device='cuda:0')
episode: 567 training return: tensor(152.4854, device='cuda:0')
epoch: 142 test_true_pfm: 5531.019228153018 sim_pfm: 361.0423324587755
episode: 568 training return: tensor(238.0854, device='cuda:0')
episode: 569 training return: tensor(243.1854, device='cuda:0')
episode: 570 training return: tensor(302.1459, device='cuda:0')
episode: 571 training return: tensor(176.8532, device='cuda:0')
epoch: 143 test_true_pfm: 5610.588354148066 sim_pfm: 388.5088846653234
episode: 572 training return: tensor(177.5515, device='cuda:0')
episode: 573 training return: tensor(201.6433, device='cuda:0')
episode: 574 training return: tensor(206.0118, device='cuda:0')
episode: 575 training return: tensor(243.1847, device='cuda:0')
epoch: 144 test_true_pfm: 5609.583256335442 sim_pfm: 373.65349238849984
episode: 576 training return: tensor(173.1891, device='cuda:0')
episode: 577 training return: tensor(192.8828, device='cuda:0')
episode: 578 training return: tensor(314.1545, device='cuda:0')
episode: 579 training return: tensor(284.7715, device='cuda:0')
epoch: 145 test_true_pfm: 5632.799738187058 sim_pfm: 341.45843102965347
episode: 580 training return: tensor(215.8288, device='cuda:0')
episode: 581 training return: tensor(228.2317, device='cuda:0')
episode: 582 training return: tensor(297.4688, device='cuda:0')
episode: 583 training return: tensor(258.6478, device='cuda:0')
epoch: 146 test_true_pfm: 5629.821556108196 sim_pfm: 346.3640568673533
episode: 584 training return: tensor(216.3034, device='cuda:0')
episode: 585 training return: tensor(149.3432, device='cuda:0')
episode: 586 training return: tensor(205.0877, device='cuda:0')
episode: 587 training return: tensor(315.7372, device='cuda:0')
epoch: 147 test_true_pfm: 5694.530137162409 sim_pfm: 378.0134664997361
episode: 588 training return: tensor(271.5544, device='cuda:0')
episode: 589 training return: tensor(179.4037, device='cuda:0')
episode: 590 training return: tensor(352.9703, device='cuda:0')
episode: 591 training return: tensor(122.8282, device='cuda:0')
epoch: 148 test_true_pfm: 5652.680217220773 sim_pfm: 326.7466754777706
episode: 592 training return: tensor(186.3922, device='cuda:0')
episode: 593 training return: tensor(260.2669, device='cuda:0')
episode: 594 training return: tensor(379.3142, device='cuda:0')
episode: 595 training return: tensor(214.2344, device='cuda:0')
epoch: 149 test_true_pfm: 5634.266985026323 sim_pfm: 340.2988961823673
episode: 596 training return: tensor(206.6055, device='cuda:0')
episode: 597 training return: tensor(170.2120, device='cuda:0')
episode: 598 training return: tensor(312.3736, device='cuda:0')
episode: 599 training return: tensor(142.9904, device='cuda:0')
epoch: 150 test_true_pfm: 5523.825255105773 sim_pfm: 442.9225321433041
