['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '7', '--data', '100000', '--regu', '0.2']
epoch: 0 training_loss 0.26085400089621547 test_loss: 0.23243441581726074
epoch: 1 training_loss 0.21511603459715845 test_loss: 0.202329683303833
epoch: 2 training_loss 0.20279383346438407 test_loss: 0.20043351650238037
epoch: 3 training_loss 0.20141403652727605 test_loss: 0.18930312395095825
epoch: 4 training_loss 0.18392519235610963 test_loss: 0.19989250898361205
epoch: 5 training_loss 0.19018535152077676 test_loss: 0.198223078250885
epoch: 6 training_loss 0.18979075290262698 test_loss: 0.1836538314819336
epoch: 7 training_loss 0.19465905219316482 test_loss: 0.1935907244682312
epoch: 8 training_loss 0.19243154905736445 test_loss: 0.18051847219467163
epoch: 9 training_loss 0.1867319943010807 test_loss: 0.18169264793395995
epoch: 10 training_loss 0.1845000373572111 test_loss: 0.19331198930740356
epoch: 11 training_loss 0.18318132616579533 test_loss: 0.18871794939041137
epoch: 12 training_loss 0.18814474761486052 test_loss: 0.18646665811538696
epoch: 13 training_loss 0.18737715251743794 test_loss: 0.1847813606262207
epoch: 14 training_loss 0.18131552442908286 test_loss: 0.18917585611343385
epoch: 15 training_loss 0.18070092089474202 test_loss: 0.19005657434463502
epoch: 16 training_loss 0.17955390110611916 test_loss: 0.17852481603622436
epoch: 17 training_loss 0.18972320035099982 test_loss: 0.18200116157531737
epoch: 18 training_loss 0.18497162483632565 test_loss: 0.20678343772888183
epoch: 19 training_loss 0.17806796930730343 test_loss: 0.18033828735351562
epoch: 20 training_loss 0.18366800986230372 test_loss: 0.1841542363166809
epoch: 21 training_loss 0.18493766322731972 test_loss: 0.1889730215072632
epoch: 22 training_loss 0.1823228956758976 test_loss: 0.20890557765960693
epoch: 23 training_loss 0.18724215760827065 test_loss: 0.18854749202728271
epoch: 24 training_loss 0.1760679579526186 test_loss: 0.1837571382522583
epoch: 25 training_loss 0.184204887971282 test_loss: 0.20464749336242677
epoch: 26 training_loss 0.1792074391245842 test_loss: 0.20137956142425537
epoch: 27 training_loss 0.19053514868021013 test_loss: 0.18442220687866212
epoch: 28 training_loss 0.1829159628227353 test_loss: 0.1875477433204651
epoch: 29 training_loss 0.18435785084962844 test_loss: 0.17584255933761597
epoch: 30 training_loss 0.18273572228848933 test_loss: 0.17043759822845458
epoch: 31 training_loss 0.1816724706441164 test_loss: 0.19925711154937745
epoch: 32 training_loss 0.1812262772768736 test_loss: 0.1795782685279846
epoch: 33 training_loss 0.17612225636839868 test_loss: 0.19461336135864257
epoch: 34 training_loss 0.178160180747509 test_loss: 0.16972683668136596
epoch: 35 training_loss 0.18165492489933968 test_loss: 0.21073057651519775
epoch: 36 training_loss 0.18696385122835635 test_loss: 0.17471954822540284
epoch: 37 training_loss 0.18664707630872726 test_loss: 0.17585803270339967
epoch: 38 training_loss 0.1784652040153742 test_loss: 0.20920441150665284
epoch: 39 training_loss 0.1800143852084875 test_loss: 0.17216662168502808
epoch: 40 training_loss 0.18214356683194638 test_loss: 0.1840467095375061
epoch: 41 training_loss 0.176648510992527 test_loss: 0.18606890439987184
epoch: 42 training_loss 0.17803939089179038 test_loss: 0.1757287859916687
epoch: 43 training_loss 0.19253084905445575 test_loss: 0.19170262813568115
epoch: 44 training_loss 0.17845719493925571 test_loss: 0.17710291147232055
epoch: 45 training_loss 0.18819477066397666 test_loss: 0.18934611082077027
epoch: 46 training_loss 0.17200423888862132 test_loss: 0.16200249195098876
epoch: 47 training_loss 0.1798913773149252 test_loss: 0.1864236831665039
epoch: 48 training_loss 0.17773869395256042 test_loss: 0.18340121507644652
epoch: 49 training_loss 0.1808374886214733 test_loss: 0.20584194660186766
epoch: 50 training_loss 0.1904146122932434 test_loss: 0.18690708875656128
epoch: 51 training_loss 0.18151506774127482 test_loss: 0.182875657081604
epoch: 52 training_loss 0.17808271437883377 test_loss: 0.18200660943984986
epoch: 53 training_loss 0.18367563985288143 test_loss: 0.1882955551147461
epoch: 54 training_loss 0.1756992541998625 test_loss: 0.18355484008789064
epoch: 55 training_loss 0.1817049790173769 test_loss: 0.17819830179214477
epoch: 56 training_loss 0.17958394363522528 test_loss: 0.18755507469177246
epoch: 57 training_loss 0.18343927681446076 test_loss: 0.19179909229278563
epoch: 58 training_loss 0.1738238063454628 test_loss: 0.16550146341323851
epoch: 59 training_loss 0.17515266723930836 test_loss: 0.18956398963928223
epoch: 60 training_loss 0.18261347070336342 test_loss: 0.18918468952178955
epoch: 61 training_loss 0.18706536047160627 test_loss: 0.18539026975631714
epoch: 62 training_loss 0.17866819068789483 test_loss: 0.18446495532989501
epoch: 63 training_loss 0.18421616233885288 test_loss: 0.17584728002548217
epoch: 64 training_loss 0.18344712793827056 test_loss: 0.19527528285980225
epoch: 65 training_loss 0.18265472583472728 test_loss: 0.19359358549118041
epoch: 66 training_loss 0.1880096260458231 test_loss: 0.17987921237945556
epoch: 67 training_loss 0.1896797551959753 test_loss: 0.16997506618499755
epoch: 68 training_loss 0.1810402562469244 test_loss: 0.1942565321922302
epoch: 69 training_loss 0.18306518375873565 test_loss: 0.17822258472442626
epoch: 70 training_loss 0.17651024021208286 test_loss: 0.18336273431777955
epoch: 71 training_loss 0.18447419226169587 test_loss: 0.1595844030380249
epoch: 72 training_loss 0.18428520426154138 test_loss: 0.17123231887817383
epoch: 73 training_loss 0.18095117211341857 test_loss: 0.18132604360580445
epoch: 74 training_loss 0.17838756330311298 test_loss: 0.18225494623184205
epoch: 75 training_loss 0.18029865473508835 test_loss: 0.19339118003845215
epoch: 76 training_loss 0.17977102920413018 test_loss: 0.18336275815963746
epoch: 77 training_loss 0.1794213779270649 test_loss: 0.165569269657135
epoch: 78 training_loss 0.17497134692966937 test_loss: 0.19048382043838502
epoch: 79 training_loss 0.17800068780779837 test_loss: 0.17347184419631959
epoch: 80 training_loss 0.18429758109152317 test_loss: 0.1829346776008606
epoch: 81 training_loss 0.18071046009659766 test_loss: 0.17576409578323365
epoch: 82 training_loss 0.17373038284480571 test_loss: 0.17013779878616334
epoch: 83 training_loss 0.18039485700428487 test_loss: 0.19319744110107423
epoch: 84 training_loss 0.1784474966675043 test_loss: 0.1704469919204712
epoch: 85 training_loss 0.17576072722673416 test_loss: 0.16755988597869872
epoch: 86 training_loss 0.1729132141917944 test_loss: 0.1827796459197998
epoch: 87 training_loss 0.18278958536684514 test_loss: 0.1804918646812439
epoch: 88 training_loss 0.17990607663989067 test_loss: 0.1707141160964966
epoch: 89 training_loss 0.1753625648468733 test_loss: 0.1662023425102234
epoch: 90 training_loss 0.1836929588764906 test_loss: 0.17443833351135254
epoch: 91 training_loss 0.17842033542692662 test_loss: 0.18779622316360473
epoch: 92 training_loss 0.17116329312324524 test_loss: 0.19416009187698363
epoch: 93 training_loss 0.17266845881938933 test_loss: 0.18837689161300658
epoch: 94 training_loss 0.17605869073420763 test_loss: 0.19393035173416137
epoch: 95 training_loss 0.17711288679391146 test_loss: 0.19225445985794068
epoch: 96 training_loss 0.17797394551336765 test_loss: 0.17399801015853883
epoch: 97 training_loss 0.179882520288229 test_loss: 0.18300199508666992
epoch: 98 training_loss 0.18860273249447346 test_loss: 0.1956656575202942
epoch: 99 training_loss 0.18277019582688808 test_loss: 0.17415363788604737
epoch: 100 training_loss 0.17019964329898357 test_loss: 0.198238742351532
epoch: 101 training_loss 0.18572088919579982 test_loss: 0.20395488739013673
epoch: 102 training_loss 0.18007165394723415 test_loss: 0.16573301553726197
epoch: 103 training_loss 0.17949346765875818 test_loss: 0.18460277318954468
epoch: 104 training_loss 0.18025926113128662 test_loss: 0.1793613314628601
epoch: 105 training_loss 0.17670019932091235 test_loss: 0.19446483850479127
epoch: 106 training_loss 0.18267758436501025 test_loss: 0.1792231559753418
epoch: 107 training_loss 0.17488356098532676 test_loss: 0.19942891597747803
epoch: 108 training_loss 0.17934833273291587 test_loss: 0.18741496801376342
epoch: 109 training_loss 0.18139259494841098 test_loss: 0.17539788484573365
epoch: 110 training_loss 0.17892782822251319 test_loss: 0.17974883317947388
epoch: 111 training_loss 0.17770570382475853 test_loss: 0.1817442536354065
epoch: 112 training_loss 0.1824376417696476 test_loss: 0.17436102628707886
epoch: 113 training_loss 0.18245901279151439 test_loss: 0.18668750524520875
epoch: 114 training_loss 0.17790133655071258 test_loss: 0.2036423683166504
epoch: 115 training_loss 0.18139744985848666 test_loss: 0.19192204475402833
epoch: 116 training_loss 0.1815510167181492 test_loss: 0.1867546558380127
epoch: 117 training_loss 0.1791889875382185 test_loss: 0.16865897178649902
epoch: 118 training_loss 0.17689385019242765 test_loss: 0.17357075214385986
epoch: 119 training_loss 0.1763470233976841 test_loss: 0.18719111680984496
epoch: 120 training_loss 0.17762359753251075 test_loss: 0.2005016565322876
epoch: 121 training_loss 0.17752090513706206 test_loss: 0.19829562902450562
epoch: 122 training_loss 0.1776042015105486 test_loss: 0.17975945472717286
epoch: 123 training_loss 0.17587043657898904 test_loss: 0.17347307205200196
epoch: 124 training_loss 0.17765471063554286 test_loss: 0.19074842929840088
epoch: 125 training_loss 0.17309216648340225 test_loss: 0.19146602153778075
epoch: 126 training_loss 0.1828178098797798 test_loss: 0.18556005954742433
epoch: 127 training_loss 0.1793721631169319 test_loss: 0.18509578704833984
epoch: 128 training_loss 0.17598544627428056 test_loss: 0.17078877687454225
epoch: 129 training_loss 0.17490958251059055 test_loss: 0.18892749547958373
epoch: 130 training_loss 0.17855567190796137 test_loss: 0.18778501749038695
epoch: 131 training_loss 0.17988673150539397 test_loss: 0.18308258056640625
epoch: 132 training_loss 0.16938749000430106 test_loss: 0.18062775135040282
epoch: 133 training_loss 0.17410114698112011 test_loss: 0.17802574634552001
epoch: 134 training_loss 0.1768863220512867 test_loss: 0.18472745418548583
epoch: 135 training_loss 0.17825131118297577 test_loss: 0.18897730112075806
epoch: 136 training_loss 0.17150999173521997 test_loss: 0.18272817134857178
epoch: 137 training_loss 0.17965304378420113 test_loss: 0.19320603609085082
epoch: 138 training_loss 0.17621074296534062 test_loss: 0.20026032924652098
epoch: 139 training_loss 0.18140954285860061 test_loss: 0.1663091540336609
epoch: 140 training_loss 0.17290739580988884 test_loss: 0.1871926188468933
epoch: 141 training_loss 0.1741023411601782 test_loss: 0.16690012216567993
epoch: 142 training_loss 0.17708235934376718 test_loss: 0.18098925352096557
epoch: 143 training_loss 0.1775865062326193 test_loss: 0.19301708936691284
epoch: 144 training_loss 0.17812216520309448 test_loss: 0.17212721109390258
epoch: 145 training_loss 0.18346579305827618 test_loss: 0.17567284107208253
epoch: 146 training_loss 0.17613463908433913 test_loss: 0.17365639209747313
epoch: 147 training_loss 0.17628408014774322 test_loss: 0.19539686441421508
epoch: 148 training_loss 0.1714035854488611 test_loss: 0.19154394865036012
epoch: 149 training_loss 0.18482577245682477 test_loss: 0.19400073289871217
epoch: 0 training_loss 8.448511681556703 test_loss: 4.655580139160156
epoch: 1 training_loss 3.851235258579254 test_loss: 3.1586965560913085
epoch: 2 training_loss 2.742440594434738 test_loss: 2.368133544921875
epoch: 3 training_loss 2.201329823732376 test_loss: 1.9730079650878907
epoch: 4 training_loss 1.893299959897995 test_loss: 1.7359098434448241
epoch: 5 training_loss 1.6631783413887025 test_loss: 1.5693673133850097
epoch: 6 training_loss 1.5087604427337646 test_loss: 1.4647612571716309
epoch: 7 training_loss 1.387571759223938 test_loss: 1.3072293281555176
epoch: 8 training_loss 1.3005667912960053 test_loss: 1.2560716629028321
epoch: 9 training_loss 1.2424224174022676 test_loss: 1.186809253692627
epoch: 10 training_loss 1.1910347330570221 test_loss: 1.1750548362731934
epoch: 11 training_loss 1.1381725567579268 test_loss: 1.073183536529541
epoch: 12 training_loss 1.0971681880950928 test_loss: 1.0769122123718262
epoch: 13 training_loss 1.0576194697618484 test_loss: 0.9989980697631836
epoch: 14 training_loss 1.019784809947014 test_loss: 1.0178606033325195
epoch: 15 training_loss 1.0153857469558716 test_loss: 0.9770840644836426
epoch: 16 training_loss 0.9616654282808303 test_loss: 0.9516541481018066
epoch: 17 training_loss 0.9635609799623489 test_loss: 0.9063291549682617
epoch: 18 training_loss 0.9189698112010956 test_loss: 0.9163796424865722
epoch: 19 training_loss 0.9031800776720047 test_loss: 0.9079904556274414
epoch: 20 training_loss 0.8815887051820755 test_loss: 0.84915132522583
epoch: 21 training_loss 0.8492380160093308 test_loss: 0.8487299919128418
epoch: 22 training_loss 0.8485716849565506 test_loss: 0.8160052299499512
epoch: 23 training_loss 0.8356122463941574 test_loss: 0.8103857040405273
epoch: 24 training_loss 0.8384282863140107 test_loss: 0.8360476493835449
epoch: 25 training_loss 0.8125564908981323 test_loss: 0.83102445602417
epoch: 26 training_loss 0.8035952693223953 test_loss: 0.8416245460510254
epoch: 27 training_loss 0.7800390547513962 test_loss: 0.7763324737548828
epoch: 28 training_loss 0.7709048157930374 test_loss: 0.7768595695495606
epoch: 29 training_loss 0.7539601403474808 test_loss: 0.7199048042297364
epoch: 30 training_loss 0.7483847427368164 test_loss: 0.7338402271270752
epoch: 31 training_loss 0.7407021689414978 test_loss: 0.7220677375793457
epoch: 32 training_loss 0.7262535065412521 test_loss: 0.6874020099639893
epoch: 33 training_loss 0.7260062390565872 test_loss: 0.7446000099182128
epoch: 34 training_loss 0.7224126332998275 test_loss: 0.711890983581543
epoch: 35 training_loss 0.7197723776102066 test_loss: 0.7189794540405273
epoch: 36 training_loss 0.7125350916385651 test_loss: 0.6800454139709473
epoch: 37 training_loss 0.6958875149488449 test_loss: 0.7000414371490479
epoch: 38 training_loss 0.7022282826900482 test_loss: 0.6916489124298095
epoch: 39 training_loss 0.67954330265522 test_loss: 0.6653041362762451
epoch: 40 training_loss 0.6786228704452515 test_loss: 0.6670995235443116
epoch: 41 training_loss 0.675020609498024 test_loss: 0.7537102699279785
epoch: 42 training_loss 0.6896856051683425 test_loss: 0.7126950740814209
epoch: 43 training_loss 0.674715147614479 test_loss: 0.6560987949371337
epoch: 44 training_loss 0.6662300169467926 test_loss: 0.6628284931182862
epoch: 45 training_loss 0.647584525346756 test_loss: 0.651669979095459
epoch: 46 training_loss 0.6477822452783585 test_loss: 0.6300029754638672
epoch: 47 training_loss 0.6353207302093505 test_loss: 0.6313055515289306
epoch: 48 training_loss 0.6435139572620392 test_loss: 0.6690206050872802
epoch: 49 training_loss 0.6496332943439483 test_loss: 0.6451697826385498
epoch: 50 training_loss 0.6370975422859192 test_loss: 0.6474345684051513
epoch: 51 training_loss 0.6380307793617248 test_loss: 0.6336829662322998
epoch: 52 training_loss 0.6254981690645218 test_loss: 0.6460610389709472
epoch: 53 training_loss 0.6352135682106018 test_loss: 0.6643979549407959
epoch: 54 training_loss 0.6200041192770004 test_loss: 0.6497387886047363
epoch: 55 training_loss 0.618367183804512 test_loss: 0.6208352088928223
epoch: 56 training_loss 0.6172745257616044 test_loss: 0.6239206314086914
epoch: 57 training_loss 0.6106494426727295 test_loss: 0.6071174621582032
epoch: 58 training_loss 0.6198617362976074 test_loss: 0.6009776592254639
epoch: 59 training_loss 0.6118645864725113 test_loss: 0.5907161235809326
epoch: 60 training_loss 0.5961548960208893 test_loss: 0.5919193744659423
epoch: 61 training_loss 0.5951766252517701 test_loss: 0.5902309417724609
epoch: 62 training_loss 0.6040029269456864 test_loss: 0.6314553260803223
epoch: 63 training_loss 0.6042420423030853 test_loss: 0.5846009731292725
epoch: 64 training_loss 0.6062673312425614 test_loss: 0.5912272930145264
epoch: 65 training_loss 0.5936470800638198 test_loss: 0.5800170421600341
epoch: 66 training_loss 0.5757343691587448 test_loss: 0.5724760055541992
epoch: 67 training_loss 0.5890157788991928 test_loss: 0.5811004161834716
epoch: 68 training_loss 0.5832398152351379 test_loss: 0.5932654857635498
epoch: 69 training_loss 0.5886753070354461 test_loss: 0.5823640346527099
epoch: 70 training_loss 0.5788178998231888 test_loss: 0.570598030090332
epoch: 71 training_loss 0.5801404935121536 test_loss: 0.6003524780273437
epoch: 72 training_loss 0.5837392655014991 test_loss: 0.5672135829925538
epoch: 73 training_loss 0.5832790219783783 test_loss: 0.5814743995666504
epoch: 74 training_loss 0.5815431118011475 test_loss: 0.5827957630157471
epoch: 75 training_loss 0.5739487040042878 test_loss: 0.5673072814941407
epoch: 76 training_loss 0.577071197628975 test_loss: 0.5831794261932373
epoch: 77 training_loss 0.5705663490295411 test_loss: 0.5809734344482422
epoch: 78 training_loss 0.5689890378713608 test_loss: 0.5458362102508545
epoch: 79 training_loss 0.5732195520401001 test_loss: 0.5714257240295411
epoch: 80 training_loss 0.5678306013345719 test_loss: 0.5425227642059326
epoch: 81 training_loss 0.5642211604118347 test_loss: 0.5459208965301514
epoch: 82 training_loss 0.5748016095161438 test_loss: 0.5502328872680664
epoch: 83 training_loss 0.5576933205127717 test_loss: 0.5603323936462402
epoch: 84 training_loss 0.5605397668480873 test_loss: 0.5841598033905029
epoch: 85 training_loss 0.5549085527658463 test_loss: 0.5502548217773438
epoch: 86 training_loss 0.5675123345851898 test_loss: 0.5749149799346924
epoch: 87 training_loss 0.5599660450220107 test_loss: 0.5605091571807861
epoch: 88 training_loss 0.5462371397018433 test_loss: 0.5614145278930665
epoch: 89 training_loss 0.5549033063650132 test_loss: 0.5553832530975342
epoch: 90 training_loss 0.5496040213108063 test_loss: 0.554188585281372
epoch: 91 training_loss 0.5571970012784004 test_loss: 0.529827880859375
epoch: 92 training_loss 0.5528920575976372 test_loss: 0.5722341060638427
epoch: 93 training_loss 0.5702104920148849 test_loss: 0.5523088932037353
epoch: 94 training_loss 0.5462474635243416 test_loss: 0.5331207275390625
epoch: 95 training_loss 0.5455870553851128 test_loss: 0.5504289627075195
epoch: 96 training_loss 0.5500351959466934 test_loss: 0.551060962677002
epoch: 97 training_loss 0.5381815010309219 test_loss: 0.5188414096832276
epoch: 98 training_loss 0.5388340082764626 test_loss: 0.5411713600158692
epoch: 99 training_loss 0.5389621436595917 test_loss: 0.5355590343475342
epoch: 100 training_loss 0.5344170752167702 test_loss: 0.5463826179504394
epoch: 101 training_loss 0.5398586440086365 test_loss: 0.529640007019043
epoch: 102 training_loss 0.540813444852829 test_loss: 0.522502088546753
epoch: 103 training_loss 0.5481399255990982 test_loss: 0.5315075874328613
epoch: 104 training_loss 0.5361765491962432 test_loss: 0.537412691116333
epoch: 105 training_loss 0.5249088117480278 test_loss: 0.5255464553833008
epoch: 106 training_loss 0.529427837729454 test_loss: 0.5470996856689453
epoch: 107 training_loss 0.5340489491820335 test_loss: 0.5388950824737548
epoch: 108 training_loss 0.5305357682704925 test_loss: 0.5245284557342529
epoch: 109 training_loss 0.5351808226108551 test_loss: 0.5178147315979004
epoch: 110 training_loss 0.5337973725795746 test_loss: 0.5297099113464355
epoch: 111 training_loss 0.531186997294426 test_loss: 0.5138665676116944
epoch: 112 training_loss 0.528170762360096 test_loss: 0.5135090827941895
epoch: 113 training_loss 0.533459135890007 test_loss: 0.5212789058685303
epoch: 114 training_loss 0.5362360042333603 test_loss: 0.5207516193389893
epoch: 115 training_loss 0.5323789671063424 test_loss: 0.5179307937622071
epoch: 116 training_loss 0.5305131325125694 test_loss: 0.5198895454406738
epoch: 117 training_loss 0.5311627733707428 test_loss: 0.5218623161315918
epoch: 118 training_loss 0.5192537209391594 test_loss: 0.5298380851745605
epoch: 119 training_loss 0.5129543232917786 test_loss: 0.5086751937866211
epoch: 120 training_loss 0.5167651852965355 test_loss: 0.5044883728027344
epoch: 121 training_loss 0.5214027974009514 test_loss: 0.5123259544372558
epoch: 122 training_loss 0.5150069430470466 test_loss: 0.4985511302947998
epoch: 123 training_loss 0.5130368143320083 test_loss: 0.5107827663421631
epoch: 124 training_loss 0.5208039307594299 test_loss: 0.5034633636474609
epoch: 125 training_loss 0.5168956115841865 test_loss: 0.5020270347595215
epoch: 126 training_loss 0.5160029223561287 test_loss: 0.5232926368713379
epoch: 127 training_loss 0.5106594482064247 test_loss: 0.5269484996795655
epoch: 128 training_loss 0.5247046825289726 test_loss: 0.5398426055908203
epoch: 129 training_loss 0.5063007041811943 test_loss: 0.5084615230560303
epoch: 130 training_loss 0.5166226175427436 test_loss: 0.5103302955627441
epoch: 131 training_loss 0.5057075887918472 test_loss: 0.5202277660369873
epoch: 132 training_loss 0.5044962164759635 test_loss: 0.49844775199890134
epoch: 133 training_loss 0.5109551844000816 test_loss: 0.495024585723877
epoch: 134 training_loss 0.5074174001812934 test_loss: 0.5193696975708008
epoch: 135 training_loss 0.5124736133217812 test_loss: 0.52316312789917
epoch: 136 training_loss 0.5047195383906364 test_loss: 0.5135191440582275
epoch: 137 training_loss 0.5076020514965057 test_loss: 0.5018634796142578
epoch: 138 training_loss 0.5078476110100746 test_loss: 0.5272998332977294
epoch: 139 training_loss 0.5085599881410598 test_loss: 0.5069032669067383
epoch: 140 training_loss 0.5064750999212265 test_loss: 0.5027927398681641
epoch: 141 training_loss 0.500843026638031 test_loss: 0.49852948188781737
epoch: 142 training_loss 0.5047140476107598 test_loss: 0.5012848377227783
epoch: 143 training_loss 0.500717182457447 test_loss: 0.5205275058746338
epoch: 144 training_loss 0.5001275050640106 test_loss: 0.5116000652313233
epoch: 145 training_loss 0.502006991803646 test_loss: 0.5047806739807129
epoch: 146 training_loss 0.49516116559505463 test_loss: 0.4903580188751221
epoch: 147 training_loss 0.5134749743342399 test_loss: 0.5306373596191406
epoch: 148 training_loss 0.49163961052894595 test_loss: 0.49811320304870604
epoch: 149 training_loss 0.49440835118293763 test_loss: 0.49582653045654296
1938.509694725593
episode: 0 training return: tensor(133.6296, device='cuda:0')
episode: 1 training return: tensor(-183.9971, device='cuda:0')
episode: 2 training return: tensor(-292.7017, device='cuda:0')
episode: 3 training return: tensor(-222.8673, device='cuda:0')
epoch: 1 test_true_pfm: 2093.551589982257 sim_pfm: -14.268637264962308
episode: 4 training return: tensor(-303.6811, device='cuda:0')
episode: 5 training return: tensor(-306.9585, device='cuda:0')
episode: 6 training return: tensor(337.2613, device='cuda:0')
episode: 7 training return: tensor(342.2440, device='cuda:0')
epoch: 2 test_true_pfm: 2611.529362559549 sim_pfm: -82.28793041369256
episode: 8 training return: tensor(-264.4442, device='cuda:0')
episode: 9 training return: tensor(-135.6262, device='cuda:0')
episode: 10 training return: tensor(-283.3104, device='cuda:0')
episode: 11 training return: tensor(-100.4292, device='cuda:0')
epoch: 3 test_true_pfm: 2542.2259034513663 sim_pfm: -175.19464577179556
episode: 12 training return: tensor(-173.5102, device='cuda:0')
episode: 13 training return: tensor(-415.7147, device='cuda:0')
episode: 14 training return: tensor(-298.9983, device='cuda:0')
episode: 15 training return: tensor(-211.1165, device='cuda:0')
epoch: 4 test_true_pfm: 1807.2590987269868 sim_pfm: 90.08052495709853
episode: 16 training return: tensor(-369.2191, device='cuda:0')
episode: 17 training return: tensor(-379.7269, device='cuda:0')
episode: 18 training return: tensor(-374.7198, device='cuda:0')
episode: 19 training return: tensor(-108.8613, device='cuda:0')
epoch: 5 test_true_pfm: 1274.278854280263 sim_pfm: -376.86795651260763
episode: 20 training return: tensor(-206.1743, device='cuda:0')
episode: 21 training return: tensor(-154.0564, device='cuda:0')
episode: 22 training return: tensor(-45.5242, device='cuda:0')
episode: 23 training return: tensor(-364.3043, device='cuda:0')
epoch: 6 test_true_pfm: 2358.570528161004 sim_pfm: -103.41311382437318
episode: 24 training return: tensor(-30.1023, device='cuda:0')
episode: 25 training return: tensor(129.2072, device='cuda:0')
episode: 26 training return: tensor(-86.7656, device='cuda:0')
episode: 27 training return: tensor(-160.7316, device='cuda:0')
epoch: 7 test_true_pfm: 1598.3209071709387 sim_pfm: -282.29178837737226
episode: 28 training return: tensor(-255.0026, device='cuda:0')
episode: 29 training return: tensor(-358.3151, device='cuda:0')
episode: 30 training return: tensor(180.9338, device='cuda:0')
episode: 31 training return: tensor(-141.5717, device='cuda:0')
epoch: 8 test_true_pfm: 1590.8465384281753 sim_pfm: -324.8145496972914
episode: 32 training return: tensor(-367.9672, device='cuda:0')
episode: 33 training return: tensor(-225.2054, device='cuda:0')
episode: 34 training return: tensor(-262.2161, device='cuda:0')
episode: 35 training return: tensor(-379.2215, device='cuda:0')
epoch: 9 test_true_pfm: 2369.019144825628 sim_pfm: -296.3829354613942
episode: 36 training return: tensor(-250.8577, device='cuda:0')
episode: 37 training return: tensor(-101.0237, device='cuda:0')
episode: 38 training return: tensor(281.9650, device='cuda:0')
episode: 39 training return: tensor(-160.4200, device='cuda:0')
epoch: 10 test_true_pfm: 1352.826164358972 sim_pfm: -305.0571428471788
episode: 40 training return: tensor(375.8248, device='cuda:0')
episode: 41 training return: tensor(-281.7024, device='cuda:0')
episode: 42 training return: tensor(-153.8104, device='cuda:0')
episode: 43 training return: tensor(-179.0707, device='cuda:0')
epoch: 11 test_true_pfm: 1296.1224520636335 sim_pfm: -358.0830241414951
episode: 44 training return: tensor(-186.2011, device='cuda:0')
episode: 45 training return: tensor(-183.6961, device='cuda:0')
episode: 46 training return: tensor(-193.6742, device='cuda:0')
episode: 47 training return: tensor(-369.6020, device='cuda:0')
epoch: 12 test_true_pfm: 1288.2883760690659 sim_pfm: -375.07396654365584
episode: 48 training return: tensor(-347.4880, device='cuda:0')
episode: 49 training return: tensor(93.1375, device='cuda:0')
episode: 50 training return: tensor(22.3763, device='cuda:0')
episode: 51 training return: tensor(-303.8271, device='cuda:0')
epoch: 13 test_true_pfm: 1449.3959425045302 sim_pfm: -324.14121744959266
episode: 52 training return: tensor(-198.4700, device='cuda:0')
episode: 53 training return: tensor(-203.5869, device='cuda:0')
episode: 54 training return: tensor(-368.9459, device='cuda:0')
episode: 55 training return: tensor(3.2375, device='cuda:0')
epoch: 14 test_true_pfm: 2121.182921881715 sim_pfm: 15.380755509249866
episode: 56 training return: tensor(-382.8507, device='cuda:0')
episode: 57 training return: tensor(336.2733, device='cuda:0')
episode: 58 training return: tensor(-80.2153, device='cuda:0')
episode: 59 training return: tensor(-211.8272, device='cuda:0')
epoch: 15 test_true_pfm: 2194.8696956539497 sim_pfm: -32.35325959371403
episode: 60 training return: tensor(-96.5843, device='cuda:0')
episode: 61 training return: tensor(55.1358, device='cuda:0')
episode: 62 training return: tensor(-376.0864, device='cuda:0')
episode: 63 training return: tensor(-271.1326, device='cuda:0')
epoch: 16 test_true_pfm: 3131.1200496036477 sim_pfm: 65.00742652067372
episode: 64 training return: tensor(-274.5546, device='cuda:0')
episode: 65 training return: tensor(-70.2249, device='cuda:0')
episode: 66 training return: tensor(-109.9378, device='cuda:0')
episode: 67 training return: tensor(-285.0053, device='cuda:0')
epoch: 17 test_true_pfm: 1498.492021848022 sim_pfm: -255.3355347881637
episode: 68 training return: tensor(-362.1421, device='cuda:0')
episode: 69 training return: tensor(61.5846, device='cuda:0')
episode: 70 training return: tensor(339.9081, device='cuda:0')
episode: 71 training return: tensor(-383.7881, device='cuda:0')
epoch: 18 test_true_pfm: 1725.6281860919528 sim_pfm: 236.62978648409867
episode: 72 training return: tensor(326.9539, device='cuda:0')
episode: 73 training return: tensor(357.4853, device='cuda:0')
episode: 74 training return: tensor(-24.7488, device='cuda:0')
episode: 75 training return: tensor(380.2776, device='cuda:0')
epoch: 19 test_true_pfm: 2397.6607336328616 sim_pfm: -250.1901947769026
episode: 76 training return: tensor(-258.4951, device='cuda:0')
episode: 77 training return: tensor(40.5025, device='cuda:0')
episode: 78 training return: tensor(-362.1255, device='cuda:0')
episode: 79 training return: tensor(-307.8700, device='cuda:0')
epoch: 20 test_true_pfm: 1620.1106207554712 sim_pfm: 36.121059709325586
episode: 80 training return: tensor(-271.3831, device='cuda:0')
episode: 81 training return: tensor(-49.5122, device='cuda:0')
episode: 82 training return: tensor(-358.6967, device='cuda:0')
episode: 83 training return: tensor(-233.4056, device='cuda:0')
epoch: 21 test_true_pfm: 2061.1487415951765 sim_pfm: -278.32994823142263
episode: 84 training return: tensor(166.7268, device='cuda:0')
episode: 85 training return: tensor(33.1693, device='cuda:0')
episode: 86 training return: tensor(-339.0100, device='cuda:0')
episode: 87 training return: tensor(-209.1538, device='cuda:0')
epoch: 22 test_true_pfm: 2100.67609863436 sim_pfm: -194.19065878955493
episode: 88 training return: tensor(-356.7126, device='cuda:0')
episode: 89 training return: tensor(-231.8103, device='cuda:0')
episode: 90 training return: tensor(-158.9487, device='cuda:0')
episode: 91 training return: tensor(123.2805, device='cuda:0')
epoch: 23 test_true_pfm: 1695.4041315015877 sim_pfm: 100.52137990862441
episode: 92 training return: tensor(-231.5246, device='cuda:0')
episode: 93 training return: tensor(-375.8785, device='cuda:0')
episode: 94 training return: tensor(-213.6874, device='cuda:0')
episode: 95 training return: tensor(-130.8803, device='cuda:0')
epoch: 24 test_true_pfm: 1863.7732406125233 sim_pfm: -272.6785447945488
episode: 96 training return: tensor(-297.1936, device='cuda:0')
episode: 97 training return: tensor(-89.4701, device='cuda:0')
episode: 98 training return: tensor(325.6249, device='cuda:0')
episode: 99 training return: tensor(-56.0316, device='cuda:0')
epoch: 25 test_true_pfm: 1524.262387362184 sim_pfm: 52.52551337735107
episode: 100 training return: tensor(-373.8763, device='cuda:0')
episode: 101 training return: tensor(329.1010, device='cuda:0')
episode: 102 training return: tensor(36.3725, device='cuda:0')
episode: 103 training return: tensor(-372.5572, device='cuda:0')
epoch: 26 test_true_pfm: 1696.1078621559852 sim_pfm: -164.40129333350342
episode: 104 training return: tensor(-364.7902, device='cuda:0')
episode: 105 training return: tensor(-363.4189, device='cuda:0')
episode: 106 training return: tensor(-242.3849, device='cuda:0')
episode: 107 training return: tensor(108.3828, device='cuda:0')
epoch: 27 test_true_pfm: 2111.9766437916037 sim_pfm: 58.20124651057025
episode: 108 training return: tensor(-254.8687, device='cuda:0')
episode: 109 training return: tensor(339.2360, device='cuda:0')
episode: 110 training return: tensor(-340.3340, device='cuda:0')
episode: 111 training return: tensor(254.9474, device='cuda:0')
epoch: 28 test_true_pfm: 2516.591327040474 sim_pfm: -167.55351043383902
episode: 112 training return: tensor(-369.5866, device='cuda:0')
episode: 113 training return: tensor(134.2096, device='cuda:0')
episode: 114 training return: tensor(-267.2700, device='cuda:0')
episode: 115 training return: tensor(-256.2020, device='cuda:0')
epoch: 29 test_true_pfm: 2238.098105449142 sim_pfm: -249.00341896353834
episode: 116 training return: tensor(-378.3188, device='cuda:0')
episode: 117 training return: tensor(-363.6358, device='cuda:0')
episode: 118 training return: tensor(-211.2253, device='cuda:0')
episode: 119 training return: tensor(107.7765, device='cuda:0')
epoch: 30 test_true_pfm: 1588.315912928554 sim_pfm: -254.46957511759442
episode: 120 training return: tensor(-285.5117, device='cuda:0')
episode: 121 training return: tensor(-241.8247, device='cuda:0')
episode: 122 training return: tensor(270.2070, device='cuda:0')
episode: 123 training return: tensor(-315.5579, device='cuda:0')
epoch: 31 test_true_pfm: 2463.3668786083595 sim_pfm: -112.798686844646
episode: 124 training return: tensor(-337.5019, device='cuda:0')
episode: 125 training return: tensor(-62.6565, device='cuda:0')
episode: 126 training return: tensor(-23.1853, device='cuda:0')
episode: 127 training return: tensor(-325.8267, device='cuda:0')
epoch: 32 test_true_pfm: 2037.7528386915303 sim_pfm: -244.3007682407624
episode: 128 training return: tensor(-202.8613, device='cuda:0')
episode: 129 training return: tensor(-244.6016, device='cuda:0')
episode: 130 training return: tensor(-322.9690, device='cuda:0')
episode: 131 training return: tensor(-56.0853, device='cuda:0')
epoch: 33 test_true_pfm: 2395.74254585784 sim_pfm: 83.9234450601313
episode: 132 training return: tensor(-139.3671, device='cuda:0')
episode: 133 training return: tensor(297.4273, device='cuda:0')
episode: 134 training return: tensor(292.1458, device='cuda:0')
episode: 135 training return: tensor(321.9662, device='cuda:0')
epoch: 34 test_true_pfm: 1742.0376441944748 sim_pfm: 155.26968353024373
episode: 136 training return: tensor(-333.4905, device='cuda:0')
episode: 137 training return: tensor(350.4080, device='cuda:0')
episode: 138 training return: tensor(-139.6890, device='cuda:0')
episode: 139 training return: tensor(279.4997, device='cuda:0')
epoch: 35 test_true_pfm: 2186.300937479458 sim_pfm: -172.72438520470556
episode: 140 training return: tensor(-368.4361, device='cuda:0')
episode: 141 training return: tensor(-370.5174, device='cuda:0')
episode: 142 training return: tensor(-262.8534, device='cuda:0')
episode: 143 training return: tensor(-245.6133, device='cuda:0')
epoch: 36 test_true_pfm: 2100.8345113762575 sim_pfm: -177.91296330765667
episode: 144 training return: tensor(58.6434, device='cuda:0')
episode: 145 training return: tensor(-217.2539, device='cuda:0')
episode: 146 training return: tensor(-122.9041, device='cuda:0')
episode: 147 training return: tensor(-62.3230, device='cuda:0')
epoch: 37 test_true_pfm: 1881.4096739353402 sim_pfm: -52.20211234802264
episode: 148 training return: tensor(-226.4807, device='cuda:0')
episode: 149 training return: tensor(197.1099, device='cuda:0')
episode: 150 training return: tensor(-213.7660, device='cuda:0')
episode: 151 training return: tensor(-260.8878, device='cuda:0')
epoch: 38 test_true_pfm: 2166.94826956912 sim_pfm: -192.6702175734002
episode: 152 training return: tensor(324.5202, device='cuda:0')
episode: 153 training return: tensor(-206.1951, device='cuda:0')
episode: 154 training return: tensor(-371.3764, device='cuda:0')
episode: 155 training return: tensor(-367.5104, device='cuda:0')
epoch: 39 test_true_pfm: 2227.4712049753302 sim_pfm: -258.6494822856718
episode: 156 training return: tensor(-313.0239, device='cuda:0')
episode: 157 training return: tensor(-370.1362, device='cuda:0')
episode: 158 training return: tensor(216.0923, device='cuda:0')
episode: 159 training return: tensor(-56.4286, device='cuda:0')
epoch: 40 test_true_pfm: 2160.5042988467903 sim_pfm: 198.32577428687364
episode: 160 training return: tensor(-322.4402, device='cuda:0')
episode: 161 training return: tensor(-326.6108, device='cuda:0')
episode: 162 training return: tensor(-363.7935, device='cuda:0')
episode: 163 training return: tensor(-360.7909, device='cuda:0')
epoch: 41 test_true_pfm: 2362.4818128103757 sim_pfm: -244.95047257487508
episode: 164 training return: tensor(-315.2984, device='cuda:0')
episode: 165 training return: tensor(381.0087, device='cuda:0')
episode: 166 training return: tensor(103.1732, device='cuda:0')
episode: 167 training return: tensor(228.9605, device='cuda:0')
epoch: 42 test_true_pfm: 2132.5946722719405 sim_pfm: 134.80133015383035
episode: 168 training return: tensor(248.2770, device='cuda:0')
episode: 169 training return: tensor(3.4151, device='cuda:0')
episode: 170 training return: tensor(100.5245, device='cuda:0')
episode: 171 training return: tensor(32.6862, device='cuda:0')
epoch: 43 test_true_pfm: 3303.902954020635 sim_pfm: 184.74881643373132
episode: 172 training return: tensor(269.1084, device='cuda:0')
episode: 173 training return: tensor(217.4404, device='cuda:0')
episode: 174 training return: tensor(-322.1759, device='cuda:0')
episode: 175 training return: tensor(-176.0178, device='cuda:0')
epoch: 44 test_true_pfm: 2940.397090953262 sim_pfm: 253.1625404158452
episode: 176 training return: tensor(-72.5126, device='cuda:0')
episode: 177 training return: tensor(-46.0731, device='cuda:0')
episode: 178 training return: tensor(179.3235, device='cuda:0')
episode: 179 training return: tensor(-186.4037, device='cuda:0')
epoch: 45 test_true_pfm: 2651.1049404350756 sim_pfm: -163.2076516901919
episode: 180 training return: tensor(141.4144, device='cuda:0')
episode: 181 training return: tensor(349.5607, device='cuda:0')
episode: 182 training return: tensor(352.1940, device='cuda:0')
episode: 183 training return: tensor(323.4638, device='cuda:0')
epoch: 46 test_true_pfm: 2177.787292056081 sim_pfm: -245.42051772859608
episode: 184 training return: tensor(334.7693, device='cuda:0')
episode: 185 training return: tensor(-293.6135, device='cuda:0')
episode: 186 training return: tensor(343.5345, device='cuda:0')
episode: 187 training return: tensor(315.9619, device='cuda:0')
epoch: 47 test_true_pfm: 2765.0220835129026 sim_pfm: 56.81340046040714
episode: 188 training return: tensor(-244.7596, device='cuda:0')
episode: 189 training return: tensor(250.8885, device='cuda:0')
episode: 190 training return: tensor(-227.0766, device='cuda:0')
episode: 191 training return: tensor(338.4621, device='cuda:0')
epoch: 48 test_true_pfm: 2697.141007695465 sim_pfm: 38.54408796862117
episode: 192 training return: tensor(-56.2336, device='cuda:0')
episode: 193 training return: tensor(338.7822, device='cuda:0')
episode: 194 training return: tensor(-62.0380, device='cuda:0')
episode: 195 training return: tensor(340.1594, device='cuda:0')
epoch: 49 test_true_pfm: 2733.344976037131 sim_pfm: -7.627458528770755
episode: 196 training return: tensor(-306.9283, device='cuda:0')
episode: 197 training return: tensor(94.6563, device='cuda:0')
episode: 198 training return: tensor(-247.8075, device='cuda:0')
episode: 199 training return: tensor(132.1909, device='cuda:0')
epoch: 50 test_true_pfm: 3241.8328474506757 sim_pfm: 298.946943291851
episode: 200 training return: tensor(317.8691, device='cuda:0')
episode: 201 training return: tensor(335.4928, device='cuda:0')
episode: 202 training return: tensor(-269.1765, device='cuda:0')
episode: 203 training return: tensor(-370.2958, device='cuda:0')
epoch: 51 test_true_pfm: 3088.090009609548 sim_pfm: 323.53728440425283
episode: 204 training return: tensor(49.5190, device='cuda:0')
episode: 205 training return: tensor(341.6065, device='cuda:0')
episode: 206 training return: tensor(34.9049, device='cuda:0')
episode: 207 training return: tensor(355.3664, device='cuda:0')
epoch: 52 test_true_pfm: 3103.6128643995835 sim_pfm: -200.2051794686025
episode: 208 training return: tensor(376.0026, device='cuda:0')
episode: 209 training return: tensor(353.7001, device='cuda:0')
episode: 210 training return: tensor(-115.7965, device='cuda:0')
episode: 211 training return: tensor(-129.6315, device='cuda:0')
epoch: 53 test_true_pfm: 2270.1915450003944 sim_pfm: 165.7540729228543
episode: 212 training return: tensor(391.2043, device='cuda:0')
episode: 213 training return: tensor(143.5891, device='cuda:0')
episode: 214 training return: tensor(314.3692, device='cuda:0')
episode: 215 training return: tensor(332.5661, device='cuda:0')
epoch: 54 test_true_pfm: 2785.066496812964 sim_pfm: 352.1571632173145
episode: 216 training return: tensor(307.6440, device='cuda:0')
episode: 217 training return: tensor(331.6563, device='cuda:0')
episode: 218 training return: tensor(319.9696, device='cuda:0')
episode: 219 training return: tensor(394.3677, device='cuda:0')
epoch: 55 test_true_pfm: 2721.8102419642714 sim_pfm: 245.81894219059419
episode: 220 training return: tensor(194.4356, device='cuda:0')
episode: 221 training return: tensor(301.9846, device='cuda:0')
episode: 222 training return: tensor(-167.2364, device='cuda:0')
episode: 223 training return: tensor(356.3586, device='cuda:0')
epoch: 56 test_true_pfm: 2689.3615479871546 sim_pfm: 211.46689965368327
episode: 224 training return: tensor(333.5136, device='cuda:0')
episode: 225 training return: tensor(-252.5682, device='cuda:0')
episode: 226 training return: tensor(333.8365, device='cuda:0')
episode: 227 training return: tensor(-365.1580, device='cuda:0')
epoch: 57 test_true_pfm: 3119.7340531765785 sim_pfm: 388.1308193124617
episode: 228 training return: tensor(-246.1947, device='cuda:0')
episode: 229 training return: tensor(-47.0089, device='cuda:0')
episode: 230 training return: tensor(-219.5453, device='cuda:0')
episode: 231 training return: tensor(-294.6958, device='cuda:0')
epoch: 58 test_true_pfm: 2826.5863966872494 sim_pfm: 311.8356464781488
episode: 232 training return: tensor(349.0360, device='cuda:0')
episode: 233 training return: tensor(343.6769, device='cuda:0')
episode: 234 training return: tensor(38.8036, device='cuda:0')
episode: 235 training return: tensor(50.1433, device='cuda:0')
epoch: 59 test_true_pfm: 2472.348156705959 sim_pfm: 161.1807819601187
episode: 236 training return: tensor(-89.0742, device='cuda:0')
episode: 237 training return: tensor(328.0794, device='cuda:0')
episode: 238 training return: tensor(330.9904, device='cuda:0')
episode: 239 training return: tensor(131.0408, device='cuda:0')
epoch: 60 test_true_pfm: 3305.9677926477725 sim_pfm: 347.0116172678827
episode: 240 training return: tensor(323.6463, device='cuda:0')
episode: 241 training return: tensor(-271.9456, device='cuda:0')
episode: 242 training return: tensor(341.8754, device='cuda:0')
episode: 243 training return: tensor(299.2977, device='cuda:0')
epoch: 61 test_true_pfm: 1630.036366480296 sim_pfm: 165.10302124706018
episode: 244 training return: tensor(307.9005, device='cuda:0')
episode: 245 training return: tensor(355.5422, device='cuda:0')
episode: 246 training return: tensor(207.7027, device='cuda:0')
episode: 247 training return: tensor(-32.0466, device='cuda:0')
epoch: 62 test_true_pfm: 3203.2619836045474 sim_pfm: 315.6247982741722
episode: 248 training return: tensor(-180.0392, device='cuda:0')
episode: 249 training return: tensor(350.2787, device='cuda:0')
episode: 250 training return: tensor(352.4191, device='cuda:0')
episode: 251 training return: tensor(242.4058, device='cuda:0')
epoch: 63 test_true_pfm: 3302.6527583690745 sim_pfm: 361.9973569010811
episode: 252 training return: tensor(-268.2578, device='cuda:0')
episode: 253 training return: tensor(100.0604, device='cuda:0')
episode: 254 training return: tensor(380.2135, device='cuda:0')
episode: 255 training return: tensor(340.9180, device='cuda:0')
epoch: 64 test_true_pfm: 2944.2194572979456 sim_pfm: 220.06235316494713
episode: 256 training return: tensor(347.3024, device='cuda:0')
episode: 257 training return: tensor(343.7843, device='cuda:0')
episode: 258 training return: tensor(338.7960, device='cuda:0')
episode: 259 training return: tensor(330.4609, device='cuda:0')
epoch: 65 test_true_pfm: 2728.3981144446752 sim_pfm: 256.3482532018291
episode: 260 training return: tensor(334.9616, device='cuda:0')
episode: 261 training return: tensor(337.4644, device='cuda:0')
episode: 262 training return: tensor(-321.7391, device='cuda:0')
episode: 263 training return: tensor(-365.4668, device='cuda:0')
epoch: 66 test_true_pfm: 3312.975196470274 sim_pfm: 167.07712826053225
episode: 264 training return: tensor(333.6059, device='cuda:0')
episode: 265 training return: tensor(-120.0771, device='cuda:0')
episode: 266 training return: tensor(315.8133, device='cuda:0')
episode: 267 training return: tensor(340.6560, device='cuda:0')
epoch: 67 test_true_pfm: 3090.9495525900797 sim_pfm: 221.38950476405444
episode: 268 training return: tensor(79.0643, device='cuda:0')
episode: 269 training return: tensor(-146.6645, device='cuda:0')
episode: 270 training return: tensor(-370.2550, device='cuda:0')
episode: 271 training return: tensor(-317.6870, device='cuda:0')
epoch: 68 test_true_pfm: 2725.2988175648434 sim_pfm: 292.28968280541204
episode: 272 training return: tensor(394.5806, device='cuda:0')
episode: 273 training return: tensor(-214.6689, device='cuda:0')
episode: 274 training return: tensor(358.5089, device='cuda:0')
episode: 275 training return: tensor(329.9244, device='cuda:0')
epoch: 69 test_true_pfm: 3295.9651703010027 sim_pfm: 362.28684200440574
episode: 276 training return: tensor(-118.8438, device='cuda:0')
episode: 277 training return: tensor(341.9900, device='cuda:0')
episode: 278 training return: tensor(334.2729, device='cuda:0')
episode: 279 training return: tensor(107.4715, device='cuda:0')
epoch: 70 test_true_pfm: 2474.0862211065764 sim_pfm: 342.31291726817534
episode: 280 training return: tensor(348.9531, device='cuda:0')
episode: 281 training return: tensor(338.0738, device='cuda:0')
episode: 282 training return: tensor(344.1482, device='cuda:0')
episode: 283 training return: tensor(331.4623, device='cuda:0')
epoch: 71 test_true_pfm: 3295.7514668352983 sim_pfm: 371.4966113952784
episode: 284 training return: tensor(0.0700, device='cuda:0')
episode: 285 training return: tensor(354.1202, device='cuda:0')
episode: 286 training return: tensor(-195.8680, device='cuda:0')
episode: 287 training return: tensor(195.3086, device='cuda:0')
epoch: 72 test_true_pfm: 3188.7622477158898 sim_pfm: 345.5777772507669
episode: 288 training return: tensor(-364.3593, device='cuda:0')
episode: 289 training return: tensor(354.9360, device='cuda:0')
episode: 290 training return: tensor(-159.8777, device='cuda:0')
episode: 291 training return: tensor(260.0952, device='cuda:0')
epoch: 73 test_true_pfm: 3304.343741203116 sim_pfm: 306.16958152698743
episode: 292 training return: tensor(336.9237, device='cuda:0')
episode: 293 training return: tensor(347.8141, device='cuda:0')
episode: 294 training return: tensor(265.4471, device='cuda:0')
episode: 295 training return: tensor(337.0784, device='cuda:0')
epoch: 74 test_true_pfm: 2389.692033919307 sim_pfm: 320.3377130554679
episode: 296 training return: tensor(337.2625, device='cuda:0')
episode: 297 training return: tensor(338.3135, device='cuda:0')
episode: 298 training return: tensor(336.3561, device='cuda:0')
episode: 299 training return: tensor(332.9419, device='cuda:0')
epoch: 75 test_true_pfm: 3308.5712466471255 sim_pfm: 365.2533087521054
episode: 300 training return: tensor(219.4091, device='cuda:0')
episode: 301 training return: tensor(335.0812, device='cuda:0')
episode: 302 training return: tensor(-267.8781, device='cuda:0')
episode: 303 training return: tensor(324.5592, device='cuda:0')
epoch: 76 test_true_pfm: 3295.977536077445 sim_pfm: 356.71082030270674
episode: 304 training return: tensor(-28.3077, device='cuda:0')
episode: 305 training return: tensor(-144.6299, device='cuda:0')
episode: 306 training return: tensor(228.8977, device='cuda:0')
episode: 307 training return: tensor(248.0258, device='cuda:0')
epoch: 77 test_true_pfm: 3310.2328354169717 sim_pfm: 374.86526792564354
episode: 308 training return: tensor(342.6405, device='cuda:0')
episode: 309 training return: tensor(177.4369, device='cuda:0')
episode: 310 training return: tensor(-321.7794, device='cuda:0')
episode: 311 training return: tensor(-167.5862, device='cuda:0')
epoch: 78 test_true_pfm: 2837.0389597908 sim_pfm: 362.9719745359228
episode: 312 training return: tensor(343.1398, device='cuda:0')
episode: 313 training return: tensor(343.7664, device='cuda:0')
episode: 314 training return: tensor(-52.1304, device='cuda:0')
episode: 315 training return: tensor(349.4833, device='cuda:0')
epoch: 79 test_true_pfm: 2467.8274874404974 sim_pfm: 281.0235437223746
episode: 316 training return: tensor(329.1229, device='cuda:0')
episode: 317 training return: tensor(391.3555, device='cuda:0')
episode: 318 training return: tensor(398.3992, device='cuda:0')
episode: 319 training return: tensor(326.6562, device='cuda:0')
epoch: 80 test_true_pfm: 3294.7767775146094 sim_pfm: 373.3439758874786
episode: 320 training return: tensor(-346.1347, device='cuda:0')
episode: 321 training return: tensor(388.6031, device='cuda:0')
episode: 322 training return: tensor(347.9985, device='cuda:0')
episode: 323 training return: tensor(321.4389, device='cuda:0')
epoch: 81 test_true_pfm: 3318.65267091609 sim_pfm: 380.7974949128305
episode: 324 training return: tensor(336.8053, device='cuda:0')
episode: 325 training return: tensor(328.1713, device='cuda:0')
episode: 326 training return: tensor(273.4781, device='cuda:0')
episode: 327 training return: tensor(346.7768, device='cuda:0')
epoch: 82 test_true_pfm: 2650.5009257030833 sim_pfm: 369.7525190289016
episode: 328 training return: tensor(288.8252, device='cuda:0')
episode: 329 training return: tensor(135.5590, device='cuda:0')
episode: 330 training return: tensor(339.2540, device='cuda:0')
episode: 331 training return: tensor(352.5637, device='cuda:0')
epoch: 83 test_true_pfm: 3278.985128616474 sim_pfm: 329.37989479792304
episode: 332 training return: tensor(326.9055, device='cuda:0')
episode: 333 training return: tensor(335.7490, device='cuda:0')
episode: 334 training return: tensor(215.2467, device='cuda:0')
episode: 335 training return: tensor(-12.0629, device='cuda:0')
epoch: 84 test_true_pfm: 2547.251918046837 sim_pfm: 365.7792241067703
episode: 336 training return: tensor(-135.3433, device='cuda:0')
episode: 337 training return: tensor(345.3132, device='cuda:0')
episode: 338 training return: tensor(349.3569, device='cuda:0')
episode: 339 training return: tensor(334.8394, device='cuda:0')
epoch: 85 test_true_pfm: 3286.3124056477404 sim_pfm: 310.8025729666988
episode: 340 training return: tensor(362.5837, device='cuda:0')
episode: 341 training return: tensor(348.2573, device='cuda:0')
episode: 342 training return: tensor(351.2923, device='cuda:0')
episode: 343 training return: tensor(340.8412, device='cuda:0')
epoch: 86 test_true_pfm: 2883.1721080147377 sim_pfm: 165.42815415437022
episode: 344 training return: tensor(349.1947, device='cuda:0')
episode: 345 training return: tensor(371.2016, device='cuda:0')
episode: 346 training return: tensor(345.5310, device='cuda:0')
episode: 347 training return: tensor(337.7769, device='cuda:0')
epoch: 87 test_true_pfm: 3154.815652068688 sim_pfm: 298.4735638956384
episode: 348 training return: tensor(354.5221, device='cuda:0')
episode: 349 training return: tensor(345.7443, device='cuda:0')
episode: 350 training return: tensor(347.8061, device='cuda:0')
episode: 351 training return: tensor(335.4914, device='cuda:0')
epoch: 88 test_true_pfm: 3156.745931242753 sim_pfm: 164.48524211727394
episode: 352 training return: tensor(348.3757, device='cuda:0')
episode: 353 training return: tensor(105.8452, device='cuda:0')
episode: 354 training return: tensor(172.0385, device='cuda:0')
episode: 355 training return: tensor(-309.5340, device='cuda:0')
epoch: 89 test_true_pfm: 2984.794575750458 sim_pfm: 331.7791497068247
episode: 356 training return: tensor(322.0354, device='cuda:0')
episode: 357 training return: tensor(381.1809, device='cuda:0')
episode: 358 training return: tensor(348.6193, device='cuda:0')
episode: 359 training return: tensor(340.8480, device='cuda:0')
epoch: 90 test_true_pfm: 3056.2537964341786 sim_pfm: 281.13976118912495
episode: 360 training return: tensor(348.0333, device='cuda:0')
episode: 361 training return: tensor(352.3332, device='cuda:0')
episode: 362 training return: tensor(342.6426, device='cuda:0')
episode: 363 training return: tensor(338.2522, device='cuda:0')
epoch: 91 test_true_pfm: 2649.787636433425 sim_pfm: 156.5106493808174
episode: 364 training return: tensor(341.4233, device='cuda:0')
episode: 365 training return: tensor(-372.3311, device='cuda:0')
episode: 366 training return: tensor(354.0970, device='cuda:0')
episode: 367 training return: tensor(-16.0265, device='cuda:0')
epoch: 92 test_true_pfm: 2561.40010823983 sim_pfm: 219.67581152165076
episode: 368 training return: tensor(320.8528, device='cuda:0')
episode: 369 training return: tensor(354.1878, device='cuda:0')
episode: 370 training return: tensor(-110.2465, device='cuda:0')
episode: 371 training return: tensor(322.5902, device='cuda:0')
epoch: 93 test_true_pfm: 3292.8506440219844 sim_pfm: 354.4559710902977
episode: 372 training return: tensor(337.3967, device='cuda:0')
episode: 373 training return: tensor(345.8476, device='cuda:0')
episode: 374 training return: tensor(-240.4969, device='cuda:0')
episode: 375 training return: tensor(360.1860, device='cuda:0')
epoch: 94 test_true_pfm: 2814.4627768832956 sim_pfm: 250.76282247819472
episode: 376 training return: tensor(178.1471, device='cuda:0')
episode: 377 training return: tensor(353.8538, device='cuda:0')
episode: 378 training return: tensor(369.8433, device='cuda:0')
episode: 379 training return: tensor(382.6867, device='cuda:0')
epoch: 95 test_true_pfm: 1777.474177889455 sim_pfm: 366.44793231451575
episode: 380 training return: tensor(349.8506, device='cuda:0')
episode: 381 training return: tensor(334.4391, device='cuda:0')
episode: 382 training return: tensor(392.0265, device='cuda:0')
episode: 383 training return: tensor(357.7985, device='cuda:0')
epoch: 96 test_true_pfm: 3273.2774332338827 sim_pfm: 352.76144384520984
episode: 384 training return: tensor(323.0523, device='cuda:0')
episode: 385 training return: tensor(347.3854, device='cuda:0')
episode: 386 training return: tensor(-100.6791, device='cuda:0')
episode: 387 training return: tensor(339.6458, device='cuda:0')
epoch: 97 test_true_pfm: 3317.934308744978 sim_pfm: 377.62421664999175
episode: 388 training return: tensor(343.3561, device='cuda:0')
episode: 389 training return: tensor(-371.2820, device='cuda:0')
episode: 390 training return: tensor(330.9627, device='cuda:0')
episode: 391 training return: tensor(0.2628, device='cuda:0')
epoch: 98 test_true_pfm: 3291.3638799201312 sim_pfm: -52.1956180423634
episode: 392 training return: tensor(342.3995, device='cuda:0')
episode: 393 training return: tensor(317.9482, device='cuda:0')
episode: 394 training return: tensor(-324.5248, device='cuda:0')
episode: 395 training return: tensor(351.6598, device='cuda:0')
epoch: 99 test_true_pfm: 3308.2079166806993 sim_pfm: 163.93851896097962
episode: 396 training return: tensor(327.0120, device='cuda:0')
episode: 397 training return: tensor(345.4087, device='cuda:0')
episode: 398 training return: tensor(335.5933, device='cuda:0')
episode: 399 training return: tensor(347.3765, device='cuda:0')
epoch: 100 test_true_pfm: 3288.8792308196303 sim_pfm: 232.96098142660534
episode: 400 training return: tensor(-205.1050, device='cuda:0')
episode: 401 training return: tensor(333.4198, device='cuda:0')
episode: 402 training return: tensor(340.3857, device='cuda:0')
episode: 403 training return: tensor(-306.1328, device='cuda:0')
epoch: 101 test_true_pfm: 2673.8150784782933 sim_pfm: 374.96878992090933
episode: 404 training return: tensor(277.6141, device='cuda:0')
episode: 405 training return: tensor(-139.8890, device='cuda:0')
episode: 406 training return: tensor(335.3326, device='cuda:0')
episode: 407 training return: tensor(341.0969, device='cuda:0')
epoch: 102 test_true_pfm: 2311.521835569119 sim_pfm: 254.8056832063982
episode: 408 training return: tensor(369.0284, device='cuda:0')
episode: 409 training return: tensor(344.4657, device='cuda:0')
episode: 410 training return: tensor(84.8200, device='cuda:0')
episode: 411 training return: tensor(228.6146, device='cuda:0')
epoch: 103 test_true_pfm: 2643.8918368495306 sim_pfm: 171.49866562791672
episode: 412 training return: tensor(335.3706, device='cuda:0')
episode: 413 training return: tensor(350.4590, device='cuda:0')
episode: 414 training return: tensor(340.8969, device='cuda:0')
episode: 415 training return: tensor(346.6682, device='cuda:0')
epoch: 104 test_true_pfm: 3298.3055479967006 sim_pfm: 173.92416837375882
episode: 416 training return: tensor(344.9673, device='cuda:0')
episode: 417 training return: tensor(333.2510, device='cuda:0')
episode: 418 training return: tensor(346.1466, device='cuda:0')
episode: 419 training return: tensor(361.9169, device='cuda:0')
epoch: 105 test_true_pfm: 3304.336848850098 sim_pfm: 371.45627062457305
episode: 420 training return: tensor(346.2935, device='cuda:0')
episode: 421 training return: tensor(335.9464, device='cuda:0')
episode: 422 training return: tensor(339.5168, device='cuda:0')
episode: 423 training return: tensor(340.6450, device='cuda:0')
epoch: 106 test_true_pfm: 3292.4989633352357 sim_pfm: 265.70337558168103
episode: 424 training return: tensor(-17.3351, device='cuda:0')
episode: 425 training return: tensor(342.8477, device='cuda:0')
episode: 426 training return: tensor(350.5317, device='cuda:0')
episode: 427 training return: tensor(242.1969, device='cuda:0')
epoch: 107 test_true_pfm: 2647.906245282236 sim_pfm: 363.84218890359625
episode: 428 training return: tensor(54.5075, device='cuda:0')
episode: 429 training return: tensor(114.7316, device='cuda:0')
episode: 430 training return: tensor(44.0597, device='cuda:0')
episode: 431 training return: tensor(343.4810, device='cuda:0')
epoch: 108 test_true_pfm: 3287.7434407319274 sim_pfm: 310.9887152825443
episode: 432 training return: tensor(-99.4052, device='cuda:0')
episode: 433 training return: tensor(338.8110, device='cuda:0')
episode: 434 training return: tensor(336.3989, device='cuda:0')
episode: 435 training return: tensor(345.2836, device='cuda:0')
epoch: 109 test_true_pfm: 3066.3189135874636 sim_pfm: 377.7626919791316
episode: 436 training return: tensor(356.7555, device='cuda:0')
episode: 437 training return: tensor(333.8327, device='cuda:0')
episode: 438 training return: tensor(-235.7828, device='cuda:0')
episode: 439 training return: tensor(335.6129, device='cuda:0')
epoch: 110 test_true_pfm: 3158.446547135645 sim_pfm: 294.82254757833044
episode: 440 training return: tensor(399.4645, device='cuda:0')
episode: 441 training return: tensor(291.5925, device='cuda:0')
episode: 442 training return: tensor(373.5707, device='cuda:0')
episode: 443 training return: tensor(370.0352, device='cuda:0')
epoch: 111 test_true_pfm: 3313.569257712219 sim_pfm: 378.70277504386223
episode: 444 training return: tensor(349.2821, device='cuda:0')
episode: 445 training return: tensor(339.9670, device='cuda:0')
episode: 446 training return: tensor(341.7507, device='cuda:0')
episode: 447 training return: tensor(343.7669, device='cuda:0')
epoch: 112 test_true_pfm: 3312.987330243879 sim_pfm: 374.5316186075409
episode: 448 training return: tensor(323.9066, device='cuda:0')
episode: 449 training return: tensor(-62.0687, device='cuda:0')
episode: 450 training return: tensor(-296.1358, device='cuda:0')
episode: 451 training return: tensor(-241.4885, device='cuda:0')
epoch: 113 test_true_pfm: 2736.3317276636867 sim_pfm: 264.13895689633983
episode: 452 training return: tensor(360.9564, device='cuda:0')
episode: 453 training return: tensor(-243.0461, device='cuda:0')
episode: 454 training return: tensor(335.5240, device='cuda:0')
episode: 455 training return: tensor(392.7415, device='cuda:0')
epoch: 114 test_true_pfm: 3119.76325930414 sim_pfm: 219.67714358730396
episode: 456 training return: tensor(-123.1654, device='cuda:0')
episode: 457 training return: tensor(337.0366, device='cuda:0')
episode: 458 training return: tensor(345.2307, device='cuda:0')
episode: 459 training return: tensor(342.5607, device='cuda:0')
epoch: 115 test_true_pfm: 3071.6596603049898 sim_pfm: 364.9544436532694
episode: 460 training return: tensor(333.4064, device='cuda:0')
episode: 461 training return: tensor(343.0399, device='cuda:0')
episode: 462 training return: tensor(279.9321, device='cuda:0')
episode: 463 training return: tensor(341.0383, device='cuda:0')
epoch: 116 test_true_pfm: 3308.5846691930165 sim_pfm: 370.7648264853633
episode: 464 training return: tensor(347.8497, device='cuda:0')
episode: 465 training return: tensor(85.2056, device='cuda:0')
episode: 466 training return: tensor(338.9854, device='cuda:0')
episode: 467 training return: tensor(329.1193, device='cuda:0')
epoch: 117 test_true_pfm: 3302.9561271220286 sim_pfm: 373.20230413858854
episode: 468 training return: tensor(329.5864, device='cuda:0')
episode: 469 training return: tensor(-223.3120, device='cuda:0')
episode: 470 training return: tensor(343.1414, device='cuda:0')
episode: 471 training return: tensor(341.5482, device='cuda:0')
epoch: 118 test_true_pfm: 3302.8146583441703 sim_pfm: 180.4791515057053
episode: 472 training return: tensor(352.3565, device='cuda:0')
episode: 473 training return: tensor(308.8009, device='cuda:0')
episode: 474 training return: tensor(4.3511, device='cuda:0')
episode: 475 training return: tensor(349.5568, device='cuda:0')
epoch: 119 test_true_pfm: 3287.8119428520135 sim_pfm: 366.65057454110746
episode: 476 training return: tensor(396.2939, device='cuda:0')
episode: 477 training return: tensor(347.4560, device='cuda:0')
episode: 478 training return: tensor(336.0251, device='cuda:0')
episode: 479 training return: tensor(344.9619, device='cuda:0')
epoch: 120 test_true_pfm: 3285.1201036623174 sim_pfm: 279.4162983639787
episode: 480 training return: tensor(333.1230, device='cuda:0')
episode: 481 training return: tensor(340.2221, device='cuda:0')
episode: 482 training return: tensor(356.8036, device='cuda:0')
episode: 483 training return: tensor(343.0035, device='cuda:0')
epoch: 121 test_true_pfm: 3287.8562718044955 sim_pfm: 262.07476408795145
episode: 484 training return: tensor(346.6768, device='cuda:0')
episode: 485 training return: tensor(353.4861, device='cuda:0')
episode: 486 training return: tensor(353.1740, device='cuda:0')
episode: 487 training return: tensor(-268.6737, device='cuda:0')
epoch: 122 test_true_pfm: 2916.6377868862123 sim_pfm: 368.5336762283211
episode: 488 training return: tensor(338.0053, device='cuda:0')
episode: 489 training return: tensor(-202.9934, device='cuda:0')
episode: 490 training return: tensor(130.9968, device='cuda:0')
episode: 491 training return: tensor(57.5081, device='cuda:0')
epoch: 123 test_true_pfm: 3066.0333794250473 sim_pfm: 368.30810254228226
episode: 492 training return: tensor(344.9729, device='cuda:0')
episode: 493 training return: tensor(359.3708, device='cuda:0')
episode: 494 training return: tensor(-204.8276, device='cuda:0')
episode: 495 training return: tensor(355.6082, device='cuda:0')
epoch: 124 test_true_pfm: 3294.948342561158 sim_pfm: 251.09324021094167
episode: 496 training return: tensor(-196.0576, device='cuda:0')
episode: 497 training return: tensor(351.8665, device='cuda:0')
episode: 498 training return: tensor(-119.3122, device='cuda:0')
episode: 499 training return: tensor(325.0681, device='cuda:0')
epoch: 125 test_true_pfm: 3304.6132081518967 sim_pfm: 365.9696787153759
episode: 500 training return: tensor(161.5914, device='cuda:0')
episode: 501 training return: tensor(352.9042, device='cuda:0')
episode: 502 training return: tensor(-264.2807, device='cuda:0')
episode: 503 training return: tensor(337.9909, device='cuda:0')
epoch: 126 test_true_pfm: 3290.707992353393 sim_pfm: 368.63127551660483
episode: 504 training return: tensor(357.1968, device='cuda:0')
episode: 505 training return: tensor(251.8359, device='cuda:0')
episode: 506 training return: tensor(329.7035, device='cuda:0')
episode: 507 training return: tensor(93.0812, device='cuda:0')
epoch: 127 test_true_pfm: 3298.35467748096 sim_pfm: 370.2984301834561
episode: 508 training return: tensor(360.6739, device='cuda:0')
episode: 509 training return: tensor(339.1387, device='cuda:0')
episode: 510 training return: tensor(361.3590, device='cuda:0')
episode: 511 training return: tensor(-342.1722, device='cuda:0')
epoch: 128 test_true_pfm: 3297.5576048588687 sim_pfm: 371.50897184949525
episode: 512 training return: tensor(339.9198, device='cuda:0')
episode: 513 training return: tensor(358.4287, device='cuda:0')
episode: 514 training return: tensor(338.2242, device='cuda:0')
episode: 515 training return: tensor(342.0563, device='cuda:0')
epoch: 129 test_true_pfm: 3305.7006728723536 sim_pfm: 374.33596433481824
episode: 516 training return: tensor(332.8691, device='cuda:0')
episode: 517 training return: tensor(341.0672, device='cuda:0')
episode: 518 training return: tensor(359.5360, device='cuda:0')
episode: 519 training return: tensor(344.6783, device='cuda:0')
epoch: 130 test_true_pfm: 3314.3672079083026 sim_pfm: 373.05133971909527
episode: 520 training return: tensor(401.7180, device='cuda:0')
episode: 521 training return: tensor(85.2565, device='cuda:0')
episode: 522 training return: tensor(343.9916, device='cuda:0')
episode: 523 training return: tensor(343.6412, device='cuda:0')
epoch: 131 test_true_pfm: 3322.506611899438 sim_pfm: 378.96455942007015
episode: 524 training return: tensor(146.3801, device='cuda:0')
episode: 525 training return: tensor(347.9700, device='cuda:0')
episode: 526 training return: tensor(343.4687, device='cuda:0')
episode: 527 training return: tensor(349.4302, device='cuda:0')
epoch: 132 test_true_pfm: 3294.6852127800976 sim_pfm: 369.96066444572836
episode: 528 training return: tensor(114.1879, device='cuda:0')
episode: 529 training return: tensor(-379.4896, device='cuda:0')
episode: 530 training return: tensor(344.4273, device='cuda:0')
episode: 531 training return: tensor(342.1555, device='cuda:0')
epoch: 133 test_true_pfm: 3162.3214396031976 sim_pfm: 234.28028853501505
episode: 532 training return: tensor(347.1616, device='cuda:0')
episode: 533 training return: tensor(347.1731, device='cuda:0')
episode: 534 training return: tensor(357.5038, device='cuda:0')
episode: 535 training return: tensor(350.5988, device='cuda:0')
epoch: 134 test_true_pfm: 3304.7313676159943 sim_pfm: 373.9460442873339
episode: 536 training return: tensor(360.3752, device='cuda:0')
episode: 537 training return: tensor(328.2203, device='cuda:0')
episode: 538 training return: tensor(365.7135, device='cuda:0')
episode: 539 training return: tensor(344.6635, device='cuda:0')
epoch: 135 test_true_pfm: 3327.0590742608097 sim_pfm: 383.6580405772159
episode: 540 training return: tensor(344.9447, device='cuda:0')
episode: 541 training return: tensor(-1.1876, device='cuda:0')
episode: 542 training return: tensor(343.2730, device='cuda:0')
episode: 543 training return: tensor(373.8477, device='cuda:0')
epoch: 136 test_true_pfm: 3322.237714257673 sim_pfm: 379.69310171040706
episode: 544 training return: tensor(333.6932, device='cuda:0')
episode: 545 training return: tensor(337.4652, device='cuda:0')
episode: 546 training return: tensor(351.6038, device='cuda:0')
episode: 547 training return: tensor(-149.5683, device='cuda:0')
epoch: 137 test_true_pfm: 3314.5204372974295 sim_pfm: 381.7182899914139
episode: 548 training return: tensor(346.6286, device='cuda:0')
episode: 549 training return: tensor(328.5818, device='cuda:0')
episode: 550 training return: tensor(356.7452, device='cuda:0')
episode: 551 training return: tensor(343.5820, device='cuda:0')
epoch: 138 test_true_pfm: 3304.5855538892406 sim_pfm: 367.9059149430832
episode: 552 training return: tensor(361.8439, device='cuda:0')
episode: 553 training return: tensor(357.8442, device='cuda:0')
episode: 554 training return: tensor(378.5727, device='cuda:0')
episode: 555 training return: tensor(404.0384, device='cuda:0')
epoch: 139 test_true_pfm: 3302.1501391908164 sim_pfm: 372.02402122677694
episode: 556 training return: tensor(329.0851, device='cuda:0')
episode: 557 training return: tensor(-27.8190, device='cuda:0')
episode: 558 training return: tensor(346.4158, device='cuda:0')
episode: 559 training return: tensor(345.7149, device='cuda:0')
epoch: 140 test_true_pfm: 3312.089120145299 sim_pfm: 379.548517545724
episode: 560 training return: tensor(343.3770, device='cuda:0')
episode: 561 training return: tensor(-137.2450, device='cuda:0')
episode: 562 training return: tensor(-155.3367, device='cuda:0')
episode: 563 training return: tensor(365.4055, device='cuda:0')
epoch: 141 test_true_pfm: 3294.5186845978656 sim_pfm: 372.3984410917231
episode: 564 training return: tensor(410.3194, device='cuda:0')
episode: 565 training return: tensor(349.3836, device='cuda:0')
episode: 566 training return: tensor(373.3583, device='cuda:0')
episode: 567 training return: tensor(-144.6550, device='cuda:0')
epoch: 142 test_true_pfm: 3321.7061454169866 sim_pfm: 381.4637711135826
episode: 568 training return: tensor(348.8459, device='cuda:0')
episode: 569 training return: tensor(331.3307, device='cuda:0')
episode: 570 training return: tensor(348.9850, device='cuda:0')
episode: 571 training return: tensor(326.0320, device='cuda:0')
epoch: 143 test_true_pfm: 3315.4473481629843 sim_pfm: 379.3939779563807
episode: 572 training return: tensor(199.9636, device='cuda:0')
episode: 573 training return: tensor(376.6149, device='cuda:0')
episode: 574 training return: tensor(187.8287, device='cuda:0')
episode: 575 training return: tensor(343.7370, device='cuda:0')
epoch: 144 test_true_pfm: 3296.096740372119 sim_pfm: 369.1350310893419
episode: 576 training return: tensor(348.2791, device='cuda:0')
episode: 577 training return: tensor(-98.8103, device='cuda:0')
episode: 578 training return: tensor(385.9520, device='cuda:0')
episode: 579 training return: tensor(375.8514, device='cuda:0')
epoch: 145 test_true_pfm: 3321.7626629116608 sim_pfm: 375.6675183752862
episode: 580 training return: tensor(339.6321, device='cuda:0')
episode: 581 training return: tensor(342.0946, device='cuda:0')
episode: 582 training return: tensor(350.8032, device='cuda:0')
episode: 583 training return: tensor(350.2096, device='cuda:0')
epoch: 146 test_true_pfm: 3307.9883431927597 sim_pfm: 372.5350964584698
episode: 584 training return: tensor(350.9936, device='cuda:0')
episode: 585 training return: tensor(358.5045, device='cuda:0')
episode: 586 training return: tensor(354.4972, device='cuda:0')
episode: 587 training return: tensor(-233.2143, device='cuda:0')
epoch: 147 test_true_pfm: 3309.9255369909565 sim_pfm: 257.462470468871
episode: 588 training return: tensor(350.0817, device='cuda:0')
episode: 589 training return: tensor(403.3297, device='cuda:0')
episode: 590 training return: tensor(327.3588, device='cuda:0')
episode: 591 training return: tensor(361.1345, device='cuda:0')
epoch: 148 test_true_pfm: 3308.298086749559 sim_pfm: 374.5405043390735
episode: 592 training return: tensor(337.6236, device='cuda:0')
episode: 593 training return: tensor(272.9294, device='cuda:0')
episode: 594 training return: tensor(238.3166, device='cuda:0')
episode: 595 training return: tensor(-117.5688, device='cuda:0')
epoch: 149 test_true_pfm: 3329.0590648834254 sim_pfm: 383.26006871811114
episode: 596 training return: tensor(346.0767, device='cuda:0')
episode: 597 training return: tensor(-384.1284, device='cuda:0')
episode: 598 training return: tensor(162.4632, device='cuda:0')
episode: 599 training return: tensor(73.8813, device='cuda:0')
epoch: 150 test_true_pfm: 3316.648814460647 sim_pfm: 373.6967295417562
