['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '1', '--data', '30000']
epoch: 0 training_loss 0.360034596323967 test_loss: 0.21676509380340575
epoch: 1 training_loss 0.1891719102859497 test_loss: 0.16723572015762328
epoch: 2 training_loss 0.15597242921590804 test_loss: 0.15298082828521728
epoch: 3 training_loss 0.1402403347194195 test_loss: 0.1386621356010437
epoch: 4 training_loss 0.12558629646897315 test_loss: 0.13608165979385375
epoch: 5 training_loss 0.12778029832988977 test_loss: 0.12427705526351929
epoch: 6 training_loss 0.12111940857023001 test_loss: 0.12072064876556396
epoch: 7 training_loss 0.11805341996252537 test_loss: 0.12533406019210816
epoch: 8 training_loss 0.116416149251163 test_loss: 0.12742536067962645
epoch: 9 training_loss 0.11413563679903746 test_loss: 0.1002741813659668
epoch: 10 training_loss 0.1056999146938324 test_loss: 0.12148038148880005
epoch: 11 training_loss 0.1118875784613192 test_loss: 0.12392537593841553
epoch: 12 training_loss 0.11554124288260936 test_loss: 0.12633179426193236
epoch: 13 training_loss 0.10594519370235503 test_loss: 0.11700584888458251
epoch: 14 training_loss 0.11889039572328329 test_loss: 0.11365382671356201
epoch: 15 training_loss 0.1115002503618598 test_loss: 0.09732704162597657
epoch: 16 training_loss 0.1136421375721693 test_loss: 0.0983979046344757
epoch: 17 training_loss 0.12075659113004804 test_loss: 0.1258481979370117
epoch: 18 training_loss 0.10967504609376193 test_loss: 0.1268623113632202
epoch: 19 training_loss 0.10605861727148294 test_loss: 0.12963608503341675
epoch: 20 training_loss 0.1109929507598281 test_loss: 0.12679152488708495
epoch: 21 training_loss 0.10704671364277601 test_loss: 0.12522727251052856
epoch: 22 training_loss 0.11469648186117411 test_loss: 0.13119752407073976
epoch: 23 training_loss 0.10526261983439326 test_loss: 0.10951690673828125
epoch: 24 training_loss 0.10760133095085621 test_loss: 0.12745273113250732
epoch: 25 training_loss 0.10190578993409873 test_loss: 0.11590255498886108
epoch: 26 training_loss 0.105412588249892 test_loss: 0.10972750186920166
epoch: 27 training_loss 0.10198591489344835 test_loss: 0.10750759840011596
epoch: 28 training_loss 0.10511350706219673 test_loss: 0.11829718351364135
epoch: 29 training_loss 0.10601552959531546 test_loss: 0.12245738506317139
epoch: 30 training_loss 0.1037644476071 test_loss: 0.109110426902771
epoch: 31 training_loss 0.10431167658418417 test_loss: 0.1000239372253418
epoch: 32 training_loss 0.09757625728845597 test_loss: 0.10804258584976197
epoch: 33 training_loss 0.10340638848021627 test_loss: 0.1052203893661499
epoch: 34 training_loss 0.10744167428463697 test_loss: 0.0981325626373291
epoch: 35 training_loss 0.11154269214719534 test_loss: 0.1163744568824768
epoch: 36 training_loss 0.10417726196348667 test_loss: 0.10512853860855102
epoch: 37 training_loss 0.10509531069546937 test_loss: 0.10746226310729981
epoch: 38 training_loss 0.09797914495691656 test_loss: 0.12280381917953491
epoch: 39 training_loss 0.09726616172119976 test_loss: 0.10923374891281128
epoch: 40 training_loss 0.10508411254733802 test_loss: 0.12268027067184448
epoch: 41 training_loss 0.10958972282707691 test_loss: 0.11106570959091186
epoch: 42 training_loss 0.09885941099375486 test_loss: 0.12095085382461548
epoch: 43 training_loss 0.11256661029532551 test_loss: 0.12226237058639526
epoch: 44 training_loss 0.10416381262242794 test_loss: 0.10957406759262085
epoch: 45 training_loss 0.10453235819935798 test_loss: 0.10784636735916138
epoch: 46 training_loss 0.10818067813292145 test_loss: 0.12026199102401733
epoch: 47 training_loss 0.10744901856407524 test_loss: 0.10880441665649414
epoch: 48 training_loss 0.10528922749683262 test_loss: 0.1084052562713623
epoch: 49 training_loss 0.10926165273413062 test_loss: 0.08985607624053955
epoch: 50 training_loss 0.10762955628335476 test_loss: 0.10275192260742187
epoch: 51 training_loss 0.10366872550919652 test_loss: 0.113667893409729
epoch: 52 training_loss 0.10120276873931289 test_loss: 0.09653162956237793
epoch: 53 training_loss 0.10424619980156422 test_loss: 0.1116412878036499
epoch: 54 training_loss 0.10595953986048698 test_loss: 0.1254602074623108
epoch: 55 training_loss 0.10576260779052973 test_loss: 0.12144696712493896
epoch: 56 training_loss 0.11155665893107652 test_loss: 0.10877710580825806
epoch: 57 training_loss 0.10566527690738439 test_loss: 0.10313607454299926
epoch: 58 training_loss 0.10088795891031622 test_loss: 0.10133093595504761
epoch: 59 training_loss 0.09791263546794653 test_loss: 0.10613096952438354
epoch: 60 training_loss 0.10025216739624739 test_loss: 0.1146549940109253
epoch: 61 training_loss 0.1033960720896721 test_loss: 0.11470960378646851
epoch: 62 training_loss 0.10450394354760646 test_loss: 0.11484768390655517
epoch: 63 training_loss 0.09927550645545125 test_loss: 0.09372798800468445
epoch: 64 training_loss 0.109849244505167 test_loss: 0.11032595634460449
epoch: 65 training_loss 0.09869206991046667 test_loss: 0.09394143223762512
epoch: 66 training_loss 0.10129473177716136 test_loss: 0.08761911392211914
epoch: 67 training_loss 0.10765847964212298 test_loss: 0.11910513639450074
epoch: 68 training_loss 0.09716485511511565 test_loss: 0.0896589457988739
epoch: 69 training_loss 0.10246495513245463 test_loss: 0.11161330938339234
epoch: 70 training_loss 0.1040821597725153 test_loss: 0.1208153486251831
epoch: 71 training_loss 0.10078154951334 test_loss: 0.10668537616729737
epoch: 72 training_loss 0.10645774206146598 test_loss: 0.11126055717468261
epoch: 73 training_loss 0.09939445376396179 test_loss: 0.10047513246536255
epoch: 74 training_loss 0.10401273630559445 test_loss: 0.11070572137832642
epoch: 75 training_loss 0.10291268955916166 test_loss: 0.11398448944091796
epoch: 76 training_loss 0.09719849007204175 test_loss: 0.11811405420303345
epoch: 77 training_loss 0.09896268168464303 test_loss: 0.11062538623809814
epoch: 78 training_loss 0.10463768670335412 test_loss: 0.11563618183135986
epoch: 79 training_loss 0.09914069959893823 test_loss: 0.10869468450546264
epoch: 80 training_loss 0.0987124440446496 test_loss: 0.10792734622955322
epoch: 81 training_loss 0.09711329589597881 test_loss: 0.10715527534484863
epoch: 82 training_loss 0.10426265504211188 test_loss: 0.12309929132461547
epoch: 83 training_loss 0.10094536699354649 test_loss: 0.09836779236793518
epoch: 84 training_loss 0.09732503036037088 test_loss: 0.11051636934280396
epoch: 85 training_loss 0.10351921135559677 test_loss: 0.12114568948745727
epoch: 86 training_loss 0.10046229546889662 test_loss: 0.11591968536376954
epoch: 87 training_loss 0.09760593710467219 test_loss: 0.10657918453216553
epoch: 88 training_loss 0.10232955403625965 test_loss: 0.1134761929512024
epoch: 89 training_loss 0.09740718822926282 test_loss: 0.11773196458816529
epoch: 90 training_loss 0.10691992573440075 test_loss: 0.11295812129974366
epoch: 91 training_loss 0.0893697314709425 test_loss: 0.09921795725822449
epoch: 92 training_loss 0.0977594456076622 test_loss: 0.10128566026687622
epoch: 93 training_loss 0.09828494474291802 test_loss: 0.11206634044647217
epoch: 94 training_loss 0.10658574171364307 test_loss: 0.11108448505401611
epoch: 95 training_loss 0.10434409596025944 test_loss: 0.12218766212463379
epoch: 96 training_loss 0.10434731224551796 test_loss: 0.10461000204086304
epoch: 97 training_loss 0.0970419829711318 test_loss: 0.10636509656906128
epoch: 98 training_loss 0.09339551134034992 test_loss: 0.11163614988327027
epoch: 99 training_loss 0.09236511528491974 test_loss: 0.12500765323638915
epoch: 100 training_loss 0.10202446455135941 test_loss: 0.11821622848510742
epoch: 101 training_loss 0.10561194628477097 test_loss: 0.12029480934143066
epoch: 102 training_loss 0.09232858560979366 test_loss: 0.09833863973617554
epoch: 103 training_loss 0.09618074331432581 test_loss: 0.11904429197311402
epoch: 104 training_loss 0.08972625777125359 test_loss: 0.12109744548797607
epoch: 105 training_loss 0.10165291663259268 test_loss: 0.09689438343048096
epoch: 106 training_loss 0.09804657381027937 test_loss: 0.10030921697616577
epoch: 107 training_loss 0.10324648723006248 test_loss: 0.09773694276809693
epoch: 108 training_loss 0.09685815549455583 test_loss: 0.10309454202651977
epoch: 109 training_loss 0.09424188196659088 test_loss: 0.11750388145446777
epoch: 110 training_loss 0.10029131004586817 test_loss: 0.11817437410354614
epoch: 111 training_loss 0.09670943219214678 test_loss: 0.10082236528396607
epoch: 112 training_loss 0.09065705129876733 test_loss: 0.10139143466949463
epoch: 113 training_loss 0.10253759656101465 test_loss: 0.12070367336273194
epoch: 114 training_loss 0.10040931407362223 test_loss: 0.13071033954620362
epoch: 115 training_loss 0.10058883808553219 test_loss: 0.10197737216949462
epoch: 116 training_loss 0.09568920100107789 test_loss: 0.10135369300842285
epoch: 117 training_loss 0.09452845308929682 test_loss: 0.09963282942771912
epoch: 118 training_loss 0.09401883941143752 test_loss: 0.12328095436096191
epoch: 119 training_loss 0.09597379729151725 test_loss: 0.11464071273803711
epoch: 120 training_loss 0.09724002601578832 test_loss: 0.10480858087539673
epoch: 121 training_loss 0.1016847974061966 test_loss: 0.13067317008972168
epoch: 122 training_loss 0.09430652087554335 test_loss: 0.13059626817703246
epoch: 123 training_loss 0.09424640329554677 test_loss: 0.10392656326293945
epoch: 124 training_loss 0.09417997909709812 test_loss: 0.11553486585617065
epoch: 125 training_loss 0.10095241969451309 test_loss: 0.12214529514312744
epoch: 126 training_loss 0.10681488178670406 test_loss: 0.11279141902923584
epoch: 127 training_loss 0.09817595027387142 test_loss: 0.1148297905921936
epoch: 128 training_loss 0.09237387949600816 test_loss: 0.10295097827911377
epoch: 129 training_loss 0.09080186758190394 test_loss: 0.10363003015518188
epoch: 130 training_loss 0.10004541745409369 test_loss: 0.11433475017547608
epoch: 131 training_loss 0.09694772707298398 test_loss: 0.10765279531478882
epoch: 132 training_loss 0.09450683664530515 test_loss: 0.09340587258338928
epoch: 133 training_loss 0.09525757843628525 test_loss: 0.11693836450576782
epoch: 134 training_loss 0.09672110395506024 test_loss: 0.09517586827278138
epoch: 135 training_loss 0.09846871795132756 test_loss: 0.09645664691925049
epoch: 136 training_loss 0.0957923992164433 test_loss: 0.11688129901885987
epoch: 137 training_loss 0.09875598654150963 test_loss: 0.12154557704925537
epoch: 138 training_loss 0.09610672223381699 test_loss: 0.09975451231002808
epoch: 139 training_loss 0.09885227490216493 test_loss: 0.11463141441345215
epoch: 140 training_loss 0.09912443198263646 test_loss: 0.12265202999114991
epoch: 141 training_loss 0.09549199383705854 test_loss: 0.09900081753730774
epoch: 142 training_loss 0.1017178805731237 test_loss: 0.09970920085906983
epoch: 143 training_loss 0.09801133571192622 test_loss: 0.09844036102294922
epoch: 144 training_loss 0.09757969573140145 test_loss: 0.1343248724937439
epoch: 145 training_loss 0.09977232649922371 test_loss: 0.10825185775756836
epoch: 146 training_loss 0.09583276249468327 test_loss: 0.10830123424530029
epoch: 147 training_loss 0.09973598221316933 test_loss: 0.10261253118515015
epoch: 148 training_loss 0.09780713230371475 test_loss: 0.10302828550338745
epoch: 149 training_loss 0.09838204808533192 test_loss: 0.11551297903060913
epoch: 0 training_loss 52.11868055343628 test_loss: 25.8425048828125
epoch: 1 training_loss 19.16877751350403 test_loss: 15.432923889160156
epoch: 2 training_loss 13.49162486076355 test_loss: 11.810330200195313
epoch: 3 training_loss 10.594159841537476 test_loss: 9.695252990722656
epoch: 4 training_loss 8.882902669906617 test_loss: 8.077071380615234
epoch: 5 training_loss 7.877848725318909 test_loss: 7.329130554199219
epoch: 6 training_loss 6.839022316932678 test_loss: 6.4898124694824215
epoch: 7 training_loss 6.145166420936585 test_loss: 5.810174179077149
epoch: 8 training_loss 5.743170948028564 test_loss: 5.415487670898438
epoch: 9 training_loss 5.243156294822693 test_loss: 5.09140625
epoch: 10 training_loss 4.812134609222412 test_loss: 4.5715888977050785
epoch: 11 training_loss 4.561971716880798 test_loss: 4.4646247863769535
epoch: 12 training_loss 4.306104815006256 test_loss: 4.266321182250977
epoch: 13 training_loss 4.092660813331604 test_loss: 4.032835006713867
epoch: 14 training_loss 3.925463182926178 test_loss: 3.809468460083008
epoch: 15 training_loss 3.641291286945343 test_loss: 3.772671127319336
epoch: 16 training_loss 3.6215398502349854 test_loss: 3.3801124572753904
epoch: 17 training_loss 3.4167439556121826 test_loss: 3.5564105987548826
epoch: 18 training_loss 3.333991174697876 test_loss: 3.2534481048583985
epoch: 19 training_loss 3.244315085411072 test_loss: 3.286336898803711
epoch: 20 training_loss 3.085802276134491 test_loss: 3.0021596908569337
epoch: 21 training_loss 3.0298405694961548 test_loss: 2.965224838256836
epoch: 22 training_loss 2.9050039267539978 test_loss: 2.9602373123168944
epoch: 23 training_loss 2.8264950037002565 test_loss: 2.7877630233764648
epoch: 24 training_loss 2.8221474719047546 test_loss: 2.719154930114746
epoch: 25 training_loss 2.715616798400879 test_loss: 2.803327751159668
epoch: 26 training_loss 2.630118293762207 test_loss: 2.5786584854125976
epoch: 27 training_loss 2.6267706823348997 test_loss: 2.7149581909179688
epoch: 28 training_loss 2.5557056403160097 test_loss: 2.475086212158203
epoch: 29 training_loss 2.513321044445038 test_loss: 2.62528076171875
epoch: 30 training_loss 2.4359446215629577 test_loss: 2.4703485488891603
epoch: 31 training_loss 2.421624310016632 test_loss: 2.3489238739013674
epoch: 32 training_loss 2.369060115814209 test_loss: 2.3520767211914064
epoch: 33 training_loss 2.394869096279144 test_loss: 2.3068258285522463
epoch: 34 training_loss 2.2968562483787536 test_loss: 2.24071044921875
epoch: 35 training_loss 2.2665636420249937 test_loss: 2.421584892272949
epoch: 36 training_loss 2.2433852684497833 test_loss: 2.2383432388305664
epoch: 37 training_loss 2.2339953649044038 test_loss: 2.164655876159668
epoch: 38 training_loss 2.1843455457687377 test_loss: 2.261923599243164
epoch: 39 training_loss 2.1881776678562166 test_loss: 2.2215131759643554
epoch: 40 training_loss 2.1365891194343565 test_loss: 2.243205451965332
epoch: 41 training_loss 2.1512040269374846 test_loss: 2.1392478942871094
epoch: 42 training_loss 2.11614089012146 test_loss: 2.1354743957519533
epoch: 43 training_loss 2.0950282430648803 test_loss: 2.1270563125610353
epoch: 44 training_loss 2.093614000082016 test_loss: 1.9608854293823241
epoch: 45 training_loss 2.030263918638229 test_loss: 2.163016700744629
epoch: 46 training_loss 2.0110338592529295 test_loss: 2.085067939758301
epoch: 47 training_loss 1.9948244655132295 test_loss: 2.0752704620361326
epoch: 48 training_loss 1.9982460606098176 test_loss: 2.0003259658813475
epoch: 49 training_loss 1.9631664526462556 test_loss: 2.0858083724975587
epoch: 50 training_loss 1.9663261902332305 test_loss: 2.0012367248535154
epoch: 51 training_loss 1.9772027480602263 test_loss: 2.02099723815918
epoch: 52 training_loss 1.953628865480423 test_loss: 2.039212226867676
epoch: 53 training_loss 1.898050971031189 test_loss: 2.055587387084961
epoch: 54 training_loss 1.9306955301761628 test_loss: 1.9135683059692383
epoch: 55 training_loss 1.9258301091194152 test_loss: 1.8761714935302733
epoch: 56 training_loss 1.9150244069099427 test_loss: 1.8729801177978516
epoch: 57 training_loss 1.8748387014865875 test_loss: 1.8338411331176758
epoch: 58 training_loss 1.844726880788803 test_loss: 1.8656808853149414
epoch: 59 training_loss 1.8401023435592652 test_loss: 1.8809564590454102
epoch: 60 training_loss 1.8599258577823639 test_loss: 1.869114875793457
epoch: 61 training_loss 1.8650784265995026 test_loss: 1.8731975555419922
epoch: 62 training_loss 1.840499370098114 test_loss: 1.9022327423095704
epoch: 63 training_loss 1.823026568889618 test_loss: 1.8596635818481446
epoch: 64 training_loss 1.8235835123062134 test_loss: 1.822110939025879
epoch: 65 training_loss 1.7786297500133514 test_loss: 1.762034034729004
epoch: 66 training_loss 1.7855676460266112 test_loss: 1.8213611602783204
epoch: 67 training_loss 1.7942739176750182 test_loss: 1.7466569900512696
epoch: 68 training_loss 1.7849806034564972 test_loss: 1.8483110427856446
epoch: 69 training_loss 1.7511776316165923 test_loss: 1.7609556198120118
epoch: 70 training_loss 1.749348028898239 test_loss: 1.826762580871582
epoch: 71 training_loss 1.7499999678134919 test_loss: 1.8388137817382812
epoch: 72 training_loss 1.7806307065486908 test_loss: 1.7714458465576173
epoch: 73 training_loss 1.7607374536991118 test_loss: 1.8146827697753907
epoch: 74 training_loss 1.7596537244319916 test_loss: 1.7729413986206055
epoch: 75 training_loss 1.7210594165325164 test_loss: 1.7137517929077148
epoch: 76 training_loss 1.7449859821796416 test_loss: 1.66558895111084
epoch: 77 training_loss 1.746491289138794 test_loss: 1.783381462097168
epoch: 78 training_loss 1.7256267809867858 test_loss: 1.7045066833496094
epoch: 79 training_loss 1.7041269147396088 test_loss: 1.7792163848876954
epoch: 80 training_loss 1.730378053188324 test_loss: 1.6908878326416015
epoch: 81 training_loss 1.6665450489521028 test_loss: 1.644417953491211
epoch: 82 training_loss 1.673177298307419 test_loss: 1.6557870864868165
epoch: 83 training_loss 1.6809492027759552 test_loss: 1.623971939086914
epoch: 84 training_loss 1.6645738697052002 test_loss: 1.6300662994384765
epoch: 85 training_loss 1.6616040635108948 test_loss: 1.6254280090332032
epoch: 86 training_loss 1.6801382315158844 test_loss: 1.5953893661499023
epoch: 87 training_loss 1.648955490589142 test_loss: 1.6421211242675782
epoch: 88 training_loss 1.616721453666687 test_loss: 1.7106904983520508
epoch: 89 training_loss 1.6448063969612121 test_loss: 1.6291688919067382
epoch: 90 training_loss 1.6456508445739746 test_loss: 1.6123348236083985
epoch: 91 training_loss 1.6073014843463898 test_loss: 1.6135940551757812
epoch: 92 training_loss 1.6292786824703216 test_loss: 1.6684583663940429
epoch: 93 training_loss 1.6147203052043915 test_loss: 1.7573345184326172
epoch: 94 training_loss 1.611431188583374 test_loss: 1.605202865600586
epoch: 95 training_loss 1.581513774394989 test_loss: 1.559452438354492
epoch: 96 training_loss 1.6074969029426576 test_loss: 1.6580358505249024
epoch: 97 training_loss 1.6273189055919648 test_loss: 1.6005582809448242
epoch: 98 training_loss 1.5772685313224792 test_loss: 1.6454519271850585
epoch: 99 training_loss 1.575716520547867 test_loss: 1.5406033515930175
epoch: 100 training_loss 1.5666198921203613 test_loss: 1.6055482864379882
epoch: 101 training_loss 1.6008779799938202 test_loss: 1.6035955429077149
epoch: 102 training_loss 1.5646147429943085 test_loss: 1.562491226196289
epoch: 103 training_loss 1.6063333392143249 test_loss: 1.5730005264282227
epoch: 104 training_loss 1.5489995408058166 test_loss: 1.626536750793457
epoch: 105 training_loss 1.5742327773571014 test_loss: 1.5835012435913085
epoch: 106 training_loss 1.593970036506653 test_loss: 1.5624550819396972
epoch: 107 training_loss 1.5775783824920655 test_loss: 1.5457707405090333
epoch: 108 training_loss 1.5694868648052216 test_loss: 1.5788279533386231
epoch: 109 training_loss 1.5697570490837096 test_loss: 1.5480156898498536
epoch: 110 training_loss 1.5613610816001893 test_loss: 1.56587553024292
epoch: 111 training_loss 1.5571639502048493 test_loss: 1.5902191162109376
epoch: 112 training_loss 1.534714665412903 test_loss: 1.6023160934448242
epoch: 113 training_loss 1.551984441280365 test_loss: 1.6013473510742187
epoch: 114 training_loss 1.5860367846488952 test_loss: 1.5506012916564942
epoch: 115 training_loss 1.538018956184387 test_loss: 1.5237372398376465
epoch: 116 training_loss 1.562219696044922 test_loss: 1.5546025276184081
epoch: 117 training_loss 1.5495051634311676 test_loss: 1.5767715454101563
epoch: 118 training_loss 1.5347998130321503 test_loss: 1.5725799560546876
epoch: 119 training_loss 1.5305839729309083 test_loss: 1.5783986091613769
epoch: 120 training_loss 1.5193934237957 test_loss: 1.5771653175354003
epoch: 121 training_loss 1.5160764789581298 test_loss: 1.544593620300293
epoch: 122 training_loss 1.5205924785137177 test_loss: 1.5471272468566895
epoch: 123 training_loss 1.5224956667423248 test_loss: 1.6225284576416015
epoch: 124 training_loss 1.5092371809482574 test_loss: 1.5374360084533691
epoch: 125 training_loss 1.504997614622116 test_loss: 1.4499832153320313
epoch: 126 training_loss 1.5253571677207947 test_loss: 1.5143233299255372
epoch: 127 training_loss 1.5503762292861938 test_loss: 1.485936737060547
epoch: 128 training_loss 1.522036988735199 test_loss: 1.5186247825622559
epoch: 129 training_loss 1.4892949616909028 test_loss: 1.537338638305664
epoch: 130 training_loss 1.509015874862671 test_loss: 1.5013764381408692
epoch: 131 training_loss 1.4829878687858582 test_loss: 1.6254417419433593
epoch: 132 training_loss 1.5042969810962676 test_loss: 1.474020767211914
epoch: 133 training_loss 1.5007961404323578 test_loss: 1.5562087059020997
epoch: 134 training_loss 1.5158486151695252 test_loss: 1.43856840133667
epoch: 135 training_loss 1.4718345427513122 test_loss: 1.5160597801208495
epoch: 136 training_loss 1.5007057249546052 test_loss: 1.6358964920043946
epoch: 137 training_loss 1.4771811151504517 test_loss: 1.4782177925109863
epoch: 138 training_loss 1.487220823764801 test_loss: 1.4804776191711426
epoch: 139 training_loss 1.4917933142185211 test_loss: 1.538410472869873
epoch: 140 training_loss 1.487376219034195 test_loss: 1.4993186950683595
epoch: 141 training_loss 1.489780740737915 test_loss: 1.5605381965637206
epoch: 142 training_loss 1.4838928043842317 test_loss: 1.6178447723388671
epoch: 143 training_loss 1.477640551328659 test_loss: 1.5082637786865234
epoch: 144 training_loss 1.4681942582130432 test_loss: 1.5269586563110351
epoch: 145 training_loss 1.4892347073554992 test_loss: 1.448880672454834
epoch: 146 training_loss 1.456051994562149 test_loss: 1.4677888870239257
epoch: 147 training_loss 1.463665361404419 test_loss: 1.449939250946045
epoch: 148 training_loss 1.4599224758148193 test_loss: 1.5071442604064942
epoch: 149 training_loss 1.45569140791893 test_loss: 1.4817103385925292
5089.3936342940315
episode: 0 training return: tensor(-74.6536, device='cuda:0')
episode: 1 training return: tensor(-16.6995, device='cuda:0')
episode: 2 training return: tensor(-165.4929, device='cuda:0')
episode: 3 training return: tensor(-27.9847, device='cuda:0')
epoch: 1 test_true_pfm: 5125.108787790352 sim_pfm: 29.162666643465247
episode: 4 training return: tensor(-114.4684, device='cuda:0')
episode: 5 training return: tensor(-75.4670, device='cuda:0')
episode: 6 training return: tensor(-77.8333, device='cuda:0')
episode: 7 training return: tensor(-8.7244, device='cuda:0')
epoch: 2 test_true_pfm: 5053.012126975599 sim_pfm: -71.06850135696975
episode: 8 training return: tensor(25.6569, device='cuda:0')
episode: 9 training return: tensor(31.5674, device='cuda:0')
episode: 10 training return: tensor(-52.5150, device='cuda:0')
episode: 11 training return: tensor(-32.1359, device='cuda:0')
epoch: 3 test_true_pfm: 5114.75629568866 sim_pfm: -49.64902859752571
episode: 12 training return: tensor(-185.2894, device='cuda:0')
episode: 13 training return: tensor(-147.7658, device='cuda:0')
episode: 14 training return: tensor(-119.8026, device='cuda:0')
episode: 15 training return: tensor(6.7254, device='cuda:0')
epoch: 4 test_true_pfm: 5140.163326229084 sim_pfm: -56.08586421490569
episode: 16 training return: tensor(-86.5777, device='cuda:0')
episode: 17 training return: tensor(-46.6070, device='cuda:0')
episode: 18 training return: tensor(-87.0514, device='cuda:0')
episode: 19 training return: tensor(-62.8471, device='cuda:0')
epoch: 5 test_true_pfm: 5107.695688170309 sim_pfm: 9.773412019994188
episode: 20 training return: tensor(28.1525, device='cuda:0')
episode: 21 training return: tensor(-32.8595, device='cuda:0')
episode: 22 training return: tensor(-2.7028, device='cuda:0')
episode: 23 training return: tensor(-104.7301, device='cuda:0')
epoch: 6 test_true_pfm: 5160.873955401722 sim_pfm: 95.78102733924364
episode: 24 training return: tensor(125.5191, device='cuda:0')
episode: 25 training return: tensor(12.0320, device='cuda:0')
episode: 26 training return: tensor(11.3042, device='cuda:0')
episode: 27 training return: tensor(-126.5253, device='cuda:0')
epoch: 7 test_true_pfm: 5174.203759621396 sim_pfm: -30.131968241611805
episode: 28 training return: tensor(-140.6202, device='cuda:0')
episode: 29 training return: tensor(45.7237, device='cuda:0')
episode: 30 training return: tensor(5.3086, device='cuda:0')
episode: 31 training return: tensor(-79.5940, device='cuda:0')
epoch: 8 test_true_pfm: 5194.172491983386 sim_pfm: -2.8952215893853768
episode: 32 training return: tensor(-71.3297, device='cuda:0')
episode: 33 training return: tensor(-28.2343, device='cuda:0')
episode: 34 training return: tensor(-81.5088, device='cuda:0')
episode: 35 training return: tensor(-150.2233, device='cuda:0')
epoch: 9 test_true_pfm: 5115.454660986081 sim_pfm: -25.738014896710713
episode: 36 training return: tensor(14.4667, device='cuda:0')
episode: 37 training return: tensor(-51.1721, device='cuda:0')
episode: 38 training return: tensor(169.6869, device='cuda:0')
episode: 39 training return: tensor(-8.9774, device='cuda:0')
epoch: 10 test_true_pfm: 5099.333630067245 sim_pfm: 18.018127530861722
episode: 40 training return: tensor(9.0264, device='cuda:0')
episode: 41 training return: tensor(-32.3931, device='cuda:0')
episode: 42 training return: tensor(-78.5424, device='cuda:0')
episode: 43 training return: tensor(-14.0161, device='cuda:0')
epoch: 11 test_true_pfm: 5257.315969969893 sim_pfm: 96.12007416255074
episode: 44 training return: tensor(-30.1597, device='cuda:0')
episode: 45 training return: tensor(-136.3651, device='cuda:0')
episode: 46 training return: tensor(-164.8822, device='cuda:0')
episode: 47 training return: tensor(103.8540, device='cuda:0')
epoch: 12 test_true_pfm: 5195.430145231775 sim_pfm: 125.39543486652353
episode: 48 training return: tensor(-105.7121, device='cuda:0')
episode: 49 training return: tensor(-41.8434, device='cuda:0')
episode: 50 training return: tensor(53.0072, device='cuda:0')
episode: 51 training return: tensor(-56.3075, device='cuda:0')
epoch: 13 test_true_pfm: 5284.125358324108 sim_pfm: 88.66754983311209
episode: 52 training return: tensor(-110.4604, device='cuda:0')
episode: 53 training return: tensor(-8.7521, device='cuda:0')
episode: 54 training return: tensor(-129.6375, device='cuda:0')
episode: 55 training return: tensor(-141.1638, device='cuda:0')
epoch: 14 test_true_pfm: 5188.22775919077 sim_pfm: -112.80241540672917
episode: 56 training return: tensor(-65.5363, device='cuda:0')
episode: 57 training return: tensor(-32.1683, device='cuda:0')
episode: 58 training return: tensor(78.9932, device='cuda:0')
episode: 59 training return: tensor(15.4531, device='cuda:0')
epoch: 15 test_true_pfm: 5215.1705879211595 sim_pfm: 16.44554099809223
episode: 60 training return: tensor(52.1328, device='cuda:0')
episode: 61 training return: tensor(3.0325, device='cuda:0')
episode: 62 training return: tensor(-116.7161, device='cuda:0')
episode: 63 training return: tensor(104.6709, device='cuda:0')
epoch: 16 test_true_pfm: 5231.610637040673 sim_pfm: 124.64587546644422
episode: 64 training return: tensor(21.8601, device='cuda:0')
episode: 65 training return: tensor(-115.5605, device='cuda:0')
episode: 66 training return: tensor(39.4842, device='cuda:0')
episode: 67 training return: tensor(-77.8684, device='cuda:0')
epoch: 17 test_true_pfm: 5311.3988238215525 sim_pfm: 25.680147141664445
episode: 68 training return: tensor(25.4485, device='cuda:0')
episode: 69 training return: tensor(-9.3937, device='cuda:0')
episode: 70 training return: tensor(-7.8681, device='cuda:0')
episode: 71 training return: tensor(-60.8585, device='cuda:0')
epoch: 18 test_true_pfm: 5236.765829838019 sim_pfm: 162.92006946669426
episode: 72 training return: tensor(-22.6343, device='cuda:0')
episode: 73 training return: tensor(-16.9845, device='cuda:0')
episode: 74 training return: tensor(-1.9402, device='cuda:0')
episode: 75 training return: tensor(39.7743, device='cuda:0')
epoch: 19 test_true_pfm: 5361.348188996851 sim_pfm: 130.8771758787916
episode: 76 training return: tensor(15.0386, device='cuda:0')
episode: 77 training return: tensor(25.1729, device='cuda:0')
episode: 78 training return: tensor(26.8567, device='cuda:0')
episode: 79 training return: tensor(41.6803, device='cuda:0')
epoch: 20 test_true_pfm: 5327.532319512753 sim_pfm: 98.9545094781206
episode: 80 training return: tensor(90.9955, device='cuda:0')
episode: 81 training return: tensor(-16.1888, device='cuda:0')
episode: 82 training return: tensor(88.6008, device='cuda:0')
episode: 83 training return: tensor(14.5286, device='cuda:0')
epoch: 21 test_true_pfm: 5455.917545720931 sim_pfm: 208.12973238890604
episode: 84 training return: tensor(-48.7976, device='cuda:0')
episode: 85 training return: tensor(42.6630, device='cuda:0')
episode: 86 training return: tensor(-4.6767, device='cuda:0')
episode: 87 training return: tensor(158.7697, device='cuda:0')
epoch: 22 test_true_pfm: 5306.42190666949 sim_pfm: 188.51736500418824
episode: 88 training return: tensor(4.8693, device='cuda:0')
episode: 89 training return: tensor(28.1173, device='cuda:0')
episode: 90 training return: tensor(55.0823, device='cuda:0')
episode: 91 training return: tensor(35.8148, device='cuda:0')
epoch: 23 test_true_pfm: 5381.65337655956 sim_pfm: 207.34013936317447
episode: 92 training return: tensor(-53.7581, device='cuda:0')
episode: 93 training return: tensor(79.2652, device='cuda:0')
episode: 94 training return: tensor(65.0064, device='cuda:0')
episode: 95 training return: tensor(48.3601, device='cuda:0')
epoch: 24 test_true_pfm: 5351.242163361073 sim_pfm: 170.93904328765348
episode: 96 training return: tensor(110.6147, device='cuda:0')
episode: 97 training return: tensor(177.1179, device='cuda:0')
episode: 98 training return: tensor(39.4179, device='cuda:0')
episode: 99 training return: tensor(51.1129, device='cuda:0')
epoch: 25 test_true_pfm: 5336.697544068143 sim_pfm: 155.98290106405815
episode: 100 training return: tensor(-62.4785, device='cuda:0')
episode: 101 training return: tensor(180.7312, device='cuda:0')
episode: 102 training return: tensor(33.5831, device='cuda:0')
episode: 103 training return: tensor(62.5055, device='cuda:0')
epoch: 26 test_true_pfm: 5362.201925468463 sim_pfm: 171.32485821477408
episode: 104 training return: tensor(-24.4140, device='cuda:0')
episode: 105 training return: tensor(26.5108, device='cuda:0')
episode: 106 training return: tensor(15.0912, device='cuda:0')
episode: 107 training return: tensor(57.2875, device='cuda:0')
epoch: 27 test_true_pfm: 5393.840135054281 sim_pfm: 120.07787230387719
episode: 108 training return: tensor(123.4266, device='cuda:0')
episode: 109 training return: tensor(96.9591, device='cuda:0')
episode: 110 training return: tensor(85.9658, device='cuda:0')
episode: 111 training return: tensor(196.1976, device='cuda:0')
epoch: 28 test_true_pfm: 5331.169099372321 sim_pfm: 183.2216842782412
episode: 112 training return: tensor(188.9727, device='cuda:0')
episode: 113 training return: tensor(80.9620, device='cuda:0')
episode: 114 training return: tensor(166.4815, device='cuda:0')
episode: 115 training return: tensor(90.6983, device='cuda:0')
epoch: 29 test_true_pfm: 5418.88071416038 sim_pfm: 201.78635107869437
episode: 116 training return: tensor(-63.1973, device='cuda:0')
episode: 117 training return: tensor(58.8826, device='cuda:0')
episode: 118 training return: tensor(105.9989, device='cuda:0')
episode: 119 training return: tensor(57.9212, device='cuda:0')
epoch: 30 test_true_pfm: 5417.748520739659 sim_pfm: 186.5913042197159
episode: 120 training return: tensor(192.7384, device='cuda:0')
episode: 121 training return: tensor(167.5991, device='cuda:0')
episode: 122 training return: tensor(218.0423, device='cuda:0')
episode: 123 training return: tensor(99.2586, device='cuda:0')
epoch: 31 test_true_pfm: 5499.549768236712 sim_pfm: 168.42081048315353
episode: 124 training return: tensor(65.0380, device='cuda:0')
episode: 125 training return: tensor(238.6517, device='cuda:0')
episode: 126 training return: tensor(23.9620, device='cuda:0')
episode: 127 training return: tensor(148.8604, device='cuda:0')
epoch: 32 test_true_pfm: 5392.161360305389 sim_pfm: 160.07428757590242
episode: 128 training return: tensor(126.5836, device='cuda:0')
episode: 129 training return: tensor(179.8763, device='cuda:0')
episode: 130 training return: tensor(118.6572, device='cuda:0')
episode: 131 training return: tensor(135.5922, device='cuda:0')
epoch: 33 test_true_pfm: 5440.769135059888 sim_pfm: 269.48996406029136
episode: 132 training return: tensor(65.7378, device='cuda:0')
episode: 133 training return: tensor(-763.2292, device='cuda:0')
episode: 134 training return: tensor(101.9409, device='cuda:0')
episode: 135 training return: tensor(67.7232, device='cuda:0')
epoch: 34 test_true_pfm: 5365.825636654442 sim_pfm: 196.86931735399412
episode: 136 training return: tensor(34.5348, device='cuda:0')
episode: 137 training return: tensor(116.3732, device='cuda:0')
episode: 138 training return: tensor(150.7318, device='cuda:0')
episode: 139 training return: tensor(165.0085, device='cuda:0')
epoch: 35 test_true_pfm: 5429.345702337659 sim_pfm: 268.730918181934
episode: 140 training return: tensor(151.0956, device='cuda:0')
episode: 141 training return: tensor(108.4587, device='cuda:0')
episode: 142 training return: tensor(269.2030, device='cuda:0')
episode: 143 training return: tensor(165.2687, device='cuda:0')
epoch: 36 test_true_pfm: 5544.697193813904 sim_pfm: 271.7302116779222
episode: 144 training return: tensor(-55.5333, device='cuda:0')
episode: 145 training return: tensor(122.2220, device='cuda:0')
episode: 146 training return: tensor(104.5715, device='cuda:0')
episode: 147 training return: tensor(142.9352, device='cuda:0')
epoch: 37 test_true_pfm: 5421.5004600068105 sim_pfm: 271.37478624292027
episode: 148 training return: tensor(135.0604, device='cuda:0')
episode: 149 training return: tensor(71.6539, device='cuda:0')
episode: 150 training return: tensor(252.9064, device='cuda:0')
episode: 151 training return: tensor(140.6573, device='cuda:0')
epoch: 38 test_true_pfm: 5409.499414238543 sim_pfm: 264.2794689844789
episode: 152 training return: tensor(50.7064, device='cuda:0')
episode: 153 training return: tensor(198.1278, device='cuda:0')
episode: 154 training return: tensor(126.3374, device='cuda:0')
episode: 155 training return: tensor(54.3841, device='cuda:0')
epoch: 39 test_true_pfm: 5472.391634137583 sim_pfm: 311.8136128854724
episode: 156 training return: tensor(97.5861, device='cuda:0')
episode: 157 training return: tensor(49.2123, device='cuda:0')
episode: 158 training return: tensor(224.2865, device='cuda:0')
episode: 159 training return: tensor(112.4344, device='cuda:0')
epoch: 40 test_true_pfm: 5415.170877855452 sim_pfm: 255.57903508308422
episode: 160 training return: tensor(134.5357, device='cuda:0')
episode: 161 training return: tensor(184.9472, device='cuda:0')
episode: 162 training return: tensor(138.5067, device='cuda:0')
episode: 163 training return: tensor(134.9189, device='cuda:0')
epoch: 41 test_true_pfm: 5481.37720091558 sim_pfm: 245.11329150015567
episode: 164 training return: tensor(113.5541, device='cuda:0')
episode: 165 training return: tensor(57.5209, device='cuda:0')
episode: 166 training return: tensor(81.3009, device='cuda:0')
episode: 167 training return: tensor(145.3542, device='cuda:0')
epoch: 42 test_true_pfm: 5462.801392467019 sim_pfm: 272.86161798545317
episode: 168 training return: tensor(140.3210, device='cuda:0')
episode: 169 training return: tensor(163.3550, device='cuda:0')
episode: 170 training return: tensor(118.4687, device='cuda:0')
episode: 171 training return: tensor(241.2885, device='cuda:0')
epoch: 43 test_true_pfm: 5460.507774782907 sim_pfm: 278.47743564520107
episode: 172 training return: tensor(86.7921, device='cuda:0')
episode: 173 training return: tensor(67.3850, device='cuda:0')
episode: 174 training return: tensor(317.4807, device='cuda:0')
episode: 175 training return: tensor(259.5752, device='cuda:0')
epoch: 44 test_true_pfm: 5543.02496392824 sim_pfm: 313.3190638299566
episode: 176 training return: tensor(104.6553, device='cuda:0')
episode: 177 training return: tensor(162.9306, device='cuda:0')
episode: 178 training return: tensor(239.7233, device='cuda:0')
episode: 179 training return: tensor(149.1865, device='cuda:0')
epoch: 45 test_true_pfm: 5556.51269036609 sim_pfm: 268.1696585067354
episode: 180 training return: tensor(84.8879, device='cuda:0')
episode: 181 training return: tensor(147.7852, device='cuda:0')
episode: 182 training return: tensor(176.5241, device='cuda:0')
episode: 183 training return: tensor(204.5220, device='cuda:0')
epoch: 46 test_true_pfm: 5491.290911906673 sim_pfm: 279.6798787288135
episode: 184 training return: tensor(145.1324, device='cuda:0')
episode: 185 training return: tensor(188.0763, device='cuda:0')
episode: 186 training return: tensor(197.3308, device='cuda:0')
episode: 187 training return: tensor(231.3026, device='cuda:0')
epoch: 47 test_true_pfm: 5562.827696787357 sim_pfm: 351.2938021095858
episode: 188 training return: tensor(113.3902, device='cuda:0')
episode: 189 training return: tensor(125.7666, device='cuda:0')
episode: 190 training return: tensor(90.4418, device='cuda:0')
episode: 191 training return: tensor(75.6219, device='cuda:0')
epoch: 48 test_true_pfm: 5512.6069431760525 sim_pfm: 243.37265495267152
episode: 192 training return: tensor(189.5051, device='cuda:0')
episode: 193 training return: tensor(201.5973, device='cuda:0')
episode: 194 training return: tensor(83.5170, device='cuda:0')
episode: 195 training return: tensor(182.8172, device='cuda:0')
epoch: 49 test_true_pfm: 5365.635365542163 sim_pfm: 296.2267658453978
episode: 196 training return: tensor(82.0815, device='cuda:0')
episode: 197 training return: tensor(236.3224, device='cuda:0')
episode: 198 training return: tensor(85.6448, device='cuda:0')
episode: 199 training return: tensor(150.1540, device='cuda:0')
epoch: 50 test_true_pfm: 5543.705097058678 sim_pfm: 278.06027725954
episode: 200 training return: tensor(47.1286, device='cuda:0')
episode: 201 training return: tensor(207.0746, device='cuda:0')
episode: 202 training return: tensor(210.9509, device='cuda:0')
episode: 203 training return: tensor(218.6626, device='cuda:0')
epoch: 51 test_true_pfm: 5504.041726982723 sim_pfm: 283.9230615034273
episode: 204 training return: tensor(132.4817, device='cuda:0')
episode: 205 training return: tensor(190.0445, device='cuda:0')
episode: 206 training return: tensor(264.2498, device='cuda:0')
episode: 207 training return: tensor(246.6038, device='cuda:0')
epoch: 52 test_true_pfm: 5511.668683204033 sim_pfm: 263.06130709295394
episode: 208 training return: tensor(222.4096, device='cuda:0')
episode: 209 training return: tensor(201.3105, device='cuda:0')
episode: 210 training return: tensor(313.8915, device='cuda:0')
episode: 211 training return: tensor(119.2139, device='cuda:0')
epoch: 53 test_true_pfm: 5597.915388823439 sim_pfm: 293.12254521239083
episode: 212 training return: tensor(115.4477, device='cuda:0')
episode: 213 training return: tensor(227.9304, device='cuda:0')
episode: 214 training return: tensor(240.6487, device='cuda:0')
episode: 215 training return: tensor(227.9634, device='cuda:0')
epoch: 54 test_true_pfm: 5503.833990665098 sim_pfm: 240.0569477393292
episode: 216 training return: tensor(212.0509, device='cuda:0')
episode: 217 training return: tensor(187.6975, device='cuda:0')
episode: 218 training return: tensor(237.7814, device='cuda:0')
episode: 219 training return: tensor(13.1288, device='cuda:0')
epoch: 55 test_true_pfm: 5536.401752703176 sim_pfm: 194.6950884864491
episode: 220 training return: tensor(133.6029, device='cuda:0')
episode: 221 training return: tensor(86.5089, device='cuda:0')
episode: 222 training return: tensor(154.3360, device='cuda:0')
episode: 223 training return: tensor(62.4258, device='cuda:0')
epoch: 56 test_true_pfm: 5597.4625252983 sim_pfm: 271.0699032778696
episode: 224 training return: tensor(193.4773, device='cuda:0')
episode: 225 training return: tensor(236.7057, device='cuda:0')
episode: 226 training return: tensor(162.7657, device='cuda:0')
episode: 227 training return: tensor(140.3537, device='cuda:0')
epoch: 57 test_true_pfm: 5617.0046127926 sim_pfm: 270.6356211641105
episode: 228 training return: tensor(200.2296, device='cuda:0')
episode: 229 training return: tensor(182.6184, device='cuda:0')
episode: 230 training return: tensor(130.8922, device='cuda:0')
episode: 231 training return: tensor(170.7234, device='cuda:0')
epoch: 58 test_true_pfm: 5560.981307249363 sim_pfm: 307.6149359036256
episode: 232 training return: tensor(153.1423, device='cuda:0')
episode: 233 training return: tensor(280.8007, device='cuda:0')
episode: 234 training return: tensor(202.7632, device='cuda:0')
episode: 235 training return: tensor(136.9322, device='cuda:0')
epoch: 59 test_true_pfm: 5540.338015561305 sim_pfm: 296.0278373924375
episode: 236 training return: tensor(75.1356, device='cuda:0')
episode: 237 training return: tensor(265.1968, device='cuda:0')
episode: 238 training return: tensor(98.5014, device='cuda:0')
episode: 239 training return: tensor(199.4682, device='cuda:0')
epoch: 60 test_true_pfm: 5474.509679235712 sim_pfm: 229.22922804431678
episode: 240 training return: tensor(220.9592, device='cuda:0')
episode: 241 training return: tensor(135.7288, device='cuda:0')
episode: 242 training return: tensor(292.4936, device='cuda:0')
episode: 243 training return: tensor(312.6390, device='cuda:0')
epoch: 61 test_true_pfm: 5592.384262461007 sim_pfm: 281.84236166704795
episode: 244 training return: tensor(136.2500, device='cuda:0')
episode: 245 training return: tensor(128.4602, device='cuda:0')
episode: 246 training return: tensor(157.2922, device='cuda:0')
episode: 247 training return: tensor(85.3533, device='cuda:0')
epoch: 62 test_true_pfm: 5533.889036293513 sim_pfm: 361.04897630853037
episode: 248 training return: tensor(130.2906, device='cuda:0')
episode: 249 training return: tensor(233.5632, device='cuda:0')
episode: 250 training return: tensor(140.6768, device='cuda:0')
episode: 251 training return: tensor(219.5454, device='cuda:0')
epoch: 63 test_true_pfm: 5564.636600271972 sim_pfm: 298.54845785814297
episode: 252 training return: tensor(207.1300, device='cuda:0')
episode: 253 training return: tensor(310.2692, device='cuda:0')
episode: 254 training return: tensor(241.1177, device='cuda:0')
episode: 255 training return: tensor(194.5645, device='cuda:0')
epoch: 64 test_true_pfm: 5461.842207206089 sim_pfm: 337.59235531902715
episode: 256 training return: tensor(340.3625, device='cuda:0')
episode: 257 training return: tensor(150.6064, device='cuda:0')
episode: 258 training return: tensor(211.2535, device='cuda:0')
episode: 259 training return: tensor(310.0341, device='cuda:0')
epoch: 65 test_true_pfm: 5459.658048254918 sim_pfm: 261.03321775702835
episode: 260 training return: tensor(230.2111, device='cuda:0')
episode: 261 training return: tensor(67.2302, device='cuda:0')
episode: 262 training return: tensor(318.7773, device='cuda:0')
episode: 263 training return: tensor(200.4286, device='cuda:0')
epoch: 66 test_true_pfm: 5508.481689828329 sim_pfm: 283.10562299378216
episode: 264 training return: tensor(302.1213, device='cuda:0')
episode: 265 training return: tensor(94.8175, device='cuda:0')
episode: 266 training return: tensor(132.8772, device='cuda:0')
episode: 267 training return: tensor(270.6147, device='cuda:0')
epoch: 67 test_true_pfm: 5595.561759388103 sim_pfm: 322.51928538214025
episode: 268 training return: tensor(349.6075, device='cuda:0')
episode: 269 training return: tensor(227.6548, device='cuda:0')
episode: 270 training return: tensor(295.9206, device='cuda:0')
episode: 271 training return: tensor(139.7127, device='cuda:0')
epoch: 68 test_true_pfm: 5447.832136818679 sim_pfm: 333.82195149724913
episode: 272 training return: tensor(97.2011, device='cuda:0')
episode: 273 training return: tensor(263.5469, device='cuda:0')
episode: 274 training return: tensor(107.2051, device='cuda:0')
episode: 275 training return: tensor(199.8644, device='cuda:0')
epoch: 69 test_true_pfm: 5535.385339224323 sim_pfm: 299.6663451558367
episode: 276 training return: tensor(217.6108, device='cuda:0')
episode: 277 training return: tensor(186.0682, device='cuda:0')
episode: 278 training return: tensor(213.9658, device='cuda:0')
episode: 279 training return: tensor(390.7732, device='cuda:0')
epoch: 70 test_true_pfm: 5634.155792962813 sim_pfm: 251.50142655311114
episode: 280 training return: tensor(242.8016, device='cuda:0')
episode: 281 training return: tensor(317.4972, device='cuda:0')
episode: 282 training return: tensor(238.6016, device='cuda:0')
episode: 283 training return: tensor(140.9334, device='cuda:0')
epoch: 71 test_true_pfm: 5552.07538218502 sim_pfm: 374.6162400385365
episode: 284 training return: tensor(185.3508, device='cuda:0')
episode: 285 training return: tensor(144.2107, device='cuda:0')
episode: 286 training return: tensor(230.5055, device='cuda:0')
episode: 287 training return: tensor(163.5019, device='cuda:0')
epoch: 72 test_true_pfm: 5620.972986271337 sim_pfm: 365.64913951652125
episode: 288 training return: tensor(115.7256, device='cuda:0')
episode: 289 training return: tensor(234.1185, device='cuda:0')
episode: 290 training return: tensor(269.5558, device='cuda:0')
episode: 291 training return: tensor(246.6536, device='cuda:0')
epoch: 73 test_true_pfm: 5666.089961814818 sim_pfm: 374.9793567038917
episode: 292 training return: tensor(202.2725, device='cuda:0')
episode: 293 training return: tensor(254.8965, device='cuda:0')
episode: 294 training return: tensor(244.0822, device='cuda:0')
episode: 295 training return: tensor(328.2191, device='cuda:0')
epoch: 74 test_true_pfm: 5619.408537653889 sim_pfm: 378.72400357709074
episode: 296 training return: tensor(184.4800, device='cuda:0')
episode: 297 training return: tensor(196.0909, device='cuda:0')
episode: 298 training return: tensor(105.9766, device='cuda:0')
episode: 299 training return: tensor(268.9734, device='cuda:0')
epoch: 75 test_true_pfm: 5606.749076587119 sim_pfm: 354.1481173490756
episode: 300 training return: tensor(155.0856, device='cuda:0')
episode: 301 training return: tensor(190.1710, device='cuda:0')
episode: 302 training return: tensor(362.8772, device='cuda:0')
episode: 303 training return: tensor(138.4169, device='cuda:0')
epoch: 76 test_true_pfm: 5644.051474305263 sim_pfm: 318.4261390312943
episode: 304 training return: tensor(265.0762, device='cuda:0')
episode: 305 training return: tensor(337.3224, device='cuda:0')
episode: 306 training return: tensor(274.3878, device='cuda:0')
episode: 307 training return: tensor(187.4059, device='cuda:0')
epoch: 77 test_true_pfm: 5556.9839664713245 sim_pfm: 368.1958235090715
episode: 308 training return: tensor(260.9868, device='cuda:0')
episode: 309 training return: tensor(218.9292, device='cuda:0')
episode: 310 training return: tensor(261.3404, device='cuda:0')
episode: 311 training return: tensor(219.7280, device='cuda:0')
epoch: 78 test_true_pfm: 5673.773609562664 sim_pfm: 341.1062055881678
episode: 312 training return: tensor(223.8394, device='cuda:0')
episode: 313 training return: tensor(279.5479, device='cuda:0')
episode: 314 training return: tensor(262.2894, device='cuda:0')
episode: 315 training return: tensor(232.7838, device='cuda:0')
epoch: 79 test_true_pfm: 5598.5058256453585 sim_pfm: 358.84451186445466
episode: 316 training return: tensor(137.9595, device='cuda:0')
episode: 317 training return: tensor(148.3096, device='cuda:0')
episode: 318 training return: tensor(259.2638, device='cuda:0')
episode: 319 training return: tensor(189.7110, device='cuda:0')
epoch: 80 test_true_pfm: 5622.823097331315 sim_pfm: 358.87293807254173
episode: 320 training return: tensor(145.9444, device='cuda:0')
episode: 321 training return: tensor(137.8493, device='cuda:0')
episode: 322 training return: tensor(118.3151, device='cuda:0')
episode: 323 training return: tensor(264.8646, device='cuda:0')
epoch: 81 test_true_pfm: 5628.959803288097 sim_pfm: -136.5644757275004
episode: 324 training return: tensor(253.5511, device='cuda:0')
episode: 325 training return: tensor(196.7645, device='cuda:0')
episode: 326 training return: tensor(291.9389, device='cuda:0')
episode: 327 training return: tensor(287.3311, device='cuda:0')
epoch: 82 test_true_pfm: 5613.819744644457 sim_pfm: 366.3006990160793
episode: 328 training return: tensor(272.1483, device='cuda:0')
episode: 329 training return: tensor(67.8487, device='cuda:0')
episode: 330 training return: tensor(349.0741, device='cuda:0')
episode: 331 training return: tensor(290.7832, device='cuda:0')
epoch: 83 test_true_pfm: 5637.306637312379 sim_pfm: 349.8696596851223
episode: 332 training return: tensor(309.7536, device='cuda:0')
episode: 333 training return: tensor(237.7130, device='cuda:0')
episode: 334 training return: tensor(185.0800, device='cuda:0')
episode: 335 training return: tensor(276.7957, device='cuda:0')
epoch: 84 test_true_pfm: 5632.688716399961 sim_pfm: 334.01008675129077
episode: 336 training return: tensor(191.9998, device='cuda:0')
episode: 337 training return: tensor(200.2170, device='cuda:0')
episode: 338 training return: tensor(333.3368, device='cuda:0')
episode: 339 training return: tensor(140.7431, device='cuda:0')
epoch: 85 test_true_pfm: 5634.198800664078 sim_pfm: 406.52889537764713
episode: 340 training return: tensor(286.8344, device='cuda:0')
episode: 341 training return: tensor(197.3958, device='cuda:0')
episode: 342 training return: tensor(185.8946, device='cuda:0')
episode: 343 training return: tensor(140.8436, device='cuda:0')
epoch: 86 test_true_pfm: 5649.099406729848 sim_pfm: 367.8307842399615
episode: 344 training return: tensor(380.6880, device='cuda:0')
episode: 345 training return: tensor(122.4419, device='cuda:0')
episode: 346 training return: tensor(344.1863, device='cuda:0')
episode: 347 training return: tensor(302.5117, device='cuda:0')
epoch: 87 test_true_pfm: 5634.982758638766 sim_pfm: 320.03912651127513
episode: 348 training return: tensor(170.0513, device='cuda:0')
episode: 349 training return: tensor(264.7293, device='cuda:0')
episode: 350 training return: tensor(-117.2727, device='cuda:0')
episode: 351 training return: tensor(120.6231, device='cuda:0')
epoch: 88 test_true_pfm: 5594.180966100205 sim_pfm: 361.91661785674904
episode: 352 training return: tensor(198.0833, device='cuda:0')
episode: 353 training return: tensor(201.1703, device='cuda:0')
episode: 354 training return: tensor(185.3407, device='cuda:0')
episode: 355 training return: tensor(258.3829, device='cuda:0')
epoch: 89 test_true_pfm: 5647.381759616517 sim_pfm: 345.3150378189748
episode: 356 training return: tensor(262.0215, device='cuda:0')
episode: 357 training return: tensor(197.2252, device='cuda:0')
episode: 358 training return: tensor(233.0735, device='cuda:0')
episode: 359 training return: tensor(314.2708, device='cuda:0')
epoch: 90 test_true_pfm: 5698.450974495693 sim_pfm: 425.65186699984287
episode: 360 training return: tensor(184.0557, device='cuda:0')
episode: 361 training return: tensor(257.5806, device='cuda:0')
episode: 362 training return: tensor(242.7360, device='cuda:0')
episode: 363 training return: tensor(135.3798, device='cuda:0')
epoch: 91 test_true_pfm: 5623.635167176201 sim_pfm: 350.37683562952833
episode: 364 training return: tensor(47.2906, device='cuda:0')
episode: 365 training return: tensor(297.3542, device='cuda:0')
episode: 366 training return: tensor(263.4132, device='cuda:0')
episode: 367 training return: tensor(209.6241, device='cuda:0')
epoch: 92 test_true_pfm: 5616.245333869464 sim_pfm: 407.2057454445555
episode: 368 training return: tensor(146.4614, device='cuda:0')
episode: 369 training return: tensor(191.9693, device='cuda:0')
episode: 370 training return: tensor(257.1632, device='cuda:0')
episode: 371 training return: tensor(319.3117, device='cuda:0')
epoch: 93 test_true_pfm: 5701.190376671443 sim_pfm: 399.638451677815
episode: 372 training return: tensor(267.1823, device='cuda:0')
episode: 373 training return: tensor(230.8774, device='cuda:0')
episode: 374 training return: tensor(272.9762, device='cuda:0')
episode: 375 training return: tensor(289.2925, device='cuda:0')
epoch: 94 test_true_pfm: 5623.979160080819 sim_pfm: 381.1952145602457
episode: 376 training return: tensor(167.5812, device='cuda:0')
episode: 377 training return: tensor(211.5189, device='cuda:0')
episode: 378 training return: tensor(267.7620, device='cuda:0')
episode: 379 training return: tensor(207.1701, device='cuda:0')
epoch: 95 test_true_pfm: 5698.034732305467 sim_pfm: 417.0601620645709
episode: 380 training return: tensor(184.5403, device='cuda:0')
episode: 381 training return: tensor(391.7846, device='cuda:0')
episode: 382 training return: tensor(222.1473, device='cuda:0')
episode: 383 training return: tensor(251.4677, device='cuda:0')
epoch: 96 test_true_pfm: 5651.085011876821 sim_pfm: 362.5807323986276
episode: 384 training return: tensor(252.9131, device='cuda:0')
episode: 385 training return: tensor(359.4532, device='cuda:0')
episode: 386 training return: tensor(200.3297, device='cuda:0')
episode: 387 training return: tensor(275.3199, device='cuda:0')
epoch: 97 test_true_pfm: 5748.740590502094 sim_pfm: 414.61867047551397
episode: 388 training return: tensor(254.8467, device='cuda:0')
episode: 389 training return: tensor(215.9736, device='cuda:0')
episode: 390 training return: tensor(317.3232, device='cuda:0')
episode: 391 training return: tensor(343.2519, device='cuda:0')
epoch: 98 test_true_pfm: 5679.677362355392 sim_pfm: 352.3045344310037
episode: 392 training return: tensor(163.4975, device='cuda:0')
episode: 393 training return: tensor(363.5670, device='cuda:0')
episode: 394 training return: tensor(225.6495, device='cuda:0')
episode: 395 training return: tensor(306.2074, device='cuda:0')
epoch: 99 test_true_pfm: 5586.787983283281 sim_pfm: 417.08150376263075
episode: 396 training return: tensor(235.9650, device='cuda:0')
episode: 397 training return: tensor(185.3878, device='cuda:0')
episode: 398 training return: tensor(227.6064, device='cuda:0')
episode: 399 training return: tensor(286.9565, device='cuda:0')
epoch: 100 test_true_pfm: 5637.450233035008 sim_pfm: 381.910804277674
episode: 400 training return: tensor(253.2402, device='cuda:0')
episode: 401 training return: tensor(207.2241, device='cuda:0')
episode: 402 training return: tensor(286.8121, device='cuda:0')
episode: 403 training return: tensor(185.4209, device='cuda:0')
epoch: 101 test_true_pfm: 5689.170358419605 sim_pfm: 310.1215465463271
episode: 404 training return: tensor(164.8479, device='cuda:0')
episode: 405 training return: tensor(355.3346, device='cuda:0')
episode: 406 training return: tensor(166.8221, device='cuda:0')
episode: 407 training return: tensor(287.4965, device='cuda:0')
epoch: 102 test_true_pfm: 5640.46940534881 sim_pfm: 398.9991526935967
episode: 408 training return: tensor(288.7379, device='cuda:0')
episode: 409 training return: tensor(65.7783, device='cuda:0')
episode: 410 training return: tensor(240.2413, device='cuda:0')
episode: 411 training return: tensor(169.2781, device='cuda:0')
epoch: 103 test_true_pfm: 5669.085427771953 sim_pfm: 357.67810102148604
episode: 412 training return: tensor(22.8796, device='cuda:0')
episode: 413 training return: tensor(220.7669, device='cuda:0')
episode: 414 training return: tensor(261.3226, device='cuda:0')
episode: 415 training return: tensor(128.5748, device='cuda:0')
epoch: 104 test_true_pfm: 5760.863459419891 sim_pfm: 335.5138833070717
episode: 416 training return: tensor(288.7758, device='cuda:0')
episode: 417 training return: tensor(343.9700, device='cuda:0')
episode: 418 training return: tensor(222.4797, device='cuda:0')
episode: 419 training return: tensor(286.8424, device='cuda:0')
epoch: 105 test_true_pfm: 5672.809605106442 sim_pfm: 384.2529713717522
episode: 420 training return: tensor(322.0900, device='cuda:0')
episode: 421 training return: tensor(365.0082, device='cuda:0')
episode: 422 training return: tensor(319.3157, device='cuda:0')
episode: 423 training return: tensor(222.4195, device='cuda:0')
epoch: 106 test_true_pfm: 5665.7553681920435 sim_pfm: 392.27934848793666
episode: 424 training return: tensor(264.6689, device='cuda:0')
episode: 425 training return: tensor(330.0683, device='cuda:0')
episode: 426 training return: tensor(272.2020, device='cuda:0')
episode: 427 training return: tensor(218.5560, device='cuda:0')
epoch: 107 test_true_pfm: 5748.369387166632 sim_pfm: 388.036702743615
episode: 428 training return: tensor(330.5079, device='cuda:0')
episode: 429 training return: tensor(149.4470, device='cuda:0')
episode: 430 training return: tensor(118.7979, device='cuda:0')
episode: 431 training return: tensor(124.3420, device='cuda:0')
epoch: 108 test_true_pfm: 5698.676108734801 sim_pfm: 337.8000458259582
episode: 432 training return: tensor(368.6788, device='cuda:0')
episode: 433 training return: tensor(345.6675, device='cuda:0')
episode: 434 training return: tensor(231.6913, device='cuda:0')
episode: 435 training return: tensor(308.8736, device='cuda:0')
epoch: 109 test_true_pfm: 5673.272125910196 sim_pfm: 360.4791151504808
episode: 436 training return: tensor(99.1375, device='cuda:0')
episode: 437 training return: tensor(204.7614, device='cuda:0')
episode: 438 training return: tensor(320.5712, device='cuda:0')
episode: 439 training return: tensor(367.0075, device='cuda:0')
epoch: 110 test_true_pfm: 5669.148505203067 sim_pfm: 424.67777005945874
episode: 440 training return: tensor(274.6247, device='cuda:0')
episode: 441 training return: tensor(290.5840, device='cuda:0')
episode: 442 training return: tensor(105.5983, device='cuda:0')
episode: 443 training return: tensor(261.7686, device='cuda:0')
epoch: 111 test_true_pfm: 5652.232965721351 sim_pfm: 394.74586378314416
episode: 444 training return: tensor(344.2145, device='cuda:0')
episode: 445 training return: tensor(207.5564, device='cuda:0')
episode: 446 training return: tensor(222.8594, device='cuda:0')
episode: 447 training return: tensor(305.6410, device='cuda:0')
epoch: 112 test_true_pfm: 5690.343927984634 sim_pfm: 359.67547302619397
episode: 448 training return: tensor(196.6507, device='cuda:0')
episode: 449 training return: tensor(198.3168, device='cuda:0')
episode: 450 training return: tensor(205.6188, device='cuda:0')
episode: 451 training return: tensor(310.0724, device='cuda:0')
epoch: 113 test_true_pfm: 5681.740599783646 sim_pfm: 373.9872594553356
episode: 452 training return: tensor(110.6228, device='cuda:0')
episode: 453 training return: tensor(351.5159, device='cuda:0')
episode: 454 training return: tensor(164.8772, device='cuda:0')
episode: 455 training return: tensor(244.1382, device='cuda:0')
epoch: 114 test_true_pfm: 5610.33209768012 sim_pfm: 433.92225716954755
episode: 456 training return: tensor(180.4998, device='cuda:0')
episode: 457 training return: tensor(200.2960, device='cuda:0')
episode: 458 training return: tensor(193.5950, device='cuda:0')
episode: 459 training return: tensor(296.6974, device='cuda:0')
epoch: 115 test_true_pfm: 5718.046441993115 sim_pfm: 404.5517505579919
episode: 460 training return: tensor(239.1999, device='cuda:0')
episode: 461 training return: tensor(268.6804, device='cuda:0')
episode: 462 training return: tensor(424.6828, device='cuda:0')
episode: 463 training return: tensor(229.6633, device='cuda:0')
epoch: 116 test_true_pfm: 5649.2044639510195 sim_pfm: 390.8743978853648
episode: 464 training return: tensor(112.6624, device='cuda:0')
episode: 465 training return: tensor(289.0004, device='cuda:0')
episode: 466 training return: tensor(233.3759, device='cuda:0')
episode: 467 training return: tensor(318.2553, device='cuda:0')
epoch: 117 test_true_pfm: 5686.395295504751 sim_pfm: 412.80739265067194
episode: 468 training return: tensor(293.4798, device='cuda:0')
episode: 469 training return: tensor(235.8744, device='cuda:0')
episode: 470 training return: tensor(250.1909, device='cuda:0')
episode: 471 training return: tensor(169.3561, device='cuda:0')
epoch: 118 test_true_pfm: 5639.792917218546 sim_pfm: 366.6650197355193
episode: 472 training return: tensor(433.1964, device='cuda:0')
episode: 473 training return: tensor(287.8600, device='cuda:0')
episode: 474 training return: tensor(346.0115, device='cuda:0')
episode: 475 training return: tensor(29.9447, device='cuda:0')
epoch: 119 test_true_pfm: 5624.756214389689 sim_pfm: 397.2617056896367
episode: 476 training return: tensor(299.4994, device='cuda:0')
episode: 477 training return: tensor(333.4275, device='cuda:0')
episode: 478 training return: tensor(260.3006, device='cuda:0')
episode: 479 training return: tensor(317.5343, device='cuda:0')
epoch: 120 test_true_pfm: 5539.183162105714 sim_pfm: 462.2242203153534
episode: 480 training return: tensor(283.4432, device='cuda:0')
episode: 481 training return: tensor(161.2861, device='cuda:0')
episode: 482 training return: tensor(240.9016, device='cuda:0')
episode: 483 training return: tensor(213.5116, device='cuda:0')
epoch: 121 test_true_pfm: 5664.909807144365 sim_pfm: 336.99393419987365
episode: 484 training return: tensor(242.1045, device='cuda:0')
episode: 485 training return: tensor(339.1303, device='cuda:0')
episode: 486 training return: tensor(262.4452, device='cuda:0')
episode: 487 training return: tensor(290.6966, device='cuda:0')
epoch: 122 test_true_pfm: 5659.395365243676 sim_pfm: 328.4875601697907
episode: 488 training return: tensor(-74.6055, device='cuda:0')
episode: 489 training return: tensor(334.9621, device='cuda:0')
episode: 490 training return: tensor(231.8773, device='cuda:0')
episode: 491 training return: tensor(354.8374, device='cuda:0')
epoch: 123 test_true_pfm: 5670.741012898925 sim_pfm: 403.7362234806642
episode: 492 training return: tensor(258.6389, device='cuda:0')
episode: 493 training return: tensor(130.4528, device='cuda:0')
episode: 494 training return: tensor(190.6872, device='cuda:0')
episode: 495 training return: tensor(231.0141, device='cuda:0')
epoch: 124 test_true_pfm: 5675.613858273289 sim_pfm: 338.08431951278664
episode: 496 training return: tensor(229.1804, device='cuda:0')
episode: 497 training return: tensor(332.2657, device='cuda:0')
episode: 498 training return: tensor(372.1174, device='cuda:0')
episode: 499 training return: tensor(354.9146, device='cuda:0')
epoch: 125 test_true_pfm: 5721.314129371407 sim_pfm: 424.0535574441892
episode: 500 training return: tensor(234.1328, device='cuda:0')
episode: 501 training return: tensor(317.1739, device='cuda:0')
episode: 502 training return: tensor(122.9632, device='cuda:0')
episode: 503 training return: tensor(253.0466, device='cuda:0')
epoch: 126 test_true_pfm: 5665.728967392298 sim_pfm: 350.61975617838715
episode: 504 training return: tensor(418.6206, device='cuda:0')
episode: 505 training return: tensor(271.0620, device='cuda:0')
episode: 506 training return: tensor(226.9559, device='cuda:0')
episode: 507 training return: tensor(78.7130, device='cuda:0')
epoch: 127 test_true_pfm: 5748.028885591382 sim_pfm: 378.1977604485389
episode: 508 training return: tensor(249.2007, device='cuda:0')
episode: 509 training return: tensor(283.4541, device='cuda:0')
episode: 510 training return: tensor(262.5164, device='cuda:0')
episode: 511 training return: tensor(271.9626, device='cuda:0')
epoch: 128 test_true_pfm: 5695.046754081187 sim_pfm: 414.8871740307465
episode: 512 training return: tensor(288.4526, device='cuda:0')
episode: 513 training return: tensor(297.0097, device='cuda:0')
episode: 514 training return: tensor(254.7760, device='cuda:0')
episode: 515 training return: tensor(341.2576, device='cuda:0')
epoch: 129 test_true_pfm: 5632.28668083313 sim_pfm: 365.54486387847766
episode: 516 training return: tensor(208.2787, device='cuda:0')
episode: 517 training return: tensor(338.5141, device='cuda:0')
episode: 518 training return: tensor(226.1005, device='cuda:0')
episode: 519 training return: tensor(224.6550, device='cuda:0')
epoch: 130 test_true_pfm: 5792.706380923977 sim_pfm: 371.08150728509645
episode: 520 training return: tensor(94.8305, device='cuda:0')
episode: 521 training return: tensor(326.0369, device='cuda:0')
episode: 522 training return: tensor(208.9155, device='cuda:0')
episode: 523 training return: tensor(378.1485, device='cuda:0')
epoch: 131 test_true_pfm: 5657.749277580572 sim_pfm: 412.1546811850276
episode: 524 training return: tensor(201.2797, device='cuda:0')
episode: 525 training return: tensor(259.0345, device='cuda:0')
episode: 526 training return: tensor(197.1717, device='cuda:0')
episode: 527 training return: tensor(162.4857, device='cuda:0')
epoch: 132 test_true_pfm: 5774.299460020035 sim_pfm: 411.3880915469005
episode: 528 training return: tensor(311.6714, device='cuda:0')
episode: 529 training return: tensor(298.4614, device='cuda:0')
episode: 530 training return: tensor(291.1436, device='cuda:0')
episode: 531 training return: tensor(201.2715, device='cuda:0')
epoch: 133 test_true_pfm: 5719.4308213154145 sim_pfm: 405.4343364497181
episode: 532 training return: tensor(378.4093, device='cuda:0')
episode: 533 training return: tensor(181.4820, device='cuda:0')
episode: 534 training return: tensor(229.4718, device='cuda:0')
episode: 535 training return: tensor(214.3587, device='cuda:0')
epoch: 134 test_true_pfm: 5736.737965880816 sim_pfm: 407.8438094429827
episode: 536 training return: tensor(358.9916, device='cuda:0')
episode: 537 training return: tensor(246.4400, device='cuda:0')
episode: 538 training return: tensor(235.9581, device='cuda:0')
episode: 539 training return: tensor(364.4543, device='cuda:0')
epoch: 135 test_true_pfm: 5569.70966333325 sim_pfm: 442.14286598173203
episode: 540 training return: tensor(334.8047, device='cuda:0')
episode: 541 training return: tensor(242.8910, device='cuda:0')
episode: 542 training return: tensor(118.7045, device='cuda:0')
episode: 543 training return: tensor(327.2527, device='cuda:0')
epoch: 136 test_true_pfm: 5714.604888883357 sim_pfm: 433.1956195644646
episode: 544 training return: tensor(309.0410, device='cuda:0')
episode: 545 training return: tensor(319.2253, device='cuda:0')
episode: 546 training return: tensor(347.3691, device='cuda:0')
episode: 547 training return: tensor(272.4306, device='cuda:0')
epoch: 137 test_true_pfm: 5654.831184888979 sim_pfm: 416.5196287299138
episode: 548 training return: tensor(263.0009, device='cuda:0')
episode: 549 training return: tensor(176.7481, device='cuda:0')
episode: 550 training return: tensor(279.6517, device='cuda:0')
episode: 551 training return: tensor(290.9913, device='cuda:0')
epoch: 138 test_true_pfm: 5669.601891507911 sim_pfm: 384.55450688422815
episode: 552 training return: tensor(297.3561, device='cuda:0')
episode: 553 training return: tensor(282.4442, device='cuda:0')
episode: 554 training return: tensor(266.9546, device='cuda:0')
episode: 555 training return: tensor(389.6826, device='cuda:0')
epoch: 139 test_true_pfm: 5714.087470884799 sim_pfm: 387.38519126553246
episode: 556 training return: tensor(326.4600, device='cuda:0')
episode: 557 training return: tensor(304.0061, device='cuda:0')
episode: 558 training return: tensor(344.7600, device='cuda:0')
episode: 559 training return: tensor(253.5704, device='cuda:0')
epoch: 140 test_true_pfm: 5671.3606174016795 sim_pfm: 422.04553309873637
episode: 560 training return: tensor(348.9703, device='cuda:0')
episode: 561 training return: tensor(420.6181, device='cuda:0')
episode: 562 training return: tensor(310.0100, device='cuda:0')
episode: 563 training return: tensor(315.2629, device='cuda:0')
epoch: 141 test_true_pfm: 5720.293676064942 sim_pfm: 388.26858404278755
episode: 564 training return: tensor(196.2655, device='cuda:0')
episode: 565 training return: tensor(276.8023, device='cuda:0')
episode: 566 training return: tensor(229.2059, device='cuda:0')
episode: 567 training return: tensor(293.5376, device='cuda:0')
epoch: 142 test_true_pfm: 5694.311143020693 sim_pfm: 389.2878540687573
episode: 568 training return: tensor(279.9345, device='cuda:0')
episode: 569 training return: tensor(234.0292, device='cuda:0')
episode: 570 training return: tensor(270.8988, device='cuda:0')
episode: 571 training return: tensor(342.2678, device='cuda:0')
epoch: 143 test_true_pfm: 5623.975841526142 sim_pfm: 406.8794104726209
episode: 572 training return: tensor(96.6468, device='cuda:0')
episode: 573 training return: tensor(359.3529, device='cuda:0')
episode: 574 training return: tensor(203.1811, device='cuda:0')
episode: 575 training return: tensor(360.4783, device='cuda:0')
epoch: 144 test_true_pfm: 5719.753572012704 sim_pfm: 371.5937235740421
episode: 576 training return: tensor(211.1434, device='cuda:0')
episode: 577 training return: tensor(263.4321, device='cuda:0')
episode: 578 training return: tensor(237.4131, device='cuda:0')
episode: 579 training return: tensor(380.0359, device='cuda:0')
epoch: 145 test_true_pfm: 5726.495270360084 sim_pfm: 398.72225569805596
episode: 580 training return: tensor(51.6688, device='cuda:0')
episode: 581 training return: tensor(314.8821, device='cuda:0')
episode: 582 training return: tensor(287.6648, device='cuda:0')
episode: 583 training return: tensor(153.1690, device='cuda:0')
epoch: 146 test_true_pfm: 5719.850389004085 sim_pfm: 375.6304003055363
episode: 584 training return: tensor(221.6991, device='cuda:0')
episode: 585 training return: tensor(293.0588, device='cuda:0')
episode: 586 training return: tensor(258.8040, device='cuda:0')
episode: 587 training return: tensor(242.9016, device='cuda:0')
epoch: 147 test_true_pfm: 5663.754637587418 sim_pfm: 395.0296592152833
episode: 588 training return: tensor(257.4984, device='cuda:0')
episode: 589 training return: tensor(279.5120, device='cuda:0')
episode: 590 training return: tensor(296.9801, device='cuda:0')
episode: 591 training return: tensor(154.9277, device='cuda:0')
epoch: 148 test_true_pfm: 5735.380664329466 sim_pfm: 413.6683961947371
episode: 592 training return: tensor(195.8584, device='cuda:0')
episode: 593 training return: tensor(109.3118, device='cuda:0')
episode: 594 training return: tensor(175.8849, device='cuda:0')
episode: 595 training return: tensor(217.9688, device='cuda:0')
epoch: 149 test_true_pfm: 5783.049027535032 sim_pfm: 404.9069399453001
episode: 596 training return: tensor(236.4169, device='cuda:0')
episode: 597 training return: tensor(285.0835, device='cuda:0')
episode: 598 training return: tensor(264.2738, device='cuda:0')
episode: 599 training return: tensor(392.6916, device='cuda:0')
epoch: 150 test_true_pfm: 5695.535000609441 sim_pfm: 403.7964999502777
