['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '4', '--data', '10000']
epoch: 0 training_loss 0.30961626999080183 test_loss: 0.18190070390701293
epoch: 1 training_loss 0.1676490230858326 test_loss: 0.13276180028915405
epoch: 2 training_loss 0.13966337148100139 test_loss: 0.11758207082748413
epoch: 3 training_loss 0.1263728143274784 test_loss: 0.11928967237472535
epoch: 4 training_loss 0.11871815919876098 test_loss: 0.11155285835266113
epoch: 5 training_loss 0.11026661306619644 test_loss: 0.129630184173584
epoch: 6 training_loss 0.11170650523155928 test_loss: 0.11937545537948609
epoch: 7 training_loss 0.104091987721622 test_loss: 0.10761779546737671
epoch: 8 training_loss 0.09790718797594308 test_loss: 0.10513615608215332
epoch: 9 training_loss 0.10518308341503144 test_loss: 0.11229863166809081
epoch: 10 training_loss 0.10482914213091135 test_loss: 0.11726504564285278
epoch: 11 training_loss 0.09578234693035484 test_loss: 0.1261077642440796
epoch: 12 training_loss 0.10764466533437371 test_loss: 0.10589704513549805
epoch: 13 training_loss 0.09753682680428027 test_loss: 0.12184100151062012
epoch: 14 training_loss 0.10174826137721539 test_loss: 0.11255300045013428
epoch: 15 training_loss 0.10123082105070352 test_loss: 0.11385202407836914
epoch: 16 training_loss 0.09860477961599827 test_loss: 0.10477271080017089
epoch: 17 training_loss 0.0952914485707879 test_loss: 0.12036137580871582
epoch: 18 training_loss 0.0973919409699738 test_loss: 0.10876603126525879
epoch: 19 training_loss 0.1039526965469122 test_loss: 0.11401082277297973
epoch: 20 training_loss 0.09159799875691534 test_loss: 0.10635483264923096
epoch: 21 training_loss 0.09552654694765807 test_loss: 0.12402085065841675
epoch: 22 training_loss 0.09964527562260628 test_loss: 0.11671320199966431
epoch: 23 training_loss 0.09135068558156491 test_loss: 0.09992544651031494
epoch: 24 training_loss 0.09409102695062757 test_loss: 0.11087770462036133
epoch: 25 training_loss 0.09022728394716978 test_loss: 0.10086385011672974
epoch: 26 training_loss 0.09649817941710354 test_loss: 0.11206791400909424
epoch: 27 training_loss 0.08807945115491748 test_loss: 0.1151430368423462
epoch: 28 training_loss 0.08800924159586429 test_loss: 0.0991879642009735
epoch: 29 training_loss 0.08277424201369285 test_loss: 0.1152876615524292
epoch: 30 training_loss 0.09010945957154036 test_loss: 0.12291973829269409
epoch: 31 training_loss 0.08982964305207133 test_loss: 0.115097177028656
epoch: 32 training_loss 0.09664820987731218 test_loss: 0.10565640926361083
epoch: 33 training_loss 0.08953565940260887 test_loss: 0.09808686375617981
epoch: 34 training_loss 0.09304486893117428 test_loss: 0.1049807071685791
epoch: 35 training_loss 0.08940879157744348 test_loss: 0.11113710403442383
epoch: 36 training_loss 0.08728359265252948 test_loss: 0.1106762409210205
epoch: 37 training_loss 0.08209868628531694 test_loss: 0.11327533721923828
epoch: 38 training_loss 0.08609123475849628 test_loss: 0.11617921590805054
epoch: 39 training_loss 0.08195599028840661 test_loss: 0.11102781295776368
epoch: 40 training_loss 0.08872159646824002 test_loss: 0.10326058864593506
epoch: 41 training_loss 0.0839296218007803 test_loss: 0.1087003231048584
epoch: 42 training_loss 0.0856622844748199 test_loss: 0.12158359289169311
epoch: 43 training_loss 0.08218182610347867 test_loss: 0.11311074495315551
epoch: 44 training_loss 0.07643217651173473 test_loss: 0.11176267862319947
epoch: 45 training_loss 0.08000548465177416 test_loss: 0.11082669496536254
epoch: 46 training_loss 0.08026759305968881 test_loss: 0.1286590576171875
epoch: 47 training_loss 0.08318802397232503 test_loss: 0.1091928243637085
epoch: 48 training_loss 0.07993822483345867 test_loss: 0.10954440832138061
epoch: 49 training_loss 0.07875430658459663 test_loss: 0.1160886287689209
epoch: 50 training_loss 0.07875385297462344 test_loss: 0.09658042192459107
epoch: 51 training_loss 0.0762796682678163 test_loss: 0.11215600967407227
epoch: 52 training_loss 0.07538548793643712 test_loss: 0.11513354778289794
epoch: 53 training_loss 0.07834913644939662 test_loss: 0.11249570846557617
epoch: 54 training_loss 0.08332558883354067 test_loss: 0.12431957721710205
epoch: 55 training_loss 0.07258467992767692 test_loss: 0.11323628425598145
epoch: 56 training_loss 0.078049738984555 test_loss: 0.10926669836044312
epoch: 57 training_loss 0.07505161508917808 test_loss: 0.11782875061035156
epoch: 58 training_loss 0.07719560340046883 test_loss: 0.10067586898803711
epoch: 59 training_loss 0.07588072691112757 test_loss: 0.11771711111068725
epoch: 60 training_loss 0.07652612244710326 test_loss: 0.1015583634376526
epoch: 61 training_loss 0.08169585060328245 test_loss: 0.11060270071029663
epoch: 62 training_loss 0.0773035872913897 test_loss: 0.11589945554733276
epoch: 63 training_loss 0.08111204099841415 test_loss: 0.10652384757995606
epoch: 64 training_loss 0.07791367840021848 test_loss: 0.10130753517150878
epoch: 65 training_loss 0.07568078551441432 test_loss: 0.10160049200057983
epoch: 66 training_loss 0.07274490490555763 test_loss: 0.12472769021987914
epoch: 67 training_loss 0.07043705232441426 test_loss: 0.10720968246459961
epoch: 68 training_loss 0.07307852139696479 test_loss: 0.12341150045394897
epoch: 69 training_loss 0.06527140801772476 test_loss: 0.11957726478576661
epoch: 70 training_loss 0.06513067297637462 test_loss: 0.11565470695495605
epoch: 71 training_loss 0.06770877808332443 test_loss: 0.12280358076095581
epoch: 72 training_loss 0.0684596442990005 test_loss: 0.12555400133132935
epoch: 73 training_loss 0.06873629892244935 test_loss: 0.11648252010345458
epoch: 74 training_loss 0.06447356456890702 test_loss: 0.12269779443740844
epoch: 75 training_loss 0.06525893978774548 test_loss: 0.11639457941055298
epoch: 76 training_loss 0.06759099075570703 test_loss: 0.13465816974639894
epoch: 77 training_loss 0.06602298317477107 test_loss: 0.12522684335708617
epoch: 78 training_loss 0.06709046595729888 test_loss: 0.11867637634277343
epoch: 79 training_loss 0.062494938960298896 test_loss: 0.12252171039581299
epoch: 80 training_loss 0.06193648237735033 test_loss: 0.13292343616485597
epoch: 81 training_loss 0.060259183878079055 test_loss: 0.12392904758453369
epoch: 82 training_loss 0.06465758746489883 test_loss: 0.12069553136825562
epoch: 83 training_loss 0.06563826594501734 test_loss: 0.11711891889572143
epoch: 84 training_loss 0.06338839383795858 test_loss: 0.13021442890167237
epoch: 85 training_loss 0.06044628799892962 test_loss: 0.11198728084564209
epoch: 86 training_loss 0.058437748719006775 test_loss: 0.13445258140563965
epoch: 87 training_loss 0.056925767976790664 test_loss: 0.1220849633216858
epoch: 88 training_loss 0.06024702019058168 test_loss: 0.12432805299758912
epoch: 89 training_loss 0.05515077408403158 test_loss: 0.12690747976303102
epoch: 90 training_loss 0.06279585122130811 test_loss: 0.1166806936264038
epoch: 91 training_loss 0.0566334881959483 test_loss: 0.1309332013130188
epoch: 92 training_loss 0.060573094449937345 test_loss: 0.12006621360778809
epoch: 93 training_loss 0.05787060690112412 test_loss: 0.12342188358306885
epoch: 94 training_loss 0.0591183109767735 test_loss: 0.13987041711807252
epoch: 95 training_loss 0.05751342355273664 test_loss: 0.11787070035934448
epoch: 96 training_loss 0.053316845521330834 test_loss: 0.12357704639434815
epoch: 97 training_loss 0.06288950294721872 test_loss: 0.13143774271011352
epoch: 98 training_loss 0.05635925404727459 test_loss: 0.10511082410812378
epoch: 99 training_loss 0.05225315926596522 test_loss: 0.12670888900756835
epoch: 100 training_loss 0.05505281408317387 test_loss: 0.1337329626083374
epoch: 101 training_loss 0.05822043608874083 test_loss: 0.15463078022003174
epoch: 102 training_loss 0.05567771062254906 test_loss: 0.13805092573165895
epoch: 103 training_loss 0.053310806304216383 test_loss: 0.13219505548477173
epoch: 104 training_loss 0.04966107539832592 test_loss: 0.14655290842056273
epoch: 105 training_loss 0.05413048111833632 test_loss: 0.13126766681671143
epoch: 106 training_loss 0.04491805351339281 test_loss: 0.129726779460907
epoch: 107 training_loss 0.05192751511000097 test_loss: 0.16344515085220337
epoch: 108 training_loss 0.054387469254434107 test_loss: 0.12878401279449464
epoch: 109 training_loss 0.05187039659358561 test_loss: 0.1387476682662964
epoch: 110 training_loss 0.052930799275636674 test_loss: 0.1387869119644165
epoch: 111 training_loss 0.050388226797804234 test_loss: 0.13367823362350464
epoch: 112 training_loss 0.049650493068620564 test_loss: 0.12515778541564943
epoch: 113 training_loss 0.048331488510593774 test_loss: 0.11940866708755493
epoch: 114 training_loss 0.050883640674874185 test_loss: 0.12986488342285157
epoch: 115 training_loss 0.048589346101507544 test_loss: 0.1361333727836609
epoch: 116 training_loss 0.05076445902697742 test_loss: 0.1255077004432678
epoch: 117 training_loss 0.04587378497235477 test_loss: 0.14880355596542358
epoch: 118 training_loss 0.05282210530713201 test_loss: 0.1455820918083191
epoch: 119 training_loss 0.04492051111999899 test_loss: 0.13404626846313478
epoch: 120 training_loss 0.043949160389602185 test_loss: 0.13933597803115844
epoch: 121 training_loss 0.04378933975473046 test_loss: 0.1420690417289734
epoch: 122 training_loss 0.045501975612714886 test_loss: 0.14525638818740844
epoch: 123 training_loss 0.04076531779952347 test_loss: 0.15061707496643068
epoch: 124 training_loss 0.03986957278102636 test_loss: 0.13890327215194703
epoch: 125 training_loss 0.04490525253117084 test_loss: 0.14008947610855102
epoch: 126 training_loss 0.03939567755907774 test_loss: 0.1418072462081909
epoch: 127 training_loss 0.0474233749974519 test_loss: 0.15430136919021606
epoch: 128 training_loss 0.04881337380036712 test_loss: 0.1288472056388855
epoch: 129 training_loss 0.04460899571422487 test_loss: 0.1387952446937561
epoch: 130 training_loss 0.041487198434770105 test_loss: 0.14819614887237548
epoch: 131 training_loss 0.04163093008100986 test_loss: 0.1483291745185852
epoch: 132 training_loss 0.04164741678163409 test_loss: 0.14821442365646362
epoch: 133 training_loss 0.041558435689657926 test_loss: 0.1566474199295044
epoch: 134 training_loss 0.0429749768320471 test_loss: 0.14708739519119263
epoch: 135 training_loss 0.04276639376301319 test_loss: 0.14196830987930298
epoch: 136 training_loss 0.03579477227292955 test_loss: 0.12340339422225952
epoch: 137 training_loss 0.03800576872192323 test_loss: 0.14421069622039795
epoch: 138 training_loss 0.040745020098984244 test_loss: 0.14456905126571656
epoch: 139 training_loss 0.0381049363873899 test_loss: 0.16199309825897218
epoch: 140 training_loss 0.04084403679706156 test_loss: 0.1382456064224243
epoch: 141 training_loss 0.03688641455024481 test_loss: 0.15149352550506592
epoch: 142 training_loss 0.03905852355994284 test_loss: 0.14151607751846312
epoch: 143 training_loss 0.03838518193922937 test_loss: 0.13653382062911987
epoch: 144 training_loss 0.03568973988294601 test_loss: 0.13350728750228882
epoch: 145 training_loss 0.03567699877545238 test_loss: 0.13233230113983155
epoch: 146 training_loss 0.031225817906670273 test_loss: 0.14431982040405272
epoch: 147 training_loss 0.039316218439489604 test_loss: 0.15066924095153808
epoch: 148 training_loss 0.037275291243568064 test_loss: 0.1493394374847412
epoch: 149 training_loss 0.03489632357377559 test_loss: 0.16295331716537476
epoch: 0 training_loss 49.485713481903076 test_loss: 24.21174774169922
epoch: 1 training_loss 18.221478576660157 test_loss: 14.800753784179687
epoch: 2 training_loss 13.253677310943603 test_loss: 11.96439437866211
epoch: 3 training_loss 10.684391613006591 test_loss: 9.683844757080077
epoch: 4 training_loss 8.872737979888916 test_loss: 8.428861236572265
epoch: 5 training_loss 7.561064167022705 test_loss: 7.2921875
epoch: 6 training_loss 6.688636026382446 test_loss: 6.480224609375
epoch: 7 training_loss 5.934066371917725 test_loss: 5.879713439941407
epoch: 8 training_loss 5.467001566886902 test_loss: 5.409569549560547
epoch: 9 training_loss 5.144092226028443 test_loss: 5.135354232788086
epoch: 10 training_loss 4.737863125801087 test_loss: 4.649427795410157
epoch: 11 training_loss 4.433127093315124 test_loss: 4.404234313964844
epoch: 12 training_loss 4.122526288032532 test_loss: 4.373495864868164
epoch: 13 training_loss 3.988229687213898 test_loss: 3.9492191314697265
epoch: 14 training_loss 3.778742470741272 test_loss: 3.791349411010742
epoch: 15 training_loss 3.5787602615356446 test_loss: 3.7666114807128905
epoch: 16 training_loss 3.4984796833992005 test_loss: 3.4979232788085937
epoch: 17 training_loss 3.281501865386963 test_loss: 3.58980712890625
epoch: 18 training_loss 3.2350454211235045 test_loss: 3.3216831207275392
epoch: 19 training_loss 3.1300410270690917 test_loss: 3.2357101440429688
epoch: 20 training_loss 3.1038237023353576 test_loss: 3.2094051361083986
epoch: 21 training_loss 3.0077324604988096 test_loss: 3.176729202270508
epoch: 22 training_loss 2.919645609855652 test_loss: 3.0232479095458986
epoch: 23 training_loss 2.803310706615448 test_loss: 3.0377323150634767
epoch: 24 training_loss 2.7550042772293093 test_loss: 2.875798225402832
epoch: 25 training_loss 2.687922616004944 test_loss: 2.9219539642333983
epoch: 26 training_loss 2.6828134989738466 test_loss: 2.864567184448242
epoch: 27 training_loss 2.555435256958008 test_loss: 2.666354942321777
epoch: 28 training_loss 2.560911388397217 test_loss: 2.671329689025879
epoch: 29 training_loss 2.524259524345398 test_loss: 2.560077667236328
epoch: 30 training_loss 2.4776497507095336 test_loss: 2.583944892883301
epoch: 31 training_loss 2.44083771109581 test_loss: 2.6097536087036133
epoch: 32 training_loss 2.410992660522461 test_loss: 2.5957948684692385
epoch: 33 training_loss 2.329398744106293 test_loss: 2.5833288192749024
epoch: 34 training_loss 2.334164886474609 test_loss: 2.3942771911621095
epoch: 35 training_loss 2.2945104110240937 test_loss: 2.414876937866211
epoch: 36 training_loss 2.224891889095306 test_loss: 2.351274871826172
epoch: 37 training_loss 2.226644767522812 test_loss: 2.3022893905639648
epoch: 38 training_loss 2.1720977461338045 test_loss: 2.431941795349121
epoch: 39 training_loss 2.1674826741218567 test_loss: 2.400859069824219
epoch: 40 training_loss 2.1193160462379455 test_loss: 2.3275850296020506
epoch: 41 training_loss 2.149968538284302 test_loss: 2.2936450958251955
epoch: 42 training_loss 2.1373327207565307 test_loss: 2.127397918701172
epoch: 43 training_loss 2.0794659090042114 test_loss: 2.277850341796875
epoch: 44 training_loss 2.0394562923908235 test_loss: 2.150507164001465
epoch: 45 training_loss 2.057407591342926 test_loss: 2.150302505493164
epoch: 46 training_loss 2.016789137125015 test_loss: 2.101497840881348
epoch: 47 training_loss 2.0047263622283937 test_loss: 2.1062749862670898
epoch: 48 training_loss 1.9546360456943512 test_loss: 2.1459224700927733
epoch: 49 training_loss 1.9869163274765014 test_loss: 2.0052318572998047
epoch: 50 training_loss 1.9958992624282836 test_loss: 2.0372814178466796
epoch: 51 training_loss 1.9541719686985015 test_loss: 2.190962791442871
epoch: 52 training_loss 1.973316012620926 test_loss: 1.9916803359985351
epoch: 53 training_loss 1.915605536699295 test_loss: 2.0615732192993166
epoch: 54 training_loss 1.8965696787834168 test_loss: 1.931451416015625
epoch: 55 training_loss 1.885967539548874 test_loss: 1.9475147247314453
epoch: 56 training_loss 1.8812668788433076 test_loss: 1.9729915618896485
epoch: 57 training_loss 1.886654337644577 test_loss: 2.1123403549194335
epoch: 58 training_loss 1.860675446987152 test_loss: 2.035307693481445
epoch: 59 training_loss 1.8373823130130769 test_loss: 2.066171646118164
epoch: 60 training_loss 1.834041644334793 test_loss: 1.9343122482299804
epoch: 61 training_loss 1.8128657627105713 test_loss: 1.9701316833496094
epoch: 62 training_loss 1.813095716238022 test_loss: 1.8960868835449218
epoch: 63 training_loss 1.7793673300743102 test_loss: 1.851569938659668
epoch: 64 training_loss 1.8010587310791015 test_loss: 2.0160404205322267
epoch: 65 training_loss 1.7677074086666107 test_loss: 1.9502010345458984
epoch: 66 training_loss 1.7631548368930816 test_loss: 1.90357666015625
epoch: 67 training_loss 1.814305099248886 test_loss: 1.9007692337036133
epoch: 68 training_loss 1.7323708498477937 test_loss: 1.9533241271972657
epoch: 69 training_loss 1.744584753513336 test_loss: 1.93626708984375
epoch: 70 training_loss 1.7270213890075683 test_loss: 1.8608432769775392
epoch: 71 training_loss 1.7566417801380156 test_loss: 1.8204740524291991
epoch: 72 training_loss 1.6856638252735139 test_loss: 1.7884653091430665
epoch: 73 training_loss 1.7118801033496858 test_loss: 1.8102788925170898
epoch: 74 training_loss 1.6887729465961456 test_loss: 1.8758468627929688
epoch: 75 training_loss 1.6975952088832855 test_loss: 1.9137178421020509
epoch: 76 training_loss 1.7204000461101532 test_loss: 1.7632518768310548
epoch: 77 training_loss 1.673164380788803 test_loss: 1.8444887161254884
epoch: 78 training_loss 1.700063306093216 test_loss: 1.8709072113037108
epoch: 79 training_loss 1.7024874627590179 test_loss: 1.8179485321044921
epoch: 80 training_loss 1.6481367576122283 test_loss: 1.7956720352172852
epoch: 81 training_loss 1.7092382073402406 test_loss: 1.834834098815918
epoch: 82 training_loss 1.6291556668281555 test_loss: 1.7908809661865235
epoch: 83 training_loss 1.6455792343616487 test_loss: 1.6580102920532227
epoch: 84 training_loss 1.6834020018577576 test_loss: 1.8426115036010742
epoch: 85 training_loss 1.6295979225635528 test_loss: 1.859934425354004
epoch: 86 training_loss 1.6278061008453368 test_loss: 1.7503402709960938
epoch: 87 training_loss 1.6502952909469604 test_loss: 1.6518363952636719
epoch: 88 training_loss 1.659508501291275 test_loss: 1.6973041534423827
epoch: 89 training_loss 1.629096200466156 test_loss: 1.7356231689453125
epoch: 90 training_loss 1.615291612148285 test_loss: 1.7755777359008789
epoch: 91 training_loss 1.5944767642021178 test_loss: 1.6021297454833985
epoch: 92 training_loss 1.6357390820980071 test_loss: 1.732657241821289
epoch: 93 training_loss 1.573028302192688 test_loss: 1.7619802474975585
epoch: 94 training_loss 1.5820799827575684 test_loss: 1.6595130920410157
epoch: 95 training_loss 1.6145153164863586 test_loss: 1.702001953125
epoch: 96 training_loss 1.587179844379425 test_loss: 1.6125486373901368
epoch: 97 training_loss 1.5453261637687683 test_loss: 1.7474449157714844
epoch: 98 training_loss 1.5514235556125642 test_loss: 1.728070068359375
epoch: 99 training_loss 1.5684152150154114 test_loss: 1.7654340744018555
epoch: 100 training_loss 1.564311784505844 test_loss: 1.742831039428711
epoch: 101 training_loss 1.5361608231067658 test_loss: 1.7431427001953126
epoch: 102 training_loss 1.534441007375717 test_loss: 1.7226198196411133
epoch: 103 training_loss 1.5347232055664062 test_loss: 1.6962186813354492
epoch: 104 training_loss 1.5474768447875977 test_loss: 1.6637945175170898
epoch: 105 training_loss 1.52955486536026 test_loss: 1.7449556350708009
epoch: 106 training_loss 1.5322413313388825 test_loss: 1.7156402587890625
epoch: 107 training_loss 1.5295567774772645 test_loss: 1.6886877059936523
epoch: 108 training_loss 1.514610049724579 test_loss: 1.5623795509338378
epoch: 109 training_loss 1.5380897271633147 test_loss: 1.7398117065429688
epoch: 110 training_loss 1.546921181678772 test_loss: 1.6526859283447266
epoch: 111 training_loss 1.5341313695907592 test_loss: 1.7310928344726562
epoch: 112 training_loss 1.4965960001945495 test_loss: 1.600975799560547
epoch: 113 training_loss 1.4886275029182434 test_loss: 1.642976951599121
epoch: 114 training_loss 1.5100428807735442 test_loss: 1.5200702667236328
epoch: 115 training_loss 1.5248316824436188 test_loss: 1.6186065673828125
epoch: 116 training_loss 1.4774137580394744 test_loss: 1.5898284912109375
epoch: 117 training_loss 1.4960131537914276 test_loss: 1.6813615798950194
epoch: 118 training_loss 1.5173534333705903 test_loss: 1.6219005584716797
epoch: 119 training_loss 1.526051470041275 test_loss: 1.586251926422119
epoch: 120 training_loss 1.5127778434753418 test_loss: 1.6109386444091798
epoch: 121 training_loss 1.4821702253818512 test_loss: 1.5749944686889648
epoch: 122 training_loss 1.4893555414676667 test_loss: 1.530451774597168
epoch: 123 training_loss 1.488215217590332 test_loss: 1.5367006301879882
epoch: 124 training_loss 1.462065327167511 test_loss: 1.5694498062133788
epoch: 125 training_loss 1.4928286516666411 test_loss: 1.57554292678833
epoch: 126 training_loss 1.4751663541793822 test_loss: 1.5752025604248048
epoch: 127 training_loss 1.4892125177383422 test_loss: 1.5234550476074218
epoch: 128 training_loss 1.5064678859710694 test_loss: 1.6240556716918946
epoch: 129 training_loss 1.4495680344104767 test_loss: 1.6019798278808595
epoch: 130 training_loss 1.4620437633991241 test_loss: 1.558913516998291
epoch: 131 training_loss 1.4705808007717132 test_loss: 1.5308523178100586
epoch: 132 training_loss 1.4529745280742645 test_loss: 1.6106050491333008
epoch: 133 training_loss 1.446521281003952 test_loss: 1.5933409690856934
epoch: 134 training_loss 1.4441746377944946 test_loss: 1.6096364974975585
epoch: 135 training_loss 1.4300830364227295 test_loss: 1.5164616584777832
epoch: 136 training_loss 1.4421280992031098 test_loss: 1.5507311820983887
epoch: 137 training_loss 1.4243223822116853 test_loss: 1.5916122436523437
epoch: 138 training_loss 1.4778317296504975 test_loss: 1.5546518325805665
epoch: 139 training_loss 1.4389311122894286 test_loss: 1.5624095916748046
epoch: 140 training_loss 1.4553740203380585 test_loss: 1.4920751571655273
epoch: 141 training_loss 1.4197295880317689 test_loss: 1.4677352905273438
epoch: 142 training_loss 1.4298471415042877 test_loss: 1.5038521766662598
epoch: 143 training_loss 1.409969745874405 test_loss: 1.5262766838073731
epoch: 144 training_loss 1.42308070063591 test_loss: 1.5157355308532714
epoch: 145 training_loss 1.4190679800510406 test_loss: 1.5405080795288086
epoch: 146 training_loss 1.403125923871994 test_loss: 1.4815064430236817
epoch: 147 training_loss 1.4174761605262756 test_loss: 1.529905128479004
epoch: 148 training_loss 1.432560247182846 test_loss: 1.5542807579040527
epoch: 149 training_loss 1.4068687808513642 test_loss: 1.5025699615478516
5069.868754401612
episode: 0 training return: tensor(57.7308, device='cuda:0')
episode: 1 training return: tensor(-133.1230, device='cuda:0')
episode: 2 training return: tensor(-194.9591, device='cuda:0')
episode: 3 training return: tensor(-325.4377, device='cuda:0')
epoch: 1 test_true_pfm: 5164.911506126094 sim_pfm: 14.460736413951963
episode: 4 training return: tensor(10.3531, device='cuda:0')
episode: 5 training return: tensor(-127.8672, device='cuda:0')
episode: 6 training return: tensor(-240.4593, device='cuda:0')
episode: 7 training return: tensor(63.9327, device='cuda:0')
epoch: 2 test_true_pfm: 5106.031381760455 sim_pfm: -65.80871509054366
episode: 8 training return: tensor(-84.5614, device='cuda:0')
episode: 9 training return: tensor(-93.8363, device='cuda:0')
episode: 10 training return: tensor(-204.3890, device='cuda:0')
episode: 11 training return: tensor(-118.6223, device='cuda:0')
epoch: 3 test_true_pfm: 5029.062351349837 sim_pfm: 1.9461524911651698
episode: 12 training return: tensor(-26.9867, device='cuda:0')
episode: 13 training return: tensor(3.8368, device='cuda:0')
episode: 14 training return: tensor(-192.2757, device='cuda:0')
episode: 15 training return: tensor(-441.9014, device='cuda:0')
epoch: 4 test_true_pfm: 5124.388125705783 sim_pfm: -181.38401610392611
episode: 16 training return: tensor(-135.9967, device='cuda:0')
episode: 17 training return: tensor(-239.8952, device='cuda:0')
episode: 18 training return: tensor(-44.7062, device='cuda:0')
episode: 19 training return: tensor(-181.4915, device='cuda:0')
epoch: 5 test_true_pfm: 5255.290395675363 sim_pfm: -37.1277566148589
episode: 20 training return: tensor(-128.6080, device='cuda:0')
episode: 21 training return: tensor(-62.9000, device='cuda:0')
episode: 22 training return: tensor(-125.7637, device='cuda:0')
episode: 23 training return: tensor(33.0100, device='cuda:0')
epoch: 6 test_true_pfm: 5187.545247108202 sim_pfm: 158.96980247085835
episode: 24 training return: tensor(63.0917, device='cuda:0')
episode: 25 training return: tensor(-53.4502, device='cuda:0')
episode: 26 training return: tensor(150.2724, device='cuda:0')
episode: 27 training return: tensor(-191.8439, device='cuda:0')
epoch: 7 test_true_pfm: 5120.947579252636 sim_pfm: -16.120233127013005
episode: 28 training return: tensor(-141.5300, device='cuda:0')
episode: 29 training return: tensor(-30.8378, device='cuda:0')
episode: 30 training return: tensor(14.5268, device='cuda:0')
episode: 31 training return: tensor(-82.1625, device='cuda:0')
epoch: 8 test_true_pfm: 5137.000124235733 sim_pfm: 49.48911812420314
episode: 32 training return: tensor(20.7687, device='cuda:0')
episode: 33 training return: tensor(-49.2921, device='cuda:0')
episode: 34 training return: tensor(-110.2521, device='cuda:0')
episode: 35 training return: tensor(45.3982, device='cuda:0')
epoch: 9 test_true_pfm: 5252.139415130254 sim_pfm: 152.307358645446
episode: 36 training return: tensor(130.5791, device='cuda:0')
episode: 37 training return: tensor(-8.3717, device='cuda:0')
episode: 38 training return: tensor(-126.5341, device='cuda:0')
episode: 39 training return: tensor(21.0882, device='cuda:0')
epoch: 10 test_true_pfm: 5093.652350058822 sim_pfm: 64.62332499634552
episode: 40 training return: tensor(-232.2827, device='cuda:0')
episode: 41 training return: tensor(87.5937, device='cuda:0')
episode: 42 training return: tensor(-53.6961, device='cuda:0')
episode: 43 training return: tensor(-81.0839, device='cuda:0')
epoch: 11 test_true_pfm: 5203.927069830591 sim_pfm: 6.361078243722052
episode: 44 training return: tensor(-16.8070, device='cuda:0')
episode: 45 training return: tensor(69.8943, device='cuda:0')
episode: 46 training return: tensor(-24.4609, device='cuda:0')
episode: 47 training return: tensor(48.7313, device='cuda:0')
epoch: 12 test_true_pfm: 5360.0941305026345 sim_pfm: 23.700115308068536
episode: 48 training return: tensor(-70.2351, device='cuda:0')
episode: 49 training return: tensor(-8.6401, device='cuda:0')
episode: 50 training return: tensor(-119.9118, device='cuda:0')
episode: 51 training return: tensor(82.4248, device='cuda:0')
epoch: 13 test_true_pfm: 5152.17854576021 sim_pfm: 37.293344404839445
episode: 52 training return: tensor(37.5937, device='cuda:0')
episode: 53 training return: tensor(-7.2950, device='cuda:0')
episode: 54 training return: tensor(-121.5784, device='cuda:0')
episode: 55 training return: tensor(167.0586, device='cuda:0')
epoch: 14 test_true_pfm: 5264.624376878796 sim_pfm: 117.32497415222072
episode: 56 training return: tensor(-23.5434, device='cuda:0')
episode: 57 training return: tensor(5.7829, device='cuda:0')
episode: 58 training return: tensor(-166.1944, device='cuda:0')
episode: 59 training return: tensor(104.4293, device='cuda:0')
epoch: 15 test_true_pfm: 5229.168538940573 sim_pfm: 115.96290128464655
episode: 60 training return: tensor(-36.4918, device='cuda:0')
episode: 61 training return: tensor(210.0716, device='cuda:0')
episode: 62 training return: tensor(74.7783, device='cuda:0')
episode: 63 training return: tensor(48.3418, device='cuda:0')
epoch: 16 test_true_pfm: 5287.361542183146 sim_pfm: 90.82671363851598
episode: 64 training return: tensor(-28.7626, device='cuda:0')
episode: 65 training return: tensor(82.3071, device='cuda:0')
episode: 66 training return: tensor(24.6962, device='cuda:0')
episode: 67 training return: tensor(76.9439, device='cuda:0')
epoch: 17 test_true_pfm: 5308.079053165858 sim_pfm: 147.66277131958245
episode: 68 training return: tensor(-42.7003, device='cuda:0')
episode: 69 training return: tensor(113.7782, device='cuda:0')
episode: 70 training return: tensor(22.9762, device='cuda:0')
episode: 71 training return: tensor(88.2381, device='cuda:0')
epoch: 18 test_true_pfm: 5328.986166598946 sim_pfm: 99.46678707034637
episode: 72 training return: tensor(53.2818, device='cuda:0')
episode: 73 training return: tensor(29.2660, device='cuda:0')
episode: 74 training return: tensor(-74.9128, device='cuda:0')
episode: 75 training return: tensor(174.1791, device='cuda:0')
epoch: 19 test_true_pfm: 5324.882376738932 sim_pfm: 187.443671771558
episode: 76 training return: tensor(144.1761, device='cuda:0')
episode: 77 training return: tensor(108.5480, device='cuda:0')
episode: 78 training return: tensor(63.9726, device='cuda:0')
episode: 79 training return: tensor(-21.4994, device='cuda:0')
epoch: 20 test_true_pfm: 5281.043106717957 sim_pfm: 181.32069733075332
episode: 80 training return: tensor(194.4846, device='cuda:0')
episode: 81 training return: tensor(20.8835, device='cuda:0')
episode: 82 training return: tensor(193.4630, device='cuda:0')
episode: 83 training return: tensor(16.3423, device='cuda:0')
epoch: 21 test_true_pfm: 5348.381344418863 sim_pfm: 168.15601959995305
episode: 84 training return: tensor(147.2889, device='cuda:0')
episode: 85 training return: tensor(7.8772, device='cuda:0')
episode: 86 training return: tensor(177.5988, device='cuda:0')
episode: 87 training return: tensor(-8.8519, device='cuda:0')
epoch: 22 test_true_pfm: 5411.760161005374 sim_pfm: 295.359401050179
episode: 88 training return: tensor(72.7424, device='cuda:0')
episode: 89 training return: tensor(292.5410, device='cuda:0')
episode: 90 training return: tensor(151.0022, device='cuda:0')
episode: 91 training return: tensor(242.5318, device='cuda:0')
epoch: 23 test_true_pfm: 5422.371537901661 sim_pfm: 198.86730063186647
episode: 92 training return: tensor(109.9914, device='cuda:0')
episode: 93 training return: tensor(262.9489, device='cuda:0')
episode: 94 training return: tensor(207.4277, device='cuda:0')
episode: 95 training return: tensor(11.4848, device='cuda:0')
epoch: 24 test_true_pfm: 5364.500031511484 sim_pfm: 246.72834531401168
episode: 96 training return: tensor(-3.3571, device='cuda:0')
episode: 97 training return: tensor(30.5226, device='cuda:0')
episode: 98 training return: tensor(96.5655, device='cuda:0')
episode: 99 training return: tensor(234.8933, device='cuda:0')
epoch: 25 test_true_pfm: 5334.159075782401 sim_pfm: 179.13203618799648
episode: 100 training return: tensor(216.7983, device='cuda:0')
episode: 101 training return: tensor(286.6177, device='cuda:0')
episode: 102 training return: tensor(171.1007, device='cuda:0')
episode: 103 training return: tensor(198.5319, device='cuda:0')
epoch: 26 test_true_pfm: 5275.39452413663 sim_pfm: 216.36923418263905
episode: 104 training return: tensor(2.6090, device='cuda:0')
episode: 105 training return: tensor(143.9264, device='cuda:0')
episode: 106 training return: tensor(183.0617, device='cuda:0')
episode: 107 training return: tensor(104.1937, device='cuda:0')
epoch: 27 test_true_pfm: 5356.809905553092 sim_pfm: 283.80637715628836
episode: 108 training return: tensor(197.8678, device='cuda:0')
episode: 109 training return: tensor(210.9188, device='cuda:0')
episode: 110 training return: tensor(286.0751, device='cuda:0')
episode: 111 training return: tensor(133.9540, device='cuda:0')
epoch: 28 test_true_pfm: 5474.170288799517 sim_pfm: 266.46597774419934
episode: 112 training return: tensor(106.5537, device='cuda:0')
episode: 113 training return: tensor(249.3179, device='cuda:0')
episode: 114 training return: tensor(229.9001, device='cuda:0')
episode: 115 training return: tensor(82.1717, device='cuda:0')
epoch: 29 test_true_pfm: 5323.677141928367 sim_pfm: 380.70986734345206
episode: 116 training return: tensor(171.7855, device='cuda:0')
episode: 117 training return: tensor(246.1831, device='cuda:0')
episode: 118 training return: tensor(232.1728, device='cuda:0')
episode: 119 training return: tensor(133.4644, device='cuda:0')
epoch: 30 test_true_pfm: 5411.583206316146 sim_pfm: 366.1599207050943
episode: 120 training return: tensor(234.8606, device='cuda:0')
episode: 121 training return: tensor(254.7402, device='cuda:0')
episode: 122 training return: tensor(292.2985, device='cuda:0')
episode: 123 training return: tensor(216.0948, device='cuda:0')
epoch: 31 test_true_pfm: 5432.149641863568 sim_pfm: 309.03426234794705
episode: 124 training return: tensor(132.4977, device='cuda:0')
episode: 125 training return: tensor(150.0876, device='cuda:0')
episode: 126 training return: tensor(48.1579, device='cuda:0')
episode: 127 training return: tensor(171.5306, device='cuda:0')
epoch: 32 test_true_pfm: 5526.323794781395 sim_pfm: 265.6440022429354
episode: 128 training return: tensor(221.7547, device='cuda:0')
episode: 129 training return: tensor(202.6025, device='cuda:0')
episode: 130 training return: tensor(215.8755, device='cuda:0')
episode: 131 training return: tensor(165.6935, device='cuda:0')
epoch: 33 test_true_pfm: 5447.292206374121 sim_pfm: 315.5543549931317
episode: 132 training return: tensor(182.3795, device='cuda:0')
episode: 133 training return: tensor(46.9967, device='cuda:0')
episode: 134 training return: tensor(230.8851, device='cuda:0')
episode: 135 training return: tensor(162.6627, device='cuda:0')
epoch: 34 test_true_pfm: 5378.935821765507 sim_pfm: 341.4644520424772
episode: 136 training return: tensor(265.4724, device='cuda:0')
episode: 137 training return: tensor(33.5900, device='cuda:0')
episode: 138 training return: tensor(338.8661, device='cuda:0')
episode: 139 training return: tensor(155.3318, device='cuda:0')
epoch: 35 test_true_pfm: 5507.505346822841 sim_pfm: 245.46619126053216
episode: 140 training return: tensor(380.2468, device='cuda:0')
episode: 141 training return: tensor(152.8645, device='cuda:0')
episode: 142 training return: tensor(106.4046, device='cuda:0')
episode: 143 training return: tensor(174.0247, device='cuda:0')
epoch: 36 test_true_pfm: 5451.779257087816 sim_pfm: 435.37859485230484
episode: 144 training return: tensor(158.9624, device='cuda:0')
episode: 145 training return: tensor(170.8915, device='cuda:0')
episode: 146 training return: tensor(171.0351, device='cuda:0')
episode: 147 training return: tensor(165.8645, device='cuda:0')
epoch: 37 test_true_pfm: 5503.288298967422 sim_pfm: 271.6611083894192
episode: 148 training return: tensor(124.0472, device='cuda:0')
episode: 149 training return: tensor(148.7372, device='cuda:0')
episode: 150 training return: tensor(284.8183, device='cuda:0')
episode: 151 training return: tensor(88.1235, device='cuda:0')
epoch: 38 test_true_pfm: 5464.594898232742 sim_pfm: 406.28226145748823
episode: 152 training return: tensor(205.1088, device='cuda:0')
episode: 153 training return: tensor(270.5678, device='cuda:0')
episode: 154 training return: tensor(240.6553, device='cuda:0')
episode: 155 training return: tensor(116.2708, device='cuda:0')
epoch: 39 test_true_pfm: 5515.668985204651 sim_pfm: 247.80802375087902
episode: 156 training return: tensor(149.5170, device='cuda:0')
episode: 157 training return: tensor(272.2823, device='cuda:0')
episode: 158 training return: tensor(282.2820, device='cuda:0')
episode: 159 training return: tensor(184.9602, device='cuda:0')
epoch: 40 test_true_pfm: 5547.8694873028535 sim_pfm: 308.1421856348364
episode: 160 training return: tensor(351.8104, device='cuda:0')
episode: 161 training return: tensor(126.9290, device='cuda:0')
episode: 162 training return: tensor(191.4389, device='cuda:0')
episode: 163 training return: tensor(235.0579, device='cuda:0')
epoch: 41 test_true_pfm: 5468.994308172599 sim_pfm: 480.9812805199569
episode: 164 training return: tensor(272.5222, device='cuda:0')
episode: 165 training return: tensor(171.3097, device='cuda:0')
episode: 166 training return: tensor(310.0854, device='cuda:0')
episode: 167 training return: tensor(195.3286, device='cuda:0')
epoch: 42 test_true_pfm: 5596.701578544341 sim_pfm: 404.3851927392534
episode: 168 training return: tensor(233.8472, device='cuda:0')
episode: 169 training return: tensor(147.2137, device='cuda:0')
episode: 170 training return: tensor(107.4982, device='cuda:0')
episode: 171 training return: tensor(270.1530, device='cuda:0')
epoch: 43 test_true_pfm: 5517.6101761216605 sim_pfm: 439.3401884973282
episode: 172 training return: tensor(247.9363, device='cuda:0')
episode: 173 training return: tensor(261.1486, device='cuda:0')
episode: 174 training return: tensor(255.9256, device='cuda:0')
episode: 175 training return: tensor(296.6407, device='cuda:0')
epoch: 44 test_true_pfm: 5520.890392372637 sim_pfm: 382.2339892170955
episode: 176 training return: tensor(227.8835, device='cuda:0')
episode: 177 training return: tensor(212.3276, device='cuda:0')
episode: 178 training return: tensor(209.7688, device='cuda:0')
episode: 179 training return: tensor(298.2648, device='cuda:0')
epoch: 45 test_true_pfm: 5627.254276966059 sim_pfm: 354.1252909384978
episode: 180 training return: tensor(282.5634, device='cuda:0')
episode: 181 training return: tensor(152.5336, device='cuda:0')
episode: 182 training return: tensor(321.8152, device='cuda:0')
episode: 183 training return: tensor(165.4315, device='cuda:0')
epoch: 46 test_true_pfm: 5534.277168319849 sim_pfm: 418.6935058848467
episode: 184 training return: tensor(268.7221, device='cuda:0')
episode: 185 training return: tensor(199.7062, device='cuda:0')
episode: 186 training return: tensor(158.1010, device='cuda:0')
episode: 187 training return: tensor(126.3484, device='cuda:0')
epoch: 47 test_true_pfm: 5626.175111773766 sim_pfm: 335.6698595463725
episode: 188 training return: tensor(239.7259, device='cuda:0')
episode: 189 training return: tensor(239.4904, device='cuda:0')
episode: 190 training return: tensor(98.2811, device='cuda:0')
episode: 191 training return: tensor(289.2193, device='cuda:0')
epoch: 48 test_true_pfm: 5615.291456256168 sim_pfm: 482.42987581812
episode: 192 training return: tensor(364.6175, device='cuda:0')
episode: 193 training return: tensor(145.8555, device='cuda:0')
episode: 194 training return: tensor(299.0123, device='cuda:0')
episode: 195 training return: tensor(241.5718, device='cuda:0')
epoch: 49 test_true_pfm: 5563.52845577327 sim_pfm: 415.5444182778786
episode: 196 training return: tensor(212.1199, device='cuda:0')
episode: 197 training return: tensor(53.9577, device='cuda:0')
episode: 198 training return: tensor(323.4729, device='cuda:0')
episode: 199 training return: tensor(347.7078, device='cuda:0')
epoch: 50 test_true_pfm: 5599.270768143318 sim_pfm: 437.7307897084781
episode: 200 training return: tensor(212.9397, device='cuda:0')
episode: 201 training return: tensor(317.2661, device='cuda:0')
episode: 202 training return: tensor(324.7328, device='cuda:0')
episode: 203 training return: tensor(247.5348, device='cuda:0')
epoch: 51 test_true_pfm: 5587.223429569011 sim_pfm: 396.7172918928554
episode: 204 training return: tensor(211.3988, device='cuda:0')
episode: 205 training return: tensor(256.0791, device='cuda:0')
episode: 206 training return: tensor(344.5722, device='cuda:0')
episode: 207 training return: tensor(447.0487, device='cuda:0')
epoch: 52 test_true_pfm: 5411.084400134416 sim_pfm: 411.71464277491515
episode: 208 training return: tensor(232.6329, device='cuda:0')
episode: 209 training return: tensor(20.1143, device='cuda:0')
episode: 210 training return: tensor(265.1057, device='cuda:0')
episode: 211 training return: tensor(188.4252, device='cuda:0')
epoch: 53 test_true_pfm: 5506.215513025695 sim_pfm: 390.8021060019576
episode: 212 training return: tensor(164.5703, device='cuda:0')
episode: 213 training return: tensor(214.6084, device='cuda:0')
episode: 214 training return: tensor(299.2355, device='cuda:0')
episode: 215 training return: tensor(475.2633, device='cuda:0')
epoch: 54 test_true_pfm: 5699.584456881293 sim_pfm: 465.63780908100307
episode: 216 training return: tensor(319.5652, device='cuda:0')
episode: 217 training return: tensor(314.5658, device='cuda:0')
episode: 218 training return: tensor(317.3961, device='cuda:0')
episode: 219 training return: tensor(212.0254, device='cuda:0')
epoch: 55 test_true_pfm: 5519.168754533605 sim_pfm: 403.65014605394873
episode: 220 training return: tensor(349.7630, device='cuda:0')
episode: 221 training return: tensor(223.5164, device='cuda:0')
episode: 222 training return: tensor(241.6106, device='cuda:0')
episode: 223 training return: tensor(99.5813, device='cuda:0')
epoch: 56 test_true_pfm: 5660.05231770223 sim_pfm: 401.6521670293684
episode: 224 training return: tensor(236.5058, device='cuda:0')
episode: 225 training return: tensor(277.7538, device='cuda:0')
episode: 226 training return: tensor(244.0755, device='cuda:0')
episode: 227 training return: tensor(360.8541, device='cuda:0')
epoch: 57 test_true_pfm: 5658.848036212534 sim_pfm: 366.0500155261252
episode: 228 training return: tensor(175.0180, device='cuda:0')
episode: 229 training return: tensor(86.9928, device='cuda:0')
episode: 230 training return: tensor(208.2375, device='cuda:0')
episode: 231 training return: tensor(391.0657, device='cuda:0')
epoch: 58 test_true_pfm: 5577.691996106907 sim_pfm: 442.89739461864036
episode: 232 training return: tensor(445.8916, device='cuda:0')
episode: 233 training return: tensor(237.9504, device='cuda:0')
episode: 234 training return: tensor(232.8685, device='cuda:0')
episode: 235 training return: tensor(372.4285, device='cuda:0')
epoch: 59 test_true_pfm: 5626.6771430247345 sim_pfm: 354.29978138538235
episode: 236 training return: tensor(296.8474, device='cuda:0')
episode: 237 training return: tensor(386.2586, device='cuda:0')
episode: 238 training return: tensor(166.3848, device='cuda:0')
episode: 239 training return: tensor(153.9637, device='cuda:0')
epoch: 60 test_true_pfm: 5641.248831972131 sim_pfm: 398.73774071615964
episode: 240 training return: tensor(262.7623, device='cuda:0')
episode: 241 training return: tensor(264.9242, device='cuda:0')
episode: 242 training return: tensor(302.1852, device='cuda:0')
episode: 243 training return: tensor(273.3864, device='cuda:0')
epoch: 61 test_true_pfm: 5646.686463970172 sim_pfm: 481.34548803495517
episode: 244 training return: tensor(353.0145, device='cuda:0')
episode: 245 training return: tensor(280.4375, device='cuda:0')
episode: 246 training return: tensor(444.8235, device='cuda:0')
episode: 247 training return: tensor(336.2945, device='cuda:0')
epoch: 62 test_true_pfm: 5685.490845207147 sim_pfm: 407.23207687349833
episode: 248 training return: tensor(223.3891, device='cuda:0')
episode: 249 training return: tensor(373.6374, device='cuda:0')
episode: 250 training return: tensor(393.5108, device='cuda:0')
episode: 251 training return: tensor(259.5147, device='cuda:0')
epoch: 63 test_true_pfm: 5575.973780313361 sim_pfm: 450.38261954288464
episode: 252 training return: tensor(129.6769, device='cuda:0')
episode: 253 training return: tensor(331.9088, device='cuda:0')
episode: 254 training return: tensor(397.0356, device='cuda:0')
episode: 255 training return: tensor(285.7671, device='cuda:0')
epoch: 64 test_true_pfm: 5628.920433037275 sim_pfm: 392.7080312573623
episode: 256 training return: tensor(394.2235, device='cuda:0')
episode: 257 training return: tensor(441.9858, device='cuda:0')
episode: 258 training return: tensor(213.0919, device='cuda:0')
episode: 259 training return: tensor(261.0992, device='cuda:0')
epoch: 65 test_true_pfm: 5594.134249217449 sim_pfm: 429.8364218104786
episode: 260 training return: tensor(361.3997, device='cuda:0')
episode: 261 training return: tensor(260.2666, device='cuda:0')
episode: 262 training return: tensor(141.9348, device='cuda:0')
episode: 263 training return: tensor(269.6253, device='cuda:0')
epoch: 66 test_true_pfm: 5551.7873771327895 sim_pfm: 513.4978187169569
episode: 264 training return: tensor(274.1504, device='cuda:0')
episode: 265 training return: tensor(220.9527, device='cuda:0')
episode: 266 training return: tensor(63.3199, device='cuda:0')
episode: 267 training return: tensor(264.6069, device='cuda:0')
epoch: 67 test_true_pfm: 5656.903517667815 sim_pfm: 486.82461470194784
episode: 268 training return: tensor(406.9560, device='cuda:0')
episode: 269 training return: tensor(264.8791, device='cuda:0')
episode: 270 training return: tensor(170.3791, device='cuda:0')
episode: 271 training return: tensor(315.0570, device='cuda:0')
epoch: 68 test_true_pfm: 5552.791018703392 sim_pfm: 399.23194375564344
episode: 272 training return: tensor(191.7268, device='cuda:0')
episode: 273 training return: tensor(266.6219, device='cuda:0')
episode: 274 training return: tensor(320.6786, device='cuda:0')
episode: 275 training return: tensor(354.5433, device='cuda:0')
epoch: 69 test_true_pfm: 5627.503931446154 sim_pfm: 402.3947986625717
episode: 276 training return: tensor(284.6930, device='cuda:0')
episode: 277 training return: tensor(288.5782, device='cuda:0')
episode: 278 training return: tensor(149.7200, device='cuda:0')
episode: 279 training return: tensor(455.4790, device='cuda:0')
epoch: 70 test_true_pfm: 5643.033155636577 sim_pfm: 512.0472537677269
episode: 280 training return: tensor(357.0713, device='cuda:0')
episode: 281 training return: tensor(252.0377, device='cuda:0')
episode: 282 training return: tensor(122.8278, device='cuda:0')
episode: 283 training return: tensor(262.5678, device='cuda:0')
epoch: 71 test_true_pfm: 5700.810192708622 sim_pfm: 438.0807353302759
episode: 284 training return: tensor(298.3650, device='cuda:0')
episode: 285 training return: tensor(306.0466, device='cuda:0')
episode: 286 training return: tensor(400.3471, device='cuda:0')
episode: 287 training return: tensor(245.8470, device='cuda:0')
epoch: 72 test_true_pfm: 5653.159534382324 sim_pfm: 500.5849232493977
episode: 288 training return: tensor(447.0507, device='cuda:0')
episode: 289 training return: tensor(329.3297, device='cuda:0')
episode: 290 training return: tensor(466.4531, device='cuda:0')
episode: 291 training return: tensor(433.4030, device='cuda:0')
epoch: 73 test_true_pfm: 5570.471143910986 sim_pfm: 434.9039021623709
episode: 292 training return: tensor(308.2080, device='cuda:0')
episode: 293 training return: tensor(338.0630, device='cuda:0')
episode: 294 training return: tensor(391.4814, device='cuda:0')
episode: 295 training return: tensor(335.4900, device='cuda:0')
epoch: 74 test_true_pfm: 5641.29634029901 sim_pfm: 420.4984601511581
episode: 296 training return: tensor(247.9266, device='cuda:0')
episode: 297 training return: tensor(309.6167, device='cuda:0')
episode: 298 training return: tensor(325.6015, device='cuda:0')
episode: 299 training return: tensor(378.0567, device='cuda:0')
epoch: 75 test_true_pfm: 5674.868254224871 sim_pfm: 436.01485512116534
episode: 300 training return: tensor(270.7147, device='cuda:0')
episode: 301 training return: tensor(309.4932, device='cuda:0')
episode: 302 training return: tensor(367.8885, device='cuda:0')
episode: 303 training return: tensor(331.1050, device='cuda:0')
epoch: 76 test_true_pfm: 5631.196633525113 sim_pfm: 459.23157630759914
episode: 304 training return: tensor(423.0440, device='cuda:0')
episode: 305 training return: tensor(485.4095, device='cuda:0')
episode: 306 training return: tensor(338.4159, device='cuda:0')
episode: 307 training return: tensor(200.4319, device='cuda:0')
epoch: 77 test_true_pfm: 5595.546203803405 sim_pfm: 439.69371337556123
episode: 308 training return: tensor(321.5463, device='cuda:0')
episode: 309 training return: tensor(332.2916, device='cuda:0')
episode: 310 training return: tensor(398.9461, device='cuda:0')
episode: 311 training return: tensor(348.1815, device='cuda:0')
epoch: 78 test_true_pfm: 5616.167164974121 sim_pfm: 487.2729610693835
episode: 312 training return: tensor(287.6793, device='cuda:0')
episode: 313 training return: tensor(287.1933, device='cuda:0')
episode: 314 training return: tensor(307.1137, device='cuda:0')
episode: 315 training return: tensor(305.9467, device='cuda:0')
epoch: 79 test_true_pfm: 5621.9663612035465 sim_pfm: 418.6588379524571
episode: 316 training return: tensor(194.9281, device='cuda:0')
episode: 317 training return: tensor(488.6605, device='cuda:0')
episode: 318 training return: tensor(346.5048, device='cuda:0')
episode: 319 training return: tensor(396.1779, device='cuda:0')
epoch: 80 test_true_pfm: 5707.5171320146555 sim_pfm: 475.88249181911425
episode: 320 training return: tensor(351.4836, device='cuda:0')
episode: 321 training return: tensor(270.0920, device='cuda:0')
episode: 322 training return: tensor(450.1707, device='cuda:0')
episode: 323 training return: tensor(326.0898, device='cuda:0')
epoch: 81 test_true_pfm: 5666.7772818262865 sim_pfm: 571.51648556411
episode: 324 training return: tensor(208.5699, device='cuda:0')
episode: 325 training return: tensor(313.8718, device='cuda:0')
episode: 326 training return: tensor(337.5094, device='cuda:0')
episode: 327 training return: tensor(238.2490, device='cuda:0')
epoch: 82 test_true_pfm: 5608.529905782376 sim_pfm: 458.6880476200604
episode: 328 training return: tensor(409.1522, device='cuda:0')
episode: 329 training return: tensor(299.4924, device='cuda:0')
episode: 330 training return: tensor(272.1012, device='cuda:0')
episode: 331 training return: tensor(401.9924, device='cuda:0')
epoch: 83 test_true_pfm: 5673.012103433658 sim_pfm: 494.13849466014653
episode: 332 training return: tensor(385.2301, device='cuda:0')
episode: 333 training return: tensor(436.0768, device='cuda:0')
episode: 334 training return: tensor(218.4568, device='cuda:0')
episode: 335 training return: tensor(380.6360, device='cuda:0')
epoch: 84 test_true_pfm: 5750.16204001546 sim_pfm: 471.1201992080799
episode: 336 training return: tensor(358.5940, device='cuda:0')
episode: 337 training return: tensor(362.6015, device='cuda:0')
episode: 338 training return: tensor(510.2972, device='cuda:0')
episode: 339 training return: tensor(372.5167, device='cuda:0')
epoch: 85 test_true_pfm: 5573.389501836634 sim_pfm: 511.44257064875757
episode: 340 training return: tensor(451.4314, device='cuda:0')
episode: 341 training return: tensor(284.8570, device='cuda:0')
episode: 342 training return: tensor(350.2523, device='cuda:0')
episode: 343 training return: tensor(530.7626, device='cuda:0')
epoch: 86 test_true_pfm: 5663.092259168763 sim_pfm: 532.022696360092
episode: 344 training return: tensor(289.7264, device='cuda:0')
episode: 345 training return: tensor(316.6422, device='cuda:0')
episode: 346 training return: tensor(435.1385, device='cuda:0')
episode: 347 training return: tensor(388.5403, device='cuda:0')
epoch: 87 test_true_pfm: 5684.349089418596 sim_pfm: 523.6876725879071
episode: 348 training return: tensor(218.3748, device='cuda:0')
episode: 349 training return: tensor(371.5746, device='cuda:0')
episode: 350 training return: tensor(361.8481, device='cuda:0')
episode: 351 training return: tensor(398.1129, device='cuda:0')
epoch: 88 test_true_pfm: 5585.414412412622 sim_pfm: 508.8743857103788
episode: 352 training return: tensor(193.2666, device='cuda:0')
episode: 353 training return: tensor(313.1503, device='cuda:0')
episode: 354 training return: tensor(296.2614, device='cuda:0')
episode: 355 training return: tensor(336.8303, device='cuda:0')
epoch: 89 test_true_pfm: 5713.145331780467 sim_pfm: 480.2572509355862
episode: 356 training return: tensor(291.4807, device='cuda:0')
episode: 357 training return: tensor(320.5747, device='cuda:0')
episode: 358 training return: tensor(236.9187, device='cuda:0')
episode: 359 training return: tensor(318.8047, device='cuda:0')
epoch: 90 test_true_pfm: 5760.693347344274 sim_pfm: 533.9143122506017
episode: 360 training return: tensor(488.0881, device='cuda:0')
episode: 361 training return: tensor(274.8933, device='cuda:0')
episode: 362 training return: tensor(382.8310, device='cuda:0')
episode: 363 training return: tensor(417.8271, device='cuda:0')
epoch: 91 test_true_pfm: 5745.815886566935 sim_pfm: 534.2799917157002
episode: 364 training return: tensor(322.0051, device='cuda:0')
episode: 365 training return: tensor(255.7746, device='cuda:0')
episode: 366 training return: tensor(436.2181, device='cuda:0')
episode: 367 training return: tensor(268.9023, device='cuda:0')
epoch: 92 test_true_pfm: 5643.202396372599 sim_pfm: 523.6092285365448
episode: 368 training return: tensor(469.9813, device='cuda:0')
episode: 369 training return: tensor(378.8354, device='cuda:0')
episode: 370 training return: tensor(422.4420, device='cuda:0')
episode: 371 training return: tensor(340.4227, device='cuda:0')
epoch: 93 test_true_pfm: 5644.526275225396 sim_pfm: 537.7074119867272
episode: 372 training return: tensor(445.4080, device='cuda:0')
episode: 373 training return: tensor(405.5469, device='cuda:0')
episode: 374 training return: tensor(189.9572, device='cuda:0')
episode: 375 training return: tensor(415.7675, device='cuda:0')
epoch: 94 test_true_pfm: 5709.932157007369 sim_pfm: 509.87692672184977
episode: 376 training return: tensor(454.0699, device='cuda:0')
episode: 377 training return: tensor(507.8841, device='cuda:0')
episode: 378 training return: tensor(350.8592, device='cuda:0')
episode: 379 training return: tensor(290.8058, device='cuda:0')
epoch: 95 test_true_pfm: 5690.824805608973 sim_pfm: 532.8342294185422
episode: 380 training return: tensor(474.4459, device='cuda:0')
episode: 381 training return: tensor(264.0251, device='cuda:0')
episode: 382 training return: tensor(514.1006, device='cuda:0')
episode: 383 training return: tensor(392.1007, device='cuda:0')
epoch: 96 test_true_pfm: 5661.191230513702 sim_pfm: 485.59828847403213
episode: 384 training return: tensor(272.5114, device='cuda:0')
episode: 385 training return: tensor(308.7321, device='cuda:0')
episode: 386 training return: tensor(423.6304, device='cuda:0')
episode: 387 training return: tensor(366.5932, device='cuda:0')
epoch: 97 test_true_pfm: 5680.57506236041 sim_pfm: 499.0394816630287
episode: 388 training return: tensor(424.2192, device='cuda:0')
episode: 389 training return: tensor(391.4563, device='cuda:0')
episode: 390 training return: tensor(416.2310, device='cuda:0')
episode: 391 training return: tensor(388.3765, device='cuda:0')
epoch: 98 test_true_pfm: 5719.540881570686 sim_pfm: 493.5831303433127
episode: 392 training return: tensor(330.9289, device='cuda:0')
episode: 393 training return: tensor(503.1267, device='cuda:0')
episode: 394 training return: tensor(395.6165, device='cuda:0')
episode: 395 training return: tensor(466.2073, device='cuda:0')
epoch: 99 test_true_pfm: 5708.239823757988 sim_pfm: 462.8532506559859
episode: 396 training return: tensor(329.8051, device='cuda:0')
episode: 397 training return: tensor(369.1745, device='cuda:0')
episode: 398 training return: tensor(163.6731, device='cuda:0')
episode: 399 training return: tensor(335.0488, device='cuda:0')
epoch: 100 test_true_pfm: 5716.748708392457 sim_pfm: 489.1310613414389
episode: 400 training return: tensor(470.2927, device='cuda:0')
episode: 401 training return: tensor(424.0531, device='cuda:0')
episode: 402 training return: tensor(353.0204, device='cuda:0')
episode: 403 training return: tensor(412.5174, device='cuda:0')
epoch: 101 test_true_pfm: 5742.77343820138 sim_pfm: 516.9188896790923
episode: 404 training return: tensor(442.9580, device='cuda:0')
episode: 405 training return: tensor(316.0729, device='cuda:0')
episode: 406 training return: tensor(501.3003, device='cuda:0')
episode: 407 training return: tensor(453.3974, device='cuda:0')
epoch: 102 test_true_pfm: 5653.928160253941 sim_pfm: 442.2641443545387
episode: 408 training return: tensor(192.5501, device='cuda:0')
episode: 409 training return: tensor(347.8268, device='cuda:0')
episode: 410 training return: tensor(326.3188, device='cuda:0')
episode: 411 training return: tensor(255.4535, device='cuda:0')
epoch: 103 test_true_pfm: 5679.479304457539 sim_pfm: 533.0219303481086
episode: 412 training return: tensor(418.0630, device='cuda:0')
episode: 413 training return: tensor(271.6848, device='cuda:0')
episode: 414 training return: tensor(381.5334, device='cuda:0')
episode: 415 training return: tensor(328.5350, device='cuda:0')
epoch: 104 test_true_pfm: 5591.695531045541 sim_pfm: 526.3309676436475
episode: 416 training return: tensor(351.8297, device='cuda:0')
episode: 417 training return: tensor(351.4512, device='cuda:0')
episode: 418 training return: tensor(410.8141, device='cuda:0')
episode: 419 training return: tensor(331.0658, device='cuda:0')
epoch: 105 test_true_pfm: 5654.0396436953415 sim_pfm: 448.1586010624499
episode: 420 training return: tensor(398.3940, device='cuda:0')
episode: 421 training return: tensor(421.5291, device='cuda:0')
episode: 422 training return: tensor(266.0882, device='cuda:0')
episode: 423 training return: tensor(532.3451, device='cuda:0')
epoch: 106 test_true_pfm: 5703.338406008102 sim_pfm: 520.3753121530948
episode: 424 training return: tensor(482.9032, device='cuda:0')
episode: 425 training return: tensor(431.4300, device='cuda:0')
episode: 426 training return: tensor(467.6314, device='cuda:0')
episode: 427 training return: tensor(373.4041, device='cuda:0')
epoch: 107 test_true_pfm: 5678.038030493789 sim_pfm: 539.0792476423085
episode: 428 training return: tensor(462.5143, device='cuda:0')
episode: 429 training return: tensor(318.2477, device='cuda:0')
episode: 430 training return: tensor(300.5938, device='cuda:0')
episode: 431 training return: tensor(565.4858, device='cuda:0')
epoch: 108 test_true_pfm: 5639.211180083035 sim_pfm: 517.9898278160448
episode: 432 training return: tensor(305.5750, device='cuda:0')
episode: 433 training return: tensor(389.1569, device='cuda:0')
episode: 434 training return: tensor(378.0770, device='cuda:0')
episode: 435 training return: tensor(509.8937, device='cuda:0')
epoch: 109 test_true_pfm: 5697.4436369363 sim_pfm: 509.1825998835654
episode: 436 training return: tensor(343.6780, device='cuda:0')
episode: 437 training return: tensor(367.4080, device='cuda:0')
episode: 438 training return: tensor(420.1065, device='cuda:0')
episode: 439 training return: tensor(377.6694, device='cuda:0')
epoch: 110 test_true_pfm: 5690.271850901364 sim_pfm: 429.76457264010486
episode: 440 training return: tensor(343.9722, device='cuda:0')
episode: 441 training return: tensor(384.7160, device='cuda:0')
episode: 442 training return: tensor(440.7812, device='cuda:0')
episode: 443 training return: tensor(282.0294, device='cuda:0')
epoch: 111 test_true_pfm: 5636.941782498469 sim_pfm: 535.1931471864615
episode: 444 training return: tensor(367.5781, device='cuda:0')
episode: 445 training return: tensor(561.4247, device='cuda:0')
episode: 446 training return: tensor(553.3315, device='cuda:0')
episode: 447 training return: tensor(451.6712, device='cuda:0')
epoch: 112 test_true_pfm: 5727.829436562653 sim_pfm: 544.2761910465779
episode: 448 training return: tensor(349.4004, device='cuda:0')
episode: 449 training return: tensor(465.7727, device='cuda:0')
episode: 450 training return: tensor(328.7544, device='cuda:0')
episode: 451 training return: tensor(434.5822, device='cuda:0')
epoch: 113 test_true_pfm: 5780.055415492118 sim_pfm: 560.2808258342557
episode: 452 training return: tensor(297.2146, device='cuda:0')
episode: 453 training return: tensor(311.3770, device='cuda:0')
episode: 454 training return: tensor(499.6111, device='cuda:0')
episode: 455 training return: tensor(413.2725, device='cuda:0')
epoch: 114 test_true_pfm: 5638.8245963095105 sim_pfm: 487.0911192680166
episode: 456 training return: tensor(359.7983, device='cuda:0')
episode: 457 training return: tensor(429.2900, device='cuda:0')
episode: 458 training return: tensor(398.1010, device='cuda:0')
episode: 459 training return: tensor(467.3924, device='cuda:0')
epoch: 115 test_true_pfm: 5688.824532294352 sim_pfm: 550.0882638529098
episode: 460 training return: tensor(483.3995, device='cuda:0')
episode: 461 training return: tensor(323.4913, device='cuda:0')
episode: 462 training return: tensor(469.3077, device='cuda:0')
episode: 463 training return: tensor(465.4833, device='cuda:0')
epoch: 116 test_true_pfm: 5660.8740792920735 sim_pfm: 455.4577350981417
episode: 464 training return: tensor(339.6607, device='cuda:0')
episode: 465 training return: tensor(250.6576, device='cuda:0')
episode: 466 training return: tensor(528.3351, device='cuda:0')
episode: 467 training return: tensor(370.6757, device='cuda:0')
epoch: 117 test_true_pfm: 5673.34873487267 sim_pfm: 529.7441869593944
episode: 468 training return: tensor(457.3844, device='cuda:0')
episode: 469 training return: tensor(367.2864, device='cuda:0')
episode: 470 training return: tensor(193.2650, device='cuda:0')
episode: 471 training return: tensor(408.0375, device='cuda:0')
epoch: 118 test_true_pfm: 5679.372079827518 sim_pfm: 558.0503516766767
episode: 472 training return: tensor(424.1367, device='cuda:0')
episode: 473 training return: tensor(353.6363, device='cuda:0')
episode: 474 training return: tensor(353.6250, device='cuda:0')
episode: 475 training return: tensor(433.1339, device='cuda:0')
epoch: 119 test_true_pfm: 5800.313855639343 sim_pfm: 545.7712551556373
episode: 476 training return: tensor(405.0051, device='cuda:0')
episode: 477 training return: tensor(475.0274, device='cuda:0')
episode: 478 training return: tensor(442.6995, device='cuda:0')
episode: 479 training return: tensor(410.6941, device='cuda:0')
epoch: 120 test_true_pfm: 5758.901978829449 sim_pfm: 381.74346561033354
episode: 480 training return: tensor(397.0791, device='cuda:0')
episode: 481 training return: tensor(285.1283, device='cuda:0')
episode: 482 training return: tensor(293.0906, device='cuda:0')
episode: 483 training return: tensor(436.0079, device='cuda:0')
epoch: 121 test_true_pfm: 5682.440116860743 sim_pfm: 558.9644481382857
episode: 484 training return: tensor(375.0130, device='cuda:0')
episode: 485 training return: tensor(352.2426, device='cuda:0')
episode: 486 training return: tensor(476.4431, device='cuda:0')
episode: 487 training return: tensor(266.5851, device='cuda:0')
epoch: 122 test_true_pfm: 5731.974657248407 sim_pfm: 514.1070821223935
episode: 488 training return: tensor(570.3461, device='cuda:0')
episode: 489 training return: tensor(431.5919, device='cuda:0')
episode: 490 training return: tensor(341.8981, device='cuda:0')
episode: 491 training return: tensor(301.9210, device='cuda:0')
epoch: 123 test_true_pfm: 5696.106669541235 sim_pfm: 586.038596898395
episode: 492 training return: tensor(424.7129, device='cuda:0')
episode: 493 training return: tensor(297.0256, device='cuda:0')
episode: 494 training return: tensor(387.7054, device='cuda:0')
episode: 495 training return: tensor(316.5029, device='cuda:0')
epoch: 124 test_true_pfm: 5672.560766799145 sim_pfm: 613.1071183609214
episode: 496 training return: tensor(420.9997, device='cuda:0')
episode: 497 training return: tensor(351.1031, device='cuda:0')
episode: 498 training return: tensor(375.0555, device='cuda:0')
episode: 499 training return: tensor(351.2121, device='cuda:0')
epoch: 125 test_true_pfm: 5747.020146139695 sim_pfm: 528.7537795578246
episode: 500 training return: tensor(341.1959, device='cuda:0')
episode: 501 training return: tensor(403.5793, device='cuda:0')
episode: 502 training return: tensor(317.5356, device='cuda:0')
episode: 503 training return: tensor(513.4153, device='cuda:0')
epoch: 126 test_true_pfm: 5822.218219677244 sim_pfm: 605.0459759824638
episode: 504 training return: tensor(302.4549, device='cuda:0')
episode: 505 training return: tensor(321.2784, device='cuda:0')
episode: 506 training return: tensor(533.8914, device='cuda:0')
episode: 507 training return: tensor(471.7360, device='cuda:0')
epoch: 127 test_true_pfm: 5741.886653487665 sim_pfm: 543.1673726681232
episode: 508 training return: tensor(413.5739, device='cuda:0')
episode: 509 training return: tensor(337.3004, device='cuda:0')
episode: 510 training return: tensor(387.0741, device='cuda:0')
episode: 511 training return: tensor(358.1379, device='cuda:0')
epoch: 128 test_true_pfm: 5751.073862696939 sim_pfm: 468.98508976384375
episode: 512 training return: tensor(448.7075, device='cuda:0')
episode: 513 training return: tensor(457.4873, device='cuda:0')
episode: 514 training return: tensor(412.7617, device='cuda:0')
episode: 515 training return: tensor(392.5493, device='cuda:0')
epoch: 129 test_true_pfm: 5752.282351019029 sim_pfm: 504.08729636116186
episode: 516 training return: tensor(316.0230, device='cuda:0')
episode: 517 training return: tensor(392.1317, device='cuda:0')
episode: 518 training return: tensor(373.7547, device='cuda:0')
episode: 519 training return: tensor(511.1628, device='cuda:0')
epoch: 130 test_true_pfm: 5736.594678767567 sim_pfm: 493.36508640588727
episode: 520 training return: tensor(386.3999, device='cuda:0')
episode: 521 training return: tensor(535.4188, device='cuda:0')
episode: 522 training return: tensor(345.8406, device='cuda:0')
episode: 523 training return: tensor(448.7590, device='cuda:0')
epoch: 131 test_true_pfm: 5680.949329292797 sim_pfm: 497.42655681298737
episode: 524 training return: tensor(354.9322, device='cuda:0')
episode: 525 training return: tensor(450.2331, device='cuda:0')
episode: 526 training return: tensor(379.3055, device='cuda:0')
episode: 527 training return: tensor(440.0755, device='cuda:0')
epoch: 132 test_true_pfm: 5747.26530377498 sim_pfm: 538.2201976761668
episode: 528 training return: tensor(328.0636, device='cuda:0')
episode: 529 training return: tensor(451.4630, device='cuda:0')
episode: 530 training return: tensor(445.4270, device='cuda:0')
episode: 531 training return: tensor(426.1708, device='cuda:0')
epoch: 133 test_true_pfm: 5828.987943821635 sim_pfm: 531.6080458927512
episode: 532 training return: tensor(494.4410, device='cuda:0')
episode: 533 training return: tensor(412.9537, device='cuda:0')
episode: 534 training return: tensor(405.4144, device='cuda:0')
episode: 535 training return: tensor(423.4796, device='cuda:0')
epoch: 134 test_true_pfm: 5716.226609627636 sim_pfm: 516.3872043202476
episode: 536 training return: tensor(400.6794, device='cuda:0')
episode: 537 training return: tensor(350.7037, device='cuda:0')
episode: 538 training return: tensor(502.4424, device='cuda:0')
episode: 539 training return: tensor(397.3153, device='cuda:0')
epoch: 135 test_true_pfm: 5714.7518451537835 sim_pfm: 554.9439577706702
episode: 540 training return: tensor(483.8779, device='cuda:0')
episode: 541 training return: tensor(389.0735, device='cuda:0')
episode: 542 training return: tensor(442.5898, device='cuda:0')
episode: 543 training return: tensor(357.9975, device='cuda:0')
epoch: 136 test_true_pfm: 5736.444508742435 sim_pfm: 579.3155195975851
episode: 544 training return: tensor(432.3770, device='cuda:0')
episode: 545 training return: tensor(502.8167, device='cuda:0')
episode: 546 training return: tensor(497.8134, device='cuda:0')
episode: 547 training return: tensor(472.2856, device='cuda:0')
epoch: 137 test_true_pfm: 5733.6443248520445 sim_pfm: 527.5059140694793
episode: 548 training return: tensor(566.0826, device='cuda:0')
episode: 549 training return: tensor(252.0693, device='cuda:0')
episode: 550 training return: tensor(462.2014, device='cuda:0')
episode: 551 training return: tensor(310.6392, device='cuda:0')
epoch: 138 test_true_pfm: 5779.097002934913 sim_pfm: 576.2683984421504
episode: 552 training return: tensor(554.5167, device='cuda:0')
episode: 553 training return: tensor(293.9994, device='cuda:0')
episode: 554 training return: tensor(396.7729, device='cuda:0')
episode: 555 training return: tensor(506.6540, device='cuda:0')
epoch: 139 test_true_pfm: 5624.746188658089 sim_pfm: 522.6895612799563
episode: 556 training return: tensor(559.3226, device='cuda:0')
episode: 557 training return: tensor(436.6949, device='cuda:0')
episode: 558 training return: tensor(433.0810, device='cuda:0')
episode: 559 training return: tensor(520.2685, device='cuda:0')
epoch: 140 test_true_pfm: 5740.458265591672 sim_pfm: 577.1343351129908
episode: 560 training return: tensor(295.7923, device='cuda:0')
episode: 561 training return: tensor(382.0753, device='cuda:0')
episode: 562 training return: tensor(350.0080, device='cuda:0')
episode: 563 training return: tensor(397.9027, device='cuda:0')
epoch: 141 test_true_pfm: 5711.869428912708 sim_pfm: 530.5046776723854
episode: 564 training return: tensor(429.8166, device='cuda:0')
episode: 565 training return: tensor(448.1645, device='cuda:0')
episode: 566 training return: tensor(357.1995, device='cuda:0')
episode: 567 training return: tensor(464.0290, device='cuda:0')
epoch: 142 test_true_pfm: 5710.198468361889 sim_pfm: 552.0004521457207
episode: 568 training return: tensor(401.8776, device='cuda:0')
episode: 569 training return: tensor(527.0548, device='cuda:0')
episode: 570 training return: tensor(475.6606, device='cuda:0')
episode: 571 training return: tensor(426.2502, device='cuda:0')
epoch: 143 test_true_pfm: 5805.583627524839 sim_pfm: 563.6814217436186
episode: 572 training return: tensor(509.1748, device='cuda:0')
episode: 573 training return: tensor(347.9263, device='cuda:0')
episode: 574 training return: tensor(562.7815, device='cuda:0')
episode: 575 training return: tensor(432.2347, device='cuda:0')
epoch: 144 test_true_pfm: 5771.342500459944 sim_pfm: 554.653745682716
episode: 576 training return: tensor(474.2452, device='cuda:0')
episode: 577 training return: tensor(299.1800, device='cuda:0')
episode: 578 training return: tensor(375.9492, device='cuda:0')
episode: 579 training return: tensor(407.5317, device='cuda:0')
epoch: 145 test_true_pfm: 5813.971362256605 sim_pfm: 509.5111981566879
episode: 580 training return: tensor(421.0453, device='cuda:0')
episode: 581 training return: tensor(397.7496, device='cuda:0')
episode: 582 training return: tensor(468.2233, device='cuda:0')
episode: 583 training return: tensor(342.5014, device='cuda:0')
epoch: 146 test_true_pfm: 5750.1900527435255 sim_pfm: 615.9492774317041
episode: 584 training return: tensor(434.7964, device='cuda:0')
episode: 585 training return: tensor(284.9114, device='cuda:0')
episode: 586 training return: tensor(403.5978, device='cuda:0')
episode: 587 training return: tensor(365.2798, device='cuda:0')
epoch: 147 test_true_pfm: 5697.516621041182 sim_pfm: 519.1240351481441
episode: 588 training return: tensor(442.0258, device='cuda:0')
episode: 589 training return: tensor(463.4491, device='cuda:0')
episode: 590 training return: tensor(310.6407, device='cuda:0')
episode: 591 training return: tensor(425.7211, device='cuda:0')
epoch: 148 test_true_pfm: 5790.165020161178 sim_pfm: 537.5055006300876
episode: 592 training return: tensor(416.6437, device='cuda:0')
episode: 593 training return: tensor(385.0860, device='cuda:0')
episode: 594 training return: tensor(474.5571, device='cuda:0')
episode: 595 training return: tensor(279.1520, device='cuda:0')
epoch: 149 test_true_pfm: 5800.239671168439 sim_pfm: 543.9329743574586
episode: 596 training return: tensor(468.4055, device='cuda:0')
episode: 597 training return: tensor(429.0114, device='cuda:0')
episode: 598 training return: tensor(480.9703, device='cuda:0')
episode: 599 training return: tensor(449.9247, device='cuda:0')
epoch: 150 test_true_pfm: 5757.349032018091 sim_pfm: 581.7781364496719
