['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '2', '--data', '3000']
epoch: 0 training_loss 0.2524627132713795 test_loss: 0.10204540491104126
epoch: 1 training_loss 0.2002255930006504 test_loss: 0.09867622256278992
epoch: 2 training_loss 0.1973771630972624 test_loss: 0.09483522176742554
epoch: 3 training_loss 0.18661804229021073 test_loss: 0.1010543704032898
epoch: 4 training_loss 0.18608947686851024 test_loss: 0.09935075044631958
epoch: 5 training_loss 0.1830730940401554 test_loss: 0.0943239152431488
epoch: 6 training_loss 0.19131310641765595 test_loss: 0.09537745118141175
epoch: 7 training_loss 0.17522414222359659 test_loss: 0.09497827887535096
epoch: 8 training_loss 0.1859166356921196 test_loss: 0.09229782223701477
epoch: 9 training_loss 0.1841251278668642 test_loss: 0.09394930005073547
epoch: 10 training_loss 0.1773095493018627 test_loss: 0.10582238435745239
epoch: 11 training_loss 0.19103375650942325 test_loss: 0.095551997423172
epoch: 12 training_loss 0.16886052310466768 test_loss: 0.09794878959655762
epoch: 13 training_loss 0.17694177106022835 test_loss: 0.09671139717102051
epoch: 14 training_loss 0.17620957598090173 test_loss: 0.09669869542121887
epoch: 15 training_loss 0.18137188941240312 test_loss: 0.09392563104629517
epoch: 16 training_loss 0.17604018181562422 test_loss: 0.0982572615146637
epoch: 17 training_loss 0.174310013204813 test_loss: 0.09623767137527466
epoch: 18 training_loss 0.1747065255045891 test_loss: 0.0961256742477417
epoch: 19 training_loss 0.17299985863268375 test_loss: 0.09658600091934204
epoch: 20 training_loss 0.17293017588555812 test_loss: 0.09720142483711243
epoch: 21 training_loss 0.17496627189218997 test_loss: 0.09236931204795837
epoch: 22 training_loss 0.17496973730623722 test_loss: 0.09614169597625732
epoch: 23 training_loss 0.17478637993335724 test_loss: 0.10084801912307739
epoch: 24 training_loss 0.17621381424367427 test_loss: 0.10419323444366455
epoch: 25 training_loss 0.1743925393372774 test_loss: 0.09581164121627808
epoch: 26 training_loss 0.16576552957296373 test_loss: 0.09611297249794007
epoch: 27 training_loss 0.1698189878463745 test_loss: 0.09818705320358276
epoch: 28 training_loss 0.17959740526974202 test_loss: 0.09602282643318176
epoch: 29 training_loss 0.1694940010458231 test_loss: 0.09776766896247864
epoch: 30 training_loss 0.1736206201463938 test_loss: 0.09839602112770081
epoch: 31 training_loss 0.16631214197725058 test_loss: 0.10235918760299682
epoch: 32 training_loss 0.16221600636839867 test_loss: 0.0947149157524109
epoch: 33 training_loss 0.1649067034199834 test_loss: 0.09319745302200318
epoch: 34 training_loss 0.16948657155036925 test_loss: 0.1007804274559021
epoch: 35 training_loss 0.17348880410194398 test_loss: 0.0951033353805542
epoch: 36 training_loss 0.17220044240355492 test_loss: 0.09694642424583436
epoch: 37 training_loss 0.16912302926182746 test_loss: 0.0924460232257843
epoch: 38 training_loss 0.1621854981780052 test_loss: 0.1006617784500122
epoch: 39 training_loss 0.1605124483257532 test_loss: 0.1010590672492981
epoch: 40 training_loss 0.1618184044957161 test_loss: 0.10095163583755493
epoch: 41 training_loss 0.16364993579685688 test_loss: 0.101604425907135
epoch: 42 training_loss 0.15956753239035606 test_loss: 0.10404787063598633
epoch: 43 training_loss 0.16447173848748206 test_loss: 0.10404725074768066
epoch: 44 training_loss 0.1654799073934555 test_loss: 0.09945096373558045
epoch: 45 training_loss 0.16507298298180104 test_loss: 0.10568430423736572
epoch: 46 training_loss 0.16375631179660557 test_loss: 0.10717644691467285
epoch: 47 training_loss 0.1599578044563532 test_loss: 0.1072277545928955
epoch: 48 training_loss 0.15951566234230996 test_loss: 0.11007047891616821
epoch: 49 training_loss 0.156134088113904 test_loss: 0.11033676862716675
epoch: 50 training_loss 0.1612721037864685 test_loss: 0.10914676189422608
epoch: 51 training_loss 0.15299018971621992 test_loss: 0.11178885698318482
epoch: 52 training_loss 0.16046621054410934 test_loss: 0.11192346811294555
epoch: 53 training_loss 0.1522369408607483 test_loss: 0.10710523128509522
epoch: 54 training_loss 0.15782308116555213 test_loss: 0.10765846967697143
epoch: 55 training_loss 0.15801106996834277 test_loss: 0.10803781747817993
epoch: 56 training_loss 0.1509851622581482 test_loss: 0.11150904893875122
epoch: 57 training_loss 0.15647476360201837 test_loss: 0.11481624841690063
epoch: 58 training_loss 0.15819947570562362 test_loss: 0.10726653337478638
epoch: 59 training_loss 0.15463441893458366 test_loss: 0.11054115295410157
epoch: 60 training_loss 0.15441140372306109 test_loss: 0.11187009811401367
epoch: 61 training_loss 0.14438515849411487 test_loss: 0.11788657903671265
epoch: 62 training_loss 0.14702829968184233 test_loss: 0.11223307847976685
epoch: 63 training_loss 0.15216843090951443 test_loss: 0.11124234199523926
epoch: 64 training_loss 0.14851231388747693 test_loss: 0.11545290946960449
epoch: 65 training_loss 0.15104271464049815 test_loss: 0.10941464900970459
epoch: 66 training_loss 0.15913977921009065 test_loss: 0.1129732608795166
epoch: 67 training_loss 0.14792021229863167 test_loss: 0.11086963415145874
epoch: 68 training_loss 0.15052300430834292 test_loss: 0.12143809795379638
epoch: 69 training_loss 0.1550524478405714 test_loss: 0.11428984403610229
epoch: 70 training_loss 0.1479348748922348 test_loss: 0.11733213663101197
epoch: 71 training_loss 0.13681095756590367 test_loss: 0.11367172002792358
epoch: 72 training_loss 0.14404792945832015 test_loss: 0.11756445169448852
epoch: 73 training_loss 0.14435389604419469 test_loss: 0.11967658996582031
epoch: 74 training_loss 0.13794377826154233 test_loss: 0.11606944799423217
epoch: 75 training_loss 0.13463728584349155 test_loss: 0.12590264081954955
epoch: 76 training_loss 0.13034997168928386 test_loss: 0.11516079902648926
epoch: 77 training_loss 0.1368065520375967 test_loss: 0.11457974910736084
epoch: 78 training_loss 0.13557762648910285 test_loss: 0.12203370332717896
epoch: 79 training_loss 0.13631723649799823 test_loss: 0.1161739706993103
epoch: 80 training_loss 0.13044611383229493 test_loss: 0.1165112853050232
epoch: 81 training_loss 0.13689800210297107 test_loss: 0.12099500894546508
epoch: 82 training_loss 0.13165204226970673 test_loss: 0.11618659496307374
epoch: 83 training_loss 0.13104293916374446 test_loss: 0.11584961414337158
epoch: 84 training_loss 0.12465163983404637 test_loss: 0.1274121880531311
epoch: 85 training_loss 0.13329869160428642 test_loss: 0.12378709316253662
epoch: 86 training_loss 0.1348308601230383 test_loss: 0.12288765907287598
epoch: 87 training_loss 0.12397649295628071 test_loss: 0.12488365173339844
epoch: 88 training_loss 0.1218425927683711 test_loss: 0.12389683723449707
epoch: 89 training_loss 0.11948458444327116 test_loss: 0.1278479814529419
epoch: 90 training_loss 0.12815841037780046 test_loss: 0.12012611627578736
epoch: 91 training_loss 0.12750523881986736 test_loss: 0.11959623098373413
epoch: 92 training_loss 0.12305390130728483 test_loss: 0.1305635690689087
epoch: 93 training_loss 0.11786575328558684 test_loss: 0.12700390815734863
epoch: 94 training_loss 0.1300343234091997 test_loss: 0.12664647102355958
epoch: 95 training_loss 0.11512334167957305 test_loss: 0.13794350624084473
epoch: 96 training_loss 0.11613777987658977 test_loss: 0.12337220907211303
epoch: 97 training_loss 0.11831214368343353 test_loss: 0.12960355281829833
epoch: 98 training_loss 0.11604380115866661 test_loss: 0.13531956672668458
epoch: 99 training_loss 0.10774277415126562 test_loss: 0.12958284616470336
epoch: 100 training_loss 0.1066589954122901 test_loss: 0.14337034225463868
epoch: 101 training_loss 0.11339921340346336 test_loss: 0.13302109241485596
epoch: 102 training_loss 0.10912915665656328 test_loss: 0.14247987270355225
epoch: 103 training_loss 0.11166715353727341 test_loss: 0.14547590017318726
epoch: 104 training_loss 0.11207176946103573 test_loss: 0.13853830099105835
epoch: 105 training_loss 0.10572624459862709 test_loss: 0.14418461322784423
epoch: 106 training_loss 0.10316886130720376 test_loss: 0.1414958119392395
epoch: 107 training_loss 0.10111000422388315 test_loss: 0.14496017694473268
epoch: 108 training_loss 0.09990718998014927 test_loss: 0.14592493772506715
epoch: 109 training_loss 0.10450653854757547 test_loss: 0.14491655826568603
epoch: 110 training_loss 0.09751328520476818 test_loss: 0.14278851747512816
epoch: 111 training_loss 0.10131039626896382 test_loss: 0.15077334642410278
epoch: 112 training_loss 0.09717967621982097 test_loss: 0.14369163513183594
epoch: 113 training_loss 0.10570392157882452 test_loss: 0.1480473041534424
epoch: 114 training_loss 0.0983088331669569 test_loss: 0.14658697843551635
epoch: 115 training_loss 0.10347018610686064 test_loss: 0.1539188504219055
epoch: 116 training_loss 0.10176343351602554 test_loss: 0.14653234481811522
epoch: 117 training_loss 0.09932360161095857 test_loss: 0.1462607979774475
epoch: 118 training_loss 0.09377716956660151 test_loss: 0.1473510146141052
epoch: 119 training_loss 0.0918207934871316 test_loss: 0.15854004621505738
epoch: 120 training_loss 0.08553179077804089 test_loss: 0.1601096510887146
epoch: 121 training_loss 0.09040585357695818 test_loss: 0.14232972860336304
epoch: 122 training_loss 0.09705546010285616 test_loss: 0.15570623874664308
epoch: 123 training_loss 0.0942028172314167 test_loss: 0.16982699632644654
epoch: 124 training_loss 0.08629614418372512 test_loss: 0.16270028352737426
epoch: 125 training_loss 0.08700434820726514 test_loss: 0.14804346561431886
epoch: 126 training_loss 0.0872468407265842 test_loss: 0.1660679578781128
epoch: 127 training_loss 0.08928483378142119 test_loss: 0.16640641689300537
epoch: 128 training_loss 0.08194952186197042 test_loss: 0.15968159437179566
epoch: 129 training_loss 0.08245107209309936 test_loss: 0.1653369903564453
epoch: 130 training_loss 0.08336475328542292 test_loss: 0.1647631049156189
epoch: 131 training_loss 0.08234429059550166 test_loss: 0.16307377815246582
epoch: 132 training_loss 0.0841448163986206 test_loss: 0.1639963746070862
epoch: 133 training_loss 0.0784710200689733 test_loss: 0.17383135557174684
epoch: 134 training_loss 0.0787379046715796 test_loss: 0.17269352674484253
epoch: 135 training_loss 0.0788989525474608 test_loss: 0.1627817153930664
epoch: 136 training_loss 0.08055604759603739 test_loss: 0.16556413173675538
epoch: 137 training_loss 0.07341337705031037 test_loss: 0.17536603212356566
epoch: 138 training_loss 0.07196050344035029 test_loss: 0.17920961380004882
epoch: 139 training_loss 0.07557026665657758 test_loss: 0.17796388864517212
epoch: 140 training_loss 0.0797155748680234 test_loss: 0.18556336164474488
epoch: 141 training_loss 0.07823504108935594 test_loss: 0.17199693918228148
epoch: 142 training_loss 0.07614932715892792 test_loss: 0.1735774278640747
epoch: 143 training_loss 0.06783478107303381 test_loss: 0.17231920957565308
epoch: 144 training_loss 0.07129298284649849 test_loss: 0.17352205514907837
epoch: 145 training_loss 0.06768754418939352 test_loss: 0.17257609367370605
epoch: 146 training_loss 0.06939017336815595 test_loss: 0.17576342821121216
epoch: 147 training_loss 0.0656000984273851 test_loss: 0.17762001752853393
epoch: 148 training_loss 0.064797098711133 test_loss: 0.18606775999069214
epoch: 149 training_loss 0.07292254004627466 test_loss: 0.18316295146942138
epoch: 0 training_loss 8.394295339584351 test_loss: 2.3878637313842774
epoch: 1 training_loss 3.8273951530456545 test_loss: 1.5328404426574707
epoch: 2 training_loss 2.6821552932262422 test_loss: 1.2015741348266602
epoch: 3 training_loss 2.224081416130066 test_loss: 0.9975186347961426
epoch: 4 training_loss 1.8765510714054108 test_loss: 0.8860457420349122
epoch: 5 training_loss 1.6934253287315368 test_loss: 0.8159408569335938
epoch: 6 training_loss 1.54998131275177 test_loss: 0.7169878482818604
epoch: 7 training_loss 1.417898268699646 test_loss: 0.672338342666626
epoch: 8 training_loss 1.3316454148292542 test_loss: 0.6411739349365234
epoch: 9 training_loss 1.2719707190990448 test_loss: 0.5994574069976807
epoch: 10 training_loss 1.1799275022745133 test_loss: 0.5765516757965088
epoch: 11 training_loss 1.1310065299272538 test_loss: 0.553156852722168
epoch: 12 training_loss 1.1018637746572495 test_loss: 0.5293243885040283
epoch: 13 training_loss 1.060067092180252 test_loss: 0.516794490814209
epoch: 14 training_loss 1.0314450937509536 test_loss: 0.5055282592773438
epoch: 15 training_loss 1.0067658805847168 test_loss: 0.4783015727996826
epoch: 16 training_loss 0.9933633470535278 test_loss: 0.4728553771972656
epoch: 17 training_loss 0.9619862842559814 test_loss: 0.4702911853790283
epoch: 18 training_loss 0.9246447056531906 test_loss: 0.4557699203491211
epoch: 19 training_loss 0.9298609572649003 test_loss: 0.46152920722961427
epoch: 20 training_loss 0.8927436190843582 test_loss: 0.432590389251709
epoch: 21 training_loss 0.8727862340211868 test_loss: 0.4237508296966553
epoch: 22 training_loss 0.8660325050354004 test_loss: 0.4300984859466553
epoch: 23 training_loss 0.8644165819883347 test_loss: 0.4187017440795898
epoch: 24 training_loss 0.8447450381517411 test_loss: 0.40442829132080077
epoch: 25 training_loss 0.8317294669151306 test_loss: 0.4040225982666016
epoch: 26 training_loss 0.8171757715940475 test_loss: 0.3966045379638672
epoch: 27 training_loss 0.8024581074714661 test_loss: 0.39124221801757814
epoch: 28 training_loss 0.7954246205091476 test_loss: 0.3847110509872437
epoch: 29 training_loss 0.7863888835906983 test_loss: 0.3837254762649536
epoch: 30 training_loss 0.7653374046087265 test_loss: 0.38713147640228274
epoch: 31 training_loss 0.7655396515130997 test_loss: 0.3904407501220703
epoch: 32 training_loss 0.7704857379198075 test_loss: 0.37975828647613524
epoch: 33 training_loss 0.7521436309814453 test_loss: 0.36499199867248533
epoch: 34 training_loss 0.7433598476648331 test_loss: 0.3727705001831055
epoch: 35 training_loss 0.7462243896722793 test_loss: 0.35662012100219725
epoch: 36 training_loss 0.7308008289337158 test_loss: 0.369072413444519
epoch: 37 training_loss 0.7276519519090653 test_loss: 0.35195283889770507
epoch: 38 training_loss 0.7147757834196091 test_loss: 0.35349981784820556
epoch: 39 training_loss 0.7183646363019943 test_loss: 0.34878804683685305
epoch: 40 training_loss 0.7101864272356033 test_loss: 0.347186803817749
epoch: 41 training_loss 0.7098587852716446 test_loss: 0.3478773355484009
epoch: 42 training_loss 0.685950368642807 test_loss: 0.33752529621124266
epoch: 43 training_loss 0.6791517823934555 test_loss: 0.36315407752990725
epoch: 44 training_loss 0.6822406601905823 test_loss: 0.34615292549133303
epoch: 45 training_loss 0.6703803569078446 test_loss: 0.3282103300094604
epoch: 46 training_loss 0.6805644190311432 test_loss: 0.3387512445449829
epoch: 47 training_loss 0.6685759973526001 test_loss: 0.3365729093551636
epoch: 48 training_loss 0.6659733295440674 test_loss: 0.32690417766571045
epoch: 49 training_loss 0.6686422067880631 test_loss: 0.3490281105041504
epoch: 50 training_loss 0.6494171363115311 test_loss: 0.320219612121582
epoch: 51 training_loss 0.6546191638708114 test_loss: 0.33701348304748535
epoch: 52 training_loss 0.654492616057396 test_loss: 0.3477615833282471
epoch: 53 training_loss 0.6457878279685975 test_loss: 0.3349339246749878
epoch: 54 training_loss 0.647046103477478 test_loss: 0.31760740280151367
epoch: 55 training_loss 0.6304993987083435 test_loss: 0.31732726097106934
epoch: 56 training_loss 0.6374655503034592 test_loss: 0.3217071533203125
epoch: 57 training_loss 0.6297819936275482 test_loss: 0.3090581655502319
epoch: 58 training_loss 0.6182411903142929 test_loss: 0.30836465358734133
epoch: 59 training_loss 0.6193963348865509 test_loss: 0.3033822774887085
epoch: 60 training_loss 0.6214729857444763 test_loss: 0.30733840465545653
epoch: 61 training_loss 0.6248022800683976 test_loss: 0.31988959312438964
epoch: 62 training_loss 0.6108328372240066 test_loss: 0.30107343196868896
epoch: 63 training_loss 0.6089743500947953 test_loss: 0.2980828285217285
epoch: 64 training_loss 0.6075424087047577 test_loss: 0.2975724935531616
epoch: 65 training_loss 0.6017788898944855 test_loss: 0.3014951229095459
epoch: 66 training_loss 0.6082584834098816 test_loss: 0.2926456928253174
epoch: 67 training_loss 0.597080317735672 test_loss: 0.29148890972137453
epoch: 68 training_loss 0.5935957592725754 test_loss: 0.31953113079071044
epoch: 69 training_loss 0.6038629877567291 test_loss: 0.29612908363342283
epoch: 70 training_loss 0.5975772941112518 test_loss: 0.29975988864898684
epoch: 71 training_loss 0.5801105827093125 test_loss: 0.294508171081543
epoch: 72 training_loss 0.5791979193687439 test_loss: 0.3016798734664917
epoch: 73 training_loss 0.5859407699108123 test_loss: 0.28854100704193114
epoch: 74 training_loss 0.5820280587673188 test_loss: 0.2927040338516235
epoch: 75 training_loss 0.5801589763164521 test_loss: 0.29864001274108887
epoch: 76 training_loss 0.5863399541378022 test_loss: 0.2884174585342407
epoch: 77 training_loss 0.5786521762609482 test_loss: 0.28771724700927737
epoch: 78 training_loss 0.5747409647703171 test_loss: 0.303722882270813
epoch: 79 training_loss 0.574735352396965 test_loss: 0.28262403011322024
epoch: 80 training_loss 0.5615817305445671 test_loss: 0.2849406719207764
epoch: 81 training_loss 0.5670954525470734 test_loss: 0.28576562404632566
epoch: 82 training_loss 0.5759586095809937 test_loss: 0.2780990362167358
epoch: 83 training_loss 0.5644660490751267 test_loss: 0.278440260887146
epoch: 84 training_loss 0.5560503581166267 test_loss: 0.2818771839141846
epoch: 85 training_loss 0.5567085382342338 test_loss: 0.2783719301223755
epoch: 86 training_loss 0.5675504589080811 test_loss: 0.28404419422149657
epoch: 87 training_loss 0.5694038838148117 test_loss: 0.2957471370697021
epoch: 88 training_loss 0.5554456523060799 test_loss: 0.27380027770996096
epoch: 89 training_loss 0.5561826568841934 test_loss: 0.2816270589828491
epoch: 90 training_loss 0.5571399545669555 test_loss: 0.2806884527206421
epoch: 91 training_loss 0.5545702037215233 test_loss: 0.27903430461883544
epoch: 92 training_loss 0.5535114333033562 test_loss: 0.2688791275024414
epoch: 93 training_loss 0.5589750757813454 test_loss: 0.2734579563140869
epoch: 94 training_loss 0.5521486681699753 test_loss: 0.2791935682296753
epoch: 95 training_loss 0.5407774072885513 test_loss: 0.2693443775177002
epoch: 96 training_loss 0.5420087051391601 test_loss: 0.2742048501968384
epoch: 97 training_loss 0.5373051837086678 test_loss: 0.26925108432769773
epoch: 98 training_loss 0.549429557621479 test_loss: 0.2633167266845703
epoch: 99 training_loss 0.5418654215335846 test_loss: 0.26588413715362547
epoch: 100 training_loss 0.5327695971727371 test_loss: 0.2752470731735229
epoch: 101 training_loss 0.5400968816876411 test_loss: 0.2631454229354858
epoch: 102 training_loss 0.5482876765727996 test_loss: 0.27797636985778806
epoch: 103 training_loss 0.5374928861856461 test_loss: 0.2673455476760864
epoch: 104 training_loss 0.5461133900284767 test_loss: 0.27168190479278564
epoch: 105 training_loss 0.5391760513186454 test_loss: 0.26190781593322754
epoch: 106 training_loss 0.526199332177639 test_loss: 0.2609914541244507
epoch: 107 training_loss 0.5390762957930565 test_loss: 0.2686255216598511
epoch: 108 training_loss 0.5330843263864518 test_loss: 0.26026244163513185
epoch: 109 training_loss 0.5252335220575333 test_loss: 0.2666919231414795
epoch: 110 training_loss 0.5257896691560745 test_loss: 0.26975820064544676
epoch: 111 training_loss 0.523953292965889 test_loss: 0.2623615264892578
epoch: 112 training_loss 0.5258031842112542 test_loss: 0.267846941947937
epoch: 113 training_loss 0.5344379335641861 test_loss: 0.2662402391433716
epoch: 114 training_loss 0.5197196668386459 test_loss: 0.25986952781677247
epoch: 115 training_loss 0.5196761664748192 test_loss: 0.2586045742034912
epoch: 116 training_loss 0.5230404525995255 test_loss: 0.2610323429107666
epoch: 117 training_loss 0.523902312219143 test_loss: 0.25816047191619873
epoch: 118 training_loss 0.5211217415332794 test_loss: 0.2856522798538208
epoch: 119 training_loss 0.5170954895019532 test_loss: 0.2685049533843994
epoch: 120 training_loss 0.5241768646240235 test_loss: 0.25913825035095217
epoch: 121 training_loss 0.5153316196799278 test_loss: 0.27306618690490725
epoch: 122 training_loss 0.5205900049209595 test_loss: 0.25293042659759524
epoch: 123 training_loss 0.5219307732582092 test_loss: 0.25448577404022216
epoch: 124 training_loss 0.5081130811572074 test_loss: 0.2730041265487671
epoch: 125 training_loss 0.5131247913837433 test_loss: 0.2577024221420288
epoch: 126 training_loss 0.519529218673706 test_loss: 0.2675931453704834
epoch: 127 training_loss 0.5120260328054428 test_loss: 0.259115195274353
epoch: 128 training_loss 0.5117530360817909 test_loss: 0.2745767831802368
epoch: 129 training_loss 0.5155945694446564 test_loss: 0.2635584831237793
epoch: 130 training_loss 0.5040217360854149 test_loss: 0.25213425159454345
epoch: 131 training_loss 0.5017660254240036 test_loss: 0.26336472034454345
epoch: 132 training_loss 0.5070396310091019 test_loss: 0.2563436031341553
epoch: 133 training_loss 0.5108455324172974 test_loss: 0.2646225929260254
epoch: 134 training_loss 0.5143921616673469 test_loss: 0.2588324546813965
epoch: 135 training_loss 0.5153593930602074 test_loss: 0.24864506721496582
epoch: 136 training_loss 0.5030772522091865 test_loss: 0.26434926986694335
epoch: 137 training_loss 0.5063876691460609 test_loss: 0.2534282445907593
epoch: 138 training_loss 0.4965787985920906 test_loss: 0.2568854093551636
epoch: 139 training_loss 0.5020508688688278 test_loss: 0.24832007884979249
epoch: 140 training_loss 0.5051633289456368 test_loss: 0.2497631072998047
epoch: 141 training_loss 0.4953397756814957 test_loss: 0.24737851619720458
epoch: 142 training_loss 0.4994871312379837 test_loss: 0.24976537227630616
epoch: 143 training_loss 0.5104488787055016 test_loss: 0.24723384380340577
epoch: 144 training_loss 0.5022441896796227 test_loss: 0.265553879737854
epoch: 145 training_loss 0.5024123674631119 test_loss: 0.25030858516693116
epoch: 146 training_loss 0.5027160677313804 test_loss: 0.24769363403320313
epoch: 147 training_loss 0.4898781985044479 test_loss: 0.2507314205169678
epoch: 148 training_loss 0.49652893900871276 test_loss: 0.24712903499603273
epoch: 149 training_loss 0.4905677419900894 test_loss: 0.24422521591186525
2457.303190368818
episode: 0 training return: tensor(11.0320, device='cuda:0')
episode: 1 training return: tensor(104.6949, device='cuda:0')
episode: 2 training return: tensor(466.0991, device='cuda:0')
episode: 3 training return: tensor(-32.3661, device='cuda:0')
epoch: 1 test_true_pfm: 1441.2216581111873 sim_pfm: -82.53904594329651
episode: 4 training return: tensor(-5.6645, device='cuda:0')
episode: 5 training return: tensor(39.0557, device='cuda:0')
episode: 6 training return: tensor(324.2165, device='cuda:0')
episode: 7 training return: tensor(-18.9762, device='cuda:0')
epoch: 2 test_true_pfm: 2181.825057779422 sim_pfm: -13.887649070316305
episode: 8 training return: tensor(-27.5471, device='cuda:0')
episode: 9 training return: tensor(-33.8953, device='cuda:0')
episode: 10 training return: tensor(-16.2389, device='cuda:0')
episode: 11 training return: tensor(229.1881, device='cuda:0')
epoch: 3 test_true_pfm: 1554.157611215038 sim_pfm: 112.17115729688278
episode: 12 training return: tensor(-71.7352, device='cuda:0')
episode: 13 training return: tensor(82.8101, device='cuda:0')
episode: 14 training return: tensor(205.2598, device='cuda:0')
episode: 15 training return: tensor(503.5010, device='cuda:0')
epoch: 4 test_true_pfm: 1350.0434113125084 sim_pfm: 56.46794349172463
episode: 16 training return: tensor(-5.0476, device='cuda:0')
episode: 17 training return: tensor(208.9609, device='cuda:0')
episode: 18 training return: tensor(160.3727, device='cuda:0')
episode: 19 training return: tensor(302.6987, device='cuda:0')
epoch: 5 test_true_pfm: 2091.8003251228997 sim_pfm: 236.59977970371256
episode: 20 training return: tensor(-40.4777, device='cuda:0')
episode: 21 training return: tensor(-19.4969, device='cuda:0')
episode: 22 training return: tensor(474.8773, device='cuda:0')
episode: 23 training return: tensor(-34.8972, device='cuda:0')
epoch: 6 test_true_pfm: 2120.844679879713 sim_pfm: 3.422131124573449
episode: 24 training return: tensor(-64.9336, device='cuda:0')
episode: 25 training return: tensor(0.5592, device='cuda:0')
episode: 26 training return: tensor(-2.1062, device='cuda:0')
episode: 27 training return: tensor(-3.8793, device='cuda:0')
epoch: 7 test_true_pfm: 1927.9040231691906 sim_pfm: 71.75530108856037
episode: 28 training return: tensor(-10.4133, device='cuda:0')
episode: 29 training return: tensor(357.1331, device='cuda:0')
episode: 30 training return: tensor(57.9423, device='cuda:0')
episode: 31 training return: tensor(149.4784, device='cuda:0')
epoch: 8 test_true_pfm: 1809.4830457821363 sim_pfm: 330.26810992970906
episode: 32 training return: tensor(-44.0020, device='cuda:0')
episode: 33 training return: tensor(260.2120, device='cuda:0')
episode: 34 training return: tensor(-63.8983, device='cuda:0')
episode: 35 training return: tensor(25.5321, device='cuda:0')
epoch: 9 test_true_pfm: 2193.1469445423722 sim_pfm: 218.88296730076158
episode: 36 training return: tensor(14.8357, device='cuda:0')
episode: 37 training return: tensor(222.7134, device='cuda:0')
episode: 38 training return: tensor(-40.2373, device='cuda:0')
episode: 39 training return: tensor(212.4454, device='cuda:0')
epoch: 10 test_true_pfm: 2543.7440890037055 sim_pfm: 243.24181469669566
episode: 40 training return: tensor(-73.7883, device='cuda:0')
episode: 41 training return: tensor(60.3777, device='cuda:0')
episode: 42 training return: tensor(3.1284, device='cuda:0')
episode: 43 training return: tensor(488.7243, device='cuda:0')
epoch: 11 test_true_pfm: 2432.7168963623885 sim_pfm: 211.41616203676676
episode: 44 training return: tensor(45.1954, device='cuda:0')
episode: 45 training return: tensor(294.3491, device='cuda:0')
episode: 46 training return: tensor(-132.2799, device='cuda:0')
episode: 47 training return: tensor(228.1279, device='cuda:0')
epoch: 12 test_true_pfm: 2320.5524644529887 sim_pfm: 239.29064437228953
episode: 48 training return: tensor(50.5139, device='cuda:0')
episode: 49 training return: tensor(17.1472, device='cuda:0')
episode: 50 training return: tensor(-29.6652, device='cuda:0')
episode: 51 training return: tensor(455.2773, device='cuda:0')
epoch: 13 test_true_pfm: 2727.953758537055 sim_pfm: 289.42469253127155
episode: 52 training return: tensor(216.5716, device='cuda:0')
episode: 53 training return: tensor(-10.5719, device='cuda:0')
episode: 54 training return: tensor(251.5116, device='cuda:0')
episode: 55 training return: tensor(-14.6833, device='cuda:0')
epoch: 14 test_true_pfm: 2567.8818210860527 sim_pfm: 156.71067415867583
episode: 56 training return: tensor(58.2775, device='cuda:0')
episode: 57 training return: tensor(11.3461, device='cuda:0')
episode: 58 training return: tensor(46.0899, device='cuda:0')
episode: 59 training return: tensor(516.9051, device='cuda:0')
epoch: 15 test_true_pfm: 2164.611334980338 sim_pfm: 424.2106983455208
episode: 60 training return: tensor(174.3729, device='cuda:0')
episode: 61 training return: tensor(360.7375, device='cuda:0')
episode: 62 training return: tensor(33.5217, device='cuda:0')
episode: 63 training return: tensor(41.3096, device='cuda:0')
epoch: 16 test_true_pfm: 2465.3382376769564 sim_pfm: 67.63036071055103
episode: 64 training return: tensor(14.5055, device='cuda:0')
episode: 65 training return: tensor(273.3671, device='cuda:0')
episode: 66 training return: tensor(273.6392, device='cuda:0')
episode: 67 training return: tensor(64.8972, device='cuda:0')
epoch: 17 test_true_pfm: 2313.5394427267147 sim_pfm: 135.66597541184942
episode: 68 training return: tensor(108.4046, device='cuda:0')
episode: 69 training return: tensor(28.8303, device='cuda:0')
episode: 70 training return: tensor(305.7004, device='cuda:0')
episode: 71 training return: tensor(308.9739, device='cuda:0')
epoch: 18 test_true_pfm: 2297.4813471588236 sim_pfm: 404.5961477751068
episode: 72 training return: tensor(221.9894, device='cuda:0')
episode: 73 training return: tensor(85.0214, device='cuda:0')
episode: 74 training return: tensor(10.4784, device='cuda:0')
episode: 75 training return: tensor(525.4493, device='cuda:0')
epoch: 19 test_true_pfm: 2240.5843457984565 sim_pfm: 364.7307419840072
episode: 76 training return: tensor(-32.6953, device='cuda:0')
episode: 77 training return: tensor(11.0350, device='cuda:0')
episode: 78 training return: tensor(68.8294, device='cuda:0')
episode: 79 training return: tensor(317.0850, device='cuda:0')
epoch: 20 test_true_pfm: 2179.4210542508504 sim_pfm: 392.2411942233254
episode: 80 training return: tensor(325.0672, device='cuda:0')
episode: 81 training return: tensor(18.2914, device='cuda:0')
episode: 82 training return: tensor(207.8804, device='cuda:0')
episode: 83 training return: tensor(489.4973, device='cuda:0')
epoch: 21 test_true_pfm: 1979.9828053978633 sim_pfm: 206.28248265086827
episode: 84 training return: tensor(415.0020, device='cuda:0')
episode: 85 training return: tensor(22.6680, device='cuda:0')
episode: 86 training return: tensor(483.8624, device='cuda:0')
episode: 87 training return: tensor(-1.4332, device='cuda:0')
epoch: 22 test_true_pfm: 2743.469919219382 sim_pfm: 92.61688579176553
episode: 88 training return: tensor(82.1869, device='cuda:0')
episode: 89 training return: tensor(329.2884, device='cuda:0')
episode: 90 training return: tensor(47.3455, device='cuda:0')
episode: 91 training return: tensor(20.9952, device='cuda:0')
epoch: 23 test_true_pfm: 2587.8675191308807 sim_pfm: 323.7108674296954
episode: 92 training return: tensor(283.0405, device='cuda:0')
episode: 93 training return: tensor(282.6974, device='cuda:0')
episode: 94 training return: tensor(461.4214, device='cuda:0')
episode: 95 training return: tensor(212.9980, device='cuda:0')
epoch: 24 test_true_pfm: 1949.1845833721036 sim_pfm: 145.06510921301864
episode: 96 training return: tensor(10.0167, device='cuda:0')
episode: 97 training return: tensor(62.8748, device='cuda:0')
episode: 98 training return: tensor(86.3379, device='cuda:0')
episode: 99 training return: tensor(153.6894, device='cuda:0')
epoch: 25 test_true_pfm: 2157.3559682186683 sim_pfm: 50.05244295842325
episode: 100 training return: tensor(269.2904, device='cuda:0')
episode: 101 training return: tensor(99.8603, device='cuda:0')
episode: 102 training return: tensor(21.6297, device='cuda:0')
episode: 103 training return: tensor(35.7986, device='cuda:0')
epoch: 26 test_true_pfm: 2905.649448046961 sim_pfm: 112.64817101932324
episode: 104 training return: tensor(30.2027, device='cuda:0')
episode: 105 training return: tensor(38.2247, device='cuda:0')
episode: 106 training return: tensor(46.5557, device='cuda:0')
episode: 107 training return: tensor(213.7245, device='cuda:0')
epoch: 27 test_true_pfm: 1831.2844037594384 sim_pfm: 263.9203782475088
episode: 108 training return: tensor(458.8125, device='cuda:0')
episode: 109 training return: tensor(497.1786, device='cuda:0')
episode: 110 training return: tensor(43.3581, device='cuda:0')
episode: 111 training return: tensor(-52.7988, device='cuda:0')
epoch: 28 test_true_pfm: 2416.393809515768 sim_pfm: 95.36402216445033
episode: 112 training return: tensor(473.6021, device='cuda:0')
episode: 113 training return: tensor(41.3163, device='cuda:0')
episode: 114 training return: tensor(205.7726, device='cuda:0')
episode: 115 training return: tensor(-44.9888, device='cuda:0')
epoch: 29 test_true_pfm: 2247.880904660378 sim_pfm: 190.8184094768561
episode: 116 training return: tensor(114.2431, device='cuda:0')
episode: 117 training return: tensor(172.5328, device='cuda:0')
episode: 118 training return: tensor(-12.0051, device='cuda:0')
episode: 119 training return: tensor(223.4159, device='cuda:0')
epoch: 30 test_true_pfm: 2794.7205718634373 sim_pfm: 320.1444021393545
episode: 120 training return: tensor(-17.6792, device='cuda:0')
episode: 121 training return: tensor(21.6480, device='cuda:0')
episode: 122 training return: tensor(298.5743, device='cuda:0')
episode: 123 training return: tensor(62.0356, device='cuda:0')
epoch: 31 test_true_pfm: 2261.7622137538788 sim_pfm: 300.72411883579724
episode: 124 training return: tensor(464.0477, device='cuda:0')
episode: 125 training return: tensor(-36.5930, device='cuda:0')
episode: 126 training return: tensor(478.6557, device='cuda:0')
episode: 127 training return: tensor(147.7469, device='cuda:0')
epoch: 32 test_true_pfm: 2328.816957764203 sim_pfm: 202.76020343147684
episode: 128 training return: tensor(-53.7345, device='cuda:0')
episode: 129 training return: tensor(239.7519, device='cuda:0')
episode: 130 training return: tensor(5.2981, device='cuda:0')
episode: 131 training return: tensor(113.1838, device='cuda:0')
epoch: 33 test_true_pfm: 1947.873849429575 sim_pfm: 217.0912228151768
episode: 132 training return: tensor(347.5777, device='cuda:0')
episode: 133 training return: tensor(-21.7631, device='cuda:0')
episode: 134 training return: tensor(135.4109, device='cuda:0')
episode: 135 training return: tensor(484.9902, device='cuda:0')
epoch: 34 test_true_pfm: 2447.3558863803187 sim_pfm: 231.99497669221213
episode: 136 training return: tensor(83.6846, device='cuda:0')
episode: 137 training return: tensor(23.2880, device='cuda:0')
episode: 138 training return: tensor(447.7653, device='cuda:0')
episode: 139 training return: tensor(85.0053, device='cuda:0')
epoch: 35 test_true_pfm: 2730.7649045892613 sim_pfm: 167.70690214451557
episode: 140 training return: tensor(443.6287, device='cuda:0')
episode: 141 training return: tensor(246.3328, device='cuda:0')
episode: 142 training return: tensor(-29.1832, device='cuda:0')
episode: 143 training return: tensor(59.3995, device='cuda:0')
epoch: 36 test_true_pfm: 1761.0738188307753 sim_pfm: 224.11027102457592
episode: 144 training return: tensor(147.0625, device='cuda:0')
episode: 145 training return: tensor(488.0194, device='cuda:0')
episode: 146 training return: tensor(-42.7494, device='cuda:0')
episode: 147 training return: tensor(118.8815, device='cuda:0')
epoch: 37 test_true_pfm: 2299.6850135846903 sim_pfm: 68.07377507498798
episode: 148 training return: tensor(262.3481, device='cuda:0')
episode: 149 training return: tensor(58.0095, device='cuda:0')
episode: 150 training return: tensor(225.2458, device='cuda:0')
episode: 151 training return: tensor(139.6319, device='cuda:0')
epoch: 38 test_true_pfm: 2506.962917414154 sim_pfm: 118.23216793569736
episode: 152 training return: tensor(253.6782, device='cuda:0')
episode: 153 training return: tensor(-13.9386, device='cuda:0')
episode: 154 training return: tensor(492.9339, device='cuda:0')
episode: 155 training return: tensor(168.2968, device='cuda:0')
epoch: 39 test_true_pfm: 1996.9360910469907 sim_pfm: 72.2083870419495
episode: 156 training return: tensor(285.1154, device='cuda:0')
episode: 157 training return: tensor(-17.1980, device='cuda:0')
episode: 158 training return: tensor(301.6186, device='cuda:0')
episode: 159 training return: tensor(91.6750, device='cuda:0')
epoch: 40 test_true_pfm: 1990.1170944635567 sim_pfm: 139.14782099425793
episode: 160 training return: tensor(384.8730, device='cuda:0')
episode: 161 training return: tensor(33.0559, device='cuda:0')
episode: 162 training return: tensor(38.2211, device='cuda:0')
episode: 163 training return: tensor(2.8173, device='cuda:0')
epoch: 41 test_true_pfm: 2321.7983120449207 sim_pfm: -8.927792829927057
episode: 164 training return: tensor(128.6919, device='cuda:0')
episode: 165 training return: tensor(73.4782, device='cuda:0')
episode: 166 training return: tensor(34.6933, device='cuda:0')
episode: 167 training return: tensor(-45.7997, device='cuda:0')
epoch: 42 test_true_pfm: 2074.520275877571 sim_pfm: 432.1044896571936
episode: 168 training return: tensor(297.0611, device='cuda:0')
episode: 169 training return: tensor(20.2954, device='cuda:0')
episode: 170 training return: tensor(61.9111, device='cuda:0')
episode: 171 training return: tensor(275.0108, device='cuda:0')
epoch: 43 test_true_pfm: 1785.967348047534 sim_pfm: 124.88541818511051
episode: 172 training return: tensor(80.3819, device='cuda:0')
episode: 173 training return: tensor(224.1322, device='cuda:0')
episode: 174 training return: tensor(70.4143, device='cuda:0')
episode: 175 training return: tensor(81.2669, device='cuda:0')
epoch: 44 test_true_pfm: 2345.4070697192687 sim_pfm: 479.5063524545015
episode: 176 training return: tensor(377.4770, device='cuda:0')
episode: 177 training return: tensor(56.5311, device='cuda:0')
episode: 178 training return: tensor(124.2555, device='cuda:0')
episode: 179 training return: tensor(367.5719, device='cuda:0')
epoch: 45 test_true_pfm: 2972.0706059577983 sim_pfm: 27.746265984140337
episode: 180 training return: tensor(394.9594, device='cuda:0')
episode: 181 training return: tensor(71.1479, device='cuda:0')
episode: 182 training return: tensor(-1.3380, device='cuda:0')
episode: 183 training return: tensor(119.8913, device='cuda:0')
epoch: 46 test_true_pfm: 2695.5518261911366 sim_pfm: 99.20400155351187
episode: 184 training return: tensor(111.1601, device='cuda:0')
episode: 185 training return: tensor(262.8265, device='cuda:0')
episode: 186 training return: tensor(-25.3238, device='cuda:0')
episode: 187 training return: tensor(64.7144, device='cuda:0')
epoch: 47 test_true_pfm: 2223.178953805945 sim_pfm: 215.97471183573361
episode: 188 training return: tensor(-32.1081, device='cuda:0')
episode: 189 training return: tensor(47.0164, device='cuda:0')
episode: 190 training return: tensor(301.1047, device='cuda:0')
episode: 191 training return: tensor(58.9809, device='cuda:0')
epoch: 48 test_true_pfm: 1910.2794967644543 sim_pfm: 230.73117205669405
episode: 192 training return: tensor(86.6782, device='cuda:0')
episode: 193 training return: tensor(365.9058, device='cuda:0')
episode: 194 training return: tensor(368.0638, device='cuda:0')
episode: 195 training return: tensor(233.1582, device='cuda:0')
epoch: 49 test_true_pfm: 2150.0771882419594 sim_pfm: 12.191585816520577
episode: 196 training return: tensor(35.8114, device='cuda:0')
episode: 197 training return: tensor(21.6370, device='cuda:0')
episode: 198 training return: tensor(6.3201, device='cuda:0')
episode: 199 training return: tensor(142.2497, device='cuda:0')
epoch: 50 test_true_pfm: 2206.336965193588 sim_pfm: 113.46140036320624
episode: 200 training return: tensor(115.5253, device='cuda:0')
episode: 201 training return: tensor(395.1789, device='cuda:0')
episode: 202 training return: tensor(31.5673, device='cuda:0')
episode: 203 training return: tensor(73.0339, device='cuda:0')
epoch: 51 test_true_pfm: 2609.2630818159314 sim_pfm: 334.11098447537125
episode: 204 training return: tensor(83.4795, device='cuda:0')
episode: 205 training return: tensor(480.4615, device='cuda:0')
episode: 206 training return: tensor(43.7040, device='cuda:0')
episode: 207 training return: tensor(229.0446, device='cuda:0')
epoch: 52 test_true_pfm: 1873.916708646487 sim_pfm: 120.04791140606783
episode: 208 training return: tensor(28.4712, device='cuda:0')
episode: 209 training return: tensor(250.9477, device='cuda:0')
episode: 210 training return: tensor(36.1842, device='cuda:0')
episode: 211 training return: tensor(22.7973, device='cuda:0')
epoch: 53 test_true_pfm: 2069.477310625249 sim_pfm: 178.401838476595
episode: 212 training return: tensor(-39.6888, device='cuda:0')
episode: 213 training return: tensor(50.8714, device='cuda:0')
episode: 214 training return: tensor(98.5743, device='cuda:0')
episode: 215 training return: tensor(27.9666, device='cuda:0')
epoch: 54 test_true_pfm: 1632.3828091887328 sim_pfm: 229.9545319384391
episode: 216 training return: tensor(35.1332, device='cuda:0')
episode: 217 training return: tensor(225.3237, device='cuda:0')
episode: 218 training return: tensor(201.5181, device='cuda:0')
episode: 219 training return: tensor(52.0903, device='cuda:0')
epoch: 55 test_true_pfm: 1890.0086002902783 sim_pfm: 177.2962764252055
episode: 220 training return: tensor(33.2081, device='cuda:0')
episode: 221 training return: tensor(459.3035, device='cuda:0')
episode: 222 training return: tensor(500.9261, device='cuda:0')
episode: 223 training return: tensor(129.1507, device='cuda:0')
epoch: 56 test_true_pfm: 2284.9873469361096 sim_pfm: 128.51188082494386
episode: 224 training return: tensor(34.2264, device='cuda:0')
episode: 225 training return: tensor(112.3835, device='cuda:0')
episode: 226 training return: tensor(25.8311, device='cuda:0')
episode: 227 training return: tensor(264.8214, device='cuda:0')
epoch: 57 test_true_pfm: 2475.152451578397 sim_pfm: 0.7353597020652766
episode: 228 training return: tensor(150.4709, device='cuda:0')
episode: 229 training return: tensor(53.1928, device='cuda:0')
episode: 230 training return: tensor(494.5089, device='cuda:0')
episode: 231 training return: tensor(270.3137, device='cuda:0')
epoch: 58 test_true_pfm: 2464.8968138183045 sim_pfm: 51.267969392666906
episode: 232 training return: tensor(249.7218, device='cuda:0')
episode: 233 training return: tensor(44.9385, device='cuda:0')
episode: 234 training return: tensor(256.3765, device='cuda:0')
episode: 235 training return: tensor(46.1837, device='cuda:0')
epoch: 59 test_true_pfm: 2213.3142135003127 sim_pfm: 239.36007119413503
episode: 236 training return: tensor(194.6750, device='cuda:0')
episode: 237 training return: tensor(46.0348, device='cuda:0')
episode: 238 training return: tensor(25.1894, device='cuda:0')
episode: 239 training return: tensor(60.9262, device='cuda:0')
epoch: 60 test_true_pfm: 2189.0521732745297 sim_pfm: 141.06826724789184
episode: 240 training return: tensor(-39.7511, device='cuda:0')
episode: 241 training return: tensor(507.4237, device='cuda:0')
episode: 242 training return: tensor(111.4987, device='cuda:0')
episode: 243 training return: tensor(77.6268, device='cuda:0')
epoch: 61 test_true_pfm: 2233.247752055821 sim_pfm: -19.65488745016046
episode: 244 training return: tensor(213.7344, device='cuda:0')
episode: 245 training return: tensor(458.3146, device='cuda:0')
episode: 246 training return: tensor(174.0684, device='cuda:0')
episode: 247 training return: tensor(248.9874, device='cuda:0')
epoch: 62 test_true_pfm: 2541.0378166392 sim_pfm: 305.2145634694219
episode: 248 training return: tensor(22.4740, device='cuda:0')
episode: 249 training return: tensor(64.6155, device='cuda:0')
episode: 250 training return: tensor(279.0911, device='cuda:0')
episode: 251 training return: tensor(222.3902, device='cuda:0')
epoch: 63 test_true_pfm: 1615.1492852486701 sim_pfm: 294.46482034615474
episode: 252 training return: tensor(131.0613, device='cuda:0')
episode: 253 training return: tensor(32.3896, device='cuda:0')
episode: 254 training return: tensor(121.9601, device='cuda:0')
episode: 255 training return: tensor(27.9634, device='cuda:0')
epoch: 64 test_true_pfm: 2080.86541259897 sim_pfm: 113.949059459342
episode: 256 training return: tensor(106.0801, device='cuda:0')
episode: 257 training return: tensor(39.2705, device='cuda:0')
episode: 258 training return: tensor(160.3737, device='cuda:0')
episode: 259 training return: tensor(208.8833, device='cuda:0')
epoch: 65 test_true_pfm: 2479.408803625909 sim_pfm: 227.533253891054
episode: 260 training return: tensor(266.9412, device='cuda:0')
episode: 261 training return: tensor(15.0580, device='cuda:0')
episode: 262 training return: tensor(33.1263, device='cuda:0')
episode: 263 training return: tensor(49.0696, device='cuda:0')
epoch: 66 test_true_pfm: 2218.8004127335603 sim_pfm: 133.239513859211
episode: 264 training return: tensor(-54.6518, device='cuda:0')
episode: 265 training return: tensor(307.9027, device='cuda:0')
episode: 266 training return: tensor(496.4950, device='cuda:0')
episode: 267 training return: tensor(-30.3202, device='cuda:0')
epoch: 67 test_true_pfm: 2185.445052111982 sim_pfm: 17.17651008500252
episode: 268 training return: tensor(133.8020, device='cuda:0')
episode: 269 training return: tensor(25.5141, device='cuda:0')
episode: 270 training return: tensor(158.0523, device='cuda:0')
episode: 271 training return: tensor(48.8840, device='cuda:0')
epoch: 68 test_true_pfm: 2753.9237553450057 sim_pfm: 104.21723522703785
episode: 272 training return: tensor(74.6929, device='cuda:0')
episode: 273 training return: tensor(23.8389, device='cuda:0')
episode: 274 training return: tensor(79.2920, device='cuda:0')
episode: 275 training return: tensor(36.2715, device='cuda:0')
epoch: 69 test_true_pfm: 2688.9142372853007 sim_pfm: 312.55654043502483
episode: 276 training return: tensor(195.2783, device='cuda:0')
episode: 277 training return: tensor(212.4173, device='cuda:0')
episode: 278 training return: tensor(3.8798, device='cuda:0')
episode: 279 training return: tensor(11.5994, device='cuda:0')
epoch: 70 test_true_pfm: 2149.4091499025985 sim_pfm: 37.0268105463086
episode: 280 training return: tensor(422.9074, device='cuda:0')
episode: 281 training return: tensor(163.2297, device='cuda:0')
episode: 282 training return: tensor(475.0096, device='cuda:0')
episode: 283 training return: tensor(287.5605, device='cuda:0')
epoch: 71 test_true_pfm: 2241.694670638711 sim_pfm: 151.81039990088902
episode: 284 training return: tensor(467.2393, device='cuda:0')
episode: 285 training return: tensor(482.7680, device='cuda:0')
episode: 286 training return: tensor(-14.9015, device='cuda:0')
episode: 287 training return: tensor(508.9044, device='cuda:0')
epoch: 72 test_true_pfm: 1958.8774750279727 sim_pfm: 166.9733293199679
episode: 288 training return: tensor(302.6172, device='cuda:0')
episode: 289 training return: tensor(125.5110, device='cuda:0')
episode: 290 training return: tensor(189.9603, device='cuda:0')
episode: 291 training return: tensor(179.6915, device='cuda:0')
epoch: 73 test_true_pfm: 2467.331268017148 sim_pfm: -18.43357656771938
episode: 292 training return: tensor(225.2286, device='cuda:0')
episode: 293 training return: tensor(55.2708, device='cuda:0')
episode: 294 training return: tensor(73.6061, device='cuda:0')
episode: 295 training return: tensor(60.5941, device='cuda:0')
epoch: 74 test_true_pfm: 1993.482735631325 sim_pfm: 239.1490652473682
episode: 296 training return: tensor(105.2893, device='cuda:0')
episode: 297 training return: tensor(217.8485, device='cuda:0')
episode: 298 training return: tensor(470.6245, device='cuda:0')
episode: 299 training return: tensor(196.8911, device='cuda:0')
epoch: 75 test_true_pfm: 2380.020711075307 sim_pfm: 191.85596030624583
episode: 300 training return: tensor(26.0134, device='cuda:0')
episode: 301 training return: tensor(35.7773, device='cuda:0')
episode: 302 training return: tensor(69.7573, device='cuda:0')
episode: 303 training return: tensor(65.6917, device='cuda:0')
epoch: 76 test_true_pfm: 2347.5917954533847 sim_pfm: 84.3565751844629
episode: 304 training return: tensor(449.7495, device='cuda:0')
episode: 305 training return: tensor(119.1429, device='cuda:0')
episode: 306 training return: tensor(316.6780, device='cuda:0')
episode: 307 training return: tensor(-4.1557, device='cuda:0')
epoch: 77 test_true_pfm: 2296.8785424560933 sim_pfm: 89.95804988197051
episode: 308 training return: tensor(244.8477, device='cuda:0')
episode: 309 training return: tensor(14.7470, device='cuda:0')
episode: 310 training return: tensor(483.6518, device='cuda:0')
episode: 311 training return: tensor(272.2144, device='cuda:0')
epoch: 78 test_true_pfm: 1892.8404682224907 sim_pfm: -11.73432935772386
episode: 312 training return: tensor(100.3244, device='cuda:0')
episode: 313 training return: tensor(246.2774, device='cuda:0')
episode: 314 training return: tensor(261.4774, device='cuda:0')
episode: 315 training return: tensor(396.3413, device='cuda:0')
epoch: 79 test_true_pfm: 2065.6579012688085 sim_pfm: 94.86342237495894
episode: 316 training return: tensor(50.0096, device='cuda:0')
episode: 317 training return: tensor(37.6504, device='cuda:0')
episode: 318 training return: tensor(474.8605, device='cuda:0')
episode: 319 training return: tensor(499.5029, device='cuda:0')
epoch: 80 test_true_pfm: 1936.8409206193119 sim_pfm: 18.171301292459248
episode: 320 training return: tensor(80.9222, device='cuda:0')
episode: 321 training return: tensor(394.5284, device='cuda:0')
episode: 322 training return: tensor(26.5734, device='cuda:0')
episode: 323 training return: tensor(443.0966, device='cuda:0')
epoch: 81 test_true_pfm: 1649.0062090162248 sim_pfm: 28.745066117146052
episode: 324 training return: tensor(31.0392, device='cuda:0')
episode: 325 training return: tensor(36.4165, device='cuda:0')
episode: 326 training return: tensor(208.1933, device='cuda:0')
episode: 327 training return: tensor(101.8490, device='cuda:0')
epoch: 82 test_true_pfm: 2179.0576230157176 sim_pfm: 254.30538596457336
episode: 328 training return: tensor(210.1623, device='cuda:0')
episode: 329 training return: tensor(143.6124, device='cuda:0')
episode: 330 training return: tensor(217.7552, device='cuda:0')
episode: 331 training return: tensor(48.0878, device='cuda:0')
epoch: 83 test_true_pfm: 1743.4563521831933 sim_pfm: 304.4436660304976
episode: 332 training return: tensor(-3.4653, device='cuda:0')
episode: 333 training return: tensor(67.0532, device='cuda:0')
episode: 334 training return: tensor(239.1465, device='cuda:0')
episode: 335 training return: tensor(153.1158, device='cuda:0')
epoch: 84 test_true_pfm: 2248.706220609201 sim_pfm: 171.51094911136897
episode: 336 training return: tensor(64.2996, device='cuda:0')
episode: 337 training return: tensor(29.3799, device='cuda:0')
episode: 338 training return: tensor(56.2550, device='cuda:0')
episode: 339 training return: tensor(147.4874, device='cuda:0')
epoch: 85 test_true_pfm: 2294.610689642485 sim_pfm: 482.6969358933663
episode: 340 training return: tensor(223.0031, device='cuda:0')
episode: 341 training return: tensor(-19.1520, device='cuda:0')
episode: 342 training return: tensor(216.9376, device='cuda:0')
episode: 343 training return: tensor(8.2090, device='cuda:0')
epoch: 86 test_true_pfm: 2536.42856340528 sim_pfm: 301.6651768684581
episode: 344 training return: tensor(18.0633, device='cuda:0')
episode: 345 training return: tensor(167.2367, device='cuda:0')
episode: 346 training return: tensor(284.7999, device='cuda:0')
episode: 347 training return: tensor(197.3661, device='cuda:0')
epoch: 87 test_true_pfm: 3048.9984661443505 sim_pfm: 186.23668563602646
episode: 348 training return: tensor(250.5044, device='cuda:0')
episode: 349 training return: tensor(488.2124, device='cuda:0')
episode: 350 training return: tensor(477.2940, device='cuda:0')
episode: 351 training return: tensor(411.4602, device='cuda:0')
epoch: 88 test_true_pfm: 2556.253936614193 sim_pfm: 224.07309714656245
episode: 352 training return: tensor(75.5623, device='cuda:0')
episode: 353 training return: tensor(359.1586, device='cuda:0')
episode: 354 training return: tensor(26.3053, device='cuda:0')
episode: 355 training return: tensor(43.1866, device='cuda:0')
epoch: 89 test_true_pfm: 2029.8926388886048 sim_pfm: 322.30697300308384
episode: 356 training return: tensor(83.1609, device='cuda:0')
episode: 357 training return: tensor(455.6052, device='cuda:0')
episode: 358 training return: tensor(67.8495, device='cuda:0')
episode: 359 training return: tensor(342.4922, device='cuda:0')
epoch: 90 test_true_pfm: 2198.519003252602 sim_pfm: 354.3530309502191
episode: 360 training return: tensor(127.1804, device='cuda:0')
episode: 361 training return: tensor(147.7676, device='cuda:0')
episode: 362 training return: tensor(164.0207, device='cuda:0')
episode: 363 training return: tensor(73.8161, device='cuda:0')
epoch: 91 test_true_pfm: 2720.268609813183 sim_pfm: 112.32508280262118
episode: 364 training return: tensor(333.0514, device='cuda:0')
episode: 365 training return: tensor(46.6138, device='cuda:0')
episode: 366 training return: tensor(459.7892, device='cuda:0')
episode: 367 training return: tensor(277.5669, device='cuda:0')
epoch: 92 test_true_pfm: 2158.7436632613812 sim_pfm: 103.98749589611543
episode: 368 training return: tensor(32.2003, device='cuda:0')
episode: 369 training return: tensor(263.5175, device='cuda:0')
episode: 370 training return: tensor(174.7070, device='cuda:0')
episode: 371 training return: tensor(65.3572, device='cuda:0')
epoch: 93 test_true_pfm: 1762.6245502637314 sim_pfm: 228.7853948137878
episode: 372 training return: tensor(380.1637, device='cuda:0')
episode: 373 training return: tensor(47.3914, device='cuda:0')
episode: 374 training return: tensor(1.8569, device='cuda:0')
episode: 375 training return: tensor(26.9897, device='cuda:0')
epoch: 94 test_true_pfm: 2580.4250001966034 sim_pfm: 154.7368107785393
episode: 376 training return: tensor(57.7089, device='cuda:0')
episode: 377 training return: tensor(269.3388, device='cuda:0')
episode: 378 training return: tensor(69.5305, device='cuda:0')
episode: 379 training return: tensor(514.2283, device='cuda:0')
epoch: 95 test_true_pfm: 2815.324786761123 sim_pfm: 251.2309036516041
episode: 380 training return: tensor(475.6287, device='cuda:0')
episode: 381 training return: tensor(399.4522, device='cuda:0')
episode: 382 training return: tensor(225.4481, device='cuda:0')
episode: 383 training return: tensor(252.0430, device='cuda:0')
epoch: 96 test_true_pfm: 2357.8834934479487 sim_pfm: 170.39718971860324
episode: 384 training return: tensor(447.5003, device='cuda:0')
episode: 385 training return: tensor(295.4143, device='cuda:0')
episode: 386 training return: tensor(65.4517, device='cuda:0')
episode: 387 training return: tensor(87.9704, device='cuda:0')
epoch: 97 test_true_pfm: 1882.487226945752 sim_pfm: 108.8016601751442
episode: 388 training return: tensor(24.9360, device='cuda:0')
episode: 389 training return: tensor(-11.2917, device='cuda:0')
episode: 390 training return: tensor(83.9642, device='cuda:0')
episode: 391 training return: tensor(65.2639, device='cuda:0')
epoch: 98 test_true_pfm: 2050.95500181178 sim_pfm: 336.60275940351613
episode: 392 training return: tensor(77.0584, device='cuda:0')
episode: 393 training return: tensor(38.7398, device='cuda:0')
episode: 394 training return: tensor(229.7836, device='cuda:0')
episode: 395 training return: tensor(516.7076, device='cuda:0')
epoch: 99 test_true_pfm: 2025.19042288587 sim_pfm: 116.00956503515287
episode: 396 training return: tensor(509.1757, device='cuda:0')
episode: 397 training return: tensor(11.0914, device='cuda:0')
episode: 398 training return: tensor(272.3428, device='cuda:0')
episode: 399 training return: tensor(224.3257, device='cuda:0')
epoch: 100 test_true_pfm: 2265.821897548826 sim_pfm: 307.9621949774834
episode: 400 training return: tensor(450.7663, device='cuda:0')
episode: 401 training return: tensor(36.9154, device='cuda:0')
episode: 402 training return: tensor(54.0345, device='cuda:0')
episode: 403 training return: tensor(51.5025, device='cuda:0')
epoch: 101 test_true_pfm: 1935.4186043005677 sim_pfm: 42.840185040297605
episode: 404 training return: tensor(76.9539, device='cuda:0')
episode: 405 training return: tensor(430.9833, device='cuda:0')
episode: 406 training return: tensor(140.4331, device='cuda:0')
episode: 407 training return: tensor(515.0920, device='cuda:0')
epoch: 102 test_true_pfm: 2133.6207135450177 sim_pfm: 170.0313773616023
episode: 408 training return: tensor(474.8151, device='cuda:0')
episode: 409 training return: tensor(48.1232, device='cuda:0')
episode: 410 training return: tensor(75.5320, device='cuda:0')
episode: 411 training return: tensor(169.2402, device='cuda:0')
epoch: 103 test_true_pfm: 2696.7626614553533 sim_pfm: 129.97390686829263
episode: 412 training return: tensor(32.4773, device='cuda:0')
episode: 413 training return: tensor(68.8050, device='cuda:0')
episode: 414 training return: tensor(471.8857, device='cuda:0')
episode: 415 training return: tensor(-35.6459, device='cuda:0')
epoch: 104 test_true_pfm: 2473.2250694889995 sim_pfm: 155.94651910096096
episode: 416 training return: tensor(239.0627, device='cuda:0')
episode: 417 training return: tensor(298.9377, device='cuda:0')
episode: 418 training return: tensor(167.9941, device='cuda:0')
episode: 419 training return: tensor(-40.8136, device='cuda:0')
epoch: 105 test_true_pfm: 2091.3438525293273 sim_pfm: 33.30485522482195
episode: 420 training return: tensor(23.1440, device='cuda:0')
episode: 421 training return: tensor(34.6742, device='cuda:0')
episode: 422 training return: tensor(173.8963, device='cuda:0')
episode: 423 training return: tensor(76.5842, device='cuda:0')
epoch: 106 test_true_pfm: 2656.1769570399233 sim_pfm: 146.42484468411809
episode: 424 training return: tensor(26.9423, device='cuda:0')
episode: 425 training return: tensor(52.5349, device='cuda:0')
episode: 426 training return: tensor(53.0652, device='cuda:0')
episode: 427 training return: tensor(368.2881, device='cuda:0')
epoch: 107 test_true_pfm: 2269.3594754545556 sim_pfm: 166.96045925918347
episode: 428 training return: tensor(50.5424, device='cuda:0')
episode: 429 training return: tensor(222.1447, device='cuda:0')
episode: 430 training return: tensor(523.4182, device='cuda:0')
episode: 431 training return: tensor(454.1907, device='cuda:0')
epoch: 108 test_true_pfm: 1806.8241443057766 sim_pfm: 179.5558903189182
episode: 432 training return: tensor(439.8586, device='cuda:0')
episode: 433 training return: tensor(38.7281, device='cuda:0')
episode: 434 training return: tensor(213.9940, device='cuda:0')
episode: 435 training return: tensor(204.6769, device='cuda:0')
epoch: 109 test_true_pfm: 2207.677936624741 sim_pfm: 273.995969799483
episode: 436 training return: tensor(51.3124, device='cuda:0')
episode: 437 training return: tensor(400.4769, device='cuda:0')
episode: 438 training return: tensor(259.1527, device='cuda:0')
episode: 439 training return: tensor(63.9367, device='cuda:0')
epoch: 110 test_true_pfm: 1699.6171461406784 sim_pfm: 275.462293414108
episode: 440 training return: tensor(413.5365, device='cuda:0')
episode: 441 training return: tensor(23.3124, device='cuda:0')
episode: 442 training return: tensor(444.0265, device='cuda:0')
episode: 443 training return: tensor(520.4517, device='cuda:0')
epoch: 111 test_true_pfm: 2066.759146807128 sim_pfm: 198.79337956682625
episode: 444 training return: tensor(92.2839, device='cuda:0')
episode: 445 training return: tensor(428.5313, device='cuda:0')
episode: 446 training return: tensor(104.6628, device='cuda:0')
episode: 447 training return: tensor(140.7527, device='cuda:0')
epoch: 112 test_true_pfm: 2459.552390465867 sim_pfm: 199.1704077917481
episode: 448 training return: tensor(340.3952, device='cuda:0')
episode: 449 training return: tensor(12.6649, device='cuda:0')
episode: 450 training return: tensor(35.4833, device='cuda:0')
episode: 451 training return: tensor(330.4191, device='cuda:0')
epoch: 113 test_true_pfm: 2804.369011689587 sim_pfm: 30.96602589536148
episode: 452 training return: tensor(137.1538, device='cuda:0')
episode: 453 training return: tensor(340.0817, device='cuda:0')
episode: 454 training return: tensor(-15.7908, device='cuda:0')
episode: 455 training return: tensor(351.6255, device='cuda:0')
epoch: 114 test_true_pfm: 1786.4735073635995 sim_pfm: 224.11874936396876
episode: 456 training return: tensor(66.9525, device='cuda:0')
episode: 457 training return: tensor(79.9417, device='cuda:0')
episode: 458 training return: tensor(429.1306, device='cuda:0')
episode: 459 training return: tensor(182.0869, device='cuda:0')
epoch: 115 test_true_pfm: 2489.7363221053924 sim_pfm: 26.51838563817243
episode: 460 training return: tensor(128.8586, device='cuda:0')
episode: 461 training return: tensor(45.7072, device='cuda:0')
episode: 462 training return: tensor(192.4474, device='cuda:0')
episode: 463 training return: tensor(308.6665, device='cuda:0')
epoch: 116 test_true_pfm: 2449.9167861669703 sim_pfm: 255.02297409571474
episode: 464 training return: tensor(63.6530, device='cuda:0')
episode: 465 training return: tensor(41.4500, device='cuda:0')
episode: 466 training return: tensor(158.8779, device='cuda:0')
episode: 467 training return: tensor(232.4087, device='cuda:0')
epoch: 117 test_true_pfm: 2577.6532675105773 sim_pfm: 262.96966319907614
episode: 468 training return: tensor(180.4748, device='cuda:0')
episode: 469 training return: tensor(382.4862, device='cuda:0')
episode: 470 training return: tensor(287.7204, device='cuda:0')
episode: 471 training return: tensor(98.2118, device='cuda:0')
epoch: 118 test_true_pfm: 1863.2648303296992 sim_pfm: 155.71735830761222
episode: 472 training return: tensor(-0.5693, device='cuda:0')
episode: 473 training return: tensor(249.0289, device='cuda:0')
episode: 474 training return: tensor(69.3981, device='cuda:0')
episode: 475 training return: tensor(281.0892, device='cuda:0')
epoch: 119 test_true_pfm: 2690.116361157891 sim_pfm: 372.6052906737702
episode: 476 training return: tensor(437.0125, device='cuda:0')
episode: 477 training return: tensor(149.4254, device='cuda:0')
episode: 478 training return: tensor(111.9101, device='cuda:0')
episode: 479 training return: tensor(103.8671, device='cuda:0')
epoch: 120 test_true_pfm: 2836.3698733042124 sim_pfm: 25.94688516424503
episode: 480 training return: tensor(268.7182, device='cuda:0')
episode: 481 training return: tensor(247.6736, device='cuda:0')
episode: 482 training return: tensor(230.4492, device='cuda:0')
episode: 483 training return: tensor(256.0583, device='cuda:0')
epoch: 121 test_true_pfm: 1925.5775367011167 sim_pfm: 197.42249735973505
episode: 484 training return: tensor(101.1224, device='cuda:0')
episode: 485 training return: tensor(44.3329, device='cuda:0')
episode: 486 training return: tensor(389.9729, device='cuda:0')
episode: 487 training return: tensor(73.5399, device='cuda:0')
epoch: 122 test_true_pfm: 1955.7446517021936 sim_pfm: -11.452427198023846
episode: 488 training return: tensor(193.0351, device='cuda:0')
episode: 489 training return: tensor(-32.9447, device='cuda:0')
episode: 490 training return: tensor(-11.3005, device='cuda:0')
episode: 491 training return: tensor(132.1127, device='cuda:0')
epoch: 123 test_true_pfm: 2447.0782935935786 sim_pfm: 28.08661627653055
episode: 492 training return: tensor(248.6986, device='cuda:0')
episode: 493 training return: tensor(-25.4467, device='cuda:0')
episode: 494 training return: tensor(-7.2871, device='cuda:0')
episode: 495 training return: tensor(294.2047, device='cuda:0')
epoch: 124 test_true_pfm: 2604.5658087878196 sim_pfm: 138.26366433064686
episode: 496 training return: tensor(515.9549, device='cuda:0')
episode: 497 training return: tensor(101.4248, device='cuda:0')
episode: 498 training return: tensor(473.0573, device='cuda:0')
episode: 499 training return: tensor(49.6826, device='cuda:0')
epoch: 125 test_true_pfm: 2345.470588070207 sim_pfm: -4.82540320934883
episode: 500 training return: tensor(56.1657, device='cuda:0')
episode: 501 training return: tensor(0.2015, device='cuda:0')
episode: 502 training return: tensor(40.1302, device='cuda:0')
episode: 503 training return: tensor(314.4394, device='cuda:0')
epoch: 126 test_true_pfm: 2425.343227618471 sim_pfm: 29.830657423318673
episode: 504 training return: tensor(75.3723, device='cuda:0')
episode: 505 training return: tensor(66.8362, device='cuda:0')
episode: 506 training return: tensor(114.7807, device='cuda:0')
episode: 507 training return: tensor(123.9505, device='cuda:0')
epoch: 127 test_true_pfm: 1838.1205642022117 sim_pfm: 332.18000667660573
episode: 508 training return: tensor(91.4189, device='cuda:0')
episode: 509 training return: tensor(244.0622, device='cuda:0')
episode: 510 training return: tensor(50.0572, device='cuda:0')
episode: 511 training return: tensor(261.2067, device='cuda:0')
epoch: 128 test_true_pfm: 2237.1041849816124 sim_pfm: 51.83845484589498
episode: 512 training return: tensor(477.7135, device='cuda:0')
episode: 513 training return: tensor(232.1093, device='cuda:0')
episode: 514 training return: tensor(250.7845, device='cuda:0')
episode: 515 training return: tensor(231.8238, device='cuda:0')
epoch: 129 test_true_pfm: 2096.8813378060113 sim_pfm: 132.39289601771938
episode: 516 training return: tensor(217.7456, device='cuda:0')
episode: 517 training return: tensor(12.0948, device='cuda:0')
episode: 518 training return: tensor(87.7255, device='cuda:0')
episode: 519 training return: tensor(147.7765, device='cuda:0')
epoch: 130 test_true_pfm: 1736.2100275055252 sim_pfm: 299.74728807845776
episode: 520 training return: tensor(427.6206, device='cuda:0')
episode: 521 training return: tensor(90.4415, device='cuda:0')
episode: 522 training return: tensor(105.6193, device='cuda:0')
episode: 523 training return: tensor(73.7255, device='cuda:0')
epoch: 131 test_true_pfm: 2292.3379739151273 sim_pfm: 148.57959134564348
episode: 524 training return: tensor(76.4787, device='cuda:0')
episode: 525 training return: tensor(346.4529, device='cuda:0')
episode: 526 training return: tensor(53.8273, device='cuda:0')
episode: 527 training return: tensor(31.4462, device='cuda:0')
epoch: 132 test_true_pfm: 2638.5500328137027 sim_pfm: 99.20005634772436
episode: 528 training return: tensor(324.1599, device='cuda:0')
episode: 529 training return: tensor(154.5036, device='cuda:0')
episode: 530 training return: tensor(209.4569, device='cuda:0')
episode: 531 training return: tensor(306.2571, device='cuda:0')
epoch: 133 test_true_pfm: 3099.6211961152603 sim_pfm: 170.4741811863496
episode: 532 training return: tensor(139.2435, device='cuda:0')
episode: 533 training return: tensor(508.5527, device='cuda:0')
episode: 534 training return: tensor(47.6343, device='cuda:0')
episode: 535 training return: tensor(175.4118, device='cuda:0')
epoch: 134 test_true_pfm: 2236.8004005181183 sim_pfm: 215.4341463133072
episode: 536 training return: tensor(323.1342, device='cuda:0')
episode: 537 training return: tensor(125.8426, device='cuda:0')
episode: 538 training return: tensor(193.2368, device='cuda:0')
episode: 539 training return: tensor(52.4386, device='cuda:0')
epoch: 135 test_true_pfm: 2811.4701352723314 sim_pfm: 178.64866671457034
episode: 540 training return: tensor(80.4384, device='cuda:0')
episode: 541 training return: tensor(88.0924, device='cuda:0')
episode: 542 training return: tensor(433.1859, device='cuda:0')
episode: 543 training return: tensor(83.3248, device='cuda:0')
epoch: 136 test_true_pfm: 2261.3994512349195 sim_pfm: 118.00473304376162
episode: 544 training return: tensor(324.3713, device='cuda:0')
episode: 545 training return: tensor(98.4576, device='cuda:0')
episode: 546 training return: tensor(39.5787, device='cuda:0')
episode: 547 training return: tensor(152.9601, device='cuda:0')
epoch: 137 test_true_pfm: 2053.666802351628 sim_pfm: 123.05561334688294
episode: 548 training return: tensor(-17.7582, device='cuda:0')
episode: 549 training return: tensor(463.0401, device='cuda:0')
episode: 550 training return: tensor(63.6218, device='cuda:0')
episode: 551 training return: tensor(532.8765, device='cuda:0')
epoch: 138 test_true_pfm: 2348.164174710421 sim_pfm: 191.06497817090712
episode: 552 training return: tensor(373.2805, device='cuda:0')
episode: 553 training return: tensor(27.7940, device='cuda:0')
episode: 554 training return: tensor(68.6892, device='cuda:0')
episode: 555 training return: tensor(461.4623, device='cuda:0')
epoch: 139 test_true_pfm: 2049.3837349024184 sim_pfm: 191.45686720191347
episode: 556 training return: tensor(182.5914, device='cuda:0')
episode: 557 training return: tensor(405.7570, device='cuda:0')
episode: 558 training return: tensor(-2.0446, device='cuda:0')
episode: 559 training return: tensor(330.4667, device='cuda:0')
epoch: 140 test_true_pfm: 2794.042235924316 sim_pfm: 384.54420215682086
episode: 560 training return: tensor(463.0864, device='cuda:0')
episode: 561 training return: tensor(171.8583, device='cuda:0')
episode: 562 training return: tensor(518.2784, device='cuda:0')
episode: 563 training return: tensor(107.4533, device='cuda:0')
epoch: 141 test_true_pfm: 1774.151781445046 sim_pfm: 202.06359565789657
episode: 564 training return: tensor(484.6682, device='cuda:0')
episode: 565 training return: tensor(40.8587, device='cuda:0')
episode: 566 training return: tensor(133.3340, device='cuda:0')
episode: 567 training return: tensor(297.0786, device='cuda:0')
epoch: 142 test_true_pfm: 2102.525669095629 sim_pfm: 145.05408070989265
episode: 568 training return: tensor(241.1227, device='cuda:0')
episode: 569 training return: tensor(235.6090, device='cuda:0')
episode: 570 training return: tensor(183.8414, device='cuda:0')
episode: 571 training return: tensor(180.8774, device='cuda:0')
epoch: 143 test_true_pfm: 2702.38485573171 sim_pfm: 23.73026865410308
episode: 572 training return: tensor(505.8513, device='cuda:0')
episode: 573 training return: tensor(173.4496, device='cuda:0')
episode: 574 training return: tensor(507.7296, device='cuda:0')
episode: 575 training return: tensor(484.5111, device='cuda:0')
epoch: 144 test_true_pfm: 2688.4036219359673 sim_pfm: 285.6980957665558
episode: 576 training return: tensor(279.2490, device='cuda:0')
episode: 577 training return: tensor(413.7051, device='cuda:0')
episode: 578 training return: tensor(47.2469, device='cuda:0')
episode: 579 training return: tensor(135.4093, device='cuda:0')
epoch: 145 test_true_pfm: 2255.011567919378 sim_pfm: 380.8225646216306
episode: 580 training return: tensor(295.4867, device='cuda:0')
episode: 581 training return: tensor(1.9478, device='cuda:0')
episode: 582 training return: tensor(133.4421, device='cuda:0')
episode: 583 training return: tensor(292.7725, device='cuda:0')
epoch: 146 test_true_pfm: 2320.062156709202 sim_pfm: 443.3386553290572
episode: 584 training return: tensor(464.6124, device='cuda:0')
episode: 585 training return: tensor(82.8330, device='cuda:0')
episode: 586 training return: tensor(53.9372, device='cuda:0')
episode: 587 training return: tensor(16.9546, device='cuda:0')
epoch: 147 test_true_pfm: 2257.2058663276775 sim_pfm: 165.50996576581383
episode: 588 training return: tensor(-49.9119, device='cuda:0')
episode: 589 training return: tensor(86.6314, device='cuda:0')
episode: 590 training return: tensor(143.1276, device='cuda:0')
episode: 591 training return: tensor(466.2988, device='cuda:0')
epoch: 148 test_true_pfm: 2157.276334974725 sim_pfm: 105.19614135159645
episode: 592 training return: tensor(158.0648, device='cuda:0')
episode: 593 training return: tensor(87.4240, device='cuda:0')
episode: 594 training return: tensor(201.3183, device='cuda:0')
episode: 595 training return: tensor(-8.7960, device='cuda:0')
epoch: 149 test_true_pfm: 2264.2620993622572 sim_pfm: 286.04078437166754
episode: 596 training return: tensor(36.8480, device='cuda:0')
episode: 597 training return: tensor(47.1517, device='cuda:0')
episode: 598 training return: tensor(429.6102, device='cuda:0')
episode: 599 training return: tensor(183.4915, device='cuda:0')
epoch: 150 test_true_pfm: 1763.6759888867546 sim_pfm: 100.73322698481691
