['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'expert', '--seed', '2']
epoch: 0 training_loss 0.277715013101697 test_loss: 0.1762192964553833
epoch: 1 training_loss 0.20118206843733788 test_loss: 0.18531070947647094
epoch: 2 training_loss 0.17590916000306606 test_loss: 0.17033298015594484
epoch: 3 training_loss 0.17413014970719815 test_loss: 0.1399288773536682
epoch: 4 training_loss 0.15877775922417642 test_loss: 0.19170041084289552
epoch: 5 training_loss 0.15314997091889382 test_loss: 0.15462865829467773
epoch: 6 training_loss 0.1443463971465826 test_loss: 0.14112069606781005
epoch: 7 training_loss 0.13756775557994844 test_loss: 0.1424547791481018
epoch: 8 training_loss 0.13754605364054442 test_loss: 0.12878029346466063
epoch: 9 training_loss 0.13015570513904096 test_loss: 0.13138498067855836
epoch: 10 training_loss 0.13333301033824682 test_loss: 0.1361216425895691
epoch: 11 training_loss 0.1392835183814168 test_loss: 0.14054881334304808
epoch: 12 training_loss 0.1405931729823351 test_loss: 0.11863917112350464
epoch: 13 training_loss 0.12037219397723675 test_loss: 0.1360860824584961
epoch: 14 training_loss 0.13366549238562583 test_loss: 0.12863610982894896
epoch: 15 training_loss 0.1340196467936039 test_loss: 0.11481692790985107
epoch: 16 training_loss 0.1245971206575632 test_loss: 0.1538543224334717
epoch: 17 training_loss 0.14208175990730523 test_loss: 0.11421679258346558
epoch: 18 training_loss 0.12883794084191322 test_loss: 0.13515266180038452
epoch: 19 training_loss 0.12799026008695363 test_loss: 0.12574236392974852
epoch: 20 training_loss 0.12498718932271004 test_loss: 0.15584464073181153
epoch: 21 training_loss 0.12930185746401548 test_loss: 0.13904637098312378
epoch: 22 training_loss 0.12539079267531633 test_loss: 0.14092708826065065
epoch: 23 training_loss 0.1317179489508271 test_loss: 0.1276812195777893
epoch: 24 training_loss 0.1242501064762473 test_loss: 0.10683987140655518
epoch: 25 training_loss 0.1348344523459673 test_loss: 0.12722439765930177
epoch: 26 training_loss 0.11827027704566717 test_loss: 0.11930413246154785
epoch: 27 training_loss 0.1163083616271615 test_loss: 0.11854857206344604
epoch: 28 training_loss 0.12725806549191476 test_loss: 0.13131242990493774
epoch: 29 training_loss 0.13613440111279487 test_loss: 0.11752088069915771
epoch: 30 training_loss 0.12764200024306774 test_loss: 0.14104940891265869
epoch: 31 training_loss 0.1257244859263301 test_loss: 0.13858848810195923
epoch: 32 training_loss 0.12308573711663484 test_loss: 0.12340364456176758
epoch: 33 training_loss 0.12102610100060701 test_loss: 0.12696533203125
epoch: 34 training_loss 0.12495408482849597 test_loss: 0.14028723239898683
epoch: 35 training_loss 0.12679813720285893 test_loss: 0.13501347303390504
epoch: 36 training_loss 0.12671861492097378 test_loss: 0.11040854454040527
epoch: 37 training_loss 0.1235711956769228 test_loss: 0.1313415765762329
epoch: 38 training_loss 0.11823283404111862 test_loss: 0.1184267520904541
epoch: 39 training_loss 0.12159691158682108 test_loss: 0.13462022542953492
epoch: 40 training_loss 0.12816528629511595 test_loss: 0.12797294855117797
epoch: 41 training_loss 0.12752546180039645 test_loss: 0.11074128150939941
epoch: 42 training_loss 0.12534380335360765 test_loss: 0.11946570873260498
epoch: 43 training_loss 0.11992276791483164 test_loss: 0.11522608995437622
epoch: 44 training_loss 0.1205551678314805 test_loss: 0.11260606050491333
epoch: 45 training_loss 0.13220357812941075 test_loss: 0.12775601148605348
epoch: 46 training_loss 0.11999395247548819 test_loss: 0.12360550165176391
epoch: 47 training_loss 0.12473236192017793 test_loss: 0.10774149894714355
epoch: 48 training_loss 0.12243698913604022 test_loss: 0.11873176097869872
epoch: 49 training_loss 0.12102716624736785 test_loss: 0.13452175855636597
epoch: 50 training_loss 0.12071143828332424 test_loss: 0.12744632959365845
epoch: 51 training_loss 0.12431263215839863 test_loss: 0.1378701686859131
epoch: 52 training_loss 0.1161183050274849 test_loss: 0.16722259521484376
epoch: 53 training_loss 0.11542174007743597 test_loss: 0.13423004150390624
epoch: 54 training_loss 0.1270351332053542 test_loss: 0.1273845076560974
epoch: 55 training_loss 0.11927324023097753 test_loss: 0.1221531867980957
epoch: 56 training_loss 0.1180193230137229 test_loss: 0.12156656980514527
epoch: 57 training_loss 0.1165039847791195 test_loss: 0.11594778299331665
epoch: 58 training_loss 0.11802581120282411 test_loss: 0.09559939503669738
epoch: 59 training_loss 0.11709306210279465 test_loss: 0.10790971517562867
epoch: 60 training_loss 0.1133829789981246 test_loss: 0.11984020471572876
epoch: 61 training_loss 0.12694264981895687 test_loss: 0.11313339471817016
epoch: 62 training_loss 0.12338559633120895 test_loss: 0.11649315357208252
epoch: 63 training_loss 0.11820188969373703 test_loss: 0.10589693784713745
epoch: 64 training_loss 0.1281609459221363 test_loss: 0.114197838306427
epoch: 65 training_loss 0.11779421981424093 test_loss: 0.11135998964309693
epoch: 66 training_loss 0.12285864777863026 test_loss: 0.11644852161407471
epoch: 67 training_loss 0.12126958101987839 test_loss: 0.11864702701568604
epoch: 68 training_loss 0.11410711988806725 test_loss: 0.11786729097366333
epoch: 69 training_loss 0.11971774417907 test_loss: 0.1305086851119995
epoch: 70 training_loss 0.12144806867465377 test_loss: 0.11977616548538209
epoch: 71 training_loss 0.11852808564901351 test_loss: 0.11257946491241455
epoch: 72 training_loss 0.11826143138110638 test_loss: 0.10426360368728638
epoch: 73 training_loss 0.12276413841173053 test_loss: 0.1219670057296753
epoch: 74 training_loss 0.12035630457103252 test_loss: 0.11673223972320557
epoch: 75 training_loss 0.11720054533332586 test_loss: 0.1290750026702881
epoch: 76 training_loss 0.11764926979318262 test_loss: 0.1163520336151123
epoch: 77 training_loss 0.11951090537011623 test_loss: 0.12037191390991211
epoch: 78 training_loss 0.12086720705032349 test_loss: 0.1340230941772461
epoch: 79 training_loss 0.12628296010196208 test_loss: 0.12719638347625734
epoch: 80 training_loss 0.1199612857401371 test_loss: 0.140446674823761
epoch: 81 training_loss 0.12032733403146267 test_loss: 0.11206527948379516
epoch: 82 training_loss 0.11320756008848548 test_loss: 0.11772240400314331
epoch: 83 training_loss 0.1130343591235578 test_loss: 0.10418490171432496
epoch: 84 training_loss 0.11353964317589998 test_loss: 0.1309242606163025
epoch: 85 training_loss 0.1135845959931612 test_loss: 0.10859041213989258
epoch: 86 training_loss 0.12052916537970304 test_loss: 0.12920949459075928
epoch: 87 training_loss 0.12164214711636305 test_loss: 0.10983905792236329
epoch: 88 training_loss 0.12582504220306873 test_loss: 0.12328755855560303
epoch: 89 training_loss 0.1200861306115985 test_loss: 0.1255431890487671
epoch: 90 training_loss 0.11785408932715655 test_loss: 0.12761710882186889
epoch: 91 training_loss 0.11523850014433265 test_loss: 0.11601169109344482
epoch: 92 training_loss 0.1176825838536024 test_loss: 0.12192920446395875
epoch: 93 training_loss 0.11484953101724386 test_loss: 0.11436001062393189
epoch: 94 training_loss 0.128391377851367 test_loss: 0.11988614797592163
epoch: 95 training_loss 0.12084901006892323 test_loss: 0.09611228704452515
epoch: 96 training_loss 0.12579334827139974 test_loss: 0.11124467849731445
epoch: 97 training_loss 0.1111759388819337 test_loss: 0.11257339715957641
epoch: 98 training_loss 0.11759767059236764 test_loss: 0.12155027389526367
epoch: 99 training_loss 0.11561941843479871 test_loss: 0.10716496706008911
epoch: 100 training_loss 0.12473622919991613 test_loss: 0.12199243307113647
epoch: 101 training_loss 0.12212915435433387 test_loss: 0.1214373230934143
epoch: 102 training_loss 0.11070500418543816 test_loss: 0.12169373035430908
epoch: 103 training_loss 0.11583661910146476 test_loss: 0.111053729057312
epoch: 104 training_loss 0.11891143456101418 test_loss: 0.11565849781036378
epoch: 105 training_loss 0.12396222144365311 test_loss: 0.12532498836517333
epoch: 106 training_loss 0.12400386083871126 test_loss: 0.1229138970375061
epoch: 107 training_loss 0.11355248922482133 test_loss: 0.12514652013778688
epoch: 108 training_loss 0.1207970829308033 test_loss: 0.1368124008178711
epoch: 109 training_loss 0.11983687024563551 test_loss: 0.12766355276107788
epoch: 110 training_loss 0.11846581993624568 test_loss: 0.11915290355682373
epoch: 111 training_loss 0.11278358925133944 test_loss: 0.13085999488830566
epoch: 112 training_loss 0.12082207787781954 test_loss: 0.12709990739822388
epoch: 113 training_loss 0.11996516076847911 test_loss: 0.11763309240341187
epoch: 114 training_loss 0.12109273074194789 test_loss: 0.11716856956481933
epoch: 115 training_loss 0.12048679476603866 test_loss: 0.13627417087554933
epoch: 116 training_loss 0.11074137201532722 test_loss: 0.13736652135848998
epoch: 117 training_loss 0.11540671851485967 test_loss: 0.10751982927322387
epoch: 118 training_loss 0.11568811900913716 test_loss: 0.10591673851013184
epoch: 119 training_loss 0.11781007176265121 test_loss: 0.12018238306045533
epoch: 120 training_loss 0.11890263080596924 test_loss: 0.14684008359909057
epoch: 121 training_loss 0.12629336636513472 test_loss: 0.12878904342651368
epoch: 122 training_loss 0.11933223966509104 test_loss: 0.1174439787864685
epoch: 123 training_loss 0.12500506600365044 test_loss: 0.10457940101623535
epoch: 124 training_loss 0.11121796987950802 test_loss: 0.10625780820846557
epoch: 125 training_loss 0.1197786958143115 test_loss: 0.12748442888259887
epoch: 126 training_loss 0.11698279678821563 test_loss: 0.12672449350357057
epoch: 127 training_loss 0.1169037027284503 test_loss: 0.1270646333694458
epoch: 128 training_loss 0.11469395454972982 test_loss: 0.10428515672683716
epoch: 129 training_loss 0.11210678296163679 test_loss: 0.12086458206176758
epoch: 130 training_loss 0.11678516883403063 test_loss: 0.1144181251525879
epoch: 131 training_loss 0.1171125790476799 test_loss: 0.14384734630584717
epoch: 132 training_loss 0.12100485935807229 test_loss: 0.1321993350982666
epoch: 133 training_loss 0.11421323455870151 test_loss: 0.1001024842262268
epoch: 134 training_loss 0.11836543686687946 test_loss: 0.11311522722244263
epoch: 135 training_loss 0.11364246986806392 test_loss: 0.1214401364326477
epoch: 136 training_loss 0.1241862040758133 test_loss: 0.10189584493637086
epoch: 137 training_loss 0.12421836152672767 test_loss: 0.11285995244979859
epoch: 138 training_loss 0.11703163247555494 test_loss: 0.12462496757507324
epoch: 139 training_loss 0.11360741820186376 test_loss: 0.12824535369873047
epoch: 140 training_loss 0.11347249986603856 test_loss: 0.10441744327545166
epoch: 141 training_loss 0.11375435823574662 test_loss: 0.11329706907272338
epoch: 142 training_loss 0.11536755066365004 test_loss: 0.11586894989013671
epoch: 143 training_loss 0.12340346848592162 test_loss: 0.11048698425292969
epoch: 144 training_loss 0.1185082732513547 test_loss: 0.10713906288146972
epoch: 145 training_loss 0.11711553994566203 test_loss: 0.1049169659614563
epoch: 146 training_loss 0.1108213061094284 test_loss: 0.11409596204757691
epoch: 147 training_loss 0.11843463879078626 test_loss: 0.10706264972686767
epoch: 148 training_loss 0.11249690476804972 test_loss: 0.09894476532936096
epoch: 149 training_loss 0.12150349024683237 test_loss: 0.127804172039032
epoch: 0 training_loss 26.75224075317383 test_loss: 9.508817291259765
epoch: 1 training_loss 7.520395441055298 test_loss: 6.193644332885742
epoch: 2 training_loss 5.303753399848938 test_loss: 4.857238006591797
epoch: 3 training_loss 4.33030211687088 test_loss: 3.9843589782714846
epoch: 4 training_loss 3.960406608581543 test_loss: 3.568205642700195
epoch: 5 training_loss 3.3663294601440428 test_loss: 3.4257972717285154
epoch: 6 training_loss 3.1263163828849794 test_loss: 2.9395414352416993
epoch: 7 training_loss 2.9043139719963076 test_loss: 2.8378734588623047
epoch: 8 training_loss 2.762879409790039 test_loss: 2.7401012420654296
epoch: 9 training_loss 2.5649113774299623 test_loss: 2.7024723052978517
epoch: 10 training_loss 2.405141371488571 test_loss: 2.326833152770996
epoch: 11 training_loss 2.3521057415008544 test_loss: 2.3118078231811525
epoch: 12 training_loss 2.175715559720993 test_loss: 2.1977663040161133
epoch: 13 training_loss 2.102314783334732 test_loss: 2.080829620361328
epoch: 14 training_loss 2.026633802652359 test_loss: 2.0672155380249024
epoch: 15 training_loss 2.024128601551056 test_loss: 2.042291259765625
epoch: 16 training_loss 1.9573096120357514 test_loss: 2.174764633178711
epoch: 17 training_loss 1.8814341843128204 test_loss: 1.877570343017578
epoch: 18 training_loss 1.914436686038971 test_loss: 1.8505952835083008
epoch: 19 training_loss 1.8361689925193787 test_loss: 1.7234575271606445
epoch: 20 training_loss 1.8227592885494233 test_loss: 1.8281755447387695
epoch: 21 training_loss 1.7229367971420289 test_loss: 1.6647523880004882
epoch: 22 training_loss 1.7288380444049836 test_loss: 1.6420366287231445
epoch: 23 training_loss 1.705053368806839 test_loss: 1.7129844665527343
epoch: 24 training_loss 1.747368828058243 test_loss: 1.8210294723510743
epoch: 25 training_loss 1.7086260962486266 test_loss: 1.6688159942626952
epoch: 26 training_loss 1.6218969190120698 test_loss: 1.7285736083984375
epoch: 27 training_loss 1.6867595994472504 test_loss: 1.5739809989929199
epoch: 28 training_loss 1.5726958298683167 test_loss: 1.5958584785461425
epoch: 29 training_loss 1.645709388256073 test_loss: 1.5951572418212892
epoch: 30 training_loss 1.6210147035121918 test_loss: 1.6089763641357422
epoch: 31 training_loss 1.5120912027359008 test_loss: 1.7542179107666016
epoch: 32 training_loss 1.507798010110855 test_loss: 1.4965182304382325
epoch: 33 training_loss 1.4863649547100066 test_loss: 1.461085033416748
epoch: 34 training_loss 1.5664193284511567 test_loss: 1.5556707382202148
epoch: 35 training_loss 1.4846241164207459 test_loss: 1.504869270324707
epoch: 36 training_loss 1.5338468658924103 test_loss: 1.62615909576416
epoch: 37 training_loss 1.4913709950447083 test_loss: 1.4858881950378418
epoch: 38 training_loss 1.4175833714008332 test_loss: 1.4464568138122558
epoch: 39 training_loss 1.420354244709015 test_loss: 1.399032211303711
epoch: 40 training_loss 1.4044410872459412 test_loss: 1.4476333618164063
epoch: 41 training_loss 1.3800437092781066 test_loss: 1.4959956169128419
epoch: 42 training_loss 1.4233697080612182 test_loss: 1.4948650360107423
epoch: 43 training_loss 1.4038737177848817 test_loss: 1.353282356262207
epoch: 44 training_loss 1.3841084277629851 test_loss: 1.3696939468383789
epoch: 45 training_loss 1.3633745968341828 test_loss: 1.4257423400878906
epoch: 46 training_loss 1.3709485399723054 test_loss: 1.3691726684570313
epoch: 47 training_loss 1.3361007606983184 test_loss: 1.3943329811096192
epoch: 48 training_loss 1.3726464807987213 test_loss: 1.325800609588623
epoch: 49 training_loss 1.33985502243042 test_loss: 1.350766944885254
epoch: 50 training_loss 1.338322901725769 test_loss: 1.2798391342163087
epoch: 51 training_loss 1.2735490036010741 test_loss: 1.3165298461914063
epoch: 52 training_loss 1.2722888004779815 test_loss: 1.241384792327881
epoch: 53 training_loss 1.273887277841568 test_loss: 1.3289514541625977
epoch: 54 training_loss 1.3025249409675599 test_loss: 1.2425054550170898
epoch: 55 training_loss 1.314582230448723 test_loss: 1.2512093544006349
epoch: 56 training_loss 1.2556494104862213 test_loss: 1.2779021263122559
epoch: 57 training_loss 1.2443209880590438 test_loss: 1.240654468536377
epoch: 58 training_loss 1.2660194182395934 test_loss: 1.2559802055358886
epoch: 59 training_loss 1.2789380168914795 test_loss: 1.3574681282043457
epoch: 60 training_loss 1.2496550983190537 test_loss: 1.2460643768310546
epoch: 61 training_loss 1.2564572042226791 test_loss: 1.262535572052002
epoch: 62 training_loss 1.2735023641586303 test_loss: 1.320450210571289
epoch: 63 training_loss 1.2625946187973023 test_loss: 1.322523593902588
epoch: 64 training_loss 1.2598014611005783 test_loss: 1.282717514038086
epoch: 65 training_loss 1.1863664627075194 test_loss: 1.2713525772094727
epoch: 66 training_loss 1.2157046657800674 test_loss: 1.2310083389282227
epoch: 67 training_loss 1.227417551279068 test_loss: 1.3439196586608886
epoch: 68 training_loss 1.1968961399793625 test_loss: 1.2525372505187988
epoch: 69 training_loss 1.2060193651914597 test_loss: 1.303894329071045
epoch: 70 training_loss 1.191845989227295 test_loss: 1.253627872467041
epoch: 71 training_loss 1.2016526609659195 test_loss: 1.2428794860839845
epoch: 72 training_loss 1.1672163438796996 test_loss: 1.2104879379272462
epoch: 73 training_loss 1.1801009309291839 test_loss: 1.2041940689086914
epoch: 74 training_loss 1.1568207317590713 test_loss: 1.1933183670043945
epoch: 75 training_loss 1.1237319535017014 test_loss: 1.123001766204834
epoch: 76 training_loss 1.163839771747589 test_loss: 1.1792738914489747
epoch: 77 training_loss 1.1298062813282013 test_loss: 1.177562141418457
epoch: 78 training_loss 1.1574350225925445 test_loss: 1.1477120399475098
epoch: 79 training_loss 1.1244773662090302 test_loss: 1.1128665924072265
epoch: 80 training_loss 1.151290003657341 test_loss: 1.2219736099243164
epoch: 81 training_loss 1.18048279941082 test_loss: 1.2185453414916991
epoch: 82 training_loss 1.1351270520687102 test_loss: 1.1366036415100098
epoch: 83 training_loss 1.1569025111198425 test_loss: 1.1988332748413086
epoch: 84 training_loss 1.1014846080541612 test_loss: 1.0713134765625
epoch: 85 training_loss 1.1002052158117295 test_loss: 1.1858664512634278
epoch: 86 training_loss 1.1210796201229096 test_loss: 1.192532730102539
epoch: 87 training_loss 1.1176560789346695 test_loss: 1.0930745124816894
epoch: 88 training_loss 1.1044713079929351 test_loss: 1.1515724182128906
epoch: 89 training_loss 1.1330424326658248 test_loss: 1.1522509574890136
epoch: 90 training_loss 1.1270729905366899 test_loss: 1.1560455322265626
epoch: 91 training_loss 1.0744984406232834 test_loss: 1.1297225952148438
epoch: 92 training_loss 1.0931617617607117 test_loss: 1.1366036415100098
epoch: 93 training_loss 1.141946430206299 test_loss: 1.1450087547302246
epoch: 94 training_loss 1.0734993135929107 test_loss: 1.145863151550293
epoch: 95 training_loss 1.0639631390571593 test_loss: 1.0730864524841308
epoch: 96 training_loss 1.0966979944705963 test_loss: 1.1738949775695802
epoch: 97 training_loss 1.0809304010868073 test_loss: 1.0467830657958985
epoch: 98 training_loss 1.0985322147607803 test_loss: 1.1522768020629883
epoch: 99 training_loss 1.056107429265976 test_loss: 1.1051568984985352
epoch: 100 training_loss 1.0651341772079468 test_loss: 1.108820629119873
epoch: 101 training_loss 1.0528613978624344 test_loss: 1.0860516548156738
epoch: 102 training_loss 1.085052564740181 test_loss: 1.1227603912353517
epoch: 103 training_loss 1.0746544623374938 test_loss: 1.0806580543518067
epoch: 104 training_loss 1.0904006725549698 test_loss: 1.1100075721740723
epoch: 105 training_loss 1.0644997775554657 test_loss: 1.106546401977539
epoch: 106 training_loss 1.0652780735492706 test_loss: 0.9990279197692871
epoch: 107 training_loss 1.0318276053667068 test_loss: 1.0963761329650878
epoch: 108 training_loss 1.035595811009407 test_loss: 1.0433075904846192
epoch: 109 training_loss 1.0468195015192032 test_loss: 1.0714785575866699
epoch: 110 training_loss 1.0432481998205185 test_loss: 1.1038019180297851
epoch: 111 training_loss 1.0357321304082872 test_loss: 1.0441142082214356
epoch: 112 training_loss 1.037518908381462 test_loss: 1.1497907638549805
epoch: 113 training_loss 1.0155201214551925 test_loss: 1.00913724899292
epoch: 114 training_loss 1.028080592751503 test_loss: 1.0845745086669922
epoch: 115 training_loss 1.0253768688440323 test_loss: 1.135978889465332
epoch: 116 training_loss 1.0471093255281447 test_loss: 1.0278989791870117
epoch: 117 training_loss 1.0431517934799195 test_loss: 1.0979047775268556
epoch: 118 training_loss 1.027953575849533 test_loss: 1.0734914779663085
epoch: 119 training_loss 1.0069386690855027 test_loss: 1.0796805381774903
epoch: 120 training_loss 1.0021033585071564 test_loss: 1.05645694732666
epoch: 121 training_loss 1.0302154058218003 test_loss: 1.0355091094970703
epoch: 122 training_loss 1.013114047050476 test_loss: 0.9829953193664551
epoch: 123 training_loss 1.0376919966936111 test_loss: 1.0174269676208496
epoch: 124 training_loss 1.0443400347232819 test_loss: 1.024625873565674
epoch: 125 training_loss 1.012836030125618 test_loss: 1.067772388458252
epoch: 126 training_loss 1.0320205664634705 test_loss: 0.970316219329834
epoch: 127 training_loss 1.008778925538063 test_loss: 1.0803810119628907
epoch: 128 training_loss 1.0488623493909837 test_loss: 1.0241914749145509
epoch: 129 training_loss 1.026394258737564 test_loss: 1.047394371032715
epoch: 130 training_loss 1.0113597548007964 test_loss: 1.0544796943664552
epoch: 131 training_loss 1.0179945778846742 test_loss: 0.9698237419128418
epoch: 132 training_loss 1.0144551491737366 test_loss: 1.0096647262573242
epoch: 133 training_loss 0.9869081491231918 test_loss: 1.1084718704223633
epoch: 134 training_loss 1.0132467943429946 test_loss: 1.0018375396728516
epoch: 135 training_loss 0.9912330663204193 test_loss: 0.9800018310546875
epoch: 136 training_loss 1.026653883457184 test_loss: 1.0156968116760254
epoch: 137 training_loss 0.9989198297262192 test_loss: 0.9752099990844727
epoch: 138 training_loss 0.9959375447034836 test_loss: 1.0302727699279786
epoch: 139 training_loss 1.004724262356758 test_loss: 1.0277812957763672
epoch: 140 training_loss 0.9792923831939697 test_loss: 1.011092472076416
epoch: 141 training_loss 1.0099525487422942 test_loss: 1.0526982307434083
epoch: 142 training_loss 1.0224010515213013 test_loss: 1.0307113647460937
epoch: 143 training_loss 0.9866408205032349 test_loss: 1.033736801147461
epoch: 144 training_loss 0.9783067351579666 test_loss: 1.0607901573181153
epoch: 145 training_loss 0.978022386431694 test_loss: 1.0628427505493163
epoch: 146 training_loss 0.9995436090230941 test_loss: 1.0537431716918946
epoch: 147 training_loss 0.979325503706932 test_loss: 0.9952169418334961
epoch: 148 training_loss 0.9845942968130111 test_loss: 1.0120013236999512
epoch: 149 training_loss 0.9571958684921265 test_loss: 0.9918296813964844
3991.571458184894
episode: 0 training return: tensor(20.5138, device='cuda:0')
episode: 1 training return: tensor(53.2919, device='cuda:0')
episode: 2 training return: tensor(-4.4081, device='cuda:0')
episode: 3 training return: tensor(30.1457, device='cuda:0')
epoch: 1 test_true_pfm: 4015.1170574357197 sim_pfm: 23.68353827954464
episode: 4 training return: tensor(43.3023, device='cuda:0')
episode: 5 training return: tensor(42.4725, device='cuda:0')
episode: 6 training return: tensor(27.2908, device='cuda:0')
episode: 7 training return: tensor(8.7055, device='cuda:0')
epoch: 2 test_true_pfm: 4012.191471975364 sim_pfm: 27.951156572264154
episode: 8 training return: tensor(19.8770, device='cuda:0')
episode: 9 training return: tensor(-886.3203, device='cuda:0')
episode: 10 training return: tensor(-49.5364, device='cuda:0')
episode: 11 training return: tensor(86.3326, device='cuda:0')
epoch: 3 test_true_pfm: 4004.5013289739777 sim_pfm: 37.26970937011841
episode: 12 training return: tensor(30.5697, device='cuda:0')
episode: 13 training return: tensor(-0.0735, device='cuda:0')
episode: 14 training return: tensor(23.3534, device='cuda:0')
episode: 15 training return: tensor(62.6762, device='cuda:0')
epoch: 4 test_true_pfm: 4005.1756786575716 sim_pfm: 38.34590860680328
episode: 16 training return: tensor(56.5424, device='cuda:0')
episode: 17 training return: tensor(23.6111, device='cuda:0')
episode: 18 training return: tensor(-3.4177, device='cuda:0')
episode: 19 training return: tensor(35.2500, device='cuda:0')
epoch: 5 test_true_pfm: 4002.164713174876 sim_pfm: 40.746941058314405
episode: 20 training return: tensor(15.4084, device='cuda:0')
episode: 21 training return: tensor(25.4790, device='cuda:0')
episode: 22 training return: tensor(25.0791, device='cuda:0')
episode: 23 training return: tensor(67.7369, device='cuda:0')
epoch: 6 test_true_pfm: 4002.7088815767625 sim_pfm: -212.5965938056955
episode: 24 training return: tensor(4.1139, device='cuda:0')
episode: 25 training return: tensor(-0.4532, device='cuda:0')
episode: 26 training return: tensor(44.4857, device='cuda:0')
episode: 27 training return: tensor(51.9299, device='cuda:0')
epoch: 7 test_true_pfm: 4003.1166640791184 sim_pfm: 14.682514339035455
episode: 28 training return: tensor(46.9396, device='cuda:0')
episode: 29 training return: tensor(20.2661, device='cuda:0')
episode: 30 training return: tensor(75.1248, device='cuda:0')
episode: 31 training return: tensor(15.5310, device='cuda:0')
epoch: 8 test_true_pfm: 4026.510440384434 sim_pfm: 64.89368280783917
episode: 32 training return: tensor(69.6382, device='cuda:0')
episode: 33 training return: tensor(80.7054, device='cuda:0')
episode: 34 training return: tensor(19.8568, device='cuda:0')
episode: 35 training return: tensor(-766.5667, device='cuda:0')
epoch: 9 test_true_pfm: 3969.6998922771672 sim_pfm: 35.16712512783124
episode: 36 training return: tensor(-20.8278, device='cuda:0')
episode: 37 training return: tensor(69.5166, device='cuda:0')
episode: 38 training return: tensor(49.8353, device='cuda:0')
episode: 39 training return: tensor(41.7359, device='cuda:0')
epoch: 10 test_true_pfm: 3996.596970310511 sim_pfm: 23.115905899525387
episode: 40 training return: tensor(-876.0478, device='cuda:0')
episode: 41 training return: tensor(9.4426, device='cuda:0')
episode: 42 training return: tensor(-890.3344, device='cuda:0')
episode: 43 training return: tensor(-1.3234, device='cuda:0')
epoch: 11 test_true_pfm: 4016.146252286559 sim_pfm: -230.01600097613604
episode: 44 training return: tensor(67.9314, device='cuda:0')
episode: 45 training return: tensor(69.6966, device='cuda:0')
episode: 46 training return: tensor(76.2858, device='cuda:0')
episode: 47 training return: tensor(-0.2118, device='cuda:0')
epoch: 12 test_true_pfm: 4018.6508847061355 sim_pfm: 12.667445078443658
episode: 48 training return: tensor(-824.9524, device='cuda:0')
episode: 49 training return: tensor(-21.6386, device='cuda:0')
episode: 50 training return: tensor(46.9416, device='cuda:0')
episode: 51 training return: tensor(32.4044, device='cuda:0')
epoch: 13 test_true_pfm: 4005.626593990988 sim_pfm: 34.525420957186725
episode: 52 training return: tensor(52.8994, device='cuda:0')
episode: 53 training return: tensor(14.3877, device='cuda:0')
episode: 54 training return: tensor(0.2302, device='cuda:0')
episode: 55 training return: tensor(73.8775, device='cuda:0')
epoch: 14 test_true_pfm: 4016.4861763553986 sim_pfm: 18.267230397012707
episode: 56 training return: tensor(-13.4789, device='cuda:0')
episode: 57 training return: tensor(56.5367, device='cuda:0')
episode: 58 training return: tensor(46.7182, device='cuda:0')
episode: 59 training return: tensor(25.9214, device='cuda:0')
epoch: 15 test_true_pfm: 4038.0364765246 sim_pfm: -259.6132953084307
episode: 60 training return: tensor(30.6195, device='cuda:0')
episode: 61 training return: tensor(18.7576, device='cuda:0')
episode: 62 training return: tensor(59.5410, device='cuda:0')
episode: 63 training return: tensor(-39.5895, device='cuda:0')
epoch: 16 test_true_pfm: 4012.2598489188345 sim_pfm: 20.77800980491641
episode: 64 training return: tensor(14.0996, device='cuda:0')
episode: 65 training return: tensor(55.2482, device='cuda:0')
episode: 66 training return: tensor(22.6755, device='cuda:0')
episode: 67 training return: tensor(48.4414, device='cuda:0')
epoch: 17 test_true_pfm: 4016.6696732141136 sim_pfm: 32.819278550470095
episode: 68 training return: tensor(-30.5816, device='cuda:0')
episode: 69 training return: tensor(-892.1717, device='cuda:0')
episode: 70 training return: tensor(-46.0100, device='cuda:0')
episode: 71 training return: tensor(50.3949, device='cuda:0')
epoch: 18 test_true_pfm: 3999.649734603103 sim_pfm: 4.981033128521328
episode: 72 training return: tensor(-891.6176, device='cuda:0')
episode: 73 training return: tensor(27.4647, device='cuda:0')
episode: 74 training return: tensor(1.4175, device='cuda:0')
episode: 75 training return: tensor(-90.6267, device='cuda:0')
epoch: 19 test_true_pfm: 3955.004410380397 sim_pfm: -13.565795767334444
episode: 76 training return: tensor(29.0366, device='cuda:0')
episode: 77 training return: tensor(29.8869, device='cuda:0')
episode: 78 training return: tensor(31.3272, device='cuda:0')
episode: 79 training return: tensor(-852.0941, device='cuda:0')
epoch: 20 test_true_pfm: 3999.0917862520387 sim_pfm: -199.35377967148088
episode: 80 training return: tensor(46.5378, device='cuda:0')
episode: 81 training return: tensor(17.9905, device='cuda:0')
episode: 82 training return: tensor(72.7105, device='cuda:0')
episode: 83 training return: tensor(72.0042, device='cuda:0')
epoch: 21 test_true_pfm: 3958.5471518791874 sim_pfm: 28.721324216351302
episode: 84 training return: tensor(26.7771, device='cuda:0')
episode: 85 training return: tensor(52.5137, device='cuda:0')
episode: 86 training return: tensor(-64.4243, device='cuda:0')
episode: 87 training return: tensor(68.8546, device='cuda:0')
epoch: 22 test_true_pfm: 4048.928819120489 sim_pfm: 30.889768725037964
episode: 88 training return: tensor(48.0694, device='cuda:0')
episode: 89 training return: tensor(67.6891, device='cuda:0')
episode: 90 training return: tensor(19.7459, device='cuda:0')
episode: 91 training return: tensor(38.8211, device='cuda:0')
epoch: 23 test_true_pfm: 3952.4568949092773 sim_pfm: -6.206210727686994
episode: 92 training return: tensor(67.1862, device='cuda:0')
episode: 93 training return: tensor(89.9166, device='cuda:0')
episode: 94 training return: tensor(-24.9160, device='cuda:0')
episode: 95 training return: tensor(-36.6944, device='cuda:0')
epoch: 24 test_true_pfm: 4024.391184285137 sim_pfm: 36.65274186562359
episode: 96 training return: tensor(-31.1148, device='cuda:0')
episode: 97 training return: tensor(41.5510, device='cuda:0')
episode: 98 training return: tensor(53.0761, device='cuda:0')
episode: 99 training return: tensor(-30.4140, device='cuda:0')
epoch: 25 test_true_pfm: 3968.8337852318314 sim_pfm: -19.218288419933135
episode: 100 training return: tensor(-17.2729, device='cuda:0')
episode: 101 training return: tensor(35.7318, device='cuda:0')
episode: 102 training return: tensor(22.1474, device='cuda:0')
episode: 103 training return: tensor(0.7048, device='cuda:0')
epoch: 26 test_true_pfm: 3993.6735621099447 sim_pfm: 46.0672699373099
episode: 104 training return: tensor(9.7987, device='cuda:0')
episode: 105 training return: tensor(58.2427, device='cuda:0')
episode: 106 training return: tensor(69.4680, device='cuda:0')
episode: 107 training return: tensor(22.5773, device='cuda:0')
epoch: 27 test_true_pfm: 3970.0691884921216 sim_pfm: -12.179828139817497
episode: 108 training return: tensor(14.9207, device='cuda:0')
episode: 109 training return: tensor(59.2092, device='cuda:0')
episode: 110 training return: tensor(39.5936, device='cuda:0')
episode: 111 training return: tensor(38.9951, device='cuda:0')
epoch: 28 test_true_pfm: 3943.5193625527077 sim_pfm: 1.0401656584775385
episode: 112 training return: tensor(67.0687, device='cuda:0')
episode: 113 training return: tensor(61.9535, device='cuda:0')
episode: 114 training return: tensor(11.3760, device='cuda:0')
episode: 115 training return: tensor(52.6945, device='cuda:0')
epoch: 29 test_true_pfm: 2899.694802971861 sim_pfm: 12.059121265929813
episode: 116 training return: tensor(59.0452, device='cuda:0')
episode: 117 training return: tensor(55.0002, device='cuda:0')
episode: 118 training return: tensor(63.6349, device='cuda:0')
episode: 119 training return: tensor(62.6653, device='cuda:0')
epoch: 30 test_true_pfm: 4011.480827091001 sim_pfm: 55.19538209657185
episode: 120 training return: tensor(57.0699, device='cuda:0')
episode: 121 training return: tensor(84.3875, device='cuda:0')
episode: 122 training return: tensor(44.7681, device='cuda:0')
episode: 123 training return: tensor(66.0143, device='cuda:0')
epoch: 31 test_true_pfm: 4037.5315156937154 sim_pfm: 25.995259920319466
episode: 124 training return: tensor(61.7811, device='cuda:0')
episode: 125 training return: tensor(-11.0878, device='cuda:0')
episode: 126 training return: tensor(-52.0425, device='cuda:0')
episode: 127 training return: tensor(12.8095, device='cuda:0')
epoch: 32 test_true_pfm: 3995.4068386543354 sim_pfm: 37.82497208024142
episode: 128 training return: tensor(21.7870, device='cuda:0')
episode: 129 training return: tensor(12.8870, device='cuda:0')
episode: 130 training return: tensor(-6.8318, device='cuda:0')
episode: 131 training return: tensor(-15.7370, device='cuda:0')
epoch: 33 test_true_pfm: 3967.905843924878 sim_pfm: 43.66876392823178
episode: 132 training return: tensor(17.9942, device='cuda:0')
episode: 133 training return: tensor(99.6122, device='cuda:0')
episode: 134 training return: tensor(83.7019, device='cuda:0')
episode: 135 training return: tensor(31.8606, device='cuda:0')
epoch: 34 test_true_pfm: 3992.824625478334 sim_pfm: 31.07121571491977
episode: 136 training return: tensor(37.9701, device='cuda:0')
episode: 137 training return: tensor(-38.4079, device='cuda:0')
episode: 138 training return: tensor(54.6526, device='cuda:0')
episode: 139 training return: tensor(63.7792, device='cuda:0')
epoch: 35 test_true_pfm: 4031.661326661236 sim_pfm: 28.752525275912678
episode: 140 training return: tensor(36.8795, device='cuda:0')
episode: 141 training return: tensor(-8.4221, device='cuda:0')
episode: 142 training return: tensor(9.2661, device='cuda:0')
episode: 143 training return: tensor(6.4734, device='cuda:0')
epoch: 36 test_true_pfm: 4036.260081624189 sim_pfm: 27.273165036419716
episode: 144 training return: tensor(58.6984, device='cuda:0')
episode: 145 training return: tensor(47.1223, device='cuda:0')
episode: 146 training return: tensor(63.9615, device='cuda:0')
episode: 147 training return: tensor(10.1755, device='cuda:0')
epoch: 37 test_true_pfm: 3993.8881994877356 sim_pfm: -235.46132267478
episode: 148 training return: tensor(37.4353, device='cuda:0')
episode: 149 training return: tensor(28.5388, device='cuda:0')
episode: 150 training return: tensor(19.9381, device='cuda:0')
episode: 151 training return: tensor(-18.8119, device='cuda:0')
epoch: 38 test_true_pfm: 4008.845720062965 sim_pfm: 32.940081917709904
episode: 152 training return: tensor(15.0551, device='cuda:0')
episode: 153 training return: tensor(78.4204, device='cuda:0')
episode: 154 training return: tensor(66.2967, device='cuda:0')
episode: 155 training return: tensor(2.0856, device='cuda:0')
epoch: 39 test_true_pfm: 4046.9084600515876 sim_pfm: 41.73369635750229
episode: 156 training return: tensor(9.4163, device='cuda:0')
episode: 157 training return: tensor(67.9641, device='cuda:0')
episode: 158 training return: tensor(33.5692, device='cuda:0')
episode: 159 training return: tensor(-953.0787, device='cuda:0')
epoch: 40 test_true_pfm: 4038.647031064536 sim_pfm: 64.75864317159478
episode: 160 training return: tensor(30.5886, device='cuda:0')
episode: 161 training return: tensor(-32.9353, device='cuda:0')
episode: 162 training return: tensor(25.6477, device='cuda:0')
episode: 163 training return: tensor(37.3649, device='cuda:0')
epoch: 41 test_true_pfm: 4013.8506582164955 sim_pfm: 61.03860734057768
episode: 164 training return: tensor(45.8741, device='cuda:0')
episode: 165 training return: tensor(24.0132, device='cuda:0')
episode: 166 training return: tensor(62.5714, device='cuda:0')
episode: 167 training return: tensor(64.2859, device='cuda:0')
epoch: 42 test_true_pfm: 4004.606591716398 sim_pfm: -2.5960554653623453
episode: 168 training return: tensor(47.6311, device='cuda:0')
episode: 169 training return: tensor(30.7639, device='cuda:0')
episode: 170 training return: tensor(64.8106, device='cuda:0')
episode: 171 training return: tensor(45.6641, device='cuda:0')
epoch: 43 test_true_pfm: 3984.8189524092177 sim_pfm: 61.4115127636372
episode: 172 training return: tensor(46.8593, device='cuda:0')
episode: 173 training return: tensor(74.7042, device='cuda:0')
episode: 174 training return: tensor(69.9992, device='cuda:0')
episode: 175 training return: tensor(-17.7790, device='cuda:0')
epoch: 44 test_true_pfm: 3966.080775334423 sim_pfm: 60.510788635767916
episode: 176 training return: tensor(-10.0069, device='cuda:0')
episode: 177 training return: tensor(-29.5086, device='cuda:0')
episode: 178 training return: tensor(61.8360, device='cuda:0')
episode: 179 training return: tensor(71.4412, device='cuda:0')
epoch: 45 test_true_pfm: 4014.7333070238733 sim_pfm: 23.953883079918644
episode: 180 training return: tensor(36.3461, device='cuda:0')
episode: 181 training return: tensor(78.7392, device='cuda:0')
episode: 182 training return: tensor(-853.2921, device='cuda:0')
episode: 183 training return: tensor(61.3253, device='cuda:0')
epoch: 46 test_true_pfm: 3990.7021331666147 sim_pfm: -5.574573283365074
episode: 184 training return: tensor(18.3568, device='cuda:0')
episode: 185 training return: tensor(-0.2551, device='cuda:0')
episode: 186 training return: tensor(41.8974, device='cuda:0')
episode: 187 training return: tensor(76.5048, device='cuda:0')
epoch: 47 test_true_pfm: 3993.339699966975 sim_pfm: 68.42284829137498
episode: 188 training return: tensor(74.7061, device='cuda:0')
episode: 189 training return: tensor(-22.2977, device='cuda:0')
episode: 190 training return: tensor(16.6811, device='cuda:0')
episode: 191 training return: tensor(91.9528, device='cuda:0')
epoch: 48 test_true_pfm: 4034.6956055163305 sim_pfm: -235.75796003533955
episode: 192 training return: tensor(60.8974, device='cuda:0')
episode: 193 training return: tensor(45.9501, device='cuda:0')
episode: 194 training return: tensor(-847.3250, device='cuda:0')
episode: 195 training return: tensor(-51.5892, device='cuda:0')
epoch: 49 test_true_pfm: 3987.585536020837 sim_pfm: 32.67084533942398
episode: 196 training return: tensor(-861.2354, device='cuda:0')
episode: 197 training return: tensor(58.7437, device='cuda:0')
episode: 198 training return: tensor(-30.8959, device='cuda:0')
episode: 199 training return: tensor(-891.1643, device='cuda:0')
epoch: 50 test_true_pfm: 4021.9438337466895 sim_pfm: 44.36797059571836
episode: 200 training return: tensor(27.1672, device='cuda:0')
episode: 201 training return: tensor(8.0066, device='cuda:0')
episode: 202 training return: tensor(78.1243, device='cuda:0')
episode: 203 training return: tensor(-847.3755, device='cuda:0')
epoch: 51 test_true_pfm: 3978.7707467473824 sim_pfm: 57.219529256767906
episode: 204 training return: tensor(28.1433, device='cuda:0')
episode: 205 training return: tensor(-16.8564, device='cuda:0')
episode: 206 training return: tensor(34.8244, device='cuda:0')
episode: 207 training return: tensor(21.3112, device='cuda:0')
epoch: 52 test_true_pfm: 3981.6367790966256 sim_pfm: -293.73346897777327
episode: 208 training return: tensor(51.1657, device='cuda:0')
episode: 209 training return: tensor(24.0308, device='cuda:0')
episode: 210 training return: tensor(33.3106, device='cuda:0')
episode: 211 training return: tensor(59.6533, device='cuda:0')
epoch: 53 test_true_pfm: 4018.9405693048298 sim_pfm: 28.54307995396084
episode: 212 training return: tensor(-24.1698, device='cuda:0')
episode: 213 training return: tensor(86.9976, device='cuda:0')
episode: 214 training return: tensor(-725.7096, device='cuda:0')
episode: 215 training return: tensor(84.9921, device='cuda:0')
epoch: 54 test_true_pfm: 4014.7634895544143 sim_pfm: 13.518931257034032
episode: 216 training return: tensor(59.0742, device='cuda:0')
episode: 217 training return: tensor(42.7344, device='cuda:0')
episode: 218 training return: tensor(38.0058, device='cuda:0')
episode: 219 training return: tensor(48.5711, device='cuda:0')
epoch: 55 test_true_pfm: 4001.734069048813 sim_pfm: 31.836770812408457
episode: 220 training return: tensor(71.9403, device='cuda:0')
episode: 221 training return: tensor(70.9865, device='cuda:0')
episode: 222 training return: tensor(-5.1874, device='cuda:0')
episode: 223 training return: tensor(-8.2018, device='cuda:0')
epoch: 56 test_true_pfm: 3713.245954997971 sim_pfm: 16.284802475371787
episode: 224 training return: tensor(-5.1166, device='cuda:0')
episode: 225 training return: tensor(29.7121, device='cuda:0')
episode: 226 training return: tensor(60.1855, device='cuda:0')
episode: 227 training return: tensor(15.4308, device='cuda:0')
epoch: 57 test_true_pfm: 3932.979934118876 sim_pfm: 11.107238224862764
episode: 228 training return: tensor(57.4050, device='cuda:0')
episode: 229 training return: tensor(55.4363, device='cuda:0')
episode: 230 training return: tensor(25.9996, device='cuda:0')
episode: 231 training return: tensor(31.3514, device='cuda:0')
epoch: 58 test_true_pfm: 4008.2018306360205 sim_pfm: 13.78390935874389
episode: 232 training return: tensor(-5.2772, device='cuda:0')
episode: 233 training return: tensor(-842.2512, device='cuda:0')
episode: 234 training return: tensor(70.9540, device='cuda:0')
episode: 235 training return: tensor(56.3284, device='cuda:0')
epoch: 59 test_true_pfm: 3995.666271435226 sim_pfm: 29.902100541813223
episode: 236 training return: tensor(15.0520, device='cuda:0')
episode: 237 training return: tensor(48.3491, device='cuda:0')
episode: 238 training return: tensor(50.7822, device='cuda:0')
episode: 239 training return: tensor(39.0737, device='cuda:0')
epoch: 60 test_true_pfm: 2828.9165929902974 sim_pfm: 48.743975604331354
episode: 240 training return: tensor(-29.7353, device='cuda:0')
episode: 241 training return: tensor(-4.7586, device='cuda:0')
episode: 242 training return: tensor(68.4457, device='cuda:0')
episode: 243 training return: tensor(-878.2157, device='cuda:0')
epoch: 61 test_true_pfm: 4021.633453464679 sim_pfm: 5.925316281616688
episode: 244 training return: tensor(46.3441, device='cuda:0')
episode: 245 training return: tensor(24.1985, device='cuda:0')
episode: 246 training return: tensor(37.5534, device='cuda:0')
episode: 247 training return: tensor(94.1388, device='cuda:0')
epoch: 62 test_true_pfm: 3977.425703624582 sim_pfm: 47.23789105428538
episode: 248 training return: tensor(21.3219, device='cuda:0')
episode: 249 training return: tensor(62.7600, device='cuda:0')
episode: 250 training return: tensor(13.9391, device='cuda:0')
episode: 251 training return: tensor(-15.0811, device='cuda:0')
epoch: 63 test_true_pfm: 1961.927340366161 sim_pfm: 43.942416671217266
episode: 252 training return: tensor(24.8061, device='cuda:0')
episode: 253 training return: tensor(-42.2648, device='cuda:0')
episode: 254 training return: tensor(86.0020, device='cuda:0')
episode: 255 training return: tensor(39.6168, device='cuda:0')
epoch: 64 test_true_pfm: 3983.044064343812 sim_pfm: -18.959926163884422
episode: 256 training return: tensor(71.9914, device='cuda:0')
episode: 257 training return: tensor(-695.0812, device='cuda:0')
episode: 258 training return: tensor(72.6843, device='cuda:0')
episode: 259 training return: tensor(89.4455, device='cuda:0')
epoch: 65 test_true_pfm: 4033.304515117726 sim_pfm: 4.731049524038099
episode: 260 training return: tensor(86.4411, device='cuda:0')
episode: 261 training return: tensor(36.3941, device='cuda:0')
episode: 262 training return: tensor(-52.4194, device='cuda:0')
episode: 263 training return: tensor(61.8948, device='cuda:0')
epoch: 66 test_true_pfm: 3952.79326026685 sim_pfm: -1.8663099204131868
episode: 264 training return: tensor(73.3805, device='cuda:0')
episode: 265 training return: tensor(78.5745, device='cuda:0')
episode: 266 training return: tensor(-10.0688, device='cuda:0')
episode: 267 training return: tensor(40.7711, device='cuda:0')
epoch: 67 test_true_pfm: 3996.429182290333 sim_pfm: 34.90990320031415
episode: 268 training return: tensor(-64.1177, device='cuda:0')
episode: 269 training return: tensor(89.3964, device='cuda:0')
episode: 270 training return: tensor(57.1637, device='cuda:0')
episode: 271 training return: tensor(11.4975, device='cuda:0')
epoch: 68 test_true_pfm: 3987.748401698091 sim_pfm: 23.326852017858375
episode: 272 training return: tensor(-33.8973, device='cuda:0')
episode: 273 training return: tensor(26.7834, device='cuda:0')
episode: 274 training return: tensor(4.2372, device='cuda:0')
episode: 275 training return: tensor(10.5069, device='cuda:0')
epoch: 69 test_true_pfm: 4027.3034387474613 sim_pfm: 28.963411579296615
episode: 276 training return: tensor(-3.2440, device='cuda:0')
episode: 277 training return: tensor(58.5344, device='cuda:0')
episode: 278 training return: tensor(16.2508, device='cuda:0')
episode: 279 training return: tensor(18.3556, device='cuda:0')
epoch: 70 test_true_pfm: 3997.0451256297515 sim_pfm: -9.93686830085547
episode: 280 training return: tensor(19.9100, device='cuda:0')
episode: 281 training return: tensor(71.4984, device='cuda:0')
episode: 282 training return: tensor(57.6148, device='cuda:0')
episode: 283 training return: tensor(26.9215, device='cuda:0')
epoch: 71 test_true_pfm: 4014.306004096589 sim_pfm: 48.438525393032855
episode: 284 training return: tensor(25.1919, device='cuda:0')
episode: 285 training return: tensor(53.8283, device='cuda:0')
episode: 286 training return: tensor(87.3791, device='cuda:0')
episode: 287 training return: tensor(52.9319, device='cuda:0')
epoch: 72 test_true_pfm: 3992.5317533458715 sim_pfm: 33.302448983624345
episode: 288 training return: tensor(-41.1792, device='cuda:0')
episode: 289 training return: tensor(56.5208, device='cuda:0')
episode: 290 training return: tensor(-4.0145, device='cuda:0')
episode: 291 training return: tensor(11.3948, device='cuda:0')
epoch: 73 test_true_pfm: 4004.064536393356 sim_pfm: -263.225260923636
episode: 292 training return: tensor(28.3344, device='cuda:0')
episode: 293 training return: tensor(55.8199, device='cuda:0')
episode: 294 training return: tensor(31.8371, device='cuda:0')
episode: 295 training return: tensor(4.4318, device='cuda:0')
epoch: 74 test_true_pfm: 4018.594377409947 sim_pfm: 51.50350693351356
episode: 296 training return: tensor(67.4843, device='cuda:0')
episode: 297 training return: tensor(24.1001, device='cuda:0')
episode: 298 training return: tensor(72.0951, device='cuda:0')
episode: 299 training return: tensor(56.1584, device='cuda:0')
epoch: 75 test_true_pfm: 3987.7789776262453 sim_pfm: 14.872674264595844
episode: 300 training return: tensor(75.1339, device='cuda:0')
episode: 301 training return: tensor(-5.0160, device='cuda:0')
episode: 302 training return: tensor(23.8476, device='cuda:0')
episode: 303 training return: tensor(75.2381, device='cuda:0')
epoch: 76 test_true_pfm: 3994.554428068795 sim_pfm: 26.412783375242725
episode: 304 training return: tensor(41.0703, device='cuda:0')
episode: 305 training return: tensor(-2.5192, device='cuda:0')
episode: 306 training return: tensor(47.0590, device='cuda:0')
episode: 307 training return: tensor(23.5086, device='cuda:0')
epoch: 77 test_true_pfm: 4020.492946233604 sim_pfm: 27.69427100897883
episode: 308 training return: tensor(53.3475, device='cuda:0')
episode: 309 training return: tensor(92.1352, device='cuda:0')
episode: 310 training return: tensor(2.2013, device='cuda:0')
episode: 311 training return: tensor(81.5926, device='cuda:0')
epoch: 78 test_true_pfm: 4013.6533804036576 sim_pfm: 50.03406949541144
episode: 312 training return: tensor(54.7098, device='cuda:0')
episode: 313 training return: tensor(91.4009, device='cuda:0')
episode: 314 training return: tensor(57.2909, device='cuda:0')
episode: 315 training return: tensor(16.6203, device='cuda:0')
epoch: 79 test_true_pfm: 4003.512999986991 sim_pfm: 26.04158550525123
episode: 316 training return: tensor(19.3077, device='cuda:0')
episode: 317 training return: tensor(66.0531, device='cuda:0')
episode: 318 training return: tensor(5.1730, device='cuda:0')
episode: 319 training return: tensor(-10.7343, device='cuda:0')
epoch: 80 test_true_pfm: 4039.663385681469 sim_pfm: 57.02482932875864
episode: 320 training return: tensor(77.7613, device='cuda:0')
episode: 321 training return: tensor(55.2871, device='cuda:0')
episode: 322 training return: tensor(9.9226, device='cuda:0')
episode: 323 training return: tensor(8.4348, device='cuda:0')
epoch: 81 test_true_pfm: 4002.9060424233417 sim_pfm: 29.142213205515873
episode: 324 training return: tensor(29.5812, device='cuda:0')
episode: 325 training return: tensor(-18.1513, device='cuda:0')
episode: 326 training return: tensor(37.3916, device='cuda:0')
episode: 327 training return: tensor(12.4633, device='cuda:0')
epoch: 82 test_true_pfm: 3986.328665582741 sim_pfm: 35.779339161488075
episode: 328 training return: tensor(-15.2311, device='cuda:0')
episode: 329 training return: tensor(37.4758, device='cuda:0')
episode: 330 training return: tensor(-3.5775, device='cuda:0')
episode: 331 training return: tensor(83.8924, device='cuda:0')
epoch: 83 test_true_pfm: 3974.8923322055584 sim_pfm: 50.684298102229754
episode: 332 training return: tensor(66.4829, device='cuda:0')
episode: 333 training return: tensor(68.6543, device='cuda:0')
episode: 334 training return: tensor(33.5092, device='cuda:0')
episode: 335 training return: tensor(80.2977, device='cuda:0')
epoch: 84 test_true_pfm: 4012.93404720587 sim_pfm: 33.367065292589054
episode: 336 training return: tensor(87.7106, device='cuda:0')
episode: 337 training return: tensor(15.2098, device='cuda:0')
episode: 338 training return: tensor(42.0534, device='cuda:0')
episode: 339 training return: tensor(19.9197, device='cuda:0')
epoch: 85 test_true_pfm: 4032.2574220700376 sim_pfm: 59.03150050447827
episode: 340 training return: tensor(24.5316, device='cuda:0')
episode: 341 training return: tensor(-6.9958, device='cuda:0')
episode: 342 training return: tensor(37.7657, device='cuda:0')
episode: 343 training return: tensor(-9.4651, device='cuda:0')
epoch: 86 test_true_pfm: 3972.803780924818 sim_pfm: 26.88103525300782
episode: 344 training return: tensor(76.0176, device='cuda:0')
episode: 345 training return: tensor(29.7554, device='cuda:0')
episode: 346 training return: tensor(31.3730, device='cuda:0')
episode: 347 training return: tensor(50.8566, device='cuda:0')
epoch: 87 test_true_pfm: 4013.7861316893236 sim_pfm: 15.841448989299048
episode: 348 training return: tensor(31.8375, device='cuda:0')
episode: 349 training return: tensor(55.1261, device='cuda:0')
episode: 350 training return: tensor(68.0173, device='cuda:0')
episode: 351 training return: tensor(95.8932, device='cuda:0')
epoch: 88 test_true_pfm: 3977.4099690178937 sim_pfm: 38.90974134201921
episode: 352 training return: tensor(77.6507, device='cuda:0')
episode: 353 training return: tensor(71.1277, device='cuda:0')
episode: 354 training return: tensor(2.3688, device='cuda:0')
episode: 355 training return: tensor(76.4846, device='cuda:0')
epoch: 89 test_true_pfm: 3995.4217607174746 sim_pfm: 19.879463687636115
episode: 356 training return: tensor(75.9126, device='cuda:0')
episode: 357 training return: tensor(36.8654, device='cuda:0')
episode: 358 training return: tensor(16.0507, device='cuda:0')
episode: 359 training return: tensor(63.0873, device='cuda:0')
epoch: 90 test_true_pfm: 4014.924995791642 sim_pfm: 3.980453228801101
episode: 360 training return: tensor(85.7274, device='cuda:0')
episode: 361 training return: tensor(30.8015, device='cuda:0')
episode: 362 training return: tensor(78.9838, device='cuda:0')
episode: 363 training return: tensor(68.0523, device='cuda:0')
epoch: 91 test_true_pfm: 4014.192927201056 sim_pfm: 47.69297139527043
episode: 364 training return: tensor(73.2233, device='cuda:0')
episode: 365 training return: tensor(46.8728, device='cuda:0')
episode: 366 training return: tensor(61.1765, device='cuda:0')
episode: 367 training return: tensor(-8.5364, device='cuda:0')
epoch: 92 test_true_pfm: 3982.744245461298 sim_pfm: -4.101018841941065
episode: 368 training return: tensor(84.2501, device='cuda:0')
episode: 369 training return: tensor(71.6404, device='cuda:0')
episode: 370 training return: tensor(45.9380, device='cuda:0')
episode: 371 training return: tensor(9.2848, device='cuda:0')
epoch: 93 test_true_pfm: 4038.0511588249356 sim_pfm: 17.08598403177651
episode: 372 training return: tensor(37.6128, device='cuda:0')
episode: 373 training return: tensor(49.1895, device='cuda:0')
episode: 374 training return: tensor(44.9555, device='cuda:0')
episode: 375 training return: tensor(71.2100, device='cuda:0')
epoch: 94 test_true_pfm: 4001.1830717219705 sim_pfm: 35.20090590932523
episode: 376 training return: tensor(-26.3420, device='cuda:0')
episode: 377 training return: tensor(36.7420, device='cuda:0')
episode: 378 training return: tensor(13.3980, device='cuda:0')
episode: 379 training return: tensor(38.4173, device='cuda:0')
epoch: 95 test_true_pfm: 4015.3655038457814 sim_pfm: 20.905687256056506
episode: 380 training return: tensor(73.0616, device='cuda:0')
episode: 381 training return: tensor(-7.1729, device='cuda:0')
episode: 382 training return: tensor(22.4860, device='cuda:0')
episode: 383 training return: tensor(80.8647, device='cuda:0')
epoch: 96 test_true_pfm: 4017.2195770020776 sim_pfm: 43.22867285089645
episode: 384 training return: tensor(32.8033, device='cuda:0')
episode: 385 training return: tensor(69.8094, device='cuda:0')
episode: 386 training return: tensor(53.0894, device='cuda:0')
episode: 387 training return: tensor(28.4479, device='cuda:0')
epoch: 97 test_true_pfm: 4021.0275563526834 sim_pfm: 40.72446028211076
episode: 388 training return: tensor(60.0287, device='cuda:0')
episode: 389 training return: tensor(59.3820, device='cuda:0')
episode: 390 training return: tensor(73.7224, device='cuda:0')
episode: 391 training return: tensor(84.6666, device='cuda:0')
epoch: 98 test_true_pfm: 4006.6753918752074 sim_pfm: 37.85086803209075
episode: 392 training return: tensor(93.5318, device='cuda:0')
episode: 393 training return: tensor(96.6702, device='cuda:0')
episode: 394 training return: tensor(22.0089, device='cuda:0')
episode: 395 training return: tensor(85.5636, device='cuda:0')
epoch: 99 test_true_pfm: 4014.8382777741876 sim_pfm: 42.00878640607698
episode: 396 training return: tensor(69.5302, device='cuda:0')
episode: 397 training return: tensor(47.1875, device='cuda:0')
episode: 398 training return: tensor(64.1093, device='cuda:0')
episode: 399 training return: tensor(68.6749, device='cuda:0')
epoch: 100 test_true_pfm: 3990.8292820994816 sim_pfm: 46.70333673560526
episode: 400 training return: tensor(19.8072, device='cuda:0')
episode: 401 training return: tensor(65.6794, device='cuda:0')
episode: 402 training return: tensor(-23.2423, device='cuda:0')
episode: 403 training return: tensor(-33.7549, device='cuda:0')
epoch: 101 test_true_pfm: 3988.8222604191537 sim_pfm: 34.19558326199573
episode: 404 training return: tensor(40.1678, device='cuda:0')
episode: 405 training return: tensor(49.7018, device='cuda:0')
episode: 406 training return: tensor(65.6394, device='cuda:0')
episode: 407 training return: tensor(44.3536, device='cuda:0')
epoch: 102 test_true_pfm: 4033.3368565879646 sim_pfm: 33.290614372041695
episode: 408 training return: tensor(1.8482, device='cuda:0')
episode: 409 training return: tensor(73.9145, device='cuda:0')
episode: 410 training return: tensor(64.0223, device='cuda:0')
episode: 411 training return: tensor(80.6255, device='cuda:0')
epoch: 103 test_true_pfm: 3991.186786425685 sim_pfm: 43.74575346561809
episode: 412 training return: tensor(46.8828, device='cuda:0')
episode: 413 training return: tensor(16.5420, device='cuda:0')
episode: 414 training return: tensor(84.1073, device='cuda:0')
episode: 415 training return: tensor(70.2653, device='cuda:0')
epoch: 104 test_true_pfm: 4009.240730010939 sim_pfm: 43.921848833318414
episode: 416 training return: tensor(22.3638, device='cuda:0')
episode: 417 training return: tensor(69.5804, device='cuda:0')
episode: 418 training return: tensor(58.4629, device='cuda:0')
episode: 419 training return: tensor(-8.0363, device='cuda:0')
epoch: 105 test_true_pfm: 4010.803639123594 sim_pfm: 61.213132790920405
episode: 420 training return: tensor(45.7737, device='cuda:0')
episode: 421 training return: tensor(-5.2187, device='cuda:0')
episode: 422 training return: tensor(75.8346, device='cuda:0')
episode: 423 training return: tensor(83.2061, device='cuda:0')
epoch: 106 test_true_pfm: 4006.0705961730805 sim_pfm: 32.21064515975498
episode: 424 training return: tensor(43.8114, device='cuda:0')
episode: 425 training return: tensor(38.4585, device='cuda:0')
episode: 426 training return: tensor(43.5227, device='cuda:0')
episode: 427 training return: tensor(62.2580, device='cuda:0')
epoch: 107 test_true_pfm: 3992.470155058892 sim_pfm: 23.757217679696623
episode: 428 training return: tensor(36.6868, device='cuda:0')
episode: 429 training return: tensor(-8.6475, device='cuda:0')
episode: 430 training return: tensor(20.3359, device='cuda:0')
episode: 431 training return: tensor(-19.4093, device='cuda:0')
epoch: 108 test_true_pfm: 4024.9649360343647 sim_pfm: 67.61855786447995
episode: 432 training return: tensor(77.5688, device='cuda:0')
episode: 433 training return: tensor(8.8698, device='cuda:0')
episode: 434 training return: tensor(-27.9440, device='cuda:0')
episode: 435 training return: tensor(43.1815, device='cuda:0')
epoch: 109 test_true_pfm: 4063.5338454431244 sim_pfm: 66.58158608352339
episode: 436 training return: tensor(68.7626, device='cuda:0')
episode: 437 training return: tensor(-11.3687, device='cuda:0')
episode: 438 training return: tensor(29.9277, device='cuda:0')
episode: 439 training return: tensor(4.8110, device='cuda:0')
epoch: 110 test_true_pfm: 4041.201938952267 sim_pfm: 71.11291606133454
episode: 440 training return: tensor(42.5792, device='cuda:0')
episode: 441 training return: tensor(25.1588, device='cuda:0')
episode: 442 training return: tensor(56.5415, device='cuda:0')
episode: 443 training return: tensor(53.8524, device='cuda:0')
epoch: 111 test_true_pfm: 3970.580337643991 sim_pfm: 52.328087373947106
episode: 444 training return: tensor(69.2960, device='cuda:0')
episode: 445 training return: tensor(36.0697, device='cuda:0')
episode: 446 training return: tensor(60.2727, device='cuda:0')
episode: 447 training return: tensor(42.4107, device='cuda:0')
epoch: 112 test_true_pfm: 3979.7647467896118 sim_pfm: 27.917612298886525
episode: 448 training return: tensor(72.8535, device='cuda:0')
episode: 449 training return: tensor(54.2249, device='cuda:0')
episode: 450 training return: tensor(6.9731, device='cuda:0')
episode: 451 training return: tensor(52.8725, device='cuda:0')
epoch: 113 test_true_pfm: 4007.0015268348343 sim_pfm: 72.5628128500927
episode: 452 training return: tensor(39.6144, device='cuda:0')
episode: 453 training return: tensor(67.1061, device='cuda:0')
episode: 454 training return: tensor(19.6227, device='cuda:0')
episode: 455 training return: tensor(-896.3051, device='cuda:0')
epoch: 114 test_true_pfm: 4006.1471918545903 sim_pfm: 49.126665085350396
episode: 456 training return: tensor(41.8372, device='cuda:0')
episode: 457 training return: tensor(53.1210, device='cuda:0')
episode: 458 training return: tensor(61.6792, device='cuda:0')
episode: 459 training return: tensor(32.3657, device='cuda:0')
epoch: 115 test_true_pfm: 4033.7423141501326 sim_pfm: 51.71184392708043
episode: 460 training return: tensor(-1.0766, device='cuda:0')
episode: 461 training return: tensor(66.2084, device='cuda:0')
episode: 462 training return: tensor(78.9787, device='cuda:0')
episode: 463 training return: tensor(54.1419, device='cuda:0')
epoch: 116 test_true_pfm: 4002.787870461377 sim_pfm: 41.598448311473476
episode: 464 training return: tensor(38.5858, device='cuda:0')
episode: 465 training return: tensor(39.1765, device='cuda:0')
episode: 466 training return: tensor(77.1671, device='cuda:0')
episode: 467 training return: tensor(23.6596, device='cuda:0')
epoch: 117 test_true_pfm: 4021.997012941474 sim_pfm: 2.6854950334721557
episode: 468 training return: tensor(29.5919, device='cuda:0')
episode: 469 training return: tensor(50.7762, device='cuda:0')
episode: 470 training return: tensor(76.9519, device='cuda:0')
episode: 471 training return: tensor(61.3244, device='cuda:0')
epoch: 118 test_true_pfm: 3955.414176948014 sim_pfm: 44.863107877453636
episode: 472 training return: tensor(84.4146, device='cuda:0')
episode: 473 training return: tensor(-13.3961, device='cuda:0')
episode: 474 training return: tensor(18.9159, device='cuda:0')
episode: 475 training return: tensor(64.9228, device='cuda:0')
epoch: 119 test_true_pfm: 4040.715947872184 sim_pfm: 71.39774205491024
episode: 476 training return: tensor(63.0069, device='cuda:0')
episode: 477 training return: tensor(73.8518, device='cuda:0')
episode: 478 training return: tensor(-6.6585, device='cuda:0')
episode: 479 training return: tensor(83.8686, device='cuda:0')
epoch: 120 test_true_pfm: 4030.5404229566534 sim_pfm: -36.09026272894698
episode: 480 training return: tensor(59.5060, device='cuda:0')
episode: 481 training return: tensor(57.2757, device='cuda:0')
episode: 482 training return: tensor(74.2231, device='cuda:0')
episode: 483 training return: tensor(47.7243, device='cuda:0')
epoch: 121 test_true_pfm: 4010.668686750738 sim_pfm: 18.024672441524064
episode: 484 training return: tensor(86.4651, device='cuda:0')
episode: 485 training return: tensor(45.5827, device='cuda:0')
episode: 486 training return: tensor(87.7055, device='cuda:0')
episode: 487 training return: tensor(32.9654, device='cuda:0')
epoch: 122 test_true_pfm: 4040.186723713264 sim_pfm: 65.72413320061362
episode: 488 training return: tensor(42.7414, device='cuda:0')
episode: 489 training return: tensor(65.5887, device='cuda:0')
episode: 490 training return: tensor(-7.8706, device='cuda:0')
episode: 491 training return: tensor(55.9714, device='cuda:0')
epoch: 123 test_true_pfm: 4040.9717456996464 sim_pfm: 53.610191579481274
episode: 492 training return: tensor(-44.4516, device='cuda:0')
episode: 493 training return: tensor(36.1026, device='cuda:0')
episode: 494 training return: tensor(34.7161, device='cuda:0')
episode: 495 training return: tensor(-13.3511, device='cuda:0')
epoch: 124 test_true_pfm: 4028.7333293728957 sim_pfm: 56.11065341090822
episode: 496 training return: tensor(43.8736, device='cuda:0')
episode: 497 training return: tensor(51.4136, device='cuda:0')
episode: 498 training return: tensor(82.7678, device='cuda:0')
episode: 499 training return: tensor(22.5044, device='cuda:0')
epoch: 125 test_true_pfm: 4022.173487113694 sim_pfm: 53.20800622440098
episode: 500 training return: tensor(77.9267, device='cuda:0')
episode: 501 training return: tensor(-6.2351, device='cuda:0')
episode: 502 training return: tensor(47.7594, device='cuda:0')
episode: 503 training return: tensor(92.7943, device='cuda:0')
epoch: 126 test_true_pfm: 3976.659511020207 sim_pfm: 62.664181147294585
episode: 504 training return: tensor(68.0656, device='cuda:0')
episode: 505 training return: tensor(76.5086, device='cuda:0')
episode: 506 training return: tensor(97.3503, device='cuda:0')
episode: 507 training return: tensor(-776.0340, device='cuda:0')
epoch: 127 test_true_pfm: 4022.4648317115334 sim_pfm: 50.94256978462605
episode: 508 training return: tensor(52.6264, device='cuda:0')
episode: 509 training return: tensor(66.2259, device='cuda:0')
episode: 510 training return: tensor(-22.2960, device='cuda:0')
episode: 511 training return: tensor(56.5264, device='cuda:0')
epoch: 128 test_true_pfm: 4043.8000486813107 sim_pfm: 15.46056218497688
episode: 512 training return: tensor(61.8664, device='cuda:0')
episode: 513 training return: tensor(68.3381, device='cuda:0')
episode: 514 training return: tensor(58.4480, device='cuda:0')
episode: 515 training return: tensor(17.9833, device='cuda:0')
epoch: 129 test_true_pfm: 4042.5669220462282 sim_pfm: 48.55051318030261
episode: 516 training return: tensor(-884.6790, device='cuda:0')
episode: 517 training return: tensor(42.1449, device='cuda:0')
episode: 518 training return: tensor(78.3590, device='cuda:0')
episode: 519 training return: tensor(21.8915, device='cuda:0')
epoch: 130 test_true_pfm: 4036.225037552564 sim_pfm: 77.32246409310028
episode: 520 training return: tensor(22.8346, device='cuda:0')
episode: 521 training return: tensor(79.9531, device='cuda:0')
episode: 522 training return: tensor(72.1332, device='cuda:0')
episode: 523 training return: tensor(15.4300, device='cuda:0')
epoch: 131 test_true_pfm: 3978.8410788063593 sim_pfm: 33.40755560583784
episode: 524 training return: tensor(39.3428, device='cuda:0')
episode: 525 training return: tensor(84.4370, device='cuda:0')
episode: 526 training return: tensor(76.1320, device='cuda:0')
episode: 527 training return: tensor(32.0842, device='cuda:0')
epoch: 132 test_true_pfm: 4001.789522830031 sim_pfm: -204.7727716800582
episode: 528 training return: tensor(20.8111, device='cuda:0')
episode: 529 training return: tensor(56.9868, device='cuda:0')
episode: 530 training return: tensor(53.6401, device='cuda:0')
episode: 531 training return: tensor(28.0474, device='cuda:0')
epoch: 133 test_true_pfm: 3999.3095950318843 sim_pfm: 28.14770831921487
episode: 532 training return: tensor(28.3309, device='cuda:0')
episode: 533 training return: tensor(75.3768, device='cuda:0')
episode: 534 training return: tensor(20.1632, device='cuda:0')
episode: 535 training return: tensor(65.9563, device='cuda:0')
epoch: 134 test_true_pfm: 4027.0670506802435 sim_pfm: 42.53859265556093
episode: 536 training return: tensor(47.2229, device='cuda:0')
episode: 537 training return: tensor(76.1767, device='cuda:0')
episode: 538 training return: tensor(10.2623, device='cuda:0')
episode: 539 training return: tensor(56.9815, device='cuda:0')
epoch: 135 test_true_pfm: 4008.130655843832 sim_pfm: 37.22129274508916
episode: 540 training return: tensor(26.6682, device='cuda:0')
episode: 541 training return: tensor(3.6926, device='cuda:0')
episode: 542 training return: tensor(67.6510, device='cuda:0')
episode: 543 training return: tensor(21.5105, device='cuda:0')
epoch: 136 test_true_pfm: 4008.49368928735 sim_pfm: 68.25387723130795
episode: 544 training return: tensor(51.5412, device='cuda:0')
episode: 545 training return: tensor(87.3690, device='cuda:0')
episode: 546 training return: tensor(68.2813, device='cuda:0')
episode: 547 training return: tensor(84.2224, device='cuda:0')
epoch: 137 test_true_pfm: 4029.6163590760248 sim_pfm: 34.70896106338478
episode: 548 training return: tensor(93.2130, device='cuda:0')
episode: 549 training return: tensor(65.9821, device='cuda:0')
episode: 550 training return: tensor(35.3116, device='cuda:0')
episode: 551 training return: tensor(61.8576, device='cuda:0')
epoch: 138 test_true_pfm: 4023.6477775156873 sim_pfm: 63.24364336349148
episode: 552 training return: tensor(32.7965, device='cuda:0')
episode: 553 training return: tensor(60.6001, device='cuda:0')
episode: 554 training return: tensor(37.3433, device='cuda:0')
episode: 555 training return: tensor(42.9144, device='cuda:0')
epoch: 139 test_true_pfm: 4013.9489280422836 sim_pfm: 47.55173232705177
episode: 556 training return: tensor(50.1877, device='cuda:0')
episode: 557 training return: tensor(19.4523, device='cuda:0')
episode: 558 training return: tensor(72.9959, device='cuda:0')
episode: 559 training return: tensor(42.7030, device='cuda:0')
epoch: 140 test_true_pfm: 3966.830025024621 sim_pfm: 47.28781323106765
episode: 560 training return: tensor(53.7307, device='cuda:0')
episode: 561 training return: tensor(70.8160, device='cuda:0')
episode: 562 training return: tensor(80.3795, device='cuda:0')
episode: 563 training return: tensor(85.7905, device='cuda:0')
epoch: 141 test_true_pfm: 4041.89958119418 sim_pfm: 44.43002691916384
episode: 564 training return: tensor(61.7684, device='cuda:0')
episode: 565 training return: tensor(85.1061, device='cuda:0')
episode: 566 training return: tensor(84.6834, device='cuda:0')
episode: 567 training return: tensor(59.7380, device='cuda:0')
epoch: 142 test_true_pfm: 3994.7712741864048 sim_pfm: 58.7166115444464
episode: 568 training return: tensor(52.9182, device='cuda:0')
episode: 569 training return: tensor(86.1798, device='cuda:0')
episode: 570 training return: tensor(66.7688, device='cuda:0')
episode: 571 training return: tensor(35.2237, device='cuda:0')
epoch: 143 test_true_pfm: 4040.0797722589614 sim_pfm: 56.287515882131025
episode: 572 training return: tensor(56.8583, device='cuda:0')
episode: 573 training return: tensor(9.1261, device='cuda:0')
episode: 574 training return: tensor(84.1208, device='cuda:0')
episode: 575 training return: tensor(43.3628, device='cuda:0')
epoch: 144 test_true_pfm: 4036.2000808603393 sim_pfm: 80.31875470813247
episode: 576 training return: tensor(69.2426, device='cuda:0')
episode: 577 training return: tensor(36.6689, device='cuda:0')
episode: 578 training return: tensor(59.0023, device='cuda:0')
episode: 579 training return: tensor(23.9517, device='cuda:0')
epoch: 145 test_true_pfm: 4036.298797795387 sim_pfm: 33.14401688686727
episode: 580 training return: tensor(91.2821, device='cuda:0')
episode: 581 training return: tensor(70.5729, device='cuda:0')
episode: 582 training return: tensor(12.9345, device='cuda:0')
episode: 583 training return: tensor(58.8202, device='cuda:0')
epoch: 146 test_true_pfm: 4046.165271703428 sim_pfm: 36.538572005345486
episode: 584 training return: tensor(70.1503, device='cuda:0')
episode: 585 training return: tensor(46.0504, device='cuda:0')
episode: 586 training return: tensor(72.2860, device='cuda:0')
episode: 587 training return: tensor(55.7061, device='cuda:0')
epoch: 147 test_true_pfm: 4014.7542962090415 sim_pfm: 46.96343546079394
episode: 588 training return: tensor(62.0125, device='cuda:0')
episode: 589 training return: tensor(-15.6008, device='cuda:0')
episode: 590 training return: tensor(34.7120, device='cuda:0')
episode: 591 training return: tensor(38.3392, device='cuda:0')
epoch: 148 test_true_pfm: 4010.165552693916 sim_pfm: 41.730210224118004
episode: 592 training return: tensor(70.5192, device='cuda:0')
episode: 593 training return: tensor(36.9054, device='cuda:0')
episode: 594 training return: tensor(65.6539, device='cuda:0')
episode: 595 training return: tensor(91.3547, device='cuda:0')
epoch: 149 test_true_pfm: 4043.036073982523 sim_pfm: 60.925341044038454
episode: 596 training return: tensor(68.0808, device='cuda:0')
episode: 597 training return: tensor(84.9443, device='cuda:0')
episode: 598 training return: tensor(71.6351, device='cuda:0')
episode: 599 training return: tensor(62.1724, device='cuda:0')
epoch: 150 test_true_pfm: 4009.6245338505228 sim_pfm: 74.22398908799126
