['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '6', '--data', '100000', '--regu', '0.2']
epoch: 0 training_loss 0.3462247596681118 test_loss: 0.22234187126159669
epoch: 1 training_loss 0.1807997278869152 test_loss: 0.16017366647720338
epoch: 2 training_loss 0.1420199940353632 test_loss: 0.15542230606079102
epoch: 3 training_loss 0.1389826986566186 test_loss: 0.17235825061798096
epoch: 4 training_loss 0.1368136527016759 test_loss: 0.14749786853790284
epoch: 5 training_loss 0.12954417802393436 test_loss: 0.146689772605896
epoch: 6 training_loss 0.12952350541949273 test_loss: 0.12135902643203736
epoch: 7 training_loss 0.12023233328014613 test_loss: 0.11582907438278198
epoch: 8 training_loss 0.12148359552025795 test_loss: 0.12294845581054688
epoch: 9 training_loss 0.11828021455556154 test_loss: 0.12717158794403077
epoch: 10 training_loss 0.12266920361667871 test_loss: 0.13801770210266112
epoch: 11 training_loss 0.11538795281201601 test_loss: 0.11310983896255493
epoch: 12 training_loss 0.11219017121940851 test_loss: 0.11107257604599
epoch: 13 training_loss 0.10870035350322724 test_loss: 0.12482577562332153
epoch: 14 training_loss 0.11922385316342116 test_loss: 0.12663669586181642
epoch: 15 training_loss 0.12047531116753817 test_loss: 0.1231716513633728
epoch: 16 training_loss 0.12042793929576874 test_loss: 0.11775994300842285
epoch: 17 training_loss 0.11687418105080724 test_loss: 0.1085841178894043
epoch: 18 training_loss 0.1054501860961318 test_loss: 0.10750858783721924
epoch: 19 training_loss 0.10894430335611105 test_loss: 0.12845633029937745
epoch: 20 training_loss 0.11341238198801876 test_loss: 0.12074528932571411
epoch: 21 training_loss 0.10620845625177026 test_loss: 0.11306809186935425
epoch: 22 training_loss 0.10318659933283925 test_loss: 0.10397114753723144
epoch: 23 training_loss 0.11241668496280909 test_loss: 0.12339221239089966
epoch: 24 training_loss 0.11284342512488366 test_loss: 0.10532124042510986
epoch: 25 training_loss 0.10952087223529816 test_loss: 0.1258128046989441
epoch: 26 training_loss 0.10078430987894535 test_loss: 0.1146041989326477
epoch: 27 training_loss 0.10232537969946862 test_loss: 0.10749616622924804
epoch: 28 training_loss 0.10886245995759963 test_loss: 0.1276632308959961
epoch: 29 training_loss 0.10913607621565462 test_loss: 0.11384605169296265
epoch: 30 training_loss 0.105586709510535 test_loss: 0.10753813982009888
epoch: 31 training_loss 0.10518032222986222 test_loss: 0.11643352508544921
epoch: 32 training_loss 0.1073571565002203 test_loss: 0.10646740198135377
epoch: 33 training_loss 0.11415521835908293 test_loss: 0.11028436422348023
epoch: 34 training_loss 0.10864404574036599 test_loss: 0.12022652626037597
epoch: 35 training_loss 0.10119462348520755 test_loss: 0.12037911415100097
epoch: 36 training_loss 0.10190054211765527 test_loss: 0.11066690683364869
epoch: 37 training_loss 0.10417568437755108 test_loss: 0.10758658647537231
epoch: 38 training_loss 0.10644691061228513 test_loss: 0.10706883668899536
epoch: 39 training_loss 0.10883617039769888 test_loss: 0.12694607973098754
epoch: 40 training_loss 0.10703118575736881 test_loss: 0.11219179630279541
epoch: 41 training_loss 0.10631285797804595 test_loss: 0.1195402979850769
epoch: 42 training_loss 0.10598100068047643 test_loss: 0.11975886821746826
epoch: 43 training_loss 0.10940698590129613 test_loss: 0.11224539279937744
epoch: 44 training_loss 0.11209702726453542 test_loss: 0.1269506573677063
epoch: 45 training_loss 0.11161177504807711 test_loss: 0.10962125062942504
epoch: 46 training_loss 0.10091847727075219 test_loss: 0.12416406869888305
epoch: 47 training_loss 0.10435458002611994 test_loss: 0.113929283618927
epoch: 48 training_loss 0.10664348877966404 test_loss: 0.10055356025695801
epoch: 49 training_loss 0.10981649834662675 test_loss: 0.10821924209594727
epoch: 50 training_loss 0.11260002456605435 test_loss: 0.11007077693939209
epoch: 51 training_loss 0.10746206905692816 test_loss: 0.11219565868377686
epoch: 52 training_loss 0.10310222217813134 test_loss: 0.11553092002868652
epoch: 53 training_loss 0.10717981355264783 test_loss: 0.11241132020950317
epoch: 54 training_loss 0.10869018405675888 test_loss: 0.09651625156402588
epoch: 55 training_loss 0.10725568810477853 test_loss: 0.11579172611236573
epoch: 56 training_loss 0.10743934329599142 test_loss: 0.10983588695526122
epoch: 57 training_loss 0.10086990095674991 test_loss: 0.10944043397903443
epoch: 58 training_loss 0.10296293523162603 test_loss: 0.13893895149230956
epoch: 59 training_loss 0.10113862505182623 test_loss: 0.12613605260848998
epoch: 60 training_loss 0.11308672867715358 test_loss: 0.11031757593154908
epoch: 61 training_loss 0.11013222724199295 test_loss: 0.10793745517730713
epoch: 62 training_loss 0.10543165266513825 test_loss: 0.12041177749633789
epoch: 63 training_loss 0.10702609786763788 test_loss: 0.10896438360214233
epoch: 64 training_loss 0.10647669957950712 test_loss: 0.09038903117179871
epoch: 65 training_loss 0.11002822861075401 test_loss: 0.1160089373588562
epoch: 66 training_loss 0.10720812086015939 test_loss: 0.11901960372924805
epoch: 67 training_loss 0.10391667060554027 test_loss: 0.11436952352523803
epoch: 68 training_loss 0.10365034628659486 test_loss: 0.09876825809478759
epoch: 69 training_loss 0.10817485162988305 test_loss: 0.12001172304153443
epoch: 70 training_loss 0.10797259697690606 test_loss: 0.12107359170913697
epoch: 71 training_loss 0.11055034507066011 test_loss: 0.10530219078063965
epoch: 72 training_loss 0.10726867755874991 test_loss: 0.12403545379638672
epoch: 73 training_loss 0.10176292598247529 test_loss: 0.10085459947586059
epoch: 74 training_loss 0.10511306088417768 test_loss: 0.09147016406059265
epoch: 75 training_loss 0.10359373297542333 test_loss: 0.11526843309402465
epoch: 76 training_loss 0.10609846100211144 test_loss: 0.13917375802993776
epoch: 77 training_loss 0.10959105355665087 test_loss: 0.11223561763763427
epoch: 78 training_loss 0.10244961377233266 test_loss: 0.10906620025634765
epoch: 79 training_loss 0.09546397995203733 test_loss: 0.09585043787956238
epoch: 80 training_loss 0.10049771767109633 test_loss: 0.10496975183486938
epoch: 81 training_loss 0.10410482445731759 test_loss: 0.11179955005645752
epoch: 82 training_loss 0.10192161398008466 test_loss: 0.09343395233154297
epoch: 83 training_loss 0.10848452541977167 test_loss: 0.11634474992752075
epoch: 84 training_loss 0.10660687720403075 test_loss: 0.11041518449783325
epoch: 85 training_loss 0.10047549445182086 test_loss: 0.09070011973381042
epoch: 86 training_loss 0.10505557592958212 test_loss: 0.1261181950569153
epoch: 87 training_loss 0.10835900027304887 test_loss: 0.10330531597137452
epoch: 88 training_loss 0.10095289252698421 test_loss: 0.10600658655166625
epoch: 89 training_loss 0.09966957088559866 test_loss: 0.10915441513061523
epoch: 90 training_loss 0.10335787557065487 test_loss: 0.11067337989807129
epoch: 91 training_loss 0.10302202552556991 test_loss: 0.1252383828163147
epoch: 92 training_loss 0.10609285082668066 test_loss: 0.09606767296791077
epoch: 93 training_loss 0.11493632350116968 test_loss: 0.09974708557128906
epoch: 94 training_loss 0.10380558848381043 test_loss: 0.10827465057373047
epoch: 95 training_loss 0.11120454840362072 test_loss: 0.10766741037368774
epoch: 96 training_loss 0.10328069288283587 test_loss: 0.10944066047668458
epoch: 97 training_loss 0.10508125511929393 test_loss: 0.10296907424926757
epoch: 98 training_loss 0.09957045331597328 test_loss: 0.10226715803146362
epoch: 99 training_loss 0.10594380296766757 test_loss: 0.10652110576629639
epoch: 100 training_loss 0.10028043564409017 test_loss: 0.11367970705032349
epoch: 101 training_loss 0.10059537822380663 test_loss: 0.1085276484489441
epoch: 102 training_loss 0.111452662833035 test_loss: 0.11147300004959107
epoch: 103 training_loss 0.10634566318243742 test_loss: 0.10809605121612549
epoch: 104 training_loss 0.09988448642194271 test_loss: 0.09569681286811829
epoch: 105 training_loss 0.1055631840415299 test_loss: 0.11448385715484619
epoch: 106 training_loss 0.10321233969181776 test_loss: 0.12033580541610718
epoch: 107 training_loss 0.10180705528706312 test_loss: 0.10362180471420288
epoch: 108 training_loss 0.09868300307542086 test_loss: 0.10663270950317383
epoch: 109 training_loss 0.10222099058330059 test_loss: 0.09738781452178955
epoch: 110 training_loss 0.10443512003868818 test_loss: 0.110122811794281
epoch: 111 training_loss 0.10712483700364828 test_loss: 0.11375640630722046
epoch: 112 training_loss 0.10150001468136907 test_loss: 0.11086820363998413
epoch: 113 training_loss 0.09766620652750134 test_loss: 0.11560019254684448
epoch: 114 training_loss 0.11019715474918484 test_loss: 0.1082069993019104
epoch: 115 training_loss 0.09770780796185136 test_loss: 0.10513544082641602
epoch: 116 training_loss 0.10068976745009423 test_loss: 0.11872328519821167
epoch: 117 training_loss 0.1050763775780797 test_loss: 0.10385562181472778
epoch: 118 training_loss 0.10053131809458137 test_loss: 0.10689136981964112
epoch: 119 training_loss 0.11382807668298484 test_loss: 0.10686604976654053
epoch: 120 training_loss 0.09699356995522976 test_loss: 0.10632433891296386
epoch: 121 training_loss 0.10400512844324111 test_loss: 0.09919446110725402
epoch: 122 training_loss 0.09819247087463737 test_loss: 0.0996217131614685
epoch: 123 training_loss 0.10705479830503464 test_loss: 0.1010076642036438
epoch: 124 training_loss 0.10825182985514402 test_loss: 0.12655593156814576
epoch: 125 training_loss 0.09857408169656992 test_loss: 0.12616028785705566
epoch: 126 training_loss 0.10844683285802603 test_loss: 0.11416019201278686
epoch: 127 training_loss 0.09990690445527434 test_loss: 0.12727787494659423
epoch: 128 training_loss 0.09552332334220409 test_loss: 0.10925477743148804
epoch: 129 training_loss 0.10928858712315559 test_loss: 0.12041857242584228
epoch: 130 training_loss 0.10295780140906573 test_loss: 0.08899227380752564
epoch: 131 training_loss 0.10700141824781895 test_loss: 0.11034743785858155
epoch: 132 training_loss 0.10766174394637346 test_loss: 0.10724687576293945
epoch: 133 training_loss 0.10073980832472444 test_loss: 0.10583231449127198
epoch: 134 training_loss 0.1092236277461052 test_loss: 0.12757881879806518
epoch: 135 training_loss 0.107176365070045 test_loss: 0.10362515449523926
epoch: 136 training_loss 0.10656972348690033 test_loss: 0.11293969154357911
epoch: 137 training_loss 0.10431542117148637 test_loss: 0.11766034364700317
epoch: 138 training_loss 0.1015887476876378 test_loss: 0.10240772962570191
epoch: 139 training_loss 0.10213640725240111 test_loss: 0.12379199266433716
epoch: 140 training_loss 0.10603569630533456 test_loss: 0.10927753448486328
epoch: 141 training_loss 0.10391452770680189 test_loss: 0.09317190647125244
epoch: 142 training_loss 0.09872128438204526 test_loss: 0.10345009565353394
epoch: 143 training_loss 0.10780724965035915 test_loss: 0.10223571062088013
epoch: 144 training_loss 0.10592073988169431 test_loss: 0.1165921449661255
epoch: 145 training_loss 0.10189325898885727 test_loss: 0.11909544467926025
epoch: 146 training_loss 0.10137296427041292 test_loss: 0.10509027242660522
epoch: 147 training_loss 0.10379328213632107 test_loss: 0.09728041291236877
epoch: 148 training_loss 0.09945974366739392 test_loss: 0.12161751985549926
epoch: 149 training_loss 0.10080349979922175 test_loss: 0.10891127586364746
epoch: 0 training_loss 51.48919303894043 test_loss: 25.148097229003906
epoch: 1 training_loss 19.224056453704833 test_loss: 15.22955780029297
epoch: 2 training_loss 13.30375192642212 test_loss: 11.510474395751952
epoch: 3 training_loss 10.369260063171387 test_loss: 9.345298767089844
epoch: 4 training_loss 8.629122266769409 test_loss: 8.007782745361329
epoch: 5 training_loss 7.344566559791565 test_loss: 6.684262084960937
epoch: 6 training_loss 6.531641674041748 test_loss: 6.218478393554688
epoch: 7 training_loss 5.75305202960968 test_loss: 5.642497634887695
epoch: 8 training_loss 5.353731708526611 test_loss: 5.339168548583984
epoch: 9 training_loss 4.994312119483948 test_loss: 4.628857421875
epoch: 10 training_loss 4.732405455112457 test_loss: 4.410919570922852
epoch: 11 training_loss 4.425078225135803 test_loss: 4.384371948242188
epoch: 12 training_loss 4.269078252315521 test_loss: 4.247353744506836
epoch: 13 training_loss 4.053697559833527 test_loss: 3.9200088500976564
epoch: 14 training_loss 3.850836899280548 test_loss: 3.7545520782470705
epoch: 15 training_loss 3.6574607992172243 test_loss: 3.7082679748535154
epoch: 16 training_loss 3.4880570435523985 test_loss: 3.490690231323242
epoch: 17 training_loss 3.4342540955543517 test_loss: 3.4492427825927736
epoch: 18 training_loss 3.396112151145935 test_loss: 3.2171791076660154
epoch: 19 training_loss 3.2176560258865354 test_loss: 3.162149429321289
epoch: 20 training_loss 3.1276650166511537 test_loss: 3.0656927108764647
epoch: 21 training_loss 3.032197828292847 test_loss: 2.951358985900879
epoch: 22 training_loss 2.9447893834114076 test_loss: 2.948295783996582
epoch: 23 training_loss 2.9756790351867677 test_loss: 2.920491027832031
epoch: 24 training_loss 2.8614308977127076 test_loss: 2.8091068267822266
epoch: 25 training_loss 2.766043119430542 test_loss: 2.804643440246582
epoch: 26 training_loss 2.7790066623687744 test_loss: 2.809367561340332
epoch: 27 training_loss 2.612860519886017 test_loss: 2.590388298034668
epoch: 28 training_loss 2.585616047382355 test_loss: 2.5554729461669923
epoch: 29 training_loss 2.5797660422325133 test_loss: 2.539630126953125
epoch: 30 training_loss 2.494909312725067 test_loss: 2.46486759185791
epoch: 31 training_loss 2.507711658477783 test_loss: 2.4620712280273436
epoch: 32 training_loss 2.4593704175949096 test_loss: 2.3962663650512694
epoch: 33 training_loss 2.451850023269653 test_loss: 2.381804084777832
epoch: 34 training_loss 2.3846037936210633 test_loss: 2.4318496704101564
epoch: 35 training_loss 2.359594557285309 test_loss: 2.3316839218139647
epoch: 36 training_loss 2.294032771587372 test_loss: 2.353897285461426
epoch: 37 training_loss 2.2826753067970276 test_loss: 2.371271514892578
epoch: 38 training_loss 2.27061320066452 test_loss: 2.301455497741699
epoch: 39 training_loss 2.267907056808472 test_loss: 2.2485528945922852
epoch: 40 training_loss 2.2418072700500487 test_loss: 2.2957082748413087
epoch: 41 training_loss 2.2251790249347687 test_loss: 2.2026880264282225
epoch: 42 training_loss 2.1884378468990326 test_loss: 2.1544992446899416
epoch: 43 training_loss 2.1606650149822233 test_loss: 2.291996955871582
epoch: 44 training_loss 2.181555548906326 test_loss: 2.166602897644043
epoch: 45 training_loss 2.1225832855701445 test_loss: 2.1247278213500977
epoch: 46 training_loss 2.0871536338329317 test_loss: 2.0560653686523436
epoch: 47 training_loss 2.098718310594559 test_loss: 2.1144786834716798
epoch: 48 training_loss 2.0655439817905425 test_loss: 2.180662727355957
epoch: 49 training_loss 2.065421562194824 test_loss: 2.0072437286376954
epoch: 50 training_loss 2.0405381071567534 test_loss: 2.0222822189331056
epoch: 51 training_loss 2.0106744825839997 test_loss: 2.0316579818725584
epoch: 52 training_loss 2.0215282344818117 test_loss: 1.9686761856079102
epoch: 53 training_loss 1.981449352502823 test_loss: 1.9628501892089845
epoch: 54 training_loss 2.0328297066688537 test_loss: 1.9847713470458985
epoch: 55 training_loss 1.9591341984272004 test_loss: 1.9811187744140626
epoch: 56 training_loss 1.971070407629013 test_loss: 2.0510089874267576
epoch: 57 training_loss 1.986771252155304 test_loss: 1.8909330368041992
epoch: 58 training_loss 1.9647742247581481 test_loss: 1.9793092727661132
epoch: 59 training_loss 1.9394484198093414 test_loss: 1.926915168762207
epoch: 60 training_loss 1.9517418789863585 test_loss: 1.8954744338989258
epoch: 61 training_loss 1.9035994303226471 test_loss: 1.9157268524169921
epoch: 62 training_loss 1.920708544254303 test_loss: 1.847505760192871
epoch: 63 training_loss 1.8839216876029967 test_loss: 1.919172477722168
epoch: 64 training_loss 1.905964812040329 test_loss: 1.8663248062133788
epoch: 65 training_loss 1.8625370025634767 test_loss: 1.8392345428466796
epoch: 66 training_loss 1.8418359458446503 test_loss: 1.7986127853393554
epoch: 67 training_loss 1.840452412366867 test_loss: 1.864654541015625
epoch: 68 training_loss 1.8339707243442536 test_loss: 1.925917625427246
epoch: 69 training_loss 1.8441854751110076 test_loss: 1.8589433670043944
epoch: 70 training_loss 1.8901287698745728 test_loss: 1.8197233200073242
epoch: 71 training_loss 1.820871570110321 test_loss: 1.8833572387695312
epoch: 72 training_loss 1.8026252436637877 test_loss: 1.8615758895874024
epoch: 73 training_loss 1.7876466453075408 test_loss: 1.802586555480957
epoch: 74 training_loss 1.816837272644043 test_loss: 1.7971731185913087
epoch: 75 training_loss 1.7488302898406982 test_loss: 1.7727331161499023
epoch: 76 training_loss 1.7755659306049347 test_loss: 1.7521554946899414
epoch: 77 training_loss 1.7615858018398285 test_loss: 1.803837203979492
epoch: 78 training_loss 1.744148633480072 test_loss: 1.8001218795776368
epoch: 79 training_loss 1.7611561071872712 test_loss: 1.7449695587158203
epoch: 80 training_loss 1.7376364302635192 test_loss: 1.7564144134521484
epoch: 81 training_loss 1.7447751355171204 test_loss: 1.735659408569336
epoch: 82 training_loss 1.7391227400302887 test_loss: 1.7148948669433595
epoch: 83 training_loss 1.724427787065506 test_loss: 1.6944299697875977
epoch: 84 training_loss 1.7411472928524017 test_loss: 1.7070442199707032
epoch: 85 training_loss 1.7231457841396332 test_loss: 1.6836160659790038
epoch: 86 training_loss 1.7339116823673248 test_loss: 1.7608076095581056
epoch: 87 training_loss 1.6708942687511443 test_loss: 1.6777641296386718
epoch: 88 training_loss 1.6908995258808135 test_loss: 1.6773950576782226
epoch: 89 training_loss 1.7059717786312103 test_loss: 1.7199163436889648
epoch: 90 training_loss 1.7028158187866211 test_loss: 1.7407144546508788
epoch: 91 training_loss 1.697088268995285 test_loss: 1.6483880996704101
epoch: 92 training_loss 1.673990205526352 test_loss: 1.6841476440429688
epoch: 93 training_loss 1.691070146560669 test_loss: 1.6758018493652345
epoch: 94 training_loss 1.656810816526413 test_loss: 1.6414634704589843
epoch: 95 training_loss 1.6768274211883545 test_loss: 1.6514274597167968
epoch: 96 training_loss 1.648095396757126 test_loss: 1.6754653930664063
epoch: 97 training_loss 1.674770588874817 test_loss: 1.6560661315917968
epoch: 98 training_loss 1.6447216713428496 test_loss: 1.6469596862792968
epoch: 99 training_loss 1.6434719336032868 test_loss: 1.6164531707763672
epoch: 100 training_loss 1.6317398941516876 test_loss: 1.585035228729248
epoch: 101 training_loss 1.624651666879654 test_loss: 1.6337947845458984
epoch: 102 training_loss 1.603630907535553 test_loss: 1.6038915634155273
epoch: 103 training_loss 1.6513666272163392 test_loss: 1.6411575317382812
epoch: 104 training_loss 1.598129117488861 test_loss: 1.6046546936035155
epoch: 105 training_loss 1.6122896695137023 test_loss: 1.5667645454406738
epoch: 106 training_loss 1.6036795127391814 test_loss: 1.5740816116333007
epoch: 107 training_loss 1.5865272200107574 test_loss: 1.6563411712646485
epoch: 108 training_loss 1.6009730863571168 test_loss: 1.6077367782592773
epoch: 109 training_loss 1.6106032943725586 test_loss: 1.5689831733703614
epoch: 110 training_loss 1.5981139624118805 test_loss: 1.5787554740905763
epoch: 111 training_loss 1.6267859363555908 test_loss: 1.5760308265686036
epoch: 112 training_loss 1.5743693494796753 test_loss: 1.5681852340698241
epoch: 113 training_loss 1.5486337411403657 test_loss: 1.6234399795532226
epoch: 114 training_loss 1.56953675866127 test_loss: 1.5619510650634765
epoch: 115 training_loss 1.533630473613739 test_loss: 1.5808633804321288
epoch: 116 training_loss 1.5548737180233 test_loss: 1.5401955604553224
epoch: 117 training_loss 1.5564213800430298 test_loss: 1.5706156730651855
epoch: 118 training_loss 1.5573163950443267 test_loss: 1.5795217514038087
epoch: 119 training_loss 1.5617887246608735 test_loss: 1.600145149230957
epoch: 120 training_loss 1.5572119736671448 test_loss: 1.5273311614990235
epoch: 121 training_loss 1.545907508134842 test_loss: 1.5225956916809082
epoch: 122 training_loss 1.5479808712005616 test_loss: 1.5625900268554687
epoch: 123 training_loss 1.5411296963691712 test_loss: 1.5719002723693847
epoch: 124 training_loss 1.534804800748825 test_loss: 1.5106964111328125
epoch: 125 training_loss 1.5250249648094176 test_loss: 1.5166921615600586
epoch: 126 training_loss 1.5602956342697143 test_loss: 1.524952220916748
epoch: 127 training_loss 1.5378660225868226 test_loss: 1.5052703857421874
epoch: 128 training_loss 1.5212573552131652 test_loss: 1.5106616973876954
epoch: 129 training_loss 1.5364323997497558 test_loss: 1.6118654251098632
epoch: 130 training_loss 1.5258050000667571 test_loss: 1.5513103485107422
epoch: 131 training_loss 1.5118044960498809 test_loss: 1.5379583358764648
epoch: 132 training_loss 1.5284340393543243 test_loss: 1.5528218269348144
epoch: 133 training_loss 1.5077852296829224 test_loss: 1.5163517951965333
epoch: 134 training_loss 1.5162476980686188 test_loss: 1.5610203742980957
epoch: 135 training_loss 1.51374209523201 test_loss: 1.532510757446289
epoch: 136 training_loss 1.4976794290542603 test_loss: 1.5190240859985351
epoch: 137 training_loss 1.4908920419216156 test_loss: 1.5674381256103516
epoch: 138 training_loss 1.514380967617035 test_loss: 1.529305362701416
epoch: 139 training_loss 1.5115669870376587 test_loss: 1.5245201110839843
epoch: 140 training_loss 1.4830255258083342 test_loss: 1.4999631881713866
epoch: 141 training_loss 1.4849959683418275 test_loss: 1.495932674407959
epoch: 142 training_loss 1.4735666346549987 test_loss: 1.4851412773132324
epoch: 143 training_loss 1.4888208627700805 test_loss: 1.5149867057800293
epoch: 144 training_loss 1.498092451095581 test_loss: 1.5296422958374023
epoch: 145 training_loss 1.4973836529254914 test_loss: 1.4905717849731446
epoch: 146 training_loss 1.4916602981090545 test_loss: 1.5375612258911133
epoch: 147 training_loss 1.4971372413635253 test_loss: 1.4970511436462401
epoch: 148 training_loss 1.498223226070404 test_loss: 1.513513946533203
epoch: 149 training_loss 1.5012603330612182 test_loss: 1.4933768272399903
5073.619273011662
episode: 0 training return: tensor(-247.7371, device='cuda:0')
episode: 1 training return: tensor(-247.6393, device='cuda:0')
episode: 2 training return: tensor(-160.5479, device='cuda:0')
episode: 3 training return: tensor(-246.4272, device='cuda:0')
epoch: 1 test_true_pfm: 5102.839646394294 sim_pfm: -100.25583653951374
episode: 4 training return: tensor(-198.4099, device='cuda:0')
episode: 5 training return: tensor(-259.5459, device='cuda:0')
episode: 6 training return: tensor(-295.5280, device='cuda:0')
episode: 7 training return: tensor(-243.0205, device='cuda:0')
epoch: 2 test_true_pfm: 4848.622025685194 sim_pfm: -132.05565961318402
episode: 8 training return: tensor(-310.0993, device='cuda:0')
episode: 9 training return: tensor(-357.6118, device='cuda:0')
episode: 10 training return: tensor(-93.9448, device='cuda:0')
episode: 11 training return: tensor(-325.3508, device='cuda:0')
epoch: 3 test_true_pfm: 4857.985342771303 sim_pfm: -152.12083284238665
episode: 12 training return: tensor(-251.5039, device='cuda:0')
episode: 13 training return: tensor(-260.6405, device='cuda:0')
episode: 14 training return: tensor(-196.1514, device='cuda:0')
episode: 15 training return: tensor(-116.1491, device='cuda:0')
epoch: 4 test_true_pfm: 5000.1310070794725 sim_pfm: -120.37855287387113
episode: 16 training return: tensor(3.4621, device='cuda:0')
episode: 17 training return: tensor(-98.6585, device='cuda:0')
episode: 18 training return: tensor(-274.8566, device='cuda:0')
episode: 19 training return: tensor(-45.6897, device='cuda:0')
epoch: 5 test_true_pfm: 5280.528031199351 sim_pfm: 20.329835816907387
episode: 20 training return: tensor(-55.6413, device='cuda:0')
episode: 21 training return: tensor(38.5869, device='cuda:0')
episode: 22 training return: tensor(-217.0300, device='cuda:0')
episode: 23 training return: tensor(-40.2860, device='cuda:0')
epoch: 6 test_true_pfm: 5140.124002436587 sim_pfm: -56.119150748990556
episode: 24 training return: tensor(-35.6501, device='cuda:0')
episode: 25 training return: tensor(-184.1514, device='cuda:0')
episode: 26 training return: tensor(-146.9761, device='cuda:0')
episode: 27 training return: tensor(-145.4104, device='cuda:0')
epoch: 7 test_true_pfm: 5234.383808944586 sim_pfm: 35.33634995378088
episode: 28 training return: tensor(-76.3905, device='cuda:0')
episode: 29 training return: tensor(-28.5568, device='cuda:0')
episode: 30 training return: tensor(66.4096, device='cuda:0')
episode: 31 training return: tensor(-157.3324, device='cuda:0')
epoch: 8 test_true_pfm: 5141.867953361 sim_pfm: 24.4475123238614
episode: 32 training return: tensor(-90.6669, device='cuda:0')
episode: 33 training return: tensor(-101.9817, device='cuda:0')
episode: 34 training return: tensor(-44.3640, device='cuda:0')
episode: 35 training return: tensor(-32.6897, device='cuda:0')
epoch: 9 test_true_pfm: 5227.653160447577 sim_pfm: 79.23494907897354
episode: 36 training return: tensor(-12.6844, device='cuda:0')
episode: 37 training return: tensor(-14.1741, device='cuda:0')
episode: 38 training return: tensor(-28.0390, device='cuda:0')
episode: 39 training return: tensor(-88.2819, device='cuda:0')
epoch: 10 test_true_pfm: 5273.192111684318 sim_pfm: 90.79748432772855
episode: 40 training return: tensor(-10.7096, device='cuda:0')
episode: 41 training return: tensor(-8.6631, device='cuda:0')
episode: 42 training return: tensor(26.4104, device='cuda:0')
episode: 43 training return: tensor(16.2458, device='cuda:0')
epoch: 11 test_true_pfm: 5268.211160013406 sim_pfm: 88.72286968006908
episode: 44 training return: tensor(47.1595, device='cuda:0')
episode: 45 training return: tensor(-45.3622, device='cuda:0')
episode: 46 training return: tensor(-42.2663, device='cuda:0')
episode: 47 training return: tensor(-89.3603, device='cuda:0')
epoch: 12 test_true_pfm: 5421.635539327884 sim_pfm: 176.1520710651142
episode: 48 training return: tensor(50.3746, device='cuda:0')
episode: 49 training return: tensor(25.7920, device='cuda:0')
episode: 50 training return: tensor(135.6180, device='cuda:0')
episode: 51 training return: tensor(13.9771, device='cuda:0')
epoch: 13 test_true_pfm: 5374.392334621502 sim_pfm: 162.61706162131546
episode: 52 training return: tensor(151.5258, device='cuda:0')
episode: 53 training return: tensor(44.6256, device='cuda:0')
episode: 54 training return: tensor(87.2275, device='cuda:0')
episode: 55 training return: tensor(24.6144, device='cuda:0')
epoch: 14 test_true_pfm: 5432.79371669764 sim_pfm: 253.97748227093447
episode: 56 training return: tensor(29.4226, device='cuda:0')
episode: 57 training return: tensor(4.0599, device='cuda:0')
episode: 58 training return: tensor(1.1681, device='cuda:0')
episode: 59 training return: tensor(3.5687, device='cuda:0')
epoch: 15 test_true_pfm: 5469.983088560533 sim_pfm: 233.3198311155817
episode: 60 training return: tensor(-74.3776, device='cuda:0')
episode: 61 training return: tensor(236.4580, device='cuda:0')
episode: 62 training return: tensor(34.9768, device='cuda:0')
episode: 63 training return: tensor(161.3112, device='cuda:0')
epoch: 16 test_true_pfm: 5477.245719917362 sim_pfm: 180.82534165815255
episode: 64 training return: tensor(38.7628, device='cuda:0')
episode: 65 training return: tensor(96.0983, device='cuda:0')
episode: 66 training return: tensor(78.0940, device='cuda:0')
episode: 67 training return: tensor(148.6080, device='cuda:0')
epoch: 17 test_true_pfm: 5554.4419196339395 sim_pfm: 317.6635396050988
episode: 68 training return: tensor(133.2088, device='cuda:0')
episode: 69 training return: tensor(149.4139, device='cuda:0')
episode: 70 training return: tensor(34.2233, device='cuda:0')
episode: 71 training return: tensor(106.0142, device='cuda:0')
epoch: 18 test_true_pfm: 5477.637429345858 sim_pfm: 300.2723429911227
episode: 72 training return: tensor(74.6795, device='cuda:0')
episode: 73 training return: tensor(44.1466, device='cuda:0')
episode: 74 training return: tensor(114.6023, device='cuda:0')
episode: 75 training return: tensor(91.4520, device='cuda:0')
epoch: 19 test_true_pfm: 5452.290289953854 sim_pfm: 355.4753826619514
episode: 76 training return: tensor(94.0084, device='cuda:0')
episode: 77 training return: tensor(119.5301, device='cuda:0')
episode: 78 training return: tensor(217.2419, device='cuda:0')
episode: 79 training return: tensor(82.7179, device='cuda:0')
epoch: 20 test_true_pfm: 5522.3110236605835 sim_pfm: 333.96786954666214
episode: 80 training return: tensor(292.6462, device='cuda:0')
episode: 81 training return: tensor(48.4459, device='cuda:0')
episode: 82 training return: tensor(145.6423, device='cuda:0')
episode: 83 training return: tensor(205.2013, device='cuda:0')
epoch: 21 test_true_pfm: 5428.4794492043775 sim_pfm: 236.68289037097324
episode: 84 training return: tensor(232.7007, device='cuda:0')
episode: 85 training return: tensor(202.6565, device='cuda:0')
episode: 86 training return: tensor(184.4068, device='cuda:0')
episode: 87 training return: tensor(155.2650, device='cuda:0')
epoch: 22 test_true_pfm: 5564.86822226859 sim_pfm: 339.5203967303193
episode: 88 training return: tensor(253.7077, device='cuda:0')
episode: 89 training return: tensor(126.1460, device='cuda:0')
episode: 90 training return: tensor(176.5311, device='cuda:0')
episode: 91 training return: tensor(207.4745, device='cuda:0')
epoch: 23 test_true_pfm: 5566.454870998372 sim_pfm: 343.2986521374648
episode: 92 training return: tensor(248.1129, device='cuda:0')
episode: 93 training return: tensor(213.7713, device='cuda:0')
episode: 94 training return: tensor(137.8531, device='cuda:0')
episode: 95 training return: tensor(134.8095, device='cuda:0')
epoch: 24 test_true_pfm: 5684.379404098784 sim_pfm: 388.211301768121
episode: 96 training return: tensor(251.2779, device='cuda:0')
episode: 97 training return: tensor(278.0867, device='cuda:0')
episode: 98 training return: tensor(173.0287, device='cuda:0')
episode: 99 training return: tensor(198.8839, device='cuda:0')
epoch: 25 test_true_pfm: 5751.5593351796515 sim_pfm: 419.81736480724066
episode: 100 training return: tensor(261.3862, device='cuda:0')
episode: 101 training return: tensor(210.9574, device='cuda:0')
episode: 102 training return: tensor(262.0979, device='cuda:0')
episode: 103 training return: tensor(219.0809, device='cuda:0')
epoch: 26 test_true_pfm: 5733.6442193964895 sim_pfm: 443.53967612277484
episode: 104 training return: tensor(231.6678, device='cuda:0')
episode: 105 training return: tensor(142.2710, device='cuda:0')
episode: 106 training return: tensor(242.2055, device='cuda:0')
episode: 107 training return: tensor(305.3986, device='cuda:0')
epoch: 27 test_true_pfm: 5657.616035703731 sim_pfm: 396.9644286987411
episode: 108 training return: tensor(268.0962, device='cuda:0')
episode: 109 training return: tensor(291.7751, device='cuda:0')
episode: 110 training return: tensor(349.6824, device='cuda:0')
episode: 111 training return: tensor(280.5767, device='cuda:0')
epoch: 28 test_true_pfm: 5644.760714665387 sim_pfm: 430.2770647366803
episode: 112 training return: tensor(189.8596, device='cuda:0')
episode: 113 training return: tensor(344.4177, device='cuda:0')
episode: 114 training return: tensor(300.1798, device='cuda:0')
episode: 115 training return: tensor(355.3558, device='cuda:0')
epoch: 29 test_true_pfm: 5733.021764256065 sim_pfm: 414.7673876764311
episode: 116 training return: tensor(216.4571, device='cuda:0')
episode: 117 training return: tensor(251.8515, device='cuda:0')
episode: 118 training return: tensor(384.6253, device='cuda:0')
episode: 119 training return: tensor(278.8680, device='cuda:0')
epoch: 30 test_true_pfm: 5797.018019371997 sim_pfm: 480.99289435344207
episode: 120 training return: tensor(276.9410, device='cuda:0')
episode: 121 training return: tensor(280.9596, device='cuda:0')
episode: 122 training return: tensor(195.9290, device='cuda:0')
episode: 123 training return: tensor(312.2627, device='cuda:0')
epoch: 31 test_true_pfm: 5702.578179294435 sim_pfm: 406.71625679458765
episode: 124 training return: tensor(224.8903, device='cuda:0')
episode: 125 training return: tensor(293.7269, device='cuda:0')
episode: 126 training return: tensor(286.3549, device='cuda:0')
episode: 127 training return: tensor(366.9068, device='cuda:0')
epoch: 32 test_true_pfm: 5758.472050265472 sim_pfm: 461.5277463642803
episode: 128 training return: tensor(287.3036, device='cuda:0')
episode: 129 training return: tensor(392.1235, device='cuda:0')
episode: 130 training return: tensor(244.9097, device='cuda:0')
episode: 131 training return: tensor(348.2998, device='cuda:0')
epoch: 33 test_true_pfm: 5840.962671682835 sim_pfm: 404.3161438609629
episode: 132 training return: tensor(345.2751, device='cuda:0')
episode: 133 training return: tensor(358.4455, device='cuda:0')
episode: 134 training return: tensor(215.8525, device='cuda:0')
episode: 135 training return: tensor(330.9240, device='cuda:0')
epoch: 34 test_true_pfm: 5829.680887714309 sim_pfm: 474.3349428795821
episode: 136 training return: tensor(358.0209, device='cuda:0')
episode: 137 training return: tensor(344.8132, device='cuda:0')
episode: 138 training return: tensor(311.6748, device='cuda:0')
episode: 139 training return: tensor(332.3213, device='cuda:0')
epoch: 35 test_true_pfm: 5797.971628078293 sim_pfm: 485.2422127351456
episode: 140 training return: tensor(308.4987, device='cuda:0')
episode: 141 training return: tensor(249.6413, device='cuda:0')
episode: 142 training return: tensor(341.7807, device='cuda:0')
episode: 143 training return: tensor(356.2909, device='cuda:0')
epoch: 36 test_true_pfm: 5770.660596490711 sim_pfm: 528.8400684118387
episode: 144 training return: tensor(324.5555, device='cuda:0')
episode: 145 training return: tensor(424.8696, device='cuda:0')
episode: 146 training return: tensor(345.2689, device='cuda:0')
episode: 147 training return: tensor(309.8808, device='cuda:0')
epoch: 37 test_true_pfm: 5811.897987424046 sim_pfm: 420.8240587020603
episode: 148 training return: tensor(227.1821, device='cuda:0')
episode: 149 training return: tensor(380.4671, device='cuda:0')
episode: 150 training return: tensor(374.4572, device='cuda:0')
episode: 151 training return: tensor(356.6356, device='cuda:0')
epoch: 38 test_true_pfm: 5921.979824405062 sim_pfm: 503.4663971470727
episode: 152 training return: tensor(379.7458, device='cuda:0')
episode: 153 training return: tensor(180.1569, device='cuda:0')
episode: 154 training return: tensor(352.0566, device='cuda:0')
episode: 155 training return: tensor(439.2285, device='cuda:0')
epoch: 39 test_true_pfm: 5828.811293411373 sim_pfm: 520.5284992778907
episode: 156 training return: tensor(358.0363, device='cuda:0')
episode: 157 training return: tensor(318.9041, device='cuda:0')
episode: 158 training return: tensor(370.8572, device='cuda:0')
episode: 159 training return: tensor(405.5695, device='cuda:0')
epoch: 40 test_true_pfm: 5886.198606361938 sim_pfm: 483.16656971138826
episode: 160 training return: tensor(335.4550, device='cuda:0')
episode: 161 training return: tensor(206.4332, device='cuda:0')
episode: 162 training return: tensor(358.7112, device='cuda:0')
episode: 163 training return: tensor(520.1679, device='cuda:0')
epoch: 41 test_true_pfm: 5865.374012575808 sim_pfm: 475.7729868564832
episode: 164 training return: tensor(370.5938, device='cuda:0')
episode: 165 training return: tensor(367.9400, device='cuda:0')
episode: 166 training return: tensor(429.5220, device='cuda:0')
episode: 167 training return: tensor(296.7377, device='cuda:0')
epoch: 42 test_true_pfm: 5870.17539638576 sim_pfm: 489.56571664752363
episode: 168 training return: tensor(363.6774, device='cuda:0')
episode: 169 training return: tensor(451.9335, device='cuda:0')
episode: 170 training return: tensor(427.7845, device='cuda:0')
episode: 171 training return: tensor(346.0269, device='cuda:0')
epoch: 43 test_true_pfm: 5902.187461790559 sim_pfm: 490.52767142576823
episode: 172 training return: tensor(389.9872, device='cuda:0')
episode: 173 training return: tensor(374.6675, device='cuda:0')
episode: 174 training return: tensor(304.1305, device='cuda:0')
episode: 175 training return: tensor(350.4772, device='cuda:0')
epoch: 44 test_true_pfm: 5863.427956614642 sim_pfm: 525.6527462257654
episode: 176 training return: tensor(283.7059, device='cuda:0')
episode: 177 training return: tensor(443.8319, device='cuda:0')
episode: 178 training return: tensor(425.8383, device='cuda:0')
episode: 179 training return: tensor(410.2127, device='cuda:0')
epoch: 45 test_true_pfm: 5904.745937723598 sim_pfm: 527.8628792197754
episode: 180 training return: tensor(470.9579, device='cuda:0')
episode: 181 training return: tensor(419.4818, device='cuda:0')
episode: 182 training return: tensor(393.1452, device='cuda:0')
episode: 183 training return: tensor(419.0333, device='cuda:0')
epoch: 46 test_true_pfm: 5888.361767622188 sim_pfm: 514.0499981033694
episode: 184 training return: tensor(353.4234, device='cuda:0')
episode: 185 training return: tensor(351.1986, device='cuda:0')
episode: 186 training return: tensor(410.8551, device='cuda:0')
episode: 187 training return: tensor(338.5328, device='cuda:0')
epoch: 47 test_true_pfm: 5872.202580404567 sim_pfm: 555.1206716753853
episode: 188 training return: tensor(344.6609, device='cuda:0')
episode: 189 training return: tensor(332.9686, device='cuda:0')
episode: 190 training return: tensor(459.1532, device='cuda:0')
episode: 191 training return: tensor(385.9225, device='cuda:0')
epoch: 48 test_true_pfm: 5925.822025066147 sim_pfm: 540.262884589601
episode: 192 training return: tensor(399.2717, device='cuda:0')
episode: 193 training return: tensor(478.8660, device='cuda:0')
episode: 194 training return: tensor(349.0059, device='cuda:0')
episode: 195 training return: tensor(146.3176, device='cuda:0')
epoch: 49 test_true_pfm: 5973.901019084154 sim_pfm: 513.1323819855849
episode: 196 training return: tensor(392.6562, device='cuda:0')
episode: 197 training return: tensor(352.1026, device='cuda:0')
episode: 198 training return: tensor(398.4705, device='cuda:0')
episode: 199 training return: tensor(276.8265, device='cuda:0')
epoch: 50 test_true_pfm: 5926.295790601153 sim_pfm: 552.9323539537921
episode: 200 training return: tensor(338.3887, device='cuda:0')
episode: 201 training return: tensor(401.9289, device='cuda:0')
episode: 202 training return: tensor(381.0641, device='cuda:0')
episode: 203 training return: tensor(412.0052, device='cuda:0')
epoch: 51 test_true_pfm: 6022.643043851526 sim_pfm: 545.1042110838267
episode: 204 training return: tensor(391.1652, device='cuda:0')
episode: 205 training return: tensor(443.4276, device='cuda:0')
episode: 206 training return: tensor(315.3243, device='cuda:0')
episode: 207 training return: tensor(407.4516, device='cuda:0')
epoch: 52 test_true_pfm: 5838.515289974115 sim_pfm: 560.4917951136595
episode: 208 training return: tensor(451.0703, device='cuda:0')
episode: 209 training return: tensor(360.4775, device='cuda:0')
episode: 210 training return: tensor(-121.6536, device='cuda:0')
episode: 211 training return: tensor(422.8609, device='cuda:0')
epoch: 53 test_true_pfm: 5936.34968453029 sim_pfm: 514.9020166789802
episode: 212 training return: tensor(369.1257, device='cuda:0')
episode: 213 training return: tensor(448.4164, device='cuda:0')
episode: 214 training return: tensor(406.6168, device='cuda:0')
episode: 215 training return: tensor(416.1342, device='cuda:0')
epoch: 54 test_true_pfm: 5994.208067939061 sim_pfm: 557.9966741539538
episode: 216 training return: tensor(500.8387, device='cuda:0')
episode: 217 training return: tensor(396.2484, device='cuda:0')
episode: 218 training return: tensor(423.5766, device='cuda:0')
episode: 219 training return: tensor(415.8264, device='cuda:0')
epoch: 55 test_true_pfm: 5909.413974125338 sim_pfm: 555.556715838844
episode: 220 training return: tensor(429.4892, device='cuda:0')
episode: 221 training return: tensor(489.2350, device='cuda:0')
episode: 222 training return: tensor(352.3431, device='cuda:0')
episode: 223 training return: tensor(404.0862, device='cuda:0')
epoch: 56 test_true_pfm: 5967.1675575271665 sim_pfm: 541.0869142006073
episode: 224 training return: tensor(395.0088, device='cuda:0')
episode: 225 training return: tensor(340.9767, device='cuda:0')
episode: 226 training return: tensor(423.4007, device='cuda:0')
episode: 227 training return: tensor(391.5556, device='cuda:0')
epoch: 57 test_true_pfm: 5957.240050273297 sim_pfm: 610.1548720503537
episode: 228 training return: tensor(318.4370, device='cuda:0')
episode: 229 training return: tensor(357.0598, device='cuda:0')
episode: 230 training return: tensor(367.5479, device='cuda:0')
episode: 231 training return: tensor(365.6176, device='cuda:0')
epoch: 58 test_true_pfm: 5951.818437376688 sim_pfm: 541.8360224309921
episode: 232 training return: tensor(344.9302, device='cuda:0')
episode: 233 training return: tensor(302.3071, device='cuda:0')
episode: 234 training return: tensor(357.3573, device='cuda:0')
episode: 235 training return: tensor(336.2215, device='cuda:0')
epoch: 59 test_true_pfm: 5930.3065976552025 sim_pfm: 541.9509151920016
episode: 236 training return: tensor(299.4195, device='cuda:0')
episode: 237 training return: tensor(346.0549, device='cuda:0')
episode: 238 training return: tensor(416.2163, device='cuda:0')
episode: 239 training return: tensor(430.8186, device='cuda:0')
epoch: 60 test_true_pfm: 5945.174838551687 sim_pfm: 578.9581948600244
episode: 240 training return: tensor(418.8542, device='cuda:0')
episode: 241 training return: tensor(410.2378, device='cuda:0')
episode: 242 training return: tensor(403.6534, device='cuda:0')
episode: 243 training return: tensor(411.1018, device='cuda:0')
epoch: 61 test_true_pfm: 5968.9797948563 sim_pfm: 571.3755005057513
episode: 244 training return: tensor(391.7014, device='cuda:0')
episode: 245 training return: tensor(238.5255, device='cuda:0')
episode: 246 training return: tensor(424.8563, device='cuda:0')
episode: 247 training return: tensor(403.1195, device='cuda:0')
epoch: 62 test_true_pfm: 5934.676064222261 sim_pfm: 567.9037533983743
episode: 248 training return: tensor(373.3831, device='cuda:0')
episode: 249 training return: tensor(430.8169, device='cuda:0')
episode: 250 training return: tensor(418.9760, device='cuda:0')
episode: 251 training return: tensor(440.1109, device='cuda:0')
epoch: 63 test_true_pfm: 5984.56398534366 sim_pfm: 560.8405576841906
episode: 252 training return: tensor(401.3128, device='cuda:0')
episode: 253 training return: tensor(410.2278, device='cuda:0')
episode: 254 training return: tensor(441.2433, device='cuda:0')
episode: 255 training return: tensor(412.0982, device='cuda:0')
epoch: 64 test_true_pfm: 6050.760447245087 sim_pfm: 582.2757383940819
episode: 256 training return: tensor(362.1094, device='cuda:0')
episode: 257 training return: tensor(483.6368, device='cuda:0')
episode: 258 training return: tensor(396.7922, device='cuda:0')
episode: 259 training return: tensor(403.9904, device='cuda:0')
epoch: 65 test_true_pfm: 5981.569359934522 sim_pfm: 561.4093439992672
episode: 260 training return: tensor(362.8040, device='cuda:0')
episode: 261 training return: tensor(440.7806, device='cuda:0')
episode: 262 training return: tensor(435.2016, device='cuda:0')
episode: 263 training return: tensor(473.3176, device='cuda:0')
epoch: 66 test_true_pfm: 5979.089947421154 sim_pfm: 561.5446455032215
episode: 264 training return: tensor(365.4221, device='cuda:0')
episode: 265 training return: tensor(486.7719, device='cuda:0')
episode: 266 training return: tensor(417.9603, device='cuda:0')
episode: 267 training return: tensor(467.0127, device='cuda:0')
epoch: 67 test_true_pfm: 6040.410567272754 sim_pfm: 582.6630325361815
episode: 268 training return: tensor(419.0030, device='cuda:0')
episode: 269 training return: tensor(347.6007, device='cuda:0')
episode: 270 training return: tensor(424.7430, device='cuda:0')
episode: 271 training return: tensor(525.2243, device='cuda:0')
epoch: 68 test_true_pfm: 5967.972281920161 sim_pfm: 588.0002113399775
episode: 272 training return: tensor(420.9862, device='cuda:0')
episode: 273 training return: tensor(481.2284, device='cuda:0')
episode: 274 training return: tensor(399.6109, device='cuda:0')
episode: 275 training return: tensor(466.1814, device='cuda:0')
epoch: 69 test_true_pfm: 6026.291930158631 sim_pfm: 580.3051288516921
episode: 276 training return: tensor(361.9394, device='cuda:0')
episode: 277 training return: tensor(372.8164, device='cuda:0')
episode: 278 training return: tensor(488.3045, device='cuda:0')
episode: 279 training return: tensor(388.7135, device='cuda:0')
epoch: 70 test_true_pfm: 6016.501747477377 sim_pfm: 551.0874026286571
episode: 280 training return: tensor(419.5420, device='cuda:0')
episode: 281 training return: tensor(471.1581, device='cuda:0')
episode: 282 training return: tensor(397.5681, device='cuda:0')
episode: 283 training return: tensor(422.0338, device='cuda:0')
epoch: 71 test_true_pfm: 6011.9236442819965 sim_pfm: 538.966735293041
episode: 284 training return: tensor(395.8706, device='cuda:0')
episode: 285 training return: tensor(351.2058, device='cuda:0')
episode: 286 training return: tensor(469.2507, device='cuda:0')
episode: 287 training return: tensor(442.8911, device='cuda:0')
epoch: 72 test_true_pfm: 5944.162092068477 sim_pfm: 528.1069582070922
episode: 288 training return: tensor(496.6425, device='cuda:0')
episode: 289 training return: tensor(381.9560, device='cuda:0')
episode: 290 training return: tensor(392.8891, device='cuda:0')
episode: 291 training return: tensor(426.2365, device='cuda:0')
epoch: 73 test_true_pfm: 5977.073196266113 sim_pfm: 573.9787961360998
episode: 292 training return: tensor(475.8661, device='cuda:0')
episode: 293 training return: tensor(469.7928, device='cuda:0')
episode: 294 training return: tensor(471.3126, device='cuda:0')
episode: 295 training return: tensor(500.5591, device='cuda:0')
epoch: 74 test_true_pfm: 6014.707750310823 sim_pfm: 572.8841466505934
episode: 296 training return: tensor(397.2907, device='cuda:0')
episode: 297 training return: tensor(453.8586, device='cuda:0')
episode: 298 training return: tensor(408.3583, device='cuda:0')
episode: 299 training return: tensor(347.5833, device='cuda:0')
epoch: 75 test_true_pfm: 6013.480178993498 sim_pfm: 542.7828672797963
episode: 300 training return: tensor(471.4514, device='cuda:0')
episode: 301 training return: tensor(442.8361, device='cuda:0')
episode: 302 training return: tensor(487.2981, device='cuda:0')
episode: 303 training return: tensor(455.6221, device='cuda:0')
epoch: 76 test_true_pfm: 6089.8114336848885 sim_pfm: 609.7453453870645
episode: 304 training return: tensor(470.9626, device='cuda:0')
episode: 305 training return: tensor(525.4963, device='cuda:0')
episode: 306 training return: tensor(484.4006, device='cuda:0')
episode: 307 training return: tensor(496.7381, device='cuda:0')
epoch: 77 test_true_pfm: 6070.563851562605 sim_pfm: 614.5967819617363
episode: 308 training return: tensor(440.0055, device='cuda:0')
episode: 309 training return: tensor(452.8405, device='cuda:0')
episode: 310 training return: tensor(441.5077, device='cuda:0')
episode: 311 training return: tensor(430.2234, device='cuda:0')
epoch: 78 test_true_pfm: 6032.2979129166115 sim_pfm: 622.1829964588396
episode: 312 training return: tensor(481.1236, device='cuda:0')
episode: 313 training return: tensor(476.7413, device='cuda:0')
episode: 314 training return: tensor(387.2533, device='cuda:0')
episode: 315 training return: tensor(482.8675, device='cuda:0')
epoch: 79 test_true_pfm: 5995.0243334558645 sim_pfm: 569.7133503804993
episode: 316 training return: tensor(429.1789, device='cuda:0')
episode: 317 training return: tensor(322.8464, device='cuda:0')
episode: 318 training return: tensor(393.1218, device='cuda:0')
episode: 319 training return: tensor(480.1439, device='cuda:0')
epoch: 80 test_true_pfm: 5985.55001989468 sim_pfm: 582.2583822729842
episode: 320 training return: tensor(453.6673, device='cuda:0')
episode: 321 training return: tensor(456.8047, device='cuda:0')
episode: 322 training return: tensor(457.0156, device='cuda:0')
episode: 323 training return: tensor(423.9783, device='cuda:0')
epoch: 81 test_true_pfm: 6053.019805319701 sim_pfm: 587.3067629906776
episode: 324 training return: tensor(451.5934, device='cuda:0')
episode: 325 training return: tensor(423.1632, device='cuda:0')
episode: 326 training return: tensor(462.3981, device='cuda:0')
episode: 327 training return: tensor(452.5136, device='cuda:0')
epoch: 82 test_true_pfm: 6008.773909675964 sim_pfm: 556.6343660519245
episode: 328 training return: tensor(466.6511, device='cuda:0')
episode: 329 training return: tensor(438.6630, device='cuda:0')
episode: 330 training return: tensor(459.3354, device='cuda:0')
episode: 331 training return: tensor(477.1799, device='cuda:0')
epoch: 83 test_true_pfm: 6053.4146357407735 sim_pfm: 615.5735858367019
episode: 332 training return: tensor(436.4636, device='cuda:0')
episode: 333 training return: tensor(486.9747, device='cuda:0')
episode: 334 training return: tensor(438.1242, device='cuda:0')
episode: 335 training return: tensor(475.3088, device='cuda:0')
epoch: 84 test_true_pfm: 5978.178860766685 sim_pfm: 607.8803599490008
episode: 336 training return: tensor(435.7326, device='cuda:0')
episode: 337 training return: tensor(458.1584, device='cuda:0')
episode: 338 training return: tensor(478.0526, device='cuda:0')
episode: 339 training return: tensor(354.1169, device='cuda:0')
epoch: 85 test_true_pfm: 6027.714162524553 sim_pfm: 570.04427218736
episode: 340 training return: tensor(486.0466, device='cuda:0')
episode: 341 training return: tensor(413.9000, device='cuda:0')
episode: 342 training return: tensor(453.6010, device='cuda:0')
episode: 343 training return: tensor(367.7255, device='cuda:0')
epoch: 86 test_true_pfm: 6064.3284915623835 sim_pfm: 589.5011784504264
episode: 344 training return: tensor(443.3685, device='cuda:0')
episode: 345 training return: tensor(466.6855, device='cuda:0')
episode: 346 training return: tensor(473.2240, device='cuda:0')
episode: 347 training return: tensor(349.1731, device='cuda:0')
epoch: 87 test_true_pfm: 6016.0628867318055 sim_pfm: 608.5918959136665
episode: 348 training return: tensor(468.3343, device='cuda:0')
episode: 349 training return: tensor(378.4173, device='cuda:0')
episode: 350 training return: tensor(402.1754, device='cuda:0')
episode: 351 training return: tensor(510.1493, device='cuda:0')
epoch: 88 test_true_pfm: 6069.2742885352045 sim_pfm: 562.818957929189
episode: 352 training return: tensor(348.0687, device='cuda:0')
episode: 353 training return: tensor(390.2456, device='cuda:0')
episode: 354 training return: tensor(419.8919, device='cuda:0')
episode: 355 training return: tensor(480.8727, device='cuda:0')
epoch: 89 test_true_pfm: 6050.223322529787 sim_pfm: 565.003754432934
episode: 356 training return: tensor(532.4495, device='cuda:0')
episode: 357 training return: tensor(483.9562, device='cuda:0')
episode: 358 training return: tensor(386.5074, device='cuda:0')
episode: 359 training return: tensor(450.6592, device='cuda:0')
epoch: 90 test_true_pfm: 6061.559012613573 sim_pfm: 612.8481158256764
episode: 360 training return: tensor(513.4854, device='cuda:0')
episode: 361 training return: tensor(498.7931, device='cuda:0')
episode: 362 training return: tensor(438.0634, device='cuda:0')
episode: 363 training return: tensor(450.5011, device='cuda:0')
epoch: 91 test_true_pfm: 6065.889865210727 sim_pfm: 630.712468494438
episode: 364 training return: tensor(501.8139, device='cuda:0')
episode: 365 training return: tensor(500.7289, device='cuda:0')
episode: 366 training return: tensor(470.9273, device='cuda:0')
episode: 367 training return: tensor(408.6490, device='cuda:0')
epoch: 92 test_true_pfm: 6058.463299403535 sim_pfm: 623.1782271701765
episode: 368 training return: tensor(488.2162, device='cuda:0')
episode: 369 training return: tensor(454.0872, device='cuda:0')
episode: 370 training return: tensor(422.2713, device='cuda:0')
episode: 371 training return: tensor(518.8187, device='cuda:0')
epoch: 93 test_true_pfm: 6021.47091612019 sim_pfm: 608.0925624798692
episode: 372 training return: tensor(415.0952, device='cuda:0')
episode: 373 training return: tensor(482.6685, device='cuda:0')
episode: 374 training return: tensor(393.8769, device='cuda:0')
episode: 375 training return: tensor(461.0943, device='cuda:0')
epoch: 94 test_true_pfm: 6110.766892936339 sim_pfm: 627.3184947727714
episode: 376 training return: tensor(496.7078, device='cuda:0')
episode: 377 training return: tensor(532.9218, device='cuda:0')
episode: 378 training return: tensor(458.4098, device='cuda:0')
episode: 379 training return: tensor(475.8094, device='cuda:0')
epoch: 95 test_true_pfm: 6052.73763182517 sim_pfm: 580.3933451791139
episode: 380 training return: tensor(471.5745, device='cuda:0')
episode: 381 training return: tensor(504.6229, device='cuda:0')
episode: 382 training return: tensor(463.3465, device='cuda:0')
episode: 383 training return: tensor(507.9837, device='cuda:0')
epoch: 96 test_true_pfm: 6037.209507103758 sim_pfm: 617.3518092268301
episode: 384 training return: tensor(576.8069, device='cuda:0')
episode: 385 training return: tensor(437.9968, device='cuda:0')
episode: 386 training return: tensor(522.4648, device='cuda:0')
episode: 387 training return: tensor(509.8713, device='cuda:0')
epoch: 97 test_true_pfm: 6119.155242438302 sim_pfm: 605.5381834591972
episode: 388 training return: tensor(425.8611, device='cuda:0')
episode: 389 training return: tensor(434.3300, device='cuda:0')
episode: 390 training return: tensor(488.2493, device='cuda:0')
episode: 391 training return: tensor(432.7860, device='cuda:0')
epoch: 98 test_true_pfm: 6057.838762334384 sim_pfm: 616.0792857670846
episode: 392 training return: tensor(422.7003, device='cuda:0')
episode: 393 training return: tensor(486.1136, device='cuda:0')
episode: 394 training return: tensor(483.0821, device='cuda:0')
episode: 395 training return: tensor(486.8932, device='cuda:0')
epoch: 99 test_true_pfm: 6076.901606605956 sim_pfm: 605.3037119576669
episode: 396 training return: tensor(497.0274, device='cuda:0')
episode: 397 training return: tensor(441.7779, device='cuda:0')
episode: 398 training return: tensor(506.9781, device='cuda:0')
episode: 399 training return: tensor(511.9953, device='cuda:0')
epoch: 100 test_true_pfm: 6101.906186394834 sim_pfm: 594.9316089209946
episode: 400 training return: tensor(477.4132, device='cuda:0')
episode: 401 training return: tensor(508.8159, device='cuda:0')
episode: 402 training return: tensor(486.5172, device='cuda:0')
episode: 403 training return: tensor(537.9735, device='cuda:0')
epoch: 101 test_true_pfm: 6032.090712122535 sim_pfm: 596.873750985018
episode: 404 training return: tensor(456.3905, device='cuda:0')
episode: 405 training return: tensor(505.6510, device='cuda:0')
episode: 406 training return: tensor(522.7856, device='cuda:0')
episode: 407 training return: tensor(480.9637, device='cuda:0')
epoch: 102 test_true_pfm: 6101.124979882295 sim_pfm: 624.1959517383754
episode: 408 training return: tensor(433.9911, device='cuda:0')
episode: 409 training return: tensor(515.8219, device='cuda:0')
episode: 410 training return: tensor(519.3560, device='cuda:0')
episode: 411 training return: tensor(528.0317, device='cuda:0')
epoch: 103 test_true_pfm: 6071.095008414001 sim_pfm: 593.9909867974542
episode: 412 training return: tensor(534.2113, device='cuda:0')
episode: 413 training return: tensor(476.4007, device='cuda:0')
episode: 414 training return: tensor(452.5909, device='cuda:0')
episode: 415 training return: tensor(513.0334, device='cuda:0')
epoch: 104 test_true_pfm: 6045.123294445469 sim_pfm: 617.3930565670211
episode: 416 training return: tensor(488.5430, device='cuda:0')
episode: 417 training return: tensor(507.5052, device='cuda:0')
episode: 418 training return: tensor(455.0053, device='cuda:0')
episode: 419 training return: tensor(540.1467, device='cuda:0')
epoch: 105 test_true_pfm: 6100.471304595997 sim_pfm: 627.4262942822534
episode: 420 training return: tensor(524.8799, device='cuda:0')
episode: 421 training return: tensor(497.5267, device='cuda:0')
episode: 422 training return: tensor(547.8104, device='cuda:0')
episode: 423 training return: tensor(431.5737, device='cuda:0')
epoch: 106 test_true_pfm: 6092.757646087255 sim_pfm: 607.9833397422141
episode: 424 training return: tensor(538.8276, device='cuda:0')
episode: 425 training return: tensor(466.3965, device='cuda:0')
episode: 426 training return: tensor(431.1306, device='cuda:0')
episode: 427 training return: tensor(488.2565, device='cuda:0')
epoch: 107 test_true_pfm: 6063.151634747895 sim_pfm: 607.102716618334
episode: 428 training return: tensor(552.2803, device='cuda:0')
episode: 429 training return: tensor(449.1099, device='cuda:0')
episode: 430 training return: tensor(469.5384, device='cuda:0')
episode: 431 training return: tensor(538.9203, device='cuda:0')
epoch: 108 test_true_pfm: 6078.172072434797 sim_pfm: 633.0583204235882
episode: 432 training return: tensor(505.3829, device='cuda:0')
episode: 433 training return: tensor(468.7699, device='cuda:0')
episode: 434 training return: tensor(498.9870, device='cuda:0')
episode: 435 training return: tensor(322.9410, device='cuda:0')
epoch: 109 test_true_pfm: 6127.650147703432 sim_pfm: 653.4029328250714
episode: 436 training return: tensor(477.7331, device='cuda:0')
episode: 437 training return: tensor(489.8306, device='cuda:0')
episode: 438 training return: tensor(508.4665, device='cuda:0')
episode: 439 training return: tensor(461.5621, device='cuda:0')
epoch: 110 test_true_pfm: 6116.33212575522 sim_pfm: 640.7230591046003
episode: 440 training return: tensor(531.1368, device='cuda:0')
episode: 441 training return: tensor(465.2029, device='cuda:0')
episode: 442 training return: tensor(443.5540, device='cuda:0')
episode: 443 training return: tensor(442.4766, device='cuda:0')
epoch: 111 test_true_pfm: 6086.769362721553 sim_pfm: 615.4579721459304
episode: 444 training return: tensor(501.0213, device='cuda:0')
episode: 445 training return: tensor(470.9887, device='cuda:0')
episode: 446 training return: tensor(453.6370, device='cuda:0')
episode: 447 training return: tensor(480.3282, device='cuda:0')
epoch: 112 test_true_pfm: 6065.20000882851 sim_pfm: 616.5714541606916
episode: 448 training return: tensor(490.5244, device='cuda:0')
episode: 449 training return: tensor(490.5847, device='cuda:0')
episode: 450 training return: tensor(532.4968, device='cuda:0')
episode: 451 training return: tensor(491.1875, device='cuda:0')
epoch: 113 test_true_pfm: 6075.003437783063 sim_pfm: 578.8332300895418
episode: 452 training return: tensor(537.5659, device='cuda:0')
episode: 453 training return: tensor(475.7414, device='cuda:0')
episode: 454 training return: tensor(492.4478, device='cuda:0')
episode: 455 training return: tensor(487.9494, device='cuda:0')
epoch: 114 test_true_pfm: 6085.195532780354 sim_pfm: 645.7838128295261
episode: 456 training return: tensor(447.7549, device='cuda:0')
episode: 457 training return: tensor(515.3831, device='cuda:0')
episode: 458 training return: tensor(533.4969, device='cuda:0')
episode: 459 training return: tensor(481.6703, device='cuda:0')
epoch: 115 test_true_pfm: 6094.890366174895 sim_pfm: 650.7360729706319
episode: 460 training return: tensor(508.8464, device='cuda:0')
episode: 461 training return: tensor(435.0246, device='cuda:0')
episode: 462 training return: tensor(479.3120, device='cuda:0')
episode: 463 training return: tensor(485.3930, device='cuda:0')
epoch: 116 test_true_pfm: 6114.87096429336 sim_pfm: 612.2991751969967
episode: 464 training return: tensor(512.0615, device='cuda:0')
episode: 465 training return: tensor(453.9820, device='cuda:0')
episode: 466 training return: tensor(528.4974, device='cuda:0')
episode: 467 training return: tensor(539.3817, device='cuda:0')
epoch: 117 test_true_pfm: 6194.203177541055 sim_pfm: 632.1607902036048
episode: 468 training return: tensor(507.7957, device='cuda:0')
episode: 469 training return: tensor(439.6219, device='cuda:0')
episode: 470 training return: tensor(517.1851, device='cuda:0')
episode: 471 training return: tensor(443.9862, device='cuda:0')
epoch: 118 test_true_pfm: 6080.246794939708 sim_pfm: 625.7540445902074
episode: 472 training return: tensor(511.9680, device='cuda:0')
episode: 473 training return: tensor(483.5578, device='cuda:0')
episode: 474 training return: tensor(509.4247, device='cuda:0')
episode: 475 training return: tensor(495.2090, device='cuda:0')
epoch: 119 test_true_pfm: 6047.469655814294 sim_pfm: 627.4051563248989
episode: 476 training return: tensor(487.1013, device='cuda:0')
episode: 477 training return: tensor(459.7011, device='cuda:0')
episode: 478 training return: tensor(425.1927, device='cuda:0')
episode: 479 training return: tensor(468.0833, device='cuda:0')
epoch: 120 test_true_pfm: 6078.906336975088 sim_pfm: 592.5251578499252
episode: 480 training return: tensor(487.2916, device='cuda:0')
episode: 481 training return: tensor(495.0947, device='cuda:0')
episode: 482 training return: tensor(466.1707, device='cuda:0')
episode: 483 training return: tensor(558.7132, device='cuda:0')
epoch: 121 test_true_pfm: 6126.482153777721 sim_pfm: 624.974425960109
episode: 484 training return: tensor(430.1981, device='cuda:0')
episode: 485 training return: tensor(512.0589, device='cuda:0')
episode: 486 training return: tensor(396.6164, device='cuda:0')
episode: 487 training return: tensor(476.2533, device='cuda:0')
epoch: 122 test_true_pfm: 6053.654300037575 sim_pfm: 603.2657163166441
episode: 488 training return: tensor(487.8244, device='cuda:0')
episode: 489 training return: tensor(528.9067, device='cuda:0')
episode: 490 training return: tensor(543.4877, device='cuda:0')
episode: 491 training return: tensor(465.6761, device='cuda:0')
epoch: 123 test_true_pfm: 6095.580918047154 sim_pfm: 653.4525556042014
episode: 492 training return: tensor(483.5902, device='cuda:0')
episode: 493 training return: tensor(533.8668, device='cuda:0')
episode: 494 training return: tensor(528.9553, device='cuda:0')
episode: 495 training return: tensor(463.9922, device='cuda:0')
epoch: 124 test_true_pfm: 6113.64354554528 sim_pfm: 643.2062352565505
episode: 496 training return: tensor(538.7463, device='cuda:0')
episode: 497 training return: tensor(547.2498, device='cuda:0')
episode: 498 training return: tensor(480.9690, device='cuda:0')
episode: 499 training return: tensor(512.0918, device='cuda:0')
epoch: 125 test_true_pfm: 6150.12376957418 sim_pfm: 628.7447842646701
episode: 500 training return: tensor(570.3258, device='cuda:0')
episode: 501 training return: tensor(556.5533, device='cuda:0')
episode: 502 training return: tensor(501.8930, device='cuda:0')
episode: 503 training return: tensor(391.6480, device='cuda:0')
epoch: 126 test_true_pfm: 6096.693810333573 sim_pfm: 627.1155169586806
episode: 504 training return: tensor(492.2442, device='cuda:0')
episode: 505 training return: tensor(450.6666, device='cuda:0')
episode: 506 training return: tensor(496.4529, device='cuda:0')
episode: 507 training return: tensor(524.6760, device='cuda:0')
epoch: 127 test_true_pfm: 6152.507429105368 sim_pfm: 621.879305810978
episode: 508 training return: tensor(519.7726, device='cuda:0')
episode: 509 training return: tensor(550.0910, device='cuda:0')
episode: 510 training return: tensor(493.2803, device='cuda:0')
episode: 511 training return: tensor(490.5804, device='cuda:0')
epoch: 128 test_true_pfm: 6102.670507369646 sim_pfm: 635.5957268909551
episode: 512 training return: tensor(535.5803, device='cuda:0')
episode: 513 training return: tensor(470.4088, device='cuda:0')
episode: 514 training return: tensor(503.8093, device='cuda:0')
episode: 515 training return: tensor(448.8224, device='cuda:0')
epoch: 129 test_true_pfm: 6132.751253391719 sim_pfm: 651.1686932754237
episode: 516 training return: tensor(470.5669, device='cuda:0')
episode: 517 training return: tensor(446.5685, device='cuda:0')
episode: 518 training return: tensor(458.3380, device='cuda:0')
episode: 519 training return: tensor(446.4137, device='cuda:0')
epoch: 130 test_true_pfm: 6122.564868159225 sim_pfm: 641.2154870043354
episode: 520 training return: tensor(523.0507, device='cuda:0')
episode: 521 training return: tensor(437.7523, device='cuda:0')
episode: 522 training return: tensor(424.0065, device='cuda:0')
episode: 523 training return: tensor(469.1010, device='cuda:0')
epoch: 131 test_true_pfm: 6099.212043239266 sim_pfm: 650.0740410899647
episode: 524 training return: tensor(447.2843, device='cuda:0')
episode: 525 training return: tensor(521.8425, device='cuda:0')
episode: 526 training return: tensor(515.0513, device='cuda:0')
episode: 527 training return: tensor(514.2794, device='cuda:0')
epoch: 132 test_true_pfm: 6130.747951252274 sim_pfm: 641.1891732571336
episode: 528 training return: tensor(540.1625, device='cuda:0')
episode: 529 training return: tensor(550.1840, device='cuda:0')
episode: 530 training return: tensor(545.0879, device='cuda:0')
episode: 531 training return: tensor(515.7185, device='cuda:0')
epoch: 133 test_true_pfm: 6166.882947596408 sim_pfm: 622.7799575188934
episode: 532 training return: tensor(512.2842, device='cuda:0')
episode: 533 training return: tensor(511.3643, device='cuda:0')
episode: 534 training return: tensor(542.7705, device='cuda:0')
episode: 535 training return: tensor(519.9510, device='cuda:0')
epoch: 134 test_true_pfm: 6088.866293100921 sim_pfm: 650.2155979581876
episode: 536 training return: tensor(553.9091, device='cuda:0')
episode: 537 training return: tensor(482.5938, device='cuda:0')
episode: 538 training return: tensor(492.6792, device='cuda:0')
episode: 539 training return: tensor(530.6641, device='cuda:0')
epoch: 135 test_true_pfm: 6059.876019584087 sim_pfm: 644.2809013201235
episode: 540 training return: tensor(483.0270, device='cuda:0')
episode: 541 training return: tensor(580.4600, device='cuda:0')
episode: 542 training return: tensor(525.2960, device='cuda:0')
episode: 543 training return: tensor(516.8368, device='cuda:0')
epoch: 136 test_true_pfm: 6152.409928993922 sim_pfm: 642.913473938553
episode: 544 training return: tensor(526.0786, device='cuda:0')
episode: 545 training return: tensor(435.6451, device='cuda:0')
episode: 546 training return: tensor(558.1931, device='cuda:0')
episode: 547 training return: tensor(505.5682, device='cuda:0')
epoch: 137 test_true_pfm: 6140.991958581561 sim_pfm: 608.4445169142758
episode: 548 training return: tensor(508.9965, device='cuda:0')
episode: 549 training return: tensor(555.2031, device='cuda:0')
episode: 550 training return: tensor(460.9573, device='cuda:0')
episode: 551 training return: tensor(479.8809, device='cuda:0')
epoch: 138 test_true_pfm: 6174.631773187518 sim_pfm: 652.0163469898204
episode: 552 training return: tensor(547.5490, device='cuda:0')
episode: 553 training return: tensor(535.2192, device='cuda:0')
episode: 554 training return: tensor(529.2512, device='cuda:0')
episode: 555 training return: tensor(499.8911, device='cuda:0')
epoch: 139 test_true_pfm: 6091.90861868055 sim_pfm: 652.8629726390354
episode: 556 training return: tensor(521.3018, device='cuda:0')
episode: 557 training return: tensor(524.6927, device='cuda:0')
episode: 558 training return: tensor(495.5763, device='cuda:0')
episode: 559 training return: tensor(459.7209, device='cuda:0')
epoch: 140 test_true_pfm: 6143.859997887619 sim_pfm: 669.1730881721402
episode: 560 training return: tensor(586.3931, device='cuda:0')
episode: 561 training return: tensor(562.9620, device='cuda:0')
episode: 562 training return: tensor(456.9933, device='cuda:0')
episode: 563 training return: tensor(486.9834, device='cuda:0')
epoch: 141 test_true_pfm: 6135.1406310678785 sim_pfm: 649.5092369883399
episode: 564 training return: tensor(507.0775, device='cuda:0')
episode: 565 training return: tensor(473.3052, device='cuda:0')
episode: 566 training return: tensor(505.9931, device='cuda:0')
episode: 567 training return: tensor(539.0965, device='cuda:0')
epoch: 142 test_true_pfm: 6132.191882885499 sim_pfm: 632.3316583164269
episode: 568 training return: tensor(527.8265, device='cuda:0')
episode: 569 training return: tensor(519.3308, device='cuda:0')
episode: 570 training return: tensor(489.6135, device='cuda:0')
episode: 571 training return: tensor(535.0854, device='cuda:0')
epoch: 143 test_true_pfm: 6216.682507540644 sim_pfm: 627.4987967875786
episode: 572 training return: tensor(428.9991, device='cuda:0')
episode: 573 training return: tensor(558.2977, device='cuda:0')
episode: 574 training return: tensor(549.6239, device='cuda:0')
episode: 575 training return: tensor(513.3370, device='cuda:0')
epoch: 144 test_true_pfm: 6154.454076546833 sim_pfm: 662.5001518792997
episode: 576 training return: tensor(543.9774, device='cuda:0')
episode: 577 training return: tensor(509.3893, device='cuda:0')
episode: 578 training return: tensor(516.0699, device='cuda:0')
episode: 579 training return: tensor(441.1270, device='cuda:0')
epoch: 145 test_true_pfm: 6184.789946556517 sim_pfm: 644.6619371559937
episode: 580 training return: tensor(521.7161, device='cuda:0')
episode: 581 training return: tensor(563.1757, device='cuda:0')
episode: 582 training return: tensor(493.4179, device='cuda:0')
episode: 583 training return: tensor(538.0086, device='cuda:0')
epoch: 146 test_true_pfm: 6167.079537853581 sim_pfm: 650.3606138054747
episode: 584 training return: tensor(566.3752, device='cuda:0')
episode: 585 training return: tensor(480.7776, device='cuda:0')
episode: 586 training return: tensor(516.3046, device='cuda:0')
episode: 587 training return: tensor(507.9581, device='cuda:0')
epoch: 147 test_true_pfm: 6133.235996781842 sim_pfm: 630.5553523950512
episode: 588 training return: tensor(468.8054, device='cuda:0')
episode: 589 training return: tensor(510.3074, device='cuda:0')
episode: 590 training return: tensor(547.0632, device='cuda:0')
episode: 591 training return: tensor(535.0237, device='cuda:0')
epoch: 148 test_true_pfm: 6151.291453384184 sim_pfm: 664.5008022406449
episode: 592 training return: tensor(505.4850, device='cuda:0')
episode: 593 training return: tensor(569.6019, device='cuda:0')
episode: 594 training return: tensor(536.3073, device='cuda:0')
episode: 595 training return: tensor(471.5870, device='cuda:0')
epoch: 149 test_true_pfm: 6121.190569930503 sim_pfm: 630.9024784132683
episode: 596 training return: tensor(577.4056, device='cuda:0')
episode: 597 training return: tensor(503.0958, device='cuda:0')
episode: 598 training return: tensor(573.0314, device='cuda:0')
episode: 599 training return: tensor(511.2549, device='cuda:0')
epoch: 150 test_true_pfm: 6124.402428705152 sim_pfm: 647.0879857914988
