['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '9', '--data', '100000', '--regu', '0.2']
epoch: 0 training_loss 0.31719166308641433 test_loss: 0.21624481678009033
epoch: 1 training_loss 0.18079460792243482 test_loss: 0.1684891700744629
epoch: 2 training_loss 0.15866849228739738 test_loss: 0.1464303493499756
epoch: 3 training_loss 0.14441022016108035 test_loss: 0.13413448333740235
epoch: 4 training_loss 0.1391729760542512 test_loss: 0.13438628911972045
epoch: 5 training_loss 0.135900618173182 test_loss: 0.1218190312385559
epoch: 6 training_loss 0.13002079397439956 test_loss: 0.14889016151428222
epoch: 7 training_loss 0.12606854226440192 test_loss: 0.14180176258087157
epoch: 8 training_loss 0.12730659373104572 test_loss: 0.12044551372528076
epoch: 9 training_loss 0.11951646409928798 test_loss: 0.10439753532409668
epoch: 10 training_loss 0.11135932516306639 test_loss: 0.11873191595077515
epoch: 11 training_loss 0.11708778716623783 test_loss: 0.11467597484588624
epoch: 12 training_loss 0.10480057772248984 test_loss: 0.11300204992294312
epoch: 13 training_loss 0.11426895825192332 test_loss: 0.1012032151222229
epoch: 14 training_loss 0.1145309328660369 test_loss: 0.11352099180221557
epoch: 15 training_loss 0.11047247309237719 test_loss: 0.13087602853775024
epoch: 16 training_loss 0.11289983151480555 test_loss: 0.11220729351043701
epoch: 17 training_loss 0.11407560262829065 test_loss: 0.10955748558044434
epoch: 18 training_loss 0.1074760635010898 test_loss: 0.09712088108062744
epoch: 19 training_loss 0.11321920525282621 test_loss: 0.12581224441528321
epoch: 20 training_loss 0.10962188828736544 test_loss: 0.10599696636199951
epoch: 21 training_loss 0.11324738781899214 test_loss: 0.1270588755607605
epoch: 22 training_loss 0.107377326823771 test_loss: 0.11122682094573974
epoch: 23 training_loss 0.10605371832847595 test_loss: 0.10669523477554321
epoch: 24 training_loss 0.11135237887501717 test_loss: 0.11848535537719726
epoch: 25 training_loss 0.10586122501641512 test_loss: 0.1048629879951477
epoch: 26 training_loss 0.10930046178400517 test_loss: 0.09189818501472473
epoch: 27 training_loss 0.10241871500387788 test_loss: 0.11064062118530274
epoch: 28 training_loss 0.10721174735575914 test_loss: 0.11861850023269653
epoch: 29 training_loss 0.11656387632712722 test_loss: 0.10386654138565063
epoch: 30 training_loss 0.11734988886862993 test_loss: 0.10152208805084229
epoch: 31 training_loss 0.10775641538202763 test_loss: 0.11967282295227051
epoch: 32 training_loss 0.10852479690685868 test_loss: 0.09554517269134521
epoch: 33 training_loss 0.11140320524573326 test_loss: 0.09938417673110962
epoch: 34 training_loss 0.0959425537660718 test_loss: 0.11316795349121093
epoch: 35 training_loss 0.09938978070393205 test_loss: 0.09868811368942261
epoch: 36 training_loss 0.10637473620474339 test_loss: 0.12448241710662841
epoch: 37 training_loss 0.10733885381370784 test_loss: 0.11748020648956299
epoch: 38 training_loss 0.11524667706340551 test_loss: 0.10576492547988892
epoch: 39 training_loss 0.10586333598941565 test_loss: 0.12109568119049072
epoch: 40 training_loss 0.11640977744013072 test_loss: 0.11313313245773315
epoch: 41 training_loss 0.11770729888230562 test_loss: 0.11252539157867432
epoch: 42 training_loss 0.11263901803642512 test_loss: 0.10733364820480347
epoch: 43 training_loss 0.09916344253346324 test_loss: 0.11135239601135254
epoch: 44 training_loss 0.10726830506697298 test_loss: 0.10555199384689332
epoch: 45 training_loss 0.10441539425402879 test_loss: 0.10987683534622192
epoch: 46 training_loss 0.11755013624206186 test_loss: 0.1293515682220459
epoch: 47 training_loss 0.10515963714569806 test_loss: 0.0956699550151825
epoch: 48 training_loss 0.10900152023881673 test_loss: 0.10257551670074463
epoch: 49 training_loss 0.1094676704518497 test_loss: 0.11909040212631225
epoch: 50 training_loss 0.10468374196439982 test_loss: 0.12503904104232788
epoch: 51 training_loss 0.10809285387396812 test_loss: 0.10081030130386352
epoch: 52 training_loss 0.10929890781641007 test_loss: 0.11719456911087037
epoch: 53 training_loss 0.10760973390191793 test_loss: 0.0947464108467102
epoch: 54 training_loss 0.10293838590383529 test_loss: 0.10296545028686524
epoch: 55 training_loss 0.11237091612070799 test_loss: 0.10427436828613282
epoch: 56 training_loss 0.10912587698549032 test_loss: 0.10675450563430786
epoch: 57 training_loss 0.10754725486971438 test_loss: 0.11168800592422486
epoch: 58 training_loss 0.10244796745479107 test_loss: 0.11089541912078857
epoch: 59 training_loss 0.11321447409689427 test_loss: 0.10593317747116089
epoch: 60 training_loss 0.10795817282050849 test_loss: 0.09733926057815552
epoch: 61 training_loss 0.10952727369964123 test_loss: 0.10387743711471557
epoch: 62 training_loss 0.11091639827936887 test_loss: 0.11208441257476806
epoch: 63 training_loss 0.10906238310039043 test_loss: 0.11138079166412354
epoch: 64 training_loss 0.11732046712189913 test_loss: 0.10179870128631592
epoch: 65 training_loss 0.10319483451545239 test_loss: 0.10082274675369263
epoch: 66 training_loss 0.10822010716423393 test_loss: 0.10023249387741089
epoch: 67 training_loss 0.09972444579005241 test_loss: 0.09873526096343994
epoch: 68 training_loss 0.10369236886501312 test_loss: 0.11056501865386963
epoch: 69 training_loss 0.09887377344071865 test_loss: 0.13238428831100463
epoch: 70 training_loss 0.10673284189775586 test_loss: 0.09448333382606507
epoch: 71 training_loss 0.10624304849654437 test_loss: 0.10541709661483764
epoch: 72 training_loss 0.10870862489566208 test_loss: 0.12079137563705444
epoch: 73 training_loss 0.09874180952087044 test_loss: 0.110931134223938
epoch: 74 training_loss 0.10153273846954107 test_loss: 0.09559464454650879
epoch: 75 training_loss 0.10297489738091826 test_loss: 0.1011518120765686
epoch: 76 training_loss 0.10048076141625643 test_loss: 0.09659149050712586
epoch: 77 training_loss 0.10779271142557263 test_loss: 0.11249428987503052
epoch: 78 training_loss 0.10879962185397744 test_loss: 0.09695590138435364
epoch: 79 training_loss 0.10409725561738015 test_loss: 0.1028016448020935
epoch: 80 training_loss 0.10315549355000257 test_loss: 0.12173595428466796
epoch: 81 training_loss 0.10663677409291267 test_loss: 0.10256575345993042
epoch: 82 training_loss 0.09764638906344772 test_loss: 0.10525330305099487
epoch: 83 training_loss 0.09471542105078697 test_loss: 0.12074477672576904
epoch: 84 training_loss 0.11125455243512988 test_loss: 0.11226073503494263
epoch: 85 training_loss 0.10728743322193622 test_loss: 0.09196562767028808
epoch: 86 training_loss 0.10802766075357795 test_loss: 0.10941548347473144
epoch: 87 training_loss 0.1072258429788053 test_loss: 0.10474296808242797
epoch: 88 training_loss 0.11341875463724137 test_loss: 0.11682300567626953
epoch: 89 training_loss 0.10864128429442645 test_loss: 0.11424545049667359
epoch: 90 training_loss 0.10971697146072984 test_loss: 0.11350576877593994
epoch: 91 training_loss 0.11213726352900266 test_loss: 0.11575636863708497
epoch: 92 training_loss 0.10578929293900728 test_loss: 0.10121581554412842
epoch: 93 training_loss 0.10839831497520208 test_loss: 0.09932765364646912
epoch: 94 training_loss 0.10598586425185204 test_loss: 0.10715351104736329
epoch: 95 training_loss 0.10267620224505664 test_loss: 0.09647915363311768
epoch: 96 training_loss 0.10923335812985897 test_loss: 0.11508828401565552
epoch: 97 training_loss 0.10175202962011098 test_loss: 0.09748682975769044
epoch: 98 training_loss 0.1084537311270833 test_loss: 0.09816062450408936
epoch: 99 training_loss 0.10650510739535093 test_loss: 0.11954289674758911
epoch: 100 training_loss 0.10408135488629341 test_loss: 0.11220420598983764
epoch: 101 training_loss 0.10398629928007722 test_loss: 0.10323836803436279
epoch: 102 training_loss 0.10475684080272912 test_loss: 0.09915989637374878
epoch: 103 training_loss 0.10605445485562086 test_loss: 0.09342739582061768
epoch: 104 training_loss 0.10672935105860233 test_loss: 0.11996452808380127
epoch: 105 training_loss 0.10486938897520304 test_loss: 0.10378360748291016
epoch: 106 training_loss 0.10463209342211485 test_loss: 0.09117198586463929
epoch: 107 training_loss 0.10554416690021753 test_loss: 0.10647788047790527
epoch: 108 training_loss 0.10568426989018917 test_loss: 0.10702358484268189
epoch: 109 training_loss 0.111266855802387 test_loss: 0.08945086002349853
epoch: 110 training_loss 0.10797493858262897 test_loss: 0.08778263926506043
epoch: 111 training_loss 0.10037877336144448 test_loss: 0.10221971273422241
epoch: 112 training_loss 0.1065708576515317 test_loss: 0.09792166352272033
epoch: 113 training_loss 0.10859391272068024 test_loss: 0.10171208381652833
epoch: 114 training_loss 0.10286706587299704 test_loss: 0.08645040988922119
epoch: 115 training_loss 0.10917594607919455 test_loss: 0.11003943681716918
epoch: 116 training_loss 0.10235829282552004 test_loss: 0.1162114143371582
epoch: 117 training_loss 0.10820182576775551 test_loss: 0.10607831478118897
epoch: 118 training_loss 0.10239773945882917 test_loss: 0.09469143152236939
epoch: 119 training_loss 0.10708293827250599 test_loss: 0.10189050436019897
epoch: 120 training_loss 0.10067301187664271 test_loss: 0.11604793071746826
epoch: 121 training_loss 0.10209000376984477 test_loss: 0.10673319101333618
epoch: 122 training_loss 0.0993210994079709 test_loss: 0.10092172622680665
epoch: 123 training_loss 0.09754764350131154 test_loss: 0.11745425462722778
epoch: 124 training_loss 0.11557653186842799 test_loss: 0.10130807161331176
epoch: 125 training_loss 0.10693360105156899 test_loss: 0.10605719089508056
epoch: 126 training_loss 0.1029026679508388 test_loss: 0.10904724597930908
epoch: 127 training_loss 0.10629070555791259 test_loss: 0.10096638202667237
epoch: 128 training_loss 0.10722631486132742 test_loss: 0.11111637353897094
epoch: 129 training_loss 0.10420415870845318 test_loss: 0.11350064277648926
epoch: 130 training_loss 0.10726861665025353 test_loss: 0.10088762044906616
epoch: 131 training_loss 0.10938033957034349 test_loss: 0.11623901128768921
epoch: 132 training_loss 0.10676477219909429 test_loss: 0.09984257221221923
epoch: 133 training_loss 0.0991043908149004 test_loss: 0.10654728412628174
epoch: 134 training_loss 0.10867757629603148 test_loss: 0.11273523569107055
epoch: 135 training_loss 0.10875058239325881 test_loss: 0.10344957113265991
epoch: 136 training_loss 0.10824860094115138 test_loss: 0.11334174871444702
epoch: 137 training_loss 0.10519607529044152 test_loss: 0.08737276792526245
epoch: 138 training_loss 0.10529918197542429 test_loss: 0.10151190757751465
epoch: 139 training_loss 0.10173348160460591 test_loss: 0.10220701694488525
epoch: 140 training_loss 0.10141364213079214 test_loss: 0.09961624145507812
epoch: 141 training_loss 0.10727519854903221 test_loss: 0.09386665225028992
epoch: 142 training_loss 0.10497204158455134 test_loss: 0.11045649051666259
epoch: 143 training_loss 0.10326216550543904 test_loss: 0.10727490186691284
epoch: 144 training_loss 0.1034983067214489 test_loss: 0.10541428327560425
epoch: 145 training_loss 0.10333085807040333 test_loss: 0.1010523796081543
epoch: 146 training_loss 0.11214169438928366 test_loss: 0.10474573373794556
epoch: 147 training_loss 0.10645300524309277 test_loss: 0.11679677963256836
epoch: 148 training_loss 0.09863490508869291 test_loss: 0.11100101470947266
epoch: 149 training_loss 0.11143271893262863 test_loss: 0.09515889286994934
epoch: 0 training_loss 50.224436874389646 test_loss: 24.340901184082032
epoch: 1 training_loss 19.03411873817444 test_loss: 15.692474365234375
epoch: 2 training_loss 13.945123071670531 test_loss: 12.521218872070312
epoch: 3 training_loss 11.280495767593385 test_loss: 10.1086669921875
epoch: 4 training_loss 9.56735779285431 test_loss: 9.007738494873047
epoch: 5 training_loss 8.379664697647094 test_loss: 7.6768135070800785
epoch: 6 training_loss 7.372853417396545 test_loss: 7.140143585205078
epoch: 7 training_loss 6.696416840553284 test_loss: 6.435008239746094
epoch: 8 training_loss 6.001350607872009 test_loss: 6.052958297729492
epoch: 9 training_loss 5.611088428497315 test_loss: 5.158983612060547
epoch: 10 training_loss 5.185600242614746 test_loss: 5.236991119384766
epoch: 11 training_loss 4.981832842826844 test_loss: 4.852698135375976
epoch: 12 training_loss 4.617174406051635 test_loss: 4.492790222167969
epoch: 13 training_loss 4.343700182437897 test_loss: 4.306713485717774
epoch: 14 training_loss 4.248724751472473 test_loss: 4.2049003601074215
epoch: 15 training_loss 4.009904248714447 test_loss: 3.978220748901367
epoch: 16 training_loss 3.85751647233963 test_loss: 3.6616985321044924
epoch: 17 training_loss 3.748945105075836 test_loss: 3.6977584838867186
epoch: 18 training_loss 3.586698365211487 test_loss: 3.665108489990234
epoch: 19 training_loss 3.5210674357414247 test_loss: 3.4591995239257813
epoch: 20 training_loss 3.395494406223297 test_loss: 3.4349029541015623
epoch: 21 training_loss 3.3259388208389282 test_loss: 3.315947723388672
epoch: 22 training_loss 3.1844089436531067 test_loss: 3.2463447570800783
epoch: 23 training_loss 3.0946538281440734 test_loss: 3.15651798248291
epoch: 24 training_loss 3.013269419670105 test_loss: 3.122856330871582
epoch: 25 training_loss 3.000594232082367 test_loss: 2.8447137832641602
epoch: 26 training_loss 2.920031633377075 test_loss: 3.0176897048950195
epoch: 27 training_loss 2.9127872490882876 test_loss: 2.756396675109863
epoch: 28 training_loss 2.7910979437828063 test_loss: 2.810098648071289
epoch: 29 training_loss 2.7762134289741516 test_loss: 2.5627492904663085
epoch: 30 training_loss 2.730550830364227 test_loss: 2.718256950378418
epoch: 31 training_loss 2.737420518398285 test_loss: 2.676817512512207
epoch: 32 training_loss 2.6228207778930663 test_loss: 2.7061548233032227
epoch: 33 training_loss 2.5771230268478393 test_loss: 2.452597999572754
epoch: 34 training_loss 2.494143936634064 test_loss: 2.5034141540527344
epoch: 35 training_loss 2.5218369126319886 test_loss: 2.4237857818603517
epoch: 36 training_loss 2.5010586738586427 test_loss: 2.624713325500488
epoch: 37 training_loss 2.4116544008255003 test_loss: 2.3100397109985353
epoch: 38 training_loss 2.3823281216621397 test_loss: 2.3181360244750975
epoch: 39 training_loss 2.357719799280167 test_loss: 2.3702978134155273
epoch: 40 training_loss 2.4008055567741393 test_loss: 2.2853384017944336
epoch: 41 training_loss 2.3057865786552427 test_loss: 2.3431644439697266
epoch: 42 training_loss 2.3355542051792146 test_loss: 2.2231719970703123
epoch: 43 training_loss 2.2384924471378325 test_loss: 2.2418190002441407
epoch: 44 training_loss 2.2548234391212465 test_loss: 2.204197120666504
epoch: 45 training_loss 2.2394343507289887 test_loss: 2.2966659545898436
epoch: 46 training_loss 2.2696837306022646 test_loss: 2.2851200103759766
epoch: 47 training_loss 2.1995328640937806 test_loss: 2.201107406616211
epoch: 48 training_loss 2.23707244515419 test_loss: 2.1693973541259766
epoch: 49 training_loss 2.160190484523773 test_loss: 2.2430620193481445
epoch: 50 training_loss 2.131331720352173 test_loss: 2.177552604675293
epoch: 51 training_loss 2.1734597408771514 test_loss: 2.1841949462890624
epoch: 52 training_loss 2.1481898295879365 test_loss: 2.1244623184204103
epoch: 53 training_loss 2.1266348862648012 test_loss: 2.128670310974121
epoch: 54 training_loss 2.132197003364563 test_loss: 1.9894128799438477
epoch: 55 training_loss 2.067851197719574 test_loss: 2.149014472961426
epoch: 56 training_loss 2.0782928502559663 test_loss: 2.087577056884766
epoch: 57 training_loss 2.0361212027072906 test_loss: 2.0567684173583984
epoch: 58 training_loss 2.0790068423748016 test_loss: 2.031702423095703
epoch: 59 training_loss 2.013288424015045 test_loss: 1.9855234146118164
epoch: 60 training_loss 1.9921764826774597 test_loss: 1.9526134490966798
epoch: 61 training_loss 1.9746156024932862 test_loss: 2.074777603149414
epoch: 62 training_loss 1.9867122232913972 test_loss: 1.9418771743774415
epoch: 63 training_loss 1.9701404905319213 test_loss: 1.9191951751708984
epoch: 64 training_loss 1.96807408452034 test_loss: 1.9666343688964845
epoch: 65 training_loss 1.9310497868061065 test_loss: 1.8934125900268555
epoch: 66 training_loss 1.9421401083469392 test_loss: 1.9981138229370117
epoch: 67 training_loss 1.9423450255393981 test_loss: 1.8484519958496093
epoch: 68 training_loss 1.909442765712738 test_loss: 1.986502456665039
epoch: 69 training_loss 1.9284283590316773 test_loss: 1.846029281616211
epoch: 70 training_loss 1.89189080119133 test_loss: 1.9175260543823243
epoch: 71 training_loss 1.894774956703186 test_loss: 1.9674711227416992
epoch: 72 training_loss 1.8762597775459289 test_loss: 1.8611640930175781
epoch: 73 training_loss 1.8525190508365632 test_loss: 1.904019546508789
epoch: 74 training_loss 1.852429872751236 test_loss: 1.8062639236450195
epoch: 75 training_loss 1.8738463747501373 test_loss: 1.8320299148559571
epoch: 76 training_loss 1.8500332188606263 test_loss: 1.8946556091308593
epoch: 77 training_loss 1.8370167875289918 test_loss: 1.8483943939208984
epoch: 78 training_loss 1.8243286848068236 test_loss: 1.8448141098022461
epoch: 79 training_loss 1.808287388086319 test_loss: 1.8112638473510743
epoch: 80 training_loss 1.8199285984039306 test_loss: 1.8151485443115234
epoch: 81 training_loss 1.7907973217964173 test_loss: 1.8062366485595702
epoch: 82 training_loss 1.7902251386642456 test_loss: 1.7213098526000976
epoch: 83 training_loss 1.793997097015381 test_loss: 1.8233211517333985
epoch: 84 training_loss 1.785515388250351 test_loss: 1.749980354309082
epoch: 85 training_loss 1.788636907339096 test_loss: 1.7499523162841797
epoch: 86 training_loss 1.7499689936637879 test_loss: 1.767054557800293
epoch: 87 training_loss 1.7413964807987212 test_loss: 1.8528264999389648
epoch: 88 training_loss 1.7459200406074524 test_loss: 1.769594955444336
epoch: 89 training_loss 1.7609130227565766 test_loss: 1.756136703491211
epoch: 90 training_loss 1.7545820629596711 test_loss: 1.7809030532836914
epoch: 91 training_loss 1.7499880599975586 test_loss: 1.7949554443359375
epoch: 92 training_loss 1.7209108328819276 test_loss: 1.723470115661621
epoch: 93 training_loss 1.713651237487793 test_loss: 1.7438575744628906
epoch: 94 training_loss 1.7109407269954682 test_loss: 1.7318822860717773
epoch: 95 training_loss 1.7284848964214325 test_loss: 1.7829292297363282
epoch: 96 training_loss 1.7213121688365935 test_loss: 1.6886404037475586
epoch: 97 training_loss 1.6975164890289307 test_loss: 1.714836311340332
epoch: 98 training_loss 1.7324521160125732 test_loss: 1.7029035568237305
epoch: 99 training_loss 1.6995175361633301 test_loss: 1.700609016418457
epoch: 100 training_loss 1.7015891182422638 test_loss: 1.7763984680175782
epoch: 101 training_loss 1.7148963713645935 test_loss: 1.7165861129760742
epoch: 102 training_loss 1.6842086338996887 test_loss: 1.6993379592895508
epoch: 103 training_loss 1.6667083525657653 test_loss: 1.6596574783325195
epoch: 104 training_loss 1.6582374525070191 test_loss: 1.7214447021484376
epoch: 105 training_loss 1.683528927564621 test_loss: 1.654395866394043
epoch: 106 training_loss 1.694339417219162 test_loss: 1.6951282501220704
epoch: 107 training_loss 1.6905248987674713 test_loss: 1.6497013092041015
epoch: 108 training_loss 1.666217371225357 test_loss: 1.6703125
epoch: 109 training_loss 1.6641029262542724 test_loss: 1.6249273300170899
epoch: 110 training_loss 1.6635404825210571 test_loss: 1.6477296829223633
epoch: 111 training_loss 1.6448118317127227 test_loss: 1.6774534225463866
epoch: 112 training_loss 1.6316555154323578 test_loss: 1.6141323089599608
epoch: 113 training_loss 1.6219891488552094 test_loss: 1.6090614318847656
epoch: 114 training_loss 1.6555659222602843 test_loss: 1.6910621643066406
epoch: 115 training_loss 1.6188928711414337 test_loss: 1.6767950057983398
epoch: 116 training_loss 1.6179216015338898 test_loss: 1.649186897277832
epoch: 117 training_loss 1.6494197976589202 test_loss: 1.636661911010742
epoch: 118 training_loss 1.6298867762088776 test_loss: 1.629999542236328
epoch: 119 training_loss 1.6326204180717467 test_loss: 1.6294864654541015
epoch: 120 training_loss 1.6105982458591461 test_loss: 1.6516260147094726
epoch: 121 training_loss 1.5852965986728669 test_loss: 1.5750157356262207
epoch: 122 training_loss 1.6037574350833892 test_loss: 1.6043733596801757
epoch: 123 training_loss 1.5986784672737122 test_loss: 1.5788528442382812
epoch: 124 training_loss 1.5972609174251557 test_loss: 1.5976778984069824
epoch: 125 training_loss 1.583563746213913 test_loss: 1.6422311782836914
epoch: 126 training_loss 1.616663751602173 test_loss: 1.5842855453491211
epoch: 127 training_loss 1.6154758977890014 test_loss: 1.6624155044555664
epoch: 128 training_loss 1.5963476860523225 test_loss: 1.586599349975586
epoch: 129 training_loss 1.5701988589763642 test_loss: 1.5296934127807618
epoch: 130 training_loss 1.6006367349624633 test_loss: 1.5998778343200684
epoch: 131 training_loss 1.5834187054634095 test_loss: 1.6280256271362306
epoch: 132 training_loss 1.5716483652591706 test_loss: 1.5877825736999511
epoch: 133 training_loss 1.5966640424728393 test_loss: 1.690696907043457
epoch: 134 training_loss 1.5612239611148835 test_loss: 1.6259784698486328
epoch: 135 training_loss 1.5766429698467255 test_loss: 1.541420841217041
epoch: 136 training_loss 1.5532176065444947 test_loss: 1.6496971130371094
epoch: 137 training_loss 1.5684592413902283 test_loss: 1.5650867462158202
epoch: 138 training_loss 1.5758923888206482 test_loss: 1.589012908935547
epoch: 139 training_loss 1.5750494492053986 test_loss: 1.5679396629333495
epoch: 140 training_loss 1.562144047021866 test_loss: 1.5889935493469238
epoch: 141 training_loss 1.5626456761360168 test_loss: 1.5538017272949218
epoch: 142 training_loss 1.5874267899990082 test_loss: 1.557137393951416
epoch: 143 training_loss 1.5521027851104736 test_loss: 1.6309944152832032
epoch: 144 training_loss 1.541651109457016 test_loss: 1.560521125793457
epoch: 145 training_loss 1.5527030634880066 test_loss: 1.5876172065734864
epoch: 146 training_loss 1.5504221773147584 test_loss: 1.5582858085632325
epoch: 147 training_loss 1.5398393428325654 test_loss: 1.5500587463378905
epoch: 148 training_loss 1.5544514405727385 test_loss: 1.5172167778015138
epoch: 149 training_loss 1.546019504070282 test_loss: 1.5899168014526368
5096.1009340055725
episode: 0 training return: tensor(-21.5014, device='cuda:0')
episode: 1 training return: tensor(7.0351, device='cuda:0')
episode: 2 training return: tensor(1.9079, device='cuda:0')
episode: 3 training return: tensor(-104.5769, device='cuda:0')
epoch: 1 test_true_pfm: 4903.399644855518 sim_pfm: -169.31503273319686
episode: 4 training return: tensor(8.3423, device='cuda:0')
episode: 5 training return: tensor(-25.0163, device='cuda:0')
episode: 6 training return: tensor(-146.7254, device='cuda:0')
episode: 7 training return: tensor(30.5772, device='cuda:0')
epoch: 2 test_true_pfm: 4049.664259785683 sim_pfm: 71.29904329534232
episode: 8 training return: tensor(55.2365, device='cuda:0')
episode: 9 training return: tensor(10.1643, device='cuda:0')
episode: 10 training return: tensor(14.6449, device='cuda:0')
episode: 11 training return: tensor(-99.6577, device='cuda:0')
epoch: 3 test_true_pfm: 4735.0145710683955 sim_pfm: 1.5123010082752444
episode: 12 training return: tensor(-55.3916, device='cuda:0')
episode: 13 training return: tensor(6.6187, device='cuda:0')
episode: 14 training return: tensor(70.2869, device='cuda:0')
episode: 15 training return: tensor(98.2076, device='cuda:0')
epoch: 4 test_true_pfm: 5130.361936060369 sim_pfm: 183.95641730205776
episode: 16 training return: tensor(137.2136, device='cuda:0')
episode: 17 training return: tensor(94.9512, device='cuda:0')
episode: 18 training return: tensor(-55.1257, device='cuda:0')
episode: 19 training return: tensor(-79.3415, device='cuda:0')
epoch: 5 test_true_pfm: 5117.254246409912 sim_pfm: 160.9144677168612
episode: 20 training return: tensor(-136.4180, device='cuda:0')
episode: 21 training return: tensor(-29.6775, device='cuda:0')
episode: 22 training return: tensor(87.7005, device='cuda:0')
episode: 23 training return: tensor(-45.4765, device='cuda:0')
epoch: 6 test_true_pfm: 5191.528780701995 sim_pfm: 212.19963554440378
episode: 24 training return: tensor(28.0515, device='cuda:0')
episode: 25 training return: tensor(9.8242, device='cuda:0')
episode: 26 training return: tensor(64.6944, device='cuda:0')
episode: 27 training return: tensor(97.1027, device='cuda:0')
epoch: 7 test_true_pfm: 5176.564569256173 sim_pfm: 175.82266226187735
episode: 28 training return: tensor(212.5852, device='cuda:0')
episode: 29 training return: tensor(144.6757, device='cuda:0')
episode: 30 training return: tensor(26.1929, device='cuda:0')
episode: 31 training return: tensor(-1.8303, device='cuda:0')
epoch: 8 test_true_pfm: 5128.279980910395 sim_pfm: 153.6510608252332
episode: 32 training return: tensor(207.5595, device='cuda:0')
episode: 33 training return: tensor(58.0242, device='cuda:0')
episode: 34 training return: tensor(63.7624, device='cuda:0')
episode: 35 training return: tensor(30.6096, device='cuda:0')
epoch: 9 test_true_pfm: 5212.580830552732 sim_pfm: 289.56210768709815
episode: 36 training return: tensor(66.6226, device='cuda:0')
episode: 37 training return: tensor(54.7316, device='cuda:0')
episode: 38 training return: tensor(79.5004, device='cuda:0')
episode: 39 training return: tensor(254.9887, device='cuda:0')
epoch: 10 test_true_pfm: 5086.9285056446115 sim_pfm: 273.2908912675921
episode: 40 training return: tensor(159.5988, device='cuda:0')
episode: 41 training return: tensor(144.0295, device='cuda:0')
episode: 42 training return: tensor(162.6942, device='cuda:0')
episode: 43 training return: tensor(129.2754, device='cuda:0')
epoch: 11 test_true_pfm: 5302.891856249443 sim_pfm: 347.72104370683275
episode: 44 training return: tensor(-247.0457, device='cuda:0')
episode: 45 training return: tensor(117.3051, device='cuda:0')
episode: 46 training return: tensor(282.6854, device='cuda:0')
episode: 47 training return: tensor(160.1832, device='cuda:0')
epoch: 12 test_true_pfm: 5371.1774586892725 sim_pfm: 90.4326756807083
episode: 48 training return: tensor(226.8717, device='cuda:0')
episode: 49 training return: tensor(82.4130, device='cuda:0')
episode: 50 training return: tensor(118.0843, device='cuda:0')
episode: 51 training return: tensor(102.0066, device='cuda:0')
epoch: 13 test_true_pfm: 5286.160595634581 sim_pfm: 375.6052753777961
episode: 52 training return: tensor(192.8486, device='cuda:0')
episode: 53 training return: tensor(238.8751, device='cuda:0')
episode: 54 training return: tensor(243.6393, device='cuda:0')
episode: 55 training return: tensor(176.8522, device='cuda:0')
epoch: 14 test_true_pfm: 5483.283857424159 sim_pfm: 473.3196862669526
episode: 56 training return: tensor(100.1296, device='cuda:0')
episode: 57 training return: tensor(214.0032, device='cuda:0')
episode: 58 training return: tensor(272.4178, device='cuda:0')
episode: 59 training return: tensor(191.7866, device='cuda:0')
epoch: 15 test_true_pfm: 5409.415342847807 sim_pfm: 447.38719255492714
episode: 60 training return: tensor(82.0669, device='cuda:0')
episode: 61 training return: tensor(213.3328, device='cuda:0')
episode: 62 training return: tensor(275.6061, device='cuda:0')
episode: 63 training return: tensor(90.9246, device='cuda:0')
epoch: 16 test_true_pfm: 5543.413918080444 sim_pfm: 425.93121316314983
episode: 64 training return: tensor(328.7696, device='cuda:0')
episode: 65 training return: tensor(120.8679, device='cuda:0')
episode: 66 training return: tensor(271.8289, device='cuda:0')
episode: 67 training return: tensor(391.4219, device='cuda:0')
epoch: 17 test_true_pfm: 5535.104484527932 sim_pfm: 443.043194277018
episode: 68 training return: tensor(389.6101, device='cuda:0')
episode: 69 training return: tensor(276.2408, device='cuda:0')
episode: 70 training return: tensor(302.0547, device='cuda:0')
episode: 71 training return: tensor(200.5252, device='cuda:0')
epoch: 18 test_true_pfm: 5472.6114076301865 sim_pfm: 469.25494207651354
episode: 72 training return: tensor(205.2762, device='cuda:0')
episode: 73 training return: tensor(279.4904, device='cuda:0')
episode: 74 training return: tensor(365.8241, device='cuda:0')
episode: 75 training return: tensor(363.6714, device='cuda:0')
epoch: 19 test_true_pfm: 5578.928676785104 sim_pfm: 499.66226670728065
episode: 76 training return: tensor(355.5103, device='cuda:0')
episode: 77 training return: tensor(321.3456, device='cuda:0')
episode: 78 training return: tensor(231.0504, device='cuda:0')
episode: 79 training return: tensor(323.2864, device='cuda:0')
epoch: 20 test_true_pfm: 5571.542319718098 sim_pfm: 453.67618805172
episode: 80 training return: tensor(368.6214, device='cuda:0')
episode: 81 training return: tensor(294.7346, device='cuda:0')
episode: 82 training return: tensor(300.2958, device='cuda:0')
episode: 83 training return: tensor(242.5689, device='cuda:0')
epoch: 21 test_true_pfm: 5581.503989174929 sim_pfm: 507.4765646614251
episode: 84 training return: tensor(216.5133, device='cuda:0')
episode: 85 training return: tensor(344.4172, device='cuda:0')
episode: 86 training return: tensor(467.1147, device='cuda:0')
episode: 87 training return: tensor(377.9359, device='cuda:0')
epoch: 22 test_true_pfm: 5561.089167027646 sim_pfm: 533.5913831598979
episode: 88 training return: tensor(230.1580, device='cuda:0')
episode: 89 training return: tensor(169.9831, device='cuda:0')
episode: 90 training return: tensor(265.2532, device='cuda:0')
episode: 91 training return: tensor(334.6078, device='cuda:0')
epoch: 23 test_true_pfm: 5625.03546101322 sim_pfm: 498.7237446680568
episode: 92 training return: tensor(264.0523, device='cuda:0')
episode: 93 training return: tensor(402.5336, device='cuda:0')
episode: 94 training return: tensor(296.6765, device='cuda:0')
episode: 95 training return: tensor(470.8600, device='cuda:0')
epoch: 24 test_true_pfm: 5661.260017439963 sim_pfm: 553.8039275457073
episode: 96 training return: tensor(318.1154, device='cuda:0')
episode: 97 training return: tensor(364.3778, device='cuda:0')
episode: 98 training return: tensor(378.4297, device='cuda:0')
episode: 99 training return: tensor(275.7320, device='cuda:0')
epoch: 25 test_true_pfm: 5564.621728918421 sim_pfm: 528.3273938905913
episode: 100 training return: tensor(337.8264, device='cuda:0')
episode: 101 training return: tensor(465.0731, device='cuda:0')
episode: 102 training return: tensor(442.4546, device='cuda:0')
episode: 103 training return: tensor(462.7831, device='cuda:0')
epoch: 26 test_true_pfm: 5618.051879598176 sim_pfm: 509.4118657973595
episode: 104 training return: tensor(432.5825, device='cuda:0')
episode: 105 training return: tensor(498.2588, device='cuda:0')
episode: 106 training return: tensor(305.5827, device='cuda:0')
episode: 107 training return: tensor(389.9648, device='cuda:0')
epoch: 27 test_true_pfm: 5691.908582081421 sim_pfm: 544.3922992707618
episode: 108 training return: tensor(451.6952, device='cuda:0')
episode: 109 training return: tensor(383.1772, device='cuda:0')
episode: 110 training return: tensor(336.4517, device='cuda:0')
episode: 111 training return: tensor(427.6475, device='cuda:0')
epoch: 28 test_true_pfm: 5645.154043267093 sim_pfm: 584.2111832856608
episode: 112 training return: tensor(477.2518, device='cuda:0')
episode: 113 training return: tensor(420.1331, device='cuda:0')
episode: 114 training return: tensor(377.6629, device='cuda:0')
episode: 115 training return: tensor(533.6977, device='cuda:0')
epoch: 29 test_true_pfm: 5755.810699566333 sim_pfm: 517.7759597207574
episode: 116 training return: tensor(291.6640, device='cuda:0')
episode: 117 training return: tensor(429.2209, device='cuda:0')
episode: 118 training return: tensor(463.6532, device='cuda:0')
episode: 119 training return: tensor(459.4349, device='cuda:0')
epoch: 30 test_true_pfm: 5658.840346817022 sim_pfm: 553.2897512361329
episode: 120 training return: tensor(382.4236, device='cuda:0')
episode: 121 training return: tensor(422.2010, device='cuda:0')
episode: 122 training return: tensor(366.1532, device='cuda:0')
episode: 123 training return: tensor(351.2743, device='cuda:0')
epoch: 31 test_true_pfm: 5724.345349005307 sim_pfm: 638.4291852531023
episode: 124 training return: tensor(414.2663, device='cuda:0')
episode: 125 training return: tensor(414.7334, device='cuda:0')
episode: 126 training return: tensor(470.5668, device='cuda:0')
episode: 127 training return: tensor(492.8346, device='cuda:0')
epoch: 32 test_true_pfm: 5795.191017735127 sim_pfm: 590.1600080052546
episode: 128 training return: tensor(327.6023, device='cuda:0')
episode: 129 training return: tensor(439.2412, device='cuda:0')
episode: 130 training return: tensor(343.5073, device='cuda:0')
episode: 131 training return: tensor(414.7899, device='cuda:0')
epoch: 33 test_true_pfm: 5676.341564714331 sim_pfm: 604.6984043285483
episode: 132 training return: tensor(494.9442, device='cuda:0')
episode: 133 training return: tensor(540.8573, device='cuda:0')
episode: 134 training return: tensor(327.1010, device='cuda:0')
episode: 135 training return: tensor(333.6224, device='cuda:0')
epoch: 34 test_true_pfm: 5787.416436713923 sim_pfm: 588.3028387390077
episode: 136 training return: tensor(332.0992, device='cuda:0')
episode: 137 training return: tensor(349.5750, device='cuda:0')
episode: 138 training return: tensor(505.5698, device='cuda:0')
episode: 139 training return: tensor(454.2201, device='cuda:0')
epoch: 35 test_true_pfm: 5769.329667121659 sim_pfm: 615.9830678122138
episode: 140 training return: tensor(493.3795, device='cuda:0')
episode: 141 training return: tensor(493.3142, device='cuda:0')
episode: 142 training return: tensor(414.2682, device='cuda:0')
episode: 143 training return: tensor(427.3929, device='cuda:0')
epoch: 36 test_true_pfm: 5675.762011298895 sim_pfm: 609.5896995483587
episode: 144 training return: tensor(486.7628, device='cuda:0')
episode: 145 training return: tensor(537.0958, device='cuda:0')
episode: 146 training return: tensor(569.0616, device='cuda:0')
episode: 147 training return: tensor(487.9774, device='cuda:0')
epoch: 37 test_true_pfm: 5805.5187302749255 sim_pfm: 623.711793167245
episode: 148 training return: tensor(537.7850, device='cuda:0')
episode: 149 training return: tensor(494.5613, device='cuda:0')
episode: 150 training return: tensor(536.0177, device='cuda:0')
episode: 151 training return: tensor(543.6942, device='cuda:0')
epoch: 38 test_true_pfm: 5800.934729784323 sim_pfm: 621.7279646407405
episode: 152 training return: tensor(519.9651, device='cuda:0')
episode: 153 training return: tensor(491.1621, device='cuda:0')
episode: 154 training return: tensor(448.3844, device='cuda:0')
episode: 155 training return: tensor(440.3779, device='cuda:0')
epoch: 39 test_true_pfm: 5820.582565728655 sim_pfm: 643.0457964262168
episode: 156 training return: tensor(480.8896, device='cuda:0')
episode: 157 training return: tensor(522.5612, device='cuda:0')
episode: 158 training return: tensor(534.1477, device='cuda:0')
episode: 159 training return: tensor(442.5130, device='cuda:0')
epoch: 40 test_true_pfm: 5869.953778915729 sim_pfm: 610.8084717908836
episode: 160 training return: tensor(442.4199, device='cuda:0')
episode: 161 training return: tensor(492.9791, device='cuda:0')
episode: 162 training return: tensor(507.1652, device='cuda:0')
episode: 163 training return: tensor(511.7552, device='cuda:0')
epoch: 41 test_true_pfm: 5838.6102285037005 sim_pfm: 622.6754008819504
episode: 164 training return: tensor(532.2583, device='cuda:0')
episode: 165 training return: tensor(537.3653, device='cuda:0')
episode: 166 training return: tensor(394.9290, device='cuda:0')
episode: 167 training return: tensor(564.6404, device='cuda:0')
epoch: 42 test_true_pfm: 5790.780565549077 sim_pfm: 636.1767756002179
episode: 168 training return: tensor(476.0225, device='cuda:0')
episode: 169 training return: tensor(548.4764, device='cuda:0')
episode: 170 training return: tensor(595.9370, device='cuda:0')
episode: 171 training return: tensor(466.2879, device='cuda:0')
epoch: 43 test_true_pfm: 5765.932760396404 sim_pfm: 638.3190525022801
episode: 172 training return: tensor(560.3986, device='cuda:0')
episode: 173 training return: tensor(483.2412, device='cuda:0')
episode: 174 training return: tensor(516.9576, device='cuda:0')
episode: 175 training return: tensor(510.8561, device='cuda:0')
epoch: 44 test_true_pfm: 5829.133223982938 sim_pfm: 671.2301769867967
episode: 176 training return: tensor(451.4952, device='cuda:0')
episode: 177 training return: tensor(463.0264, device='cuda:0')
episode: 178 training return: tensor(-625.9135, device='cuda:0')
episode: 179 training return: tensor(506.0336, device='cuda:0')
epoch: 45 test_true_pfm: 5776.189701349081 sim_pfm: 674.5555682987518
episode: 180 training return: tensor(504.6391, device='cuda:0')
episode: 181 training return: tensor(493.0272, device='cuda:0')
episode: 182 training return: tensor(528.7795, device='cuda:0')
episode: 183 training return: tensor(506.1991, device='cuda:0')
epoch: 46 test_true_pfm: 5945.143589336924 sim_pfm: 623.9700964373575
episode: 184 training return: tensor(482.7502, device='cuda:0')
episode: 185 training return: tensor(538.3668, device='cuda:0')
episode: 186 training return: tensor(474.3471, device='cuda:0')
episode: 187 training return: tensor(448.2318, device='cuda:0')
epoch: 47 test_true_pfm: 5911.075839322519 sim_pfm: 657.6508046559369
episode: 188 training return: tensor(508.7465, device='cuda:0')
episode: 189 training return: tensor(551.8748, device='cuda:0')
episode: 190 training return: tensor(470.3324, device='cuda:0')
episode: 191 training return: tensor(558.4420, device='cuda:0')
epoch: 48 test_true_pfm: 5902.885541970328 sim_pfm: 624.8020747599949
episode: 192 training return: tensor(602.3759, device='cuda:0')
episode: 193 training return: tensor(529.6942, device='cuda:0')
episode: 194 training return: tensor(551.8347, device='cuda:0')
episode: 195 training return: tensor(481.6292, device='cuda:0')
epoch: 49 test_true_pfm: 5884.040865487023 sim_pfm: 668.0065122689508
episode: 196 training return: tensor(528.1063, device='cuda:0')
episode: 197 training return: tensor(414.0179, device='cuda:0')
episode: 198 training return: tensor(608.6263, device='cuda:0')
episode: 199 training return: tensor(553.9678, device='cuda:0')
epoch: 50 test_true_pfm: 5839.163840011267 sim_pfm: 646.9952685897393
episode: 200 training return: tensor(376.5176, device='cuda:0')
episode: 201 training return: tensor(534.0499, device='cuda:0')
episode: 202 training return: tensor(464.1779, device='cuda:0')
episode: 203 training return: tensor(531.6503, device='cuda:0')
epoch: 51 test_true_pfm: 5927.311342655635 sim_pfm: 641.4392977794245
episode: 204 training return: tensor(583.6631, device='cuda:0')
episode: 205 training return: tensor(524.3690, device='cuda:0')
episode: 206 training return: tensor(539.4767, device='cuda:0')
episode: 207 training return: tensor(476.3855, device='cuda:0')
epoch: 52 test_true_pfm: 5933.657890047308 sim_pfm: 676.4364725447571
episode: 208 training return: tensor(561.4000, device='cuda:0')
episode: 209 training return: tensor(558.6653, device='cuda:0')
episode: 210 training return: tensor(358.6003, device='cuda:0')
episode: 211 training return: tensor(504.1117, device='cuda:0')
epoch: 53 test_true_pfm: 5870.481963734915 sim_pfm: 616.7218644665458
episode: 212 training return: tensor(529.4881, device='cuda:0')
episode: 213 training return: tensor(538.5665, device='cuda:0')
episode: 214 training return: tensor(502.4840, device='cuda:0')
episode: 215 training return: tensor(569.7849, device='cuda:0')
epoch: 54 test_true_pfm: 5909.767850181648 sim_pfm: 674.6206272625908
episode: 216 training return: tensor(501.8995, device='cuda:0')
episode: 217 training return: tensor(467.3771, device='cuda:0')
episode: 218 training return: tensor(489.4701, device='cuda:0')
episode: 219 training return: tensor(546.5618, device='cuda:0')
epoch: 55 test_true_pfm: 5867.7042439009165 sim_pfm: 674.5044113577848
episode: 220 training return: tensor(605.1638, device='cuda:0')
episode: 221 training return: tensor(501.5679, device='cuda:0')
episode: 222 training return: tensor(464.9658, device='cuda:0')
episode: 223 training return: tensor(526.5953, device='cuda:0')
epoch: 56 test_true_pfm: 5853.841612561246 sim_pfm: 671.4891125771683
episode: 224 training return: tensor(504.8240, device='cuda:0')
episode: 225 training return: tensor(545.2838, device='cuda:0')
episode: 226 training return: tensor(516.6837, device='cuda:0')
episode: 227 training return: tensor(599.5825, device='cuda:0')
epoch: 57 test_true_pfm: 5846.372673240679 sim_pfm: 650.8799043793309
episode: 228 training return: tensor(542.8684, device='cuda:0')
episode: 229 training return: tensor(513.2449, device='cuda:0')
episode: 230 training return: tensor(602.9526, device='cuda:0')
episode: 231 training return: tensor(543.8279, device='cuda:0')
epoch: 58 test_true_pfm: 5899.095913168843 sim_pfm: 687.1438865056261
episode: 232 training return: tensor(553.0057, device='cuda:0')
episode: 233 training return: tensor(486.4966, device='cuda:0')
episode: 234 training return: tensor(550.9782, device='cuda:0')
episode: 235 training return: tensor(529.2802, device='cuda:0')
epoch: 59 test_true_pfm: 5839.101545750724 sim_pfm: 647.9625966420668
episode: 236 training return: tensor(577.0760, device='cuda:0')
episode: 237 training return: tensor(613.3596, device='cuda:0')
episode: 238 training return: tensor(559.1602, device='cuda:0')
episode: 239 training return: tensor(537.1650, device='cuda:0')
epoch: 60 test_true_pfm: 5935.4464184201015 sim_pfm: 680.8582795805609
episode: 240 training return: tensor(549.6367, device='cuda:0')
episode: 241 training return: tensor(570.5728, device='cuda:0')
episode: 242 training return: tensor(565.6852, device='cuda:0')
episode: 243 training return: tensor(454.9815, device='cuda:0')
epoch: 61 test_true_pfm: 5893.677002352026 sim_pfm: 646.5985981342965
episode: 244 training return: tensor(442.4073, device='cuda:0')
episode: 245 training return: tensor(617.4493, device='cuda:0')
episode: 246 training return: tensor(444.5746, device='cuda:0')
episode: 247 training return: tensor(560.9355, device='cuda:0')
epoch: 62 test_true_pfm: 5942.029246834114 sim_pfm: 675.3007619776375
episode: 248 training return: tensor(510.5783, device='cuda:0')
episode: 249 training return: tensor(475.0771, device='cuda:0')
episode: 250 training return: tensor(443.7616, device='cuda:0')
episode: 251 training return: tensor(501.4957, device='cuda:0')
epoch: 63 test_true_pfm: 5895.770694058706 sim_pfm: 660.7137139172797
episode: 252 training return: tensor(495.3133, device='cuda:0')
episode: 253 training return: tensor(576.1229, device='cuda:0')
episode: 254 training return: tensor(564.9965, device='cuda:0')
episode: 255 training return: tensor(560.7444, device='cuda:0')
epoch: 64 test_true_pfm: 5986.249709043631 sim_pfm: 666.4792093717648
episode: 256 training return: tensor(524.6736, device='cuda:0')
episode: 257 training return: tensor(497.2448, device='cuda:0')
episode: 258 training return: tensor(576.1285, device='cuda:0')
episode: 259 training return: tensor(581.3485, device='cuda:0')
epoch: 65 test_true_pfm: 5926.454465352715 sim_pfm: 673.697124625789
episode: 260 training return: tensor(592.6237, device='cuda:0')
episode: 261 training return: tensor(615.5787, device='cuda:0')
episode: 262 training return: tensor(469.2973, device='cuda:0')
episode: 263 training return: tensor(545.1266, device='cuda:0')
epoch: 66 test_true_pfm: 5930.350058531407 sim_pfm: 684.3327293767749
episode: 264 training return: tensor(568.5803, device='cuda:0')
episode: 265 training return: tensor(564.5276, device='cuda:0')
episode: 266 training return: tensor(549.6087, device='cuda:0')
episode: 267 training return: tensor(525.5100, device='cuda:0')
epoch: 67 test_true_pfm: 5909.183893681716 sim_pfm: 699.3798739199605
episode: 268 training return: tensor(400.6355, device='cuda:0')
episode: 269 training return: tensor(565.9208, device='cuda:0')
episode: 270 training return: tensor(525.9735, device='cuda:0')
episode: 271 training return: tensor(579.5811, device='cuda:0')
epoch: 68 test_true_pfm: 5933.376817089658 sim_pfm: 698.8571045724675
episode: 272 training return: tensor(586.7150, device='cuda:0')
episode: 273 training return: tensor(622.7001, device='cuda:0')
episode: 274 training return: tensor(571.6865, device='cuda:0')
episode: 275 training return: tensor(538.4922, device='cuda:0')
epoch: 69 test_true_pfm: 5964.918605217464 sim_pfm: 697.8354511774766
episode: 276 training return: tensor(552.7762, device='cuda:0')
episode: 277 training return: tensor(602.6144, device='cuda:0')
episode: 278 training return: tensor(507.4438, device='cuda:0')
episode: 279 training return: tensor(564.7930, device='cuda:0')
epoch: 70 test_true_pfm: 5855.839650027879 sim_pfm: 654.7565323919989
episode: 280 training return: tensor(526.6305, device='cuda:0')
episode: 281 training return: tensor(575.0627, device='cuda:0')
episode: 282 training return: tensor(668.9663, device='cuda:0')
episode: 283 training return: tensor(624.3439, device='cuda:0')
epoch: 71 test_true_pfm: 5927.424005282582 sim_pfm: 704.0146554068973
episode: 284 training return: tensor(609.0596, device='cuda:0')
episode: 285 training return: tensor(556.2017, device='cuda:0')
episode: 286 training return: tensor(586.1071, device='cuda:0')
episode: 287 training return: tensor(579.8769, device='cuda:0')
epoch: 72 test_true_pfm: 5963.015609691305 sim_pfm: 694.1689824524414
episode: 288 training return: tensor(564.4844, device='cuda:0')
episode: 289 training return: tensor(630.5529, device='cuda:0')
episode: 290 training return: tensor(556.7319, device='cuda:0')
episode: 291 training return: tensor(565.1388, device='cuda:0')
epoch: 73 test_true_pfm: 5867.000187732782 sim_pfm: 677.3168072829916
episode: 292 training return: tensor(585.8017, device='cuda:0')
episode: 293 training return: tensor(546.3286, device='cuda:0')
episode: 294 training return: tensor(587.1552, device='cuda:0')
episode: 295 training return: tensor(496.1003, device='cuda:0')
epoch: 74 test_true_pfm: 5920.6471812339005 sim_pfm: 661.112688016321
episode: 296 training return: tensor(527.8433, device='cuda:0')
episode: 297 training return: tensor(565.7881, device='cuda:0')
episode: 298 training return: tensor(544.5081, device='cuda:0')
episode: 299 training return: tensor(556.3681, device='cuda:0')
epoch: 75 test_true_pfm: 5942.7314089315705 sim_pfm: 692.7468223887845
episode: 300 training return: tensor(565.3621, device='cuda:0')
episode: 301 training return: tensor(591.2366, device='cuda:0')
episode: 302 training return: tensor(502.0659, device='cuda:0')
episode: 303 training return: tensor(632.2477, device='cuda:0')
epoch: 76 test_true_pfm: 5966.9977387552935 sim_pfm: 691.1109437685615
episode: 304 training return: tensor(555.1478, device='cuda:0')
episode: 305 training return: tensor(614.4944, device='cuda:0')
episode: 306 training return: tensor(621.3564, device='cuda:0')
episode: 307 training return: tensor(533.0215, device='cuda:0')
epoch: 77 test_true_pfm: 5987.026267974823 sim_pfm: 707.3957711504385
episode: 308 training return: tensor(608.8505, device='cuda:0')
episode: 309 training return: tensor(562.1647, device='cuda:0')
episode: 310 training return: tensor(486.9729, device='cuda:0')
episode: 311 training return: tensor(584.0208, device='cuda:0')
epoch: 78 test_true_pfm: 5964.164972778429 sim_pfm: 715.0320934967409
episode: 312 training return: tensor(634.9119, device='cuda:0')
episode: 313 training return: tensor(539.8875, device='cuda:0')
episode: 314 training return: tensor(624.3580, device='cuda:0')
episode: 315 training return: tensor(618.7300, device='cuda:0')
epoch: 79 test_true_pfm: 6016.439119589057 sim_pfm: 679.2498012906677
episode: 316 training return: tensor(512.3128, device='cuda:0')
episode: 317 training return: tensor(595.2285, device='cuda:0')
episode: 318 training return: tensor(579.9227, device='cuda:0')
episode: 319 training return: tensor(547.9169, device='cuda:0')
epoch: 80 test_true_pfm: 5993.798462358643 sim_pfm: 714.8317084885202
episode: 320 training return: tensor(589.6755, device='cuda:0')
episode: 321 training return: tensor(615.7542, device='cuda:0')
episode: 322 training return: tensor(567.1881, device='cuda:0')
episode: 323 training return: tensor(529.4081, device='cuda:0')
epoch: 81 test_true_pfm: 5915.496500068679 sim_pfm: 673.0762294339947
episode: 324 training return: tensor(579.3369, device='cuda:0')
episode: 325 training return: tensor(661.2788, device='cuda:0')
episode: 326 training return: tensor(549.1490, device='cuda:0')
episode: 327 training return: tensor(564.1125, device='cuda:0')
epoch: 82 test_true_pfm: 5988.31326732769 sim_pfm: 709.7915506021042
episode: 328 training return: tensor(574.8687, device='cuda:0')
episode: 329 training return: tensor(559.8919, device='cuda:0')
episode: 330 training return: tensor(586.7123, device='cuda:0')
episode: 331 training return: tensor(575.3309, device='cuda:0')
epoch: 83 test_true_pfm: 5997.290212921471 sim_pfm: 699.0736050732958
episode: 332 training return: tensor(614.3002, device='cuda:0')
episode: 333 training return: tensor(553.1531, device='cuda:0')
episode: 334 training return: tensor(509.6616, device='cuda:0')
episode: 335 training return: tensor(593.2220, device='cuda:0')
epoch: 84 test_true_pfm: 5931.630444592359 sim_pfm: 724.1112609958509
episode: 336 training return: tensor(700.8027, device='cuda:0')
episode: 337 training return: tensor(604.4353, device='cuda:0')
episode: 338 training return: tensor(572.1278, device='cuda:0')
episode: 339 training return: tensor(535.3600, device='cuda:0')
epoch: 85 test_true_pfm: 5923.236488694718 sim_pfm: 689.4306787274545
episode: 340 training return: tensor(637.0924, device='cuda:0')
episode: 341 training return: tensor(595.7212, device='cuda:0')
episode: 342 training return: tensor(570.3668, device='cuda:0')
episode: 343 training return: tensor(600.6317, device='cuda:0')
epoch: 86 test_true_pfm: 5980.984188063797 sim_pfm: 706.4877620407302
episode: 344 training return: tensor(549.7296, device='cuda:0')
episode: 345 training return: tensor(656.6243, device='cuda:0')
episode: 346 training return: tensor(537.0438, device='cuda:0')
episode: 347 training return: tensor(570.0360, device='cuda:0')
epoch: 87 test_true_pfm: 6005.278451525308 sim_pfm: 694.1222130102979
episode: 348 training return: tensor(602.7828, device='cuda:0')
episode: 349 training return: tensor(589.2614, device='cuda:0')
episode: 350 training return: tensor(540.5564, device='cuda:0')
episode: 351 training return: tensor(607.2041, device='cuda:0')
epoch: 88 test_true_pfm: 6015.157710135981 sim_pfm: 688.5495248961573
episode: 352 training return: tensor(536.8875, device='cuda:0')
episode: 353 training return: tensor(623.7127, device='cuda:0')
episode: 354 training return: tensor(628.1545, device='cuda:0')
episode: 355 training return: tensor(589.8918, device='cuda:0')
epoch: 89 test_true_pfm: 5983.224837200626 sim_pfm: 712.0342571055129
episode: 356 training return: tensor(612.9082, device='cuda:0')
episode: 357 training return: tensor(557.3986, device='cuda:0')
episode: 358 training return: tensor(460.6217, device='cuda:0')
episode: 359 training return: tensor(618.4666, device='cuda:0')
epoch: 90 test_true_pfm: 6038.430229285353 sim_pfm: 707.8848776773472
episode: 360 training return: tensor(617.7952, device='cuda:0')
episode: 361 training return: tensor(576.0635, device='cuda:0')
episode: 362 training return: tensor(609.6615, device='cuda:0')
episode: 363 training return: tensor(597.8159, device='cuda:0')
epoch: 91 test_true_pfm: 6012.833671761292 sim_pfm: 705.2334012138502
episode: 364 training return: tensor(612.4339, device='cuda:0')
episode: 365 training return: tensor(590.1340, device='cuda:0')
episode: 366 training return: tensor(541.1131, device='cuda:0')
episode: 367 training return: tensor(590.0191, device='cuda:0')
epoch: 92 test_true_pfm: 5983.925231699143 sim_pfm: 709.6321207190243
episode: 368 training return: tensor(548.1736, device='cuda:0')
episode: 369 training return: tensor(493.0938, device='cuda:0')
episode: 370 training return: tensor(495.5779, device='cuda:0')
episode: 371 training return: tensor(626.1151, device='cuda:0')
epoch: 93 test_true_pfm: 6011.084094618377 sim_pfm: 691.9485896278638
episode: 372 training return: tensor(544.4823, device='cuda:0')
episode: 373 training return: tensor(426.1911, device='cuda:0')
episode: 374 training return: tensor(647.2316, device='cuda:0')
episode: 375 training return: tensor(607.1262, device='cuda:0')
epoch: 94 test_true_pfm: 5998.21732232112 sim_pfm: 705.2335093175061
episode: 376 training return: tensor(505.0103, device='cuda:0')
episode: 377 training return: tensor(657.7678, device='cuda:0')
episode: 378 training return: tensor(632.6783, device='cuda:0')
episode: 379 training return: tensor(574.3439, device='cuda:0')
epoch: 95 test_true_pfm: 6003.575573839917 sim_pfm: 670.778662851158
episode: 380 training return: tensor(657.2476, device='cuda:0')
episode: 381 training return: tensor(593.2220, device='cuda:0')
episode: 382 training return: tensor(546.1884, device='cuda:0')
episode: 383 training return: tensor(615.9553, device='cuda:0')
epoch: 96 test_true_pfm: 5984.655988286496 sim_pfm: 719.605331557182
episode: 384 training return: tensor(620.7673, device='cuda:0')
episode: 385 training return: tensor(622.5820, device='cuda:0')
episode: 386 training return: tensor(615.2084, device='cuda:0')
episode: 387 training return: tensor(634.6258, device='cuda:0')
epoch: 97 test_true_pfm: 6018.5234886814915 sim_pfm: 710.6103672660441
episode: 388 training return: tensor(585.6544, device='cuda:0')
episode: 389 training return: tensor(666.5356, device='cuda:0')
episode: 390 training return: tensor(588.9818, device='cuda:0')
episode: 391 training return: tensor(574.8805, device='cuda:0')
epoch: 98 test_true_pfm: 6076.702354283938 sim_pfm: 740.4611841758402
episode: 392 training return: tensor(570.7610, device='cuda:0')
episode: 393 training return: tensor(606.1180, device='cuda:0')
episode: 394 training return: tensor(627.3387, device='cuda:0')
episode: 395 training return: tensor(619.8092, device='cuda:0')
epoch: 99 test_true_pfm: 5996.685969881034 sim_pfm: 703.8119618753748
episode: 396 training return: tensor(517.4746, device='cuda:0')
episode: 397 training return: tensor(664.9501, device='cuda:0')
episode: 398 training return: tensor(615.0434, device='cuda:0')
episode: 399 training return: tensor(622.8857, device='cuda:0')
epoch: 100 test_true_pfm: 6005.457801423847 sim_pfm: 702.4528144933283
episode: 400 training return: tensor(630.8709, device='cuda:0')
episode: 401 training return: tensor(639.7145, device='cuda:0')
episode: 402 training return: tensor(590.5088, device='cuda:0')
episode: 403 training return: tensor(645.2615, device='cuda:0')
epoch: 101 test_true_pfm: 5997.3632213133 sim_pfm: 744.4745095518883
episode: 404 training return: tensor(577.3866, device='cuda:0')
episode: 405 training return: tensor(579.5627, device='cuda:0')
episode: 406 training return: tensor(638.7236, device='cuda:0')
episode: 407 training return: tensor(647.8556, device='cuda:0')
epoch: 102 test_true_pfm: 6020.779137915714 sim_pfm: 714.441351411088
episode: 408 training return: tensor(664.4658, device='cuda:0')
episode: 409 training return: tensor(660.0090, device='cuda:0')
episode: 410 training return: tensor(623.8319, device='cuda:0')
episode: 411 training return: tensor(636.8586, device='cuda:0')
epoch: 103 test_true_pfm: 5980.149947521523 sim_pfm: 700.0924614885201
episode: 412 training return: tensor(665.1056, device='cuda:0')
episode: 413 training return: tensor(602.8835, device='cuda:0')
episode: 414 training return: tensor(546.2021, device='cuda:0')
episode: 415 training return: tensor(598.9776, device='cuda:0')
epoch: 104 test_true_pfm: 5970.787893785445 sim_pfm: 743.8997464521477
episode: 416 training return: tensor(573.9564, device='cuda:0')
episode: 417 training return: tensor(604.1320, device='cuda:0')
episode: 418 training return: tensor(607.7056, device='cuda:0')
episode: 419 training return: tensor(559.5897, device='cuda:0')
epoch: 105 test_true_pfm: 6030.661497845139 sim_pfm: 711.7104476661189
episode: 420 training return: tensor(617.7934, device='cuda:0')
episode: 421 training return: tensor(639.4348, device='cuda:0')
episode: 422 training return: tensor(557.3935, device='cuda:0')
episode: 423 training return: tensor(583.7372, device='cuda:0')
epoch: 106 test_true_pfm: 6055.923968867457 sim_pfm: 731.6247570406025
episode: 424 training return: tensor(595.3971, device='cuda:0')
episode: 425 training return: tensor(551.1791, device='cuda:0')
episode: 426 training return: tensor(590.5969, device='cuda:0')
episode: 427 training return: tensor(587.5200, device='cuda:0')
epoch: 107 test_true_pfm: 6044.320579421702 sim_pfm: 749.7476294143902
episode: 428 training return: tensor(611.4346, device='cuda:0')
episode: 429 training return: tensor(652.5114, device='cuda:0')
episode: 430 training return: tensor(630.2234, device='cuda:0')
episode: 431 training return: tensor(579.1531, device='cuda:0')
epoch: 108 test_true_pfm: 6038.595686589979 sim_pfm: 686.6857475627621
episode: 432 training return: tensor(657.0073, device='cuda:0')
episode: 433 training return: tensor(602.7750, device='cuda:0')
episode: 434 training return: tensor(610.3137, device='cuda:0')
episode: 435 training return: tensor(668.1115, device='cuda:0')
epoch: 109 test_true_pfm: 5964.562435015682 sim_pfm: 717.9553887969038
episode: 436 training return: tensor(527.6746, device='cuda:0')
episode: 437 training return: tensor(571.6180, device='cuda:0')
episode: 438 training return: tensor(553.7220, device='cuda:0')
episode: 439 training return: tensor(577.5173, device='cuda:0')
epoch: 110 test_true_pfm: 6049.755227817374 sim_pfm: 740.1257404499144
episode: 440 training return: tensor(671.5790, device='cuda:0')
episode: 441 training return: tensor(646.6410, device='cuda:0')
episode: 442 training return: tensor(662.7422, device='cuda:0')
episode: 443 training return: tensor(616.4010, device='cuda:0')
epoch: 111 test_true_pfm: 5999.19555233984 sim_pfm: 735.7427887444695
episode: 444 training return: tensor(639.3491, device='cuda:0')
episode: 445 training return: tensor(620.7094, device='cuda:0')
episode: 446 training return: tensor(605.8292, device='cuda:0')
episode: 447 training return: tensor(607.7878, device='cuda:0')
epoch: 112 test_true_pfm: 5985.849034792746 sim_pfm: 693.007516454556
episode: 448 training return: tensor(690.5134, device='cuda:0')
episode: 449 training return: tensor(642.0324, device='cuda:0')
episode: 450 training return: tensor(607.6296, device='cuda:0')
episode: 451 training return: tensor(591.1006, device='cuda:0')
epoch: 113 test_true_pfm: 6100.19277533892 sim_pfm: 749.5555639206432
episode: 452 training return: tensor(581.6970, device='cuda:0')
episode: 453 training return: tensor(620.4180, device='cuda:0')
episode: 454 training return: tensor(605.5259, device='cuda:0')
episode: 455 training return: tensor(622.7903, device='cuda:0')
epoch: 114 test_true_pfm: 5914.671287321562 sim_pfm: 743.830420783294
episode: 456 training return: tensor(594.3156, device='cuda:0')
episode: 457 training return: tensor(633.6257, device='cuda:0')
episode: 458 training return: tensor(610.7492, device='cuda:0')
episode: 459 training return: tensor(633.3217, device='cuda:0')
epoch: 115 test_true_pfm: 6033.506605885664 sim_pfm: 731.2552755981063
episode: 460 training return: tensor(557.1303, device='cuda:0')
episode: 461 training return: tensor(616.0483, device='cuda:0')
episode: 462 training return: tensor(671.0656, device='cuda:0')
episode: 463 training return: tensor(584.7906, device='cuda:0')
epoch: 116 test_true_pfm: 6043.98054099749 sim_pfm: 737.3204278776733
episode: 464 training return: tensor(592.4216, device='cuda:0')
episode: 465 training return: tensor(616.9873, device='cuda:0')
episode: 466 training return: tensor(612.9611, device='cuda:0')
episode: 467 training return: tensor(684.3364, device='cuda:0')
epoch: 117 test_true_pfm: 6044.200151950782 sim_pfm: 723.4086661680291
episode: 468 training return: tensor(666.6151, device='cuda:0')
episode: 469 training return: tensor(560.7017, device='cuda:0')
episode: 470 training return: tensor(543.2261, device='cuda:0')
episode: 471 training return: tensor(650.5711, device='cuda:0')
epoch: 118 test_true_pfm: 6056.141739539228 sim_pfm: 741.8144521980236
episode: 472 training return: tensor(581.5145, device='cuda:0')
episode: 473 training return: tensor(583.8666, device='cuda:0')
episode: 474 training return: tensor(648.1290, device='cuda:0')
episode: 475 training return: tensor(593.2493, device='cuda:0')
epoch: 119 test_true_pfm: 6038.189768039304 sim_pfm: 725.4384318826875
episode: 476 training return: tensor(633.6542, device='cuda:0')
episode: 477 training return: tensor(641.3892, device='cuda:0')
episode: 478 training return: tensor(639.0249, device='cuda:0')
episode: 479 training return: tensor(613.2592, device='cuda:0')
epoch: 120 test_true_pfm: 6071.706613281688 sim_pfm: 709.629622391299
episode: 480 training return: tensor(609.0208, device='cuda:0')
episode: 481 training return: tensor(653.1131, device='cuda:0')
episode: 482 training return: tensor(675.5121, device='cuda:0')
episode: 483 training return: tensor(564.2833, device='cuda:0')
epoch: 121 test_true_pfm: 6014.70129112381 sim_pfm: 732.4229218356777
episode: 484 training return: tensor(642.0621, device='cuda:0')
episode: 485 training return: tensor(593.2479, device='cuda:0')
episode: 486 training return: tensor(663.3779, device='cuda:0')
episode: 487 training return: tensor(612.5381, device='cuda:0')
epoch: 122 test_true_pfm: 6049.588663524257 sim_pfm: 710.8322815691741
episode: 488 training return: tensor(651.4150, device='cuda:0')
episode: 489 training return: tensor(664.8997, device='cuda:0')
episode: 490 training return: tensor(662.8069, device='cuda:0')
episode: 491 training return: tensor(575.7478, device='cuda:0')
epoch: 123 test_true_pfm: 6046.523820503356 sim_pfm: 719.0053697677018
episode: 492 training return: tensor(545.3124, device='cuda:0')
episode: 493 training return: tensor(666.4194, device='cuda:0')
episode: 494 training return: tensor(674.7785, device='cuda:0')
episode: 495 training return: tensor(592.9574, device='cuda:0')
epoch: 124 test_true_pfm: 6066.381063494066 sim_pfm: 731.8315297641578
episode: 496 training return: tensor(589.7269, device='cuda:0')
episode: 497 training return: tensor(600.6761, device='cuda:0')
episode: 498 training return: tensor(623.7020, device='cuda:0')
episode: 499 training return: tensor(488.5002, device='cuda:0')
epoch: 125 test_true_pfm: 6096.504863721697 sim_pfm: 715.3279222083123
episode: 500 training return: tensor(650.2836, device='cuda:0')
episode: 501 training return: tensor(540.6545, device='cuda:0')
episode: 502 training return: tensor(547.4578, device='cuda:0')
episode: 503 training return: tensor(599.4653, device='cuda:0')
epoch: 126 test_true_pfm: 6121.112701845854 sim_pfm: 716.8883303613208
episode: 504 training return: tensor(645.8082, device='cuda:0')
episode: 505 training return: tensor(608.2229, device='cuda:0')
episode: 506 training return: tensor(646.0429, device='cuda:0')
episode: 507 training return: tensor(612.1531, device='cuda:0')
epoch: 127 test_true_pfm: 6092.813459559649 sim_pfm: 727.4815616395014
episode: 508 training return: tensor(588.4645, device='cuda:0')
episode: 509 training return: tensor(709.9570, device='cuda:0')
episode: 510 training return: tensor(647.0782, device='cuda:0')
episode: 511 training return: tensor(633.1157, device='cuda:0')
epoch: 128 test_true_pfm: 6039.793099380592 sim_pfm: 722.0478482435768
episode: 512 training return: tensor(610.5002, device='cuda:0')
episode: 513 training return: tensor(661.5278, device='cuda:0')
episode: 514 training return: tensor(646.4228, device='cuda:0')
episode: 515 training return: tensor(595.4612, device='cuda:0')
epoch: 129 test_true_pfm: 6019.888810847252 sim_pfm: 722.8212564657928
episode: 516 training return: tensor(598.5421, device='cuda:0')
episode: 517 training return: tensor(605.9584, device='cuda:0')
episode: 518 training return: tensor(611.7749, device='cuda:0')
episode: 519 training return: tensor(634.3193, device='cuda:0')
epoch: 130 test_true_pfm: 6046.762238339269 sim_pfm: 756.5151700592445
episode: 520 training return: tensor(605.9012, device='cuda:0')
episode: 521 training return: tensor(604.9242, device='cuda:0')
episode: 522 training return: tensor(566.8320, device='cuda:0')
episode: 523 training return: tensor(678.7427, device='cuda:0')
epoch: 131 test_true_pfm: 6062.03989670276 sim_pfm: 747.8682868897449
episode: 524 training return: tensor(644.7782, device='cuda:0')
episode: 525 training return: tensor(616.0148, device='cuda:0')
episode: 526 training return: tensor(570.1699, device='cuda:0')
episode: 527 training return: tensor(556.6267, device='cuda:0')
epoch: 132 test_true_pfm: 6010.969693524822 sim_pfm: 724.1548070602585
episode: 528 training return: tensor(547.6589, device='cuda:0')
episode: 529 training return: tensor(609.1954, device='cuda:0')
episode: 530 training return: tensor(627.6400, device='cuda:0')
episode: 531 training return: tensor(578.7623, device='cuda:0')
epoch: 133 test_true_pfm: 6042.678768523296 sim_pfm: 708.8435204361255
episode: 532 training return: tensor(625.1112, device='cuda:0')
episode: 533 training return: tensor(615.0510, device='cuda:0')
episode: 534 training return: tensor(625.2113, device='cuda:0')
episode: 535 training return: tensor(642.3033, device='cuda:0')
epoch: 134 test_true_pfm: 6106.014212417421 sim_pfm: 746.6407944640863
episode: 536 training return: tensor(636.5913, device='cuda:0')
episode: 537 training return: tensor(571.9297, device='cuda:0')
episode: 538 training return: tensor(594.5737, device='cuda:0')
episode: 539 training return: tensor(606.3886, device='cuda:0')
epoch: 135 test_true_pfm: 6095.378543657447 sim_pfm: 760.7454376367774
episode: 540 training return: tensor(603.0209, device='cuda:0')
episode: 541 training return: tensor(615.4119, device='cuda:0')
episode: 542 training return: tensor(600.2692, device='cuda:0')
episode: 543 training return: tensor(620.8246, device='cuda:0')
epoch: 136 test_true_pfm: 6067.681746808533 sim_pfm: 744.7210026217314
episode: 544 training return: tensor(644.8214, device='cuda:0')
episode: 545 training return: tensor(608.7730, device='cuda:0')
episode: 546 training return: tensor(648.3253, device='cuda:0')
episode: 547 training return: tensor(630.1778, device='cuda:0')
epoch: 137 test_true_pfm: 6114.781631721879 sim_pfm: 752.8588280590484
episode: 548 training return: tensor(656.0292, device='cuda:0')
episode: 549 training return: tensor(501.9045, device='cuda:0')
episode: 550 training return: tensor(651.1422, device='cuda:0')
episode: 551 training return: tensor(628.9658, device='cuda:0')
epoch: 138 test_true_pfm: 6058.183209839982 sim_pfm: 739.2552614765397
episode: 552 training return: tensor(701.2853, device='cuda:0')
episode: 553 training return: tensor(676.4755, device='cuda:0')
episode: 554 training return: tensor(625.1328, device='cuda:0')
episode: 555 training return: tensor(675.7307, device='cuda:0')
epoch: 139 test_true_pfm: 6115.802241051583 sim_pfm: 750.7291446075542
episode: 556 training return: tensor(607.3327, device='cuda:0')
episode: 557 training return: tensor(655.6732, device='cuda:0')
episode: 558 training return: tensor(625.0932, device='cuda:0')
episode: 559 training return: tensor(554.8016, device='cuda:0')
epoch: 140 test_true_pfm: 6094.060970107633 sim_pfm: 755.5666782061259
episode: 560 training return: tensor(612.4249, device='cuda:0')
episode: 561 training return: tensor(670.0367, device='cuda:0')
episode: 562 training return: tensor(640.2415, device='cuda:0')
episode: 563 training return: tensor(669.8333, device='cuda:0')
epoch: 141 test_true_pfm: 6014.844303873132 sim_pfm: 722.3004421750084
episode: 564 training return: tensor(553.7930, device='cuda:0')
episode: 565 training return: tensor(693.2317, device='cuda:0')
episode: 566 training return: tensor(602.7286, device='cuda:0')
episode: 567 training return: tensor(649.6316, device='cuda:0')
epoch: 142 test_true_pfm: 6045.85223134953 sim_pfm: 715.9953617972011
episode: 568 training return: tensor(695.0740, device='cuda:0')
episode: 569 training return: tensor(601.9869, device='cuda:0')
episode: 570 training return: tensor(631.4477, device='cuda:0')
episode: 571 training return: tensor(669.7409, device='cuda:0')
epoch: 143 test_true_pfm: 6054.185203767701 sim_pfm: 745.4785366418461
episode: 572 training return: tensor(634.5048, device='cuda:0')
episode: 573 training return: tensor(646.2009, device='cuda:0')
episode: 574 training return: tensor(651.8768, device='cuda:0')
episode: 575 training return: tensor(631.7512, device='cuda:0')
epoch: 144 test_true_pfm: 6069.4083659306925 sim_pfm: 741.8825345109217
episode: 576 training return: tensor(631.0353, device='cuda:0')
episode: 577 training return: tensor(646.0269, device='cuda:0')
episode: 578 training return: tensor(604.0500, device='cuda:0')
episode: 579 training return: tensor(610.7206, device='cuda:0')
epoch: 145 test_true_pfm: 6080.594315344976 sim_pfm: 737.3799312739478
episode: 580 training return: tensor(639.7726, device='cuda:0')
episode: 581 training return: tensor(622.1639, device='cuda:0')
episode: 582 training return: tensor(613.0538, device='cuda:0')
episode: 583 training return: tensor(655.1325, device='cuda:0')
epoch: 146 test_true_pfm: 6060.2319929084015 sim_pfm: 751.8532397378391
episode: 584 training return: tensor(651.6461, device='cuda:0')
episode: 585 training return: tensor(526.7584, device='cuda:0')
episode: 586 training return: tensor(568.9053, device='cuda:0')
episode: 587 training return: tensor(672.3743, device='cuda:0')
epoch: 147 test_true_pfm: 6030.816059793887 sim_pfm: 743.0078661165511
episode: 588 training return: tensor(612.8177, device='cuda:0')
episode: 589 training return: tensor(652.4615, device='cuda:0')
episode: 590 training return: tensor(640.8137, device='cuda:0')
episode: 591 training return: tensor(613.9108, device='cuda:0')
epoch: 148 test_true_pfm: 6074.484064497262 sim_pfm: 743.402981196375
episode: 592 training return: tensor(617.1146, device='cuda:0')
episode: 593 training return: tensor(629.8771, device='cuda:0')
episode: 594 training return: tensor(703.0373, device='cuda:0')
episode: 595 training return: tensor(649.0431, device='cuda:0')
epoch: 149 test_true_pfm: 6073.809589930451 sim_pfm: 737.489399324831
episode: 596 training return: tensor(672.2449, device='cuda:0')
episode: 597 training return: tensor(615.7258, device='cuda:0')
episode: 598 training return: tensor(665.1064, device='cuda:0')
episode: 599 training return: tensor(674.3153, device='cuda:0')
epoch: 150 test_true_pfm: 6055.642678365289 sim_pfm: 763.1144228746416
