['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'expert', '--seed', '0']
epoch: 0 training_loss 0.26610870629549027 test_loss: 0.21437251567840576
epoch: 1 training_loss 0.18217661798000337 test_loss: 0.16664817333221435
epoch: 2 training_loss 0.16989023372530937 test_loss: 0.1696562647819519
epoch: 3 training_loss 0.1631736633181572 test_loss: 0.14357661008834838
epoch: 4 training_loss 0.15190084800124168 test_loss: 0.1325128197669983
epoch: 5 training_loss 0.15259335823357106 test_loss: 0.13115310668945312
epoch: 6 training_loss 0.14099141750484706 test_loss: 0.14811307191848755
epoch: 7 training_loss 0.15419787164777518 test_loss: 0.12787245512008666
epoch: 8 training_loss 0.13879163019359111 test_loss: 0.17281064987182618
epoch: 9 training_loss 0.1427746006473899 test_loss: 0.1328058958053589
epoch: 10 training_loss 0.1352738168463111 test_loss: 0.12742549180984497
epoch: 11 training_loss 0.1370406886190176 test_loss: 0.10968798398971558
epoch: 12 training_loss 0.1299628921970725 test_loss: 0.12759121656417846
epoch: 13 training_loss 0.11710920941084624 test_loss: 0.12370994091033935
epoch: 14 training_loss 0.1320169171318412 test_loss: 0.13098497390747071
epoch: 15 training_loss 0.13946197979152203 test_loss: 0.16685470342636108
epoch: 16 training_loss 0.13406393505632877 test_loss: 0.13325774669647217
epoch: 17 training_loss 0.11969220008701086 test_loss: 0.1219648003578186
epoch: 18 training_loss 0.12237746633589268 test_loss: 0.1147044062614441
epoch: 19 training_loss 0.1292643290385604 test_loss: 0.12081658840179443
epoch: 20 training_loss 0.1308028334006667 test_loss: 0.12047569751739502
epoch: 21 training_loss 0.12161261921748519 test_loss: 0.11870377063751221
epoch: 22 training_loss 0.12247299950569868 test_loss: 0.10510594844818115
epoch: 23 training_loss 0.1319961404800415 test_loss: 0.11862343549728394
epoch: 24 training_loss 0.13165496256202458 test_loss: 0.129442036151886
epoch: 25 training_loss 0.13231913633644582 test_loss: 0.12038216590881348
epoch: 26 training_loss 0.1223560756444931 test_loss: 0.1113621473312378
epoch: 27 training_loss 0.12848488222807647 test_loss: 0.13408949375152587
epoch: 28 training_loss 0.12335953272879124 test_loss: 0.14388735294342042
epoch: 29 training_loss 0.12367712892591953 test_loss: 0.12219219207763672
epoch: 30 training_loss 0.12862460631877184 test_loss: 0.12244280576705932
epoch: 31 training_loss 0.11902102168649435 test_loss: 0.12095290422439575
epoch: 32 training_loss 0.12955425381660463 test_loss: 0.12416878938674927
epoch: 33 training_loss 0.11633931592106819 test_loss: 0.14345922470092773
epoch: 34 training_loss 0.13076328311115504 test_loss: 0.12499392032623291
epoch: 35 training_loss 0.1161329823732376 test_loss: 0.12270628213882447
epoch: 36 training_loss 0.1305850075185299 test_loss: 0.12667267322540282
epoch: 37 training_loss 0.12220742162317037 test_loss: 0.1229711890220642
epoch: 38 training_loss 0.11712495494633914 test_loss: 0.11951949596405029
epoch: 39 training_loss 0.11936610568314791 test_loss: 0.12271456718444824
epoch: 40 training_loss 0.12596309835091232 test_loss: 0.10797450542449952
epoch: 41 training_loss 0.12648045238107442 test_loss: 0.12399708032608033
epoch: 42 training_loss 0.12181268567219376 test_loss: 0.12574417591094972
epoch: 43 training_loss 0.1266940328478813 test_loss: 0.10447142124176026
epoch: 44 training_loss 0.11660925801843405 test_loss: 0.11624109745025635
epoch: 45 training_loss 0.1157441451586783 test_loss: 0.13079031705856323
epoch: 46 training_loss 0.1223662143573165 test_loss: 0.13362693786621094
epoch: 47 training_loss 0.11439344637095929 test_loss: 0.10478630065917968
epoch: 48 training_loss 0.1248620392382145 test_loss: 0.13706748485565184
epoch: 49 training_loss 0.11942644467577339 test_loss: 0.1139295220375061
epoch: 50 training_loss 0.11576790116727352 test_loss: 0.11651401519775391
epoch: 51 training_loss 0.11700705580413341 test_loss: 0.12336796522140503
epoch: 52 training_loss 0.11809453012421728 test_loss: 0.12809890508651733
epoch: 53 training_loss 0.11928749710321426 test_loss: 0.13014272451400757
epoch: 54 training_loss 0.12039960384368896 test_loss: 0.10884087085723877
epoch: 55 training_loss 0.12147399298846721 test_loss: 0.12805631160736083
epoch: 56 training_loss 0.12167953714728355 test_loss: 0.13105316162109376
epoch: 57 training_loss 0.11798808684572577 test_loss: 0.10569943189620971
epoch: 58 training_loss 0.11795166533440352 test_loss: 0.11575161218643189
epoch: 59 training_loss 0.1284144068695605 test_loss: 0.12043149471282959
epoch: 60 training_loss 0.11794361270964146 test_loss: 0.1293102264404297
epoch: 61 training_loss 0.11716041829437017 test_loss: 0.12510616779327394
epoch: 62 training_loss 0.11423833955079317 test_loss: 0.11025043725967407
epoch: 63 training_loss 0.12416903894394636 test_loss: 0.11489518880844116
epoch: 64 training_loss 0.11346139522269368 test_loss: 0.12359445095062256
epoch: 65 training_loss 0.111707084774971 test_loss: 0.13700343370437623
epoch: 66 training_loss 0.11424613639712333 test_loss: 0.11618033647537232
epoch: 67 training_loss 0.11772028837352991 test_loss: 0.12898654937744142
epoch: 68 training_loss 0.1119693418778479 test_loss: 0.12029131650924682
epoch: 69 training_loss 0.11949434772133827 test_loss: 0.12450900077819824
epoch: 70 training_loss 0.11657517924904823 test_loss: 0.1312902331352234
epoch: 71 training_loss 0.11950662933290004 test_loss: 0.1175115466117859
epoch: 72 training_loss 0.11501198589801788 test_loss: 0.12968990802764893
epoch: 73 training_loss 0.11391479831188916 test_loss: 0.12371211051940918
epoch: 74 training_loss 0.1209571060538292 test_loss: 0.119654381275177
epoch: 75 training_loss 0.1076887560263276 test_loss: 0.12221001386642456
epoch: 76 training_loss 0.11845853839069605 test_loss: 0.11339654922485351
epoch: 77 training_loss 0.1172426899522543 test_loss: 0.11766172647476196
epoch: 78 training_loss 0.11711781777441502 test_loss: 0.13780498504638672
epoch: 79 training_loss 0.12528277799487114 test_loss: 0.13077402114868164
epoch: 80 training_loss 0.1173582524061203 test_loss: 0.14827536344528197
epoch: 81 training_loss 0.11448122464120387 test_loss: 0.10500344038009643
epoch: 82 training_loss 0.12181214436888695 test_loss: 0.10498489141464233
epoch: 83 training_loss 0.11926223952323198 test_loss: 0.12322006225585938
epoch: 84 training_loss 0.11838593870401383 test_loss: 0.11645054817199707
epoch: 85 training_loss 0.12175327893346548 test_loss: 0.12788552045822144
epoch: 86 training_loss 0.11811813119798899 test_loss: 0.12021163702011109
epoch: 87 training_loss 0.11629820641130209 test_loss: 0.10187013149261474
epoch: 88 training_loss 0.1127378736436367 test_loss: 0.11079609394073486
epoch: 89 training_loss 0.11730866461992263 test_loss: 0.12115508317947388
epoch: 90 training_loss 0.12241963531821966 test_loss: 0.12959903478622437
epoch: 91 training_loss 0.11742852002382279 test_loss: 0.11755636930465699
epoch: 92 training_loss 0.1204216386936605 test_loss: 0.1314267635345459
epoch: 93 training_loss 0.11036449501290918 test_loss: 0.0959443986415863
epoch: 94 training_loss 0.11468475792557001 test_loss: 0.11579296588897706
epoch: 95 training_loss 0.12277265224605799 test_loss: 0.11721289157867432
epoch: 96 training_loss 0.11653480488806962 test_loss: 0.12007778882980347
epoch: 97 training_loss 0.12166387252509595 test_loss: 0.11674330234527588
epoch: 98 training_loss 0.12460240956395864 test_loss: 0.12756354808807374
epoch: 99 training_loss 0.11727024462074041 test_loss: 0.13021235466003417
epoch: 100 training_loss 0.11877836734056473 test_loss: 0.13201498985290527
epoch: 101 training_loss 0.11608905714005231 test_loss: 0.13802908658981322
epoch: 102 training_loss 0.11690741453319788 test_loss: 0.12091969251632691
epoch: 103 training_loss 0.11649621907621623 test_loss: 0.11733764410018921
epoch: 104 training_loss 0.11613805118948221 test_loss: 0.11106842756271362
epoch: 105 training_loss 0.11610881214961409 test_loss: 0.11262915134429932
epoch: 106 training_loss 0.12837108483538032 test_loss: 0.12828595638275148
epoch: 107 training_loss 0.11347108937799931 test_loss: 0.12744890451431273
epoch: 108 training_loss 0.1253676068596542 test_loss: 0.11022162437438965
epoch: 109 training_loss 0.12155114453285933 test_loss: 0.1309547781944275
epoch: 110 training_loss 0.12177737575024367 test_loss: 0.11850019693374633
epoch: 111 training_loss 0.11514792818576097 test_loss: 0.11490607261657715
epoch: 112 training_loss 0.122407364025712 test_loss: 0.1292649745941162
epoch: 113 training_loss 0.11349130652844906 test_loss: 0.10520061254501342
epoch: 114 training_loss 0.11240225236862898 test_loss: 0.12029170989990234
epoch: 115 training_loss 0.12227580849081278 test_loss: 0.11800073385238648
epoch: 116 training_loss 0.11965865276753902 test_loss: 0.10939466953277588
epoch: 117 training_loss 0.12128726229071617 test_loss: 0.12835977077484131
epoch: 118 training_loss 0.11108811000362039 test_loss: 0.12567989826202391
epoch: 119 training_loss 0.12095655165612698 test_loss: 0.12144826650619507
epoch: 120 training_loss 0.11713565018028022 test_loss: 0.12667701244354249
epoch: 121 training_loss 0.12406717201694846 test_loss: 0.11875098943710327
epoch: 122 training_loss 0.11833300705999136 test_loss: 0.12252331972122192
epoch: 123 training_loss 0.1161516848579049 test_loss: 0.10407277345657348
epoch: 124 training_loss 0.11784959182143212 test_loss: 0.13711764812469482
epoch: 125 training_loss 0.1138263988867402 test_loss: 0.11358799934387206
epoch: 126 training_loss 0.10785941921174526 test_loss: 0.12408887147903443
epoch: 127 training_loss 0.1278836579620838 test_loss: 0.11070611476898193
epoch: 128 training_loss 0.12150748483836651 test_loss: 0.10819078683853149
epoch: 129 training_loss 0.11389587990939617 test_loss: 0.11994534730911255
epoch: 130 training_loss 0.10941121131181716 test_loss: 0.11378469467163085
epoch: 131 training_loss 0.11633025158196687 test_loss: 0.12093805074691773
epoch: 132 training_loss 0.1202027989178896 test_loss: 0.1219297170639038
epoch: 133 training_loss 0.11522858668118716 test_loss: 0.11907490491867065
epoch: 134 training_loss 0.11665990691632032 test_loss: 0.11356912851333618
epoch: 135 training_loss 0.11569270201027393 test_loss: 0.1259647011756897
epoch: 136 training_loss 0.11973944630473853 test_loss: 0.10939595699310303
epoch: 137 training_loss 0.1180621600523591 test_loss: 0.14344671964645386
epoch: 138 training_loss 0.11798655616119504 test_loss: 0.11077443361282349
epoch: 139 training_loss 0.11793449975550174 test_loss: 0.12974143028259277
epoch: 140 training_loss 0.11972995344549417 test_loss: 0.11977369785308838
epoch: 141 training_loss 0.11973252087831497 test_loss: 0.13769935369491576
epoch: 142 training_loss 0.11911900419741869 test_loss: 0.09562318921089172
epoch: 143 training_loss 0.11878262046724558 test_loss: 0.11586360931396485
epoch: 144 training_loss 0.1128930850699544 test_loss: 0.12090399265289306
epoch: 145 training_loss 0.11689123123884201 test_loss: 0.12810311317443848
epoch: 146 training_loss 0.11819616459310055 test_loss: 0.11370521783828735
epoch: 147 training_loss 0.11466464091092349 test_loss: 0.10309573411941528
epoch: 148 training_loss 0.1167990293353796 test_loss: 0.11396856307983398
epoch: 149 training_loss 0.11543032988905906 test_loss: 0.11036367416381836
epoch: 0 training_loss 29.873726968765258 test_loss: 9.174906921386718
epoch: 1 training_loss 7.321215944290161 test_loss: 6.150565719604492
epoch: 2 training_loss 5.291460263729095 test_loss: 4.806478118896484
epoch: 3 training_loss 4.516740972995758 test_loss: 4.491225814819336
epoch: 4 training_loss 3.942019181251526 test_loss: 3.6279869079589844
epoch: 5 training_loss 3.672674899101257 test_loss: 3.266647720336914
epoch: 6 training_loss 3.3164231514930727 test_loss: 3.455674743652344
epoch: 7 training_loss 3.0419312930107116 test_loss: 3.1278593063354494
epoch: 8 training_loss 2.956940908432007 test_loss: 2.700612449645996
epoch: 9 training_loss 2.6313462579250335 test_loss: 2.808491325378418
epoch: 10 training_loss 2.603622736930847 test_loss: 2.412853240966797
epoch: 11 training_loss 2.509129999876022 test_loss: 2.526473808288574
epoch: 12 training_loss 2.3609523439407347 test_loss: 2.3327836990356445
epoch: 13 training_loss 2.2714344549179075 test_loss: 2.37811279296875
epoch: 14 training_loss 2.1428622782230375 test_loss: 2.1931371688842773
epoch: 15 training_loss 2.1436243534088133 test_loss: 2.030047798156738
epoch: 16 training_loss 2.0353462159633637 test_loss: 2.0096563339233398
epoch: 17 training_loss 2.0453358447551726 test_loss: 1.9656692504882813
epoch: 18 training_loss 1.9882689881324769 test_loss: 1.98514404296875
epoch: 19 training_loss 1.966986256837845 test_loss: 1.985934066772461
epoch: 20 training_loss 1.9205266737937927 test_loss: 1.8145174026489257
epoch: 21 training_loss 1.8656926226615906 test_loss: 1.8520980834960938
epoch: 22 training_loss 1.758680717945099 test_loss: 1.9181598663330077
epoch: 23 training_loss 1.8002308905124664 test_loss: 1.7238662719726563
epoch: 24 training_loss 1.7214346218109131 test_loss: 1.7257593154907227
epoch: 25 training_loss 1.7496944606304168 test_loss: 1.7900047302246094
epoch: 26 training_loss 1.6965118837356568 test_loss: 1.7449922561645508
epoch: 27 training_loss 1.7078897428512574 test_loss: 1.612816619873047
epoch: 28 training_loss 1.576432354450226 test_loss: 1.570920467376709
epoch: 29 training_loss 1.6163226759433746 test_loss: 1.562494659423828
epoch: 30 training_loss 1.5665114426612854 test_loss: 1.690939712524414
epoch: 31 training_loss 1.5509305393695831 test_loss: 1.6477773666381836
epoch: 32 training_loss 1.5311468136310578 test_loss: 1.582937240600586
epoch: 33 training_loss 1.5412279307842254 test_loss: 1.5365377426147462
epoch: 34 training_loss 1.5551583421230317 test_loss: 1.5004199028015137
epoch: 35 training_loss 1.4822459161281585 test_loss: 1.6042613983154297
epoch: 36 training_loss 1.5497380673885346 test_loss: 1.5641667366027832
epoch: 37 training_loss 1.5007021844387054 test_loss: 1.5433756828308105
epoch: 38 training_loss 1.4530227637290956 test_loss: 1.519960308074951
epoch: 39 training_loss 1.5218643164634704 test_loss: 1.3714689254760741
epoch: 40 training_loss 1.503541476726532 test_loss: 1.4003446578979493
epoch: 41 training_loss 1.4671315610408784 test_loss: 1.4159695625305175
epoch: 42 training_loss 1.410807204246521 test_loss: 1.4026123046875
epoch: 43 training_loss 1.436975475549698 test_loss: 1.4120298385620118
epoch: 44 training_loss 1.397871584892273 test_loss: 1.3262860298156738
epoch: 45 training_loss 1.4194827139377595 test_loss: 1.4147585868835448
epoch: 46 training_loss 1.4247400724887849 test_loss: 1.4170878410339356
epoch: 47 training_loss 1.3728335106372833 test_loss: 1.309679126739502
epoch: 48 training_loss 1.369422105550766 test_loss: 1.3452622413635253
epoch: 49 training_loss 1.392841365337372 test_loss: 1.4157733917236328
epoch: 50 training_loss 1.30810413479805 test_loss: 1.358807373046875
epoch: 51 training_loss 1.3325023877620696 test_loss: 1.330862331390381
epoch: 52 training_loss 1.348897774219513 test_loss: 1.373082733154297
epoch: 53 training_loss 1.3479215538501739 test_loss: 1.280679702758789
epoch: 54 training_loss 1.283586882352829 test_loss: 1.2583468437194825
epoch: 55 training_loss 1.2964207303524018 test_loss: 1.2736080169677735
epoch: 56 training_loss 1.3275026750564576 test_loss: 1.386703109741211
epoch: 57 training_loss 1.2887073743343354 test_loss: 1.3114765167236329
epoch: 58 training_loss 1.2712498271465302 test_loss: 1.2876070976257323
epoch: 59 training_loss 1.2702168488502503 test_loss: 1.3269312858581543
epoch: 60 training_loss 1.2280115008354187 test_loss: 1.2136713027954102
epoch: 61 training_loss 1.2696646225452424 test_loss: 1.259770965576172
epoch: 62 training_loss 1.2726159024238586 test_loss: 1.2272703170776367
epoch: 63 training_loss 1.2400405788421631 test_loss: 1.3049671173095703
epoch: 64 training_loss 1.2617826706171036 test_loss: 1.2910372734069824
epoch: 65 training_loss 1.2178944778442382 test_loss: 1.235099983215332
epoch: 66 training_loss 1.2332159334421158 test_loss: 1.2361176490783692
epoch: 67 training_loss 1.2354710441827774 test_loss: 1.3029196739196778
epoch: 68 training_loss 1.253442302942276 test_loss: 1.3609043121337892
epoch: 69 training_loss 1.2100386422872544 test_loss: 1.2342748641967773
epoch: 70 training_loss 1.2223901373147965 test_loss: 1.1896422386169434
epoch: 71 training_loss 1.2150652754306792 test_loss: 1.184079074859619
epoch: 72 training_loss 1.1781258988380432 test_loss: 1.1320682525634767
epoch: 73 training_loss 1.1969376575946808 test_loss: 1.1772897720336915
epoch: 74 training_loss 1.1794195568561554 test_loss: 1.2580717086791993
epoch: 75 training_loss 1.1956521904468536 test_loss: 1.188335132598877
epoch: 76 training_loss 1.179885316491127 test_loss: 1.2245866775512695
epoch: 77 training_loss 1.1856395041942596 test_loss: 1.2348401069641113
epoch: 78 training_loss 1.1755826443433761 test_loss: 1.128370761871338
epoch: 79 training_loss 1.1611251270771026 test_loss: 1.1414846420288085
epoch: 80 training_loss 1.1562469500303267 test_loss: 1.1934340476989747
epoch: 81 training_loss 1.1558438736200332 test_loss: 1.163259220123291
epoch: 82 training_loss 1.2176079046726227 test_loss: 1.140438175201416
epoch: 83 training_loss 1.1688271218538284 test_loss: 1.195537281036377
epoch: 84 training_loss 1.117730079293251 test_loss: 1.1853570938110352
epoch: 85 training_loss 1.1457427859306335 test_loss: 1.2113115310668945
epoch: 86 training_loss 1.1332271188497542 test_loss: 1.2993253707885741
epoch: 87 training_loss 1.1343825846910476 test_loss: 1.1864169120788575
epoch: 88 training_loss 1.1304645240306854 test_loss: 1.1914621353149415
epoch: 89 training_loss 1.1399822974205016 test_loss: 1.1506926536560058
epoch: 90 training_loss 1.1326888364553451 test_loss: 1.1388562202453614
epoch: 91 training_loss 1.0980232572555542 test_loss: 1.1137418746948242
epoch: 92 training_loss 1.1212262034416198 test_loss: 1.1005941390991212
epoch: 93 training_loss 1.14608370244503 test_loss: 1.165559959411621
epoch: 94 training_loss 1.1346860629320146 test_loss: 1.104751205444336
epoch: 95 training_loss 1.1172572624683381 test_loss: 1.1092830657958985
epoch: 96 training_loss 1.1288454806804658 test_loss: 1.2282858848571778
epoch: 97 training_loss 1.1913094878196717 test_loss: 1.1287757873535156
epoch: 98 training_loss 1.0986599266529082 test_loss: 1.1038208961486817
epoch: 99 training_loss 1.1097762364149093 test_loss: 1.0736306190490723
epoch: 100 training_loss 1.1056574177742005 test_loss: 1.1284754753112793
epoch: 101 training_loss 1.120154230594635 test_loss: 1.1400628089904785
epoch: 102 training_loss 1.0918975001573563 test_loss: 1.1167056083679199
epoch: 103 training_loss 1.074961056113243 test_loss: 1.1864104270935059
epoch: 104 training_loss 1.1002088952064515 test_loss: 1.1268276214599608
epoch: 105 training_loss 1.0918926793336867 test_loss: 1.120089340209961
epoch: 106 training_loss 1.06820108294487 test_loss: 1.0679372787475585
epoch: 107 training_loss 1.0812238955497742 test_loss: 1.023785400390625
epoch: 108 training_loss 1.0791956132650375 test_loss: 1.0657222747802735
epoch: 109 training_loss 1.11121509373188 test_loss: 1.0517477989196777
epoch: 110 training_loss 1.0498791122436524 test_loss: 1.0709335327148437
epoch: 111 training_loss 1.0997869175672532 test_loss: 1.118546485900879
epoch: 112 training_loss 1.0927447253465652 test_loss: 1.0747005462646484
epoch: 113 training_loss 1.0748616230487824 test_loss: 1.1007963180541993
epoch: 114 training_loss 1.075095484852791 test_loss: 1.0764712333679198
epoch: 115 training_loss 1.0568349313735963 test_loss: 1.0638293266296386
epoch: 116 training_loss 1.0566792863607406 test_loss: 1.1053147315979004
epoch: 117 training_loss 1.0973208129405976 test_loss: 1.108800506591797
epoch: 118 training_loss 1.0683752584457398 test_loss: 1.0620643615722656
epoch: 119 training_loss 1.0595246171951294 test_loss: 1.0621046066284179
epoch: 120 training_loss 1.0641604632139205 test_loss: 1.020585536956787
epoch: 121 training_loss 1.0651668041944504 test_loss: 1.0404003143310547
epoch: 122 training_loss 1.0402905821800232 test_loss: 1.0234800338745118
epoch: 123 training_loss 1.040638616681099 test_loss: 0.9972906112670898
epoch: 124 training_loss 1.0628491353988647 test_loss: 1.0150830268859863
epoch: 125 training_loss 1.056607973575592 test_loss: 1.0802842140197755
epoch: 126 training_loss 1.044750116467476 test_loss: 1.0701951026916503
epoch: 127 training_loss 1.0683223462104798 test_loss: 1.0587858200073241
epoch: 128 training_loss 1.0261299347877502 test_loss: 1.05240478515625
epoch: 129 training_loss 1.0597185003757477 test_loss: 1.110707664489746
epoch: 130 training_loss 1.0451026171445847 test_loss: 0.9898958206176758
epoch: 131 training_loss 1.0577845108509063 test_loss: 1.0014397621154785
epoch: 132 training_loss 1.0689856004714966 test_loss: 1.0630050659179688
epoch: 133 training_loss 1.0122048449516297 test_loss: 1.0057764053344727
epoch: 134 training_loss 1.023699911236763 test_loss: 1.0128949165344239
epoch: 135 training_loss 1.0216963183879852 test_loss: 1.0266965866088866
epoch: 136 training_loss 1.0098027938604355 test_loss: 1.0309084892272948
epoch: 137 training_loss 1.0533432066440582 test_loss: 1.019084358215332
epoch: 138 training_loss 0.9837869709730148 test_loss: 1.0318737030029297
epoch: 139 training_loss 1.0235166102647781 test_loss: 1.0610630989074707
epoch: 140 training_loss 1.00743745803833 test_loss: 1.0951706886291503
epoch: 141 training_loss 1.0299331533908844 test_loss: 1.006322193145752
epoch: 142 training_loss 1.0420964807271957 test_loss: 1.0387680053710937
epoch: 143 training_loss 1.0237995392084123 test_loss: 1.0223297119140624
epoch: 144 training_loss 0.9882930648326874 test_loss: 0.9702920913696289
epoch: 145 training_loss 1.0448671412467956 test_loss: 1.05952730178833
epoch: 146 training_loss 1.0305004799365998 test_loss: 0.9896462440490723
epoch: 147 training_loss 0.9973901551961899 test_loss: 0.9843266487121582
epoch: 148 training_loss 0.9933574885129929 test_loss: 1.0520848274230956
epoch: 149 training_loss 0.9910569649934768 test_loss: 1.0426910400390625
3987.1689159829175
episode: 0 training return: tensor(214.2119, device='cuda:0')
episode: 1 training return: tensor(211.9399, device='cuda:0')
episode: 2 training return: tensor(254.9735, device='cuda:0')
episode: 3 training return: tensor(160.0072, device='cuda:0')
epoch: 1 test_true_pfm: 4019.361562090651 sim_pfm: 207.90699164149314
episode: 4 training return: tensor(211.7037, device='cuda:0')
episode: 5 training return: tensor(193.0442, device='cuda:0')
episode: 6 training return: tensor(174.8957, device='cuda:0')
episode: 7 training return: tensor(271.3646, device='cuda:0')
epoch: 2 test_true_pfm: 3997.4767846801174 sim_pfm: 242.1299779098481
episode: 8 training return: tensor(168.3235, device='cuda:0')
episode: 9 training return: tensor(115.6844, device='cuda:0')
episode: 10 training return: tensor(145.6104, device='cuda:0')
episode: 11 training return: tensor(199.2850, device='cuda:0')
epoch: 3 test_true_pfm: 3954.1519763480005 sim_pfm: 185.36024092818843
episode: 12 training return: tensor(142.1879, device='cuda:0')
episode: 13 training return: tensor(188.3948, device='cuda:0')
episode: 14 training return: tensor(271.9623, device='cuda:0')
episode: 15 training return: tensor(274.0511, device='cuda:0')
epoch: 4 test_true_pfm: 4006.755462172023 sim_pfm: 208.4044117242253
episode: 16 training return: tensor(170.1553, device='cuda:0')
episode: 17 training return: tensor(220.4895, device='cuda:0')
episode: 18 training return: tensor(156.6165, device='cuda:0')
episode: 19 training return: tensor(184.3746, device='cuda:0')
epoch: 5 test_true_pfm: 4013.1256035547704 sim_pfm: 205.813653941014
episode: 20 training return: tensor(235.8203, device='cuda:0')
episode: 21 training return: tensor(167.5508, device='cuda:0')
episode: 22 training return: tensor(275.0147, device='cuda:0')
episode: 23 training return: tensor(150.5439, device='cuda:0')
epoch: 6 test_true_pfm: 3984.6041604045754 sim_pfm: 240.37710375267974
episode: 24 training return: tensor(244.3125, device='cuda:0')
episode: 25 training return: tensor(284.4466, device='cuda:0')
episode: 26 training return: tensor(206.3196, device='cuda:0')
episode: 27 training return: tensor(259.6376, device='cuda:0')
epoch: 7 test_true_pfm: 4036.1827287271044 sim_pfm: 232.4781877203301
episode: 28 training return: tensor(254.1476, device='cuda:0')
episode: 29 training return: tensor(304.8650, device='cuda:0')
episode: 30 training return: tensor(213.6915, device='cuda:0')
episode: 31 training return: tensor(132.6540, device='cuda:0')
epoch: 8 test_true_pfm: 4002.9403875816447 sim_pfm: 208.78257153904028
episode: 32 training return: tensor(281.4759, device='cuda:0')
episode: 33 training return: tensor(244.3562, device='cuda:0')
episode: 34 training return: tensor(218.6519, device='cuda:0')
episode: 35 training return: tensor(190.6051, device='cuda:0')
epoch: 9 test_true_pfm: 3999.9151553228808 sim_pfm: 228.27326424458684
episode: 36 training return: tensor(219.2810, device='cuda:0')
episode: 37 training return: tensor(227.2297, device='cuda:0')
episode: 38 training return: tensor(257.8957, device='cuda:0')
episode: 39 training return: tensor(152.9945, device='cuda:0')
epoch: 10 test_true_pfm: 4002.643696499105 sim_pfm: 225.3375515951969
episode: 40 training return: tensor(256.3303, device='cuda:0')
episode: 41 training return: tensor(287.3458, device='cuda:0')
episode: 42 training return: tensor(188.4210, device='cuda:0')
episode: 43 training return: tensor(172.7792, device='cuda:0')
epoch: 11 test_true_pfm: 3984.308293538776 sim_pfm: 214.383582756127
episode: 44 training return: tensor(243.2478, device='cuda:0')
episode: 45 training return: tensor(235.4035, device='cuda:0')
episode: 46 training return: tensor(126.7490, device='cuda:0')
episode: 47 training return: tensor(210.6217, device='cuda:0')
epoch: 12 test_true_pfm: 4004.5393842271455 sim_pfm: 222.56092188810968
episode: 48 training return: tensor(222.9314, device='cuda:0')
episode: 49 training return: tensor(199.8267, device='cuda:0')
episode: 50 training return: tensor(276.1143, device='cuda:0')
episode: 51 training return: tensor(231.5819, device='cuda:0')
epoch: 13 test_true_pfm: 3951.0651597701612 sim_pfm: 206.20238288909118
episode: 52 training return: tensor(267.8209, device='cuda:0')
episode: 53 training return: tensor(228.0262, device='cuda:0')
episode: 54 training return: tensor(224.0915, device='cuda:0')
episode: 55 training return: tensor(126.3001, device='cuda:0')
epoch: 14 test_true_pfm: 3961.2292096258657 sim_pfm: 175.5517029024583
episode: 56 training return: tensor(262.2113, device='cuda:0')
episode: 57 training return: tensor(237.4122, device='cuda:0')
episode: 58 training return: tensor(280.0458, device='cuda:0')
episode: 59 training return: tensor(237.8046, device='cuda:0')
epoch: 15 test_true_pfm: 4001.7005418364474 sim_pfm: 204.9214166534366
episode: 60 training return: tensor(196.7830, device='cuda:0')
episode: 61 training return: tensor(255.8416, device='cuda:0')
episode: 62 training return: tensor(269.7581, device='cuda:0')
episode: 63 training return: tensor(272.1953, device='cuda:0')
epoch: 16 test_true_pfm: 4002.360561856846 sim_pfm: 202.35245037988838
episode: 64 training return: tensor(232.3204, device='cuda:0')
episode: 65 training return: tensor(208.8900, device='cuda:0')
episode: 66 training return: tensor(192.5990, device='cuda:0')
episode: 67 training return: tensor(-952.3896, device='cuda:0')
epoch: 17 test_true_pfm: 3971.334928036488 sim_pfm: 213.88841652233774
episode: 68 training return: tensor(231.1500, device='cuda:0')
episode: 69 training return: tensor(188.2217, device='cuda:0')
episode: 70 training return: tensor(238.4332, device='cuda:0')
episode: 71 training return: tensor(192.5286, device='cuda:0')
epoch: 18 test_true_pfm: 4026.3211349352227 sim_pfm: 194.62325459031854
episode: 72 training return: tensor(217.8067, device='cuda:0')
episode: 73 training return: tensor(281.5511, device='cuda:0')
episode: 74 training return: tensor(194.4187, device='cuda:0')
episode: 75 training return: tensor(212.2402, device='cuda:0')
epoch: 19 test_true_pfm: 3995.856138997642 sim_pfm: 256.13596566725755
episode: 76 training return: tensor(261.8964, device='cuda:0')
episode: 77 training return: tensor(240.6525, device='cuda:0')
episode: 78 training return: tensor(256.8524, device='cuda:0')
episode: 79 training return: tensor(169.5646, device='cuda:0')
epoch: 20 test_true_pfm: 4009.089859375548 sim_pfm: 239.98048543784535
episode: 80 training return: tensor(218.2035, device='cuda:0')
episode: 81 training return: tensor(279.1266, device='cuda:0')
episode: 82 training return: tensor(222.8842, device='cuda:0')
episode: 83 training return: tensor(194.5470, device='cuda:0')
epoch: 21 test_true_pfm: 3943.145089717753 sim_pfm: 222.42339216765444
episode: 84 training return: tensor(151.0507, device='cuda:0')
episode: 85 training return: tensor(200.6444, device='cuda:0')
episode: 86 training return: tensor(206.7033, device='cuda:0')
episode: 87 training return: tensor(209.9726, device='cuda:0')
epoch: 22 test_true_pfm: 3958.191437849156 sim_pfm: 227.1054739607401
episode: 88 training return: tensor(212.3553, device='cuda:0')
episode: 89 training return: tensor(244.0490, device='cuda:0')
episode: 90 training return: tensor(205.4013, device='cuda:0')
episode: 91 training return: tensor(200.3909, device='cuda:0')
epoch: 23 test_true_pfm: 3979.495890913125 sim_pfm: 162.47432810042906
episode: 92 training return: tensor(147.8102, device='cuda:0')
episode: 93 training return: tensor(221.1001, device='cuda:0')
episode: 94 training return: tensor(278.4283, device='cuda:0')
episode: 95 training return: tensor(-900.5634, device='cuda:0')
epoch: 24 test_true_pfm: 3983.0445856368146 sim_pfm: 262.20293923003675
episode: 96 training return: tensor(221.3842, device='cuda:0')
episode: 97 training return: tensor(-907.0850, device='cuda:0')
episode: 98 training return: tensor(260.3578, device='cuda:0')
episode: 99 training return: tensor(231.3516, device='cuda:0')
epoch: 25 test_true_pfm: 4010.7062671974186 sim_pfm: 251.19388068543049
episode: 100 training return: tensor(165.8106, device='cuda:0')
episode: 101 training return: tensor(225.7316, device='cuda:0')
episode: 102 training return: tensor(163.8167, device='cuda:0')
episode: 103 training return: tensor(258.5419, device='cuda:0')
epoch: 26 test_true_pfm: 3965.145255145642 sim_pfm: 221.51461018781023
episode: 104 training return: tensor(199.6768, device='cuda:0')
episode: 105 training return: tensor(190.9759, device='cuda:0')
episode: 106 training return: tensor(250.5170, device='cuda:0')
episode: 107 training return: tensor(207.2419, device='cuda:0')
epoch: 27 test_true_pfm: 4025.6181318776707 sim_pfm: 224.36185611303276
episode: 108 training return: tensor(235.5515, device='cuda:0')
episode: 109 training return: tensor(269.9203, device='cuda:0')
episode: 110 training return: tensor(244.4838, device='cuda:0')
episode: 111 training return: tensor(164.7487, device='cuda:0')
epoch: 28 test_true_pfm: 3974.684687115134 sim_pfm: 182.14846030259892
episode: 112 training return: tensor(160.8556, device='cuda:0')
episode: 113 training return: tensor(216.0300, device='cuda:0')
episode: 114 training return: tensor(176.5918, device='cuda:0')
episode: 115 training return: tensor(140.3697, device='cuda:0')
epoch: 29 test_true_pfm: 4001.053003112604 sim_pfm: 190.35607558004753
episode: 116 training return: tensor(253.6789, device='cuda:0')
episode: 117 training return: tensor(273.6391, device='cuda:0')
episode: 118 training return: tensor(273.3689, device='cuda:0')
episode: 119 training return: tensor(183.7357, device='cuda:0')
epoch: 30 test_true_pfm: 4008.5908784150947 sim_pfm: 215.53075070562772
episode: 120 training return: tensor(270.7224, device='cuda:0')
episode: 121 training return: tensor(161.3664, device='cuda:0')
episode: 122 training return: tensor(263.6391, device='cuda:0')
episode: 123 training return: tensor(280.7432, device='cuda:0')
epoch: 31 test_true_pfm: 3981.848398735421 sim_pfm: 193.57612380036153
episode: 124 training return: tensor(262.3868, device='cuda:0')
episode: 125 training return: tensor(243.3947, device='cuda:0')
episode: 126 training return: tensor(227.0786, device='cuda:0')
episode: 127 training return: tensor(199.9800, device='cuda:0')
epoch: 32 test_true_pfm: 3995.896118785709 sim_pfm: 206.24917457819296
episode: 128 training return: tensor(186.1213, device='cuda:0')
episode: 129 training return: tensor(213.8916, device='cuda:0')
episode: 130 training return: tensor(240.3558, device='cuda:0')
episode: 131 training return: tensor(301.9303, device='cuda:0')
epoch: 33 test_true_pfm: 4015.4669580509885 sim_pfm: 238.7530481608895
episode: 132 training return: tensor(240.4714, device='cuda:0')
episode: 133 training return: tensor(245.8571, device='cuda:0')
episode: 134 training return: tensor(227.0879, device='cuda:0')
episode: 135 training return: tensor(240.8913, device='cuda:0')
epoch: 34 test_true_pfm: 4007.0445805234717 sim_pfm: 263.70499750965
episode: 136 training return: tensor(262.0065, device='cuda:0')
episode: 137 training return: tensor(250.1718, device='cuda:0')
episode: 138 training return: tensor(250.3633, device='cuda:0')
episode: 139 training return: tensor(278.7380, device='cuda:0')
epoch: 35 test_true_pfm: 3989.9557659146726 sim_pfm: 168.18110800134795
episode: 140 training return: tensor(195.0615, device='cuda:0')
episode: 141 training return: tensor(290.4631, device='cuda:0')
episode: 142 training return: tensor(197.1175, device='cuda:0')
episode: 143 training return: tensor(255.5983, device='cuda:0')
epoch: 36 test_true_pfm: 4010.279642187504 sim_pfm: 241.7602461852366
episode: 144 training return: tensor(187.2448, device='cuda:0')
episode: 145 training return: tensor(177.4837, device='cuda:0')
episode: 146 training return: tensor(242.8190, device='cuda:0')
episode: 147 training return: tensor(293.7407, device='cuda:0')
epoch: 37 test_true_pfm: 3995.208094947968 sim_pfm: 227.86107912191073
episode: 148 training return: tensor(216.5175, device='cuda:0')
episode: 149 training return: tensor(312.1389, device='cuda:0')
episode: 150 training return: tensor(189.7293, device='cuda:0')
episode: 151 training return: tensor(201.6438, device='cuda:0')
epoch: 38 test_true_pfm: 3997.7368330029235 sim_pfm: 217.21203481650446
episode: 152 training return: tensor(212.6668, device='cuda:0')
episode: 153 training return: tensor(208.7236, device='cuda:0')
episode: 154 training return: tensor(273.7459, device='cuda:0')
episode: 155 training return: tensor(212.5637, device='cuda:0')
epoch: 39 test_true_pfm: 4061.317716507434 sim_pfm: 250.6119668146906
episode: 156 training return: tensor(295.3743, device='cuda:0')
episode: 157 training return: tensor(195.1859, device='cuda:0')
episode: 158 training return: tensor(190.2072, device='cuda:0')
episode: 159 training return: tensor(243.4605, device='cuda:0')
epoch: 40 test_true_pfm: 3996.3551006441794 sim_pfm: 286.44322586257476
episode: 160 training return: tensor(255.2551, device='cuda:0')
episode: 161 training return: tensor(284.3159, device='cuda:0')
episode: 162 training return: tensor(182.4397, device='cuda:0')
episode: 163 training return: tensor(245.4502, device='cuda:0')
epoch: 41 test_true_pfm: 4020.472873756063 sim_pfm: 249.22694035856208
episode: 164 training return: tensor(229.6666, device='cuda:0')
episode: 165 training return: tensor(257.3183, device='cuda:0')
episode: 166 training return: tensor(244.4624, device='cuda:0')
episode: 167 training return: tensor(221.0267, device='cuda:0')
epoch: 42 test_true_pfm: 4014.540424445919 sim_pfm: 280.41164497320034
episode: 168 training return: tensor(203.4728, device='cuda:0')
episode: 169 training return: tensor(209.0350, device='cuda:0')
episode: 170 training return: tensor(-867.6258, device='cuda:0')
episode: 171 training return: tensor(159.4279, device='cuda:0')
epoch: 43 test_true_pfm: 4006.2905624020987 sim_pfm: 235.22517670413558
episode: 172 training return: tensor(157.9772, device='cuda:0')
episode: 173 training return: tensor(164.6406, device='cuda:0')
episode: 174 training return: tensor(192.3864, device='cuda:0')
episode: 175 training return: tensor(164.4829, device='cuda:0')
epoch: 44 test_true_pfm: 4000.1869790050127 sim_pfm: 223.70782103777552
episode: 176 training return: tensor(184.8659, device='cuda:0')
episode: 177 training return: tensor(245.2447, device='cuda:0')
episode: 178 training return: tensor(267.8015, device='cuda:0')
episode: 179 training return: tensor(163.9720, device='cuda:0')
epoch: 45 test_true_pfm: 4002.265488100346 sim_pfm: 202.83912697666287
episode: 180 training return: tensor(241.6476, device='cuda:0')
episode: 181 training return: tensor(193.8668, device='cuda:0')
episode: 182 training return: tensor(157.2008, device='cuda:0')
episode: 183 training return: tensor(214.6470, device='cuda:0')
epoch: 46 test_true_pfm: 3984.1436822336764 sim_pfm: 196.61579805981214
episode: 184 training return: tensor(243.7281, device='cuda:0')
episode: 185 training return: tensor(280.6658, device='cuda:0')
episode: 186 training return: tensor(264.2257, device='cuda:0')
episode: 187 training return: tensor(180.6811, device='cuda:0')
epoch: 47 test_true_pfm: 4001.003405843832 sim_pfm: 230.21837884648508
episode: 188 training return: tensor(210.3118, device='cuda:0')
episode: 189 training return: tensor(237.9527, device='cuda:0')
episode: 190 training return: tensor(217.3031, device='cuda:0')
episode: 191 training return: tensor(153.9808, device='cuda:0')
epoch: 48 test_true_pfm: 3965.8337322811863 sim_pfm: 166.61202123842668
episode: 192 training return: tensor(263.1152, device='cuda:0')
episode: 193 training return: tensor(268.2144, device='cuda:0')
episode: 194 training return: tensor(231.8508, device='cuda:0')
episode: 195 training return: tensor(235.7177, device='cuda:0')
epoch: 49 test_true_pfm: 4016.244051152951 sim_pfm: 267.3214848696177
episode: 196 training return: tensor(196.2884, device='cuda:0')
episode: 197 training return: tensor(163.0574, device='cuda:0')
episode: 198 training return: tensor(216.8701, device='cuda:0')
episode: 199 training return: tensor(204.2583, device='cuda:0')
epoch: 50 test_true_pfm: 3978.8556271848734 sim_pfm: 217.06046271122372
episode: 200 training return: tensor(199.0979, device='cuda:0')
episode: 201 training return: tensor(210.4041, device='cuda:0')
episode: 202 training return: tensor(217.6380, device='cuda:0')
episode: 203 training return: tensor(200.4419, device='cuda:0')
epoch: 51 test_true_pfm: 3989.157860317113 sim_pfm: 211.18612671764762
episode: 204 training return: tensor(-914.7820, device='cuda:0')
episode: 205 training return: tensor(196.5209, device='cuda:0')
episode: 206 training return: tensor(254.2927, device='cuda:0')
episode: 207 training return: tensor(195.9525, device='cuda:0')
epoch: 52 test_true_pfm: 3960.597352956807 sim_pfm: 226.60485170257743
episode: 208 training return: tensor(226.0235, device='cuda:0')
episode: 209 training return: tensor(215.7250, device='cuda:0')
episode: 210 training return: tensor(204.6394, device='cuda:0')
episode: 211 training return: tensor(299.0587, device='cuda:0')
epoch: 53 test_true_pfm: 4002.2965672543237 sim_pfm: 223.5632788016907
episode: 212 training return: tensor(240.4638, device='cuda:0')
episode: 213 training return: tensor(209.2973, device='cuda:0')
episode: 214 training return: tensor(184.4492, device='cuda:0')
episode: 215 training return: tensor(257.2222, device='cuda:0')
epoch: 54 test_true_pfm: 4006.515598596166 sim_pfm: 252.5168640174476
episode: 216 training return: tensor(268.9354, device='cuda:0')
episode: 217 training return: tensor(198.7919, device='cuda:0')
episode: 218 training return: tensor(229.1239, device='cuda:0')
episode: 219 training return: tensor(225.9835, device='cuda:0')
epoch: 55 test_true_pfm: 4060.1926626491113 sim_pfm: 256.8135803416565
episode: 220 training return: tensor(216.1433, device='cuda:0')
episode: 221 training return: tensor(294.7569, device='cuda:0')
episode: 222 training return: tensor(220.6316, device='cuda:0')
episode: 223 training return: tensor(279.9832, device='cuda:0')
epoch: 56 test_true_pfm: 4010.5932090620577 sim_pfm: 220.67198336008005
episode: 224 training return: tensor(131.3194, device='cuda:0')
episode: 225 training return: tensor(203.4516, device='cuda:0')
episode: 226 training return: tensor(196.0220, device='cuda:0')
episode: 227 training return: tensor(231.9481, device='cuda:0')
epoch: 57 test_true_pfm: 3964.7281353205713 sim_pfm: 218.98235256349048
episode: 228 training return: tensor(202.8898, device='cuda:0')
episode: 229 training return: tensor(169.9390, device='cuda:0')
episode: 230 training return: tensor(227.9206, device='cuda:0')
episode: 231 training return: tensor(226.4402, device='cuda:0')
epoch: 58 test_true_pfm: 4004.895058290694 sim_pfm: 240.70311668503564
episode: 232 training return: tensor(222.0984, device='cuda:0')
episode: 233 training return: tensor(186.9173, device='cuda:0')
episode: 234 training return: tensor(203.1604, device='cuda:0')
episode: 235 training return: tensor(279.9405, device='cuda:0')
epoch: 59 test_true_pfm: 3996.53101785451 sim_pfm: 215.46511169391064
episode: 236 training return: tensor(171.7198, device='cuda:0')
episode: 237 training return: tensor(231.4533, device='cuda:0')
episode: 238 training return: tensor(259.9879, device='cuda:0')
episode: 239 training return: tensor(273.1476, device='cuda:0')
epoch: 60 test_true_pfm: 4000.5944106703023 sim_pfm: 261.27991891963757
episode: 240 training return: tensor(196.2437, device='cuda:0')
episode: 241 training return: tensor(266.8329, device='cuda:0')
episode: 242 training return: tensor(-847.4334, device='cuda:0')
episode: 243 training return: tensor(223.0734, device='cuda:0')
epoch: 61 test_true_pfm: 4047.6683009136173 sim_pfm: 273.3662617612087
episode: 244 training return: tensor(241.3496, device='cuda:0')
episode: 245 training return: tensor(220.7673, device='cuda:0')
episode: 246 training return: tensor(180.2598, device='cuda:0')
episode: 247 training return: tensor(221.1092, device='cuda:0')
epoch: 62 test_true_pfm: 4034.6936983890337 sim_pfm: 253.00578113158312
episode: 248 training return: tensor(236.5672, device='cuda:0')
episode: 249 training return: tensor(199.1270, device='cuda:0')
episode: 250 training return: tensor(263.5003, device='cuda:0')
episode: 251 training return: tensor(285.2291, device='cuda:0')
epoch: 63 test_true_pfm: 3987.3122323885423 sim_pfm: 244.5806116035868
episode: 252 training return: tensor(162.0122, device='cuda:0')
episode: 253 training return: tensor(201.9051, device='cuda:0')
episode: 254 training return: tensor(287.6768, device='cuda:0')
episode: 255 training return: tensor(239.0111, device='cuda:0')
epoch: 64 test_true_pfm: 4049.784920849246 sim_pfm: 237.95060476330886
episode: 256 training return: tensor(233.7473, device='cuda:0')
episode: 257 training return: tensor(196.3432, device='cuda:0')
episode: 258 training return: tensor(297.8372, device='cuda:0')
episode: 259 training return: tensor(253.6488, device='cuda:0')
epoch: 65 test_true_pfm: 3997.3220368987404 sim_pfm: 231.49649198089415
episode: 260 training return: tensor(183.6729, device='cuda:0')
episode: 261 training return: tensor(242.8957, device='cuda:0')
episode: 262 training return: tensor(248.9354, device='cuda:0')
episode: 263 training return: tensor(281.7335, device='cuda:0')
epoch: 66 test_true_pfm: 4020.9883581927684 sim_pfm: 255.23215654480737
episode: 264 training return: tensor(279.1972, device='cuda:0')
episode: 265 training return: tensor(214.4393, device='cuda:0')
episode: 266 training return: tensor(204.6066, device='cuda:0')
episode: 267 training return: tensor(232.0170, device='cuda:0')
epoch: 67 test_true_pfm: 4037.333774091269 sim_pfm: 233.3439971539968
episode: 268 training return: tensor(206.1243, device='cuda:0')
episode: 269 training return: tensor(290.2157, device='cuda:0')
episode: 270 training return: tensor(221.1707, device='cuda:0')
episode: 271 training return: tensor(267.6657, device='cuda:0')
epoch: 68 test_true_pfm: 4010.2658939106846 sim_pfm: 258.87026785544975
episode: 272 training return: tensor(175.9604, device='cuda:0')
episode: 273 training return: tensor(273.4776, device='cuda:0')
episode: 274 training return: tensor(257.1513, device='cuda:0')
episode: 275 training return: tensor(172.1082, device='cuda:0')
epoch: 69 test_true_pfm: 4007.194344998044 sim_pfm: 261.07515347993467
episode: 276 training return: tensor(209.9198, device='cuda:0')
episode: 277 training return: tensor(221.0125, device='cuda:0')
episode: 278 training return: tensor(223.7547, device='cuda:0')
episode: 279 training return: tensor(259.4768, device='cuda:0')
epoch: 70 test_true_pfm: 4008.222247455779 sim_pfm: 250.03859439437898
episode: 280 training return: tensor(195.1938, device='cuda:0')
episode: 281 training return: tensor(206.1104, device='cuda:0')
episode: 282 training return: tensor(207.8610, device='cuda:0')
episode: 283 training return: tensor(161.5340, device='cuda:0')
epoch: 71 test_true_pfm: 4017.7618093990136 sim_pfm: 220.72672716135276
episode: 284 training return: tensor(213.7382, device='cuda:0')
episode: 285 training return: tensor(207.7285, device='cuda:0')
episode: 286 training return: tensor(265.5902, device='cuda:0')
episode: 287 training return: tensor(233.4402, device='cuda:0')
epoch: 72 test_true_pfm: 4021.299887268631 sim_pfm: 243.4124655155465
episode: 288 training return: tensor(285.4202, device='cuda:0')
episode: 289 training return: tensor(267.9950, device='cuda:0')
episode: 290 training return: tensor(189.9197, device='cuda:0')
episode: 291 training return: tensor(242.5678, device='cuda:0')
epoch: 73 test_true_pfm: 4028.4349246290494 sim_pfm: 275.0652686441317
episode: 292 training return: tensor(270.4512, device='cuda:0')
episode: 293 training return: tensor(262.5732, device='cuda:0')
episode: 294 training return: tensor(239.6346, device='cuda:0')
episode: 295 training return: tensor(177.4997, device='cuda:0')
epoch: 74 test_true_pfm: 3995.356641899301 sim_pfm: 252.87016934952894
episode: 296 training return: tensor(248.6019, device='cuda:0')
episode: 297 training return: tensor(230.4346, device='cuda:0')
episode: 298 training return: tensor(264.6330, device='cuda:0')
episode: 299 training return: tensor(212.6361, device='cuda:0')
epoch: 75 test_true_pfm: 3985.786341084579 sim_pfm: 253.62019491008445
episode: 300 training return: tensor(198.6970, device='cuda:0')
episode: 301 training return: tensor(281.5991, device='cuda:0')
episode: 302 training return: tensor(271.2413, device='cuda:0')
episode: 303 training return: tensor(264.5386, device='cuda:0')
epoch: 76 test_true_pfm: 4016.692945615016 sim_pfm: 241.327356011209
episode: 304 training return: tensor(276.0862, device='cuda:0')
episode: 305 training return: tensor(204.5710, device='cuda:0')
episode: 306 training return: tensor(216.5058, device='cuda:0')
episode: 307 training return: tensor(295.1763, device='cuda:0')
epoch: 77 test_true_pfm: 4042.7105457385055 sim_pfm: 249.26807310973527
episode: 308 training return: tensor(232.3430, device='cuda:0')
episode: 309 training return: tensor(235.0676, device='cuda:0')
episode: 310 training return: tensor(213.5965, device='cuda:0')
episode: 311 training return: tensor(270.0227, device='cuda:0')
epoch: 78 test_true_pfm: 4053.266125610015 sim_pfm: 261.7051966114377
episode: 312 training return: tensor(212.1191, device='cuda:0')
episode: 313 training return: tensor(212.7747, device='cuda:0')
episode: 314 training return: tensor(209.4044, device='cuda:0')
episode: 315 training return: tensor(228.5217, device='cuda:0')
epoch: 79 test_true_pfm: 4023.2116940263113 sim_pfm: 270.20260463554104
episode: 316 training return: tensor(252.3275, device='cuda:0')
episode: 317 training return: tensor(271.5257, device='cuda:0')
episode: 318 training return: tensor(193.9630, device='cuda:0')
episode: 319 training return: tensor(233.1269, device='cuda:0')
epoch: 80 test_true_pfm: 3943.5804264909107 sim_pfm: 203.42791041208935
episode: 320 training return: tensor(232.0686, device='cuda:0')
episode: 321 training return: tensor(268.2723, device='cuda:0')
episode: 322 training return: tensor(200.2169, device='cuda:0')
episode: 323 training return: tensor(256.0359, device='cuda:0')
epoch: 81 test_true_pfm: 3970.7731975406314 sim_pfm: 218.3644589507312
episode: 324 training return: tensor(243.9767, device='cuda:0')
episode: 325 training return: tensor(246.9611, device='cuda:0')
episode: 326 training return: tensor(228.0604, device='cuda:0')
episode: 327 training return: tensor(220.8235, device='cuda:0')
epoch: 82 test_true_pfm: 3957.25587045957 sim_pfm: 262.5968788815856
episode: 328 training return: tensor(180.6541, device='cuda:0')
episode: 329 training return: tensor(-822.5137, device='cuda:0')
episode: 330 training return: tensor(133.9623, device='cuda:0')
episode: 331 training return: tensor(241.9534, device='cuda:0')
epoch: 83 test_true_pfm: 4049.7903377346593 sim_pfm: 262.85062061135733
episode: 332 training return: tensor(183.7883, device='cuda:0')
episode: 333 training return: tensor(232.7170, device='cuda:0')
episode: 334 training return: tensor(221.9847, device='cuda:0')
episode: 335 training return: tensor(243.7137, device='cuda:0')
epoch: 84 test_true_pfm: 4000.824492861497 sim_pfm: 239.5885873703422
episode: 336 training return: tensor(255.9572, device='cuda:0')
episode: 337 training return: tensor(277.2932, device='cuda:0')
episode: 338 training return: tensor(219.3747, device='cuda:0')
episode: 339 training return: tensor(142.8818, device='cuda:0')
epoch: 85 test_true_pfm: 4018.0475505780264 sim_pfm: 264.64957984065404
episode: 340 training return: tensor(222.5164, device='cuda:0')
episode: 341 training return: tensor(169.5771, device='cuda:0')
episode: 342 training return: tensor(232.1709, device='cuda:0')
episode: 343 training return: tensor(175.5541, device='cuda:0')
epoch: 86 test_true_pfm: 4057.835137800828 sim_pfm: -138.38800164935915
episode: 344 training return: tensor(160.8023, device='cuda:0')
episode: 345 training return: tensor(207.4327, device='cuda:0')
episode: 346 training return: tensor(220.9736, device='cuda:0')
episode: 347 training return: tensor(290.5531, device='cuda:0')
epoch: 87 test_true_pfm: 4028.896386516273 sim_pfm: 235.79608400687962
episode: 348 training return: tensor(214.3052, device='cuda:0')
episode: 349 training return: tensor(279.9164, device='cuda:0')
episode: 350 training return: tensor(226.4419, device='cuda:0')
episode: 351 training return: tensor(258.3583, device='cuda:0')
epoch: 88 test_true_pfm: 4054.045666023485 sim_pfm: 256.07424132227123
episode: 352 training return: tensor(216.9673, device='cuda:0')
episode: 353 training return: tensor(207.3101, device='cuda:0')
episode: 354 training return: tensor(209.0826, device='cuda:0')
episode: 355 training return: tensor(287.5761, device='cuda:0')
epoch: 89 test_true_pfm: 4045.2924511437227 sim_pfm: 231.84449878587233
episode: 356 training return: tensor(278.6129, device='cuda:0')
episode: 357 training return: tensor(206.0407, device='cuda:0')
episode: 358 training return: tensor(189.9004, device='cuda:0')
episode: 359 training return: tensor(287.0583, device='cuda:0')
epoch: 90 test_true_pfm: 4005.393093886794 sim_pfm: 264.4494541225334
episode: 360 training return: tensor(272.8916, device='cuda:0')
episode: 361 training return: tensor(269.0638, device='cuda:0')
episode: 362 training return: tensor(192.5247, device='cuda:0')
episode: 363 training return: tensor(244.5490, device='cuda:0')
epoch: 91 test_true_pfm: 4037.43550751867 sim_pfm: 245.7601828009259
episode: 364 training return: tensor(246.3321, device='cuda:0')
episode: 365 training return: tensor(165.1491, device='cuda:0')
episode: 366 training return: tensor(188.7428, device='cuda:0')
episode: 367 training return: tensor(191.9660, device='cuda:0')
epoch: 92 test_true_pfm: 4030.367584628744 sim_pfm: 264.28199239695095
episode: 368 training return: tensor(154.0806, device='cuda:0')
episode: 369 training return: tensor(225.2948, device='cuda:0')
episode: 370 training return: tensor(127.7275, device='cuda:0')
episode: 371 training return: tensor(139.2580, device='cuda:0')
epoch: 93 test_true_pfm: 3988.316704872717 sim_pfm: 210.63015205640113
episode: 372 training return: tensor(204.4184, device='cuda:0')
episode: 373 training return: tensor(235.3500, device='cuda:0')
episode: 374 training return: tensor(176.0607, device='cuda:0')
episode: 375 training return: tensor(191.4613, device='cuda:0')
epoch: 94 test_true_pfm: 4024.173659304417 sim_pfm: 280.4673817448008
episode: 376 training return: tensor(199.2516, device='cuda:0')
episode: 377 training return: tensor(208.5452, device='cuda:0')
episode: 378 training return: tensor(-688.5336, device='cuda:0')
episode: 379 training return: tensor(301.4319, device='cuda:0')
epoch: 95 test_true_pfm: 3990.4589594664444 sim_pfm: 212.69536704100514
episode: 380 training return: tensor(234.7055, device='cuda:0')
episode: 381 training return: tensor(239.4996, device='cuda:0')
episode: 382 training return: tensor(213.6574, device='cuda:0')
episode: 383 training return: tensor(168.9673, device='cuda:0')
epoch: 96 test_true_pfm: 4021.679455359596 sim_pfm: 262.53786742821103
episode: 384 training return: tensor(204.7346, device='cuda:0')
episode: 385 training return: tensor(211.5790, device='cuda:0')
episode: 386 training return: tensor(271.0192, device='cuda:0')
episode: 387 training return: tensor(256.2258, device='cuda:0')
epoch: 97 test_true_pfm: 4021.2424380602665 sim_pfm: 251.45520210588197
episode: 388 training return: tensor(282.6918, device='cuda:0')
episode: 389 training return: tensor(260.9630, device='cuda:0')
episode: 390 training return: tensor(178.8642, device='cuda:0')
episode: 391 training return: tensor(223.7817, device='cuda:0')
epoch: 98 test_true_pfm: 3963.0667910993466 sim_pfm: 219.62857538093036
episode: 392 training return: tensor(272.0493, device='cuda:0')
episode: 393 training return: tensor(271.5538, device='cuda:0')
episode: 394 training return: tensor(216.8514, device='cuda:0')
episode: 395 training return: tensor(220.9162, device='cuda:0')
epoch: 99 test_true_pfm: 4076.627943298477 sim_pfm: 284.14826196971507
episode: 396 training return: tensor(221.3224, device='cuda:0')
episode: 397 training return: tensor(204.5422, device='cuda:0')
episode: 398 training return: tensor(195.0190, device='cuda:0')
episode: 399 training return: tensor(207.8208, device='cuda:0')
epoch: 100 test_true_pfm: 4019.023049016319 sim_pfm: 267.73867951174424
episode: 400 training return: tensor(281.8375, device='cuda:0')
episode: 401 training return: tensor(279.6408, device='cuda:0')
episode: 402 training return: tensor(247.0017, device='cuda:0')
episode: 403 training return: tensor(278.0809, device='cuda:0')
epoch: 101 test_true_pfm: 4016.374842457674 sim_pfm: 274.76746574612724
episode: 404 training return: tensor(181.4953, device='cuda:0')
episode: 405 training return: tensor(232.1602, device='cuda:0')
episode: 406 training return: tensor(284.1962, device='cuda:0')
episode: 407 training return: tensor(211.8635, device='cuda:0')
epoch: 102 test_true_pfm: 3965.877426076478 sim_pfm: 203.1134282347824
episode: 408 training return: tensor(258.7617, device='cuda:0')
episode: 409 training return: tensor(181.3335, device='cuda:0')
episode: 410 training return: tensor(260.9711, device='cuda:0')
episode: 411 training return: tensor(292.6215, device='cuda:0')
epoch: 103 test_true_pfm: 4030.75889685382 sim_pfm: 292.2732514637367
episode: 412 training return: tensor(274.5483, device='cuda:0')
episode: 413 training return: tensor(255.0750, device='cuda:0')
episode: 414 training return: tensor(202.9670, device='cuda:0')
episode: 415 training return: tensor(197.6989, device='cuda:0')
epoch: 104 test_true_pfm: 4016.23526580161 sim_pfm: 262.61585452238796
episode: 416 training return: tensor(180.6744, device='cuda:0')
episode: 417 training return: tensor(216.7797, device='cuda:0')
episode: 418 training return: tensor(248.4270, device='cuda:0')
episode: 419 training return: tensor(265.9162, device='cuda:0')
epoch: 105 test_true_pfm: 4032.709906041638 sim_pfm: 256.84715174972854
episode: 420 training return: tensor(174.4645, device='cuda:0')
episode: 421 training return: tensor(201.3286, device='cuda:0')
episode: 422 training return: tensor(167.5201, device='cuda:0')
episode: 423 training return: tensor(-876.1772, device='cuda:0')
epoch: 106 test_true_pfm: 4049.5281382756316 sim_pfm: 219.25023304475084
episode: 424 training return: tensor(264.9888, device='cuda:0')
episode: 425 training return: tensor(216.0454, device='cuda:0')
episode: 426 training return: tensor(253.8401, device='cuda:0')
episode: 427 training return: tensor(240.8809, device='cuda:0')
epoch: 107 test_true_pfm: 4001.4191201513836 sim_pfm: 240.35010190447792
episode: 428 training return: tensor(267.8510, device='cuda:0')
episode: 429 training return: tensor(242.7578, device='cuda:0')
episode: 430 training return: tensor(184.9860, device='cuda:0')
episode: 431 training return: tensor(273.4226, device='cuda:0')
epoch: 108 test_true_pfm: 4027.458607406796 sim_pfm: 256.6685253624261
episode: 432 training return: tensor(178.9650, device='cuda:0')
episode: 433 training return: tensor(219.7102, device='cuda:0')
episode: 434 training return: tensor(234.0700, device='cuda:0')
episode: 435 training return: tensor(175.5034, device='cuda:0')
epoch: 109 test_true_pfm: 3988.5844598119006 sim_pfm: 193.0331679386533
episode: 436 training return: tensor(220.9049, device='cuda:0')
episode: 437 training return: tensor(209.1083, device='cuda:0')
episode: 438 training return: tensor(195.3978, device='cuda:0')
episode: 439 training return: tensor(286.4814, device='cuda:0')
epoch: 110 test_true_pfm: 3999.7697477524903 sim_pfm: 265.5575775829105
episode: 440 training return: tensor(148.6725, device='cuda:0')
episode: 441 training return: tensor(281.2441, device='cuda:0')
episode: 442 training return: tensor(138.4106, device='cuda:0')
episode: 443 training return: tensor(232.9808, device='cuda:0')
epoch: 111 test_true_pfm: 4016.861533324834 sim_pfm: 237.01108996172357
episode: 444 training return: tensor(282.1815, device='cuda:0')
episode: 445 training return: tensor(289.8629, device='cuda:0')
episode: 446 training return: tensor(199.3904, device='cuda:0')
episode: 447 training return: tensor(201.6451, device='cuda:0')
epoch: 112 test_true_pfm: 4041.2302238620578 sim_pfm: 221.49302749879038
episode: 448 training return: tensor(228.4846, device='cuda:0')
episode: 449 training return: tensor(288.5349, device='cuda:0')
episode: 450 training return: tensor(212.7703, device='cuda:0')
episode: 451 training return: tensor(167.3180, device='cuda:0')
epoch: 113 test_true_pfm: 4063.7442288607494 sim_pfm: 288.9161919497031
episode: 452 training return: tensor(222.7618, device='cuda:0')
episode: 453 training return: tensor(-811.6450, device='cuda:0')
episode: 454 training return: tensor(231.7768, device='cuda:0')
episode: 455 training return: tensor(279.9475, device='cuda:0')
epoch: 114 test_true_pfm: 4058.5276915695617 sim_pfm: 265.9201619529631
episode: 456 training return: tensor(252.9222, device='cuda:0')
episode: 457 training return: tensor(286.7012, device='cuda:0')
episode: 458 training return: tensor(293.2044, device='cuda:0')
episode: 459 training return: tensor(231.1294, device='cuda:0')
epoch: 115 test_true_pfm: 4047.6490485858208 sim_pfm: 290.9851118653314
episode: 460 training return: tensor(292.0893, device='cuda:0')
episode: 461 training return: tensor(223.9821, device='cuda:0')
episode: 462 training return: tensor(148.5421, device='cuda:0')
episode: 463 training return: tensor(262.1153, device='cuda:0')
epoch: 116 test_true_pfm: 4054.8364131067906 sim_pfm: 239.27518338046502
episode: 464 training return: tensor(281.0956, device='cuda:0')
episode: 465 training return: tensor(290.6275, device='cuda:0')
episode: 466 training return: tensor(258.9468, device='cuda:0')
episode: 467 training return: tensor(293.7657, device='cuda:0')
epoch: 117 test_true_pfm: 3993.8793866777087 sim_pfm: 248.8821535661118
episode: 468 training return: tensor(251.7551, device='cuda:0')
episode: 469 training return: tensor(227.6397, device='cuda:0')
episode: 470 training return: tensor(255.2538, device='cuda:0')
episode: 471 training return: tensor(229.2626, device='cuda:0')
epoch: 118 test_true_pfm: 3998.1954360904506 sim_pfm: 224.50234152190387
episode: 472 training return: tensor(242.3928, device='cuda:0')
episode: 473 training return: tensor(236.3875, device='cuda:0')
episode: 474 training return: tensor(181.0818, device='cuda:0')
episode: 475 training return: tensor(240.3056, device='cuda:0')
epoch: 119 test_true_pfm: 4001.9642954299547 sim_pfm: 208.3232959973199
episode: 476 training return: tensor(275.0251, device='cuda:0')
episode: 477 training return: tensor(-871.9863, device='cuda:0')
episode: 478 training return: tensor(260.0345, device='cuda:0')
episode: 479 training return: tensor(155.5104, device='cuda:0')
epoch: 120 test_true_pfm: 4038.1745661700984 sim_pfm: 259.67912867195747
episode: 480 training return: tensor(259.1616, device='cuda:0')
episode: 481 training return: tensor(278.2140, device='cuda:0')
episode: 482 training return: tensor(292.4128, device='cuda:0')
episode: 483 training return: tensor(259.6121, device='cuda:0')
epoch: 121 test_true_pfm: 4042.864465135945 sim_pfm: 296.03514281510917
episode: 484 training return: tensor(243.7163, device='cuda:0')
episode: 485 training return: tensor(280.0106, device='cuda:0')
episode: 486 training return: tensor(176.3430, device='cuda:0')
episode: 487 training return: tensor(123.4948, device='cuda:0')
epoch: 122 test_true_pfm: 3992.6607807030537 sim_pfm: 243.8204729456629
episode: 488 training return: tensor(217.8820, device='cuda:0')
episode: 489 training return: tensor(260.1275, device='cuda:0')
episode: 490 training return: tensor(180.7073, device='cuda:0')
episode: 491 training return: tensor(293.2359, device='cuda:0')
epoch: 123 test_true_pfm: 3996.613702537518 sim_pfm: 227.57018421406005
episode: 492 training return: tensor(246.0660, device='cuda:0')
episode: 493 training return: tensor(257.3899, device='cuda:0')
episode: 494 training return: tensor(159.6928, device='cuda:0')
episode: 495 training return: tensor(269.4528, device='cuda:0')
epoch: 124 test_true_pfm: 3987.637320064779 sim_pfm: 283.5346662614417
episode: 496 training return: tensor(261.1103, device='cuda:0')
episode: 497 training return: tensor(201.9247, device='cuda:0')
episode: 498 training return: tensor(278.8977, device='cuda:0')
episode: 499 training return: tensor(205.2327, device='cuda:0')
epoch: 125 test_true_pfm: 4008.839648414016 sim_pfm: 267.8187739953476
episode: 500 training return: tensor(268.8487, device='cuda:0')
episode: 501 training return: tensor(187.1226, device='cuda:0')
episode: 502 training return: tensor(270.6890, device='cuda:0')
episode: 503 training return: tensor(234.2279, device='cuda:0')
epoch: 126 test_true_pfm: 4042.054874511899 sim_pfm: 281.497105677942
episode: 504 training return: tensor(277.6105, device='cuda:0')
episode: 505 training return: tensor(157.7653, device='cuda:0')
episode: 506 training return: tensor(277.7635, device='cuda:0')
episode: 507 training return: tensor(275.4328, device='cuda:0')
epoch: 127 test_true_pfm: 4027.9732594174134 sim_pfm: 267.22489012715715
episode: 508 training return: tensor(240.3204, device='cuda:0')
episode: 509 training return: tensor(234.4275, device='cuda:0')
episode: 510 training return: tensor(271.4687, device='cuda:0')
episode: 511 training return: tensor(248.0926, device='cuda:0')
epoch: 128 test_true_pfm: 4041.093739738169 sim_pfm: 250.64966703153914
episode: 512 training return: tensor(255.2008, device='cuda:0')
episode: 513 training return: tensor(225.4187, device='cuda:0')
episode: 514 training return: tensor(264.4512, device='cuda:0')
episode: 515 training return: tensor(131.7960, device='cuda:0')
epoch: 129 test_true_pfm: 4001.6823254372134 sim_pfm: 233.34860325387368
episode: 516 training return: tensor(242.3046, device='cuda:0')
episode: 517 training return: tensor(200.9710, device='cuda:0')
episode: 518 training return: tensor(280.7721, device='cuda:0')
episode: 519 training return: tensor(245.0457, device='cuda:0')
epoch: 130 test_true_pfm: 4054.888324539881 sim_pfm: 249.88905439461814
episode: 520 training return: tensor(236.6402, device='cuda:0')
episode: 521 training return: tensor(219.9274, device='cuda:0')
episode: 522 training return: tensor(255.7349, device='cuda:0')
episode: 523 training return: tensor(223.2370, device='cuda:0')
epoch: 131 test_true_pfm: 4056.8873379531633 sim_pfm: 284.08579838221584
episode: 524 training return: tensor(293.5704, device='cuda:0')
episode: 525 training return: tensor(173.4651, device='cuda:0')
episode: 526 training return: tensor(140.8134, device='cuda:0')
episode: 527 training return: tensor(173.8380, device='cuda:0')
epoch: 132 test_true_pfm: 4050.5593118434263 sim_pfm: 256.44177221392357
episode: 528 training return: tensor(182.1139, device='cuda:0')
episode: 529 training return: tensor(186.8794, device='cuda:0')
episode: 530 training return: tensor(239.8138, device='cuda:0')
episode: 531 training return: tensor(273.8245, device='cuda:0')
epoch: 133 test_true_pfm: 4035.1212771582996 sim_pfm: 270.0648810439549
episode: 532 training return: tensor(239.0201, device='cuda:0')
episode: 533 training return: tensor(197.4725, device='cuda:0')
episode: 534 training return: tensor(243.2009, device='cuda:0')
episode: 535 training return: tensor(257.8977, device='cuda:0')
epoch: 134 test_true_pfm: 4037.7642649419035 sim_pfm: 225.4069223034312
episode: 536 training return: tensor(275.8528, device='cuda:0')
episode: 537 training return: tensor(283.2877, device='cuda:0')
episode: 538 training return: tensor(235.4651, device='cuda:0')
episode: 539 training return: tensor(250.7795, device='cuda:0')
epoch: 135 test_true_pfm: 4045.0830809259005 sim_pfm: 245.71667587419506
episode: 540 training return: tensor(243.9809, device='cuda:0')
episode: 541 training return: tensor(268.9630, device='cuda:0')
episode: 542 training return: tensor(238.3019, device='cuda:0')
episode: 543 training return: tensor(242.3938, device='cuda:0')
epoch: 136 test_true_pfm: 3949.4925419269716 sim_pfm: 189.8701216878835
episode: 544 training return: tensor(193.4622, device='cuda:0')
episode: 545 training return: tensor(282.3267, device='cuda:0')
episode: 546 training return: tensor(240.1111, device='cuda:0')
episode: 547 training return: tensor(258.2467, device='cuda:0')
epoch: 137 test_true_pfm: 4025.0966387901813 sim_pfm: 213.6284856289858
episode: 548 training return: tensor(211.9972, device='cuda:0')
episode: 549 training return: tensor(289.7932, device='cuda:0')
episode: 550 training return: tensor(174.7194, device='cuda:0')
episode: 551 training return: tensor(247.0795, device='cuda:0')
epoch: 138 test_true_pfm: 4041.767967882637 sim_pfm: 269.4077052459858
episode: 552 training return: tensor(192.7921, device='cuda:0')
episode: 553 training return: tensor(187.5036, device='cuda:0')
episode: 554 training return: tensor(178.1895, device='cuda:0')
episode: 555 training return: tensor(201.0693, device='cuda:0')
epoch: 139 test_true_pfm: 3981.309623926338 sim_pfm: 236.78163143616015
episode: 556 training return: tensor(233.5251, device='cuda:0')
episode: 557 training return: tensor(160.4515, device='cuda:0')
episode: 558 training return: tensor(191.4431, device='cuda:0')
episode: 559 training return: tensor(200.8532, device='cuda:0')
epoch: 140 test_true_pfm: 4002.285341774021 sim_pfm: 238.16168101928392
episode: 560 training return: tensor(195.2710, device='cuda:0')
episode: 561 training return: tensor(258.0220, device='cuda:0')
episode: 562 training return: tensor(233.3295, device='cuda:0')
episode: 563 training return: tensor(172.1283, device='cuda:0')
epoch: 141 test_true_pfm: 4015.3263826618427 sim_pfm: 237.9083198531686
episode: 564 training return: tensor(240.5460, device='cuda:0')
episode: 565 training return: tensor(292.9626, device='cuda:0')
episode: 566 training return: tensor(240.2401, device='cuda:0')
episode: 567 training return: tensor(238.4677, device='cuda:0')
epoch: 142 test_true_pfm: 4040.7599979019137 sim_pfm: 243.95599692240162
episode: 568 training return: tensor(281.9265, device='cuda:0')
episode: 569 training return: tensor(221.3485, device='cuda:0')
episode: 570 training return: tensor(209.4847, device='cuda:0')
episode: 571 training return: tensor(198.1057, device='cuda:0')
epoch: 143 test_true_pfm: 4009.1898009458328 sim_pfm: 226.61050659873095
episode: 572 training return: tensor(207.6828, device='cuda:0')
episode: 573 training return: tensor(230.2644, device='cuda:0')
episode: 574 training return: tensor(274.4835, device='cuda:0')
episode: 575 training return: tensor(271.2499, device='cuda:0')
epoch: 144 test_true_pfm: 4044.9320083141197 sim_pfm: 260.9883692447038
episode: 576 training return: tensor(231.3587, device='cuda:0')
episode: 577 training return: tensor(222.0767, device='cuda:0')
episode: 578 training return: tensor(194.9116, device='cuda:0')
episode: 579 training return: tensor(230.6971, device='cuda:0')
epoch: 145 test_true_pfm: 4043.0785139591494 sim_pfm: 276.6075517772697
episode: 580 training return: tensor(274.8817, device='cuda:0')
episode: 581 training return: tensor(251.0273, device='cuda:0')
episode: 582 training return: tensor(207.7344, device='cuda:0')
episode: 583 training return: tensor(196.5683, device='cuda:0')
epoch: 146 test_true_pfm: 4027.0400871901215 sim_pfm: 213.6901830402688
episode: 584 training return: tensor(198.9023, device='cuda:0')
episode: 585 training return: tensor(252.9663, device='cuda:0')
episode: 586 training return: tensor(221.9283, device='cuda:0')
episode: 587 training return: tensor(194.8086, device='cuda:0')
epoch: 147 test_true_pfm: 4034.653273749616 sim_pfm: 244.41716772657432
episode: 588 training return: tensor(271.9438, device='cuda:0')
episode: 589 training return: tensor(246.0135, device='cuda:0')
episode: 590 training return: tensor(211.7816, device='cuda:0')
episode: 591 training return: tensor(298.8952, device='cuda:0')
epoch: 148 test_true_pfm: 3981.189083405358 sim_pfm: 214.89335053681862
episode: 592 training return: tensor(251.2594, device='cuda:0')
episode: 593 training return: tensor(307.1867, device='cuda:0')
episode: 594 training return: tensor(217.5333, device='cuda:0')
episode: 595 training return: tensor(222.8473, device='cuda:0')
epoch: 149 test_true_pfm: 4021.051665945371 sim_pfm: 277.9296393469558
episode: 596 training return: tensor(197.1440, device='cuda:0')
episode: 597 training return: tensor(195.8578, device='cuda:0')
episode: 598 training return: tensor(267.3908, device='cuda:0')
episode: 599 training return: tensor(157.9238, device='cuda:0')
epoch: 150 test_true_pfm: 4027.4348088659353 sim_pfm: 242.5820257102217
