['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 0.3422889760881662 test_loss: 0.2069997787475586
epoch: 1 training_loss 0.18720010057091713 test_loss: 0.17057375907897948
epoch: 2 training_loss 0.16968842908740045 test_loss: 0.16121689081192017
epoch: 3 training_loss 0.1680491451919079 test_loss: 0.1683698058128357
epoch: 4 training_loss 0.17322426803410054 test_loss: 0.18845678567886354
epoch: 5 training_loss 0.15391737692058086 test_loss: 0.17368260622024537
epoch: 6 training_loss 0.1530402647703886 test_loss: 0.16042011976242065
epoch: 7 training_loss 0.15567535832524299 test_loss: 0.15043578147888184
epoch: 8 training_loss 0.15597918204963207 test_loss: 0.1568203330039978
epoch: 9 training_loss 0.1570541213452816 test_loss: 0.13868085145950318
epoch: 10 training_loss 0.15024797443300486 test_loss: 0.14915201663970948
epoch: 11 training_loss 0.15213652908802033 test_loss: 0.15077904462814332
epoch: 12 training_loss 0.15286382149904967 test_loss: 0.16901336908340453
epoch: 13 training_loss 0.1534550655633211 test_loss: 0.14301775693893432
epoch: 14 training_loss 0.15353325225412845 test_loss: 0.16197196245193482
epoch: 15 training_loss 0.15421868957579135 test_loss: 0.1513175129890442
epoch: 16 training_loss 0.14474581122398378 test_loss: 0.16101536750793458
epoch: 17 training_loss 0.14900462422519922 test_loss: 0.14159594774246215
epoch: 18 training_loss 0.14498683393001557 test_loss: 0.16723649501800536
epoch: 19 training_loss 0.15740040712058545 test_loss: 0.1399361252784729
epoch: 20 training_loss 0.14541543278843164 test_loss: 0.17017394304275513
epoch: 21 training_loss 0.1416981452703476 test_loss: 0.18040477037429808
epoch: 22 training_loss 0.14542424350976943 test_loss: 0.14439350366592407
epoch: 23 training_loss 0.13673919705674054 test_loss: 0.14073891639709474
epoch: 24 training_loss 0.14204861991107465 test_loss: 0.15337215662002562
epoch: 25 training_loss 0.14148574318736792 test_loss: 0.1518673062324524
epoch: 26 training_loss 0.13162271305918694 test_loss: 0.13870211839675903
epoch: 27 training_loss 0.13460829555988313 test_loss: 0.16079472303390502
epoch: 28 training_loss 0.13399730319157244 test_loss: 0.15801007747650148
epoch: 29 training_loss 0.1434384036809206 test_loss: 0.20044760704040526
epoch: 30 training_loss 0.1520067733898759 test_loss: 0.15306130647659302
epoch: 31 training_loss 0.13618885040283202 test_loss: 0.1559838056564331
epoch: 32 training_loss 0.13599010981619358 test_loss: 0.15075024366378784
epoch: 33 training_loss 0.14032737854868174 test_loss: 0.16574767827987671
epoch: 34 training_loss 0.13971959050744773 test_loss: 0.1285090446472168
epoch: 35 training_loss 0.14075921926647425 test_loss: 0.14325472116470336
epoch: 36 training_loss 0.1294558234140277 test_loss: 0.1494980573654175
epoch: 37 training_loss 0.14371322505176068 test_loss: 0.16179178953170775
epoch: 38 training_loss 0.14612227123230695 test_loss: 0.15078154802322388
epoch: 39 training_loss 0.14219196245074273 test_loss: 0.13681066036224365
epoch: 40 training_loss 0.1367386532574892 test_loss: 0.14578838348388673
epoch: 41 training_loss 0.15265575766563416 test_loss: 0.16251882314682006
epoch: 42 training_loss 0.13641525693237783 test_loss: 0.13378627300262452
epoch: 43 training_loss 0.1386957611888647 test_loss: 0.15366675853729247
epoch: 44 training_loss 0.14236115492880344 test_loss: 0.15179604291915894
epoch: 45 training_loss 0.14125166907906533 test_loss: 0.16619091033935546
epoch: 46 training_loss 0.1390978106483817 test_loss: 0.13801999092102052
epoch: 47 training_loss 0.1428885530307889 test_loss: 0.14427945613861085
epoch: 48 training_loss 0.13990890204906464 test_loss: 0.13357956409454347
epoch: 49 training_loss 0.1454276990145445 test_loss: 0.14899704456329346
epoch: 50 training_loss 0.14226051088422537 test_loss: 0.18556010723114014
epoch: 51 training_loss 0.13991387087851762 test_loss: 0.13990888595581055
epoch: 52 training_loss 0.14196605037897825 test_loss: 0.1425214886665344
epoch: 53 training_loss 0.14464083850383758 test_loss: 0.14985824823379518
epoch: 54 training_loss 0.1452332853525877 test_loss: 0.15545313358306884
epoch: 55 training_loss 0.1347281128540635 test_loss: 0.1360844612121582
epoch: 56 training_loss 0.13779387257993223 test_loss: 0.1692901849746704
epoch: 57 training_loss 0.14269060626626015 test_loss: 0.14762669801712036
epoch: 58 training_loss 0.13917711317539216 test_loss: 0.13386515378952027
epoch: 59 training_loss 0.14064353693276643 test_loss: 0.14202364683151245
epoch: 60 training_loss 0.13529343876987696 test_loss: 0.17289166450500487
epoch: 61 training_loss 0.13393814131617546 test_loss: 0.14792532920837403
epoch: 62 training_loss 0.1410745245590806 test_loss: 0.1483131766319275
epoch: 63 training_loss 0.14018132828176022 test_loss: 0.15696011781692504
epoch: 64 training_loss 0.14267396472394467 test_loss: 0.15459885597229003
epoch: 65 training_loss 0.13345333006232976 test_loss: 0.14576988220214843
epoch: 66 training_loss 0.13792541109025477 test_loss: 0.15988194942474365
epoch: 67 training_loss 0.14853819094598295 test_loss: 0.15609842538833618
epoch: 68 training_loss 0.1374744836986065 test_loss: 0.15048594474792482
epoch: 69 training_loss 0.14370758134871722 test_loss: 0.14659165143966674
epoch: 70 training_loss 0.14716771263629197 test_loss: 0.15707348585128783
epoch: 71 training_loss 0.13306551840156317 test_loss: 0.15825109481811522
epoch: 72 training_loss 0.13231179524213077 test_loss: 0.12565470933914186
epoch: 73 training_loss 0.13326797772198914 test_loss: 0.15924168825149537
epoch: 74 training_loss 0.13431467670947314 test_loss: 0.14615799188613893
epoch: 75 training_loss 0.136797881051898 test_loss: 0.1361101508140564
epoch: 76 training_loss 0.13987887986004352 test_loss: 0.15069141387939453
epoch: 77 training_loss 0.13179156910628081 test_loss: 0.16145168542861937
epoch: 78 training_loss 0.14227906595915557 test_loss: 0.15284593105316163
epoch: 79 training_loss 0.13232609700411557 test_loss: 0.16520452499389648
epoch: 80 training_loss 0.1435738642886281 test_loss: 0.12739216089248656
epoch: 81 training_loss 0.1409438556060195 test_loss: 0.1447246789932251
epoch: 82 training_loss 0.14100941188633442 test_loss: 0.13939716815948486
epoch: 83 training_loss 0.1354557604715228 test_loss: 0.14393988847732545
epoch: 84 training_loss 0.13366487752646208 test_loss: 0.14136193990707396
epoch: 85 training_loss 0.13369389358907938 test_loss: 0.11857495307922364
epoch: 86 training_loss 0.13598060708492996 test_loss: 0.14605038166046141
epoch: 87 training_loss 0.13134263455867767 test_loss: 0.13597036600112916
epoch: 88 training_loss 0.13758663039654492 test_loss: 0.1384875535964966
epoch: 89 training_loss 0.13132897295057774 test_loss: 0.13577216863632202
epoch: 90 training_loss 0.13070551764219998 test_loss: 0.139127779006958
epoch: 91 training_loss 0.12979531064629554 test_loss: 0.14382624626159668
epoch: 92 training_loss 0.1288178065791726 test_loss: 0.15853192806243896
epoch: 93 training_loss 0.12971359018236397 test_loss: 0.15784308910369874
epoch: 94 training_loss 0.13615690678358078 test_loss: 0.13916938304901122
epoch: 95 training_loss 0.1364252967387438 test_loss: 0.1425904154777527
epoch: 96 training_loss 0.138840298615396 test_loss: 0.15146533250808716
epoch: 97 training_loss 0.12977237194776536 test_loss: 0.14954031705856324
epoch: 98 training_loss 0.13489587128162384 test_loss: 0.12227668762207031
epoch: 99 training_loss 0.13231383327394725 test_loss: 0.14499915838241578
epoch: 100 training_loss 0.13699556358158588 test_loss: 0.16055599451065064
epoch: 101 training_loss 0.1352594444528222 test_loss: 0.13656450510025026
epoch: 102 training_loss 0.13230513218790294 test_loss: 0.1389729142189026
epoch: 103 training_loss 0.1371813330426812 test_loss: 0.14863849878311158
epoch: 104 training_loss 0.13406998600810766 test_loss: 0.1415219783782959
epoch: 105 training_loss 0.12627436812967063 test_loss: 0.14819164276123048
epoch: 106 training_loss 0.14203905541449785 test_loss: 0.14293148517608642
epoch: 107 training_loss 0.1281429211422801 test_loss: 0.15191577672958373
epoch: 108 training_loss 0.1431636405736208 test_loss: 0.13240551948547363
epoch: 109 training_loss 0.13485669422894717 test_loss: 0.1541236162185669
epoch: 110 training_loss 0.13442972186952828 test_loss: 0.12699354887008668
epoch: 111 training_loss 0.1375426197052002 test_loss: 0.16846997737884523
epoch: 112 training_loss 0.1437819641828537 test_loss: 0.15176881551742555
epoch: 113 training_loss 0.13873244762420656 test_loss: 0.14046109914779664
epoch: 114 training_loss 0.1339172887429595 test_loss: 0.1325021743774414
epoch: 115 training_loss 0.13647026371210813 test_loss: 0.15915642976760863
epoch: 116 training_loss 0.132851498760283 test_loss: 0.1391325354576111
epoch: 117 training_loss 0.1327947822958231 test_loss: 0.15634586811065673
epoch: 118 training_loss 0.12692564513534307 test_loss: 0.1412952184677124
epoch: 119 training_loss 0.13480596378445625 test_loss: 0.14087539911270142
epoch: 120 training_loss 0.143607563637197 test_loss: 0.15503318309783937
epoch: 121 training_loss 0.12686432871967554 test_loss: 0.15108199119567872
epoch: 122 training_loss 0.13907129183411598 test_loss: 0.13953394889831544
epoch: 123 training_loss 0.13276889823377133 test_loss: 0.1352955937385559
epoch: 124 training_loss 0.141471767090261 test_loss: 0.14275636672973632
epoch: 125 training_loss 0.13642346803098918 test_loss: 0.13419184684753419
epoch: 126 training_loss 0.13755810290575027 test_loss: 0.12898037433624268
epoch: 127 training_loss 0.13852429576218128 test_loss: 0.1373138427734375
epoch: 128 training_loss 0.13323625952005386 test_loss: 0.12959691286087036
epoch: 129 training_loss 0.13612203557044267 test_loss: 0.14342610836029052
epoch: 130 training_loss 0.12577563520520926 test_loss: 0.12938731908798218
epoch: 131 training_loss 0.13507937977090478 test_loss: 0.14799220561981202
epoch: 132 training_loss 0.1374242025986314 test_loss: 0.14797823429107665
epoch: 133 training_loss 0.1285616583749652 test_loss: 0.14416899681091308
epoch: 134 training_loss 0.1416273295506835 test_loss: 0.13514646291732788
epoch: 135 training_loss 0.13875724136829376 test_loss: 0.1646740674972534
epoch: 136 training_loss 0.13857258189469576 test_loss: 0.13473222255706788
epoch: 137 training_loss 0.13407738469541072 test_loss: 0.15527489185333251
epoch: 138 training_loss 0.13356389075517655 test_loss: 0.133534038066864
epoch: 139 training_loss 0.12923782836645842 test_loss: 0.14463706016540528
epoch: 140 training_loss 0.13904529493302106 test_loss: 0.13405417203903197
epoch: 141 training_loss 0.1402036925032735 test_loss: 0.1585656523704529
epoch: 142 training_loss 0.13778106853365898 test_loss: 0.14597878456115723
epoch: 143 training_loss 0.1350631073117256 test_loss: 0.13696271181106567
epoch: 144 training_loss 0.12691857311874627 test_loss: 0.1400275468826294
epoch: 145 training_loss 0.13021846938878298 test_loss: 0.14288407564163208
epoch: 146 training_loss 0.13148644536733628 test_loss: 0.13086354732513428
epoch: 147 training_loss 0.13440862957388164 test_loss: 0.14516327381134034
epoch: 148 training_loss 0.12727782748639582 test_loss: 0.136153507232666
epoch: 149 training_loss 0.13057277031242848 test_loss: 0.1420794367790222
epoch: 0 training_loss 0.27424641959369184 test_loss: 0.16539772748947143
epoch: 1 training_loss 0.1833412179350853 test_loss: 0.2000269651412964
epoch: 2 training_loss 0.17949928410351276 test_loss: 0.17610304355621337
epoch: 3 training_loss 0.1596193639189005 test_loss: 0.143773090839386
epoch: 4 training_loss 0.15591771252453326 test_loss: 0.1523856997489929
epoch: 5 training_loss 0.15660307731479406 test_loss: 0.15326955318450927
epoch: 6 training_loss 0.15652790203690528 test_loss: 0.18210841417312623
epoch: 7 training_loss 0.1531340877711773 test_loss: 0.14852840900421144
epoch: 8 training_loss 0.15226600743830204 test_loss: 0.14003267288208007
epoch: 9 training_loss 0.14604105193167924 test_loss: 0.1512799382209778
epoch: 10 training_loss 0.15160680789500475 test_loss: 0.14673103094100953
epoch: 11 training_loss 0.14904752027243376 test_loss: 0.14229271411895753
epoch: 12 training_loss 0.14542659610509873 test_loss: 0.1264781951904297
epoch: 13 training_loss 0.14563534326851368 test_loss: 0.14484130144119262
epoch: 14 training_loss 0.1403025945648551 test_loss: 0.15369209051132202
epoch: 15 training_loss 0.1327866101451218 test_loss: 0.13547630310058595
epoch: 16 training_loss 0.13958454590290784 test_loss: 0.14480020999908447
epoch: 17 training_loss 0.1507566222175956 test_loss: 0.15449775457382203
epoch: 18 training_loss 0.1441771823167801 test_loss: 0.12430344820022583
epoch: 19 training_loss 0.1444781405106187 test_loss: 0.1386495351791382
epoch: 20 training_loss 0.14473724171519278 test_loss: 0.14087312221527098
epoch: 21 training_loss 0.14849374063313006 test_loss: 0.14778150320053102
epoch: 22 training_loss 0.14173513758927583 test_loss: 0.14184219837188722
epoch: 23 training_loss 0.13489118486642837 test_loss: 0.143966007232666
epoch: 24 training_loss 0.1407637942954898 test_loss: 0.11492173671722412
epoch: 25 training_loss 0.1453862227126956 test_loss: 0.1466565489768982
epoch: 26 training_loss 0.14831796858459712 test_loss: 0.14868255853652954
epoch: 27 training_loss 0.13571428898721932 test_loss: 0.128680956363678
epoch: 28 training_loss 0.132753217369318 test_loss: 0.13396735191345216
epoch: 29 training_loss 0.13855171581730247 test_loss: 0.1399453639984131
epoch: 30 training_loss 0.13384087778627873 test_loss: 0.11514822244644166
epoch: 31 training_loss 0.14656901579350234 test_loss: 0.13737932443618775
epoch: 32 training_loss 0.13252975843846798 test_loss: 0.14679735898971558
epoch: 33 training_loss 0.13965291827917098 test_loss: 0.15022205114364623
epoch: 34 training_loss 0.13980621960014106 test_loss: 0.1608920454978943
epoch: 35 training_loss 0.13501769565045835 test_loss: 0.14349985122680664
epoch: 36 training_loss 0.13599629361182453 test_loss: 0.11320282220840454
epoch: 37 training_loss 0.13984269104897976 test_loss: 0.1248435139656067
epoch: 38 training_loss 0.14087206419557333 test_loss: 0.15612295866012574
epoch: 39 training_loss 0.13114526458084583 test_loss: 0.1320315718650818
epoch: 40 training_loss 0.13150686845183374 test_loss: 0.11703411340713502
epoch: 41 training_loss 0.1338318943604827 test_loss: 0.11633943319320679
epoch: 42 training_loss 0.1366417382657528 test_loss: 0.12658704519271852
epoch: 43 training_loss 0.1494211272522807 test_loss: 0.1416061282157898
epoch: 44 training_loss 0.13587350711226465 test_loss: 0.12238913774490356
epoch: 45 training_loss 0.14734401334077118 test_loss: 0.1383769154548645
epoch: 46 training_loss 0.14091199595481158 test_loss: 0.14717555046081543
epoch: 47 training_loss 0.14187228064984084 test_loss: 0.13120425939559938
epoch: 48 training_loss 0.1326521749794483 test_loss: 0.1334889054298401
epoch: 49 training_loss 0.13522269532084466 test_loss: 0.12431650161743164
epoch: 50 training_loss 0.13101560156792402 test_loss: 0.13072917461395264
epoch: 51 training_loss 0.13994016982614993 test_loss: 0.16597199440002441
epoch: 52 training_loss 0.13017076466232538 test_loss: 0.12101467847824096
epoch: 53 training_loss 0.14037293702363968 test_loss: 0.12920858860015869
epoch: 54 training_loss 0.13592493712902068 test_loss: 0.14510542154312134
epoch: 55 training_loss 0.1380893676355481 test_loss: 0.12181248664855956
epoch: 56 training_loss 0.13812076281756164 test_loss: 0.133205783367157
epoch: 57 training_loss 0.13545107167214154 test_loss: 0.1366703152656555
epoch: 58 training_loss 0.1318887000158429 test_loss: 0.13500075340270995
epoch: 59 training_loss 0.13267820950597525 test_loss: 0.13560945987701417
epoch: 60 training_loss 0.13532204540446402 test_loss: 0.1382036566734314
epoch: 61 training_loss 0.1313048777729273 test_loss: 0.1515416383743286
epoch: 62 training_loss 0.1424246396869421 test_loss: 0.11711727380752564
epoch: 63 training_loss 0.1396972083672881 test_loss: 0.13362044095993042
epoch: 64 training_loss 0.13597287654876708 test_loss: 0.14020376205444335
epoch: 65 training_loss 0.12963424149900674 test_loss: 0.12138065099716186
epoch: 66 training_loss 0.13376682695001363 test_loss: 0.13316160440444946
epoch: 67 training_loss 0.1310284985229373 test_loss: 0.11885732412338257
epoch: 68 training_loss 0.1301646399870515 test_loss: 0.12659250497817992
epoch: 69 training_loss 0.14304985210299492 test_loss: 0.1290441632270813
epoch: 70 training_loss 0.13882622726261615 test_loss: 0.12229406833648682
epoch: 71 training_loss 0.13280944108963014 test_loss: 0.1313895583152771
epoch: 72 training_loss 0.12946296524256468 test_loss: 0.1386592745780945
epoch: 73 training_loss 0.13306894797831773 test_loss: 0.12313425540924072
epoch: 74 training_loss 0.13417111929506065 test_loss: 0.12852517366409302
epoch: 75 training_loss 0.1356032781675458 test_loss: 0.11884911060333252
epoch: 76 training_loss 0.13575336866080762 test_loss: 0.12867975234985352
epoch: 77 training_loss 0.1393016492947936 test_loss: 0.12635085582733155
epoch: 78 training_loss 0.13141368247568608 test_loss: 0.13986166715621948
epoch: 79 training_loss 0.1322723798453808 test_loss: 0.13205289840698242
epoch: 80 training_loss 0.13011854227632283 test_loss: 0.1245422601699829
epoch: 81 training_loss 0.13303256291896104 test_loss: 0.10586760044097901
epoch: 82 training_loss 0.1389326775819063 test_loss: 0.14466607570648193
epoch: 83 training_loss 0.13401783730834724 test_loss: 0.129391348361969
epoch: 84 training_loss 0.14343315850943328 test_loss: 0.1345170855522156
epoch: 85 training_loss 0.12702110078185797 test_loss: 0.12989505529403686
epoch: 86 training_loss 0.13450252566486598 test_loss: 0.13677202463150023
epoch: 87 training_loss 0.13466237366199493 test_loss: 0.13371158838272096
epoch: 88 training_loss 0.1338917674124241 test_loss: 0.1247869610786438
epoch: 89 training_loss 0.13678235206753014 test_loss: 0.12564305067062378
epoch: 90 training_loss 0.1349786954373121 test_loss: 0.13334642648696898
epoch: 91 training_loss 0.12205335756763816 test_loss: 0.1402842879295349
epoch: 92 training_loss 0.13938206546008586 test_loss: 0.12418196201324463
epoch: 93 training_loss 0.13970874510705472 test_loss: 0.12245618104934693
epoch: 94 training_loss 0.13600238136947154 test_loss: 0.11371755599975586
epoch: 95 training_loss 0.12563291169703006 test_loss: 0.13764283657073975
epoch: 96 training_loss 0.13957150850445033 test_loss: 0.14833234548568724
epoch: 97 training_loss 0.13131327204406262 test_loss: 0.13580516576766968
epoch: 98 training_loss 0.14116097763180732 test_loss: 0.1258825182914734
epoch: 99 training_loss 0.12818622024729848 test_loss: 0.13273634910583496
epoch: 100 training_loss 0.14433712605386972 test_loss: 0.12331377267837525
epoch: 101 training_loss 0.13720236625522375 test_loss: 0.13996901512145996
epoch: 102 training_loss 0.13569332756102084 test_loss: 0.1499537229537964
epoch: 103 training_loss 0.1298658163845539 test_loss: 0.13830058574676513
epoch: 104 training_loss 0.13381999205797912 test_loss: 0.11421140432357788
epoch: 105 training_loss 0.13286601167172193 test_loss: 0.1364474415779114
epoch: 106 training_loss 0.14581717345863582 test_loss: 0.1133693814277649
epoch: 107 training_loss 0.1325169688463211 test_loss: 0.13852715492248535
epoch: 108 training_loss 0.12991323683410883 test_loss: 0.14009777307510377
epoch: 109 training_loss 0.12958643078804016 test_loss: 0.13530791997909547
epoch: 110 training_loss 0.1309838880598545 test_loss: 0.1380302667617798
epoch: 111 training_loss 0.13267200347036123 test_loss: 0.12209122180938721
epoch: 112 training_loss 0.12379296064376831 test_loss: 0.1360846519470215
epoch: 113 training_loss 0.1356413782015443 test_loss: 0.12944034337997437
epoch: 114 training_loss 0.1265351140126586 test_loss: 0.11932920217514038
epoch: 115 training_loss 0.13570354595780373 test_loss: 0.12051419019699097
epoch: 116 training_loss 0.13252763971686363 test_loss: 0.13796168565750122
epoch: 117 training_loss 0.135733039714396 test_loss: 0.13136762380599976
epoch: 118 training_loss 0.13162785373628139 test_loss: 0.13741655349731446
epoch: 119 training_loss 0.13236605796962977 test_loss: 0.12051665782928467
epoch: 120 training_loss 0.13254943124949933 test_loss: 0.12996726036071776
epoch: 121 training_loss 0.1298971698433161 test_loss: 0.13047387599945068
epoch: 122 training_loss 0.13388159591704607 test_loss: 0.1258091449737549
epoch: 123 training_loss 0.12347565796226263 test_loss: 0.1466062307357788
epoch: 124 training_loss 0.1330222348496318 test_loss: 0.11927663087844849
epoch: 125 training_loss 0.12644287820905448 test_loss: 0.12706202268600464
epoch: 126 training_loss 0.13333977449685336 test_loss: 0.1308514356613159
epoch: 127 training_loss 0.1369597417488694 test_loss: 0.12341705560684205
epoch: 128 training_loss 0.12759246058762075 test_loss: 0.13808828592300415
epoch: 129 training_loss 0.12877047039568423 test_loss: 0.14363791942596435
epoch: 130 training_loss 0.1285510141029954 test_loss: 0.1255250096321106
epoch: 131 training_loss 0.13131247624754905 test_loss: 0.12964816093444825
epoch: 132 training_loss 0.13038233637809754 test_loss: 0.13089969158172607
epoch: 133 training_loss 0.13166116386651994 test_loss: 0.10195984840393066
epoch: 134 training_loss 0.12825526855885983 test_loss: 0.13850233554840088
epoch: 135 training_loss 0.11940801244229078 test_loss: 0.11558241844177246
epoch: 136 training_loss 0.12263141583651305 test_loss: 0.13104852437973022
epoch: 137 training_loss 0.1339445251598954 test_loss: 0.13812546730041503
epoch: 138 training_loss 0.13795710690319538 test_loss: 0.13517023324966432
epoch: 139 training_loss 0.1332501309365034 test_loss: 0.12886346578598024
epoch: 140 training_loss 0.13580320227891207 test_loss: 0.11787846088409423
epoch: 141 training_loss 0.12760525930672884 test_loss: 0.1344151258468628
epoch: 142 training_loss 0.12967326670885085 test_loss: 0.13251382112503052
epoch: 143 training_loss 0.1304214686527848 test_loss: 0.11375964879989624
epoch: 144 training_loss 0.12949071925133468 test_loss: 0.14355632066726684
epoch: 145 training_loss 0.12665982004255055 test_loss: 0.1387777090072632
epoch: 146 training_loss 0.12508371129631995 test_loss: 0.1277169704437256
epoch: 147 training_loss 0.129508468862623 test_loss: 0.12599775791168213
epoch: 148 training_loss 0.13065731056034566 test_loss: 0.14233511686325073
epoch: 149 training_loss 0.12733486518263817 test_loss: 0.1294493317604065
epoch: 0 training_loss 0.28835238128900526 test_loss: 0.21432790756225586
epoch: 1 training_loss 0.17068571865558624 test_loss: 0.1620452880859375
epoch: 2 training_loss 0.16077712006866932 test_loss: 0.14501185417175294
epoch: 3 training_loss 0.1524579981714487 test_loss: 0.1688152074813843
epoch: 4 training_loss 0.1555664800107479 test_loss: 0.2515387535095215
epoch: 5 training_loss 0.15724737800657748 test_loss: 0.180980908870697
epoch: 6 training_loss 0.16298501692712308 test_loss: 0.16193095445632935
epoch: 7 training_loss 0.14487802390009163 test_loss: 0.15325043201446534
epoch: 8 training_loss 0.1414847095683217 test_loss: 0.15981987714767457
epoch: 9 training_loss 0.14149821821600198 test_loss: 0.138083815574646
epoch: 10 training_loss 0.14051427636295558 test_loss: 0.15116168260574342
epoch: 11 training_loss 0.13865354079753162 test_loss: 0.16937628984451295
epoch: 12 training_loss 0.15129263345152139 test_loss: 0.188485848903656
epoch: 13 training_loss 0.14061201497912407 test_loss: 0.1578724980354309
epoch: 14 training_loss 0.1444063862413168 test_loss: 0.14863622188568115
epoch: 15 training_loss 0.14971939219161867 test_loss: 0.13959821462631225
epoch: 16 training_loss 0.14133713643997908 test_loss: 0.15626715421676635
epoch: 17 training_loss 0.14569131676107644 test_loss: 0.14969091415405272
epoch: 18 training_loss 0.14913484174758196 test_loss: 0.16261962652206421
epoch: 19 training_loss 0.14177373103797436 test_loss: 0.14803529977798463
epoch: 20 training_loss 0.1322366388887167 test_loss: 0.15803177356719972
epoch: 21 training_loss 0.13027291595935822 test_loss: 0.17297109365463256
epoch: 22 training_loss 0.13813750252127646 test_loss: 0.1481919527053833
epoch: 23 training_loss 0.1416661725193262 test_loss: 0.1362806439399719
epoch: 24 training_loss 0.12839314606040717 test_loss: 0.13691717386245728
epoch: 25 training_loss 0.1319648392125964 test_loss: 0.14862815141677857
epoch: 26 training_loss 0.13954771894961596 test_loss: 0.15359364748001098
epoch: 27 training_loss 0.1370694113150239 test_loss: 0.16378397941589357
epoch: 28 training_loss 0.1355710607022047 test_loss: 0.13927470445632933
epoch: 29 training_loss 0.1273162543028593 test_loss: 0.1596961498260498
epoch: 30 training_loss 0.1453943556919694 test_loss: 0.12797067165374756
epoch: 31 training_loss 0.13607883494347334 test_loss: 0.1511433720588684
epoch: 32 training_loss 0.13015400789678097 test_loss: 0.1391808271408081
epoch: 33 training_loss 0.14422377299517394 test_loss: 0.15040431022644044
epoch: 34 training_loss 0.13263051968067885 test_loss: 0.15237706899642944
epoch: 35 training_loss 0.13996460009366274 test_loss: 0.1487185001373291
epoch: 36 training_loss 0.13805380452424287 test_loss: 0.1655275821685791
epoch: 37 training_loss 0.13333600372076035 test_loss: 0.14699442386627198
epoch: 38 training_loss 0.1390731792151928 test_loss: 0.14808509349822999
epoch: 39 training_loss 0.13345627278089522 test_loss: 0.12876201868057252
epoch: 40 training_loss 0.12644469738006592 test_loss: 0.13608490228652953
epoch: 41 training_loss 0.13443642854690552 test_loss: 0.1595369815826416
epoch: 42 training_loss 0.13015256635844707 test_loss: 0.13308225870132445
epoch: 43 training_loss 0.140373556651175 test_loss: 0.14735444784164428
epoch: 44 training_loss 0.13287469863891602 test_loss: 0.15085028409957885
epoch: 45 training_loss 0.12568569853901862 test_loss: 0.16088913679122924
epoch: 46 training_loss 0.1346614748612046 test_loss: 0.14684990644454957
epoch: 47 training_loss 0.1364869925379753 test_loss: 0.14299265146255494
epoch: 48 training_loss 0.12441178672015667 test_loss: 0.11618399620056152
epoch: 49 training_loss 0.12742430865764617 test_loss: 0.14710211753845215
epoch: 50 training_loss 0.12696488298475742 test_loss: 0.1446618914604187
epoch: 51 training_loss 0.12577618982642888 test_loss: 0.1421691060066223
epoch: 52 training_loss 0.13336620640009642 test_loss: 0.13005801439285278
epoch: 53 training_loss 0.14294464753940703 test_loss: 0.14350526332855223
epoch: 54 training_loss 0.14056274693459272 test_loss: 0.1532772421836853
epoch: 55 training_loss 0.13968146540224552 test_loss: 0.1301909327507019
epoch: 56 training_loss 0.1320316368341446 test_loss: 0.16667728424072265
epoch: 57 training_loss 0.12776370100677015 test_loss: 0.13919854164123535
epoch: 58 training_loss 0.12969841429963708 test_loss: 0.12175850868225098
epoch: 59 training_loss 0.12620271943509578 test_loss: 0.13968716859817504
epoch: 60 training_loss 0.12787536092102528 test_loss: 0.1358272671699524
epoch: 61 training_loss 0.12602260932326317 test_loss: 0.13149260282516478
epoch: 62 training_loss 0.14391647011041642 test_loss: 0.12596943378448486
epoch: 63 training_loss 0.1338404841348529 test_loss: 0.14597511291503906
epoch: 64 training_loss 0.12977677177637814 test_loss: 0.1293354868888855
epoch: 65 training_loss 0.12425587050616742 test_loss: 0.13081763982772826
epoch: 66 training_loss 0.1339133459329605 test_loss: 0.12581686973571776
epoch: 67 training_loss 0.12908075049519538 test_loss: 0.16449625492095948
epoch: 68 training_loss 0.13342387400567532 test_loss: 0.12060682773590088
epoch: 69 training_loss 0.12797200169414283 test_loss: 0.1391867756843567
epoch: 70 training_loss 0.12757026232779026 test_loss: 0.11888363361358642
epoch: 71 training_loss 0.12672168619930743 test_loss: 0.14470815658569336
epoch: 72 training_loss 0.13214313004165887 test_loss: 0.1471519351005554
epoch: 73 training_loss 0.12983433183282614 test_loss: 0.1461243987083435
epoch: 74 training_loss 0.1350738149508834 test_loss: 0.1313929796218872
epoch: 75 training_loss 0.12867611587047578 test_loss: 0.14919313192367553
epoch: 76 training_loss 0.1279355400428176 test_loss: 0.1317967414855957
epoch: 77 training_loss 0.14267395794391632 test_loss: 0.1189589262008667
epoch: 78 training_loss 0.12958373442292215 test_loss: 0.1541154384613037
epoch: 79 training_loss 0.13600819751620294 test_loss: 0.14847131967544555
epoch: 80 training_loss 0.13096361957490443 test_loss: 0.15452655553817748
epoch: 81 training_loss 0.13100958343595268 test_loss: 0.14965569972991943
epoch: 82 training_loss 0.13077131774276496 test_loss: 0.12421770095825195
epoch: 83 training_loss 0.12679586626589298 test_loss: 0.13167216777801513
epoch: 84 training_loss 0.12631992861628533 test_loss: 0.13159263134002686
epoch: 85 training_loss 0.13343762267380954 test_loss: 0.15348522663116454
epoch: 86 training_loss 0.13225085325539113 test_loss: 0.13575527667999268
epoch: 87 training_loss 0.13128164183348418 test_loss: 0.1456351637840271
epoch: 88 training_loss 0.13082554221153259 test_loss: 0.12112967967987061
epoch: 89 training_loss 0.13255186062306165 test_loss: 0.1422939896583557
epoch: 90 training_loss 0.12484123095870019 test_loss: 0.13873670101165772
epoch: 91 training_loss 0.1264972362294793 test_loss: 0.1314557671546936
epoch: 92 training_loss 0.13180665049701928 test_loss: 0.13907036781311036
epoch: 93 training_loss 0.13092939350754024 test_loss: 0.148987340927124
epoch: 94 training_loss 0.13489771336317063 test_loss: 0.15795143842697143
epoch: 95 training_loss 0.13669845949858428 test_loss: 0.14041515588760375
epoch: 96 training_loss 0.125632981993258 test_loss: 0.15355643033981323
epoch: 97 training_loss 0.12824766021221876 test_loss: 0.12614269256591798
epoch: 98 training_loss 0.1273979951813817 test_loss: 0.12816139459609985
epoch: 99 training_loss 0.1415217887982726 test_loss: 0.14610354900360106
epoch: 100 training_loss 0.12596693594008684 test_loss: 0.13836863040924072
epoch: 101 training_loss 0.1327208200469613 test_loss: 0.14183351993560792
epoch: 102 training_loss 0.1300839266180992 test_loss: 0.13516284227371217
epoch: 103 training_loss 0.13035773739218712 test_loss: 0.1386415481567383
epoch: 104 training_loss 0.12763430412858726 test_loss: 0.13496474027633668
epoch: 105 training_loss 0.1302124872431159 test_loss: 0.11216377019882202
epoch: 106 training_loss 0.13129174593836068 test_loss: 0.13305097818374634
epoch: 107 training_loss 0.12892011169344186 test_loss: 0.14658104181289672
epoch: 108 training_loss 0.12804726593196392 test_loss: 0.15414626598358155
epoch: 109 training_loss 0.13314163103699683 test_loss: 0.13663433790206908
epoch: 110 training_loss 0.13304091714322566 test_loss: 0.12899001836776733
epoch: 111 training_loss 0.13274357695132494 test_loss: 0.1352885127067566
epoch: 112 training_loss 0.13051118820905686 test_loss: 0.12910134792327882
epoch: 113 training_loss 0.1326298414170742 test_loss: 0.13738750219345092
epoch: 114 training_loss 0.12344110827893019 test_loss: 0.13437641859054567
epoch: 115 training_loss 0.13259377427399158 test_loss: 0.1419618010520935
epoch: 116 training_loss 0.12079264361411333 test_loss: 0.12077243328094482
epoch: 117 training_loss 0.12601868130266666 test_loss: 0.1321253538131714
epoch: 118 training_loss 0.1362023326382041 test_loss: 0.13285058736801147
epoch: 119 training_loss 0.13030657958239317 test_loss: 0.14028877019882202
epoch: 120 training_loss 0.1322755018249154 test_loss: 0.14794301986694336
epoch: 121 training_loss 0.12594848837703465 test_loss: 0.13900986909866334
epoch: 122 training_loss 0.1307341042533517 test_loss: 0.14706103801727294
epoch: 123 training_loss 0.1319300463050604 test_loss: 0.14718958139419555
epoch: 124 training_loss 0.1264213640242815 test_loss: 0.128132963180542
epoch: 125 training_loss 0.13194753898307682 test_loss: 0.14297860860824585
epoch: 126 training_loss 0.12923490576446056 test_loss: 0.14930788278579712
epoch: 127 training_loss 0.12800446085631847 test_loss: 0.15390727519989014
epoch: 128 training_loss 0.1317615120485425 test_loss: 0.12773014307022096
epoch: 129 training_loss 0.1320413190126419 test_loss: 0.14488505125045775
epoch: 130 training_loss 0.13021496076136826 test_loss: 0.139210844039917
epoch: 131 training_loss 0.12618904575705528 test_loss: 0.14027880430221557
epoch: 132 training_loss 0.1316840260475874 test_loss: 0.1407001256942749
epoch: 133 training_loss 0.13864957008510828 test_loss: 0.13834019899368286
epoch: 134 training_loss 0.12502074295654894 test_loss: 0.13083117008209227
epoch: 135 training_loss 0.13206683147698642 test_loss: 0.1450184941291809
epoch: 136 training_loss 0.12519371595233678 test_loss: 0.13492228984832763
epoch: 137 training_loss 0.12245385434478522 test_loss: 0.14147300720214845
epoch: 138 training_loss 0.1286671083420515 test_loss: 0.14135416746139526
epoch: 139 training_loss 0.13029857765883207 test_loss: 0.16233402490615845
epoch: 140 training_loss 0.13308167077600955 test_loss: 0.16270334720611573
epoch: 141 training_loss 0.13180176872760058 test_loss: 0.1446260929107666
epoch: 142 training_loss 0.12389805562794208 test_loss: 0.14701136350631713
epoch: 143 training_loss 0.12553917109966278 test_loss: 0.15013214349746704
epoch: 144 training_loss 0.1200385782495141 test_loss: 0.14072599411010742
epoch: 145 training_loss 0.12660046249628068 test_loss: 0.13336042165756226
epoch: 146 training_loss 0.12532221391797066 test_loss: 0.13628957271575928
epoch: 147 training_loss 0.11774909449741244 test_loss: 0.14558616876602173
epoch: 148 training_loss 0.12750453770160675 test_loss: 0.14883981943130492
epoch: 149 training_loss 0.12770837560296058 test_loss: 0.15553826093673706
epoch: 0 training_loss 0.29197594068944455 test_loss: 0.2128450632095337
epoch: 1 training_loss 0.1842449151724577 test_loss: 0.1641839861869812
epoch: 2 training_loss 0.1749424208700657 test_loss: 0.13793963193893433
epoch: 3 training_loss 0.15879186870530249 test_loss: 0.16849927902221679
epoch: 4 training_loss 0.17236485928297043 test_loss: 0.13700944185256958
epoch: 5 training_loss 0.15796625450253488 test_loss: 0.1418477177619934
epoch: 6 training_loss 0.16160166326910258 test_loss: 0.13819756507873535
epoch: 7 training_loss 0.16136721581220625 test_loss: 0.14969910383224488
epoch: 8 training_loss 0.15729172177612782 test_loss: 0.12784072160720825
epoch: 9 training_loss 0.1597781365737319 test_loss: 0.14117218255996705
epoch: 10 training_loss 0.16547233488410712 test_loss: 0.14638108015060425
epoch: 11 training_loss 0.1531262432783842 test_loss: 0.13796515464782716
epoch: 12 training_loss 0.1428645684197545 test_loss: 0.1344433307647705
epoch: 13 training_loss 0.15009530879557131 test_loss: 0.14133543968200685
epoch: 14 training_loss 0.15076462145894765 test_loss: 0.13901928663253785
epoch: 15 training_loss 0.14362293004989624 test_loss: 0.13241854906082154
epoch: 16 training_loss 0.14869552202522754 test_loss: 0.13774330615997316
epoch: 17 training_loss 0.14587432097643613 test_loss: 0.12617602348327636
epoch: 18 training_loss 0.1475466148555279 test_loss: 0.14898345470428467
epoch: 19 training_loss 0.14645286656916143 test_loss: 0.12583447694778443
epoch: 20 training_loss 0.14739110965281724 test_loss: 0.13235843181610107
epoch: 21 training_loss 0.1464606357179582 test_loss: 0.1512356996536255
epoch: 22 training_loss 0.14576955735683442 test_loss: 0.13350386619567872
epoch: 23 training_loss 0.14447143107652663 test_loss: 0.13076136112213135
epoch: 24 training_loss 0.13667090237140656 test_loss: 0.14709317684173584
epoch: 25 training_loss 0.14434223864227533 test_loss: 0.14040204286575317
epoch: 26 training_loss 0.14599619176238776 test_loss: 0.14588812589645386
epoch: 27 training_loss 0.1481762581318617 test_loss: 0.14390214681625366
epoch: 28 training_loss 0.14719094548374415 test_loss: 0.14305489063262938
epoch: 29 training_loss 0.13801256269216539 test_loss: 0.12992767095565796
epoch: 30 training_loss 0.13039784155786038 test_loss: 0.15156205892562866
epoch: 31 training_loss 0.1464595479518175 test_loss: 0.13318979740142822
epoch: 32 training_loss 0.14303895343095063 test_loss: 0.14237788915634156
epoch: 33 training_loss 0.1475750956311822 test_loss: 0.14026643037796022
epoch: 34 training_loss 0.1369769871979952 test_loss: 0.13939862251281737
epoch: 35 training_loss 0.1404920481145382 test_loss: 0.16033486127853394
epoch: 36 training_loss 0.13294935032725333 test_loss: 0.13601189851760864
epoch: 37 training_loss 0.15221638016402722 test_loss: 0.12812076807022094
epoch: 38 training_loss 0.1451966145634651 test_loss: 0.14073556661605835
epoch: 39 training_loss 0.13389692503958942 test_loss: 0.12966630458831788
epoch: 40 training_loss 0.14193125180900096 test_loss: 0.14075086116790772
epoch: 41 training_loss 0.15255026929080487 test_loss: 0.1363042712211609
epoch: 42 training_loss 0.1397554248943925 test_loss: 0.1294625997543335
epoch: 43 training_loss 0.14113688666373492 test_loss: 0.14462069272994996
epoch: 44 training_loss 0.1410269694775343 test_loss: 0.13150341510772706
epoch: 45 training_loss 0.13803529486060143 test_loss: 0.14166126251220704
epoch: 46 training_loss 0.12976125068962574 test_loss: 0.13732348680496215
epoch: 47 training_loss 0.1421385046467185 test_loss: 0.13794020414352418
epoch: 48 training_loss 0.14266085110604762 test_loss: 0.12922996282577515
epoch: 49 training_loss 0.1447295045107603 test_loss: 0.13523952960968016
epoch: 50 training_loss 0.14285618387162685 test_loss: 0.12761414051055908
epoch: 51 training_loss 0.1384988271072507 test_loss: 0.12821195125579835
epoch: 52 training_loss 0.13680745378136636 test_loss: 0.14525810480117798
epoch: 53 training_loss 0.14343862157315015 test_loss: 0.13931678533554076
epoch: 54 training_loss 0.13535475812852382 test_loss: 0.13029080629348755
epoch: 55 training_loss 0.13685416443273424 test_loss: 0.14386042356491088
epoch: 56 training_loss 0.14317633416503667 test_loss: 0.1301451325416565
epoch: 57 training_loss 0.13739835765212774 test_loss: 0.13940736055374145
epoch: 58 training_loss 0.14014954961836337 test_loss: 0.1325312852859497
epoch: 59 training_loss 0.1403953631594777 test_loss: 0.15389177799224854
epoch: 60 training_loss 0.14532470341771841 test_loss: 0.1280530333518982
epoch: 61 training_loss 0.1425788003951311 test_loss: 0.1382611870765686
epoch: 62 training_loss 0.13227861806750296 test_loss: 0.1365242600440979
epoch: 63 training_loss 0.1399115953594446 test_loss: 0.13168116807937622
epoch: 64 training_loss 0.13774302560836077 test_loss: 0.13016946315765382
epoch: 65 training_loss 0.12938146287575364 test_loss: 0.13639763593673707
epoch: 66 training_loss 0.14152076121419668 test_loss: 0.12992870807647705
epoch: 67 training_loss 0.14068131424486638 test_loss: 0.13838839530944824
epoch: 68 training_loss 0.14456038516014813 test_loss: 0.1330418109893799
epoch: 69 training_loss 0.13754212010651826 test_loss: 0.12241233587265014
epoch: 70 training_loss 0.13938956804573535 test_loss: 0.13311614990234374
epoch: 71 training_loss 0.1380493066459894 test_loss: 0.14029887914657593
epoch: 72 training_loss 0.1398574621975422 test_loss: 0.15524150133132936
epoch: 73 training_loss 0.1417239224910736 test_loss: 0.1331096649169922
epoch: 74 training_loss 0.13998628955334425 test_loss: 0.11944259405136108
epoch: 75 training_loss 0.14111360661685468 test_loss: 0.13842315673828126
epoch: 76 training_loss 0.14243293561041356 test_loss: 0.1419276237487793
epoch: 77 training_loss 0.12969181694090368 test_loss: 0.14474034309387207
epoch: 78 training_loss 0.1460424861125648 test_loss: 0.1303868055343628
epoch: 79 training_loss 0.13381612729281187 test_loss: 0.12415405511856079
epoch: 80 training_loss 0.13853852391242982 test_loss: 0.14470127820968628
epoch: 81 training_loss 0.13721512962132693 test_loss: 0.13313337564468383
epoch: 82 training_loss 0.13405281180515885 test_loss: 0.12486209869384765
epoch: 83 training_loss 0.13557569671422243 test_loss: 0.13421529531478882
epoch: 84 training_loss 0.14294778265058994 test_loss: 0.13145463466644286
epoch: 85 training_loss 0.13454441100358963 test_loss: 0.14493499994277953
epoch: 86 training_loss 0.13050494991242886 test_loss: 0.13389527797698975
epoch: 87 training_loss 0.13635499708354473 test_loss: 0.14491163492202758
epoch: 88 training_loss 0.13454143594950438 test_loss: 0.1377665400505066
epoch: 89 training_loss 0.13628755498677492 test_loss: 0.13371073007583617
epoch: 90 training_loss 0.13107241835445166 test_loss: 0.12165249586105346
epoch: 91 training_loss 0.13747798096388578 test_loss: 0.13488805294036865
epoch: 92 training_loss 0.1409294992685318 test_loss: 0.12326908111572266
epoch: 93 training_loss 0.13862758416682483 test_loss: 0.14103676080703736
epoch: 94 training_loss 0.13871577028185128 test_loss: 0.13964058160781861
epoch: 95 training_loss 0.14172210685908795 test_loss: 0.11854186058044433
epoch: 96 training_loss 0.13426002079620958 test_loss: 0.13634785413742065
epoch: 97 training_loss 0.14302447564899923 test_loss: 0.12894258499145508
epoch: 98 training_loss 0.13097920909523963 test_loss: 0.12292908430099488
epoch: 99 training_loss 0.1380197124555707 test_loss: 0.13805067539215088
epoch: 100 training_loss 0.13577391862869262 test_loss: 0.13760522603988648
epoch: 101 training_loss 0.14098016697913407 test_loss: 0.13562692403793336
epoch: 102 training_loss 0.13767541337758302 test_loss: 0.13539067506790162
epoch: 103 training_loss 0.14436124693602323 test_loss: 0.154649817943573
epoch: 104 training_loss 0.13564100209623575 test_loss: 0.12976812124252318
epoch: 105 training_loss 0.13248513497412204 test_loss: 0.1301180601119995
epoch: 106 training_loss 0.13848473366349937 test_loss: 0.12545937299728394
epoch: 107 training_loss 0.13802960369735956 test_loss: 0.12304450273513794
epoch: 108 training_loss 0.13721312776207925 test_loss: 0.11011327505111694
epoch: 109 training_loss 0.13825469210743904 test_loss: 0.12631163597106934
epoch: 110 training_loss 0.1367626893147826 test_loss: 0.12689393758773804
epoch: 111 training_loss 0.13006983492523433 test_loss: 0.14258843660354614
epoch: 112 training_loss 0.13398678958415985 test_loss: 0.13532801866531372
epoch: 113 training_loss 0.1395994106307626 test_loss: 0.11835991144180298
epoch: 114 training_loss 0.14058968972414732 test_loss: 0.13533130884170533
epoch: 115 training_loss 0.13832256611436605 test_loss: 0.15687175989151
epoch: 116 training_loss 0.12866899311542512 test_loss: 0.12890303134918213
epoch: 117 training_loss 0.13040370501577855 test_loss: 0.13243941068649293
epoch: 118 training_loss 0.1310342850908637 test_loss: 0.12401081323623657
epoch: 119 training_loss 0.13064373698085546 test_loss: 0.14579225778579713
epoch: 120 training_loss 0.13648189775645733 test_loss: 0.13667930364608766
epoch: 121 training_loss 0.1305811432003975 test_loss: 0.16176749467849733
epoch: 122 training_loss 0.13849448774009943 test_loss: 0.14156498908996581
epoch: 123 training_loss 0.13160404775291681 test_loss: 0.1358893632888794
epoch: 124 training_loss 0.1311294987425208 test_loss: 0.11686317920684815
epoch: 125 training_loss 0.1339676970615983 test_loss: 0.1388092279434204
epoch: 126 training_loss 0.140010231025517 test_loss: 0.13797104358673096
epoch: 127 training_loss 0.13218964375555514 test_loss: 0.11994278430938721
epoch: 128 training_loss 0.13955729641020298 test_loss: 0.12945433855056762
epoch: 129 training_loss 0.13818819098174573 test_loss: 0.13179869651794435
epoch: 130 training_loss 0.12794890400022269 test_loss: 0.12083324193954467
epoch: 131 training_loss 0.13261711832135917 test_loss: 0.1231913447380066
epoch: 132 training_loss 0.13403749909251927 test_loss: 0.12618632316589357
epoch: 133 training_loss 0.1308398562297225 test_loss: 0.13749910593032838
epoch: 134 training_loss 0.14272105611860753 test_loss: 0.12374697923660279
epoch: 135 training_loss 0.13608916245400907 test_loss: 0.14322484731674195
epoch: 136 training_loss 0.1351285767927766 test_loss: 0.12801328897476197
epoch: 137 training_loss 0.13304515197873115 test_loss: 0.14552037715911864
epoch: 138 training_loss 0.13236823610961437 test_loss: 0.13130061626434325
epoch: 139 training_loss 0.14758565712720156 test_loss: 0.13742830753326415
epoch: 140 training_loss 0.13214622942730783 test_loss: 0.12860746383666993
epoch: 141 training_loss 0.12994072549045085 test_loss: 0.14182448387145996
epoch: 142 training_loss 0.12615211270749568 test_loss: 0.14808326959609985
epoch: 143 training_loss 0.13686927527189255 test_loss: 0.13306286334991455
epoch: 144 training_loss 0.13119700677692891 test_loss: 0.14880213737487794
epoch: 145 training_loss 0.14587051652371882 test_loss: 0.1408067226409912
epoch: 146 training_loss 0.13262713670730591 test_loss: 0.13385931253433228
epoch: 147 training_loss 0.13588363073766233 test_loss: 0.14664607048034667
epoch: 148 training_loss 0.13333242267370224 test_loss: 0.1256351351737976
epoch: 149 training_loss 0.12299803793430328 test_loss: 0.12982382774353027
episode: 0 training return: -1008.8566192769918
episode: 1 training return: -1004.5434734088647
episode: 2 training return: -1058.2532797682632
episode: 3 training return: -1006.9151870459766
epoch: 1 test_true_pfm: -131.78868461558545 sim_pfm: -968.928826067648
episode: 4 training return: -999.7461434081264
episode: 5 training return: -1004.4102344988266
episode: 6 training return: -1067.0866322952115
episode: 7 training return: -1024.6627583423942
epoch: 2 test_true_pfm: 151.04316282623597 sim_pfm: -996.8561065515945
episode: 8 training return: -1056.3219149521926
episode: 9 training return: -1060.505367835167
episode: 10 training return: -989.4547312499437
episode: 11 training return: -991.0959612097593
epoch: 3 test_true_pfm: 162.3512817305356 sim_pfm: -963.9229065866226
episode: 12 training return: -983.1466643286898
episode: 13 training return: -994.1696686728112
episode: 14 training return: -957.3979662088074
episode: 15 training return: -998.1485637922661
epoch: 4 test_true_pfm: 168.98424271447 sim_pfm: -945.8027333249894
episode: 16 training return: -948.1959260275288
episode: 17 training return: -949.7593721619909
episode: 18 training return: -944.1508222645971
episode: 19 training return: -948.759421663154
epoch: 5 test_true_pfm: 179.10530445574486 sim_pfm: -950.085731133956
episode: 20 training return: -957.1244756139321
episode: 21 training return: -956.9036267702103
episode: 22 training return: -950.027192605559
episode: 23 training return: -939.8898956195754
epoch: 6 test_true_pfm: 163.07343780717102 sim_pfm: -946.0694562243276
episode: 24 training return: -950.935053106972
episode: 25 training return: -948.5102672156171
episode: 26 training return: -944.3673366917819
episode: 27 training return: -936.7055860402595
epoch: 7 test_true_pfm: 174.5853620738269 sim_pfm: -945.3636601482218
episode: 28 training return: -947.0568533300386
episode: 29 training return: -945.3136663166075
episode: 30 training return: -941.7519243115936
episode: 31 training return: -943.127340923815
epoch: 8 test_true_pfm: 141.78291042718317 sim_pfm: -961.6843236379113
episode: 32 training return: -939.1545232065464
episode: 33 training return: -945.2195301201268
episode: 34 training return: -941.205145162299
episode: 35 training return: -928.4731745110181
epoch: 9 test_true_pfm: 210.321402560127 sim_pfm: -894.0515688091882
episode: 36 training return: -916.8506474840331
episode: 37 training return: -902.1489442422975
episode: 38 training return: -883.8212706552525
episode: 39 training return: -913.4623595136255
epoch: 10 test_true_pfm: 244.0816087527096 sim_pfm: -896.0403884327892
episode: 40 training return: -874.3682430294194
episode: 41 training return: -867.4242562009925
episode: 42 training return: -909.1868726260101
episode: 43 training return: -926.099082348136
epoch: 11 test_true_pfm: 240.65166687852533 sim_pfm: -907.4096440357631
episode: 44 training return: -899.8581674405983
episode: 45 training return: -906.273435389746
episode: 46 training return: -890.2356783649694
episode: 47 training return: -902.0251706870666
epoch: 12 test_true_pfm: 214.61173086692816 sim_pfm: -900.5716981887485
episode: 48 training return: -893.7904641003794
episode: 49 training return: -900.4138857799122
episode: 50 training return: -882.6252094358691
episode: 51 training return: -890.1957421835156
epoch: 13 test_true_pfm: 233.19175853827792 sim_pfm: -880.100641588212
episode: 52 training return: -893.5984154568644
episode: 53 training return: -863.0529227227149
episode: 54 training return: -876.4044829334421
episode: 55 training return: -877.2785316304931
epoch: 14 test_true_pfm: 250.68824289299997 sim_pfm: -876.714316252007
episode: 56 training return: -871.746081985174
episode: 57 training return: -896.835776083139
episode: 58 training return: -892.4180460601166
episode: 59 training return: -886.7099949834495
epoch: 15 test_true_pfm: 240.6295420750313 sim_pfm: -884.39765983095
episode: 60 training return: -885.5002752206858
episode: 61 training return: -896.3281832257614
episode: 62 training return: -877.084954736825
episode: 63 training return: -882.8305269528776
epoch: 16 test_true_pfm: 303.61448446295884 sim_pfm: -844.7869430960071
episode: 64 training return: -859.0591303396669
episode: 65 training return: -865.610858064802
episode: 66 training return: -850.2347154859284
episode: 67 training return: -853.0177898258929
epoch: 17 test_true_pfm: 284.3793229096934 sim_pfm: -860.4121416810025
episode: 68 training return: -857.1443748635807
episode: 69 training return: -860.0150379030965
episode: 70 training return: -836.459913760079
episode: 71 training return: -857.0779179020462
epoch: 18 test_true_pfm: 286.4286177718879 sim_pfm: -850.9532380666627
episode: 72 training return: -863.5327053609761
episode: 73 training return: -857.6518851792273
episode: 74 training return: -852.9270305896133
episode: 75 training return: -875.6846637389322
epoch: 19 test_true_pfm: 277.2925686897097 sim_pfm: -850.5492029536655
episode: 76 training return: -855.4472459398509
episode: 77 training return: -861.8944343395403
episode: 78 training return: -872.1859501586287
episode: 79 training return: -866.9230788882418
epoch: 20 test_true_pfm: 277.2757939734363 sim_pfm: -857.2593393277261
episode: 80 training return: -859.9742804130457
episode: 81 training return: -867.8377662391531
episode: 82 training return: -843.4749670590638
episode: 83 training return: -861.6016232597982
epoch: 21 test_true_pfm: 291.0731503174671 sim_pfm: -854.2578205734247
episode: 84 training return: -847.8647908108192
episode: 85 training return: -864.9538134614181
episode: 86 training return: -863.8120410012841
episode: 87 training return: -852.5083986134123
epoch: 22 test_true_pfm: 274.3286370983131 sim_pfm: -853.8071080356817
episode: 88 training return: -861.188855196518
episode: 89 training return: -858.0389308977698
episode: 90 training return: -869.347379151741
episode: 91 training return: -871.57979032261
epoch: 23 test_true_pfm: 270.10533737323334 sim_pfm: -846.963350338839
episode: 92 training return: -862.0811735999844
episode: 93 training return: -852.9497629618304
episode: 94 training return: -850.0816593760165
episode: 95 training return: -854.1180547101976
epoch: 24 test_true_pfm: 275.63276780181536 sim_pfm: -858.6724662391006
episode: 96 training return: -853.1114924955671
episode: 97 training return: -852.9600191222593
episode: 98 training return: -860.2844326290152
episode: 99 training return: -856.7669416018991
epoch: 25 test_true_pfm: 281.83955644040793 sim_pfm: -843.1383144293039
episode: 100 training return: -861.8089008412463
episode: 101 training return: -852.9446825767191
episode: 102 training return: -838.2595461185465
episode: 103 training return: -845.9927545337393
epoch: 26 test_true_pfm: 279.47952873939147 sim_pfm: -841.9413289689182
episode: 104 training return: -846.6501111979445
episode: 105 training return: -860.8112349673403
episode: 106 training return: -859.0856438910482
episode: 107 training return: -860.2370817886377
epoch: 27 test_true_pfm: 267.3959673322854 sim_pfm: -846.3488371390541
episode: 108 training return: -847.185150971977
episode: 109 training return: -864.676007923316
episode: 110 training return: -854.5702967033088
episode: 111 training return: -859.3365599017932
epoch: 28 test_true_pfm: 281.7990061933819 sim_pfm: -848.7397057414933
episode: 112 training return: -847.3521150295999
episode: 113 training return: -847.6742916596421
episode: 114 training return: -856.5614876791598
episode: 115 training return: -851.529127224548
epoch: 29 test_true_pfm: 275.460021450276 sim_pfm: -844.0872093114066
episode: 116 training return: -860.6309964396634
episode: 117 training return: -867.4430962516162
episode: 118 training return: -847.502214523373
episode: 119 training return: -840.5959988911171
epoch: 30 test_true_pfm: 288.9533355999943 sim_pfm: -837.53418075918
episode: 120 training return: -852.4873852934136
episode: 121 training return: -855.5754029836329
episode: 122 training return: -848.2695421307559
episode: 123 training return: -845.9429234540704
epoch: 31 test_true_pfm: 288.26385200270266 sim_pfm: -834.6115904212337
episode: 124 training return: -854.7974849572345
episode: 125 training return: -836.2280497534598
episode: 126 training return: -845.0651042625599
episode: 127 training return: -850.0512390370711
epoch: 32 test_true_pfm: 284.98840043296417 sim_pfm: -842.4917345633613
episode: 128 training return: -854.9429936141865
episode: 129 training return: -853.6839774658929
episode: 130 training return: -853.5139169906968
episode: 131 training return: -840.7367362176573
epoch: 33 test_true_pfm: 284.347726677366 sim_pfm: -845.8749840396637
episode: 132 training return: -845.0582810840393
episode: 133 training return: -860.4778749589046
episode: 134 training return: -855.2100665080283
episode: 135 training return: -851.1687734784767
epoch: 34 test_true_pfm: 278.2555550159022 sim_pfm: -847.4154794085003
episode: 136 training return: -852.0182858348368
episode: 137 training return: -846.4572149087767
episode: 138 training return: -846.4161601383878
episode: 139 training return: -844.6632753843287
epoch: 35 test_true_pfm: 291.8583695956326 sim_pfm: -843.2609971150406
episode: 140 training return: -837.4568786329352
episode: 141 training return: -853.086543869144
episode: 142 training return: -852.0135359673992
episode: 143 training return: -838.2614218349279
epoch: 36 test_true_pfm: 292.84428502544546 sim_pfm: -840.9771413681514
episode: 144 training return: -862.6771946779285
episode: 145 training return: -841.1749527868521
episode: 146 training return: -855.5843828418242
episode: 147 training return: -841.964637382728
epoch: 37 test_true_pfm: 278.6644562273531 sim_pfm: -846.4766103682128
episode: 148 training return: -863.1682102584888
episode: 149 training return: -840.3603382408419
episode: 150 training return: -845.0469528776028
episode: 151 training return: -850.9685407759797
epoch: 38 test_true_pfm: 296.20739545264587 sim_pfm: -837.3901685868854
episode: 152 training return: -835.2090641146518
episode: 153 training return: -849.621831852163
episode: 154 training return: -845.3870444014273
episode: 155 training return: -851.3554756756848
epoch: 39 test_true_pfm: 288.5192579149464 sim_pfm: -842.6375510128886
episode: 156 training return: -850.3948409460579
episode: 157 training return: -851.0188450906453
episode: 158 training return: -848.440935381223
episode: 159 training return: -859.6054410592005
epoch: 40 test_true_pfm: 294.2670688176309 sim_pfm: -829.3507231670732
episode: 160 training return: -844.6918470034256
episode: 161 training return: -855.2850395350323
episode: 162 training return: -861.5801669545909
episode: 163 training return: -869.8313344524797
epoch: 41 test_true_pfm: 286.55392192833636 sim_pfm: -837.7042417278966
episode: 164 training return: -849.7370209007197
episode: 165 training return: -834.621780858699
episode: 166 training return: -846.1516094472723
episode: 167 training return: -858.4899363249249
epoch: 42 test_true_pfm: 286.50513096309174 sim_pfm: -838.2824112098806
episode: 168 training return: -846.0981820210608
episode: 169 training return: -845.5880522544971
episode: 170 training return: -842.1860498690843
episode: 171 training return: -842.6731664898559
epoch: 43 test_true_pfm: 274.4873863094201 sim_pfm: -846.3905047926829
episode: 172 training return: -841.299722229182
episode: 173 training return: -859.3928053877277
episode: 174 training return: -864.1617680718218
episode: 175 training return: -840.1288447033016
epoch: 44 test_true_pfm: 278.51955090817165 sim_pfm: -837.144433720136
episode: 176 training return: -862.8975047537591
episode: 177 training return: -850.2915651136935
episode: 178 training return: -850.953630969537
episode: 179 training return: -836.3258456040674
epoch: 45 test_true_pfm: 289.54205065896394 sim_pfm: -839.9622985127298
episode: 180 training return: -849.2979125390042
episode: 181 training return: -851.3806198205987
episode: 182 training return: -841.602468993817
episode: 183 training return: -847.5469061401257
epoch: 46 test_true_pfm: 297.5449398459118 sim_pfm: -829.7673876732912
episode: 184 training return: -837.8520717506822
episode: 185 training return: -844.8675970935769
episode: 186 training return: -845.1041814061534
episode: 187 training return: -843.2555826311727
epoch: 47 test_true_pfm: 289.78098973677953 sim_pfm: -841.9196130233277
episode: 188 training return: -847.8683010078171
episode: 189 training return: -851.8439003622589
episode: 190 training return: -858.2083846182422
episode: 191 training return: -838.9497976314921
epoch: 48 test_true_pfm: 283.7103781650225 sim_pfm: -834.4190829648102
episode: 192 training return: -846.4618464680829
episode: 193 training return: -839.6889563138205
episode: 194 training return: -849.8561983175581
episode: 195 training return: -850.1308771101052
epoch: 49 test_true_pfm: 285.44399202769296 sim_pfm: -834.4549734766669
episode: 196 training return: -860.57880903973
episode: 197 training return: -843.2288906823526
episode: 198 training return: -839.92961652762
episode: 199 training return: -841.3154365177558
epoch: 50 test_true_pfm: 278.29540742833956 sim_pfm: -838.5387927085654
episode: 200 training return: -840.7870183154324
episode: 201 training return: -835.9608650274154
episode: 202 training return: -835.1517205374705
episode: 203 training return: -835.6827437837223
epoch: 51 test_true_pfm: 284.5504671316853 sim_pfm: -830.5834130947969
episode: 204 training return: -833.976001374736
episode: 205 training return: -856.6943547774766
episode: 206 training return: -846.6741963614259
episode: 207 training return: -845.764278953708
epoch: 52 test_true_pfm: 279.7661432602096 sim_pfm: -837.8574909980963
episode: 208 training return: -848.8076119251803
episode: 209 training return: -845.6492641572837
episode: 210 training return: -838.843347703354
episode: 211 training return: -849.1610653434876
epoch: 53 test_true_pfm: 292.84234875567154 sim_pfm: -830.7238061912325
episode: 212 training return: -854.6925830567936
episode: 213 training return: -835.5135421158027
episode: 214 training return: -835.4051862798764
episode: 215 training return: -849.5120055766706
epoch: 54 test_true_pfm: 281.10004351440836 sim_pfm: -837.342869779121
episode: 216 training return: -854.9602107311619
episode: 217 training return: -833.7030607239407
episode: 218 training return: -852.4959768695335
episode: 219 training return: -843.3134957135045
epoch: 55 test_true_pfm: 278.4701694236383 sim_pfm: -844.0074934928757
episode: 220 training return: -843.4969759981502
episode: 221 training return: -837.9283865308986
episode: 222 training return: -856.6769763853634
episode: 223 training return: -866.1633159157333
epoch: 56 test_true_pfm: 294.43058593944676 sim_pfm: -833.5662050863657
episode: 224 training return: -849.4103989865933
episode: 225 training return: -846.351523457236
episode: 226 training return: -838.5120853846654
episode: 227 training return: -853.333766884803
epoch: 57 test_true_pfm: 277.00538748626576 sim_pfm: -840.7139868168396
episode: 228 training return: -841.7255611810473
episode: 229 training return: -833.9231926696549
episode: 230 training return: -844.3981228081474
episode: 231 training return: -841.3527703551007
epoch: 58 test_true_pfm: 293.85903185967527 sim_pfm: -828.2373215175312
episode: 232 training return: -847.8811030622289
episode: 233 training return: -834.7354860213671
episode: 234 training return: -841.6248673013749
episode: 235 training return: -846.1941078042964
epoch: 59 test_true_pfm: 286.35032529772064 sim_pfm: -835.2608178503198
episode: 236 training return: -833.9419931068688
episode: 237 training return: -846.4329534335325
episode: 238 training return: -844.0317075892393
episode: 239 training return: -838.0619814560949
epoch: 60 test_true_pfm: 317.405273918296 sim_pfm: -816.9254907812214
episode: 240 training return: -834.4224739069498
episode: 241 training return: -843.9927405677118
episode: 242 training return: -845.4070645277742
episode: 243 training return: -846.4350605463222
epoch: 61 test_true_pfm: 293.29241284440536 sim_pfm: -835.153810054463
episode: 244 training return: -846.4607528344247
episode: 245 training return: -829.9532396659544
episode: 246 training return: -835.6252933867663
episode: 247 training return: -843.0874895328614
epoch: 62 test_true_pfm: 293.77666727932893 sim_pfm: -837.162757316616
episode: 248 training return: -848.204590037988
episode: 249 training return: -829.4194198995924
episode: 250 training return: -833.1211774179171
episode: 251 training return: -831.3037999203117
epoch: 63 test_true_pfm: 293.9780786515806 sim_pfm: -835.4484553784305
episode: 252 training return: -849.1926890307672
episode: 253 training return: -850.285520095169
episode: 254 training return: -854.5532805779121
episode: 255 training return: -834.5360664027274
epoch: 64 test_true_pfm: 290.33528422538734 sim_pfm: -828.6259232230863
episode: 256 training return: -836.0656038688857
episode: 257 training return: -863.0219136092319
episode: 258 training return: -839.837146227722
episode: 259 training return: -839.0362168337927
epoch: 65 test_true_pfm: 279.5847516208134 sim_pfm: -838.6127592148123
episode: 260 training return: -835.6259532816161
episode: 261 training return: -842.6255192984253
episode: 262 training return: -836.2181204546325
episode: 263 training return: -847.0227736994684
epoch: 66 test_true_pfm: 294.9543106800351 sim_pfm: -827.4081286236047
episode: 264 training return: -832.0821197511514
episode: 265 training return: -833.3916639450865
episode: 266 training return: -840.8234819688481
episode: 267 training return: -847.9264094155701
epoch: 67 test_true_pfm: 286.2582654609034 sim_pfm: -835.7821020624514
episode: 268 training return: -833.7546488693337
episode: 269 training return: -844.9226814493469
episode: 270 training return: -846.3844635674587
episode: 271 training return: -848.3915414051596
epoch: 68 test_true_pfm: 290.0503697509357 sim_pfm: -833.284476537371
episode: 272 training return: -836.4714287673944
episode: 273 training return: -843.6720647175398
episode: 274 training return: -830.4770841880579
episode: 275 training return: -849.5558687847458
epoch: 69 test_true_pfm: 290.7719111753126 sim_pfm: -827.6591474504106
episode: 276 training return: -836.2813434037124
episode: 277 training return: -834.4061458553048
episode: 278 training return: -833.7568157188512
episode: 279 training return: -831.5512191186281
epoch: 70 test_true_pfm: 283.4706795671346 sim_pfm: -834.8675586845757
episode: 280 training return: -838.2833882682925
episode: 281 training return: -832.3969582460504
episode: 282 training return: -846.264139962934
episode: 283 training return: -828.1715682017813
epoch: 71 test_true_pfm: 296.8494435118967 sim_pfm: -825.9794604162526
episode: 284 training return: -842.9435075185988
episode: 285 training return: -842.2867014273503
episode: 286 training return: -836.0878398164249
episode: 287 training return: -833.0881160042464
epoch: 72 test_true_pfm: 287.79919735276843 sim_pfm: -830.4184721184589
episode: 288 training return: -840.1361762160639
episode: 289 training return: -830.727201446599
episode: 290 training return: -839.224481055573
episode: 291 training return: -832.0432011991818
epoch: 73 test_true_pfm: 288.41538822668775 sim_pfm: -824.5260821457135
episode: 292 training return: -830.8574596165197
episode: 293 training return: -838.5581774087145
episode: 294 training return: -838.7539628828227
episode: 295 training return: -841.648311850255
epoch: 74 test_true_pfm: 285.7573711918505 sim_pfm: -833.4674406917766
episode: 296 training return: -839.6472773150371
episode: 297 training return: -838.2921076314658
episode: 298 training return: -836.4139776289621
episode: 299 training return: -843.3507583258549
epoch: 75 test_true_pfm: 283.2522494041627 sim_pfm: -828.0164329047437
episode: 300 training return: -837.3022033188865
episode: 301 training return: -831.3372957508393
episode: 302 training return: -837.8335461434951
episode: 303 training return: -837.1664217652581
epoch: 76 test_true_pfm: 287.1902689396243 sim_pfm: -824.495160137918
episode: 304 training return: -827.3723008726174
episode: 305 training return: -842.0283940591489
episode: 306 training return: -830.8002003246157
episode: 307 training return: -834.0495091923995
epoch: 77 test_true_pfm: 279.96760000640944 sim_pfm: -831.2220984613847
episode: 308 training return: -837.4250887890048
episode: 309 training return: -833.3746146332394
episode: 310 training return: -840.223692570637
episode: 311 training return: -836.4364180080828
epoch: 78 test_true_pfm: 278.5022163900491 sim_pfm: -839.5249568051746
episode: 312 training return: -834.3831861260821
episode: 313 training return: -836.0560595529748
episode: 314 training return: -839.0467685552435
episode: 315 training return: -832.1293060192623
epoch: 79 test_true_pfm: 285.99859912065864 sim_pfm: -826.3914148963822
episode: 316 training return: -833.1048385511168
episode: 317 training return: -834.6286493211018
episode: 318 training return: -832.4756740992893
episode: 319 training return: -844.3908992994628
epoch: 80 test_true_pfm: 292.4679689991852 sim_pfm: -824.5902446518799
episode: 320 training return: -831.5597211465013
episode: 321 training return: -838.9703190509221
episode: 322 training return: -833.0943263183256
episode: 323 training return: -843.8211385990304
epoch: 81 test_true_pfm: 286.51448084584837 sim_pfm: -824.795324220598
episode: 324 training return: -838.5661203840123
episode: 325 training return: -828.5366985677675
episode: 326 training return: -835.8539297184909
episode: 327 training return: -834.0434896532217
epoch: 82 test_true_pfm: 284.79326336232555 sim_pfm: -826.984770270973
episode: 328 training return: -830.7985570931697
episode: 329 training return: -842.923075665192
episode: 330 training return: -835.1343525914655
episode: 331 training return: -834.6095556496844
epoch: 83 test_true_pfm: 293.51687815061797 sim_pfm: -824.2027056619776
episode: 332 training return: -843.2074919377776
episode: 333 training return: -834.0283465945744
episode: 334 training return: -839.2475566414176
episode: 335 training return: -846.7622578793756
epoch: 84 test_true_pfm: 284.9380105523795 sim_pfm: -824.903574191161
episode: 336 training return: -831.0484026397472
episode: 337 training return: -835.5355352116542
episode: 338 training return: -838.5396880120836
episode: 339 training return: -834.5602235797691
epoch: 85 test_true_pfm: 286.04884508012384 sim_pfm: -827.4302171843261
episode: 340 training return: -838.7558229336212
episode: 341 training return: -834.0983176335601
episode: 342 training return: -836.0033595300672
episode: 343 training return: -836.7790953549077
epoch: 86 test_true_pfm: 283.99923507241897 sim_pfm: -831.3773269550603
episode: 344 training return: -835.7264090365352
episode: 345 training return: -836.9004872812517
episode: 346 training return: -833.6430852189321
episode: 347 training return: -840.1777252959337
epoch: 87 test_true_pfm: 286.086052604554 sim_pfm: -826.916206918946
episode: 348 training return: -829.2227916198215
episode: 349 training return: -826.5630245925707
episode: 350 training return: -832.807325923753
episode: 351 training return: -838.9402067378202
epoch: 88 test_true_pfm: 280.5561541538864 sim_pfm: -829.4252020120188
episode: 352 training return: -837.4412049488581
episode: 353 training return: -837.1094679071215
episode: 354 training return: -839.8506269523914
episode: 355 training return: -840.397784070712
epoch: 89 test_true_pfm: 285.7668204328629 sim_pfm: -824.3999174917163
episode: 356 training return: -845.4127249267989
episode: 357 training return: -845.4480283417703
episode: 358 training return: -836.125014299086
episode: 359 training return: -838.0953079975804
epoch: 90 test_true_pfm: 277.939398919297 sim_pfm: -830.0050945272583
episode: 360 training return: -838.0559168387949
episode: 361 training return: -833.3145717994397
episode: 362 training return: -831.9867276282084
episode: 363 training return: -840.0349763062908
epoch: 91 test_true_pfm: 286.424033782059 sim_pfm: -825.455786926198
episode: 364 training return: -838.3935334423267
episode: 365 training return: -839.3554690162213
episode: 366 training return: -838.5788449541353
episode: 367 training return: -826.7886470810831
epoch: 92 test_true_pfm: 281.2857397762769 sim_pfm: -829.3705471276727
episode: 368 training return: -829.9635759665445
episode: 369 training return: -830.9929556390496
episode: 370 training return: -842.5655824876277
episode: 371 training return: -831.5870130100441
epoch: 93 test_true_pfm: 281.805738635629 sim_pfm: -826.0177855486585
episode: 372 training return: -836.0510516649703
episode: 373 training return: -835.7208038996741
episode: 374 training return: -827.5493373716414
episode: 375 training return: -835.5129413416142
epoch: 94 test_true_pfm: 284.23106109233896 sim_pfm: -826.8587012302673
episode: 376 training return: -835.192740393008
episode: 377 training return: -829.3991034490473
episode: 378 training return: -831.5509356430091
episode: 379 training return: -832.8695849746489
epoch: 95 test_true_pfm: 288.3952989802742 sim_pfm: -826.1121346101263
episode: 380 training return: -839.4967851060849
episode: 381 training return: -830.9210244226387
episode: 382 training return: -838.2693566098214
episode: 383 training return: -831.7267261415176
epoch: 96 test_true_pfm: 286.90823674859877 sim_pfm: -828.1214450260919
episode: 384 training return: -841.8912716226565
episode: 385 training return: -832.8251106602112
episode: 386 training return: -841.6734067856327
episode: 387 training return: -837.638386383754
epoch: 97 test_true_pfm: 285.4292024228186 sim_pfm: -824.898446836029
episode: 388 training return: -831.4234337433535
episode: 389 training return: -833.1615314099744
episode: 390 training return: -830.973911567035
episode: 391 training return: -831.4516565593259
epoch: 98 test_true_pfm: 279.70260486574267 sim_pfm: -833.6865305547777
episode: 392 training return: -827.7713319266335
episode: 393 training return: -838.5401760213958
episode: 394 training return: -831.1904347247272
episode: 395 training return: -834.7896963182069
epoch: 99 test_true_pfm: 283.7873985139541 sim_pfm: -828.8664554659063
episode: 396 training return: -836.7351804302956
episode: 397 training return: -839.3348114232613
episode: 398 training return: -835.2098121650789
episode: 399 training return: -841.2077736393987
epoch: 100 test_true_pfm: 282.7530173430104 sim_pfm: -827.0654304862863
episode: 400 training return: -834.4542482696296
episode: 401 training return: -828.9361749419426
episode: 402 training return: -838.4979420074778
episode: 403 training return: -830.0160848312927
epoch: 101 test_true_pfm: 282.1419322891282 sim_pfm: -828.999751178622
episode: 404 training return: -831.8881772438609
episode: 405 training return: -833.6479575347025
episode: 406 training return: -836.3404258722842
episode: 407 training return: -842.6278611411219
epoch: 102 test_true_pfm: 280.43462936180026 sim_pfm: -827.772057238229
episode: 408 training return: -834.8428431733056
episode: 409 training return: -836.4817681925686
episode: 410 training return: -832.2959067047763
episode: 411 training return: -832.788970710942
epoch: 103 test_true_pfm: 285.41091655587087 sim_pfm: -828.0740590115951
episode: 412 training return: -837.4858073040417
episode: 413 training return: -842.2235296080123
episode: 414 training return: -843.608932826295
episode: 415 training return: -831.0140589268303
epoch: 104 test_true_pfm: 283.2770312640396 sim_pfm: -825.259703219143
episode: 416 training return: -831.3311432497774
episode: 417 training return: -836.3058934852395
episode: 418 training return: -839.6581015318679
episode: 419 training return: -839.7656013389701
epoch: 105 test_true_pfm: 286.5261684416578 sim_pfm: -828.9479883042618
episode: 420 training return: -839.9170747438466
episode: 421 training return: -824.9707211867573
episode: 422 training return: -831.6286912652399
episode: 423 training return: -829.3187669115771
epoch: 106 test_true_pfm: 283.9532938068333 sim_pfm: -826.1015402432735
episode: 424 training return: -833.5002435327978
episode: 425 training return: -830.8053794865293
episode: 426 training return: -830.1318222017126
episode: 427 training return: -842.1770535627547
epoch: 107 test_true_pfm: 293.6018157712218 sim_pfm: -825.0471573720343
episode: 428 training return: -827.5732747420318
episode: 429 training return: -829.3743854549728
episode: 430 training return: -837.5782040425328
episode: 431 training return: -831.3429636708705
epoch: 108 test_true_pfm: 289.6387523826839 sim_pfm: -823.4370260428963
episode: 432 training return: -838.4449291090035
episode: 433 training return: -830.0938890487124
episode: 434 training return: -839.6073408628748
episode: 435 training return: -834.2874632503388
epoch: 109 test_true_pfm: 282.1268122912251 sim_pfm: -869.2852780864305
episode: 436 training return: -836.215802611215
episode: 437 training return: -836.5632537086962
episode: 438 training return: -829.1740717001651
episode: 439 training return: -832.3146864803317
epoch: 110 test_true_pfm: 284.00379619372467 sim_pfm: -827.509424726917
episode: 440 training return: -830.1698306420806
episode: 441 training return: -834.6739696251484
episode: 442 training return: -832.1629676470494
episode: 443 training return: -844.9575503106572
epoch: 111 test_true_pfm: 289.19102377554424 sim_pfm: -829.0938598977386
episode: 444 training return: -840.1670027454269
episode: 445 training return: -830.0581823235082
episode: 446 training return: -826.0607349887234
episode: 447 training return: -838.6903447030185
epoch: 112 test_true_pfm: 288.7971916926886 sim_pfm: -828.1918725007963
episode: 448 training return: -839.9890146057371
episode: 449 training return: -838.8948916751965
episode: 450 training return: -839.0129299963298
episode: 451 training return: -831.3436773516966
epoch: 113 test_true_pfm: 280.9923369973096 sim_pfm: -827.2276776949034
episode: 452 training return: -842.9186924740347
episode: 453 training return: -831.221632406649
episode: 454 training return: -837.7692421741299
episode: 455 training return: -847.5713996952633
epoch: 114 test_true_pfm: 283.3602084802298 sim_pfm: -831.6601371168122
episode: 456 training return: -837.6160024807376
episode: 457 training return: -839.2393986969697
episode: 458 training return: -834.2364479275261
episode: 459 training return: -833.4256143845882
epoch: 115 test_true_pfm: 282.03736034153354 sim_pfm: -831.6250434952086
episode: 460 training return: -836.8664421024721
episode: 461 training return: -846.6031192504755
episode: 462 training return: -836.390627477411
episode: 463 training return: -838.3142984453182
epoch: 116 test_true_pfm: 286.38150443669656 sim_pfm: -826.2922514754811
episode: 464 training return: -834.3018901644241
episode: 465 training return: -840.8452028945898
episode: 466 training return: -830.5626984259741
episode: 467 training return: -832.8075417265071
epoch: 117 test_true_pfm: 277.72329136456415 sim_pfm: -833.3730484467591
episode: 468 training return: -835.1402525106653
episode: 469 training return: -829.6669836865555
episode: 470 training return: -833.5392646574394
episode: 471 training return: -836.6507428859622
epoch: 118 test_true_pfm: 282.4110399166905 sim_pfm: -833.541700448492
episode: 472 training return: -836.1858492612902
episode: 473 training return: -840.1363202412273
episode: 474 training return: -841.549864319286
episode: 475 training return: -831.6467631601736
epoch: 119 test_true_pfm: 288.9595306721747 sim_pfm: -827.8310793601062
episode: 476 training return: -833.000334407132
episode: 477 training return: -831.8993394132432
episode: 478 training return: -836.3284006610306
episode: 479 training return: -830.5959636836125
epoch: 120 test_true_pfm: 290.42959847610933 sim_pfm: -830.2156583896535
episode: 480 training return: -847.0019928850206
episode: 481 training return: -838.9821138719111
episode: 482 training return: -834.627086913735
episode: 483 training return: -827.7015468396669
epoch: 121 test_true_pfm: 284.0230540146047 sim_pfm: -831.1924351720713
episode: 484 training return: -833.1199617766026
episode: 485 training return: -841.4490640370311
episode: 486 training return: -836.7595127644635
episode: 487 training return: -841.5053863474918
epoch: 122 test_true_pfm: 281.5205113527544 sim_pfm: -832.047985136634
episode: 488 training return: -832.7738028591948
episode: 489 training return: -840.7862432981617
episode: 490 training return: -832.611190014943
episode: 491 training return: -832.1158879744337
epoch: 123 test_true_pfm: 283.2235070780794 sim_pfm: -832.2716135054616
episode: 492 training return: -836.3425510851029
episode: 493 training return: -832.649106729078
episode: 494 training return: -838.8636809666123
episode: 495 training return: -841.5751429371427
epoch: 124 test_true_pfm: 282.50266364854156 sim_pfm: -830.7947485795088
episode: 496 training return: -831.4260554078999
episode: 497 training return: -836.5887049416672
episode: 498 training return: -832.8771195323866
episode: 499 training return: -834.0927036723416
epoch: 125 test_true_pfm: 292.1025022169071 sim_pfm: -824.7636082882732
episode: 500 training return: -833.4946014904152
episode: 501 training return: -843.5835175303328
episode: 502 training return: -838.118305423065
episode: 503 training return: -826.4154416914486
epoch: 126 test_true_pfm: 287.19927710095834 sim_pfm: -828.7630531274182
episode: 504 training return: -834.8014998567024
episode: 505 training return: -835.3481313790162
episode: 506 training return: -832.8527638363485
episode: 507 training return: -833.8517170498109
epoch: 127 test_true_pfm: 281.7755489420947 sim_pfm: -828.3399411721413
episode: 508 training return: -831.3129954318684
episode: 509 training return: -832.8633888885264
episode: 510 training return: -839.2679907053898
episode: 511 training return: -832.9295910533905
epoch: 128 test_true_pfm: 286.58359066807225 sim_pfm: -829.5105565172029
episode: 512 training return: -832.7386582121103
episode: 513 training return: -825.0076227277552
episode: 514 training return: -830.8901607725549
episode: 515 training return: -836.4277462914923
epoch: 129 test_true_pfm: 283.04710778204594 sim_pfm: -836.9905321579969
episode: 516 training return: -838.0770869228115
episode: 517 training return: -843.7365361672431
episode: 518 training return: -844.2176020412271
episode: 519 training return: -830.8152178942051
epoch: 130 test_true_pfm: 276.6091735426758 sim_pfm: -835.2439752441109
episode: 520 training return: -829.0584685729922
episode: 521 training return: -838.8787581735633
episode: 522 training return: -832.3816547517924
episode: 523 training return: -837.1863155722226
epoch: 131 test_true_pfm: 284.095219624375 sim_pfm: -829.6871685344363
episode: 524 training return: -830.8466939241671
episode: 525 training return: -826.7139269501653
episode: 526 training return: -833.403653647531
episode: 527 training return: -830.2740464915895
epoch: 132 test_true_pfm: 286.3679744765928 sim_pfm: -827.5748721992628
episode: 528 training return: -843.1639933005624
episode: 529 training return: -833.8496805801952
episode: 530 training return: -849.6947406837785
episode: 531 training return: -833.3630661699195
epoch: 133 test_true_pfm: 286.8411261961363 sim_pfm: -830.5077300957493
episode: 532 training return: -827.6492928430139
episode: 533 training return: -837.9705681809168
episode: 534 training return: -841.572406452436
episode: 535 training return: -829.6563653301234
epoch: 134 test_true_pfm: 293.09388709264164 sim_pfm: -824.9457196462299
episode: 536 training return: -830.7589870217635
episode: 537 training return: -834.8613953868785
episode: 538 training return: -837.1052722872821
episode: 539 training return: -839.6070833490278
epoch: 135 test_true_pfm: 287.81469015357465 sim_pfm: -831.626536504974
episode: 540 training return: -832.5218576857319
episode: 541 training return: -837.2073163788475
episode: 542 training return: -833.5424869255937
episode: 543 training return: -837.0364316063852
epoch: 136 test_true_pfm: 287.5345904253124 sim_pfm: -828.3523615350083
episode: 544 training return: -834.1049800198866
episode: 545 training return: -832.7988518455059
episode: 546 training return: -831.905117545146
episode: 547 training return: -837.7317034658491
epoch: 137 test_true_pfm: 282.66431701909823 sim_pfm: -830.5292578736411
episode: 548 training return: -834.3430859338371
episode: 549 training return: -836.4971452719652
episode: 550 training return: -830.1039089091375
episode: 551 training return: -839.5496343362646
epoch: 138 test_true_pfm: 279.1334660467815 sim_pfm: -828.724025075864
episode: 552 training return: -836.727586016044
episode: 553 training return: -827.2894467442599
episode: 554 training return: -839.8290125759183
episode: 555 training return: -837.7980910819966
epoch: 139 test_true_pfm: 288.4946659888332 sim_pfm: -822.8836941973938
episode: 556 training return: -847.8285231735008
episode: 557 training return: -839.0066619537355
episode: 558 training return: -836.6255702061907
episode: 559 training return: -841.6993878100205
epoch: 140 test_true_pfm: 283.99263320331255 sim_pfm: -829.7924519076481
episode: 560 training return: -838.5107047731468
episode: 561 training return: -830.602321877294
episode: 562 training return: -833.8837870144223
episode: 563 training return: -840.4244077579518
epoch: 141 test_true_pfm: 280.95315584188944 sim_pfm: -830.6329077758431
episode: 564 training return: -844.1385240611164
episode: 565 training return: -834.0736926403024
episode: 566 training return: -835.3294295839071
episode: 567 training return: -835.5899255343322
epoch: 142 test_true_pfm: 286.01275675140874 sim_pfm: -827.6890969428943
episode: 568 training return: -841.3142288195039
episode: 569 training return: -839.9717292885792
episode: 570 training return: -836.995625751634
episode: 571 training return: -832.8084989048946
epoch: 143 test_true_pfm: 281.9050667869192 sim_pfm: -828.3201938609346
episode: 572 training return: -828.6779933326661
episode: 573 training return: -841.2811862997168
episode: 574 training return: -836.7548991175605
episode: 575 training return: -830.2354658519548
epoch: 144 test_true_pfm: 281.35668773881156 sim_pfm: -824.7510671324453
episode: 576 training return: -840.395261092731
episode: 577 training return: -834.7536172498949
episode: 578 training return: -836.2969829319921
episode: 579 training return: -833.4805357307198
epoch: 145 test_true_pfm: 283.5212278326674 sim_pfm: -832.3490673954597
episode: 580 training return: -839.8524658214944
episode: 581 training return: -838.738895756572
episode: 582 training return: -830.5012973245542
episode: 583 training return: -834.7685196726612
epoch: 146 test_true_pfm: 280.5518438868243 sim_pfm: -831.5175746820054
episode: 584 training return: -831.6087784619735
episode: 585 training return: -848.2582547753434
episode: 586 training return: -838.868422264756
episode: 587 training return: -829.9306630673286
epoch: 147 test_true_pfm: 280.7953613661037 sim_pfm: -828.6406908862105
episode: 588 training return: -832.4784910161922
episode: 589 training return: -840.1761416599652
episode: 590 training return: -828.7112696946758
episode: 591 training return: -833.7408788110304
epoch: 148 test_true_pfm: 278.58452937526005 sim_pfm: -829.3036698776518
episode: 592 training return: -837.8098290828495
episode: 593 training return: -834.0226934518007
episode: 594 training return: -834.355227931694
episode: 595 training return: -830.3210650190111
epoch: 149 test_true_pfm: 280.91647307629535 sim_pfm: -833.0548115192346
episode: 596 training return: -826.5213866670233
episode: 597 training return: -842.2535312380845
episode: 598 training return: -840.9302048810775
episode: 599 training return: -840.8811469214639
epoch: 150 test_true_pfm: 277.5094554750967 sim_pfm: -834.4591609930472
