['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 0.2979506308585405 test_loss: 0.18929299116134643
epoch: 1 training_loss 0.19147536918520927 test_loss: 0.17251896858215332
epoch: 2 training_loss 0.17873235419392586 test_loss: 0.19011232852935792
epoch: 3 training_loss 0.16125614270567895 test_loss: 0.16906952857971191
epoch: 4 training_loss 0.16426000114530326 test_loss: 0.15749675035476685
epoch: 5 training_loss 0.15471912689507009 test_loss: 0.14492634534835816
epoch: 6 training_loss 0.15389779187738895 test_loss: 0.1704447388648987
epoch: 7 training_loss 0.17549691073596477 test_loss: 0.17723457813262938
epoch: 8 training_loss 0.15932738356292248 test_loss: 0.16499497890472412
epoch: 9 training_loss 0.15962420556694268 test_loss: 0.15505608320236205
epoch: 10 training_loss 0.15893391456454992 test_loss: 0.15092931985855101
epoch: 11 training_loss 0.151942856349051 test_loss: 0.1425417184829712
epoch: 12 training_loss 0.14719204857945442 test_loss: 0.15049980878829955
epoch: 13 training_loss 0.15115034513175488 test_loss: 0.1335824489593506
epoch: 14 training_loss 0.14539591692388057 test_loss: 0.16145644187927247
epoch: 15 training_loss 0.14849181029945613 test_loss: 0.13684496879577637
epoch: 16 training_loss 0.15026923723518848 test_loss: 0.15898022651672364
epoch: 17 training_loss 0.13978841617703439 test_loss: 0.15025546550750732
epoch: 18 training_loss 0.15005346871912478 test_loss: 0.13770867586135865
epoch: 19 training_loss 0.13713660921901463 test_loss: 0.14898769855499266
epoch: 20 training_loss 0.1425734919682145 test_loss: 0.1323559522628784
epoch: 21 training_loss 0.13338959380984305 test_loss: 0.14347548484802247
epoch: 22 training_loss 0.14390369586646556 test_loss: 0.1410307765007019
epoch: 23 training_loss 0.1425595188140869 test_loss: 0.1319899320602417
epoch: 24 training_loss 0.16116605747491122 test_loss: 0.14071402549743653
epoch: 25 training_loss 0.13886575579643248 test_loss: 0.15406477451324463
epoch: 26 training_loss 0.15701364122331143 test_loss: 0.14292896986007692
epoch: 27 training_loss 0.14782939363270997 test_loss: 0.14790667295455934
epoch: 28 training_loss 0.13614192347973586 test_loss: 0.14577503204345704
epoch: 29 training_loss 0.14717580541968345 test_loss: 0.15195252895355224
epoch: 30 training_loss 0.13634099278599024 test_loss: 0.18653284311294555
epoch: 31 training_loss 0.155206704698503 test_loss: 0.15837491750717164
epoch: 32 training_loss 0.1471356752514839 test_loss: 0.15092644691467286
epoch: 33 training_loss 0.14263530034571886 test_loss: 0.16123946905136108
epoch: 34 training_loss 0.14310378339141608 test_loss: 0.1412501096725464
epoch: 35 training_loss 0.14197097398340702 test_loss: 0.13364473581314087
epoch: 36 training_loss 0.1432490823417902 test_loss: 0.13851253986358641
epoch: 37 training_loss 0.14406886611133815 test_loss: 0.14509583711624147
epoch: 38 training_loss 0.14535023115575313 test_loss: 0.135082471370697
epoch: 39 training_loss 0.14647666297852993 test_loss: 0.1663639783859253
epoch: 40 training_loss 0.13803150322288274 test_loss: 0.13974448442459106
epoch: 41 training_loss 0.1430513171106577 test_loss: 0.14469646215438842
epoch: 42 training_loss 0.13971471149474382 test_loss: 0.13891165256500243
epoch: 43 training_loss 0.15053475856781007 test_loss: 0.1438663959503174
epoch: 44 training_loss 0.13895990751683712 test_loss: 0.13802303075790406
epoch: 45 training_loss 0.14609320417046548 test_loss: 0.14261420965194702
epoch: 46 training_loss 0.1398838046565652 test_loss: 0.1306338429450989
epoch: 47 training_loss 0.14970209527760744 test_loss: 0.13705120086669922
epoch: 48 training_loss 0.14125052347779274 test_loss: 0.13444491624832153
epoch: 49 training_loss 0.14562768876552581 test_loss: 0.14012953042984008
epoch: 50 training_loss 0.1430969213321805 test_loss: 0.13489545583724977
epoch: 51 training_loss 0.14084251888096333 test_loss: 0.14612854719161988
epoch: 52 training_loss 0.13290765706449748 test_loss: 0.14611004590988158
epoch: 53 training_loss 0.14572813984006644 test_loss: 0.14481494426727295
epoch: 54 training_loss 0.13755422674119472 test_loss: 0.15632410049438478
epoch: 55 training_loss 0.14140258334577083 test_loss: 0.12885061502456666
epoch: 56 training_loss 0.14084722831845284 test_loss: 0.13834786415100098
epoch: 57 training_loss 0.1357808332517743 test_loss: 0.1412588119506836
epoch: 58 training_loss 0.1389416751265526 test_loss: 0.1567285418510437
epoch: 59 training_loss 0.14068418256938459 test_loss: 0.12870911359786988
epoch: 60 training_loss 0.13899899821728467 test_loss: 0.1338721752166748
epoch: 61 training_loss 0.13727444723248483 test_loss: 0.1388137936592102
epoch: 62 training_loss 0.14474782913923265 test_loss: 0.14071907997131347
epoch: 63 training_loss 0.13081075109541415 test_loss: 0.13049612045288086
epoch: 64 training_loss 0.14378030758351087 test_loss: 0.14116785526275635
epoch: 65 training_loss 0.14142502553761005 test_loss: 0.14008781909942628
epoch: 66 training_loss 0.13631305692717433 test_loss: 0.15064165592193604
epoch: 67 training_loss 0.1361008663289249 test_loss: 0.1458898067474365
epoch: 68 training_loss 0.136422298848629 test_loss: 0.14366155862808228
epoch: 69 training_loss 0.1413275783509016 test_loss: 0.13812108039855958
epoch: 70 training_loss 0.13918665487319232 test_loss: 0.1302241563796997
epoch: 71 training_loss 0.1369066758826375 test_loss: 0.16886290311813354
epoch: 72 training_loss 0.13831975892186166 test_loss: 0.12770457267761232
epoch: 73 training_loss 0.13237920425832272 test_loss: 0.1340238332748413
epoch: 74 training_loss 0.1355318469554186 test_loss: 0.17234355211257935
epoch: 75 training_loss 0.12921750262379647 test_loss: 0.14466313123703003
epoch: 76 training_loss 0.13632481649518013 test_loss: 0.14189871549606323
epoch: 77 training_loss 0.1334179200604558 test_loss: 0.15080268383026124
epoch: 78 training_loss 0.1375145126506686 test_loss: 0.13061741590499878
epoch: 79 training_loss 0.14137173876166342 test_loss: 0.14197381734848022
epoch: 80 training_loss 0.1346298399567604 test_loss: 0.12745317220687866
epoch: 81 training_loss 0.1394529790058732 test_loss: 0.13340766429901124
epoch: 82 training_loss 0.13035511668771504 test_loss: 0.12554880380630493
epoch: 83 training_loss 0.12961870796978472 test_loss: 0.13969051837921143
epoch: 84 training_loss 0.1345064976811409 test_loss: 0.14384016990661622
epoch: 85 training_loss 0.1438170812278986 test_loss: 0.14672911167144775
epoch: 86 training_loss 0.14301094256341457 test_loss: 0.14064512252807618
epoch: 87 training_loss 0.14486923910677432 test_loss: 0.14877468347549438
epoch: 88 training_loss 0.13601464111357928 test_loss: 0.15176749229431152
epoch: 89 training_loss 0.1372763040289283 test_loss: 0.12484158277511596
epoch: 90 training_loss 0.13680329497903584 test_loss: 0.12489380836486816
epoch: 91 training_loss 0.13772552784532308 test_loss: 0.1421557068824768
epoch: 92 training_loss 0.13197169337421655 test_loss: 0.12410023212432861
epoch: 93 training_loss 0.13569259967654942 test_loss: 0.14531725645065308
epoch: 94 training_loss 0.14189294759184123 test_loss: 0.13524720668792725
epoch: 95 training_loss 0.13818400185555219 test_loss: 0.1435924291610718
epoch: 96 training_loss 0.14445879679173232 test_loss: 0.13379523754119874
epoch: 97 training_loss 0.1459183106198907 test_loss: 0.14282693862915039
epoch: 98 training_loss 0.1396574767678976 test_loss: 0.14877722263336182
epoch: 99 training_loss 0.14034028893336653 test_loss: 0.15561915636062623
epoch: 100 training_loss 0.1336749843135476 test_loss: 0.13472119569778443
epoch: 101 training_loss 0.1311331181973219 test_loss: 0.14375128746032714
epoch: 102 training_loss 0.13955178391188383 test_loss: 0.12362756729125976
epoch: 103 training_loss 0.13739401046186686 test_loss: 0.12619212865829468
epoch: 104 training_loss 0.14109140403568746 test_loss: 0.14125367403030395
epoch: 105 training_loss 0.14112156588584185 test_loss: 0.13055323362350463
epoch: 106 training_loss 0.13681190207600594 test_loss: 0.1435136079788208
epoch: 107 training_loss 0.13666955215856433 test_loss: 0.12162113189697266
epoch: 108 training_loss 0.14198039893060924 test_loss: 0.13093215227127075
epoch: 109 training_loss 0.1392960512638092 test_loss: 0.136225426197052
epoch: 110 training_loss 0.14070790000259875 test_loss: 0.1377133846282959
epoch: 111 training_loss 0.13200223471969366 test_loss: 0.13450299501419066
epoch: 112 training_loss 0.1349615104869008 test_loss: 0.13631725311279297
epoch: 113 training_loss 0.1401563400030136 test_loss: 0.13483527898788453
epoch: 114 training_loss 0.13343071863055228 test_loss: 0.13893474340438844
epoch: 115 training_loss 0.1408834944292903 test_loss: 0.13301230669021608
epoch: 116 training_loss 0.14114779476076364 test_loss: 0.14469515085220336
epoch: 117 training_loss 0.13691053960472346 test_loss: 0.12083112001419068
epoch: 118 training_loss 0.12998907005414367 test_loss: 0.12565838098526
epoch: 119 training_loss 0.13473304167389868 test_loss: 0.1552019238471985
epoch: 120 training_loss 0.1305547869578004 test_loss: 0.14731024503707885
epoch: 121 training_loss 0.13528339546173812 test_loss: 0.12801358699798585
epoch: 122 training_loss 0.13465985391288995 test_loss: 0.1253330945968628
epoch: 123 training_loss 0.12883044712245464 test_loss: 0.13265767097473144
epoch: 124 training_loss 0.13514003209769726 test_loss: 0.12935603857040406
epoch: 125 training_loss 0.1406333715096116 test_loss: 0.15358985662460328
epoch: 126 training_loss 0.13518675684928894 test_loss: 0.13874126672744752
epoch: 127 training_loss 0.13851529201492668 test_loss: 0.13925851583480836
epoch: 128 training_loss 0.13315273229032754 test_loss: 0.1359947681427002
epoch: 129 training_loss 0.14006627522408963 test_loss: 0.14012036323547364
epoch: 130 training_loss 0.1366157541051507 test_loss: 0.13378788232803346
epoch: 131 training_loss 0.1347519299760461 test_loss: 0.12365376949310303
epoch: 132 training_loss 0.1340170046687126 test_loss: 0.12303711175918579
epoch: 133 training_loss 0.12396043550223113 test_loss: 0.14979345798492433
epoch: 134 training_loss 0.145863451436162 test_loss: 0.1392253041267395
epoch: 135 training_loss 0.13619957469403743 test_loss: 0.12926797866821288
epoch: 136 training_loss 0.13611049376428128 test_loss: 0.140444016456604
epoch: 137 training_loss 0.14550597555935382 test_loss: 0.1277562975883484
epoch: 138 training_loss 0.13652422569692135 test_loss: 0.13646814823150635
epoch: 139 training_loss 0.1368863619491458 test_loss: 0.14086084365844725
epoch: 140 training_loss 0.14245274245738984 test_loss: 0.15670815706253052
epoch: 141 training_loss 0.13012327931821346 test_loss: 0.13796712160110475
epoch: 142 training_loss 0.13202655844390393 test_loss: 0.12874720096588135
epoch: 143 training_loss 0.13116532113403082 test_loss: 0.13267902135849
epoch: 144 training_loss 0.13208409413695335 test_loss: 0.14413282871246338
epoch: 145 training_loss 0.12867890529334544 test_loss: 0.14065054655075074
epoch: 146 training_loss 0.1386167447641492 test_loss: 0.13260648250579835
epoch: 147 training_loss 0.14200079904869198 test_loss: 0.1325261950492859
epoch: 148 training_loss 0.1417958716675639 test_loss: 0.13668354749679565
epoch: 149 training_loss 0.14352681249380111 test_loss: 0.15533753633499145
epoch: 0 training_loss 0.2964905812591314 test_loss: 0.197284734249115
epoch: 1 training_loss 0.19491983376443386 test_loss: 0.17705461978912354
epoch: 2 training_loss 0.17077781297266484 test_loss: 0.20574278831481935
epoch: 3 training_loss 0.17227104399353266 test_loss: 0.15432367324829102
epoch: 4 training_loss 0.15859825309365988 test_loss: 0.16758208274841307
epoch: 5 training_loss 0.16786424487829207 test_loss: 0.15890536308288575
epoch: 6 training_loss 0.15917531114071606 test_loss: 0.15081558227539063
epoch: 7 training_loss 0.14574509333819152 test_loss: 0.14319180250167846
epoch: 8 training_loss 0.1512924948707223 test_loss: 0.1818719506263733
epoch: 9 training_loss 0.15255458872765304 test_loss: 0.14239732027053834
epoch: 10 training_loss 0.15312913812696935 test_loss: 0.13262150287628174
epoch: 11 training_loss 0.14710641838610172 test_loss: 0.16118621826171875
epoch: 12 training_loss 0.15351701762527228 test_loss: 0.16869329214096068
epoch: 13 training_loss 0.1485674322396517 test_loss: 0.13568787574768065
epoch: 14 training_loss 0.13562469981610775 test_loss: 0.12739646434783936
epoch: 15 training_loss 0.15503988023847343 test_loss: 0.18772132396698
epoch: 16 training_loss 0.14706865061074495 test_loss: 0.14383682012557983
epoch: 17 training_loss 0.14968634996563196 test_loss: 0.1320318818092346
epoch: 18 training_loss 0.14479731231927873 test_loss: 0.15054748058319092
epoch: 19 training_loss 0.14030103258788584 test_loss: 0.14620304107666016
epoch: 20 training_loss 0.13772814001888037 test_loss: 0.14297182559967042
epoch: 21 training_loss 0.1342128586024046 test_loss: 0.16811368465423585
epoch: 22 training_loss 0.14321559209376575 test_loss: 0.15106967687606812
epoch: 23 training_loss 0.14230879992246628 test_loss: 0.17141907215118407
epoch: 24 training_loss 0.1436151208728552 test_loss: 0.14384685754776
epoch: 25 training_loss 0.1321089296042919 test_loss: 0.1434284567832947
epoch: 26 training_loss 0.13448009133338928 test_loss: 0.1523581862449646
epoch: 27 training_loss 0.13998734794557094 test_loss: 0.12194446325302125
epoch: 28 training_loss 0.1371919321268797 test_loss: 0.13630692958831786
epoch: 29 training_loss 0.14181495189666748 test_loss: 0.14241743087768555
epoch: 30 training_loss 0.14254549384117127 test_loss: 0.158991801738739
epoch: 31 training_loss 0.13327813357114793 test_loss: 0.12495880126953125
epoch: 32 training_loss 0.1356703931093216 test_loss: 0.13647695779800414
epoch: 33 training_loss 0.13877237506210804 test_loss: 0.17304364442825318
epoch: 34 training_loss 0.1304512020945549 test_loss: 0.1439746141433716
epoch: 35 training_loss 0.12907662250101568 test_loss: 0.13788532018661498
epoch: 36 training_loss 0.12811676470562816 test_loss: 0.14863030910491942
epoch: 37 training_loss 0.13883926406502722 test_loss: 0.1611710786819458
epoch: 38 training_loss 0.13833096724003555 test_loss: 0.1302141547203064
epoch: 39 training_loss 0.13497139602899552 test_loss: 0.146886146068573
epoch: 40 training_loss 0.14073624402284624 test_loss: 0.1287410020828247
epoch: 41 training_loss 0.13573077708482742 test_loss: 0.1394038200378418
epoch: 42 training_loss 0.1481196379289031 test_loss: 0.14863603115081786
epoch: 43 training_loss 0.14081699777394532 test_loss: 0.12733466625213624
epoch: 44 training_loss 0.14124608017504214 test_loss: 0.14607055187225343
epoch: 45 training_loss 0.13835960313677786 test_loss: 0.14201197624206544
epoch: 46 training_loss 0.13335214108228682 test_loss: 0.13607178926467894
epoch: 47 training_loss 0.13066427301615477 test_loss: 0.12223347425460815
epoch: 48 training_loss 0.14530499443411826 test_loss: 0.16071290969848634
epoch: 49 training_loss 0.13962349098175764 test_loss: 0.12690058946609498
epoch: 50 training_loss 0.1335214449092746 test_loss: 0.14417295455932616
epoch: 51 training_loss 0.1413301508128643 test_loss: 0.13568562269210815
epoch: 52 training_loss 0.13473996832966806 test_loss: 0.13493170738220214
epoch: 53 training_loss 0.13753388058394195 test_loss: 0.15833849906921388
epoch: 54 training_loss 0.13565955977886915 test_loss: 0.14800150394439698
epoch: 55 training_loss 0.1319714728370309 test_loss: 0.13018008470535278
epoch: 56 training_loss 0.12982919240370394 test_loss: 0.1450291872024536
epoch: 57 training_loss 0.13185124967247247 test_loss: 0.15384007692337037
epoch: 58 training_loss 0.133355656452477 test_loss: 0.1360045313835144
epoch: 59 training_loss 0.1343392602354288 test_loss: 0.1339537262916565
epoch: 60 training_loss 0.13453042443841695 test_loss: 0.14733792543411256
epoch: 61 training_loss 0.13899708077311515 test_loss: 0.13541746139526367
epoch: 62 training_loss 0.13212014477699996 test_loss: 0.138357412815094
epoch: 63 training_loss 0.12796735554933547 test_loss: 0.1380022168159485
epoch: 64 training_loss 0.12980573896318673 test_loss: 0.1424115538597107
epoch: 65 training_loss 0.1397648458555341 test_loss: 0.1400144338607788
epoch: 66 training_loss 0.1410162440687418 test_loss: 0.12736818790435792
epoch: 67 training_loss 0.12703602258116006 test_loss: 0.14030715227127075
epoch: 68 training_loss 0.13327803146094083 test_loss: 0.13062925338745118
epoch: 69 training_loss 0.13768718000501395 test_loss: 0.13533107042312623
epoch: 70 training_loss 0.13173859003931285 test_loss: 0.13139299154281617
epoch: 71 training_loss 0.13578850165009498 test_loss: 0.14991514682769774
epoch: 72 training_loss 0.1303559109196067 test_loss: 0.13300597667694092
epoch: 73 training_loss 0.13145374972373247 test_loss: 0.13901666402816773
epoch: 74 training_loss 0.12775469712913037 test_loss: 0.15357575416564942
epoch: 75 training_loss 0.12743076533079148 test_loss: 0.14612647294998168
epoch: 76 training_loss 0.13531292416155338 test_loss: 0.15724085569381713
epoch: 77 training_loss 0.13465634901076556 test_loss: 0.1513541340827942
epoch: 78 training_loss 0.13386718317866325 test_loss: 0.13470165729522704
epoch: 79 training_loss 0.13084162894636392 test_loss: 0.13893525600433348
epoch: 80 training_loss 0.1344847697764635 test_loss: 0.13754068613052367
epoch: 81 training_loss 0.12577481351792813 test_loss: 0.13429636955261232
epoch: 82 training_loss 0.1380426613613963 test_loss: 0.13336266279220582
epoch: 83 training_loss 0.140851922146976 test_loss: 0.13720967769622802
epoch: 84 training_loss 0.13292828425765038 test_loss: 0.13904736042022706
epoch: 85 training_loss 0.13879985053092242 test_loss: 0.1166001319885254
epoch: 86 training_loss 0.12651431355625392 test_loss: 0.13925381898880004
epoch: 87 training_loss 0.1276623837277293 test_loss: 0.14251620769500734
epoch: 88 training_loss 0.13428566366434097 test_loss: 0.13561859130859374
epoch: 89 training_loss 0.1305751605704427 test_loss: 0.12405084371566773
epoch: 90 training_loss 0.13394522961229086 test_loss: 0.1370787262916565
epoch: 91 training_loss 0.1333443147689104 test_loss: 0.13639165163040162
epoch: 92 training_loss 0.1332216701284051 test_loss: 0.12495434284210205
epoch: 93 training_loss 0.13571689415723084 test_loss: 0.1265202283859253
epoch: 94 training_loss 0.1283618015050888 test_loss: 0.13173147439956664
epoch: 95 training_loss 0.124368612524122 test_loss: 0.12613322734832763
epoch: 96 training_loss 0.12972905330359935 test_loss: 0.13222652673721313
epoch: 97 training_loss 0.13209966119378805 test_loss: 0.12949542999267577
epoch: 98 training_loss 0.13157072339206935 test_loss: 0.12445428371429443
epoch: 99 training_loss 0.1330284347385168 test_loss: 0.11885093450546265
epoch: 100 training_loss 0.13545996136963367 test_loss: 0.14005794525146484
epoch: 101 training_loss 0.12946877971291543 test_loss: 0.13825206756591796
epoch: 102 training_loss 0.13700350847095252 test_loss: 0.1349504590034485
epoch: 103 training_loss 0.12366592451930046 test_loss: 0.1395076870918274
epoch: 104 training_loss 0.12490071281790734 test_loss: 0.1456822633743286
epoch: 105 training_loss 0.14081283617764712 test_loss: 0.13480528593063354
epoch: 106 training_loss 0.13996040154248476 test_loss: 0.15696256160736083
epoch: 107 training_loss 0.13808911502361298 test_loss: 0.13091276884078978
epoch: 108 training_loss 0.1290925308316946 test_loss: 0.1490481376647949
epoch: 109 training_loss 0.13036882385611534 test_loss: 0.12756918668746947
epoch: 110 training_loss 0.12959133006632328 test_loss: 0.13414729833602906
epoch: 111 training_loss 0.12514693645760416 test_loss: 0.12322275638580323
epoch: 112 training_loss 0.1303578445315361 test_loss: 0.1313310980796814
epoch: 113 training_loss 0.12483607601374387 test_loss: 0.14759528636932373
epoch: 114 training_loss 0.1313263090327382 test_loss: 0.1566585421562195
epoch: 115 training_loss 0.13588809758424758 test_loss: 0.13756858110427855
epoch: 116 training_loss 0.13566684290766717 test_loss: 0.1349899649620056
epoch: 117 training_loss 0.13064618863165378 test_loss: 0.12182997465133667
epoch: 118 training_loss 0.13376183807849884 test_loss: 0.13927606344223023
epoch: 119 training_loss 0.12941387677565216 test_loss: 0.12541857957839966
epoch: 120 training_loss 0.13317101940512657 test_loss: 0.12972707748413087
epoch: 121 training_loss 0.13198715019971133 test_loss: 0.14173789024353028
epoch: 122 training_loss 0.13501866828650236 test_loss: 0.1448913812637329
epoch: 123 training_loss 0.14345524102449417 test_loss: 0.1553597331047058
epoch: 124 training_loss 0.13532390151172877 test_loss: 0.1508074164390564
epoch: 125 training_loss 0.13634763777256012 test_loss: 0.13858674764633178
epoch: 126 training_loss 0.12439588762819767 test_loss: 0.1373368740081787
epoch: 127 training_loss 0.1287556057050824 test_loss: 0.12285115718841552
epoch: 128 training_loss 0.12125112319365144 test_loss: 0.11867543458938598
epoch: 129 training_loss 0.12312819968909025 test_loss: 0.15068238973617554
epoch: 130 training_loss 0.13370011512190103 test_loss: 0.13261009454727174
epoch: 131 training_loss 0.13311685364693404 test_loss: 0.1452586531639099
epoch: 132 training_loss 0.12591324742883445 test_loss: 0.14704179763793945
epoch: 133 training_loss 0.13390406299382449 test_loss: 0.1491796612739563
epoch: 134 training_loss 0.1287112070992589 test_loss: 0.14420255422592163
epoch: 135 training_loss 0.13476572014391422 test_loss: 0.14799495935440063
epoch: 136 training_loss 0.13151886966079473 test_loss: 0.13130562305450438
epoch: 137 training_loss 0.135140730291605 test_loss: 0.1384541392326355
epoch: 138 training_loss 0.12662391383200883 test_loss: 0.13202663660049438
epoch: 139 training_loss 0.13685030918568372 test_loss: 0.1243133306503296
epoch: 140 training_loss 0.13813025701791048 test_loss: 0.1273129940032959
epoch: 141 training_loss 0.12854758884757758 test_loss: 0.11947166919708252
epoch: 142 training_loss 0.1280135614424944 test_loss: 0.13382776975631713
epoch: 143 training_loss 0.13194987542927264 test_loss: 0.1307082176208496
epoch: 144 training_loss 0.13691264662891625 test_loss: 0.12951964139938354
epoch: 145 training_loss 0.13509736318141222 test_loss: 0.12910263538360595
epoch: 146 training_loss 0.12655915696173906 test_loss: 0.13151414394378663
epoch: 147 training_loss 0.12750658094882966 test_loss: 0.16993860006332398
epoch: 148 training_loss 0.13157552663236857 test_loss: 0.1470864534378052
epoch: 149 training_loss 0.12740778401494027 test_loss: 0.13511147499084472
epoch: 0 training_loss 0.2957462580502033 test_loss: 0.20305337905883789
epoch: 1 training_loss 0.19413630954921246 test_loss: 0.15672794580459595
epoch: 2 training_loss 0.1713205696642399 test_loss: 0.16707999706268312
epoch: 3 training_loss 0.1603035642951727 test_loss: 0.17287384271621703
epoch: 4 training_loss 0.1538010996952653 test_loss: 0.18201084136962892
epoch: 5 training_loss 0.1526214050874114 test_loss: 0.14776980876922607
epoch: 6 training_loss 0.16143442437052727 test_loss: 0.13456966876983642
epoch: 7 training_loss 0.15660432159900664 test_loss: 0.1401928424835205
epoch: 8 training_loss 0.14585220269858837 test_loss: 0.1460110902786255
epoch: 9 training_loss 0.1482367876917124 test_loss: 0.15698525905609131
epoch: 10 training_loss 0.1523098911345005 test_loss: 0.17443095445632933
epoch: 11 training_loss 0.13740914057940246 test_loss: 0.14925141334533693
epoch: 12 training_loss 0.1388276381790638 test_loss: 0.13440848588943483
epoch: 13 training_loss 0.14225339245051147 test_loss: 0.13498016595840454
epoch: 14 training_loss 0.14421728745102882 test_loss: 0.16054762601852418
epoch: 15 training_loss 0.1410183961316943 test_loss: 0.1356768488883972
epoch: 16 training_loss 0.12992416720837355 test_loss: 0.14361642599105834
epoch: 17 training_loss 0.14362251184880734 test_loss: 0.13877991437911988
epoch: 18 training_loss 0.13982827849686147 test_loss: 0.14075424671173095
epoch: 19 training_loss 0.13826714154332875 test_loss: 0.13015352487564086
epoch: 20 training_loss 0.1384449728950858 test_loss: 0.14173907041549683
epoch: 21 training_loss 0.14182391054928303 test_loss: 0.13851075172424315
epoch: 22 training_loss 0.1345446391776204 test_loss: 0.1467789888381958
epoch: 23 training_loss 0.1342754114791751 test_loss: 0.15382394790649415
epoch: 24 training_loss 0.13205140128731727 test_loss: 0.13018293380737306
epoch: 25 training_loss 0.12879746679216622 test_loss: 0.13937067985534668
epoch: 26 training_loss 0.1342747398838401 test_loss: 0.13854867219924927
epoch: 27 training_loss 0.12707572754472493 test_loss: 0.14110808372497557
epoch: 28 training_loss 0.13427531752735378 test_loss: 0.14849636554718018
epoch: 29 training_loss 0.1390301376208663 test_loss: 0.1351104736328125
epoch: 30 training_loss 0.1339245317876339 test_loss: 0.1426527976989746
epoch: 31 training_loss 0.1350601320154965 test_loss: 0.14545639753341674
epoch: 32 training_loss 0.13152085103094577 test_loss: 0.15655372142791749
epoch: 33 training_loss 0.13375368792563677 test_loss: 0.1307060480117798
epoch: 34 training_loss 0.13930281285196544 test_loss: 0.12336080074310303
epoch: 35 training_loss 0.1353918693587184 test_loss: 0.13670066595077515
epoch: 36 training_loss 0.13493420027196407 test_loss: 0.13343294858932495
epoch: 37 training_loss 0.13853821106255054 test_loss: 0.12835601568222046
epoch: 38 training_loss 0.12489633075892925 test_loss: 0.13730539083480836
epoch: 39 training_loss 0.13783353097736836 test_loss: 0.1244320273399353
epoch: 40 training_loss 0.13351519640535117 test_loss: 0.14139283895492555
epoch: 41 training_loss 0.12892774498090148 test_loss: 0.17508410215377807
epoch: 42 training_loss 0.13900793254375457 test_loss: 0.1499568462371826
epoch: 43 training_loss 0.13235357720404864 test_loss: 0.13713310956954955
epoch: 44 training_loss 0.1300181621685624 test_loss: 0.1394673228263855
epoch: 45 training_loss 0.138524162620306 test_loss: 0.13020120859146117
epoch: 46 training_loss 0.14077588725835086 test_loss: 0.12678178548812866
epoch: 47 training_loss 0.14086818166077136 test_loss: 0.13011354207992554
epoch: 48 training_loss 0.12749108973890544 test_loss: 0.13388652801513673
epoch: 49 training_loss 0.14033888548612594 test_loss: 0.1395086646080017
epoch: 50 training_loss 0.13174517955631018 test_loss: 0.12775923013687135
epoch: 51 training_loss 0.136218770891428 test_loss: 0.12902387380599975
epoch: 52 training_loss 0.12447621617466212 test_loss: 0.15387412309646606
epoch: 53 training_loss 0.12890465516597033 test_loss: 0.13247010707855225
epoch: 54 training_loss 0.13362321801483631 test_loss: 0.13679453134536743
epoch: 55 training_loss 0.14555984623730184 test_loss: 0.12055073976516724
epoch: 56 training_loss 0.12755013506859542 test_loss: 0.1470712423324585
epoch: 57 training_loss 0.1293514947220683 test_loss: 0.1254614233970642
epoch: 58 training_loss 0.13264397330582142 test_loss: 0.1413412094116211
epoch: 59 training_loss 0.13517254240810872 test_loss: 0.1241010308265686
epoch: 60 training_loss 0.13550022698938846 test_loss: 0.1607363224029541
epoch: 61 training_loss 0.13885030031204224 test_loss: 0.13889045715332032
epoch: 62 training_loss 0.14003725927323102 test_loss: 0.13897912502288817
epoch: 63 training_loss 0.12883002836257218 test_loss: 0.13040437698364257
epoch: 64 training_loss 0.13777973882853986 test_loss: 0.1253644585609436
epoch: 65 training_loss 0.13697325974702834 test_loss: 0.12043646574020386
epoch: 66 training_loss 0.13443710576742887 test_loss: 0.13317207098007203
epoch: 67 training_loss 0.12152662411332131 test_loss: 0.14719027280807495
epoch: 68 training_loss 0.1281900791823864 test_loss: 0.14772225618362428
epoch: 69 training_loss 0.1361009308695793 test_loss: 0.13752191066741942
epoch: 70 training_loss 0.1364274011179805 test_loss: 0.13103991746902466
epoch: 71 training_loss 0.12457256827503443 test_loss: 0.14298518896102905
epoch: 72 training_loss 0.13638038866221905 test_loss: 0.1440574049949646
epoch: 73 training_loss 0.13672215830534695 test_loss: 0.12411818504333497
epoch: 74 training_loss 0.1254438105970621 test_loss: 0.13185496330261232
epoch: 75 training_loss 0.13011277817189693 test_loss: 0.11933828592300415
epoch: 76 training_loss 0.1371126028895378 test_loss: 0.14797936677932738
epoch: 77 training_loss 0.12727128893136977 test_loss: 0.13177706003189088
epoch: 78 training_loss 0.1292636749893427 test_loss: 0.137454354763031
epoch: 79 training_loss 0.13072003193199636 test_loss: 0.1249935507774353
epoch: 80 training_loss 0.1342611426115036 test_loss: 0.14005709886550904
epoch: 81 training_loss 0.12601499155163765 test_loss: 0.13188005685806276
epoch: 82 training_loss 0.12479115717113018 test_loss: 0.15457592010498047
epoch: 83 training_loss 0.1366343454644084 test_loss: 0.1282735586166382
epoch: 84 training_loss 0.1277356531098485 test_loss: 0.1617574691772461
epoch: 85 training_loss 0.13608286783099174 test_loss: 0.12624692916870117
epoch: 86 training_loss 0.1375507189333439 test_loss: 0.1262884259223938
epoch: 87 training_loss 0.13560762003064156 test_loss: 0.12849172353744506
epoch: 88 training_loss 0.13536761805415154 test_loss: 0.1320112943649292
epoch: 89 training_loss 0.13300574511289598 test_loss: 0.12818375825881959
epoch: 90 training_loss 0.14134966623038053 test_loss: 0.1419116735458374
epoch: 91 training_loss 0.13173671763390302 test_loss: 0.11933605670928955
epoch: 92 training_loss 0.12539686385542154 test_loss: 0.14119688272476197
epoch: 93 training_loss 0.12238703701645136 test_loss: 0.13800482749938964
epoch: 94 training_loss 0.1299516287818551 test_loss: 0.15670965909957885
epoch: 95 training_loss 0.13319885496050118 test_loss: 0.12659924030303954
epoch: 96 training_loss 0.13119717240333556 test_loss: 0.13960219621658326
epoch: 97 training_loss 0.13497999474406241 test_loss: 0.12933515310287474
epoch: 98 training_loss 0.13308231502771378 test_loss: 0.1318846344947815
epoch: 99 training_loss 0.12791813515126704 test_loss: 0.13882248401641845
epoch: 100 training_loss 0.12881035011261702 test_loss: 0.14261738061904908
epoch: 101 training_loss 0.1351983180269599 test_loss: 0.12826166152954102
epoch: 102 training_loss 0.13957814954221248 test_loss: 0.14032634496688842
epoch: 103 training_loss 0.13083557091653347 test_loss: 0.11448155641555786
epoch: 104 training_loss 0.12548958230763674 test_loss: 0.1370030164718628
epoch: 105 training_loss 0.12119271229952573 test_loss: 0.13272373676300048
epoch: 106 training_loss 0.12899444121867418 test_loss: 0.13600656986236573
epoch: 107 training_loss 0.13518982257694007 test_loss: 0.14519113302230835
epoch: 108 training_loss 0.1287721010670066 test_loss: 0.13065838813781738
epoch: 109 training_loss 0.12220565423369407 test_loss: 0.12785487174987792
epoch: 110 training_loss 0.13399718552827836 test_loss: 0.1170355200767517
epoch: 111 training_loss 0.12600772827863693 test_loss: 0.12477710247039794
epoch: 112 training_loss 0.12953568929806353 test_loss: 0.1339268207550049
epoch: 113 training_loss 0.12620157279074193 test_loss: 0.1317353367805481
epoch: 114 training_loss 0.13711980633437634 test_loss: 0.1541187882423401
epoch: 115 training_loss 0.12649664510041475 test_loss: 0.14889326095581054
epoch: 116 training_loss 0.12775925785303116 test_loss: 0.1318559765815735
epoch: 117 training_loss 0.1298617599904537 test_loss: 0.11862847805023194
epoch: 118 training_loss 0.13445503741502762 test_loss: 0.13531765937805176
epoch: 119 training_loss 0.12432455856353045 test_loss: 0.1279170870780945
epoch: 120 training_loss 0.13844185758382083 test_loss: 0.13035566806793214
epoch: 121 training_loss 0.1291877345740795 test_loss: 0.139884877204895
epoch: 122 training_loss 0.12669682420790196 test_loss: 0.12957799434661865
epoch: 123 training_loss 0.13361126344650984 test_loss: 0.13450714349746704
epoch: 124 training_loss 0.1287710765004158 test_loss: 0.12921159267425536
epoch: 125 training_loss 0.12683612948283554 test_loss: 0.14343600273132323
epoch: 126 training_loss 0.1292344807088375 test_loss: 0.131001079082489
epoch: 127 training_loss 0.13005107309669256 test_loss: 0.1336335778236389
epoch: 128 training_loss 0.12458082720637322 test_loss: 0.12389519214630126
epoch: 129 training_loss 0.12342120192945004 test_loss: 0.1369258165359497
epoch: 130 training_loss 0.12513574179261922 test_loss: 0.14196912050247193
epoch: 131 training_loss 0.12870159853249788 test_loss: 0.13274253606796266
epoch: 132 training_loss 0.1346806789189577 test_loss: 0.13136144876480102
epoch: 133 training_loss 0.12326280899345875 test_loss: 0.13235150575637816
epoch: 134 training_loss 0.12800886385142804 test_loss: 0.13016496896743773
epoch: 135 training_loss 0.12208282429724931 test_loss: 0.1346595287322998
epoch: 136 training_loss 0.12609512839466333 test_loss: 0.13438618183135986
epoch: 137 training_loss 0.1250443271920085 test_loss: 0.13105626106262208
epoch: 138 training_loss 0.1247513648122549 test_loss: 0.12777246236801149
epoch: 139 training_loss 0.13633388258516788 test_loss: 0.1592297911643982
epoch: 140 training_loss 0.12687698543071746 test_loss: 0.15303882360458373
epoch: 141 training_loss 0.13264431662857532 test_loss: 0.12486898899078369
epoch: 142 training_loss 0.1322646280936897 test_loss: 0.12454167604446412
epoch: 143 training_loss 0.1337838852778077 test_loss: 0.12969390153884888
epoch: 144 training_loss 0.12780733164399863 test_loss: 0.1419151782989502
epoch: 145 training_loss 0.1322047007828951 test_loss: 0.13213340044021607
epoch: 146 training_loss 0.1274279796332121 test_loss: 0.12828850746154785
epoch: 147 training_loss 0.1312375793606043 test_loss: 0.1248656153678894
epoch: 148 training_loss 0.12758856900036336 test_loss: 0.12334359884262085
epoch: 149 training_loss 0.13259938426315784 test_loss: 0.1389026165008545
epoch: 0 training_loss 0.3030908516049385 test_loss: 0.19629453420639037
epoch: 1 training_loss 0.19185264654457568 test_loss: 0.25419204235076903
epoch: 2 training_loss 0.19004452399909497 test_loss: 0.15695298910140992
epoch: 3 training_loss 0.17061510276049374 test_loss: 0.1596935749053955
epoch: 4 training_loss 0.16286942161619664 test_loss: 0.17497092485427856
epoch: 5 training_loss 0.17313252829015255 test_loss: 0.14414042234420776
epoch: 6 training_loss 0.15620665445923806 test_loss: 0.14129023551940917
epoch: 7 training_loss 0.15752132825553417 test_loss: 0.1484521746635437
epoch: 8 training_loss 0.141849247738719 test_loss: 0.14339460134506227
epoch: 9 training_loss 0.15803985346108676 test_loss: 0.1636802077293396
epoch: 10 training_loss 0.15963664412498474 test_loss: 0.1582495927810669
epoch: 11 training_loss 0.14819964841008187 test_loss: 0.1406446099281311
epoch: 12 training_loss 0.14716857649385928 test_loss: 0.1459033489227295
epoch: 13 training_loss 0.15247299827635288 test_loss: 0.13983359336853027
epoch: 14 training_loss 0.14257140219211578 test_loss: 0.14742926359176636
epoch: 15 training_loss 0.1471676942333579 test_loss: 0.19325907230377198
epoch: 16 training_loss 0.14484057694673538 test_loss: 0.15887451171875
epoch: 17 training_loss 0.15148048005998135 test_loss: 0.14292272329330444
epoch: 18 training_loss 0.14808551229536535 test_loss: 0.15088748931884766
epoch: 19 training_loss 0.14829645112156867 test_loss: 0.13497461080551149
epoch: 20 training_loss 0.14134869746863843 test_loss: 0.13659484386444093
epoch: 21 training_loss 0.13988972846418618 test_loss: 0.13513731956481934
epoch: 22 training_loss 0.1472019011899829 test_loss: 0.1409761667251587
epoch: 23 training_loss 0.14734692256897688 test_loss: 0.13593536615371704
epoch: 24 training_loss 0.13806239489465952 test_loss: 0.1534430742263794
epoch: 25 training_loss 0.13782401978969575 test_loss: 0.14891698360443115
epoch: 26 training_loss 0.13385776720941067 test_loss: 0.13504661321640016
epoch: 27 training_loss 0.1406860103085637 test_loss: 0.18983813524246215
epoch: 28 training_loss 0.137615039460361 test_loss: 0.1391054153442383
epoch: 29 training_loss 0.14723688397556545 test_loss: 0.1477300524711609
epoch: 30 training_loss 0.14315097112208605 test_loss: 0.15624183416366577
epoch: 31 training_loss 0.13694196835160255 test_loss: 0.14193097352981568
epoch: 32 training_loss 0.14708758413791656 test_loss: 0.15134706497192382
epoch: 33 training_loss 0.1425601302087307 test_loss: 0.131830096244812
epoch: 34 training_loss 0.13882816921919583 test_loss: 0.13991252183914185
epoch: 35 training_loss 0.13281884774565697 test_loss: 0.15078059434890748
epoch: 36 training_loss 0.14043169658631086 test_loss: 0.13027584552764893
epoch: 37 training_loss 0.13199218787252903 test_loss: 0.14744459390640258
epoch: 38 training_loss 0.13977254401892425 test_loss: 0.1407652974128723
epoch: 39 training_loss 0.12464617781341075 test_loss: 0.1404127836227417
epoch: 40 training_loss 0.14629963971674442 test_loss: 0.14830729961395264
epoch: 41 training_loss 0.1350386118143797 test_loss: 0.13396749496459961
epoch: 42 training_loss 0.13672371368855238 test_loss: 0.14182122945785522
epoch: 43 training_loss 0.14501068949699403 test_loss: 0.13717175722122193
epoch: 44 training_loss 0.1410421435162425 test_loss: 0.14823198318481445
epoch: 45 training_loss 0.13510596815496684 test_loss: 0.13514289855957032
epoch: 46 training_loss 0.12720300134271384 test_loss: 0.13876333236694335
epoch: 47 training_loss 0.14174514662474394 test_loss: 0.13320391178131102
epoch: 48 training_loss 0.14068366792052983 test_loss: 0.14566855430603026
epoch: 49 training_loss 0.1390241589397192 test_loss: 0.14467697143554686
epoch: 50 training_loss 0.13816289190202952 test_loss: 0.12339943647384644
epoch: 51 training_loss 0.14681799460202455 test_loss: 0.15120248794555663
epoch: 52 training_loss 0.13720165625214575 test_loss: 0.13014332056045533
epoch: 53 training_loss 0.12775151439011098 test_loss: 0.1386471152305603
epoch: 54 training_loss 0.13301877696067094 test_loss: 0.13351929187774658
epoch: 55 training_loss 0.1378721083328128 test_loss: 0.12790265083312988
epoch: 56 training_loss 0.14485950328409672 test_loss: 0.15462021827697753
epoch: 57 training_loss 0.1422497182711959 test_loss: 0.14057224988937378
epoch: 58 training_loss 0.13767746191471816 test_loss: 0.1505471110343933
epoch: 59 training_loss 0.1343534866720438 test_loss: 0.13455151319503783
epoch: 60 training_loss 0.13083960685878993 test_loss: 0.13494521379470825
epoch: 61 training_loss 0.13173717666417362 test_loss: 0.13228768110275269
epoch: 62 training_loss 0.1320067847520113 test_loss: 0.1209981083869934
epoch: 63 training_loss 0.13889149872586132 test_loss: 0.13264611959457398
epoch: 64 training_loss 0.1310360813885927 test_loss: 0.16055771112442016
epoch: 65 training_loss 0.14075586270540952 test_loss: 0.11786128282546997
epoch: 66 training_loss 0.12561086688190698 test_loss: 0.1532209873199463
epoch: 67 training_loss 0.14602992843836546 test_loss: 0.15882411003112792
epoch: 68 training_loss 0.13451931573450565 test_loss: 0.12839232683181762
epoch: 69 training_loss 0.1364147688448429 test_loss: 0.13580976724624633
epoch: 70 training_loss 0.13409318201243878 test_loss: 0.1570450782775879
epoch: 71 training_loss 0.1372664113715291 test_loss: 0.11739959716796874
epoch: 72 training_loss 0.13245443493127823 test_loss: 0.1287916660308838
epoch: 73 training_loss 0.135956381931901 test_loss: 0.14390066862106324
epoch: 74 training_loss 0.14135438755154608 test_loss: 0.13550708293914795
epoch: 75 training_loss 0.14416324241086842 test_loss: 0.14149727821350097
epoch: 76 training_loss 0.1460478224232793 test_loss: 0.15998027324676514
epoch: 77 training_loss 0.13559287771582604 test_loss: 0.14826725721359252
epoch: 78 training_loss 0.1378878827020526 test_loss: 0.13862823247909545
epoch: 79 training_loss 0.14544841896742583 test_loss: 0.13925713300704956
epoch: 80 training_loss 0.1404848838225007 test_loss: 0.13735630512237548
epoch: 81 training_loss 0.1253560584038496 test_loss: 0.1380029797554016
epoch: 82 training_loss 0.12265718346461654 test_loss: 0.1459653377532959
epoch: 83 training_loss 0.13502134889364242 test_loss: 0.1464053750038147
epoch: 84 training_loss 0.1452571253851056 test_loss: 0.14449543952941896
epoch: 85 training_loss 0.13178486160933972 test_loss: 0.1351165771484375
epoch: 86 training_loss 0.1274358106777072 test_loss: 0.13976719379425048
epoch: 87 training_loss 0.13860483705997467 test_loss: 0.12435750961303711
epoch: 88 training_loss 0.14398719847202301 test_loss: 0.13174318075180053
epoch: 89 training_loss 0.13403587756678462 test_loss: 0.15257400274276733
epoch: 90 training_loss 0.14222915314137935 test_loss: 0.15068472623825074
epoch: 91 training_loss 0.13813740115612747 test_loss: 0.12712931632995605
epoch: 92 training_loss 0.1339834763482213 test_loss: 0.1278691291809082
epoch: 93 training_loss 0.12558379735797642 test_loss: 0.13010774850845336
epoch: 94 training_loss 0.13824747547507285 test_loss: 0.1457741975784302
epoch: 95 training_loss 0.13001995045691728 test_loss: 0.14394207000732423
epoch: 96 training_loss 0.1418038433045149 test_loss: 0.12511032819747925
epoch: 97 training_loss 0.12442900240421295 test_loss: 0.15074527263641357
epoch: 98 training_loss 0.13715782910585403 test_loss: 0.12534096240997314
epoch: 99 training_loss 0.13635945696383714 test_loss: 0.125278103351593
epoch: 100 training_loss 0.13506813380867244 test_loss: 0.1268584609031677
epoch: 101 training_loss 0.13088741268962623 test_loss: 0.14170560836791993
epoch: 102 training_loss 0.12967807495966555 test_loss: 0.13169671297073365
epoch: 103 training_loss 0.1366921977326274 test_loss: 0.15120960474014283
epoch: 104 training_loss 0.13528476893901825 test_loss: 0.15221997499465942
epoch: 105 training_loss 0.1270803613960743 test_loss: 0.1462499976158142
epoch: 106 training_loss 0.133590278737247 test_loss: 0.13426908254623413
epoch: 107 training_loss 0.1325387560762465 test_loss: 0.1395219087600708
epoch: 108 training_loss 0.13746995374560356 test_loss: 0.13063467741012574
epoch: 109 training_loss 0.13097413890063764 test_loss: 0.13525086641311646
epoch: 110 training_loss 0.13529988035559654 test_loss: 0.13423746824264526
epoch: 111 training_loss 0.13331875912845134 test_loss: 0.1368975520133972
epoch: 112 training_loss 0.14423580028116703 test_loss: 0.14397491216659547
epoch: 113 training_loss 0.1392345380038023 test_loss: 0.12286322116851807
epoch: 114 training_loss 0.12679293397814034 test_loss: 0.13731231689453124
epoch: 115 training_loss 0.13263128235936164 test_loss: 0.13037108182907103
epoch: 116 training_loss 0.12922186493873597 test_loss: 0.1385595679283142
epoch: 117 training_loss 0.1348845542781055 test_loss: 0.13560270071029662
epoch: 118 training_loss 0.14216106448322535 test_loss: 0.14230992794036865
epoch: 119 training_loss 0.12044366437941789 test_loss: 0.1414387822151184
epoch: 120 training_loss 0.13418550990521907 test_loss: 0.14530529975891113
epoch: 121 training_loss 0.12586117327213286 test_loss: 0.13226648569107055
epoch: 122 training_loss 0.13707129610702395 test_loss: 0.12567756175994874
epoch: 123 training_loss 0.1316682704165578 test_loss: 0.144792377948761
epoch: 124 training_loss 0.1340004813671112 test_loss: 0.1469753861427307
epoch: 125 training_loss 0.14385875225067138 test_loss: 0.14385571479797363
epoch: 126 training_loss 0.13291649147868156 test_loss: 0.12951669692993165
epoch: 127 training_loss 0.13570212740451099 test_loss: 0.13800889253616333
epoch: 128 training_loss 0.13104089479893447 test_loss: 0.12668635845184326
epoch: 129 training_loss 0.1356612901389599 test_loss: 0.13953348398208618
epoch: 130 training_loss 0.13263601172715425 test_loss: 0.142997670173645
epoch: 131 training_loss 0.13136986561119557 test_loss: 0.15502846240997314
epoch: 132 training_loss 0.13635910406708718 test_loss: 0.13005495071411133
epoch: 133 training_loss 0.1315740632265806 test_loss: 0.1380640149116516
epoch: 134 training_loss 0.13321211852133275 test_loss: 0.13826791048049927
epoch: 135 training_loss 0.13309242717921735 test_loss: 0.12866805791854857
epoch: 136 training_loss 0.13504524029791354 test_loss: 0.15405478477478027
epoch: 137 training_loss 0.12578529570251704 test_loss: 0.1311956524848938
epoch: 138 training_loss 0.13922692611813545 test_loss: 0.14466772079467774
epoch: 139 training_loss 0.13108979742974042 test_loss: 0.12832987308502197
epoch: 140 training_loss 0.1390044229850173 test_loss: 0.1436283230781555
epoch: 141 training_loss 0.13461637061089277 test_loss: 0.13664717674255372
epoch: 142 training_loss 0.13646221920847892 test_loss: 0.14731571674346924
epoch: 143 training_loss 0.1302368028089404 test_loss: 0.12131441831588745
epoch: 144 training_loss 0.12915247723460196 test_loss: 0.1518772602081299
epoch: 145 training_loss 0.13480902452021837 test_loss: 0.1367423415184021
epoch: 146 training_loss 0.13313021257519722 test_loss: 0.13493434190750123
epoch: 147 training_loss 0.13780994180589914 test_loss: 0.1359870672225952
epoch: 148 training_loss 0.12967398542910813 test_loss: 0.13673951625823974
epoch: 149 training_loss 0.1335276760533452 test_loss: 0.12279123067855835
episode: 0 training return: -1046.8491763146942
episode: 1 training return: -1059.7533276380866
episode: 2 training return: -985.7255605335463
episode: 3 training return: -981.3871014861193
epoch: 1 test_true_pfm: 231.8109185311586 sim_pfm: -967.7154706030318
episode: 4 training return: -975.5851005827478
episode: 5 training return: -986.6005982363191
episode: 6 training return: -1016.2029581576201
episode: 7 training return: -1028.0831522542953
epoch: 2 test_true_pfm: 248.7594431849741 sim_pfm: -961.7836178621001
episode: 8 training return: -983.8430838869647
episode: 9 training return: -984.3625918223096
episode: 10 training return: -943.7614123254867
episode: 11 training return: -979.3278718302337
epoch: 3 test_true_pfm: 202.73899889944377 sim_pfm: -937.3327927322216
episode: 12 training return: -940.395314686344
episode: 13 training return: -938.381916183881
episode: 14 training return: -948.3975889572072
episode: 15 training return: -948.2222161890758
epoch: 4 test_true_pfm: 178.393228996076 sim_pfm: -944.8136671535199
episode: 16 training return: -955.1059120545956
episode: 17 training return: -927.493454651789
episode: 18 training return: -926.4354723020984
episode: 19 training return: -897.6746900791644
epoch: 5 test_true_pfm: 247.877692184295 sim_pfm: -907.8110524558806
episode: 20 training return: -897.9203107258923
episode: 21 training return: -885.2535874218328
episode: 22 training return: -907.8104366463002
episode: 23 training return: -909.1566296426192
epoch: 6 test_true_pfm: 275.09951226406764 sim_pfm: -901.0187374257802
episode: 24 training return: -907.4916216954246
episode: 25 training return: -913.6072493118808
episode: 26 training return: -884.7346282149032
episode: 27 training return: -894.276971355463
epoch: 7 test_true_pfm: 285.1067757043774 sim_pfm: -888.0796304168084
episode: 28 training return: -898.7787112728714
episode: 29 training return: -885.6494650922284
episode: 30 training return: -897.1496580515791
episode: 31 training return: -899.0845858131715
epoch: 8 test_true_pfm: 275.1255407531623 sim_pfm: -896.8276560288687
episode: 32 training return: -917.1439116068574
episode: 33 training return: -909.7474375458304
episode: 34 training return: -895.4875383342461
episode: 35 training return: -895.2908948924306
epoch: 9 test_true_pfm: 267.83521471771655 sim_pfm: -907.4176305216378
episode: 36 training return: -890.7112918635736
episode: 37 training return: -885.7723638878076
episode: 38 training return: -895.3518996635349
episode: 39 training return: -897.615962465322
epoch: 10 test_true_pfm: 310.47378005207185 sim_pfm: -863.4814535355099
episode: 40 training return: -879.3903858354447
episode: 41 training return: -873.7393314454621
episode: 42 training return: -875.0970869729401
episode: 43 training return: -882.0204268013947
epoch: 11 test_true_pfm: 311.163923331746 sim_pfm: -863.1230839410888
episode: 44 training return: -863.0604862433306
episode: 45 training return: -884.0764774877208
episode: 46 training return: -874.1938862814511
episode: 47 training return: -883.928472194673
epoch: 12 test_true_pfm: 329.35665349417417 sim_pfm: -858.3875773456831
episode: 48 training return: -869.4327354941289
episode: 49 training return: -875.2062198826725
episode: 50 training return: -879.1759049005082
episode: 51 training return: -887.447374308627
epoch: 13 test_true_pfm: 316.68076864250975 sim_pfm: -866.6678056022483
episode: 52 training return: -866.4457228145096
episode: 53 training return: -869.3922313526218
episode: 54 training return: -869.680804581534
episode: 55 training return: -880.6066927850179
epoch: 14 test_true_pfm: 323.7627879759845 sim_pfm: -864.3556083094373
episode: 56 training return: -879.7829795897496
episode: 57 training return: -886.8404864606068
episode: 58 training return: -880.7594934492366
episode: 59 training return: -860.9366382046053
epoch: 15 test_true_pfm: 339.50339758494215 sim_pfm: -858.7897202476926
episode: 60 training return: -860.2461466898579
episode: 61 training return: -870.9517282370532
episode: 62 training return: -868.1981460567522
episode: 63 training return: -862.6373399410057
epoch: 16 test_true_pfm: 337.0384617427758 sim_pfm: -851.6557320703964
episode: 64 training return: -856.3236025206276
episode: 65 training return: -848.2754784705459
episode: 66 training return: -867.6349477628174
episode: 67 training return: -866.0103233770506
epoch: 17 test_true_pfm: 328.92771633933813 sim_pfm: -853.4059942503678
episode: 68 training return: -865.6414191311358
episode: 69 training return: -853.5977281376845
episode: 70 training return: -854.1867343179886
episode: 71 training return: -854.2695696004319
epoch: 18 test_true_pfm: 336.1068321274222 sim_pfm: -852.1268094915982
episode: 72 training return: -861.1605976606046
episode: 73 training return: -852.4031313697392
episode: 74 training return: -852.4325354559921
episode: 75 training return: -858.6650259008835
epoch: 19 test_true_pfm: 351.71694663699355 sim_pfm: -840.4717696899124
episode: 76 training return: -865.4892987036994
episode: 77 training return: -856.745058665805
episode: 78 training return: -855.1391261845323
episode: 79 training return: -854.2895983069851
epoch: 20 test_true_pfm: 346.6737007872621 sim_pfm: -850.071720738673
episode: 80 training return: -856.197696106798
episode: 81 training return: -850.2140901125096
episode: 82 training return: -852.9428587053354
episode: 83 training return: -853.2170398731636
epoch: 21 test_true_pfm: 361.46978489643 sim_pfm: -844.6476432307271
episode: 84 training return: -843.0652946317861
episode: 85 training return: -848.2704105776497
episode: 86 training return: -856.6723087857492
episode: 87 training return: -858.5860692431971
epoch: 22 test_true_pfm: 345.31541981384817 sim_pfm: -853.7373664224979
episode: 88 training return: -864.4639525480217
episode: 89 training return: -851.7063564951745
episode: 90 training return: -850.3879548743912
episode: 91 training return: -844.6867052589729
epoch: 23 test_true_pfm: 348.22637667637036 sim_pfm: -844.4626663600053
episode: 92 training return: -847.3163773489714
episode: 93 training return: -852.7658178670387
episode: 94 training return: -846.3253937782442
episode: 95 training return: -847.3436307992654
epoch: 24 test_true_pfm: 358.39775528043305 sim_pfm: -849.2318158996168
episode: 96 training return: -864.4269401656564
episode: 97 training return: -846.814884395243
episode: 98 training return: -851.605696468421
episode: 99 training return: -837.8669923937377
epoch: 25 test_true_pfm: 344.54486410970475 sim_pfm: -847.0153895246027
episode: 100 training return: -846.3775866919516
episode: 101 training return: -852.236919929349
episode: 102 training return: -846.4182789697271
episode: 103 training return: -853.2818258725091
epoch: 26 test_true_pfm: 353.19311623487175 sim_pfm: -840.2540992422181
episode: 104 training return: -849.7935537488402
episode: 105 training return: -848.4362090321556
episode: 106 training return: -842.5495492851903
episode: 107 training return: -848.6905248104049
epoch: 27 test_true_pfm: 358.06129346477127 sim_pfm: -839.53469759761
episode: 108 training return: -846.6954853865319
episode: 109 training return: -814.0474967494846
episode: 110 training return: -845.9540899523877
episode: 111 training return: -849.9970109655094
epoch: 28 test_true_pfm: 281.27771724866926 sim_pfm: -835.3859982858817
episode: 112 training return: -841.9439502614957
episode: 113 training return: -842.4325727931038
episode: 114 training return: -822.8512614291008
episode: 115 training return: -851.0527238463962
epoch: 29 test_true_pfm: 382.57901191558153 sim_pfm: -813.413638599569
episode: 116 training return: -852.6263454287082
episode: 117 training return: -858.4769857542731
episode: 118 training return: -845.4475089391071
episode: 119 training return: -855.3261782231523
epoch: 30 test_true_pfm: 394.90803135999266 sim_pfm: -830.6317708456618
episode: 120 training return: -837.7252818019566
episode: 121 training return: -823.5105473653301
episode: 122 training return: -804.6179874582205
episode: 123 training return: -818.3507958946388
epoch: 31 test_true_pfm: 396.1059336375559 sim_pfm: -803.9233762284081
episode: 124 training return: -825.8650391285397
episode: 125 training return: -828.9363879945354
episode: 126 training return: -834.6093108282063
episode: 127 training return: -815.9308297859346
epoch: 32 test_true_pfm: 353.9547889083842 sim_pfm: -847.9743947750864
episode: 128 training return: -813.6919269832573
episode: 129 training return: -854.604755273978
episode: 130 training return: -859.513975395087
episode: 131 training return: -788.346533063668
epoch: 33 test_true_pfm: 433.20625679621907 sim_pfm: -779.8786826929994
episode: 132 training return: -805.5798332421462
episode: 133 training return: -808.1308156928203
episode: 134 training return: -828.5335981498637
episode: 135 training return: -862.7989921451649
epoch: 34 test_true_pfm: 372.35834072416975 sim_pfm: -800.1325666434227
episode: 136 training return: -819.9178883594434
episode: 137 training return: -809.5456095862604
episode: 138 training return: -812.1183181562772
episode: 139 training return: -843.9024620879172
epoch: 35 test_true_pfm: 388.8063252294798 sim_pfm: -803.6288015134566
episode: 140 training return: -812.8182782478301
episode: 141 training return: -786.1523687484248
episode: 142 training return: -839.104080472719
episode: 143 training return: -841.8141893285534
epoch: 36 test_true_pfm: 392.38168797383975 sim_pfm: -804.4272395489719
episode: 144 training return: -814.6072934739643
episode: 145 training return: -810.5436657913077
episode: 146 training return: -803.9233921619272
episode: 147 training return: -852.8710850172885
epoch: 37 test_true_pfm: 420.8537331304242 sim_pfm: -787.8279210602547
episode: 148 training return: -779.4029239499602
episode: 149 training return: -800.1727610083964
episode: 150 training return: -805.9080600483951
episode: 151 training return: -810.3961658263198
epoch: 38 test_true_pfm: 373.2787425147455 sim_pfm: -808.9142523421152
episode: 152 training return: -781.2654507865745
episode: 153 training return: -850.0074255866637
episode: 154 training return: -845.4573931455261
episode: 155 training return: -828.0574567654033
epoch: 39 test_true_pfm: 437.27475720436263 sim_pfm: -777.116857444394
episode: 156 training return: -803.9053754520787
episode: 157 training return: -802.0075324477112
episode: 158 training return: -842.5474546831207
episode: 159 training return: -799.2774914049369
epoch: 40 test_true_pfm: 472.8937753073605 sim_pfm: -764.9459458966776
episode: 160 training return: -773.4851662230685
episode: 161 training return: -803.4917686511027
episode: 162 training return: -805.4611037900349
episode: 163 training return: -771.056630682747
epoch: 41 test_true_pfm: 476.63260714508925 sim_pfm: -756.6990195815964
episode: 164 training return: -843.5065097033107
episode: 165 training return: -826.0247707404086
episode: 166 training return: -805.1540537300712
episode: 167 training return: -824.8584930887373
epoch: 42 test_true_pfm: 395.8820330974702 sim_pfm: -789.4830839303027
episode: 168 training return: -798.5590446203709
episode: 169 training return: -799.699304828723
episode: 170 training return: -798.2587093961812
episode: 171 training return: -803.2282828088047
epoch: 43 test_true_pfm: 496.2563710834067 sim_pfm: -757.8370247738234
episode: 172 training return: -795.6114617897833
episode: 173 training return: -792.9557098703832
episode: 174 training return: -772.3214979996474
episode: 175 training return: -778.2696490948853
epoch: 44 test_true_pfm: 481.07800696717317 sim_pfm: -760.1526175675505
episode: 176 training return: -781.0252341311343
episode: 177 training return: -790.2071951068931
episode: 178 training return: -775.0102502747608
episode: 179 training return: -781.7223666213267
epoch: 45 test_true_pfm: 508.12689925556623 sim_pfm: -742.9961428263781
episode: 180 training return: -763.8230804616712
episode: 181 training return: -770.3767680459091
episode: 182 training return: -772.8427875905438
episode: 183 training return: -775.8734539371561
epoch: 46 test_true_pfm: 474.9718907617398 sim_pfm: -768.7286056866851
episode: 184 training return: -783.4331129311571
episode: 185 training return: -783.016722767501
episode: 186 training return: -810.5679705826028
episode: 187 training return: -777.8621904304243
epoch: 47 test_true_pfm: 457.8041497833867 sim_pfm: -768.449164846249
episode: 188 training return: -789.9050376275413
episode: 189 training return: -793.289873327764
episode: 190 training return: -774.052217108758
episode: 191 training return: -776.1043727227626
epoch: 48 test_true_pfm: 482.59480131711143 sim_pfm: -752.8478769914758
episode: 192 training return: -793.6384854893782
episode: 193 training return: -789.3879646247476
episode: 194 training return: -797.7299947907892
episode: 195 training return: -772.1767463406255
epoch: 49 test_true_pfm: 488.0396562009433 sim_pfm: -752.9180453038307
episode: 196 training return: -788.005962595359
episode: 197 training return: -776.9919123622134
episode: 198 training return: -788.6420763382395
episode: 199 training return: -776.9773928475038
epoch: 50 test_true_pfm: 499.7074921920326 sim_pfm: -748.8820069506655
episode: 200 training return: -787.5267615491531
episode: 201 training return: -786.6701829522384
episode: 202 training return: -813.5457339024016
episode: 203 training return: -785.6426704358429
epoch: 51 test_true_pfm: 489.86689358973155 sim_pfm: -757.99333649724
episode: 204 training return: -792.3982828531574
episode: 205 training return: -801.475056517535
episode: 206 training return: -791.6639095230678
episode: 207 training return: -817.0024280693399
epoch: 52 test_true_pfm: 485.30483241901123 sim_pfm: -747.3498232813769
episode: 208 training return: -791.8287476159858
episode: 209 training return: -760.7294855968144
episode: 210 training return: -802.3526838035057
episode: 211 training return: -761.7572687919809
epoch: 53 test_true_pfm: 437.5464816969524 sim_pfm: -774.8203074569992
episode: 212 training return: -787.7290217237505
episode: 213 training return: -805.6573211794224
episode: 214 training return: -775.9193261041596
episode: 215 training return: -771.2326923420065
epoch: 54 test_true_pfm: 475.3915740696419 sim_pfm: -766.0379977153433
episode: 216 training return: -775.327332475669
episode: 217 training return: -768.209998156718
episode: 218 training return: -779.2231102866932
episode: 219 training return: -778.8344708933439
epoch: 55 test_true_pfm: 484.0557017914087 sim_pfm: -747.2286195204883
episode: 220 training return: -780.9606353591558
episode: 221 training return: -779.0857847406699
episode: 222 training return: -759.703122427157
episode: 223 training return: -794.8475871564782
epoch: 56 test_true_pfm: 477.252739077825 sim_pfm: -747.5715262879897
episode: 224 training return: -780.1744048433694
episode: 225 training return: -788.4885366686764
episode: 226 training return: -797.781212100271
episode: 227 training return: -774.8091145100175
epoch: 57 test_true_pfm: 513.6732787347854 sim_pfm: -737.4259794199112
episode: 228 training return: -779.9953042841571
episode: 229 training return: -781.5913749561271
episode: 230 training return: -781.4004353062707
episode: 231 training return: -801.0861315659412
epoch: 58 test_true_pfm: 474.13710863905004 sim_pfm: -747.1672177607401
episode: 232 training return: -783.7144125467288
episode: 233 training return: -778.0498257801352
episode: 234 training return: -757.5268300156379
episode: 235 training return: -776.1636410461824
epoch: 59 test_true_pfm: 491.6351346868662 sim_pfm: -759.3941397702118
episode: 236 training return: -778.9518882515739
episode: 237 training return: -765.3690990396743
episode: 238 training return: -787.0452055768968
episode: 239 training return: -754.9866600735303
epoch: 60 test_true_pfm: 489.70988584634443 sim_pfm: -750.3448029955043
episode: 240 training return: -792.1691606545483
episode: 241 training return: -784.4469565972524
episode: 242 training return: -787.2872263453775
episode: 243 training return: -756.7055249957026
epoch: 61 test_true_pfm: 500.0857871315427 sim_pfm: -751.5283068941299
episode: 244 training return: -766.1684804902819
episode: 245 training return: -768.8907413923067
episode: 246 training return: -781.3620638883199
episode: 247 training return: -791.9842174512588
epoch: 62 test_true_pfm: 497.46613104255925 sim_pfm: -740.1024897913934
episode: 248 training return: -754.0833074033724
episode: 249 training return: -746.0169724158462
episode: 250 training return: -791.8301129371021
episode: 251 training return: -770.5791830203952
epoch: 63 test_true_pfm: 502.50193641278315 sim_pfm: -740.1157127572319
episode: 252 training return: -761.208512130841
episode: 253 training return: -788.6955956992525
episode: 254 training return: -780.029692447853
episode: 255 training return: -745.8356583925519
epoch: 64 test_true_pfm: 489.6446208447408 sim_pfm: -744.7450747371762
episode: 256 training return: -776.4091316242835
episode: 257 training return: -778.7536794211067
episode: 258 training return: -777.1609274222211
episode: 259 training return: -776.8015082110744
epoch: 65 test_true_pfm: 504.782889672979 sim_pfm: -742.7038939684167
episode: 260 training return: -760.2594493323148
episode: 261 training return: -765.3035142497065
episode: 262 training return: -783.2253674248303
episode: 263 training return: -764.5983106758531
epoch: 66 test_true_pfm: 503.86225403754355 sim_pfm: -743.7663025338376
episode: 264 training return: -760.2723221803085
episode: 265 training return: -811.6697977703138
episode: 266 training return: -768.279773218071
episode: 267 training return: -764.5078584548422
epoch: 67 test_true_pfm: 490.0263647952513 sim_pfm: -748.3420914806297
episode: 268 training return: -773.2858304831768
episode: 269 training return: -747.442969241779
episode: 270 training return: -762.6485764258668
episode: 271 training return: -774.466459199245
epoch: 68 test_true_pfm: 498.38637500033525 sim_pfm: -744.8217345580024
episode: 272 training return: -764.6797192644591
episode: 273 training return: -772.7262443250548
episode: 274 training return: -760.5977303387181
episode: 275 training return: -760.3950271411188
epoch: 69 test_true_pfm: 526.5016794648969 sim_pfm: -739.286748788997
episode: 276 training return: -761.5366345560254
episode: 277 training return: -778.5378017790601
episode: 278 training return: -767.2506136204393
episode: 279 training return: -751.6195697734339
epoch: 70 test_true_pfm: 496.57333068520876 sim_pfm: -748.7516373921427
episode: 280 training return: -763.9629069737064
episode: 281 training return: -761.9691766938448
episode: 282 training return: -760.5314452916992
episode: 283 training return: -758.6481172583359
epoch: 71 test_true_pfm: 482.30121108244583 sim_pfm: -742.4057156679604
episode: 284 training return: -767.3853790379435
episode: 285 training return: -752.8337544225409
episode: 286 training return: -760.9169166878646
episode: 287 training return: -759.4816823910486
epoch: 72 test_true_pfm: 502.27304355864294 sim_pfm: -744.0728313803303
episode: 288 training return: -756.4572946166231
episode: 289 training return: -757.3030146847921
episode: 290 training return: -754.2909850033764
episode: 291 training return: -758.1972839454768
epoch: 73 test_true_pfm: 506.3144319246958 sim_pfm: -738.1845133876601
episode: 292 training return: -769.1270118758304
episode: 293 training return: -760.7553826082997
episode: 294 training return: -755.413165103846
episode: 295 training return: -785.7066516675415
epoch: 74 test_true_pfm: 469.63422650303954 sim_pfm: -757.7848647441056
episode: 296 training return: -746.0128915533463
episode: 297 training return: -764.2758071851747
episode: 298 training return: -763.6737103831804
episode: 299 training return: -751.7851528403723
epoch: 75 test_true_pfm: 464.13814972273184 sim_pfm: -757.2961897630925
episode: 300 training return: -771.2087497251566
episode: 301 training return: -767.423210141824
episode: 302 training return: -761.6431098000534
episode: 303 training return: -756.2586735830768
epoch: 76 test_true_pfm: 485.21439788076987 sim_pfm: -751.0548475339328
episode: 304 training return: -761.0520488110217
episode: 305 training return: -743.538925942457
episode: 306 training return: -763.7174439563063
episode: 307 training return: -768.715129983211
epoch: 77 test_true_pfm: 471.97767859906315 sim_pfm: -744.4967289242142
episode: 308 training return: -751.4201614530792
episode: 309 training return: -763.8297512160228
episode: 310 training return: -771.3697432547226
episode: 311 training return: -766.8933507315163
epoch: 78 test_true_pfm: 496.6975748470479 sim_pfm: -745.9950662376099
episode: 312 training return: -769.2787954193307
episode: 313 training return: -749.79355578337
episode: 314 training return: -740.1147883473174
episode: 315 training return: -760.8324291063619
epoch: 79 test_true_pfm: 471.0081009160717 sim_pfm: -756.5279098057781
episode: 316 training return: -780.9851029532553
episode: 317 training return: -762.9694117567318
episode: 318 training return: -750.535019251962
episode: 319 training return: -761.3505069972532
epoch: 80 test_true_pfm: 504.16558820820154 sim_pfm: -741.8449150797909
episode: 320 training return: -751.6417647541134
episode: 321 training return: -754.7831956461977
episode: 322 training return: -751.1286097936942
episode: 323 training return: -761.3198879776439
epoch: 81 test_true_pfm: 485.66764702316794 sim_pfm: -750.9781613814857
episode: 324 training return: -753.4403050018364
episode: 325 training return: -754.3588496385972
episode: 326 training return: -751.1019107285894
episode: 327 training return: -746.9287448403156
epoch: 82 test_true_pfm: 475.02747189432125 sim_pfm: -756.2304693573964
episode: 328 training return: -746.5886577023274
episode: 329 training return: -751.439478401266
episode: 330 training return: -755.3236333456572
episode: 331 training return: -753.7927093561688
epoch: 83 test_true_pfm: 475.19416376803065 sim_pfm: -748.4597239217388
episode: 332 training return: -744.0130560551282
episode: 333 training return: -766.3295753763676
episode: 334 training return: -739.8233980840444
episode: 335 training return: -767.3203407889481
epoch: 84 test_true_pfm: 481.68613746106456 sim_pfm: -743.9442632187919
episode: 336 training return: -763.5653467491248
episode: 337 training return: -756.9964778175887
episode: 338 training return: -770.9413716745053
episode: 339 training return: -746.6392274654968
epoch: 85 test_true_pfm: 491.61557524557026 sim_pfm: -749.016533844314
episode: 340 training return: -766.7694084206043
episode: 341 training return: -760.2597736716102
episode: 342 training return: -771.3370191658622
episode: 343 training return: -756.4834275814393
epoch: 86 test_true_pfm: 490.8216254268453 sim_pfm: -741.4382179712239
episode: 344 training return: -743.7980153628678
episode: 345 training return: -772.4344559781127
episode: 346 training return: -756.3949578433527
episode: 347 training return: -758.2105092804026
epoch: 87 test_true_pfm: 487.6755924717854 sim_pfm: -747.3282288761108
episode: 348 training return: -766.8247272287873
episode: 349 training return: -760.9001954261857
episode: 350 training return: -757.1992959921738
episode: 351 training return: -788.2544242474077
epoch: 88 test_true_pfm: 504.29320779896904 sim_pfm: -739.9668801073029
episode: 352 training return: -771.4312312653333
episode: 353 training return: -745.3306269139198
episode: 354 training return: -764.7229177031617
episode: 355 training return: -765.6014126821883
epoch: 89 test_true_pfm: 489.60990718868607 sim_pfm: -737.061487412323
episode: 356 training return: -771.7097927191959
episode: 357 training return: -769.7139001074773
episode: 358 training return: -787.359769609403
episode: 359 training return: -750.2503757968097
epoch: 90 test_true_pfm: 497.147029121978 sim_pfm: -741.089598288506
episode: 360 training return: -763.7986624192741
episode: 361 training return: -779.060658944013
episode: 362 training return: -755.7180386217286
episode: 363 training return: -753.391055281591
epoch: 91 test_true_pfm: 491.912927637524 sim_pfm: -739.141879926386
episode: 364 training return: -763.5873212427414
episode: 365 training return: -757.7030822484163
episode: 366 training return: -751.4823430283004
episode: 367 training return: -777.7890124262763
epoch: 92 test_true_pfm: 475.2413480839923 sim_pfm: -752.1088854416367
episode: 368 training return: -743.7351622140409
episode: 369 training return: -742.5180000009013
episode: 370 training return: -773.2955578547185
episode: 371 training return: -753.9921606271114
epoch: 93 test_true_pfm: 501.9370474690899 sim_pfm: -735.6220349063236
episode: 372 training return: -755.7718079875077
episode: 373 training return: -758.1859375851222
episode: 374 training return: -751.7297901209447
episode: 375 training return: -771.6962034853707
epoch: 94 test_true_pfm: 544.9426532329566 sim_pfm: -742.9261146456976
episode: 376 training return: -763.2069551682957
episode: 377 training return: -767.3490757708221
episode: 378 training return: -753.1661674089623
episode: 379 training return: -763.7035863414461
epoch: 95 test_true_pfm: 483.55779324024866 sim_pfm: -741.4733148678878
episode: 380 training return: -747.946756485473
episode: 381 training return: -774.7006254189771
episode: 382 training return: -748.8613583535389
episode: 383 training return: -764.2095745983387
epoch: 96 test_true_pfm: 500.1029447447961 sim_pfm: -736.4514744785553
episode: 384 training return: -759.6638730470462
episode: 385 training return: -767.1564487079477
episode: 386 training return: -775.6555886019828
episode: 387 training return: -765.2895311103117
epoch: 97 test_true_pfm: 493.27650685860516 sim_pfm: -735.1201376270701
episode: 388 training return: -739.9498967285424
episode: 389 training return: -753.3609344190675
episode: 390 training return: -753.8459610214894
episode: 391 training return: -748.6794784294195
epoch: 98 test_true_pfm: 502.70702608004717 sim_pfm: -739.470772249032
episode: 392 training return: -774.4502156954425
episode: 393 training return: -756.6044805625244
episode: 394 training return: -752.9514393037434
episode: 395 training return: -759.8334282338225
epoch: 99 test_true_pfm: 520.2105308974552 sim_pfm: -733.5758233712235
episode: 396 training return: -746.5104067943201
episode: 397 training return: -750.0660386801482
episode: 398 training return: -746.9196115404186
episode: 399 training return: -749.0957045346314
epoch: 100 test_true_pfm: 491.73203068679874 sim_pfm: -734.8632820573158
episode: 400 training return: -754.2741030862065
episode: 401 training return: -762.5179070903729
episode: 402 training return: -755.9517740860683
episode: 403 training return: -771.7794691371777
epoch: 101 test_true_pfm: 504.41562989377115 sim_pfm: -734.2764697573448
episode: 404 training return: -766.8417129631649
episode: 405 training return: -758.1128639140904
episode: 406 training return: -742.4467975587686
episode: 407 training return: -753.528327484737
epoch: 102 test_true_pfm: 500.40505627867054 sim_pfm: -731.0909833034057
episode: 408 training return: -759.3419669009485
episode: 409 training return: -752.4178269333812
episode: 410 training return: -764.9187776744541
episode: 411 training return: -753.9099121364919
epoch: 103 test_true_pfm: 494.6790365463908 sim_pfm: -736.9107599326599
episode: 412 training return: -759.0964933373265
episode: 413 training return: -758.8079457028401
episode: 414 training return: -746.1866467972588
episode: 415 training return: -764.6035650106584
epoch: 104 test_true_pfm: 478.3952187998578 sim_pfm: -745.2208718254741
episode: 416 training return: -748.3820112373782
episode: 417 training return: -749.1883451459182
episode: 418 training return: -746.4514974292379
episode: 419 training return: -761.3424116176434
epoch: 105 test_true_pfm: 501.3457389378748 sim_pfm: -736.9463440432586
episode: 420 training return: -743.4906095221156
episode: 421 training return: -752.2019387701608
episode: 422 training return: -745.3587001317595
episode: 423 training return: -772.708582472198
epoch: 106 test_true_pfm: 513.4411005381462 sim_pfm: -734.6228243660571
episode: 424 training return: -752.5848374757508
episode: 425 training return: -765.1064519094283
episode: 426 training return: -757.8772855416933
episode: 427 training return: -752.5390646370493
epoch: 107 test_true_pfm: 517.6505133328826 sim_pfm: -732.1803951354124
episode: 428 training return: -761.6330898945934
episode: 429 training return: -753.9218184004096
episode: 430 training return: -759.3242342270643
episode: 431 training return: -759.1785533405716
epoch: 108 test_true_pfm: 502.9538214152081 sim_pfm: -731.2083065639654
episode: 432 training return: -758.7051659785258
episode: 433 training return: -760.2958638307249
episode: 434 training return: -769.6141476491
episode: 435 training return: -755.911515700116
epoch: 109 test_true_pfm: 504.8145425491659 sim_pfm: -732.7100886118134
episode: 436 training return: -758.783862594555
episode: 437 training return: -749.353837800729
episode: 438 training return: -780.253220106979
episode: 439 training return: -759.5921574533304
epoch: 110 test_true_pfm: 466.4894144767175 sim_pfm: -745.7977621073346
episode: 440 training return: -743.9031991821643
episode: 441 training return: -760.7171086779535
episode: 442 training return: -751.6534188054898
episode: 443 training return: -771.1564637745643
epoch: 111 test_true_pfm: 477.00421603256495 sim_pfm: -739.095292323289
episode: 444 training return: -763.2992182893619
episode: 445 training return: -763.2780391825988
episode: 446 training return: -750.1825122272786
episode: 447 training return: -747.2238937034984
epoch: 112 test_true_pfm: 507.5249810278937 sim_pfm: -738.1435569822432
episode: 448 training return: -746.7675314937026
episode: 449 training return: -761.4841125886659
episode: 450 training return: -746.0002070783029
episode: 451 training return: -755.8633598999985
epoch: 113 test_true_pfm: 497.5381745483393 sim_pfm: -743.1398101127389
episode: 452 training return: -757.3623842889808
episode: 453 training return: -744.6113792810266
episode: 454 training return: -764.8390555085899
episode: 455 training return: -758.1792563203508
epoch: 114 test_true_pfm: 499.3772321977124 sim_pfm: -733.2107747668628
episode: 456 training return: -754.122137299738
episode: 457 training return: -750.4264205145769
episode: 458 training return: -747.3820192289464
episode: 459 training return: -750.7730224379173
epoch: 115 test_true_pfm: 505.31934332264626 sim_pfm: -736.8158901219011
episode: 460 training return: -756.3947635383533
episode: 461 training return: -758.2365609321768
episode: 462 training return: -751.821135592076
episode: 463 training return: -761.0223782062806
epoch: 116 test_true_pfm: 483.0176320825401 sim_pfm: -738.8457748732022
episode: 464 training return: -761.5598375610118
episode: 465 training return: -757.6956936307719
episode: 466 training return: -756.4468852580694
episode: 467 training return: -736.8323041166487
epoch: 117 test_true_pfm: 502.6434217829339 sim_pfm: -730.2851389353506
episode: 468 training return: -757.2063967470878
episode: 469 training return: -763.3465228725969
episode: 470 training return: -774.8911147273916
episode: 471 training return: -754.2412271955495
epoch: 118 test_true_pfm: 493.42027342383955 sim_pfm: -740.7318982764479
episode: 472 training return: -759.2053317069593
episode: 473 training return: -755.0137358354285
episode: 474 training return: -759.37407134377
episode: 475 training return: -757.524249476042
epoch: 119 test_true_pfm: 490.9407022657753 sim_pfm: -735.9161528365671
episode: 476 training return: -750.6920312587409
episode: 477 training return: -768.9277352382207
episode: 478 training return: -745.5529523483146
episode: 479 training return: -773.7314244496301
epoch: 120 test_true_pfm: 489.6466308074434 sim_pfm: -745.1211402400891
episode: 480 training return: -768.4764445631894
episode: 481 training return: -764.2547701419483
episode: 482 training return: -761.0456604949009
episode: 483 training return: -762.9073322255522
epoch: 121 test_true_pfm: 507.91817331222916 sim_pfm: -734.8252736840623
episode: 484 training return: -737.7932265617455
episode: 485 training return: -754.7577265990436
episode: 486 training return: -755.7774387636784
episode: 487 training return: -759.8441869661038
epoch: 122 test_true_pfm: 509.7371830686418 sim_pfm: -731.7881686496909
episode: 488 training return: -753.129017416284
episode: 489 training return: -753.3211020399498
episode: 490 training return: -754.9745180085902
episode: 491 training return: -753.4287845219327
epoch: 123 test_true_pfm: 493.4907208661659 sim_pfm: -741.729596404782
episode: 492 training return: -753.47285502122
episode: 493 training return: -755.7990765878964
episode: 494 training return: -756.6494615907926
episode: 495 training return: -765.2275822852139
epoch: 124 test_true_pfm: 506.5132102789687 sim_pfm: -736.7626265575196
episode: 496 training return: -764.7856410015091
episode: 497 training return: -741.8279412672505
episode: 498 training return: -772.6170663895018
episode: 499 training return: -742.6311667218206
epoch: 125 test_true_pfm: 497.9066491462759 sim_pfm: -741.1056600116923
episode: 500 training return: -775.9434417466639
episode: 501 training return: -769.1004622871326
episode: 502 training return: -744.3829791998149
episode: 503 training return: -740.9563374425587
epoch: 126 test_true_pfm: 498.40122325077687 sim_pfm: -729.6645656530894
episode: 504 training return: -748.0225966802078
episode: 505 training return: -759.0224774763294
episode: 506 training return: -751.1271689283071
episode: 507 training return: -764.9016633259421
epoch: 127 test_true_pfm: 480.79431688748923 sim_pfm: -737.2095546512986
episode: 508 training return: -757.3672587483297
episode: 509 training return: -755.1657208217223
episode: 510 training return: -762.2354585398022
episode: 511 training return: -770.2610156621681
epoch: 128 test_true_pfm: 495.5577732411727 sim_pfm: -734.9151419892909
episode: 512 training return: -755.6688267695137
episode: 513 training return: -769.4616552589262
episode: 514 training return: -779.6310428956853
episode: 515 training return: -749.1010383848466
epoch: 129 test_true_pfm: 494.0935484963661 sim_pfm: -735.9734762072085
episode: 516 training return: -753.7171995867095
episode: 517 training return: -753.9409290578292
episode: 518 training return: -761.0398200623771
episode: 519 training return: -763.8909364582546
epoch: 130 test_true_pfm: 499.5811380972356 sim_pfm: -735.4367766788669
episode: 520 training return: -754.6769339237668
episode: 521 training return: -747.2961725484773
episode: 522 training return: -763.7209561824362
episode: 523 training return: -749.5917246658775
epoch: 131 test_true_pfm: 500.54924804528673 sim_pfm: -737.2998489248724
episode: 524 training return: -760.027282005821
episode: 525 training return: -754.9146666486129
episode: 526 training return: -744.9296496118271
episode: 527 training return: -751.3224752842074
epoch: 132 test_true_pfm: 508.12582033268865 sim_pfm: -732.5861452092495
episode: 528 training return: -748.6493212703763
episode: 529 training return: -753.7785467845292
episode: 530 training return: -753.9266068102658
episode: 531 training return: -755.5730721165688
epoch: 133 test_true_pfm: 504.70847843427055 sim_pfm: -739.669533175838
episode: 532 training return: -764.281425358385
episode: 533 training return: -770.7611329094683
episode: 534 training return: -739.547713586207
episode: 535 training return: -742.9084542745128
epoch: 134 test_true_pfm: 491.39714583180995 sim_pfm: -734.5753819667492
episode: 536 training return: -762.5435442244924
episode: 537 training return: -750.0282095413221
episode: 538 training return: -758.2023131112777
episode: 539 training return: -768.4797478287156
epoch: 135 test_true_pfm: 503.73920049611957 sim_pfm: -732.8029708348587
episode: 540 training return: -752.6681742574051
episode: 541 training return: -761.767156972574
episode: 542 training return: -749.5470597777372
episode: 543 training return: -745.3886197983578
epoch: 136 test_true_pfm: 490.63866496286136 sim_pfm: -736.7752292008736
episode: 544 training return: -760.6865305532363
episode: 545 training return: -765.2803504544067
episode: 546 training return: -755.3834631252121
episode: 547 training return: -757.8461287575975
epoch: 137 test_true_pfm: 504.0255003635454 sim_pfm: -735.6100819814848
episode: 548 training return: -750.0036351422071
episode: 549 training return: -755.8115803188898
episode: 550 training return: -763.8762939061784
episode: 551 training return: -755.6888380295966
epoch: 138 test_true_pfm: 495.526313139436 sim_pfm: -735.6521317908865
episode: 552 training return: -741.6409158613393
episode: 553 training return: -749.2775765036829
episode: 554 training return: -750.1685716529847
episode: 555 training return: -754.359400813676
epoch: 139 test_true_pfm: 496.4598759879486 sim_pfm: -736.1030289946679
episode: 556 training return: -752.6912044719156
episode: 557 training return: -749.4224307109316
episode: 558 training return: -747.8297242569298
episode: 559 training return: -746.7280177366764
epoch: 140 test_true_pfm: 506.91274816347374 sim_pfm: -730.393156261935
episode: 560 training return: -751.2135379309999
episode: 561 training return: -745.4652644195029
episode: 562 training return: -759.6419174558794
episode: 563 training return: -755.5783264045218
epoch: 141 test_true_pfm: 499.0968337168288 sim_pfm: -732.5662293949131
episode: 564 training return: -764.6371446197601
episode: 565 training return: -752.2637760168839
episode: 566 training return: -768.0159809430907
episode: 567 training return: -762.0091660350421
epoch: 142 test_true_pfm: 495.96866735719067 sim_pfm: -735.9275654082279
episode: 568 training return: -776.9654896694253
episode: 569 training return: -755.7362104172739
episode: 570 training return: -745.8800711401797
episode: 571 training return: -741.3306671284813
epoch: 143 test_true_pfm: 510.25730690470783 sim_pfm: -728.0917283038871
episode: 572 training return: -735.8839089879449
episode: 573 training return: -758.931067440355
episode: 574 training return: -761.2914574932076
episode: 575 training return: -744.7939132363141
epoch: 144 test_true_pfm: 521.2694700001637 sim_pfm: -726.3175072531504
episode: 576 training return: -746.4491165956979
episode: 577 training return: -738.6321162911629
episode: 578 training return: -743.2537219477878
episode: 579 training return: -754.9158159966809
epoch: 145 test_true_pfm: 495.0535363491445 sim_pfm: -738.1894714678137
episode: 580 training return: -737.1526889858031
episode: 581 training return: -755.6811660827827
episode: 582 training return: -761.9830372798168
episode: 583 training return: -752.5381890184677
epoch: 146 test_true_pfm: 514.1274007270871 sim_pfm: -727.5288582883767
episode: 584 training return: -757.0339058007492
episode: 585 training return: -750.7561282589933
episode: 586 training return: -746.8075914136472
episode: 587 training return: -761.6071513728816
epoch: 147 test_true_pfm: 507.8065877756516 sim_pfm: -732.1694296537935
episode: 588 training return: -750.6244253511702
episode: 589 training return: -762.6279854468345
episode: 590 training return: -741.9526370367431
episode: 591 training return: -752.6025068749234
epoch: 148 test_true_pfm: 518.4931325286074 sim_pfm: -731.9407343177519
episode: 592 training return: -759.813651192802
episode: 593 training return: -755.4406561668394
episode: 594 training return: -745.3164750630605
episode: 595 training return: -757.3294196433309
epoch: 149 test_true_pfm: 504.9432622597712 sim_pfm: -730.3161249358901
episode: 596 training return: -742.9641927840947
episode: 597 training return: -757.018761400587
episode: 598 training return: -749.3259889611485
episode: 599 training return: -752.8536753256209
epoch: 150 test_true_pfm: 494.2472108866618 sim_pfm: -734.3610344510739
