['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 0.2103131916373968 test_loss: 0.1526864290237427
epoch: 1 training_loss 0.13500347800552845 test_loss: 0.14169111251831054
epoch: 2 training_loss 0.1303072203323245 test_loss: 0.12537335157394408
epoch: 3 training_loss 0.12961439851671458 test_loss: 0.14668294191360473
epoch: 4 training_loss 0.11862180471420287 test_loss: 0.12293249368667603
epoch: 5 training_loss 0.11910168509930372 test_loss: 0.12346614599227905
epoch: 6 training_loss 0.11970916144549847 test_loss: 0.12508782148361205
epoch: 7 training_loss 0.12019940711557865 test_loss: 0.12096351385116577
epoch: 8 training_loss 0.12513335432857275 test_loss: 0.1397779941558838
epoch: 9 training_loss 0.11502526804804802 test_loss: 0.13817214965820312
epoch: 10 training_loss 0.12276750855147839 test_loss: 0.1206162691116333
epoch: 11 training_loss 0.11027548015117646 test_loss: 0.1172342300415039
epoch: 12 training_loss 0.1161030973866582 test_loss: 0.11953650712966919
epoch: 13 training_loss 0.11477983489632607 test_loss: 0.11712435483932496
epoch: 14 training_loss 0.11680184941738844 test_loss: 0.11778191328048707
epoch: 15 training_loss 0.11418332871049643 test_loss: 0.11409015655517578
epoch: 16 training_loss 0.11947861567139625 test_loss: 0.1358262062072754
epoch: 17 training_loss 0.11619774933904409 test_loss: 0.12643785476684571
epoch: 18 training_loss 0.11100897755473853 test_loss: 0.13170207738876344
epoch: 19 training_loss 0.10897757276892663 test_loss: 0.11366137266159057
epoch: 20 training_loss 0.12657335855066776 test_loss: 0.13176136016845702
epoch: 21 training_loss 0.11145130079239607 test_loss: 0.13193570375442504
epoch: 22 training_loss 0.11234447598457337 test_loss: 0.11229732036590576
epoch: 23 training_loss 0.11324018903076649 test_loss: 0.11830124855041504
epoch: 24 training_loss 0.10922273010015487 test_loss: 0.13084800243377687
epoch: 25 training_loss 0.11277070995420217 test_loss: 0.12597997188568116
epoch: 26 training_loss 0.1167728940397501 test_loss: 0.11625722646713257
epoch: 27 training_loss 0.11601233799010516 test_loss: 0.1112816333770752
epoch: 28 training_loss 0.11175562858581543 test_loss: 0.1298643469810486
epoch: 29 training_loss 0.11483943961560726 test_loss: 0.11597819328308105
epoch: 30 training_loss 0.11476690631359815 test_loss: 0.13143444061279297
epoch: 31 training_loss 0.11481220960617065 test_loss: 0.12832311391830445
epoch: 32 training_loss 0.1150228275731206 test_loss: 0.11993407011032105
epoch: 33 training_loss 0.10371981326490641 test_loss: 0.13372087478637695
epoch: 34 training_loss 0.10605661105364561 test_loss: 0.11993488073348998
epoch: 35 training_loss 0.11334364350885152 test_loss: 0.13470737934112548
epoch: 36 training_loss 0.11832547720521688 test_loss: 0.11920467615127564
epoch: 37 training_loss 0.1210735647752881 test_loss: 0.1084228515625
epoch: 38 training_loss 0.10923518776893616 test_loss: 0.12288779020309448
epoch: 39 training_loss 0.11673537958413363 test_loss: 0.1080740213394165
epoch: 40 training_loss 0.1108988394215703 test_loss: 0.1272469639778137
epoch: 41 training_loss 0.11015808708965778 test_loss: 0.12325543165206909
epoch: 42 training_loss 0.11340836144983768 test_loss: 0.10799869298934936
epoch: 43 training_loss 0.109031076207757 test_loss: 0.12279354333877564
epoch: 44 training_loss 0.11414950232952834 test_loss: 0.12632580995559692
epoch: 45 training_loss 0.11416212424635887 test_loss: 0.11743279695510864
epoch: 46 training_loss 0.11964760065078735 test_loss: 0.12823787927627564
epoch: 47 training_loss 0.11413616433739662 test_loss: 0.12695759534835815
epoch: 48 training_loss 0.1121006453409791 test_loss: 0.12620877027511596
epoch: 49 training_loss 0.11669040817767382 test_loss: 0.10496221780776978
epoch: 50 training_loss 0.11283438295125961 test_loss: 0.11320623159408569
epoch: 51 training_loss 0.11242556404322386 test_loss: 0.11526569128036498
epoch: 52 training_loss 0.11049340050667525 test_loss: 0.12365489006042481
epoch: 53 training_loss 0.10925797171890736 test_loss: 0.11333361864089966
epoch: 54 training_loss 0.11241678398102523 test_loss: 0.12777997255325318
epoch: 55 training_loss 0.11430561058223247 test_loss: 0.11552163362503051
epoch: 56 training_loss 0.11269975289702415 test_loss: 0.11106904745101928
epoch: 57 training_loss 0.11641962952911854 test_loss: 0.10868293046951294
epoch: 58 training_loss 0.10935796618461609 test_loss: 0.12072350978851318
epoch: 59 training_loss 0.1081679042801261 test_loss: 0.11832411289215088
epoch: 60 training_loss 0.10779612731188536 test_loss: 0.11362437009811402
epoch: 61 training_loss 0.11372787337750197 test_loss: 0.11239751577377319
epoch: 62 training_loss 0.10695316419005393 test_loss: 0.12333087921142578
epoch: 63 training_loss 0.11210853576660157 test_loss: 0.11409585475921631
epoch: 64 training_loss 0.10885378882288933 test_loss: 0.11641232967376709
epoch: 65 training_loss 0.10858339738100767 test_loss: 0.11755634546279907
epoch: 66 training_loss 0.10999666363000869 test_loss: 0.11474648714065552
epoch: 67 training_loss 0.11611558314412833 test_loss: 0.12276718616485596
epoch: 68 training_loss 0.1120779376104474 test_loss: 0.11850323677062988
epoch: 69 training_loss 0.11412821911275386 test_loss: 0.112725830078125
epoch: 70 training_loss 0.1164654703438282 test_loss: 0.12399474382400513
epoch: 71 training_loss 0.10779278855770827 test_loss: 0.11209437847137452
epoch: 72 training_loss 0.10895013507455588 test_loss: 0.11505181789398193
epoch: 73 training_loss 0.11407871641218663 test_loss: 0.11702131032943726
epoch: 74 training_loss 0.10878617141395808 test_loss: 0.11721457242965698
epoch: 75 training_loss 0.11016737073659896 test_loss: 0.12497432231903076
epoch: 76 training_loss 0.10957398641854525 test_loss: 0.10840058326721191
epoch: 77 training_loss 0.11025617644190788 test_loss: 0.10430808067321777
epoch: 78 training_loss 0.11209941972047091 test_loss: 0.10574476718902588
epoch: 79 training_loss 0.10765765938907862 test_loss: 0.10778572559356689
epoch: 80 training_loss 0.11142099186778069 test_loss: 0.12252968549728394
epoch: 81 training_loss 0.10895478293299675 test_loss: 0.1275464415550232
epoch: 82 training_loss 0.10805474273860455 test_loss: 0.11050055027008057
epoch: 83 training_loss 0.11079490158706903 test_loss: 0.1110990047454834
epoch: 84 training_loss 0.10921907264739275 test_loss: 0.12407087087631226
epoch: 85 training_loss 0.10888676326721906 test_loss: 0.11858174800872803
epoch: 86 training_loss 0.11661330115050078 test_loss: 0.1148150086402893
epoch: 87 training_loss 0.10582167789340019 test_loss: 0.11753833293914795
epoch: 88 training_loss 0.10440134942531586 test_loss: 0.12417834997177124
epoch: 89 training_loss 0.11645010072737932 test_loss: 0.10754389762878418
epoch: 90 training_loss 0.11292359437793494 test_loss: 0.11607239246368409
epoch: 91 training_loss 0.1076823902875185 test_loss: 0.12214487791061401
epoch: 92 training_loss 0.10991101317107678 test_loss: 0.11638795137405396
epoch: 93 training_loss 0.10608468677848577 test_loss: 0.12105321884155273
epoch: 94 training_loss 0.10134470235556364 test_loss: 0.10719292163848877
epoch: 95 training_loss 0.11530630808323622 test_loss: 0.11250183582305909
epoch: 96 training_loss 0.10906655538827181 test_loss: 0.10881590843200684
epoch: 97 training_loss 0.1078381484746933 test_loss: 0.11824806928634643
epoch: 98 training_loss 0.10704594023525715 test_loss: 0.11473939418792725
epoch: 99 training_loss 0.10750302605330944 test_loss: 0.11544326543807984
epoch: 100 training_loss 0.11125717874616385 test_loss: 0.1334371328353882
epoch: 101 training_loss 0.10403102982789278 test_loss: 0.11248964071273804
epoch: 102 training_loss 0.10706883732229472 test_loss: 0.12939523458480834
epoch: 103 training_loss 0.10905887193977833 test_loss: 0.11583147048950196
epoch: 104 training_loss 0.10572999894618988 test_loss: 0.117875075340271
epoch: 105 training_loss 0.10825775314122438 test_loss: 0.10744330883026124
epoch: 106 training_loss 0.11339684017002583 test_loss: 0.11823775768280029
epoch: 107 training_loss 0.10652770921587944 test_loss: 0.12536780834197997
epoch: 108 training_loss 0.10718282569199801 test_loss: 0.11995766162872315
epoch: 109 training_loss 0.10712434105575085 test_loss: 0.11886448860168457
epoch: 110 training_loss 0.09950945220887661 test_loss: 0.12833496332168579
epoch: 111 training_loss 0.1022421956807375 test_loss: 0.12192014455795289
epoch: 112 training_loss 0.11456985525786877 test_loss: 0.11274200677871704
epoch: 113 training_loss 0.10466848995536565 test_loss: 0.14043903350830078
epoch: 114 training_loss 0.10331603825092316 test_loss: 0.11994023323059082
epoch: 115 training_loss 0.11560434628278017 test_loss: 0.10804202556610107
epoch: 116 training_loss 0.1131081035360694 test_loss: 0.11005054712295533
epoch: 117 training_loss 0.10414755772799253 test_loss: 0.11919498443603516
epoch: 118 training_loss 0.10710828129202127 test_loss: 0.11797876358032226
epoch: 119 training_loss 0.107909662052989 test_loss: 0.12227144241333007
epoch: 120 training_loss 0.11187707934528589 test_loss: 0.1241428017616272
epoch: 121 training_loss 0.10251203941181303 test_loss: 0.11303406953811646
epoch: 122 training_loss 0.10574575584381819 test_loss: 0.11874476671218873
epoch: 123 training_loss 0.10569061383605004 test_loss: 0.12467761039733886
epoch: 124 training_loss 0.10433589786291123 test_loss: 0.12492867708206176
epoch: 125 training_loss 0.10493508651852608 test_loss: 0.10409170389175415
epoch: 126 training_loss 0.10610250189900398 test_loss: 0.111672043800354
epoch: 127 training_loss 0.10490868296474218 test_loss: 0.11836334466934204
epoch: 128 training_loss 0.09759089298546314 test_loss: 0.1350036382675171
epoch: 129 training_loss 0.10669071465730667 test_loss: 0.11352595090866088
epoch: 130 training_loss 0.10533508937805891 test_loss: 0.11528006792068482
epoch: 131 training_loss 0.11390573799610137 test_loss: 0.12057623863220215
epoch: 132 training_loss 0.104545514062047 test_loss: 0.11960072517395019
epoch: 133 training_loss 0.10880561912432313 test_loss: 0.12033371925354004
epoch: 134 training_loss 0.10564622089266777 test_loss: 0.12319540977478027
epoch: 135 training_loss 0.11083280544728041 test_loss: 0.10569889545440674
epoch: 136 training_loss 0.10448504708707333 test_loss: 0.11673274040222167
epoch: 137 training_loss 0.10484237119555473 test_loss: 0.11609199047088622
epoch: 138 training_loss 0.11242690734565258 test_loss: 0.11955111026763916
epoch: 139 training_loss 0.10458241600543261 test_loss: 0.1118956446647644
epoch: 140 training_loss 0.10432204242795706 test_loss: 0.1085545539855957
epoch: 141 training_loss 0.10217759549617768 test_loss: 0.12062193155288696
epoch: 142 training_loss 0.10446528274565935 test_loss: 0.12495241165161133
epoch: 143 training_loss 0.1069911983795464 test_loss: 0.12775611877441406
epoch: 144 training_loss 0.11196411430835723 test_loss: 0.11839507818222046
epoch: 145 training_loss 0.10480176039040089 test_loss: 0.12041442394256592
epoch: 146 training_loss 0.10819433145225048 test_loss: 0.12975401878356935
epoch: 147 training_loss 0.10881010863929987 test_loss: 0.11145834922790528
epoch: 148 training_loss 0.10726183518767357 test_loss: 0.12532256841659545
epoch: 149 training_loss 0.10680488131940365 test_loss: 0.12373944520950317
epoch: 0 training_loss 0.20247378662228585 test_loss: 0.13559811115264891
epoch: 1 training_loss 0.1350615933910012 test_loss: 0.1647974967956543
epoch: 2 training_loss 0.13671958409249782 test_loss: 0.13533239364624022
epoch: 3 training_loss 0.12806677389889956 test_loss: 0.11335521936416626
epoch: 4 training_loss 0.12835862513631582 test_loss: 0.13043512105941774
epoch: 5 training_loss 0.12229982908815146 test_loss: 0.11517592668533325
epoch: 6 training_loss 0.12303952779620886 test_loss: 0.12133170366287231
epoch: 7 training_loss 0.12512108840048314 test_loss: 0.11289621591567993
epoch: 8 training_loss 0.12361331779509782 test_loss: 0.13469911813735963
epoch: 9 training_loss 0.12759571589529514 test_loss: 0.12494552135467529
epoch: 10 training_loss 0.12260327544063329 test_loss: 0.11646400690078736
epoch: 11 training_loss 0.11659757483750582 test_loss: 0.11866779327392578
epoch: 12 training_loss 0.11881770487874746 test_loss: 0.11668685674667359
epoch: 13 training_loss 0.11885046921670436 test_loss: 0.11781110763549804
epoch: 14 training_loss 0.11924532275646925 test_loss: 0.11265747547149658
epoch: 15 training_loss 0.12326228305697441 test_loss: 0.11482967138290405
epoch: 16 training_loss 0.126207981929183 test_loss: 0.10763752460479736
epoch: 17 training_loss 0.11885132312774659 test_loss: 0.11596946716308594
epoch: 18 training_loss 0.11492987856268883 test_loss: 0.11323394775390624
epoch: 19 training_loss 0.1163955644518137 test_loss: 0.10892583131790161
epoch: 20 training_loss 0.11955974545329809 test_loss: 0.11444153785705566
epoch: 21 training_loss 0.11793395280838012 test_loss: 0.11318763494491577
epoch: 22 training_loss 0.11408220905810594 test_loss: 0.10887732505798339
epoch: 23 training_loss 0.12254357643425465 test_loss: 0.11340024471282958
epoch: 24 training_loss 0.11469182342290879 test_loss: 0.11613894701004028
epoch: 25 training_loss 0.11828473046422004 test_loss: 0.10379482507705688
epoch: 26 training_loss 0.11912600446492433 test_loss: 0.11960076093673706
epoch: 27 training_loss 0.11601193699985743 test_loss: 0.12001030445098877
epoch: 28 training_loss 0.1171690759062767 test_loss: 0.12189925909042358
epoch: 29 training_loss 0.11976834632456303 test_loss: 0.1141505479812622
epoch: 30 training_loss 0.11056510962545872 test_loss: 0.12099400758743287
epoch: 31 training_loss 0.12223150044679641 test_loss: 0.10301436185836792
epoch: 32 training_loss 0.1195980467647314 test_loss: 0.11106359958648682
epoch: 33 training_loss 0.11524990323930978 test_loss: 0.10287330150604249
epoch: 34 training_loss 0.11593424681574106 test_loss: 0.11489182710647583
epoch: 35 training_loss 0.11873503062874079 test_loss: 0.11047766208648682
epoch: 36 training_loss 0.1172619554027915 test_loss: 0.10699206590652466
epoch: 37 training_loss 0.11840140666812658 test_loss: 0.11413652896881103
epoch: 38 training_loss 0.11157020557671786 test_loss: 0.12022322416305542
epoch: 39 training_loss 0.11825034867972135 test_loss: 0.1075859546661377
epoch: 40 training_loss 0.11587958455085755 test_loss: 0.104985511302948
epoch: 41 training_loss 0.11939141634851694 test_loss: 0.1024967074394226
epoch: 42 training_loss 0.12236462090164422 test_loss: 0.11394679546356201
epoch: 43 training_loss 0.11996204756200314 test_loss: 0.11648143529891967
epoch: 44 training_loss 0.1166623105481267 test_loss: 0.11507241725921631
epoch: 45 training_loss 0.11318990804255008 test_loss: 0.1076235055923462
epoch: 46 training_loss 0.1177643110603094 test_loss: 0.10354162454605102
epoch: 47 training_loss 0.11340052019804717 test_loss: 0.1169082760810852
epoch: 48 training_loss 0.11143759641796351 test_loss: 0.11678609848022461
epoch: 49 training_loss 0.1183097142726183 test_loss: 0.1180911660194397
epoch: 50 training_loss 0.11727517247200012 test_loss: 0.10801607370376587
epoch: 51 training_loss 0.11721284449100494 test_loss: 0.1071926236152649
epoch: 52 training_loss 0.11759907126426697 test_loss: 0.1079620361328125
epoch: 53 training_loss 0.11993023917078972 test_loss: 0.10221736431121826
epoch: 54 training_loss 0.11079944223165512 test_loss: 0.1089445948600769
epoch: 55 training_loss 0.11562619995325804 test_loss: 0.11635332107543946
epoch: 56 training_loss 0.11621031757444143 test_loss: 0.11146316528320313
epoch: 57 training_loss 0.11146435268223286 test_loss: 0.10550130605697632
epoch: 58 training_loss 0.11292389705777169 test_loss: 0.11639949083328247
epoch: 59 training_loss 0.11171548400074244 test_loss: 0.11088817119598389
epoch: 60 training_loss 0.11508263476192951 test_loss: 0.1176426887512207
epoch: 61 training_loss 0.1138702454790473 test_loss: 0.11223776340484619
epoch: 62 training_loss 0.11239996071904898 test_loss: 0.10617059469223022
epoch: 63 training_loss 0.11342305727303029 test_loss: 0.10883157253265381
epoch: 64 training_loss 0.11227211982011795 test_loss: 0.11252758502960206
epoch: 65 training_loss 0.11435096647590398 test_loss: 0.10726299285888671
epoch: 66 training_loss 0.11897969860583543 test_loss: 0.10916683673858643
epoch: 67 training_loss 0.11060269244015217 test_loss: 0.10449038743972779
epoch: 68 training_loss 0.11494527451694012 test_loss: 0.1177675724029541
epoch: 69 training_loss 0.11060223903506994 test_loss: 0.11285254955291749
epoch: 70 training_loss 0.11073980037122964 test_loss: 0.0989829957485199
epoch: 71 training_loss 0.11585808299481869 test_loss: 0.10249202251434326
epoch: 72 training_loss 0.11881922893226146 test_loss: 0.10150421857833862
epoch: 73 training_loss 0.11089515972882509 test_loss: 0.11154775619506836
epoch: 74 training_loss 0.1095634901151061 test_loss: 0.11138635873794556
epoch: 75 training_loss 0.11957779500633478 test_loss: 0.10834931135177613
epoch: 76 training_loss 0.10955271184444428 test_loss: 0.11121199131011963
epoch: 77 training_loss 0.10882090114057064 test_loss: 0.11598247289657593
epoch: 78 training_loss 0.11232687577605248 test_loss: 0.10906798839569092
epoch: 79 training_loss 0.11952830448746682 test_loss: 0.09964702725410461
epoch: 80 training_loss 0.11415820702910423 test_loss: 0.11294790506362914
epoch: 81 training_loss 0.1137782096862793 test_loss: 0.11068278551101685
epoch: 82 training_loss 0.11569363497197628 test_loss: 0.10276501178741455
epoch: 83 training_loss 0.11055381264537573 test_loss: 0.10698103904724121
epoch: 84 training_loss 0.10755107602104545 test_loss: 0.11222625970840454
epoch: 85 training_loss 0.11927176468074321 test_loss: 0.10453952550888061
epoch: 86 training_loss 0.11136449873447418 test_loss: 0.10273813009262085
epoch: 87 training_loss 0.11560119479894639 test_loss: 0.11115692853927613
epoch: 88 training_loss 0.1166644662618637 test_loss: 0.10017699003219604
epoch: 89 training_loss 0.11800559736788273 test_loss: 0.10802654027938843
epoch: 90 training_loss 0.11484104670584201 test_loss: 0.09903120398521423
epoch: 91 training_loss 0.11300631329417228 test_loss: 0.10706100463867188
epoch: 92 training_loss 0.10815448846668005 test_loss: 0.11247875690460205
epoch: 93 training_loss 0.111170449629426 test_loss: 0.10430587530136108
epoch: 94 training_loss 0.11303044363856316 test_loss: 0.12362583875656127
epoch: 95 training_loss 0.11483681667596102 test_loss: 0.1032552719116211
epoch: 96 training_loss 0.11286072611808777 test_loss: 0.11130026578903199
epoch: 97 training_loss 0.1133145796880126 test_loss: 0.10681754350662231
epoch: 98 training_loss 0.11327901251614093 test_loss: 0.11458058357238769
epoch: 99 training_loss 0.10958281800150871 test_loss: 0.09560309648513794
epoch: 100 training_loss 0.10950598197057843 test_loss: 0.11060773134231568
epoch: 101 training_loss 0.12006521660834552 test_loss: 0.10359023809432984
epoch: 102 training_loss 0.1111681580543518 test_loss: 0.10723259449005126
epoch: 103 training_loss 0.11154649093747139 test_loss: 0.1122925877571106
epoch: 104 training_loss 0.11208772234618664 test_loss: 0.10723965167999268
epoch: 105 training_loss 0.10343147918581963 test_loss: 0.104350745677948
epoch: 106 training_loss 0.11471588850021362 test_loss: 0.10502936840057372
epoch: 107 training_loss 0.11450648907572031 test_loss: 0.10408403873443603
epoch: 108 training_loss 0.1093191346526146 test_loss: 0.10192708969116211
epoch: 109 training_loss 0.1143412957713008 test_loss: 0.10575904846191406
epoch: 110 training_loss 0.10778694983571768 test_loss: 0.1116529107093811
epoch: 111 training_loss 0.11276927024126053 test_loss: 0.10663797855377197
epoch: 112 training_loss 0.11332572050392628 test_loss: 0.11802531480789184
epoch: 113 training_loss 0.11432705268263817 test_loss: 0.12009018659591675
epoch: 114 training_loss 0.11248350277543068 test_loss: 0.09616560935974121
epoch: 115 training_loss 0.11014355082064867 test_loss: 0.12029042243957519
epoch: 116 training_loss 0.10585417982190848 test_loss: 0.11299519538879395
epoch: 117 training_loss 0.11219243541359901 test_loss: 0.09593492150306701
epoch: 118 training_loss 0.11517680685967208 test_loss: 0.11893442869186402
epoch: 119 training_loss 0.11805335827171802 test_loss: 0.10250903367996216
epoch: 120 training_loss 0.11888522218912839 test_loss: 0.10825059413909913
epoch: 121 training_loss 0.11076841663569212 test_loss: 0.10752319097518921
epoch: 122 training_loss 0.11634573150426149 test_loss: 0.11875970363616943
epoch: 123 training_loss 0.11046421553939581 test_loss: 0.1132359266281128
epoch: 124 training_loss 0.11266101317480207 test_loss: 0.11234558820724487
epoch: 125 training_loss 0.10861250258982182 test_loss: 0.11597834825515747
epoch: 126 training_loss 0.11192923750728369 test_loss: 0.10917562246322632
epoch: 127 training_loss 0.11043479148298502 test_loss: 0.10951360464096069
epoch: 128 training_loss 0.11311167303472758 test_loss: 0.11951862573623658
epoch: 129 training_loss 0.11015914175659418 test_loss: 0.10734467506408692
epoch: 130 training_loss 0.10975055929273367 test_loss: 0.11076613664627075
epoch: 131 training_loss 0.11400031723082066 test_loss: 0.09702906608581544
epoch: 132 training_loss 0.10753011889755726 test_loss: 0.12072241306304932
epoch: 133 training_loss 0.11195179361850023 test_loss: 0.10305203199386596
epoch: 134 training_loss 0.11289165843278169 test_loss: 0.11280176639556885
epoch: 135 training_loss 0.10840415477752685 test_loss: 0.10454803705215454
epoch: 136 training_loss 0.11446883525699376 test_loss: 0.09752017855644227
epoch: 137 training_loss 0.11787515860050916 test_loss: 0.10998388528823852
epoch: 138 training_loss 0.11010067116469145 test_loss: 0.11534868478775025
epoch: 139 training_loss 0.10832320261746645 test_loss: 0.10405430793762208
epoch: 140 training_loss 0.11152924109250308 test_loss: 0.11384145021438599
epoch: 141 training_loss 0.11184497069567442 test_loss: 0.1191400408744812
epoch: 142 training_loss 0.12098746564239264 test_loss: 0.1146381139755249
epoch: 143 training_loss 0.11177205950021744 test_loss: 0.10458790063858033
epoch: 144 training_loss 0.11289394754916429 test_loss: 0.11832977533340454
epoch: 145 training_loss 0.10913673460483551 test_loss: 0.10995055437088012
epoch: 146 training_loss 0.115797931663692 test_loss: 0.1068042278289795
epoch: 147 training_loss 0.11084678519517183 test_loss: 0.09858604073524475
epoch: 148 training_loss 0.11215755511075258 test_loss: 0.10706249475479127
epoch: 149 training_loss 0.1151845746859908 test_loss: 0.09714416265487671
epoch: 0 training_loss 0.20372385978698732 test_loss: 0.1645270347595215
epoch: 1 training_loss 0.14245065551251174 test_loss: 0.12904354333877563
epoch: 2 training_loss 0.13847973708063363 test_loss: 0.12788982391357423
epoch: 3 training_loss 0.13453923743218182 test_loss: 0.1617498517036438
epoch: 4 training_loss 0.12436681795865297 test_loss: 0.12584446668624877
epoch: 5 training_loss 0.133551830612123 test_loss: 0.1285804033279419
epoch: 6 training_loss 0.1277947837486863 test_loss: 0.12729028463363648
epoch: 7 training_loss 0.12892272379249334 test_loss: 0.1270642399787903
epoch: 8 training_loss 0.12293302543461322 test_loss: 0.11837400197982788
epoch: 9 training_loss 0.12030130740255117 test_loss: 0.11819472312927246
epoch: 10 training_loss 0.12009258568286896 test_loss: 0.10791923999786376
epoch: 11 training_loss 0.12365553595125675 test_loss: 0.12274818420410157
epoch: 12 training_loss 0.13058800853788852 test_loss: 0.12327306270599366
epoch: 13 training_loss 0.12175871536135674 test_loss: 0.11807838678359986
epoch: 14 training_loss 0.11283701568841935 test_loss: 0.11504534482955933
epoch: 15 training_loss 0.12050135158002377 test_loss: 0.11798428297042847
epoch: 16 training_loss 0.1198981137573719 test_loss: 0.11724518537521363
epoch: 17 training_loss 0.11676798652857542 test_loss: 0.11834489107131958
epoch: 18 training_loss 0.11413652148097754 test_loss: 0.15547374486923218
epoch: 19 training_loss 0.12151568327099085 test_loss: 0.12997691631317138
epoch: 20 training_loss 0.12336863372474909 test_loss: 0.12660667896270753
epoch: 21 training_loss 0.1289638378471136 test_loss: 0.11764464378356934
epoch: 22 training_loss 0.12411148320883512 test_loss: 0.1216621994972229
epoch: 23 training_loss 0.117786505818367 test_loss: 0.10813436508178711
epoch: 24 training_loss 0.11832714106887579 test_loss: 0.11090126037597656
epoch: 25 training_loss 0.11520354598760604 test_loss: 0.12512096166610717
epoch: 26 training_loss 0.11893958240747451 test_loss: 0.1276639461517334
epoch: 27 training_loss 0.12198800723999739 test_loss: 0.1345674753189087
epoch: 28 training_loss 0.11608343277126551 test_loss: 0.1134073257446289
epoch: 29 training_loss 0.12016561813652515 test_loss: 0.11033508777618409
epoch: 30 training_loss 0.1164247553423047 test_loss: 0.11792376041412353
epoch: 31 training_loss 0.11905865728855133 test_loss: 0.10929878950119018
epoch: 32 training_loss 0.11727511860430241 test_loss: 0.11699330806732178
epoch: 33 training_loss 0.12368504665791988 test_loss: 0.11288237571716309
epoch: 34 training_loss 0.11886322218924761 test_loss: 0.12413421869277955
epoch: 35 training_loss 0.12078998371958732 test_loss: 0.11488814353942871
epoch: 36 training_loss 0.1246001274138689 test_loss: 0.12818289995193483
epoch: 37 training_loss 0.11718309886753558 test_loss: 0.11380252838134766
epoch: 38 training_loss 0.11810468912124633 test_loss: 0.13617854118347167
epoch: 39 training_loss 0.12484053537249565 test_loss: 0.12714750766754152
epoch: 40 training_loss 0.11855231530964375 test_loss: 0.12157889604568481
epoch: 41 training_loss 0.1174514939635992 test_loss: 0.11584105491638183
epoch: 42 training_loss 0.11617627739906311 test_loss: 0.10712922811508178
epoch: 43 training_loss 0.11341810148209333 test_loss: 0.13130426406860352
epoch: 44 training_loss 0.11758717428892851 test_loss: 0.1148118257522583
epoch: 45 training_loss 0.1179296750947833 test_loss: 0.11777659654617309
epoch: 46 training_loss 0.11904706422239542 test_loss: 0.1267351031303406
epoch: 47 training_loss 0.11215836714953184 test_loss: 0.12278664112091064
epoch: 48 training_loss 0.12051357675343752 test_loss: 0.1255607008934021
epoch: 49 training_loss 0.11510776493698359 test_loss: 0.12811492681503295
epoch: 50 training_loss 0.12191997800022364 test_loss: 0.12428280115127563
epoch: 51 training_loss 0.11183449596166611 test_loss: 0.1046340823173523
epoch: 52 training_loss 0.11846920121461153 test_loss: 0.11189823150634766
epoch: 53 training_loss 0.11090235080569982 test_loss: 0.11476370096206664
epoch: 54 training_loss 0.1202504163607955 test_loss: 0.11900142431259156
epoch: 55 training_loss 0.11592305719852447 test_loss: 0.11199761629104614
epoch: 56 training_loss 0.11718581072986126 test_loss: 0.12446919679641724
epoch: 57 training_loss 0.11928565442562103 test_loss: 0.10327385663986206
epoch: 58 training_loss 0.1152940160408616 test_loss: 0.10477399826049805
epoch: 59 training_loss 0.11547485675662755 test_loss: 0.11252367496490479
epoch: 60 training_loss 0.12018219895660877 test_loss: 0.11036214828491211
epoch: 61 training_loss 0.11321010645478964 test_loss: 0.11050422191619873
epoch: 62 training_loss 0.11400423001497983 test_loss: 0.1189294695854187
epoch: 63 training_loss 0.11628368623554707 test_loss: 0.1236754059791565
epoch: 64 training_loss 0.12621468562632798 test_loss: 0.1105228304862976
epoch: 65 training_loss 0.11868025101721287 test_loss: 0.11524275541305543
epoch: 66 training_loss 0.11804900653660297 test_loss: 0.10887088775634765
epoch: 67 training_loss 0.11572281308472157 test_loss: 0.10275435447692871
epoch: 68 training_loss 0.11846936330199241 test_loss: 0.12967838048934938
epoch: 69 training_loss 0.11738799482584 test_loss: 0.11098589897155761
epoch: 70 training_loss 0.11198136355727911 test_loss: 0.11756778955459594
epoch: 71 training_loss 0.12496089424937963 test_loss: 0.1168623685836792
epoch: 72 training_loss 0.11394946131855249 test_loss: 0.12541347742080688
epoch: 73 training_loss 0.11972274202853442 test_loss: 0.12079662084579468
epoch: 74 training_loss 0.11749982744455338 test_loss: 0.11534689664840699
epoch: 75 training_loss 0.11569315269589424 test_loss: 0.11361498832702636
epoch: 76 training_loss 0.11482850544154644 test_loss: 0.11099852323532104
epoch: 77 training_loss 0.11968476872891187 test_loss: 0.11799405813217163
epoch: 78 training_loss 0.10998494103550911 test_loss: 0.1142243504524231
epoch: 79 training_loss 0.11436046168208122 test_loss: 0.1249186635017395
epoch: 80 training_loss 0.12134598512202502 test_loss: 0.12743051052093507
epoch: 81 training_loss 0.11956204131245612 test_loss: 0.11717981100082397
epoch: 82 training_loss 0.11515403650701046 test_loss: 0.11586227416992187
epoch: 83 training_loss 0.11594540201127529 test_loss: 0.12064076662063598
epoch: 84 training_loss 0.1177033257111907 test_loss: 0.11474204063415527
epoch: 85 training_loss 0.11289762191474438 test_loss: 0.11805474758148193
epoch: 86 training_loss 0.11683643814176321 test_loss: 0.12290009260177612
epoch: 87 training_loss 0.11398989532142878 test_loss: 0.12113863229751587
epoch: 88 training_loss 0.11400317147374153 test_loss: 0.10789698362350464
epoch: 89 training_loss 0.11744492795318365 test_loss: 0.11611008644104004
epoch: 90 training_loss 0.11579255059361458 test_loss: 0.12142448425292969
epoch: 91 training_loss 0.11841039914637803 test_loss: 0.1172310471534729
epoch: 92 training_loss 0.11650793388485908 test_loss: 0.1093942642211914
epoch: 93 training_loss 0.11491861511021853 test_loss: 0.10997394323349
epoch: 94 training_loss 0.1185710721835494 test_loss: 0.11208506822586059
epoch: 95 training_loss 0.11176686996594071 test_loss: 0.1201816439628601
epoch: 96 training_loss 0.11681575078517198 test_loss: 0.11140103340148926
epoch: 97 training_loss 0.11866208404302597 test_loss: 0.11582720279693604
epoch: 98 training_loss 0.11765328865498305 test_loss: 0.11257034540176392
epoch: 99 training_loss 0.11416908081620931 test_loss: 0.1271673321723938
epoch: 100 training_loss 0.10912598872557283 test_loss: 0.10950261354446411
epoch: 101 training_loss 0.10618440166115761 test_loss: 0.10593432188034058
epoch: 102 training_loss 0.11786659818142653 test_loss: 0.12299062013626098
epoch: 103 training_loss 0.11342012409120798 test_loss: 0.12949508428573608
epoch: 104 training_loss 0.12015716645866632 test_loss: 0.1060144066810608
epoch: 105 training_loss 0.11725995812565088 test_loss: 0.10354037284851074
epoch: 106 training_loss 0.11535024605691432 test_loss: 0.1106907606124878
epoch: 107 training_loss 0.11822153888642788 test_loss: 0.11079579591751099
epoch: 108 training_loss 0.11103899788111449 test_loss: 0.12501802444458007
epoch: 109 training_loss 0.1123398957774043 test_loss: 0.11336038112640381
epoch: 110 training_loss 0.11280669212341309 test_loss: 0.11190323829650879
epoch: 111 training_loss 0.11537454564124346 test_loss: 0.11797862052917481
epoch: 112 training_loss 0.1126385010406375 test_loss: 0.11376810073852539
epoch: 113 training_loss 0.11310987144708634 test_loss: 0.12397040128707885
epoch: 114 training_loss 0.11173379380255938 test_loss: 0.10913503170013428
epoch: 115 training_loss 0.11366493351757527 test_loss: 0.12325645685195923
epoch: 116 training_loss 0.1109702856093645 test_loss: 0.1140709400177002
epoch: 117 training_loss 0.11905186265707016 test_loss: 0.1105373740196228
epoch: 118 training_loss 0.1170593336969614 test_loss: 0.11530933380126954
epoch: 119 training_loss 0.11887042060494422 test_loss: 0.1158374547958374
epoch: 120 training_loss 0.11017798878252506 test_loss: 0.1234994649887085
epoch: 121 training_loss 0.11989810705184936 test_loss: 0.1139939308166504
epoch: 122 training_loss 0.11086167056113481 test_loss: 0.10650138854980469
epoch: 123 training_loss 0.1106154703348875 test_loss: 0.1116490125656128
epoch: 124 training_loss 0.1113483176380396 test_loss: 0.11584604978561401
epoch: 125 training_loss 0.11192432940006256 test_loss: 0.09231883883476258
epoch: 126 training_loss 0.11350876949727536 test_loss: 0.10582853555679321
epoch: 127 training_loss 0.11159490775316953 test_loss: 0.1153376817703247
epoch: 128 training_loss 0.11086075946688652 test_loss: 0.11998555660247803
epoch: 129 training_loss 0.11541210159659386 test_loss: 0.12382999658584595
epoch: 130 training_loss 0.1169253582879901 test_loss: 0.11497942209243775
epoch: 131 training_loss 0.11985812693834305 test_loss: 0.11237497329711914
epoch: 132 training_loss 0.10771472059190274 test_loss: 0.11979695558547973
epoch: 133 training_loss 0.11060299258679152 test_loss: 0.11273434162139892
epoch: 134 training_loss 0.11893931042402983 test_loss: 0.10508953332901001
epoch: 135 training_loss 0.11183237090706825 test_loss: 0.11658785343170167
epoch: 136 training_loss 0.10469095677137374 test_loss: 0.11735881567001342
epoch: 137 training_loss 0.11565526980906725 test_loss: 0.11380898952484131
epoch: 138 training_loss 0.11445744648575783 test_loss: 0.11122517585754395
epoch: 139 training_loss 0.10538263864815235 test_loss: 0.11684139966964721
epoch: 140 training_loss 0.11029085505753755 test_loss: 0.11762640476226807
epoch: 141 training_loss 0.11130013916641474 test_loss: 0.1153119683265686
epoch: 142 training_loss 0.11239794544875621 test_loss: 0.1144289255142212
epoch: 143 training_loss 0.11574663780629635 test_loss: 0.11796833276748657
epoch: 144 training_loss 0.11837153505533933 test_loss: 0.12306286096572876
epoch: 145 training_loss 0.11557028025388717 test_loss: 0.1259605646133423
epoch: 146 training_loss 0.1196053309366107 test_loss: 0.10553159713745117
epoch: 147 training_loss 0.11150934934616089 test_loss: 0.12052642107009888
epoch: 148 training_loss 0.11221637178212404 test_loss: 0.11857177019119262
epoch: 149 training_loss 0.11633063200861216 test_loss: 0.12181597948074341
epoch: 0 training_loss 0.20211907781660557 test_loss: 0.12161153554916382
epoch: 1 training_loss 0.14128696866333484 test_loss: 0.12253059148788452
epoch: 2 training_loss 0.12927836764603853 test_loss: 0.12202982902526856
epoch: 3 training_loss 0.12802227087318896 test_loss: 0.10243139266967774
epoch: 4 training_loss 0.13167168401181698 test_loss: 0.12003602981567382
epoch: 5 training_loss 0.12960576422512532 test_loss: 0.1189118504524231
epoch: 6 training_loss 0.13118528112769126 test_loss: 0.12234508991241455
epoch: 7 training_loss 0.12284627947956324 test_loss: 0.11760530471801758
epoch: 8 training_loss 0.13501730225980282 test_loss: 0.1165343165397644
epoch: 9 training_loss 0.11997908379882574 test_loss: 0.11602437496185303
epoch: 10 training_loss 0.1253828088194132 test_loss: 0.1055350661277771
epoch: 11 training_loss 0.1220008748397231 test_loss: 0.1149266242980957
epoch: 12 training_loss 0.11838571961969137 test_loss: 0.11806899309158325
epoch: 13 training_loss 0.1256135918200016 test_loss: 0.11208810806274414
epoch: 14 training_loss 0.12688785150647164 test_loss: 0.1148134708404541
epoch: 15 training_loss 0.12183831796050072 test_loss: 0.10153101682662964
epoch: 16 training_loss 0.12517992742359638 test_loss: 0.11366561651229859
epoch: 17 training_loss 0.12230902407318353 test_loss: 0.12034714221954346
epoch: 18 training_loss 0.11685367237776517 test_loss: 0.1130273699760437
epoch: 19 training_loss 0.12213889122009278 test_loss: 0.1197573184967041
epoch: 20 training_loss 0.1245834069699049 test_loss: 0.11188149452209473
epoch: 21 training_loss 0.1210131773352623 test_loss: 0.11976687908172608
epoch: 22 training_loss 0.12288531985133887 test_loss: 0.11207860708236694
epoch: 23 training_loss 0.1236709850281477 test_loss: 0.10918922424316406
epoch: 24 training_loss 0.1187481926754117 test_loss: 0.12346233129501342
epoch: 25 training_loss 0.12072765994817018 test_loss: 0.10063459873199462
epoch: 26 training_loss 0.11785454213619233 test_loss: 0.12160520553588867
epoch: 27 training_loss 0.11170940291136504 test_loss: 0.10989445447921753
epoch: 28 training_loss 0.1186867455765605 test_loss: 0.11498559713363647
epoch: 29 training_loss 0.12658461600542067 test_loss: 0.11861629486083984
epoch: 30 training_loss 0.11855460561811924 test_loss: 0.11070008277893066
epoch: 31 training_loss 0.11729890499264002 test_loss: 0.10556942224502563
epoch: 32 training_loss 0.11908043570816516 test_loss: 0.11672834157943726
epoch: 33 training_loss 0.11278761334717274 test_loss: 0.1203696608543396
epoch: 34 training_loss 0.11161606576293708 test_loss: 0.1173981785774231
epoch: 35 training_loss 0.11608565524220467 test_loss: 0.10500208139419556
epoch: 36 training_loss 0.12027219258248806 test_loss: 0.11444370746612549
epoch: 37 training_loss 0.12335949771106243 test_loss: 0.1159744143486023
epoch: 38 training_loss 0.11600604455918073 test_loss: 0.11929841041564941
epoch: 39 training_loss 0.11688461195677519 test_loss: 0.1152880072593689
epoch: 40 training_loss 0.11364570271223784 test_loss: 0.115921950340271
epoch: 41 training_loss 0.1195173330977559 test_loss: 0.10853585004806518
epoch: 42 training_loss 0.12132835201919079 test_loss: 0.11132606267929077
epoch: 43 training_loss 0.11595019057393074 test_loss: 0.14094154834747313
epoch: 44 training_loss 0.11515828728675842 test_loss: 0.11788369417190551
epoch: 45 training_loss 0.11691112514585257 test_loss: 0.11195318698883057
epoch: 46 training_loss 0.12047709476202727 test_loss: 0.10941867828369141
epoch: 47 training_loss 0.11726493999361992 test_loss: 0.1225355625152588
epoch: 48 training_loss 0.11712065398693085 test_loss: 0.12463611364364624
epoch: 49 training_loss 0.12200468629598618 test_loss: 0.11093738079071044
epoch: 50 training_loss 0.11732971474528313 test_loss: 0.1055687427520752
epoch: 51 training_loss 0.119847895167768 test_loss: 0.11877847909927368
epoch: 52 training_loss 0.11417214404791594 test_loss: 0.11652671098709107
epoch: 53 training_loss 0.11404718618839979 test_loss: 0.11502480506896973
epoch: 54 training_loss 0.11853942934423685 test_loss: 0.10753928422927857
epoch: 55 training_loss 0.12021203361451625 test_loss: 0.11017111539840699
epoch: 56 training_loss 0.1225107605010271 test_loss: 0.1061333417892456
epoch: 57 training_loss 0.11114951774477959 test_loss: 0.11615198850631714
epoch: 58 training_loss 0.11511222060769796 test_loss: 0.11295348405838013
epoch: 59 training_loss 0.11377267178148032 test_loss: 0.11181656122207642
epoch: 60 training_loss 0.1150683980062604 test_loss: 0.10456969738006591
epoch: 61 training_loss 0.11230774387717248 test_loss: 0.103695547580719
epoch: 62 training_loss 0.11119805917143821 test_loss: 0.11838147640228272
epoch: 63 training_loss 0.11645449966192245 test_loss: 0.11762279272079468
epoch: 64 training_loss 0.11766335025429725 test_loss: 0.11234368085861206
epoch: 65 training_loss 0.11654787071049214 test_loss: 0.10831040143966675
epoch: 66 training_loss 0.11849335912615061 test_loss: 0.10696537494659424
epoch: 67 training_loss 0.11943848725408315 test_loss: 0.118097984790802
epoch: 68 training_loss 0.10699960827827454 test_loss: 0.10254181623458862
epoch: 69 training_loss 0.11588408000767231 test_loss: 0.11374475955963134
epoch: 70 training_loss 0.12024277657270431 test_loss: 0.1256168246269226
epoch: 71 training_loss 0.11796079639345408 test_loss: 0.10741124153137208
epoch: 72 training_loss 0.11854407150298357 test_loss: 0.11111432313919067
epoch: 73 training_loss 0.11314536288380622 test_loss: 0.11423064470291137
epoch: 74 training_loss 0.11307201713323593 test_loss: 0.12340458631515502
epoch: 75 training_loss 0.11423812713474035 test_loss: 0.10880333185195923
epoch: 76 training_loss 0.11741463176906108 test_loss: 0.11413246393203735
epoch: 77 training_loss 0.11979551438242197 test_loss: 0.11377236843109131
epoch: 78 training_loss 0.12210478372871876 test_loss: 0.10607097148895264
epoch: 79 training_loss 0.11707239542156458 test_loss: 0.11938328742980957
epoch: 80 training_loss 0.11433230597525836 test_loss: 0.11273505687713622
epoch: 81 training_loss 0.10731860872358084 test_loss: 0.1065543532371521
epoch: 82 training_loss 0.11876865513622761 test_loss: 0.11270368099212646
epoch: 83 training_loss 0.11175052501261235 test_loss: 0.11529484987258912
epoch: 84 training_loss 0.1089020736142993 test_loss: 0.1330277442932129
epoch: 85 training_loss 0.11379133608192206 test_loss: 0.11128045320510864
epoch: 86 training_loss 0.11690832421183586 test_loss: 0.11078717708587646
epoch: 87 training_loss 0.11520832736045122 test_loss: 0.11014655828475953
epoch: 88 training_loss 0.11377138769254089 test_loss: 0.10244201421737671
epoch: 89 training_loss 0.11196569189429283 test_loss: 0.10682332515716553
epoch: 90 training_loss 0.1121628149971366 test_loss: 0.1080877423286438
epoch: 91 training_loss 0.11896977093070746 test_loss: 0.12550803422927856
epoch: 92 training_loss 0.12133194927126169 test_loss: 0.11027399301528931
epoch: 93 training_loss 0.11528541311621666 test_loss: 0.10267864465713501
epoch: 94 training_loss 0.10891875810921192 test_loss: 0.10609464645385742
epoch: 95 training_loss 0.11450163416564464 test_loss: 0.10883859395980836
epoch: 96 training_loss 0.11372642233967781 test_loss: 0.09867909550666809
epoch: 97 training_loss 0.11451321102678776 test_loss: 0.11338787078857422
epoch: 98 training_loss 0.10883271358907223 test_loss: 0.1147829532623291
epoch: 99 training_loss 0.10661230605095624 test_loss: 0.11517108678817749
epoch: 100 training_loss 0.11846126090735197 test_loss: 0.11657893657684326
epoch: 101 training_loss 0.11237204287201166 test_loss: 0.11089582443237304
epoch: 102 training_loss 0.1110922297090292 test_loss: 0.11108497381210328
epoch: 103 training_loss 0.11787934973835945 test_loss: 0.10752583742141723
epoch: 104 training_loss 0.11449327178299427 test_loss: 0.11781237125396729
epoch: 105 training_loss 0.11258272916078567 test_loss: 0.11017022132873536
epoch: 106 training_loss 0.11565585348755121 test_loss: 0.11929490566253662
epoch: 107 training_loss 0.1153631867840886 test_loss: 0.10897859334945678
epoch: 108 training_loss 0.11305432844907046 test_loss: 0.10611443519592285
epoch: 109 training_loss 0.10844348413869738 test_loss: 0.11005576848983764
epoch: 110 training_loss 0.11577444311231375 test_loss: 0.11859301328659058
epoch: 111 training_loss 0.10933660209178925 test_loss: 0.09827592372894287
epoch: 112 training_loss 0.10986456658691168 test_loss: 0.11461319923400878
epoch: 113 training_loss 0.10946624375879764 test_loss: 0.11145508289337158
epoch: 114 training_loss 0.11527159832417964 test_loss: 0.11344710588455201
epoch: 115 training_loss 0.10982367470860481 test_loss: 0.10013937950134277
epoch: 116 training_loss 0.11522707100957633 test_loss: 0.10198048353195191
epoch: 117 training_loss 0.1153398809954524 test_loss: 0.10577238798141479
epoch: 118 training_loss 0.10979861948639154 test_loss: 0.11254013776779175
epoch: 119 training_loss 0.11823339127004147 test_loss: 0.10512186288833618
epoch: 120 training_loss 0.11459011871367693 test_loss: 0.10900316238403321
epoch: 121 training_loss 0.11060783434659242 test_loss: 0.12616299390792846
epoch: 122 training_loss 0.1132504827529192 test_loss: 0.11325523853302003
epoch: 123 training_loss 0.10932637680321931 test_loss: 0.10242894887924195
epoch: 124 training_loss 0.11291058000177145 test_loss: 0.11485482454299926
epoch: 125 training_loss 0.11687208451330662 test_loss: 0.1096701979637146
epoch: 126 training_loss 0.11349897999316454 test_loss: 0.1086169958114624
epoch: 127 training_loss 0.11168226312845946 test_loss: 0.11279462575912476
epoch: 128 training_loss 0.10408211216330528 test_loss: 0.10717307329177857
epoch: 129 training_loss 0.12039584863930941 test_loss: 0.10110030174255372
epoch: 130 training_loss 0.11090815648436546 test_loss: 0.10780247449874877
epoch: 131 training_loss 0.10853243317455054 test_loss: 0.1056860089302063
epoch: 132 training_loss 0.11112070705741645 test_loss: 0.1171797752380371
epoch: 133 training_loss 0.10607877999544144 test_loss: 0.11523528099060058
epoch: 134 training_loss 0.1155080458521843 test_loss: 0.11291214227676391
epoch: 135 training_loss 0.11415992215275765 test_loss: 0.10345634222030639
epoch: 136 training_loss 0.12147272244095803 test_loss: 0.11490812301635742
epoch: 137 training_loss 0.11718351624906063 test_loss: 0.11068596839904785
epoch: 138 training_loss 0.11311369717121124 test_loss: 0.10869593620300293
epoch: 139 training_loss 0.10688622117042541 test_loss: 0.11504024267196655
epoch: 140 training_loss 0.11194416355341673 test_loss: 0.11651564836502075
epoch: 141 training_loss 0.11337749648839235 test_loss: 0.1167215347290039
epoch: 142 training_loss 0.11839105822145939 test_loss: 0.10957304239273072
epoch: 143 training_loss 0.11801008768379688 test_loss: 0.11601461172103882
epoch: 144 training_loss 0.1119614565744996 test_loss: 0.10230584144592285
epoch: 145 training_loss 0.10983567234128713 test_loss: 0.11189956665039062
epoch: 146 training_loss 0.11564601056277751 test_loss: 0.10079904794692993
epoch: 147 training_loss 0.11775601424276828 test_loss: 0.10750168561935425
epoch: 148 training_loss 0.1150220587104559 test_loss: 0.10632550716400146
epoch: 149 training_loss 0.11639996267855167 test_loss: 0.10485290288925171
episode: 0 training return: -995.9773341305927
episode: 1 training return: -1444.4897879534444
episode: 2 training return: -1389.2137191664535
episode: 3 training return: -950.6250995857868
epoch: 1 test_true_pfm: 15.395937667680357 sim_pfm: -311.94846276644637
episode: 4 training return: -1273.6634875168681
episode: 5 training return: -921.107727695933
episode: 6 training return: -1273.0104309054102
episode: 7 training return: -1798.8953738651205
epoch: 2 test_true_pfm: 21.86364912819294 sim_pfm: -260.91227829354955
episode: 8 training return: -1522.3781675672542
episode: 9 training return: -1024.2077654545883
episode: 10 training return: -438.43925374798584
episode: 11 training return: -331.876588991457
epoch: 3 test_true_pfm: 39.34462125580053 sim_pfm: -289.45752669855926
episode: 12 training return: -261.7098448020665
episode: 13 training return: -399.4101860387729
episode: 14 training return: -343.2603206554694
episode: 15 training return: -375.6135151131039
epoch: 4 test_true_pfm: 14.104944242247495 sim_pfm: -199.60668156143674
episode: 16 training return: -224.94718411334767
episode: 17 training return: -260.1055956371874
episode: 18 training return: -345.86547785265606
episode: 19 training return: -356.9878547974044
epoch: 5 test_true_pfm: 8.00777227167826 sim_pfm: -571.249120062716
episode: 20 training return: 18.358434876080963
episode: 21 training return: 211.26428491849126
episode: 22 training return: -701.54396726021
episode: 23 training return: -617.2710326606161
epoch: 6 test_true_pfm: 28.498409530301597 sim_pfm: -314.4602322113449
episode: 24 training return: -365.1271160015313
episode: 25 training return: -282.79847244957926
episode: 26 training return: -173.93127949846703
episode: 27 training return: -45.50513887166071
epoch: 7 test_true_pfm: 44.20412718409143 sim_pfm: 54.1743962255696
episode: 28 training return: -325.18116852741286
episode: 29 training return: -90.51336352593844
episode: 30 training return: -276.96185223209216
episode: 31 training return: 28.73267101591214
epoch: 8 test_true_pfm: 18.140935354658836 sim_pfm: -160.91696126768494
episode: 32 training return: 397.608516367025
episode: 33 training return: 100.98046341588883
episode: 34 training return: 360.62589376971437
episode: 35 training return: 331.4821936620646
epoch: 9 test_true_pfm: 24.687050740788866 sim_pfm: 259.8446903605715
episode: 36 training return: 339.3780291754027
episode: 37 training return: 209.096283852612
episode: 38 training return: 120.29598840560162
episode: 39 training return: 177.53704023497244
epoch: 10 test_true_pfm: 27.513652533232356 sim_pfm: 277.9841934335796
episode: 40 training return: 33.26280413986832
episode: 41 training return: 256.6734097780799
episode: 42 training return: 267.0726810574459
episode: 43 training return: 395.1711046055341
epoch: 11 test_true_pfm: 19.13404023018944 sim_pfm: 242.4793465746066
episode: 44 training return: 450.99162496949185
episode: 45 training return: 556.8011830237144
episode: 46 training return: 546.2785742212504
episode: 47 training return: 448.23651209309594
epoch: 12 test_true_pfm: 26.759570003329372 sim_pfm: 394.8758921079891
episode: 48 training return: 427.3578043964203
episode: 49 training return: 495.23398673476873
episode: 50 training return: 452.77774541442807
episode: 51 training return: 578.8268686426319
epoch: 13 test_true_pfm: -0.6030877273902785 sim_pfm: 561.3318726993571
episode: 52 training return: 540.4919806222792
episode: 53 training return: 587.130971977394
episode: 54 training return: 569.2119025685444
episode: 55 training return: 580.7974310973431
epoch: 14 test_true_pfm: 12.3939621454263 sim_pfm: 643.7527302910463
episode: 56 training return: 611.7558505060359
episode: 57 training return: 655.8140660158181
episode: 58 training return: 662.3248586936061
episode: 59 training return: 676.7807386991598
epoch: 15 test_true_pfm: 1.6624293577719211 sim_pfm: 691.7669796555294
episode: 60 training return: 356.97145539630543
episode: 61 training return: 601.1822997970842
episode: 62 training return: 624.7811854083144
episode: 63 training return: 661.0030990817772
epoch: 16 test_true_pfm: 28.15331554817405 sim_pfm: 690.5612054783003
episode: 64 training return: 690.4613527990966
episode: 65 training return: 753.259332699051
episode: 66 training return: 776.7156952015011
episode: 67 training return: 759.2196222210304
epoch: 17 test_true_pfm: 15.475792491151338 sim_pfm: 826.645099425861
episode: 68 training return: 750.1348059402941
episode: 69 training return: 805.5167874529615
episode: 70 training return: 788.7541780672055
episode: 71 training return: 809.2479444139349
epoch: 18 test_true_pfm: 10.401315502227826 sim_pfm: 893.9076759650659
episode: 72 training return: 806.1122167079706
episode: 73 training return: 791.3160577614291
episode: 74 training return: 800.9645043005935
episode: 75 training return: 799.3558251980836
epoch: 19 test_true_pfm: 14.246533623831485 sim_pfm: 842.7691723404234
episode: 76 training return: 797.9534109917225
episode: 77 training return: 781.7528688148466
episode: 78 training return: 772.4809658230864
episode: 79 training return: 788.3814559033482
epoch: 20 test_true_pfm: 17.45021495475074 sim_pfm: 861.9941524683006
episode: 80 training return: 793.9382266119758
episode: 81 training return: 820.0963226130999
episode: 82 training return: 785.4267207574367
episode: 83 training return: 780.6497619140634
epoch: 21 test_true_pfm: 8.809470566439664 sim_pfm: 869.7451530735359
episode: 84 training return: 764.6649947590116
episode: 85 training return: 795.4644729792983
episode: 86 training return: 774.4874372218504
episode: 87 training return: 786.9890923104397
epoch: 22 test_true_pfm: 12.48164740374391 sim_pfm: 906.1794957569118
episode: 88 training return: 790.6971088205161
episode: 89 training return: 797.4763084356622
episode: 90 training return: 778.3651394961685
episode: 91 training return: 793.6721408765438
epoch: 23 test_true_pfm: 10.957823305013127 sim_pfm: 872.4352902715106
episode: 92 training return: 807.9251334574487
episode: 93 training return: 801.6447191052805
episode: 94 training return: 825.0703053133324
episode: 95 training return: 811.7849446789515
epoch: 24 test_true_pfm: 14.900512306645416 sim_pfm: 867.5788389758014
episode: 96 training return: 820.6249210331724
episode: 97 training return: 796.6996779712752
episode: 98 training return: 824.222089393593
episode: 99 training return: 820.6499512107972
epoch: 25 test_true_pfm: 11.407324778011876 sim_pfm: 855.7006868050063
episode: 100 training return: 827.1422320017064
episode: 101 training return: 798.5313849619364
episode: 102 training return: 792.4223426893011
episode: 103 training return: 824.4201303767196
epoch: 26 test_true_pfm: 7.4864545498428825 sim_pfm: 913.4313548611921
episode: 104 training return: 819.5324465172378
episode: 105 training return: 849.3051002451907
episode: 106 training return: 818.8034886219712
episode: 107 training return: 826.2891203365674
epoch: 27 test_true_pfm: 8.574190511582355 sim_pfm: 905.4354975943783
episode: 108 training return: 842.4076691920757
episode: 109 training return: 827.2761930545225
episode: 110 training return: 831.9981817873276
episode: 111 training return: 815.4183546638218
epoch: 28 test_true_pfm: 9.332015964311207 sim_pfm: 901.3733454504661
episode: 112 training return: 812.9622269110155
episode: 113 training return: 825.4421148358306
episode: 114 training return: 832.5851557433488
episode: 115 training return: 834.9310198469808
epoch: 29 test_true_pfm: 7.21953070235528 sim_pfm: 913.669399877355
episode: 116 training return: 824.9273403417056
episode: 117 training return: 832.5289627227002
episode: 118 training return: 815.5628331524368
episode: 119 training return: 823.0403222454491
epoch: 30 test_true_pfm: 6.865956181078185 sim_pfm: 895.8755554983745
episode: 120 training return: 829.4332028699879
episode: 121 training return: 822.0608382159637
episode: 122 training return: 846.4063862434807
episode: 123 training return: 828.9857235435815
epoch: 31 test_true_pfm: 8.82345656358978 sim_pfm: 916.3061321600231
episode: 124 training return: 828.7057599652666
episode: 125 training return: 822.5422598067133
episode: 126 training return: 830.3369927267931
episode: 127 training return: 808.5378843173056
epoch: 32 test_true_pfm: 9.81985921437218 sim_pfm: 917.8922620167905
episode: 128 training return: 805.874452962811
episode: 129 training return: 822.6257439853019
episode: 130 training return: 822.5963217730266
episode: 131 training return: 811.0285722867662
epoch: 33 test_true_pfm: 9.625310530771978 sim_pfm: 894.2002611626267
episode: 132 training return: 802.6451192122997
episode: 133 training return: 811.014427140275
episode: 134 training return: 795.9093483693714
episode: 135 training return: 795.0785677888041
epoch: 34 test_true_pfm: 9.84294700000034 sim_pfm: 921.446935602241
episode: 136 training return: 769.9485878633428
episode: 137 training return: 803.7692529147009
episode: 138 training return: 801.9731012350558
episode: 139 training return: 783.7085250794239
epoch: 35 test_true_pfm: 10.740582514936667 sim_pfm: 909.1357065211354
episode: 140 training return: 796.0558134787399
episode: 141 training return: 821.7084503950603
episode: 142 training return: 808.1787969851251
episode: 143 training return: 827.9559602214812
epoch: 36 test_true_pfm: 7.124324184514016 sim_pfm: 919.4968858270292
episode: 144 training return: 815.3670304367621
episode: 145 training return: 817.8074591738059
episode: 146 training return: 821.1635389256213
episode: 147 training return: 805.3675258420733
epoch: 37 test_true_pfm: 13.393998976658784 sim_pfm: 909.7723338108478
episode: 148 training return: 827.4830852081998
episode: 149 training return: 800.6355699741906
episode: 150 training return: 823.873877119962
episode: 151 training return: 812.8017499176856
epoch: 38 test_true_pfm: 10.126238940616537 sim_pfm: 919.910120481503
episode: 152 training return: 829.534568591945
episode: 153 training return: 822.5766050383487
episode: 154 training return: 840.9546769440716
episode: 155 training return: 800.354031408694
epoch: 39 test_true_pfm: 10.445955551716938 sim_pfm: 933.9953057767503
episode: 156 training return: 834.7487150881868
episode: 157 training return: 789.7450516821016
episode: 158 training return: 777.4947331903122
episode: 159 training return: 809.3210577883303
epoch: 40 test_true_pfm: 9.132465317363474 sim_pfm: 927.2203670889448
episode: 160 training return: 838.9701191091983
episode: 161 training return: 841.5737409593187
episode: 162 training return: 826.6822183097081
episode: 163 training return: 827.2655859560531
epoch: 41 test_true_pfm: 8.98336310098259 sim_pfm: 920.3532824374255
episode: 164 training return: 806.1374892765934
episode: 165 training return: 796.4415424615146
episode: 166 training return: 815.3398798258672
episode: 167 training return: 811.9686872369181
epoch: 42 test_true_pfm: 6.475748614877768 sim_pfm: 929.0511028167286
episode: 168 training return: 804.2727991079311
episode: 169 training return: 831.8189579131098
episode: 170 training return: 833.285342685177
episode: 171 training return: 826.7736577798373
epoch: 43 test_true_pfm: 11.080560897937735 sim_pfm: 918.4946258004951
episode: 172 training return: 815.3963227519491
episode: 173 training return: 817.3261707297777
episode: 174 training return: 816.4859898011842
episode: 175 training return: 798.5965071692923
epoch: 44 test_true_pfm: 8.691709354806047 sim_pfm: 920.8594363069027
episode: 176 training return: 815.71456249164
episode: 177 training return: 822.4681575190228
episode: 178 training return: 812.971676816453
episode: 179 training return: 784.4252868019852
epoch: 45 test_true_pfm: 10.669647090008198 sim_pfm: 898.2486938007092
episode: 180 training return: 787.7068071710476
episode: 181 training return: 764.8318833067759
episode: 182 training return: 752.4094335752734
episode: 183 training return: 777.0579505346664
epoch: 46 test_true_pfm: 6.270300609571946 sim_pfm: 898.8079900520186
episode: 184 training return: 780.90161810221
episode: 185 training return: 795.1818000307023
episode: 186 training return: 808.0611968835168
episode: 187 training return: 755.8912433287577
epoch: 47 test_true_pfm: 9.89942350355427 sim_pfm: 867.5149869473362
episode: 188 training return: 809.1523723179897
episode: 189 training return: 804.3584400008942
episode: 190 training return: 771.9064007354386
episode: 191 training return: 822.871722242381
epoch: 48 test_true_pfm: 8.927536189868265 sim_pfm: 879.7399923936812
episode: 192 training return: 778.229348879193
episode: 193 training return: 757.8547646027025
episode: 194 training return: 763.8015739733765
episode: 195 training return: 780.450767099061
epoch: 49 test_true_pfm: 12.194262922340261 sim_pfm: 863.610247493023
episode: 196 training return: 797.4030507548594
episode: 197 training return: 790.4055870640889
episode: 198 training return: 784.024125941479
episode: 199 training return: 795.7320685365017
epoch: 50 test_true_pfm: 13.245657582238607 sim_pfm: 836.5304848559323
episode: 200 training return: 797.3520584436305
episode: 201 training return: 786.6417019873713
episode: 202 training return: 790.2013988510065
episode: 203 training return: 800.371207714434
epoch: 51 test_true_pfm: 7.024117832548893 sim_pfm: 897.883790658263
episode: 204 training return: 788.1275493520404
episode: 205 training return: 806.4346948965433
episode: 206 training return: 806.7827217108792
episode: 207 training return: 816.5685451939212
epoch: 52 test_true_pfm: 11.127982612691834 sim_pfm: 850.119219237283
episode: 208 training return: 777.7409612711645
episode: 209 training return: 794.7301481388076
episode: 210 training return: 777.6226852215582
episode: 211 training return: 830.5550378806531
epoch: 53 test_true_pfm: 11.23472658566347 sim_pfm: 912.297064062891
episode: 212 training return: 810.5382463380085
episode: 213 training return: 808.1178185487987
episode: 214 training return: 828.3121160456125
episode: 215 training return: 827.6729535008698
epoch: 54 test_true_pfm: 9.009252112938215 sim_pfm: 915.0154245119909
episode: 216 training return: 813.7257204011731
episode: 217 training return: 820.6415058841811
episode: 218 training return: 819.3367875459516
episode: 219 training return: 786.5324782539225
epoch: 55 test_true_pfm: 13.696341202845208 sim_pfm: 886.1495715584266
episode: 220 training return: 846.8264678789151
episode: 221 training return: 805.9771797783573
episode: 222 training return: 823.1216543930823
episode: 223 training return: 817.6457110443467
epoch: 56 test_true_pfm: 7.581051478240255 sim_pfm: 917.1873386227086
episode: 224 training return: 802.7923942410919
episode: 225 training return: 807.2957939674652
episode: 226 training return: 804.509840586924
episode: 227 training return: 805.7596901414827
epoch: 57 test_true_pfm: 7.076572133494554 sim_pfm: 919.2861634653971
episode: 228 training return: 783.7685902828541
episode: 229 training return: 809.4349964870775
episode: 230 training return: 818.6178964943836
episode: 231 training return: 789.2270441317991
epoch: 58 test_true_pfm: 5.247470584168312 sim_pfm: 880.4161116560414
episode: 232 training return: 819.8373482089347
episode: 233 training return: 808.2455955012543
episode: 234 training return: 829.224648191421
episode: 235 training return: 818.7328365437495
epoch: 59 test_true_pfm: 9.820141965177884 sim_pfm: 921.4835936828628
episode: 236 training return: 827.8657665296548
episode: 237 training return: 811.7256076474273
episode: 238 training return: 807.13072067367
episode: 239 training return: 813.9744602176287
epoch: 60 test_true_pfm: 8.045236731070794 sim_pfm: 914.7831594276995
episode: 240 training return: 822.3318433189147
episode: 241 training return: 823.4145438637437
episode: 242 training return: 810.7369171570854
episode: 243 training return: 807.6265923469322
epoch: 61 test_true_pfm: 8.808411932484955 sim_pfm: 887.8385033417877
episode: 244 training return: 783.9844857090419
episode: 245 training return: 818.0349001962428
episode: 246 training return: 815.3491984665903
episode: 247 training return: 835.5381648392148
epoch: 62 test_true_pfm: 11.258621092321471 sim_pfm: 893.6373416130882
episode: 248 training return: 801.7928587439371
episode: 249 training return: 813.2605562056319
episode: 250 training return: 817.4521721852716
episode: 251 training return: 818.6155342279219
epoch: 63 test_true_pfm: 4.934563469517951 sim_pfm: 871.2274521911953
episode: 252 training return: 809.1028421327453
episode: 253 training return: 806.9841269723036
episode: 254 training return: 806.2429157066722
episode: 255 training return: 816.4759713772405
epoch: 64 test_true_pfm: 0.2864799377739531 sim_pfm: 876.8573416419733
episode: 256 training return: 812.8491353660144
episode: 257 training return: 803.7403483213429
episode: 258 training return: 822.603819312738
episode: 259 training return: 799.9860378749444
epoch: 65 test_true_pfm: 3.2154004377319176 sim_pfm: 893.8627741749366
episode: 260 training return: 812.3764989487443
episode: 261 training return: 806.6893290518074
episode: 262 training return: 774.7355524869802
episode: 263 training return: 826.4080203295281
epoch: 66 test_true_pfm: 2.7614780392034417 sim_pfm: 897.7372404098915
episode: 264 training return: 788.8311022080609
episode: 265 training return: 792.2619982175648
episode: 266 training return: 818.4268393540691
episode: 267 training return: 811.8425192927174
epoch: 67 test_true_pfm: 2.816072745812672 sim_pfm: 893.4972936004324
episode: 268 training return: 792.7493433258808
episode: 269 training return: 798.4843605189101
episode: 270 training return: 808.1086273356486
episode: 271 training return: 825.2546702436395
epoch: 68 test_true_pfm: 9.700507197716854 sim_pfm: 906.6101234080095
episode: 272 training return: 826.864363644718
episode: 273 training return: 791.6769326905032
episode: 274 training return: 807.4160785568504
episode: 275 training return: 805.4310029796392
epoch: 69 test_true_pfm: 5.086024409320251 sim_pfm: 862.242115668887
episode: 276 training return: 812.3905438022816
episode: 277 training return: 818.6083842326269
episode: 278 training return: 818.9645729801578
episode: 279 training return: 813.2481170157278
epoch: 70 test_true_pfm: 4.036711511700004 sim_pfm: 886.4198927051766
episode: 280 training return: 826.4382769848447
episode: 281 training return: 801.8503913185781
episode: 282 training return: 795.8111761110252
episode: 283 training return: 803.0259192603651
epoch: 71 test_true_pfm: 6.177681088880836 sim_pfm: 816.4093361489732
episode: 284 training return: 796.9437015703841
episode: 285 training return: 827.7706537822299
episode: 286 training return: 819.4238364978708
episode: 287 training return: 803.4917837525852
epoch: 72 test_true_pfm: 3.3452258996092845 sim_pfm: 883.1693641855752
episode: 288 training return: 823.6388409170137
episode: 289 training return: 811.2906474744963
episode: 290 training return: 812.7655141645819
episode: 291 training return: 828.9752283880481
epoch: 73 test_true_pfm: 3.278024460646764 sim_pfm: 829.8089648077505
episode: 292 training return: 823.6940384560446
episode: 293 training return: 815.8914287288952
episode: 294 training return: 823.5756293225186
episode: 295 training return: 839.9822305249384
epoch: 74 test_true_pfm: 3.6003122536159937 sim_pfm: 909.9228854559493
episode: 296 training return: 797.7064379216966
episode: 297 training return: 821.0419457445455
episode: 298 training return: 840.6485129955406
episode: 299 training return: 811.6390070954567
epoch: 75 test_true_pfm: 3.6321178279829107 sim_pfm: 839.6531828629861
episode: 300 training return: 800.3624393533638
episode: 301 training return: 815.4856933106618
episode: 302 training return: 844.0228076186498
episode: 303 training return: 834.5783871872255
epoch: 76 test_true_pfm: 7.93494153169612 sim_pfm: 873.4569114687529
episode: 304 training return: 796.1733779782263
episode: 305 training return: 819.2177910200502
episode: 306 training return: 815.9444022627808
episode: 307 training return: 831.4502815082411
epoch: 77 test_true_pfm: 1.8109987640833396 sim_pfm: 766.1312550578889
episode: 308 training return: 828.0228664946242
episode: 309 training return: 806.0210127949777
episode: 310 training return: 794.9886577477351
episode: 311 training return: 814.4081573610197
epoch: 78 test_true_pfm: 7.112958679553008 sim_pfm: 844.041525456094
episode: 312 training return: 827.5575260261496
episode: 313 training return: 824.4721100866071
episode: 314 training return: 799.9076480509056
episode: 315 training return: 810.170050715012
epoch: 79 test_true_pfm: 6.036416304022131 sim_pfm: 838.2989092982236
episode: 316 training return: 773.0621708873124
episode: 317 training return: 783.5040356417949
episode: 318 training return: 813.8559531969843
episode: 319 training return: 815.7749053963279
epoch: 80 test_true_pfm: 3.9085289496174433 sim_pfm: 865.1651128830032
episode: 320 training return: 812.0398922738291
episode: 321 training return: 811.3363762828004
episode: 322 training return: 825.4424582164003
episode: 323 training return: 847.5037351483463
epoch: 81 test_true_pfm: 4.814732354306112 sim_pfm: 892.1813803927147
episode: 324 training return: 847.6810431789115
episode: 325 training return: 842.745184881725
episode: 326 training return: 838.4904611591655
episode: 327 training return: 830.6290107168617
epoch: 82 test_true_pfm: 4.346339457870053 sim_pfm: 875.5422598245945
episode: 328 training return: 829.3239657759528
episode: 329 training return: 808.9412342381712
episode: 330 training return: 832.8063659402469
episode: 331 training return: 826.367587018819
epoch: 83 test_true_pfm: 4.587091004210938 sim_pfm: 882.7564797899693
episode: 332 training return: 828.7079057201045
episode: 333 training return: 837.6631539379999
episode: 334 training return: 824.664785937268
episode: 335 training return: 829.404360921495
epoch: 84 test_true_pfm: 6.963827262457242 sim_pfm: 906.020974739407
episode: 336 training return: 848.119600524357
episode: 337 training return: 824.4542067513781
episode: 338 training return: 845.1717129808516
episode: 339 training return: 848.3170639475014
epoch: 85 test_true_pfm: 3.497462083693036 sim_pfm: 904.4817862302905
episode: 340 training return: 827.634342534706
episode: 341 training return: 846.2923477294498
episode: 342 training return: 822.7556576842736
episode: 343 training return: 844.2116299185817
epoch: 86 test_true_pfm: 3.8516507657267796 sim_pfm: 877.558665421888
episode: 344 training return: 792.9061951023349
episode: 345 training return: 816.0698251208659
episode: 346 training return: 834.9632642977177
episode: 347 training return: 839.6957209572855
epoch: 87 test_true_pfm: 0.1528542641161344 sim_pfm: 883.7517118253951
episode: 348 training return: 835.8308504237443
episode: 349 training return: 832.2527433919253
episode: 350 training return: 831.2332297247985
episode: 351 training return: 828.8238780548814
epoch: 88 test_true_pfm: 5.3132630946717 sim_pfm: 854.1656882679999
episode: 352 training return: 794.0993018880542
episode: 353 training return: 806.6482229168606
episode: 354 training return: 841.8210997176247
episode: 355 training return: 819.6882224591151
epoch: 89 test_true_pfm: 8.004708987719857 sim_pfm: 893.2907725573107
episode: 356 training return: 823.0983993577446
episode: 357 training return: 830.7781197190747
episode: 358 training return: 837.0870173778475
episode: 359 training return: 808.0072430665821
epoch: 90 test_true_pfm: 5.197945193928142 sim_pfm: 871.4333825727341
episode: 360 training return: 841.5304865297403
episode: 361 training return: 834.7665972371718
episode: 362 training return: 816.9718118737409
episode: 363 training return: 821.2064393596469
epoch: 91 test_true_pfm: 3.7853039619516644 sim_pfm: 831.4446188559621
episode: 364 training return: 816.0400071352002
episode: 365 training return: 824.7459010830212
episode: 366 training return: 814.7774881827802
episode: 367 training return: 818.396806738323
epoch: 92 test_true_pfm: 2.8502026452578186 sim_pfm: 883.4821954950783
episode: 368 training return: 804.87775267446
episode: 369 training return: 822.5375700395981
episode: 370 training return: 824.8682794224177
episode: 371 training return: 815.5441874356719
epoch: 93 test_true_pfm: 5.411508588510236 sim_pfm: 880.4381508559687
episode: 372 training return: 806.7774143011048
episode: 373 training return: 804.0993580181349
episode: 374 training return: 809.499164784902
episode: 375 training return: 813.5889278426124
epoch: 94 test_true_pfm: 5.0352452328734305 sim_pfm: 934.2860852749718
episode: 376 training return: 829.4350203961797
episode: 377 training return: 810.5575562085543
episode: 378 training return: 790.2326747420142
episode: 379 training return: 788.2083818308809
epoch: 95 test_true_pfm: 3.9224604073436913 sim_pfm: 842.0203950496445
episode: 380 training return: 812.8967081561846
episode: 381 training return: 818.3868879700879
episode: 382 training return: 835.5817545203381
episode: 383 training return: 811.3550122375987
epoch: 96 test_true_pfm: 3.8942134201117837 sim_pfm: 883.2235471868349
episode: 384 training return: 822.379774926436
episode: 385 training return: 803.9616652571716
episode: 386 training return: 818.4096408519146
episode: 387 training return: 819.2545394207293
epoch: 97 test_true_pfm: 3.9634577727818225 sim_pfm: 870.8687970683853
episode: 388 training return: 824.3876009021483
episode: 389 training return: 833.4414208854139
episode: 390 training return: 798.873765401478
episode: 391 training return: 823.1294948189112
epoch: 98 test_true_pfm: 4.727566442025575 sim_pfm: 915.1817296138443
episode: 392 training return: 808.8929498842197
episode: 393 training return: 807.0868209696766
episode: 394 training return: 799.5566095358698
episode: 395 training return: 808.609129628199
epoch: 99 test_true_pfm: 2.257121951942506 sim_pfm: 882.0841254699972
episode: 396 training return: 824.3605075279419
episode: 397 training return: 802.6897464370431
episode: 398 training return: 819.3929765944249
episode: 399 training return: 787.8356753961615
epoch: 100 test_true_pfm: 1.7936564596489084 sim_pfm: 895.1019721020293
episode: 400 training return: 823.8833598336817
episode: 401 training return: 805.9064728416749
episode: 402 training return: 807.2333622010549
episode: 403 training return: 813.4959101989332
epoch: 101 test_true_pfm: 1.7894315623532715 sim_pfm: 896.6546607952134
episode: 404 training return: 821.5015165772276
episode: 405 training return: 812.4892132466701
episode: 406 training return: 810.0553479256654
episode: 407 training return: 814.5442765532417
epoch: 102 test_true_pfm: 2.759121891565079 sim_pfm: 875.863544493279
episode: 408 training return: 823.6299224425137
episode: 409 training return: 822.5158007325574
episode: 410 training return: 803.1738006956355
episode: 411 training return: 830.378140537694
epoch: 103 test_true_pfm: 1.1540756560986611 sim_pfm: 884.2586058133878
episode: 412 training return: 831.8050254928094
episode: 413 training return: 830.7939180909133
episode: 414 training return: 824.5400592839653
episode: 415 training return: 819.4710277702975
epoch: 104 test_true_pfm: 3.1220097307295434 sim_pfm: 863.9117216762168
episode: 416 training return: 797.4972265338109
episode: 417 training return: 797.6621787453021
episode: 418 training return: 830.774236124222
episode: 419 training return: 829.565001549588
epoch: 105 test_true_pfm: 1.3137620110440928 sim_pfm: 886.0590727542036
episode: 420 training return: 810.7446639636723
episode: 421 training return: 811.2688236273386
episode: 422 training return: 786.9414484277187
episode: 423 training return: 785.5559435341188
epoch: 106 test_true_pfm: 4.600555408950107 sim_pfm: 920.507231328289
episode: 424 training return: 811.5977544436917
episode: 425 training return: 822.1843803799493
episode: 426 training return: 837.7291477016518
episode: 427 training return: 816.1336299190104
epoch: 107 test_true_pfm: 2.4476936265259566 sim_pfm: 897.3410940387054
episode: 428 training return: 780.7012442461797
episode: 429 training return: 776.8292259829036
episode: 430 training return: 815.030362424353
episode: 431 training return: 831.2048519242474
epoch: 108 test_true_pfm: 0.30760363344324015 sim_pfm: 884.1598590611424
episode: 432 training return: 814.4472747667179
episode: 433 training return: 814.7073993936266
episode: 434 training return: 804.5809809450159
episode: 435 training return: 818.6751611625624
epoch: 109 test_true_pfm: 5.408037326521965 sim_pfm: 907.3438197111815
episode: 436 training return: 794.6630678698081
episode: 437 training return: 804.3408519380724
episode: 438 training return: 829.5402184132347
episode: 439 training return: 815.9303245567371
epoch: 110 test_true_pfm: 4.53556124714275 sim_pfm: 923.2310299056484
episode: 440 training return: 823.1680164167252
episode: 441 training return: 820.1707697502195
episode: 442 training return: 814.627367945381
episode: 443 training return: 835.0636648708512
epoch: 111 test_true_pfm: 4.6191556865420935 sim_pfm: 892.9336068029152
episode: 444 training return: 815.6581779995613
episode: 445 training return: 815.9517329185264
episode: 446 training return: 798.6002500972143
episode: 447 training return: 793.6812877589
epoch: 112 test_true_pfm: 5.164668959078305 sim_pfm: 912.8094653584265
episode: 448 training return: 796.7574917874165
episode: 449 training return: 810.7792660972037
episode: 450 training return: 818.2116253742073
episode: 451 training return: 812.904385345198
epoch: 113 test_true_pfm: 2.3503256374253203 sim_pfm: 891.004159140724
episode: 452 training return: 826.4272861517869
episode: 453 training return: 823.727017836388
episode: 454 training return: 806.7195660892147
episode: 455 training return: 831.1319042061066
epoch: 114 test_true_pfm: 4.771774436646174 sim_pfm: 922.7316274479344
episode: 456 training return: 815.0419367884947
episode: 457 training return: 828.7120386315212
episode: 458 training return: 791.8460802237179
episode: 459 training return: 806.4447253780359
epoch: 115 test_true_pfm: 4.86407669911527 sim_pfm: 890.2312253645072
episode: 460 training return: 818.7822982124482
episode: 461 training return: 805.1284269580517
episode: 462 training return: 791.6497928701947
episode: 463 training return: 791.2960512710752
epoch: 116 test_true_pfm: 5.4882605250684975 sim_pfm: 907.8637393948168
episode: 464 training return: 813.0352792084994
episode: 465 training return: 830.8457447708201
episode: 466 training return: 838.8315283147168
episode: 467 training return: 828.804874374219
epoch: 117 test_true_pfm: 7.585571824138223 sim_pfm: 925.7366314115774
episode: 468 training return: 827.2566071201952
episode: 469 training return: 819.2927848987105
episode: 470 training return: 829.6998267920176
episode: 471 training return: 842.3761276870082
epoch: 118 test_true_pfm: 6.1094678983224515 sim_pfm: 927.0379512588883
episode: 472 training return: 839.596585824979
episode: 473 training return: 830.6623057989775
episode: 474 training return: 821.4559390230669
episode: 475 training return: 820.9729665618579
epoch: 119 test_true_pfm: 3.578090490947075 sim_pfm: 901.2826124897778
episode: 476 training return: 828.5292104418786
episode: 477 training return: 835.4403992620413
episode: 478 training return: 839.3626851794007
episode: 479 training return: 828.2776183346443
epoch: 120 test_true_pfm: 3.25433959644634 sim_pfm: 916.5951938081149
episode: 480 training return: 818.8222392916764
episode: 481 training return: 826.5249657910709
episode: 482 training return: 829.5989603999684
episode: 483 training return: 836.6536467095224
epoch: 121 test_true_pfm: 3.330167454493812 sim_pfm: 900.5662356583704
episode: 484 training return: 835.5227673752747
episode: 485 training return: 814.8156105795669
episode: 486 training return: 835.2195050671984
episode: 487 training return: 835.5253346662312
epoch: 122 test_true_pfm: 3.4769181107747658 sim_pfm: 925.5889507196416
episode: 488 training return: 825.0485849262877
episode: 489 training return: 815.4834378652375
episode: 490 training return: 808.4433555523736
episode: 491 training return: 806.1935518303945
epoch: 123 test_true_pfm: 9.865049450133672 sim_pfm: 896.6174335361424
episode: 492 training return: 814.9512884304527
episode: 493 training return: 801.2798868471795
episode: 494 training return: 822.2862517103772
episode: 495 training return: 824.2480858235995
epoch: 124 test_true_pfm: 2.4234357144375394 sim_pfm: 902.0210780266092
episode: 496 training return: 800.004199453118
episode: 497 training return: 837.5260889782063
episode: 498 training return: 820.9591840715027
episode: 499 training return: 822.5318525634042
epoch: 125 test_true_pfm: 1.4601599221733716 sim_pfm: 921.5528709783399
episode: 500 training return: 812.723210989012
episode: 501 training return: 817.190960286321
episode: 502 training return: 798.736948783906
episode: 503 training return: 825.0386424973209
epoch: 126 test_true_pfm: 7.033831449424409 sim_pfm: 912.7213760259172
episode: 504 training return: 807.3854620440437
episode: 505 training return: 823.250936729073
episode: 506 training return: 803.6419166836483
episode: 507 training return: 820.7143980704318
epoch: 127 test_true_pfm: 3.5004367000529983 sim_pfm: 889.7777390580965
episode: 508 training return: 837.2735501169286
episode: 509 training return: 810.5192215281282
episode: 510 training return: 847.0026890209216
episode: 511 training return: 841.0380652558915
epoch: 128 test_true_pfm: 2.685396335943311 sim_pfm: 886.9359817166726
episode: 512 training return: 805.9522048311736
episode: 513 training return: 819.5260982744217
episode: 514 training return: 851.9291271724237
episode: 515 training return: 839.9761056421551
epoch: 129 test_true_pfm: 1.7401963613482376 sim_pfm: 894.7440690217375
episode: 516 training return: 828.2604076860499
episode: 517 training return: 829.3050284200901
episode: 518 training return: 828.9882014620716
episode: 519 training return: 815.8687199121955
epoch: 130 test_true_pfm: 4.443817882218363 sim_pfm: 910.7128866413553
episode: 520 training return: 821.6214083306972
episode: 521 training return: 791.8355106621109
episode: 522 training return: 836.3460733669965
episode: 523 training return: 800.5127148648488
epoch: 131 test_true_pfm: 4.754092689349671 sim_pfm: 898.6674723528644
episode: 524 training return: 810.5805120306833
episode: 525 training return: 816.0556810649499
episode: 526 training return: 833.006987891203
episode: 527 training return: 818.5503202166302
epoch: 132 test_true_pfm: -0.29168770276872313 sim_pfm: 892.2893771369661
episode: 528 training return: 801.355043709078
episode: 529 training return: 817.5467398703012
episode: 530 training return: 829.334942946596
episode: 531 training return: 833.5778316547987
epoch: 133 test_true_pfm: 3.493330214748177 sim_pfm: 897.9237599449552
episode: 532 training return: 831.9889577642074
episode: 533 training return: 805.2919430812502
episode: 534 training return: 813.0193565667934
episode: 535 training return: 813.8165902445844
epoch: 134 test_true_pfm: 5.6191904442112826 sim_pfm: 897.5290224214132
episode: 536 training return: 800.2130276045398
episode: 537 training return: 831.1885182027542
episode: 538 training return: 823.0032097965818
episode: 539 training return: 824.2231859539945
epoch: 135 test_true_pfm: -1.1312108333112472 sim_pfm: 809.8355290160623
episode: 540 training return: 800.0977674462142
episode: 541 training return: 808.8548879604305
episode: 542 training return: 830.3947280415217
episode: 543 training return: 797.7161903070605
epoch: 136 test_true_pfm: 4.680323441101324 sim_pfm: 882.7413419140969
episode: 544 training return: 804.2897796171563
episode: 545 training return: 815.5403430417916
episode: 546 training return: 796.1073976882642
episode: 547 training return: 797.2642853810576
epoch: 137 test_true_pfm: 0.7765769794469555 sim_pfm: 854.3420121169238
episode: 548 training return: 792.5186991660624
episode: 549 training return: 807.8592954789851
episode: 550 training return: 793.9143507133316
episode: 551 training return: 805.7209410541336
epoch: 138 test_true_pfm: 3.5389984272296537 sim_pfm: 885.883611834685
episode: 552 training return: 767.8889395222126
episode: 553 training return: 792.056212574807
episode: 554 training return: 782.6881045874181
episode: 555 training return: 757.0665242342259
epoch: 139 test_true_pfm: -0.5841057164341652 sim_pfm: 849.8295954876569
episode: 556 training return: 787.0187438024989
episode: 557 training return: 805.2606045587373
episode: 558 training return: 808.7061399881011
episode: 559 training return: 823.0483201294112
epoch: 140 test_true_pfm: 1.152681289342604 sim_pfm: 877.5552608738305
episode: 560 training return: 799.8352328555544
episode: 561 training return: 807.4809828619382
episode: 562 training return: 822.6522743253742
episode: 563 training return: 811.7572390240474
epoch: 141 test_true_pfm: 2.9273880857340977 sim_pfm: 894.1982129914097
episode: 564 training return: 810.1703769542328
episode: 565 training return: 791.5254155890506
episode: 566 training return: 807.8762293823147
episode: 567 training return: 826.4499357244789
epoch: 142 test_true_pfm: 1.0120981719861368 sim_pfm: 890.9604868233386
episode: 568 training return: 833.8361238733366
episode: 569 training return: 802.9808075971681
episode: 570 training return: 804.005756077452
episode: 571 training return: 811.0881511568984
epoch: 143 test_true_pfm: 1.575233019427189 sim_pfm: 897.7853956622323
episode: 572 training return: 809.8000062959319
episode: 573 training return: 836.5501646617861
episode: 574 training return: 809.5680609427225
episode: 575 training return: 827.5423145556331
epoch: 144 test_true_pfm: 2.879945357869098 sim_pfm: 908.9973093262997
episode: 576 training return: 835.2238998799841
episode: 577 training return: 828.8243600141774
episode: 578 training return: 829.7796186453354
episode: 579 training return: 807.1608687113276
epoch: 145 test_true_pfm: 2.1802193133634273 sim_pfm: 900.2296497617017
episode: 580 training return: 839.2103477520426
episode: 581 training return: 827.7709976364272
episode: 582 training return: 820.3199521177659
episode: 583 training return: 830.7476031397011
epoch: 146 test_true_pfm: 0.6602522438981293 sim_pfm: 884.1309550922024
episode: 584 training return: 821.7587709354407
episode: 585 training return: 812.392661123321
episode: 586 training return: 780.0087227653671
episode: 587 training return: 795.1252097999419
epoch: 147 test_true_pfm: 1.212765192726542 sim_pfm: 893.0097341052758
episode: 588 training return: 802.5023985795755
episode: 589 training return: 811.25781151568
episode: 590 training return: 812.0670925909752
episode: 591 training return: 795.069191683923
epoch: 148 test_true_pfm: 2.483621057618959 sim_pfm: 903.2016896574372
episode: 592 training return: 815.4870669276095
episode: 593 training return: 813.9933274978383
episode: 594 training return: 796.4653153427632
episode: 595 training return: 793.242460533242
epoch: 149 test_true_pfm: 1.6631948960034912 sim_pfm: 845.5232516228501
episode: 596 training return: 828.9195360198133
episode: 597 training return: 838.4034516268672
episode: 598 training return: 846.9806633361217
episode: 599 training return: 814.194330122997
epoch: 150 test_true_pfm: 1.0495839819814317 sim_pfm: 886.6553677900025
