['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '0', '--data', '100000']
epoch: 0 training_loss 0.3363268129527569 test_loss: 0.20667099952697754
epoch: 1 training_loss 0.17441293224692345 test_loss: 0.16283215284347535
epoch: 2 training_loss 0.15277555137872695 test_loss: 0.15887160301208497
epoch: 3 training_loss 0.12952896688133478 test_loss: 0.12887744903564452
epoch: 4 training_loss 0.11948847334831952 test_loss: 0.12203510999679565
epoch: 5 training_loss 0.12632560394704342 test_loss: 0.10825847387313843
epoch: 6 training_loss 0.12008225591853261 test_loss: 0.12464975118637085
epoch: 7 training_loss 0.12524441830813884 test_loss: 0.11036314964294433
epoch: 8 training_loss 0.11688974015414715 test_loss: 0.10411649942398071
epoch: 9 training_loss 0.11180114708840846 test_loss: 0.10999103784561157
epoch: 10 training_loss 0.10600513931363821 test_loss: 0.1209320068359375
epoch: 11 training_loss 0.11572586823254824 test_loss: 0.10241283178329467
epoch: 12 training_loss 0.1073846141807735 test_loss: 0.12198835611343384
epoch: 13 training_loss 0.11708900179713964 test_loss: 0.0990280568599701
epoch: 14 training_loss 0.10127260651439428 test_loss: 0.12190744876861573
epoch: 15 training_loss 0.1025803303718567 test_loss: 0.09946258664131165
epoch: 16 training_loss 0.10871920086443425 test_loss: 0.12015124559402465
epoch: 17 training_loss 0.111790792606771 test_loss: 0.11405627727508545
epoch: 18 training_loss 0.10136798821389675 test_loss: 0.10395071506500245
epoch: 19 training_loss 0.10649491772055626 test_loss: 0.12379554510116578
epoch: 20 training_loss 0.10172828253358603 test_loss: 0.1126745343208313
epoch: 21 training_loss 0.10553713586181403 test_loss: 0.12310314178466797
epoch: 22 training_loss 0.10225828548893333 test_loss: 0.13138701915740966
epoch: 23 training_loss 0.10237199895083904 test_loss: 0.11725515127182007
epoch: 24 training_loss 0.10812038769945502 test_loss: 0.10311298370361328
epoch: 25 training_loss 0.10401282913982868 test_loss: 0.11600978374481201
epoch: 26 training_loss 0.10517882727086544 test_loss: 0.11543924808502197
epoch: 27 training_loss 0.09708916774019599 test_loss: 0.12341407537460328
epoch: 28 training_loss 0.10654110912233591 test_loss: 0.11701377630233764
epoch: 29 training_loss 0.10511436011642218 test_loss: 0.11297825574874878
epoch: 30 training_loss 0.09977119501680136 test_loss: 0.10438419580459594
epoch: 31 training_loss 0.1014250897616148 test_loss: 0.11804617643356323
epoch: 32 training_loss 0.09943921461701394 test_loss: 0.1015654444694519
epoch: 33 training_loss 0.10208072099834681 test_loss: 0.11058146953582763
epoch: 34 training_loss 0.099246874935925 test_loss: 0.1255861282348633
epoch: 35 training_loss 0.09809832204133272 test_loss: 0.10511196851730346
epoch: 36 training_loss 0.0964296212233603 test_loss: 0.1284102201461792
epoch: 37 training_loss 0.0975586468540132 test_loss: 0.13142107725143432
epoch: 38 training_loss 0.09589603386819362 test_loss: 0.11240029335021973
epoch: 39 training_loss 0.10633872902020812 test_loss: 0.11349945068359375
epoch: 40 training_loss 0.1072493047453463 test_loss: 0.11438000202178955
epoch: 41 training_loss 0.10368687389418482 test_loss: 0.12146364450454712
epoch: 42 training_loss 0.1044573998451233 test_loss: 0.12221055030822754
epoch: 43 training_loss 0.09362509813159704 test_loss: 0.12143870592117309
epoch: 44 training_loss 0.10072398783639074 test_loss: 0.12472380399703979
epoch: 45 training_loss 0.10096716677770018 test_loss: 0.12174423933029174
epoch: 46 training_loss 0.10131058746948839 test_loss: 0.10797486305236817
epoch: 47 training_loss 0.10112748021259904 test_loss: 0.11667484045028687
epoch: 48 training_loss 0.09779684733599424 test_loss: 0.11500886678695679
epoch: 49 training_loss 0.10431956015527248 test_loss: 0.11278214454650878
epoch: 50 training_loss 0.10622284047305584 test_loss: 0.11651829481124878
epoch: 51 training_loss 0.0896507783792913 test_loss: 0.11469520330429077
epoch: 52 training_loss 0.097999995239079 test_loss: 0.10901501178741455
epoch: 53 training_loss 0.10350355291739106 test_loss: 0.10820896625518799
epoch: 54 training_loss 0.09221788862720132 test_loss: 0.1289754867553711
epoch: 55 training_loss 0.10876023206859826 test_loss: 0.11337422132492066
epoch: 56 training_loss 0.10072401953861117 test_loss: 0.11707993745803832
epoch: 57 training_loss 0.09482406623661518 test_loss: 0.11435364484786988
epoch: 58 training_loss 0.09900591444224119 test_loss: 0.09219284057617187
epoch: 59 training_loss 0.10868326764553786 test_loss: 0.11696349382400513
epoch: 60 training_loss 0.0957898273319006 test_loss: 0.13536995649337769
epoch: 61 training_loss 0.10100203335285186 test_loss: 0.12216641902923583
epoch: 62 training_loss 0.09661863882094622 test_loss: 0.10849212408065796
epoch: 63 training_loss 0.09797730978578328 test_loss: 0.11967262029647827
epoch: 64 training_loss 0.09180083978921175 test_loss: 0.1103785753250122
epoch: 65 training_loss 0.09834061071276665 test_loss: 0.11395751237869263
epoch: 66 training_loss 0.09896616984158754 test_loss: 0.11109291315078736
epoch: 67 training_loss 0.09912524741142988 test_loss: 0.10204250812530517
epoch: 68 training_loss 0.0959138123691082 test_loss: 0.1450753927230835
epoch: 69 training_loss 0.09588172666728496 test_loss: 0.10939944982528686
epoch: 70 training_loss 0.09903733321465552 test_loss: 0.1159557580947876
epoch: 71 training_loss 0.09609532147645951 test_loss: 0.12157982587814331
epoch: 72 training_loss 0.10111365152522922 test_loss: 0.12012330293655396
epoch: 73 training_loss 0.09892969470471144 test_loss: 0.10102702379226684
epoch: 74 training_loss 0.10160276329144835 test_loss: 0.11979671716690063
epoch: 75 training_loss 0.0991392558068037 test_loss: 0.12985124588012695
epoch: 76 training_loss 0.10605564506724477 test_loss: 0.11368014812469482
epoch: 77 training_loss 0.0936894335411489 test_loss: 0.13094286918640136
epoch: 78 training_loss 0.08891825353726744 test_loss: 0.12141391038894653
epoch: 79 training_loss 0.10392464177682996 test_loss: 0.10170432329177856
epoch: 80 training_loss 0.10091396912932396 test_loss: 0.1030159592628479
epoch: 81 training_loss 0.08855177551507949 test_loss: 0.13001155853271484
epoch: 82 training_loss 0.09959926137700677 test_loss: 0.12314016819000244
epoch: 83 training_loss 0.09500231806188822 test_loss: 0.10711290836334228
epoch: 84 training_loss 0.09656957112252712 test_loss: 0.11952029466629029
epoch: 85 training_loss 0.10003383006900549 test_loss: 0.13313710689544678
epoch: 86 training_loss 0.09446139151230454 test_loss: 0.13879809379577637
epoch: 87 training_loss 0.10133664272725582 test_loss: 0.11140656471252441
epoch: 88 training_loss 0.09227163668721915 test_loss: 0.10660128593444824
epoch: 89 training_loss 0.09633434010669589 test_loss: 0.11517369747161865
epoch: 90 training_loss 0.09263612480834127 test_loss: 0.10268769264221192
epoch: 91 training_loss 0.09474380373954773 test_loss: 0.1294466733932495
epoch: 92 training_loss 0.09306502943858504 test_loss: 0.130376935005188
epoch: 93 training_loss 0.0957807206362486 test_loss: 0.10896364450454712
epoch: 94 training_loss 0.09823341455310583 test_loss: 0.10956796407699584
epoch: 95 training_loss 0.09426817024126649 test_loss: 0.10890491008758545
epoch: 96 training_loss 0.0917400743998587 test_loss: 0.10738117694854736
epoch: 97 training_loss 0.0931656283698976 test_loss: 0.11259701251983642
epoch: 98 training_loss 0.0914672576263547 test_loss: 0.11323425769805909
epoch: 99 training_loss 0.10226030826568604 test_loss: 0.09613071084022522
epoch: 100 training_loss 0.09937188517302274 test_loss: 0.12027753591537475
epoch: 101 training_loss 0.09130844928324222 test_loss: 0.11201457977294922
epoch: 102 training_loss 0.09420508455485105 test_loss: 0.10543839931488037
epoch: 103 training_loss 0.09490920843556523 test_loss: 0.10417774915695191
epoch: 104 training_loss 0.08994376977905631 test_loss: 0.12218399047851562
epoch: 105 training_loss 0.09807461250573396 test_loss: 0.12285587787628174
epoch: 106 training_loss 0.09216619439423085 test_loss: 0.1155470848083496
epoch: 107 training_loss 0.0959466540068388 test_loss: 0.12543261051177979
epoch: 108 training_loss 0.09510276844725013 test_loss: 0.09750829935073853
epoch: 109 training_loss 0.09219106690958143 test_loss: 0.12607072591781615
epoch: 110 training_loss 0.09337751399725676 test_loss: 0.13947656154632568
epoch: 111 training_loss 0.09892148136161268 test_loss: 0.11957234144210815
epoch: 112 training_loss 0.09481841601431369 test_loss: 0.12098495960235596
epoch: 113 training_loss 0.09397974405437708 test_loss: 0.119423508644104
epoch: 114 training_loss 0.09732939500361681 test_loss: 0.11774466037750245
epoch: 115 training_loss 0.08868678225204349 test_loss: 0.1018712878227234
epoch: 116 training_loss 0.09584509629756212 test_loss: 0.12168185710906983
epoch: 117 training_loss 0.09108527433127164 test_loss: 0.1303151488304138
epoch: 118 training_loss 0.08823712596669793 test_loss: 0.13180702924728394
epoch: 119 training_loss 0.0972730141505599 test_loss: 0.11866972446441651
epoch: 120 training_loss 0.09053095629438758 test_loss: 0.12007706165313721
epoch: 121 training_loss 0.08834648464806377 test_loss: 0.11817710399627686
epoch: 122 training_loss 0.09530503312125801 test_loss: 0.12468935251235962
epoch: 123 training_loss 0.09751410810276866 test_loss: 0.11132453680038452
epoch: 124 training_loss 0.091245022341609 test_loss: 0.12136567831039428
epoch: 125 training_loss 0.09205162266269326 test_loss: 0.13103314638137817
epoch: 126 training_loss 0.08801180440932513 test_loss: 0.10916742086410522
epoch: 127 training_loss 0.08942760724574328 test_loss: 0.1109052062034607
epoch: 128 training_loss 0.0909466015920043 test_loss: 0.12099987268447876
epoch: 129 training_loss 0.09067700076848269 test_loss: 0.11740189790725708
epoch: 130 training_loss 0.09404212573543191 test_loss: 0.11026486158370971
epoch: 131 training_loss 0.0931386668421328 test_loss: 0.12311506271362305
epoch: 132 training_loss 0.09319278417155147 test_loss: 0.12617770433425904
epoch: 133 training_loss 0.09275818036869168 test_loss: 0.12916760444641112
epoch: 134 training_loss 0.09451655032113195 test_loss: 0.11610450744628906
epoch: 135 training_loss 0.09137998187914491 test_loss: 0.11277556419372559
epoch: 136 training_loss 0.08762592434883118 test_loss: 0.1357402801513672
epoch: 137 training_loss 0.08226252220571041 test_loss: 0.11805431842803955
epoch: 138 training_loss 0.09265392666682601 test_loss: 0.11104012727737426
epoch: 139 training_loss 0.09473159996792674 test_loss: 0.11616277694702148
epoch: 140 training_loss 0.0916461462713778 test_loss: 0.11781680583953857
epoch: 141 training_loss 0.09749220918864011 test_loss: 0.12467920780181885
epoch: 142 training_loss 0.09174721021205187 test_loss: 0.13184723854064942
epoch: 143 training_loss 0.0913581958040595 test_loss: 0.11081544160842896
epoch: 144 training_loss 0.09119941676035523 test_loss: 0.11368798017501831
epoch: 145 training_loss 0.0905525461770594 test_loss: 0.10686887502670288
epoch: 146 training_loss 0.09381698861718178 test_loss: 0.1170378565788269
epoch: 147 training_loss 0.09142543042078614 test_loss: 0.11847641468048095
epoch: 148 training_loss 0.08745597168803215 test_loss: 0.12023886442184448
epoch: 149 training_loss 0.08503001237288117 test_loss: 0.11946315765380859
epoch: 0 training_loss 0.31025328159332277 test_loss: 0.21266779899597169
epoch: 1 training_loss 0.177975572347641 test_loss: 0.17219948768615723
epoch: 2 training_loss 0.1468476876616478 test_loss: 0.14004924297332763
epoch: 3 training_loss 0.13225132565945386 test_loss: 0.1326878547668457
epoch: 4 training_loss 0.12292336992919445 test_loss: 0.12741711139678955
epoch: 5 training_loss 0.12570347663015127 test_loss: 0.1301640033721924
epoch: 6 training_loss 0.11564412511885167 test_loss: 0.12460043430328369
epoch: 7 training_loss 0.12401083346456289 test_loss: 0.12127625942230225
epoch: 8 training_loss 0.11694712437689304 test_loss: 0.14150763750076295
epoch: 9 training_loss 0.11817821487784386 test_loss: 0.12832895517349244
epoch: 10 training_loss 0.12273463305085898 test_loss: 0.11926758289337158
epoch: 11 training_loss 0.10524444237351417 test_loss: 0.12039563655853272
epoch: 12 training_loss 0.11400414142757655 test_loss: 0.11543654203414917
epoch: 13 training_loss 0.10749825982376933 test_loss: 0.11612612009048462
epoch: 14 training_loss 0.11779012624174356 test_loss: 0.11097818613052368
epoch: 15 training_loss 0.1089549496397376 test_loss: 0.1068468689918518
epoch: 16 training_loss 0.10665147796273232 test_loss: 0.10597028732299804
epoch: 17 training_loss 0.11068769350647926 test_loss: 0.09857916235923767
epoch: 18 training_loss 0.10988720573484898 test_loss: 0.11985732316970825
epoch: 19 training_loss 0.10450933627784252 test_loss: 0.1199982762336731
epoch: 20 training_loss 0.11126388780772686 test_loss: 0.1424076795578003
epoch: 21 training_loss 0.10880140718072653 test_loss: 0.13778059482574462
epoch: 22 training_loss 0.11171462187543511 test_loss: 0.13648366928100586
epoch: 23 training_loss 0.10707759734243155 test_loss: 0.1286674976348877
epoch: 24 training_loss 0.11349727176129817 test_loss: 0.11317381858825684
epoch: 25 training_loss 0.10817437417805195 test_loss: 0.1253930449485779
epoch: 26 training_loss 0.10979596653953194 test_loss: 0.10097941160202026
epoch: 27 training_loss 0.10687069613486529 test_loss: 0.1042441725730896
epoch: 28 training_loss 0.10587867934256792 test_loss: 0.11290439367294311
epoch: 29 training_loss 0.10519398733973503 test_loss: 0.11823108196258544
epoch: 30 training_loss 0.09195032428950072 test_loss: 0.12131861448287964
epoch: 31 training_loss 0.1134527001902461 test_loss: 0.12140228748321533
epoch: 32 training_loss 0.10234280494973064 test_loss: 0.1132224440574646
epoch: 33 training_loss 0.10874781789258123 test_loss: 0.11274175643920899
epoch: 34 training_loss 0.10456774190068245 test_loss: 0.12317337989807128
epoch: 35 training_loss 0.10691321860998869 test_loss: 0.10850238800048828
epoch: 36 training_loss 0.10295032266527414 test_loss: 0.11680644750595093
epoch: 37 training_loss 0.11580841824412345 test_loss: 0.1301511526107788
epoch: 38 training_loss 0.0995065999403596 test_loss: 0.1088373303413391
epoch: 39 training_loss 0.10662079572677613 test_loss: 0.10526655912399292
epoch: 40 training_loss 0.09928327426314354 test_loss: 0.115012788772583
epoch: 41 training_loss 0.10442626252770423 test_loss: 0.0989033341407776
epoch: 42 training_loss 0.10393344413489103 test_loss: 0.11287441253662109
epoch: 43 training_loss 0.10511566316708923 test_loss: 0.12434355020523072
epoch: 44 training_loss 0.10542091768234968 test_loss: 0.12244993448257446
epoch: 45 training_loss 0.10363956995308399 test_loss: 0.13706589937210084
epoch: 46 training_loss 0.0984128488972783 test_loss: 0.11619046926498414
epoch: 47 training_loss 0.09922075804322958 test_loss: 0.13550859689712524
epoch: 48 training_loss 0.09474024306982756 test_loss: 0.10293930768966675
epoch: 49 training_loss 0.10120359126478434 test_loss: 0.09679967761039734
epoch: 50 training_loss 0.10283532375469803 test_loss: 0.10911917686462402
epoch: 51 training_loss 0.0945631868392229 test_loss: 0.1057592749595642
epoch: 52 training_loss 0.10264618810266256 test_loss: 0.12631033658981322
epoch: 53 training_loss 0.1081063786894083 test_loss: 0.10622851848602295
epoch: 54 training_loss 0.1062190692871809 test_loss: 0.11918848752975464
epoch: 55 training_loss 0.09847305102273822 test_loss: 0.11482746601104736
epoch: 56 training_loss 0.101779830083251 test_loss: 0.09439097046852112
epoch: 57 training_loss 0.0990396161004901 test_loss: 0.12702354192733764
epoch: 58 training_loss 0.09595146395266056 test_loss: 0.11855608224868774
epoch: 59 training_loss 0.10332311898469924 test_loss: 0.116432785987854
epoch: 60 training_loss 0.09887383470311761 test_loss: 0.12393265962600708
epoch: 61 training_loss 0.10168149016797542 test_loss: 0.10879273414611816
epoch: 62 training_loss 0.09818911084905267 test_loss: 0.10202187299728394
epoch: 63 training_loss 0.10150459913536906 test_loss: 0.1246039628982544
epoch: 64 training_loss 0.1045131566002965 test_loss: 0.09557266235351562
epoch: 65 training_loss 0.10118411459028721 test_loss: 0.11649669408798217
epoch: 66 training_loss 0.10639253744855523 test_loss: 0.10038180351257324
epoch: 67 training_loss 0.10536099635064602 test_loss: 0.11491961479187011
epoch: 68 training_loss 0.10150952411815524 test_loss: 0.09937350749969483
epoch: 69 training_loss 0.1002931846678257 test_loss: 0.10326387882232665
epoch: 70 training_loss 0.09954281447455288 test_loss: 0.10724745988845825
epoch: 71 training_loss 0.1059965730085969 test_loss: 0.11558806896209717
epoch: 72 training_loss 0.09859542589634657 test_loss: 0.11050149202346801
epoch: 73 training_loss 0.09872696112841367 test_loss: 0.10252814292907715
epoch: 74 training_loss 0.09742655150592328 test_loss: 0.11797515153884888
epoch: 75 training_loss 0.10060161694884301 test_loss: 0.1124082326889038
epoch: 76 training_loss 0.10528111750259996 test_loss: 0.11581848859786988
epoch: 77 training_loss 0.10192173607647419 test_loss: 0.1190642237663269
epoch: 78 training_loss 0.0937512525729835 test_loss: 0.1196435570716858
epoch: 79 training_loss 0.09909478023648262 test_loss: 0.13004668951034545
epoch: 80 training_loss 0.0933267093077302 test_loss: 0.10062867403030396
epoch: 81 training_loss 0.09909764846786856 test_loss: 0.10351055860519409
epoch: 82 training_loss 0.09747618760913611 test_loss: 0.10521234273910522
epoch: 83 training_loss 0.10146762855350971 test_loss: 0.1245116114616394
epoch: 84 training_loss 0.09761662791483104 test_loss: 0.12596384286880494
epoch: 85 training_loss 0.10423101272433996 test_loss: 0.10832881927490234
epoch: 86 training_loss 0.10688488863408566 test_loss: 0.11561195850372315
epoch: 87 training_loss 0.10401007378473878 test_loss: 0.11958103179931641
epoch: 88 training_loss 0.10144243383780122 test_loss: 0.11316859722137451
epoch: 89 training_loss 0.10171016961336136 test_loss: 0.1059160828590393
epoch: 90 training_loss 0.09860519848763943 test_loss: 0.12586747407913207
epoch: 91 training_loss 0.10571101069450378 test_loss: 0.09532543420791625
epoch: 92 training_loss 0.09767561536282302 test_loss: 0.11537818908691407
epoch: 93 training_loss 0.09727561552077532 test_loss: 0.11102490425109864
epoch: 94 training_loss 0.09928713131695986 test_loss: 0.10942492485046387
epoch: 95 training_loss 0.10343961181119084 test_loss: 0.09623726010322571
epoch: 96 training_loss 0.10116953510791063 test_loss: 0.11918706893920898
epoch: 97 training_loss 0.09349099079146982 test_loss: 0.1178704023361206
epoch: 98 training_loss 0.1129220443405211 test_loss: 0.12073379755020142
epoch: 99 training_loss 0.09708510916680098 test_loss: 0.10867164134979249
epoch: 100 training_loss 0.09863503452390432 test_loss: 0.10576337575912476
epoch: 101 training_loss 0.0911142162233591 test_loss: 0.1184998631477356
epoch: 102 training_loss 0.09623064268380403 test_loss: 0.1257412075996399
epoch: 103 training_loss 0.09338085681200027 test_loss: 0.11939247846603393
epoch: 104 training_loss 0.10006811767816544 test_loss: 0.11588431596755981
epoch: 105 training_loss 0.10456840343773365 test_loss: 0.11364071369171143
epoch: 106 training_loss 0.09666976317763329 test_loss: 0.11751698255538941
epoch: 107 training_loss 0.10326057355850934 test_loss: 0.12493948936462403
epoch: 108 training_loss 0.1005646395124495 test_loss: 0.11653836965560913
epoch: 109 training_loss 0.0957967085763812 test_loss: 0.12022196054458618
epoch: 110 training_loss 0.09774720206856728 test_loss: 0.10182693004608154
epoch: 111 training_loss 0.09753786196932196 test_loss: 0.11838734149932861
epoch: 112 training_loss 0.09086400303989649 test_loss: 0.12031770944595337
epoch: 113 training_loss 0.0986103529855609 test_loss: 0.11958757638931275
epoch: 114 training_loss 0.09896816540509462 test_loss: 0.11163488626480103
epoch: 115 training_loss 0.10130106128752231 test_loss: 0.10471419095993043
epoch: 116 training_loss 0.10106481879949569 test_loss: 0.10778956413269043
epoch: 117 training_loss 0.08616800731047988 test_loss: 0.1148180365562439
epoch: 118 training_loss 0.1044191619940102 test_loss: 0.12437571287155151
epoch: 119 training_loss 0.0947855044901371 test_loss: 0.102055823802948
epoch: 120 training_loss 0.09453505542129279 test_loss: 0.12069662809371948
epoch: 121 training_loss 0.08690636069513857 test_loss: 0.10539472103118896
epoch: 122 training_loss 0.10309499660506845 test_loss: 0.10967469215393066
epoch: 123 training_loss 0.0876375275477767 test_loss: 0.09493277668952942
epoch: 124 training_loss 0.10216538460925222 test_loss: 0.11860469579696656
epoch: 125 training_loss 0.09916021093726159 test_loss: 0.11315187215805053
epoch: 126 training_loss 0.095638431571424 test_loss: 0.12279149293899536
epoch: 127 training_loss 0.09257038755342364 test_loss: 0.12616363763809205
epoch: 128 training_loss 0.09039867502637207 test_loss: 0.11522419452667236
epoch: 129 training_loss 0.09791258540004492 test_loss: 0.11820292472839355
epoch: 130 training_loss 0.09111986528150737 test_loss: 0.10637332201004028
epoch: 131 training_loss 0.09368450839072466 test_loss: 0.117176353931427
epoch: 132 training_loss 0.0944531413912773 test_loss: 0.1457494616508484
epoch: 133 training_loss 0.09831471018493175 test_loss: 0.12343384027481079
epoch: 134 training_loss 0.09014690959826112 test_loss: 0.138844895362854
epoch: 135 training_loss 0.09528896298259497 test_loss: 0.12247036695480347
epoch: 136 training_loss 0.09474916618317365 test_loss: 0.12159172296524048
epoch: 137 training_loss 0.09471447506919503 test_loss: 0.12620146274566652
epoch: 138 training_loss 0.09409219650551677 test_loss: 0.10280779600143433
epoch: 139 training_loss 0.08965389363467693 test_loss: 0.12564667463302612
epoch: 140 training_loss 0.0989971992559731 test_loss: 0.10722501277923584
epoch: 141 training_loss 0.08829544572159648 test_loss: 0.1101269006729126
epoch: 142 training_loss 0.09147324580699205 test_loss: 0.11658587455749511
epoch: 143 training_loss 0.09028565907850862 test_loss: 0.11608954668045043
epoch: 144 training_loss 0.09311580730602145 test_loss: 0.10941240787506104
epoch: 145 training_loss 0.09369367867708206 test_loss: 0.11129376888275147
epoch: 146 training_loss 0.10331413248553872 test_loss: 0.11962258815765381
epoch: 147 training_loss 0.08988791393116116 test_loss: 0.11415119171142578
epoch: 148 training_loss 0.09171019131317734 test_loss: 0.12860541343688964
epoch: 149 training_loss 0.0957111737690866 test_loss: 0.1187821388244629
epoch: 0 training_loss 0.3355830103158951 test_loss: 0.23711006641387938
epoch: 1 training_loss 0.18767689034342766 test_loss: 0.16548848152160645
epoch: 2 training_loss 0.15303532853722573 test_loss: 0.17890892028808594
epoch: 3 training_loss 0.1422939683496952 test_loss: 0.13055545091629028
epoch: 4 training_loss 0.13652188684791328 test_loss: 0.13657654523849488
epoch: 5 training_loss 0.13298573952168227 test_loss: 0.13462424278259277
epoch: 6 training_loss 0.1284484712406993 test_loss: 0.12351964712142945
epoch: 7 training_loss 0.12196955386549234 test_loss: 0.11340208053588867
epoch: 8 training_loss 0.12489384394139051 test_loss: 0.13108384609222412
epoch: 9 training_loss 0.12143579699099064 test_loss: 0.1395015001296997
epoch: 10 training_loss 0.11692704515531659 test_loss: 0.1296394944190979
epoch: 11 training_loss 0.10872814070433379 test_loss: 0.11384512186050415
epoch: 12 training_loss 0.11316976228728891 test_loss: 0.15032968521118165
epoch: 13 training_loss 0.11650582168251276 test_loss: 0.1209797978401184
epoch: 14 training_loss 0.11392239674925804 test_loss: 0.11729371547698975
epoch: 15 training_loss 0.11821559891104698 test_loss: 0.12095794677734376
epoch: 16 training_loss 0.10465605963021517 test_loss: 0.125969398021698
epoch: 17 training_loss 0.10522415727376938 test_loss: 0.11753345727920532
epoch: 18 training_loss 0.12006265386939048 test_loss: 0.12064876556396484
epoch: 19 training_loss 0.10775646220892668 test_loss: 0.11475143432617188
epoch: 20 training_loss 0.09992777317762375 test_loss: 0.12952806949615478
epoch: 21 training_loss 0.11547387357801199 test_loss: 0.1274651527404785
epoch: 22 training_loss 0.10235874958336354 test_loss: 0.11011680364608764
epoch: 23 training_loss 0.10416296197101474 test_loss: 0.13334715366363525
epoch: 24 training_loss 0.1075164982676506 test_loss: 0.1221242070198059
epoch: 25 training_loss 0.104438452180475 test_loss: 0.12299530506134033
epoch: 26 training_loss 0.10944573383778333 test_loss: 0.1177264928817749
epoch: 27 training_loss 0.09953422423452139 test_loss: 0.12137558460235595
epoch: 28 training_loss 0.11050536630675196 test_loss: 0.12386401891708373
epoch: 29 training_loss 0.1098454773426056 test_loss: 0.11892796754837036
epoch: 30 training_loss 0.1052609128691256 test_loss: 0.1279648184776306
epoch: 31 training_loss 0.10914077427238227 test_loss: 0.13319538831710814
epoch: 32 training_loss 0.09717095736414194 test_loss: 0.12703617811203002
epoch: 33 training_loss 0.10418407153338194 test_loss: 0.10975865125656128
epoch: 34 training_loss 0.10046906270086765 test_loss: 0.11655786037445068
epoch: 35 training_loss 0.09753896534442902 test_loss: 0.12320520877838134
epoch: 36 training_loss 0.10202106328681111 test_loss: 0.1220629096031189
epoch: 37 training_loss 0.09896974908187986 test_loss: 0.14215222597122193
epoch: 38 training_loss 0.09974411524832248 test_loss: 0.12274637222290039
epoch: 39 training_loss 0.10954203106462955 test_loss: 0.11557449102401733
epoch: 40 training_loss 0.10666009686887264 test_loss: 0.11118860244750976
epoch: 41 training_loss 0.10860010243952274 test_loss: 0.11728835105895996
epoch: 42 training_loss 0.10463007709011435 test_loss: 0.1180688977241516
epoch: 43 training_loss 0.10103593938052655 test_loss: 0.11027144193649292
epoch: 44 training_loss 0.10127857189625501 test_loss: 0.12141673564910889
epoch: 45 training_loss 0.10177474064752459 test_loss: 0.11245942115783691
epoch: 46 training_loss 0.10576126934960485 test_loss: 0.13154153823852538
epoch: 47 training_loss 0.09953783266246319 test_loss: 0.10634105205535889
epoch: 48 training_loss 0.10500724125653506 test_loss: 0.12788541316986085
epoch: 49 training_loss 0.10375342436134816 test_loss: 0.1161826491355896
epoch: 50 training_loss 0.10196787722408772 test_loss: 0.10005059242248535
epoch: 51 training_loss 0.1049667833931744 test_loss: 0.11512256860733032
epoch: 52 training_loss 0.10178020644932985 test_loss: 0.1202541708946228
epoch: 53 training_loss 0.09964254628866911 test_loss: 0.10327785015106201
epoch: 54 training_loss 0.10482843901962041 test_loss: 0.11927751302719117
epoch: 55 training_loss 0.10310392361134291 test_loss: 0.10865744352340698
epoch: 56 training_loss 0.10502747435122728 test_loss: 0.1257678508758545
epoch: 57 training_loss 0.09941550429910422 test_loss: 0.10109107494354248
epoch: 58 training_loss 0.09280022259801626 test_loss: 0.1242497444152832
epoch: 59 training_loss 0.10058273842558264 test_loss: 0.10851112604141236
epoch: 60 training_loss 0.09593173794448376 test_loss: 0.12703423500061034
epoch: 61 training_loss 0.0964587315544486 test_loss: 0.11114338636398316
epoch: 62 training_loss 0.10569958452135325 test_loss: 0.10518150329589844
epoch: 63 training_loss 0.10003130791708827 test_loss: 0.13261038064956665
epoch: 64 training_loss 0.09183765824884177 test_loss: 0.12813953161239625
epoch: 65 training_loss 0.09687034048140049 test_loss: 0.12212191820144654
epoch: 66 training_loss 0.09383604578673839 test_loss: 0.10814119577407837
epoch: 67 training_loss 0.09978706061840058 test_loss: 0.12630218267440796
epoch: 68 training_loss 0.09628315392881631 test_loss: 0.11096036434173584
epoch: 69 training_loss 0.10021935980767012 test_loss: 0.11397697925567626
epoch: 70 training_loss 0.10004086330533028 test_loss: 0.11266950368881226
epoch: 71 training_loss 0.09681803898885846 test_loss: 0.11999834775924682
epoch: 72 training_loss 0.09809887574985623 test_loss: 0.1268383264541626
epoch: 73 training_loss 0.10461677309125662 test_loss: 0.11006160974502563
epoch: 74 training_loss 0.08751725893467664 test_loss: 0.11126086711883545
epoch: 75 training_loss 0.10292210740968585 test_loss: 0.12601194381713868
epoch: 76 training_loss 0.09917649822309613 test_loss: 0.11714727878570556
epoch: 77 training_loss 0.10041252797469497 test_loss: 0.11151268482208251
epoch: 78 training_loss 0.0940036972053349 test_loss: 0.11235909461975098
epoch: 79 training_loss 0.09835550339892507 test_loss: 0.1074336051940918
epoch: 80 training_loss 0.09581277422606944 test_loss: 0.11957713365554809
epoch: 81 training_loss 0.09477184560149908 test_loss: 0.1007662296295166
epoch: 82 training_loss 0.09825891677290201 test_loss: 0.11835702657699584
epoch: 83 training_loss 0.09806575272232294 test_loss: 0.10458158254623413
epoch: 84 training_loss 0.09954615108668805 test_loss: 0.11335952281951904
epoch: 85 training_loss 0.09546247518621385 test_loss: 0.10720741748809814
epoch: 86 training_loss 0.09954715602099895 test_loss: 0.13652684688568115
epoch: 87 training_loss 0.09845870601013303 test_loss: 0.13241477012634278
epoch: 88 training_loss 0.09142600011080504 test_loss: 0.1111295223236084
epoch: 89 training_loss 0.10426856463775039 test_loss: 0.1247948169708252
epoch: 90 training_loss 0.09707781326025725 test_loss: 0.11286026239395142
epoch: 91 training_loss 0.09295213291421532 test_loss: 0.11828521490097046
epoch: 92 training_loss 0.09825714459642768 test_loss: 0.11684725284576417
epoch: 93 training_loss 0.09746471351012588 test_loss: 0.1223831057548523
epoch: 94 training_loss 0.0965609236806631 test_loss: 0.12395312786102294
epoch: 95 training_loss 0.09474074268713593 test_loss: 0.12182857990264892
epoch: 96 training_loss 0.10338901843875646 test_loss: 0.12109376192092895
epoch: 97 training_loss 0.08625551948323845 test_loss: 0.11832703351974487
epoch: 98 training_loss 0.09133502053096891 test_loss: 0.12379333972930909
epoch: 99 training_loss 0.08875573249533772 test_loss: 0.12406091690063477
epoch: 100 training_loss 0.090430106818676 test_loss: 0.12126795053482056
epoch: 101 training_loss 0.0925341831240803 test_loss: 0.11678179502487182
epoch: 102 training_loss 0.09894979180768132 test_loss: 0.12479932308197021
epoch: 103 training_loss 0.0937281072884798 test_loss: 0.11392796039581299
epoch: 104 training_loss 0.09301864508539438 test_loss: 0.1263930082321167
epoch: 105 training_loss 0.09019800339825451 test_loss: 0.13281075954437255
epoch: 106 training_loss 0.09856184676289559 test_loss: 0.1156700611114502
epoch: 107 training_loss 0.0962315246090293 test_loss: 0.1228492021560669
epoch: 108 training_loss 0.09408510860055685 test_loss: 0.12221764326095581
epoch: 109 training_loss 0.1028855144418776 test_loss: 0.10457040071487426
epoch: 110 training_loss 0.10045010283589363 test_loss: 0.11080108880996704
epoch: 111 training_loss 0.10091458708047867 test_loss: 0.11019160747528076
epoch: 112 training_loss 0.0947582134604454 test_loss: 0.11073044538497925
epoch: 113 training_loss 0.08922620819881559 test_loss: 0.10835347175598145
epoch: 114 training_loss 0.09056860188022256 test_loss: 0.1192429542541504
epoch: 115 training_loss 0.09565893474966287 test_loss: 0.12092691659927368
epoch: 116 training_loss 0.08279987243935466 test_loss: 0.10901054143905639
epoch: 117 training_loss 0.09268665079027415 test_loss: 0.10957156419754029
epoch: 118 training_loss 0.09428094808012247 test_loss: 0.11530789136886596
epoch: 119 training_loss 0.09683787032961845 test_loss: 0.10844666957855224
epoch: 120 training_loss 0.09276103122159839 test_loss: 0.11828892230987549
epoch: 121 training_loss 0.10381119959056377 test_loss: 0.1151502013206482
epoch: 122 training_loss 0.09468281973153353 test_loss: 0.11909133195877075
epoch: 123 training_loss 0.09001004433259369 test_loss: 0.10678876638412475
epoch: 124 training_loss 0.09357520345598459 test_loss: 0.10723990201950073
epoch: 125 training_loss 0.09726454066112637 test_loss: 0.1113080620765686
epoch: 126 training_loss 0.09036692723631859 test_loss: 0.12423456907272339
epoch: 127 training_loss 0.09734962806105614 test_loss: 0.09978054761886597
epoch: 128 training_loss 0.08528995042666793 test_loss: 0.12156667709350585
epoch: 129 training_loss 0.09640926705673337 test_loss: 0.12126390933990479
epoch: 130 training_loss 0.08463759800419211 test_loss: 0.10516108274459839
epoch: 131 training_loss 0.09378897853195667 test_loss: 0.11181865930557251
epoch: 132 training_loss 0.09147421726956964 test_loss: 0.11682820320129395
epoch: 133 training_loss 0.08761593734845519 test_loss: 0.11601543426513672
epoch: 134 training_loss 0.08410031713545323 test_loss: 0.12663933038711547
epoch: 135 training_loss 0.0890577830374241 test_loss: 0.10462968349456787
epoch: 136 training_loss 0.08978847688063979 test_loss: 0.10600856542587281
epoch: 137 training_loss 0.09177751276642084 test_loss: 0.1200289011001587
epoch: 138 training_loss 0.09069530492648482 test_loss: 0.11430282592773437
epoch: 139 training_loss 0.08784418197348715 test_loss: 0.11987628936767578
epoch: 140 training_loss 0.09266643771901727 test_loss: 0.10771228075027466
epoch: 141 training_loss 0.08977879408746958 test_loss: 0.10632697343826295
epoch: 142 training_loss 0.09558642564341426 test_loss: 0.110316002368927
epoch: 143 training_loss 0.08623848125338554 test_loss: 0.10929349660873414
epoch: 144 training_loss 0.08899555459618569 test_loss: 0.11838291883468628
epoch: 145 training_loss 0.0882665254920721 test_loss: 0.12398313283920288
epoch: 146 training_loss 0.08868281681090594 test_loss: 0.12264244556427002
epoch: 147 training_loss 0.08958921398967505 test_loss: 0.11406675577163697
epoch: 148 training_loss 0.09023963147774339 test_loss: 0.1042749524116516
epoch: 149 training_loss 0.0864544127508998 test_loss: 0.11707967519760132
epoch: 0 training_loss 0.3152344620227814 test_loss: 0.2141563892364502
epoch: 1 training_loss 0.1780497245490551 test_loss: 0.17548732757568358
epoch: 2 training_loss 0.14377489998936654 test_loss: 0.15806964635849
epoch: 3 training_loss 0.14247074022889136 test_loss: 0.1345016121864319
epoch: 4 training_loss 0.12046787802129984 test_loss: 0.12782936096191405
epoch: 5 training_loss 0.12312852054834365 test_loss: 0.12923336029052734
epoch: 6 training_loss 0.1174471763893962 test_loss: 0.12569184303283693
epoch: 7 training_loss 0.11409329295158387 test_loss: 0.10564916133880616
epoch: 8 training_loss 0.12502668403089046 test_loss: 0.13260231018066407
epoch: 9 training_loss 0.116142586953938 test_loss: 0.11799229383468628
epoch: 10 training_loss 0.11073083061724902 test_loss: 0.12375926971435547
epoch: 11 training_loss 0.11272587306797505 test_loss: 0.11984196901321412
epoch: 12 training_loss 0.10601709425449371 test_loss: 0.13204488754272461
epoch: 13 training_loss 0.1113384597748518 test_loss: 0.10963455438613892
epoch: 14 training_loss 0.10741552416235209 test_loss: 0.1129082441329956
epoch: 15 training_loss 0.10607411000877619 test_loss: 0.11195789575576783
epoch: 16 training_loss 0.10156387474387885 test_loss: 0.11581187248229981
epoch: 17 training_loss 0.10697740387171507 test_loss: 0.13053653240203858
epoch: 18 training_loss 0.10723033893853426 test_loss: 0.11632252931594848
epoch: 19 training_loss 0.11212784143164754 test_loss: 0.12743371725082397
epoch: 20 training_loss 0.10637708079069853 test_loss: 0.12901594638824462
epoch: 21 training_loss 0.11028908407315612 test_loss: 0.11931225061416625
epoch: 22 training_loss 0.10794427888467908 test_loss: 0.1158526062965393
epoch: 23 training_loss 0.10742797695100308 test_loss: 0.11617213487625122
epoch: 24 training_loss 0.10602535774931311 test_loss: 0.10573281049728393
epoch: 25 training_loss 0.11472625957801938 test_loss: 0.10136796236038208
epoch: 26 training_loss 0.10233946111053228 test_loss: 0.11117030382156372
epoch: 27 training_loss 0.10883879542350769 test_loss: 0.10773941278457641
epoch: 28 training_loss 0.11470291227102279 test_loss: 0.10690617561340332
epoch: 29 training_loss 0.10221501886844635 test_loss: 0.10927058458328247
epoch: 30 training_loss 0.10869580892845988 test_loss: 0.10141315460205078
epoch: 31 training_loss 0.10600448239594698 test_loss: 0.10548595190048218
epoch: 32 training_loss 0.1051029209792614 test_loss: 0.12930619716644287
epoch: 33 training_loss 0.10472309459000825 test_loss: 0.14122263193130494
epoch: 34 training_loss 0.11269001953303814 test_loss: 0.10291910171508789
epoch: 35 training_loss 0.10355725670233369 test_loss: 0.10402302742004395
epoch: 36 training_loss 0.10224944684654474 test_loss: 0.1229786992073059
epoch: 37 training_loss 0.10274492910131812 test_loss: 0.10220727920532227
epoch: 38 training_loss 0.10803788444027304 test_loss: 0.10563628673553467
epoch: 39 training_loss 0.10217568412423134 test_loss: 0.11852571964263917
epoch: 40 training_loss 0.1019605541229248 test_loss: 0.12122601270675659
epoch: 41 training_loss 0.10473563326522708 test_loss: 0.11931333541870118
epoch: 42 training_loss 0.1068650918174535 test_loss: 0.1032593846321106
epoch: 43 training_loss 0.10336445108056068 test_loss: 0.12631213665008545
epoch: 44 training_loss 0.10705090817064047 test_loss: 0.11653519868850708
epoch: 45 training_loss 0.10186399228870868 test_loss: 0.11594496965408325
epoch: 46 training_loss 0.10130114931613207 test_loss: 0.10499653816223145
epoch: 47 training_loss 0.10349015949293972 test_loss: 0.10237548351287842
epoch: 48 training_loss 0.09765030281618238 test_loss: 0.11333730220794677
epoch: 49 training_loss 0.10228537868708372 test_loss: 0.12593783140182496
epoch: 50 training_loss 0.10317248838022351 test_loss: 0.12038036584854125
epoch: 51 training_loss 0.09509773356840015 test_loss: 0.10521365404129028
epoch: 52 training_loss 0.1004361697845161 test_loss: 0.11530129909515381
epoch: 53 training_loss 0.10532948378473521 test_loss: 0.1362100601196289
epoch: 54 training_loss 0.10510849766433239 test_loss: 0.1084070086479187
epoch: 55 training_loss 0.10230702226981521 test_loss: 0.118839430809021
epoch: 56 training_loss 0.10386727528646589 test_loss: 0.12087254524230957
epoch: 57 training_loss 0.1029900848865509 test_loss: 0.11255273818969727
epoch: 58 training_loss 0.10543780883774162 test_loss: 0.10834988355636596
epoch: 59 training_loss 0.10342986080795527 test_loss: 0.11775727272033691
epoch: 60 training_loss 0.09589888501912355 test_loss: 0.11645479202270508
epoch: 61 training_loss 0.10341103039681912 test_loss: 0.10463852882385254
epoch: 62 training_loss 0.09581442944705486 test_loss: 0.1100618839263916
epoch: 63 training_loss 0.09399194482713938 test_loss: 0.10505170822143554
epoch: 64 training_loss 0.1056491506099701 test_loss: 0.11039543151855469
epoch: 65 training_loss 0.10112781479954719 test_loss: 0.11450003385543824
epoch: 66 training_loss 0.0987534916959703 test_loss: 0.09754332900047302
epoch: 67 training_loss 0.09558206571266055 test_loss: 0.09571908712387085
epoch: 68 training_loss 0.09762773787602783 test_loss: 0.1195652961730957
epoch: 69 training_loss 0.10552121695131063 test_loss: 0.10483245849609375
epoch: 70 training_loss 0.09886675342917442 test_loss: 0.11709179878234863
epoch: 71 training_loss 0.09724345307797194 test_loss: 0.1166603684425354
epoch: 72 training_loss 0.10527849540114403 test_loss: 0.11773413419723511
epoch: 73 training_loss 0.09878829466179014 test_loss: 0.11200588941574097
epoch: 74 training_loss 0.09129788517951966 test_loss: 0.1072116732597351
epoch: 75 training_loss 0.09898857919499278 test_loss: 0.11765102148056031
epoch: 76 training_loss 0.10018297482281924 test_loss: 0.09933806657791137
epoch: 77 training_loss 0.09383700856938958 test_loss: 0.10519583225250244
epoch: 78 training_loss 0.10540528543293476 test_loss: 0.12088052034378052
epoch: 79 training_loss 0.10516218032687902 test_loss: 0.10063707828521729
epoch: 80 training_loss 0.09890226718969643 test_loss: 0.0976358711719513
epoch: 81 training_loss 0.10740507133305073 test_loss: 0.1171761155128479
epoch: 82 training_loss 0.09741401817649603 test_loss: 0.10730148553848266
epoch: 83 training_loss 0.09937495136633516 test_loss: 0.10662951469421386
epoch: 84 training_loss 0.1027576682344079 test_loss: 0.12577741146087645
epoch: 85 training_loss 0.10114026311784982 test_loss: 0.12254977226257324
epoch: 86 training_loss 0.09401326559484005 test_loss: 0.1321395993232727
epoch: 87 training_loss 0.10451437579467893 test_loss: 0.11695274114608764
epoch: 88 training_loss 0.09974161067977548 test_loss: 0.09881911277770997
epoch: 89 training_loss 0.10252341169863939 test_loss: 0.11087788343429565
epoch: 90 training_loss 0.10079630028456449 test_loss: 0.1199028491973877
epoch: 91 training_loss 0.09700530242174864 test_loss: 0.10177758932113648
epoch: 92 training_loss 0.09531925851479173 test_loss: 0.11408756971359253
epoch: 93 training_loss 0.09453556701540947 test_loss: 0.11085155010223388
epoch: 94 training_loss 0.09446497963741422 test_loss: 0.1078181505203247
epoch: 95 training_loss 0.09757780565880239 test_loss: 0.11923885345458984
epoch: 96 training_loss 0.09412337195128202 test_loss: 0.10151803493499756
epoch: 97 training_loss 0.09129930675029754 test_loss: 0.1315070867538452
epoch: 98 training_loss 0.10065750623121858 test_loss: 0.12473961114883422
epoch: 99 training_loss 0.09447068553417921 test_loss: 0.10983226299285889
epoch: 100 training_loss 0.09052125070244074 test_loss: 0.09551386833190918
epoch: 101 training_loss 0.09879194598644972 test_loss: 0.11438162326812744
epoch: 102 training_loss 0.09903226943686605 test_loss: 0.11642814874649048
epoch: 103 training_loss 0.09583459313958884 test_loss: 0.1111834168434143
epoch: 104 training_loss 0.08742383733391762 test_loss: 0.11793502569198608
epoch: 105 training_loss 0.10224673284217715 test_loss: 0.10278549194335937
epoch: 106 training_loss 0.09679732158780098 test_loss: 0.11165308952331543
epoch: 107 training_loss 0.10380251104012132 test_loss: 0.11238949298858643
epoch: 108 training_loss 0.09142931502312422 test_loss: 0.10079141855239868
epoch: 109 training_loss 0.09644346633926033 test_loss: 0.10882909297943115
epoch: 110 training_loss 0.0902093337662518 test_loss: 0.09835101962089539
epoch: 111 training_loss 0.0920578796043992 test_loss: 0.11824716329574585
epoch: 112 training_loss 0.10163864929229022 test_loss: 0.1213636040687561
epoch: 113 training_loss 0.09205679256469011 test_loss: 0.11076078414916993
epoch: 114 training_loss 0.0923769505135715 test_loss: 0.11213023662567138
epoch: 115 training_loss 0.0935069446451962 test_loss: 0.11494669914245606
epoch: 116 training_loss 0.09003762051463127 test_loss: 0.10435364246368409
epoch: 117 training_loss 0.09258898843079805 test_loss: 0.08830989599227905
epoch: 118 training_loss 0.09324382208287715 test_loss: 0.1184137225151062
epoch: 119 training_loss 0.09283439671620726 test_loss: 0.10196886062622071
epoch: 120 training_loss 0.10174928054213524 test_loss: 0.10568128824234009
epoch: 121 training_loss 0.09245730003342033 test_loss: 0.10435049533843994
epoch: 122 training_loss 0.09565083149820566 test_loss: 0.10944937467575074
epoch: 123 training_loss 0.08958842799067497 test_loss: 0.09685004353523255
epoch: 124 training_loss 0.0887520350329578 test_loss: 0.11802302598953247
epoch: 125 training_loss 0.09619743123650551 test_loss: 0.12740862369537354
epoch: 126 training_loss 0.0964567206054926 test_loss: 0.11953219175338745
epoch: 127 training_loss 0.09212046315893531 test_loss: 0.12922351360321044
epoch: 128 training_loss 0.08917097518220544 test_loss: 0.1329903483390808
epoch: 129 training_loss 0.08941053593531251 test_loss: 0.12407714128494263
epoch: 130 training_loss 0.08624690059572458 test_loss: 0.10386183261871337
epoch: 131 training_loss 0.09468394858762622 test_loss: 0.11301170587539673
epoch: 132 training_loss 0.09173476781696081 test_loss: 0.11414916515350342
epoch: 133 training_loss 0.08968649733811616 test_loss: 0.12066336870193481
epoch: 134 training_loss 0.09416169043630361 test_loss: 0.10622193813323974
epoch: 135 training_loss 0.09021406210958957 test_loss: 0.12359977960586548
epoch: 136 training_loss 0.09514124069362878 test_loss: 0.11618751287460327
epoch: 137 training_loss 0.0924852211214602 test_loss: 0.12977957725524902
epoch: 138 training_loss 0.08541451133787632 test_loss: 0.1146125316619873
epoch: 139 training_loss 0.09536107137799263 test_loss: 0.12362512350082397
epoch: 140 training_loss 0.0880807644687593 test_loss: 0.10930373668670654
epoch: 141 training_loss 0.0948643520474434 test_loss: 0.11430457830429078
epoch: 142 training_loss 0.08785760285332798 test_loss: 0.11196190118789673
epoch: 143 training_loss 0.08718778464943171 test_loss: 0.11000605821609497
epoch: 144 training_loss 0.08450546151027083 test_loss: 0.1085241198539734
epoch: 145 training_loss 0.0899033123627305 test_loss: 0.09925239086151123
epoch: 146 training_loss 0.08996721953153611 test_loss: 0.10086617469787598
epoch: 147 training_loss 0.08964526871219278 test_loss: 0.10841102600097656
epoch: 148 training_loss 0.0841144965775311 test_loss: 0.12755076885223388
epoch: 149 training_loss 0.08173921114765108 test_loss: 0.089506334066391
episode: 0 training return: -999.9409688487814
episode: 1 training return: -999.9385553239225
episode: 2 training return: -999.9181665685765
episode: 3 training return: -999.9440625108643
epoch: 1 test_true_pfm: -0.38315553228153404 sim_pfm: -999.9559153896671
episode: 4 training return: -999.9405013672588
episode: 5 training return: -999.9206057435835
episode: 6 training return: -999.925708252662
episode: 7 training return: -999.9449769864749
epoch: 2 test_true_pfm: -0.9084349475158922 sim_pfm: -999.9525382923224
episode: 8 training return: -999.9137233198886
episode: 9 training return: -999.9275261584816
episode: 10 training return: -999.9546113668049
episode: 11 training return: -999.9258159693995
epoch: 3 test_true_pfm: -0.7025038864240593 sim_pfm: -999.9530956561908
episode: 12 training return: -999.9445921349742
episode: 13 training return: -999.4108875506045
episode: 14 training return: -999.9408584494126
episode: 15 training return: -999.9223306730408
epoch: 4 test_true_pfm: -0.2359091989259592 sim_pfm: -999.952790691646
episode: 16 training return: -999.9496938415169
episode: 17 training return: -999.9301668587183
episode: 18 training return: -999.9119587166867
episode: 19 training return: -999.9100891056744
epoch: 5 test_true_pfm: -0.11105720374144952 sim_pfm: -999.9539837413877
episode: 20 training return: -999.9286241508051
episode: 21 training return: -999.9311042324459
episode: 22 training return: -999.933442979145
episode: 23 training return: -999.9406325592122
epoch: 6 test_true_pfm: -0.46998635755321255 sim_pfm: -999.9527639483722
episode: 24 training return: -999.9245894909454
episode: 25 training return: -1000.0620158944245
episode: 26 training return: -999.9323714096879
episode: 27 training return: -999.9348709581392
epoch: 7 test_true_pfm: 0.23691274346964816 sim_pfm: -999.9536799329144
episode: 28 training return: -999.9223704047912
episode: 29 training return: -999.927306130835
episode: 30 training return: -999.919846189433
episode: 31 training return: -999.9199656432318
epoch: 8 test_true_pfm: -0.575544259812775 sim_pfm: -999.9533717522921
episode: 32 training return: -999.9257754064138
episode: 33 training return: -999.9256674772749
episode: 34 training return: -999.9261632625596
episode: 35 training return: -999.9367601687029
epoch: 9 test_true_pfm: -0.9693133520832049 sim_pfm: -999.9538522312861
episode: 36 training return: -999.9311171774395
episode: 37 training return: -999.9370093013057
episode: 38 training return: -999.9308923141135
episode: 39 training return: -999.927894482666
epoch: 10 test_true_pfm: 0.3832079256245691 sim_pfm: -999.9532203498093
episode: 40 training return: -999.94514417798
episode: 41 training return: -999.9345100418977
episode: 42 training return: -999.9236079237115
episode: 43 training return: -999.9345777930333
epoch: 11 test_true_pfm: -0.7722027643476724 sim_pfm: -999.9538284191032
episode: 44 training return: -999.9154728017237
episode: 45 training return: -999.9397514648664
episode: 46 training return: -999.8958259695956
episode: 47 training return: -999.9341300622881
epoch: 12 test_true_pfm: -1.2263293384226521 sim_pfm: -999.9532626536014
episode: 48 training return: -999.9354333641395
episode: 49 training return: -999.9343655179375
episode: 50 training return: -999.9009994065359
episode: 51 training return: -999.9320855330976
epoch: 13 test_true_pfm: -0.4556520166805591 sim_pfm: -999.9540333819726
episode: 52 training return: -999.9233034796345
episode: 53 training return: -999.6762732422487
episode: 54 training return: -999.9327638077968
episode: 55 training return: -999.9324449660813
epoch: 14 test_true_pfm: -1.0778068602014115 sim_pfm: -999.9529329054322
episode: 56 training return: -999.9067668841523
episode: 57 training return: -999.9279625068153
episode: 58 training return: -999.923557979193
episode: 59 training return: -999.9088400628594
epoch: 15 test_true_pfm: -0.5467376131739453 sim_pfm: -999.9542484353614
episode: 60 training return: -999.9225408273074
episode: 61 training return: -999.9232998624924
episode: 62 training return: -999.9131617885749
episode: 63 training return: -999.9238003207355
epoch: 16 test_true_pfm: -0.11951841904041198 sim_pfm: -999.9530690243455
episode: 64 training return: -999.914962123609
episode: 65 training return: -999.9081231765805
episode: 66 training return: -999.9185807856983
episode: 67 training return: -999.9355134556613
epoch: 17 test_true_pfm: -0.0058950589578778785 sim_pfm: -999.9526642547016
episode: 68 training return: -999.9364653306124
episode: 69 training return: -999.9315483217343
episode: 70 training return: -999.937555498624
episode: 71 training return: -999.9389226755216
epoch: 18 test_true_pfm: -0.42397862494442595 sim_pfm: -999.9541537171891
episode: 72 training return: -999.932319057086
episode: 73 training return: -1000.0053910675766
episode: 74 training return: -999.9277501045386
episode: 75 training return: -999.9330143036948
epoch: 19 test_true_pfm: -0.1447907178139685 sim_pfm: -999.9532812359707
episode: 76 training return: -999.9071408891855
episode: 77 training return: -999.9284646681281
episode: 78 training return: -999.9307461864965
episode: 79 training return: -999.9448443680802
epoch: 20 test_true_pfm: 0.13379695524646187 sim_pfm: -999.9544415566447
episode: 80 training return: -999.9411115902416
episode: 81 training return: -999.9356050514064
episode: 82 training return: -999.3343376738732
episode: 83 training return: -999.9360159198872
epoch: 21 test_true_pfm: 0.11509633263286567 sim_pfm: -999.9539313618346
episode: 84 training return: -999.9255066054716
episode: 85 training return: -999.9083326185853
episode: 86 training return: -999.9494998808627
episode: 87 training return: -999.9336054097283
epoch: 22 test_true_pfm: -0.0926588006161473 sim_pfm: -999.9524974155074
episode: 88 training return: -999.9260008782855
episode: 89 training return: -999.9354267497497
episode: 90 training return: -999.9144675550954
episode: 91 training return: -999.9379536697176
epoch: 23 test_true_pfm: -0.7299226522086176 sim_pfm: -999.9536608575596
episode: 92 training return: -999.9308915996747
episode: 93 training return: -999.9340565692457
episode: 94 training return: -999.9438986618609
episode: 95 training return: -999.9233582063966
epoch: 24 test_true_pfm: -0.36560190991670455 sim_pfm: -999.953965280698
episode: 96 training return: -999.9212308078261
episode: 97 training return: -1000.0341808821155
episode: 98 training return: -999.9334759892005
episode: 99 training return: -999.931566335366
epoch: 25 test_true_pfm: -0.22494897881514198 sim_pfm: -999.9530477966009
episode: 100 training return: -999.9433537926382
episode: 101 training return: -999.9268694488998
episode: 102 training return: -999.9513428308097
episode: 103 training return: -999.915067954448
epoch: 26 test_true_pfm: 0.23944753095572133 sim_pfm: -999.9538783721222
episode: 104 training return: -999.9155606593688
episode: 105 training return: -999.9430899177022
episode: 106 training return: -999.9348575161019
episode: 107 training return: -999.9097192630397
epoch: 27 test_true_pfm: -0.7799250730722868 sim_pfm: -999.9538657864091
episode: 108 training return: -999.9401518373109
episode: 109 training return: -999.9104672064407
episode: 110 training return: -999.9189741261857
episode: 111 training return: -999.9439289910267
epoch: 28 test_true_pfm: -0.4433601436262106 sim_pfm: -999.9534499185289
episode: 112 training return: -999.9382807641847
episode: 113 training return: -999.9476737401683
episode: 114 training return: -999.930231900147
episode: 115 training return: -999.9306260586077
epoch: 29 test_true_pfm: -0.4709688924027104 sim_pfm: -999.9531477305168
episode: 116 training return: -999.9329464180146
episode: 117 training return: -999.9169270252802
episode: 118 training return: -999.9182173135197
episode: 119 training return: -999.9181905019536
epoch: 30 test_true_pfm: -0.4454623744331836 sim_pfm: -999.9531358679532
episode: 120 training return: -999.9220537038601
episode: 121 training return: -999.9255367659865
episode: 122 training return: -999.9397744306376
episode: 123 training return: -999.9279793927277
epoch: 31 test_true_pfm: -0.5934603772191244 sim_pfm: -999.9526542292473
episode: 124 training return: -999.8974948344753
episode: 125 training return: -999.9300107647147
episode: 126 training return: -999.9300636104783
episode: 127 training return: -999.9334702991903
epoch: 32 test_true_pfm: -0.5715747506608292 sim_pfm: -999.9529522701138
episode: 128 training return: -999.9229737472444
episode: 129 training return: -999.9295653132787
episode: 130 training return: -999.9336225549362
episode: 131 training return: -999.9215857054838
epoch: 33 test_true_pfm: 0.4041401872161776 sim_pfm: -999.9535349044571
episode: 132 training return: -999.9272601335982
episode: 133 training return: -999.9281400036667
episode: 134 training return: -999.9404100375933
episode: 135 training return: -999.1550929858888
epoch: 34 test_true_pfm: -0.43638486893789596 sim_pfm: -999.9540809448011
episode: 136 training return: -999.9145291968977
episode: 137 training return: -999.9475808044474
episode: 138 training return: -999.9365277778943
episode: 139 training return: -999.9295599565443
epoch: 35 test_true_pfm: -0.46707835552770366 sim_pfm: -999.9531653917073
episode: 140 training return: -999.6144502918752
episode: 141 training return: -999.9428087654344
episode: 142 training return: -999.9312320025143
episode: 143 training return: -999.9459029046981
epoch: 36 test_true_pfm: -0.23785576190262944 sim_pfm: -999.9530383434876
episode: 144 training return: -999.9380167193368
episode: 145 training return: -999.9166422435695
episode: 146 training return: -999.9004070794115
episode: 147 training return: -999.9055324837403
epoch: 37 test_true_pfm: 0.48947939627363923 sim_pfm: -999.9522664699562
episode: 148 training return: -999.9164124277435
episode: 149 training return: -999.9132942729045
episode: 150 training return: -999.9421420300336
episode: 151 training return: -999.9196194883256
epoch: 38 test_true_pfm: -0.0550122439853882 sim_pfm: -999.9532692277338
episode: 152 training return: -999.9236637110976
episode: 153 training return: -999.9427688643226
episode: 154 training return: -999.9306521289283
episode: 155 training return: -999.9042736082598
epoch: 39 test_true_pfm: -0.39999340778408293 sim_pfm: -999.9542832989924
episode: 156 training return: -999.9326692883802
episode: 157 training return: -999.9264781593052
episode: 158 training return: -999.9187107618061
episode: 159 training return: -999.9188090729059
epoch: 40 test_true_pfm: 0.15742185261889852 sim_pfm: -999.9529772740976
episode: 160 training return: -999.930668692477
episode: 161 training return: -999.9270715793614
episode: 162 training return: -999.9345331208482
episode: 163 training return: -999.9326515400047
epoch: 41 test_true_pfm: 0.19297716834242737 sim_pfm: -999.9535069943116
episode: 164 training return: -999.9458288741905
episode: 165 training return: -999.9259717763039
episode: 166 training return: -999.9187666085757
episode: 167 training return: -999.9419279605296
epoch: 42 test_true_pfm: -0.6561049380993439 sim_pfm: -999.9536789174969
episode: 168 training return: -999.9413990948892
episode: 169 training return: -999.9476700938696
episode: 170 training return: -999.9275305698999
episode: 171 training return: -999.93447673991
epoch: 43 test_true_pfm: 0.3043635846709751 sim_pfm: -999.9535922222503
episode: 172 training return: -999.909103324659
episode: 173 training return: -999.92469973424
episode: 174 training return: -999.9190635352512
episode: 175 training return: -999.9303773571968
epoch: 44 test_true_pfm: 0.0280545911351389 sim_pfm: -999.9538226138249
episode: 176 training return: -999.9195254332095
episode: 177 training return: -999.7570789237731
episode: 178 training return: -999.9119314985758
episode: 179 training return: -999.9233008871648
epoch: 45 test_true_pfm: -0.05585227351327255 sim_pfm: -999.9535893953316
episode: 180 training return: -999.9316223276888
episode: 181 training return: -999.9323313944957
episode: 182 training return: -999.9280893391547
episode: 183 training return: -999.9423125824101
epoch: 46 test_true_pfm: -0.10028456870353347 sim_pfm: -999.9546471711607
episode: 184 training return: -999.9019208791505
episode: 185 training return: -999.5881800506779
episode: 186 training return: -999.9293017645687
episode: 187 training return: -999.926814444859
epoch: 47 test_true_pfm: 0.005927643875693112 sim_pfm: -999.9545000723265
episode: 188 training return: -999.9254949256797
episode: 189 training return: -999.9163489731823
episode: 190 training return: -999.9258588352834
episode: 191 training return: -999.918818496128
epoch: 48 test_true_pfm: -0.49492068281019624 sim_pfm: -999.9533165119732
episode: 192 training return: -999.9262845897777
episode: 193 training return: -999.9266816486681
episode: 194 training return: -999.9549090463352
episode: 195 training return: -999.9234670698302
epoch: 49 test_true_pfm: 0.10270313584960052 sim_pfm: -999.9536262836027
episode: 196 training return: -999.7706446825349
episode: 197 training return: -999.9251593320155
episode: 198 training return: -999.9351067877981
episode: 199 training return: -999.9374952446342
epoch: 50 test_true_pfm: -0.20972128814205973 sim_pfm: -999.9552475768979
episode: 200 training return: -999.9215122258126
episode: 201 training return: -999.9345433835377
episode: 202 training return: -1000.1589360625218
episode: 203 training return: -999.9296126908125
epoch: 51 test_true_pfm: -0.16335392588841188 sim_pfm: -999.9539326307046
episode: 204 training return: -999.9444533932473
episode: 205 training return: -999.9368212433914
episode: 206 training return: -999.9244832578281
episode: 207 training return: -999.8964729738709
epoch: 52 test_true_pfm: -0.3447535439811739 sim_pfm: -999.9534393691612
episode: 208 training return: -999.930626363589
episode: 209 training return: -999.9412182943643
episode: 210 training return: -999.9159723111169
episode: 211 training return: -999.9265910406998
epoch: 53 test_true_pfm: -0.648878406893587 sim_pfm: -999.9539577639922
episode: 212 training return: -999.9337882289639
episode: 213 training return: -999.921219829257
episode: 214 training return: -999.9332075505752
episode: 215 training return: -999.9541352735029
epoch: 54 test_true_pfm: 0.16403555282835974 sim_pfm: -999.9540963936623
episode: 216 training return: -999.9112525895795
episode: 217 training return: -999.9322242069519
episode: 218 training return: -999.928481783344
episode: 219 training return: -999.9320881279319
epoch: 55 test_true_pfm: 0.2769303798396274 sim_pfm: -999.953609866005
episode: 220 training return: -999.9368149250914
episode: 221 training return: -999.9121450268605
episode: 222 training return: -999.9102566100876
episode: 223 training return: -999.9502290957928
epoch: 56 test_true_pfm: -0.5909659408245366 sim_pfm: -999.9541984955334
episode: 224 training return: -999.9095503145709
episode: 225 training return: -999.9454449525417
episode: 226 training return: -999.9023940925313
episode: 227 training return: -999.9060251646755
epoch: 57 test_true_pfm: -0.1322268777706343 sim_pfm: -999.953240652736
episode: 228 training return: -999.9158073765384
episode: 229 training return: -999.9302187550692
episode: 230 training return: -999.9243417207007
episode: 231 training return: -999.9215301396108
epoch: 58 test_true_pfm: -0.5626490591871495 sim_pfm: -999.9532227205892
episode: 232 training return: -999.9407538590093
episode: 233 training return: -999.9363786290563
episode: 234 training return: -999.932832810718
episode: 235 training return: -999.9435652768792
epoch: 59 test_true_pfm: 0.3883995215477482 sim_pfm: -999.9531809798124
episode: 236 training return: -999.9003587434062
episode: 237 training return: -999.9326569868206
episode: 238 training return: -999.9093854312486
episode: 239 training return: -999.9234496825917
epoch: 60 test_true_pfm: -1.1071122134398623 sim_pfm: -999.9530804056616
episode: 240 training return: -999.9072302954899
episode: 241 training return: -999.9233672802843
episode: 242 training return: -999.9357636502161
episode: 243 training return: -999.9330289361544
epoch: 61 test_true_pfm: 0.08161522256414126 sim_pfm: -999.9535616471218
episode: 244 training return: -999.9455179781398
episode: 245 training return: -999.9342438773857
episode: 246 training return: -999.9107598902142
episode: 247 training return: -999.9506870005538
epoch: 62 test_true_pfm: 0.46603770023051455 sim_pfm: -999.9533391995948
episode: 248 training return: -999.9391442122329
episode: 249 training return: -999.9297748277186
episode: 250 training return: -999.8843690277901
episode: 251 training return: -999.9396630500692
epoch: 63 test_true_pfm: -0.6503169714774231 sim_pfm: -999.953200088537
episode: 252 training return: -999.9171294588466
episode: 253 training return: -999.939376122482
episode: 254 training return: -999.9271648786596
episode: 255 training return: -999.9485110249954
epoch: 64 test_true_pfm: -0.2763116124476399 sim_pfm: -999.9542498799336
episode: 256 training return: -999.9199216927439
episode: 257 training return: -999.9374307042701
episode: 258 training return: -999.9338534362105
episode: 259 training return: -999.937908277877
epoch: 65 test_true_pfm: -0.699072703031506 sim_pfm: -999.9522188098093
episode: 260 training return: -999.9471912403488
episode: 261 training return: -999.9211964664551
episode: 262 training return: -999.9285178661578
episode: 263 training return: -999.9471196337147
epoch: 66 test_true_pfm: -0.2798586772231068 sim_pfm: -999.9538249226983
episode: 264 training return: -999.9094262838202
episode: 265 training return: -999.9374841261096
episode: 266 training return: -999.9266666688585
episode: 267 training return: -999.9285207559782
epoch: 67 test_true_pfm: -0.09454648846332275 sim_pfm: -999.9531895085279
episode: 268 training return: -999.9370659451663
episode: 269 training return: -999.9209593550657
episode: 270 training return: -999.937694265151
episode: 271 training return: -999.9304388972403
epoch: 68 test_true_pfm: -0.1475079794531698 sim_pfm: -999.9543233064877
episode: 272 training return: -999.9357269879418
episode: 273 training return: -999.8990405642663
episode: 274 training return: -999.9345428031568
episode: 275 training return: -999.9286964120952
epoch: 69 test_true_pfm: -0.15752455101974402 sim_pfm: -999.9531193694855
episode: 276 training return: -999.9130686915801
episode: 277 training return: -999.9401196075609
episode: 278 training return: -999.9386493554952
episode: 279 training return: -999.9374221781104
epoch: 70 test_true_pfm: -0.6632988591611241 sim_pfm: -999.9529743598622
episode: 280 training return: -999.9122084902211
episode: 281 training return: -999.913949837951
episode: 282 training return: -999.9382070140601
episode: 283 training return: -999.9090438505277
epoch: 71 test_true_pfm: -0.6557325173337262 sim_pfm: -999.9533786852935
episode: 284 training return: -999.9316126598975
episode: 285 training return: -999.9420869809766
episode: 286 training return: -999.936136539264
episode: 287 training return: -999.9344146452087
epoch: 72 test_true_pfm: -0.13858561000080113 sim_pfm: -999.9532418152212
episode: 288 training return: -999.9104263451235
episode: 289 training return: -999.9301565716294
episode: 290 training return: -999.9561423443129
episode: 291 training return: -999.917479619264
epoch: 73 test_true_pfm: -0.40319955030176485 sim_pfm: -999.9536688447835
episode: 292 training return: -999.9283611473126
episode: 293 training return: -999.8928837182418
episode: 294 training return: -999.918997596485
episode: 295 training return: -999.9056412740489
epoch: 74 test_true_pfm: -0.055885435672841634 sim_pfm: -999.9541291453397
episode: 296 training return: -999.9420712003387
episode: 297 training return: -999.9094279166159
episode: 298 training return: -999.9019457246765
episode: 299 training return: -999.9419329756747
epoch: 75 test_true_pfm: -0.48571519639563204 sim_pfm: -999.954681169219
episode: 300 training return: -999.912798800972
episode: 301 training return: -999.8988694511171
episode: 302 training return: -999.9262786340997
episode: 303 training return: -999.9505883603291
epoch: 76 test_true_pfm: 0.13677107221047327 sim_pfm: -999.9534017998773
episode: 304 training return: -999.9338868414476
episode: 305 training return: -999.9128717335142
episode: 306 training return: -999.9341420194304
episode: 307 training return: -999.9273716705296
epoch: 77 test_true_pfm: 0.22863237625850927 sim_pfm: -999.9543623860137
episode: 308 training return: -999.9267325100924
episode: 309 training return: -999.9341368003875
episode: 310 training return: -999.9402231829739
episode: 311 training return: -999.9223519584647
epoch: 78 test_true_pfm: -0.025039834756489927 sim_pfm: -999.9526757271714
episode: 312 training return: -999.9241371080174
episode: 313 training return: -999.9196760902141
episode: 314 training return: -999.9269489448984
episode: 315 training return: -999.925993869563
epoch: 79 test_true_pfm: -1.1915902828981146 sim_pfm: -999.9536873602225
episode: 316 training return: -999.940127038511
episode: 317 training return: -999.9232807577391
episode: 318 training return: -999.9265004659974
episode: 319 training return: -999.9070132007608
epoch: 80 test_true_pfm: -0.4766041624376885 sim_pfm: -999.9540276782781
episode: 320 training return: -999.9368301496249
episode: 321 training return: -999.9449655986007
episode: 322 training return: -999.9217124971021
episode: 323 training return: -999.9285091394128
epoch: 81 test_true_pfm: -0.907073815266767 sim_pfm: -999.9531602367538
episode: 324 training return: -999.9213337830487
episode: 325 training return: -999.9254634742553
episode: 326 training return: -999.932181714863
episode: 327 training return: -999.9329699409018
epoch: 82 test_true_pfm: -0.5603377129891729 sim_pfm: -999.9537325570063
episode: 328 training return: -999.9193190845302
episode: 329 training return: -999.937413731234
episode: 330 training return: -999.923702136521
episode: 331 training return: -999.9222614393118
epoch: 83 test_true_pfm: -0.5740394031343564 sim_pfm: -999.9534528624663
episode: 332 training return: -999.896856579797
episode: 333 training return: -999.9266479246756
episode: 334 training return: -999.9160360494506
episode: 335 training return: -999.9183350707701
epoch: 84 test_true_pfm: -0.092988188435371 sim_pfm: -999.9525760814798
episode: 336 training return: -999.9233006301848
episode: 337 training return: -999.9136193983613
episode: 338 training return: -999.9443650388693
episode: 339 training return: -999.9188213721502
epoch: 85 test_true_pfm: -0.5309171370338075 sim_pfm: -999.9533431756817
episode: 340 training return: -999.9240691006697
episode: 341 training return: -999.9430577220213
episode: 342 training return: -999.9155297910836
episode: 343 training return: -999.9402992220738
epoch: 86 test_true_pfm: -0.11422034483276484 sim_pfm: -999.9541289287894
episode: 344 training return: -999.9171016022898
episode: 345 training return: -999.9050123426023
episode: 346 training return: -999.9243301538781
episode: 347 training return: -999.945409846673
epoch: 87 test_true_pfm: 0.006351442649491279 sim_pfm: -999.9518828879654
episode: 348 training return: -999.9299646584545
episode: 349 training return: -999.9080687940453
episode: 350 training return: -999.9416414333806
episode: 351 training return: -999.9136699458965
epoch: 88 test_true_pfm: 0.25263748551726206 sim_pfm: -999.9546562206216
episode: 352 training return: -999.9217137603168
episode: 353 training return: -999.9513532434719
episode: 354 training return: -999.920299240307
episode: 355 training return: -999.9319439640755
epoch: 89 test_true_pfm: -0.3987870832546905 sim_pfm: -999.9534909293778
episode: 356 training return: -999.940069769942
episode: 357 training return: -999.9293251831591
episode: 358 training return: -999.9305436278471
episode: 359 training return: -999.9253458615443
epoch: 90 test_true_pfm: -0.4362535363512161 sim_pfm: -999.9539703921581
episode: 360 training return: -999.9328524696282
episode: 361 training return: -999.9314695199769
episode: 362 training return: -999.9330171738355
episode: 363 training return: -999.93667330712
epoch: 91 test_true_pfm: -0.7411277086265078 sim_pfm: -999.9526844022398
episode: 364 training return: -999.9316191139295
episode: 365 training return: -999.8991894651068
episode: 366 training return: -999.9323710609182
episode: 367 training return: -999.911246470198
epoch: 92 test_true_pfm: 0.1935523043038958 sim_pfm: -999.9526654846317
episode: 368 training return: -999.9212367863993
episode: 369 training return: -999.904124767459
episode: 370 training return: -999.9289681173947
episode: 371 training return: -999.9325603875169
epoch: 93 test_true_pfm: 0.010683959125929324 sim_pfm: -999.9528159320826
episode: 372 training return: -999.9419490025956
episode: 373 training return: -999.9248048729394
episode: 374 training return: -999.9384104736475
episode: 375 training return: -999.9370375933931
epoch: 94 test_true_pfm: -0.030948313636216456 sim_pfm: -999.9541005452252
episode: 376 training return: -999.9098871398849
episode: 377 training return: -999.9183968097055
episode: 378 training return: -999.9405264349504
episode: 379 training return: -999.9131900964044
epoch: 95 test_true_pfm: -0.9226843974845322 sim_pfm: -999.9535057239347
episode: 380 training return: -999.9366302761284
episode: 381 training return: -999.9160785580464
episode: 382 training return: -999.9228786210679
episode: 383 training return: -999.9374802267156
epoch: 96 test_true_pfm: -0.981731045209513 sim_pfm: -999.9534568398358
episode: 384 training return: -999.9119353566175
episode: 385 training return: -999.9269923729069
episode: 386 training return: -999.9446081479427
episode: 387 training return: -999.9303469250826
epoch: 97 test_true_pfm: -0.9903453804935681 sim_pfm: -999.9527761016126
episode: 388 training return: -999.9228242114629
episode: 389 training return: -999.9385895186748
episode: 390 training return: -999.2239965719276
episode: 391 training return: -999.9255249551347
epoch: 98 test_true_pfm: -0.1402486787764006 sim_pfm: -999.9538891176608
episode: 392 training return: -999.9371855450139
episode: 393 training return: -999.9264089194932
episode: 394 training return: -999.9231647925994
episode: 395 training return: -999.9329389638535
epoch: 99 test_true_pfm: -0.7512360517525756 sim_pfm: -999.9532890627597
episode: 396 training return: -999.9397025306382
episode: 397 training return: -999.9269112028497
episode: 398 training return: -999.9364562789859
episode: 399 training return: -999.9356089813751
epoch: 100 test_true_pfm: 0.14540452316762478 sim_pfm: -999.9534758215714
episode: 400 training return: -999.9124752908654
episode: 401 training return: -999.9304895551004
episode: 402 training return: -999.9251280000198
episode: 403 training return: -999.929821265003
epoch: 101 test_true_pfm: -0.4755475101682885 sim_pfm: -999.9536267521286
episode: 404 training return: -999.9239344658976
episode: 405 training return: -999.9079186674762
episode: 406 training return: -999.9228448336728
episode: 407 training return: -999.9089354161562
epoch: 102 test_true_pfm: -0.7315841151902805 sim_pfm: -999.9528455643667
episode: 408 training return: -999.9129206877135
episode: 409 training return: -999.908409475772
episode: 410 training return: -999.9247087497504
episode: 411 training return: -999.9561951484503
epoch: 103 test_true_pfm: -0.4165074191641507 sim_pfm: -999.9531233087383
episode: 412 training return: -999.9474222767398
episode: 413 training return: -999.9126434220653
episode: 414 training return: -999.9056957031308
episode: 415 training return: -999.9340348099329
epoch: 104 test_true_pfm: -0.408300250438464 sim_pfm: -999.9537212672464
episode: 416 training return: -999.9096641684682
episode: 417 training return: -999.9376771911043
episode: 418 training return: -999.9089397575425
episode: 419 training return: -999.9218021890101
epoch: 105 test_true_pfm: -0.5470871834895505 sim_pfm: -999.9529607022301
episode: 420 training return: -999.9210116728037
episode: 421 training return: -999.915201186618
episode: 422 training return: -999.9422457763537
episode: 423 training return: -999.9290943726662
epoch: 106 test_true_pfm: -1.0845314127658428 sim_pfm: -999.953374636323
episode: 424 training return: -999.9426462158777
episode: 425 training return: -999.9212886008476
episode: 426 training return: -999.9172599861487
episode: 427 training return: -999.9310378174954
epoch: 107 test_true_pfm: -0.5059675391492395 sim_pfm: -999.9538675727108
episode: 428 training return: -999.9164262724919
episode: 429 training return: -999.9514130878824
episode: 430 training return: -999.9295147898001
episode: 431 training return: -999.9123384328796
epoch: 108 test_true_pfm: -0.2828466622918511 sim_pfm: -999.9534830534394
episode: 432 training return: -999.9156671656024
episode: 433 training return: -999.9344329347902
episode: 434 training return: -999.931054775515
episode: 435 training return: -999.9257712206592
epoch: 109 test_true_pfm: -0.051870580383870546 sim_pfm: -999.954274760045
episode: 436 training return: -999.9484212604577
episode: 437 training return: -999.9258590530578
episode: 438 training return: -999.9333678573885
episode: 439 training return: -999.9203436546067
epoch: 110 test_true_pfm: -0.6205856694444337 sim_pfm: -999.9537354553127
episode: 440 training return: -999.9337921197159
episode: 441 training return: -999.9416703628287
episode: 442 training return: -999.9332897738987
episode: 443 training return: -999.9347192435797
epoch: 111 test_true_pfm: -0.2366250320890406 sim_pfm: -999.9527216674711
episode: 444 training return: -999.8982864776582
episode: 445 training return: -999.9355874195468
episode: 446 training return: -999.9200063253213
episode: 447 training return: -999.9212917211038
epoch: 112 test_true_pfm: 0.5186268781130441 sim_pfm: -999.9534133661876
episode: 448 training return: -999.9144272416872
episode: 449 training return: -999.9339475577397
episode: 450 training return: -999.928376925993
episode: 451 training return: -999.936463010879
epoch: 113 test_true_pfm: -0.49545546758091846 sim_pfm: -999.9536845765566
episode: 452 training return: -999.9241518320598
episode: 453 training return: -999.9314780949745
episode: 454 training return: -999.9444184657262
episode: 455 training return: -999.9284928817934
epoch: 114 test_true_pfm: -0.8698184064654778 sim_pfm: -999.9533195764956
episode: 456 training return: -999.9204378403489
episode: 457 training return: -999.9221364574104
episode: 458 training return: -999.2689006816644
episode: 459 training return: -999.9257704110722
epoch: 115 test_true_pfm: 0.6159229902699469 sim_pfm: -999.9528844935684
episode: 460 training return: -999.8915938389557
episode: 461 training return: -999.9327967861152
episode: 462 training return: -999.9344121203671
episode: 463 training return: -999.9239217888597
epoch: 116 test_true_pfm: -0.3044315660086194 sim_pfm: -999.9541570598526
episode: 464 training return: -999.9380705890251
episode: 465 training return: -999.8810474109449
episode: 466 training return: -999.9314976718388
episode: 467 training return: -999.9461174055949
epoch: 117 test_true_pfm: -0.4368915898288372 sim_pfm: -999.9537855256589
episode: 468 training return: -999.9241619177864
episode: 469 training return: -999.9092970582908
episode: 470 training return: -999.9247738359992
episode: 471 training return: -999.9275634953477
epoch: 118 test_true_pfm: -0.4190041652189218 sim_pfm: -999.9545284547252
episode: 472 training return: -999.924047678119
episode: 473 training return: -999.9361291350801
episode: 474 training return: -999.9462243962345
episode: 475 training return: -999.9011878870416
epoch: 119 test_true_pfm: -0.023287577255937702 sim_pfm: -999.9534126145718
episode: 476 training return: -999.9369921984846
episode: 477 training return: -999.9455809175247
episode: 478 training return: -999.9200817223364
episode: 479 training return: -999.9170927064822
epoch: 120 test_true_pfm: -0.5473350009563219 sim_pfm: -999.9543173904407
episode: 480 training return: -999.9323729523389
episode: 481 training return: -999.9316388586914
episode: 482 training return: -999.939358103008
episode: 483 training return: -999.9308122330194
epoch: 121 test_true_pfm: 0.300691980138478 sim_pfm: -999.9528482256036
episode: 484 training return: -999.9261728984028
episode: 485 training return: -999.9339285067864
episode: 486 training return: -999.9402804082335
episode: 487 training return: -999.9307119377661
epoch: 122 test_true_pfm: -0.051572207464333365 sim_pfm: -999.9547138447824
episode: 488 training return: -999.9350402380439
episode: 489 training return: -999.9326406385419
episode: 490 training return: -999.9407277366147
episode: 491 training return: -999.9445048550014
epoch: 123 test_true_pfm: 0.3316596685876636 sim_pfm: -999.9541239033612
episode: 492 training return: -999.9165001416841
episode: 493 training return: -999.9318975123917
episode: 494 training return: -999.9424699992596
episode: 495 training return: -999.9482507409658
epoch: 124 test_true_pfm: 0.3105789073152456 sim_pfm: -999.9545475406666
episode: 496 training return: -999.9100502777173
episode: 497 training return: -999.933621128038
episode: 498 training return: -999.9440577576217
episode: 499 training return: -999.9098774328539
epoch: 125 test_true_pfm: -0.20740414432782814 sim_pfm: -999.9535716171166
episode: 500 training return: -999.9432268295216
episode: 501 training return: -999.9019845553843
episode: 502 training return: -999.9393194409063
episode: 503 training return: -999.9145002398583
epoch: 126 test_true_pfm: -0.6434161823066307 sim_pfm: -999.9542516351175
episode: 504 training return: -999.9356230316998
episode: 505 training return: -999.9295539165386
episode: 506 training return: -999.9318138847298
episode: 507 training return: -999.9206907995497
epoch: 127 test_true_pfm: -0.5146607063877512 sim_pfm: -999.9533854487748
episode: 508 training return: -999.9326259017317
episode: 509 training return: -999.9325135989907
episode: 510 training return: -999.9180191920092
episode: 511 training return: -999.9390945318975
epoch: 128 test_true_pfm: -0.624047610835284 sim_pfm: -999.953448005755
episode: 512 training return: -999.9306143122112
episode: 513 training return: -999.8107089132485
episode: 514 training return: -999.9452868003943
episode: 515 training return: -999.9433517643158
epoch: 129 test_true_pfm: -1.1254714677656565 sim_pfm: -999.9530952213644
episode: 516 training return: -999.9635897201416
episode: 517 training return: -999.9243097882201
episode: 518 training return: -999.9366456014621
episode: 519 training return: -999.9392307388109
epoch: 130 test_true_pfm: -0.039663656195184895 sim_pfm: -999.9540929644037
episode: 520 training return: -999.9411851297859
episode: 521 training return: -999.934670385391
episode: 522 training return: -999.93625664074
episode: 523 training return: -999.9155217304723
epoch: 131 test_true_pfm: -0.27484728526177843 sim_pfm: -999.9552006884402
episode: 524 training return: -999.9174551382889
episode: 525 training return: -999.9356909852831
episode: 526 training return: -999.9188271685408
episode: 527 training return: -999.9118678762011
epoch: 132 test_true_pfm: -0.15631756906306207 sim_pfm: -999.9543619600286
episode: 528 training return: -999.9413519253051
episode: 529 training return: -999.9224396284405
episode: 530 training return: -999.9453341402896
episode: 531 training return: -999.9255869226581
epoch: 133 test_true_pfm: -0.24767327541630543 sim_pfm: -999.9540090105126
episode: 532 training return: -999.9173404116178
episode: 533 training return: -999.9275049649896
episode: 534 training return: -999.9055412133058
episode: 535 training return: -999.9266127171065
epoch: 134 test_true_pfm: -0.27313762670429775 sim_pfm: -999.9536857278548
episode: 536 training return: -999.92156420718
episode: 537 training return: -999.90739471666
episode: 538 training return: -999.9302063209059
episode: 539 training return: -999.9191540441635
epoch: 135 test_true_pfm: 0.2841074216416008 sim_pfm: -999.9543453301562
episode: 540 training return: -999.9017549567377
episode: 541 training return: -999.9174044717233
episode: 542 training return: -999.9226250605526
episode: 543 training return: -999.9418437488544
epoch: 136 test_true_pfm: -0.5888579786762342 sim_pfm: -999.9540994358907
episode: 544 training return: -999.8825615050358
episode: 545 training return: -999.8796346084296
episode: 546 training return: -999.9264974842315
episode: 547 training return: -999.9274873428429
epoch: 137 test_true_pfm: -1.1164570993859941 sim_pfm: -999.9531568828298
episode: 548 training return: -999.9187690680242
episode: 549 training return: -999.9581326082323
episode: 550 training return: -999.9195567813707
episode: 551 training return: -999.9359179263303
epoch: 138 test_true_pfm: -1.1304040631088526 sim_pfm: -999.9528603304128
episode: 552 training return: -999.9200650723541
episode: 553 training return: -999.937645301601
episode: 554 training return: -999.925444022918
episode: 555 training return: -999.9074220565568
epoch: 139 test_true_pfm: -0.6574836412796528 sim_pfm: -999.9526801265457
episode: 556 training return: -999.9317352082822
episode: 557 training return: -999.9186613756674
episode: 558 training return: -999.9023517915075
episode: 559 training return: -999.9224271066024
epoch: 140 test_true_pfm: -0.4559433770302104 sim_pfm: -999.9541663316614
episode: 560 training return: -999.933500037372
episode: 561 training return: -999.9201946725581
episode: 562 training return: -999.9403201159913
episode: 563 training return: -999.9397654694684
epoch: 141 test_true_pfm: -0.4230782517953328 sim_pfm: -999.9533951583229
episode: 564 training return: -999.9373914238186
episode: 565 training return: -999.9446638795823
episode: 566 training return: -999.943860842164
episode: 567 training return: -999.9226668061895
epoch: 142 test_true_pfm: 0.04297227622158051 sim_pfm: -999.9543512096037
episode: 568 training return: -999.9409832316132
episode: 569 training return: -999.9072957450101
episode: 570 training return: -999.9164794659465
episode: 571 training return: -999.947955504264
epoch: 143 test_true_pfm: -0.39531881736265223 sim_pfm: -999.9535022959739
episode: 572 training return: -999.9237935452114
episode: 573 training return: -999.9262586064453
episode: 574 training return: -999.9165273647284
episode: 575 training return: -999.9233996724552
epoch: 144 test_true_pfm: 0.27515637335954635 sim_pfm: -999.9536847173772
episode: 576 training return: -999.9104416011237
episode: 577 training return: -999.9325511679268
episode: 578 training return: -999.9305730955697
episode: 579 training return: -999.9310925242368
epoch: 145 test_true_pfm: -0.36534234007926997 sim_pfm: -999.9532683077781
episode: 580 training return: -999.9069327455187
episode: 581 training return: -999.9392370826841
episode: 582 training return: -999.9396912915666
episode: 583 training return: -999.9322822073856
epoch: 146 test_true_pfm: 0.1660216330100848 sim_pfm: -999.9541660904566
episode: 584 training return: -999.9130834612172
episode: 585 training return: -999.9223401872327
episode: 586 training return: -999.9272632369527
episode: 587 training return: -999.9433377134252
epoch: 147 test_true_pfm: -0.3208351957521853 sim_pfm: -999.9528321874681
episode: 588 training return: -999.9367516765917
episode: 589 training return: -999.9279972300968
episode: 590 training return: -999.9283080422078
episode: 591 training return: -999.9490458626191
epoch: 148 test_true_pfm: 0.7871652668835414 sim_pfm: -999.9524079599183
episode: 592 training return: -999.9453483650166
episode: 593 training return: -999.9290900852111
episode: 594 training return: -999.9320980717073
episode: 595 training return: -999.9365065894473
epoch: 149 test_true_pfm: -0.40629192695771704 sim_pfm: -999.9530208658344
episode: 596 training return: -999.9353865913345
episode: 597 training return: -999.9080015793795
episode: 598 training return: -999.9197621139839
episode: 599 training return: -999.9061392931483
epoch: 150 test_true_pfm: -0.9660764717520784 sim_pfm: -999.9529516270546
