['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 0.2894083298742771 test_loss: 0.18727678060531616
epoch: 1 training_loss 0.18737229086458684 test_loss: 0.16910713911056519
epoch: 2 training_loss 0.17501064144074918 test_loss: 0.16899865865707397
epoch: 3 training_loss 0.17540906697511674 test_loss: 0.1592429757118225
epoch: 4 training_loss 0.16343538515269757 test_loss: 0.1551080107688904
epoch: 5 training_loss 0.16884309701621533 test_loss: 0.16287267208099365
epoch: 6 training_loss 0.15172103948891164 test_loss: 0.15428835153579712
epoch: 7 training_loss 0.15493481110781432 test_loss: 0.16026755571365356
epoch: 8 training_loss 0.15441494278609752 test_loss: 0.16082727909088135
epoch: 9 training_loss 0.14950185287743806 test_loss: 0.1535244584083557
epoch: 10 training_loss 0.15405836705118417 test_loss: 0.17523210048675536
epoch: 11 training_loss 0.15580100014805795 test_loss: 0.14624866247177123
epoch: 12 training_loss 0.14744357187300922 test_loss: 0.15288792848587035
epoch: 13 training_loss 0.15005182050168514 test_loss: 0.14928804636001586
epoch: 14 training_loss 0.14508523214608432 test_loss: 0.17783336639404296
epoch: 15 training_loss 0.15132028065621853 test_loss: 0.16451075077056884
epoch: 16 training_loss 0.14200192175805568 test_loss: 0.15235815048217774
epoch: 17 training_loss 0.14872463420033455 test_loss: 0.14334652423858643
epoch: 18 training_loss 0.1453327617049217 test_loss: 0.1386818766593933
epoch: 19 training_loss 0.1545910223387182 test_loss: 0.14338157176971436
epoch: 20 training_loss 0.14737673081457614 test_loss: 0.16343528032302856
epoch: 21 training_loss 0.15200556363910436 test_loss: 0.13737224340438842
epoch: 22 training_loss 0.14049747742712498 test_loss: 0.1462862730026245
epoch: 23 training_loss 0.15239395845681428 test_loss: 0.1513729453086853
epoch: 24 training_loss 0.13259209271520375 test_loss: 0.14294341802597046
epoch: 25 training_loss 0.14056233771145343 test_loss: 0.1360011577606201
epoch: 26 training_loss 0.14648265443742275 test_loss: 0.1445547580718994
epoch: 27 training_loss 0.1505071233212948 test_loss: 0.16579005718231202
epoch: 28 training_loss 0.15181352227926254 test_loss: 0.16624014377593993
epoch: 29 training_loss 0.14067954253405332 test_loss: 0.17614178657531737
epoch: 30 training_loss 0.1478104478120804 test_loss: 0.13886088132858276
epoch: 31 training_loss 0.14046331137418747 test_loss: 0.14886958599090577
epoch: 32 training_loss 0.148501292206347 test_loss: 0.14505584239959718
epoch: 33 training_loss 0.15188028678297996 test_loss: 0.14927557706832886
epoch: 34 training_loss 0.14952055271714926 test_loss: 0.16955947875976562
epoch: 35 training_loss 0.13641798682510853 test_loss: 0.14367278814315795
epoch: 36 training_loss 0.13933267358690501 test_loss: 0.1564209818840027
epoch: 37 training_loss 0.15430576339364052 test_loss: 0.1591515064239502
epoch: 38 training_loss 0.14912234261631965 test_loss: 0.13329451084136962
epoch: 39 training_loss 0.14316726177930833 test_loss: 0.1424052119255066
epoch: 40 training_loss 0.1423341615498066 test_loss: 0.1466200828552246
epoch: 41 training_loss 0.1487067146971822 test_loss: 0.13619513511657716
epoch: 42 training_loss 0.13195491168648005 test_loss: 0.13395111560821532
epoch: 43 training_loss 0.14786513602361084 test_loss: 0.13701915740966797
epoch: 44 training_loss 0.13377282675355673 test_loss: 0.1551612138748169
epoch: 45 training_loss 0.1390127805247903 test_loss: 0.13052459955215454
epoch: 46 training_loss 0.14133776739239692 test_loss: 0.1256899356842041
epoch: 47 training_loss 0.1351438108831644 test_loss: 0.14075379371643065
epoch: 48 training_loss 0.148599708750844 test_loss: 0.16180713176727296
epoch: 49 training_loss 0.13960891269147396 test_loss: 0.18214021921157836
epoch: 50 training_loss 0.13849906913936139 test_loss: 0.13907949924468993
epoch: 51 training_loss 0.14568626053631306 test_loss: 0.15446566343307494
epoch: 52 training_loss 0.14295368500053882 test_loss: 0.15626857280731202
epoch: 53 training_loss 0.14277088664472104 test_loss: 0.14318736791610717
epoch: 54 training_loss 0.1342203588038683 test_loss: 0.14400892257690429
epoch: 55 training_loss 0.13945900563150646 test_loss: 0.12697561979293823
epoch: 56 training_loss 0.15153907768428326 test_loss: 0.1369871973991394
epoch: 57 training_loss 0.1388825187459588 test_loss: 0.1574519991874695
epoch: 58 training_loss 0.1408714047446847 test_loss: 0.1439381718635559
epoch: 59 training_loss 0.14180183995515108 test_loss: 0.14130012989044188
epoch: 60 training_loss 0.138310175165534 test_loss: 0.14732240438461303
epoch: 61 training_loss 0.14214793112128973 test_loss: 0.1312909245491028
epoch: 62 training_loss 0.13068879242986442 test_loss: 0.13890507221221923
epoch: 63 training_loss 0.14822989858686925 test_loss: 0.13474360704421998
epoch: 64 training_loss 0.15192450154572726 test_loss: 0.1530017375946045
epoch: 65 training_loss 0.14192732840776442 test_loss: 0.16326866149902344
epoch: 66 training_loss 0.14063880309462548 test_loss: 0.1416499972343445
epoch: 67 training_loss 0.14203988119959832 test_loss: 0.14828574657440186
epoch: 68 training_loss 0.13524863712489604 test_loss: 0.14894927740097047
epoch: 69 training_loss 0.13330035865306855 test_loss: 0.11841922998428345
epoch: 70 training_loss 0.14341922171413898 test_loss: 0.14776982069015504
epoch: 71 training_loss 0.1423344950377941 test_loss: 0.1447666049003601
epoch: 72 training_loss 0.13435652192682027 test_loss: 0.13559839725494385
epoch: 73 training_loss 0.140692434348166 test_loss: 0.1317663311958313
epoch: 74 training_loss 0.13888964235782622 test_loss: 0.12491034269332886
epoch: 75 training_loss 0.14415043868124486 test_loss: 0.1449413537979126
epoch: 76 training_loss 0.13857174042612314 test_loss: 0.13879319429397582
epoch: 77 training_loss 0.141093577593565 test_loss: 0.17390210628509523
epoch: 78 training_loss 0.129780612885952 test_loss: 0.14607561826705934
epoch: 79 training_loss 0.13138258054852486 test_loss: 0.14084576368331908
epoch: 80 training_loss 0.14058892160654068 test_loss: 0.13173096179962157
epoch: 81 training_loss 0.13481483966112137 test_loss: 0.14250420331954955
epoch: 82 training_loss 0.13676659669727087 test_loss: 0.14747657775878906
epoch: 83 training_loss 0.13143210045993328 test_loss: 0.17864712476730346
epoch: 84 training_loss 0.13779505304992198 test_loss: 0.13789368867874147
epoch: 85 training_loss 0.1410404882580042 test_loss: 0.15258402824401857
epoch: 86 training_loss 0.13300102349370718 test_loss: 0.12789971828460694
epoch: 87 training_loss 0.13652299098670484 test_loss: 0.14464621543884276
epoch: 88 training_loss 0.128858440592885 test_loss: 0.13519107103347777
epoch: 89 training_loss 0.1420462007075548 test_loss: 0.1413748860359192
epoch: 90 training_loss 0.13319334123283624 test_loss: 0.15372824668884277
epoch: 91 training_loss 0.13061440613120795 test_loss: 0.13240723609924315
epoch: 92 training_loss 0.14251413498073817 test_loss: 0.12317851781845093
epoch: 93 training_loss 0.13319486856460572 test_loss: 0.1540374994277954
epoch: 94 training_loss 0.13825423393398523 test_loss: 0.15242230892181396
epoch: 95 training_loss 0.12696429813280702 test_loss: 0.13466647863388062
epoch: 96 training_loss 0.13078765247017146 test_loss: 0.13823459148406983
epoch: 97 training_loss 0.1355899251252413 test_loss: 0.15027753114700318
epoch: 98 training_loss 0.12869443614035844 test_loss: 0.12977687120437623
epoch: 99 training_loss 0.13843059998005627 test_loss: 0.15645673274993896
epoch: 100 training_loss 0.1314978812634945 test_loss: 0.1544143795967102
epoch: 101 training_loss 0.13008396480232476 test_loss: 0.12930119037628174
epoch: 102 training_loss 0.13085476864129306 test_loss: 0.13635659217834473
epoch: 103 training_loss 0.1388822566717863 test_loss: 0.14584797620773315
epoch: 104 training_loss 0.1325400388240814 test_loss: 0.12292084693908692
epoch: 105 training_loss 0.13941290438175202 test_loss: 0.13946378231048584
epoch: 106 training_loss 0.13008148308843373 test_loss: 0.15707721710205078
epoch: 107 training_loss 0.1353681843355298 test_loss: 0.13791677951812745
epoch: 108 training_loss 0.1355081543326378 test_loss: 0.13515294790267945
epoch: 109 training_loss 0.14384291891008616 test_loss: 0.13852607011795043
epoch: 110 training_loss 0.13592701002955437 test_loss: 0.14187315702438355
epoch: 111 training_loss 0.14402962945401668 test_loss: 0.1346742630004883
epoch: 112 training_loss 0.133319832123816 test_loss: 0.1365702986717224
epoch: 113 training_loss 0.13229163967072963 test_loss: 0.17951929569244385
epoch: 114 training_loss 0.13178812079131602 test_loss: 0.15819607973098754
epoch: 115 training_loss 0.14214394979178904 test_loss: 0.1492852210998535
epoch: 116 training_loss 0.12898224931210278 test_loss: 0.14147027730941772
epoch: 117 training_loss 0.127373674698174 test_loss: 0.13851929903030397
epoch: 118 training_loss 0.12844086095690727 test_loss: 0.1403820753097534
epoch: 119 training_loss 0.13372914463281632 test_loss: 0.14349921941757202
epoch: 120 training_loss 0.13784967157989741 test_loss: 0.1486607789993286
epoch: 121 training_loss 0.14002494640648366 test_loss: 0.14180359840393067
epoch: 122 training_loss 0.13542949724942446 test_loss: 0.1279844880104065
epoch: 123 training_loss 0.13453671835362913 test_loss: 0.14547399282455445
epoch: 124 training_loss 0.13701470892876386 test_loss: 0.13637135028839112
epoch: 125 training_loss 0.14087029926478864 test_loss: 0.15253102779388428
epoch: 126 training_loss 0.1415317567065358 test_loss: 0.1498608946800232
epoch: 127 training_loss 0.13888594903051854 test_loss: 0.14617483615875243
epoch: 128 training_loss 0.13822796992957592 test_loss: 0.16359609365463257
epoch: 129 training_loss 0.13390133675187826 test_loss: 0.14151190519332885
epoch: 130 training_loss 0.1333925748988986 test_loss: 0.15459606647491456
epoch: 131 training_loss 0.13218914300203324 test_loss: 0.1532922863960266
epoch: 132 training_loss 0.1305926461517811 test_loss: 0.14168015718460084
epoch: 133 training_loss 0.13163078136742115 test_loss: 0.15517158508300782
epoch: 134 training_loss 0.12683376330882312 test_loss: 0.15212719440460204
epoch: 135 training_loss 0.15149857074022294 test_loss: 0.14940723180770873
epoch: 136 training_loss 0.14473292138427496 test_loss: 0.1486492395401001
epoch: 137 training_loss 0.14132736910134555 test_loss: 0.13951774835586547
epoch: 138 training_loss 0.13398591581732033 test_loss: 0.13846954107284545
epoch: 139 training_loss 0.12647097140550614 test_loss: 0.13374476432800292
epoch: 140 training_loss 0.14184394579380752 test_loss: 0.13648993968963624
epoch: 141 training_loss 0.14360753580927849 test_loss: 0.13963518142700196
epoch: 142 training_loss 0.13212058950215577 test_loss: 0.13580933809280396
epoch: 143 training_loss 0.1262631045281887 test_loss: 0.14034795761108398
epoch: 144 training_loss 0.13121555276215077 test_loss: 0.13876328468322754
epoch: 145 training_loss 0.13120596323162317 test_loss: 0.1376028299331665
epoch: 146 training_loss 0.1288938308134675 test_loss: 0.12406458854675292
epoch: 147 training_loss 0.13787385016679765 test_loss: 0.14828373193740846
epoch: 148 training_loss 0.14091893780976533 test_loss: 0.12180067300796509
epoch: 149 training_loss 0.140615949369967 test_loss: 0.13831912279129027
epoch: 0 training_loss 0.3126274109631777 test_loss: 0.2156749963760376
epoch: 1 training_loss 0.178357916995883 test_loss: 0.15610886812210084
epoch: 2 training_loss 0.1787124490365386 test_loss: 0.164395534992218
epoch: 3 training_loss 0.16605752490460873 test_loss: 0.1337469458580017
epoch: 4 training_loss 0.1557274866476655 test_loss: 0.14778764247894288
epoch: 5 training_loss 0.14941702648997307 test_loss: 0.17996516227722167
epoch: 6 training_loss 0.1603808806091547 test_loss: 0.16409826278686523
epoch: 7 training_loss 0.15163899943232537 test_loss: 0.17598247528076172
epoch: 8 training_loss 0.1543000665307045 test_loss: 0.13413066864013673
epoch: 9 training_loss 0.15090462636202573 test_loss: 0.13437501192092896
epoch: 10 training_loss 0.14570705872029066 test_loss: 0.14722294807434083
epoch: 11 training_loss 0.15891513630747794 test_loss: 0.13891267776489258
epoch: 12 training_loss 0.15317413777112962 test_loss: 0.16237735748291016
epoch: 13 training_loss 0.1429330626875162 test_loss: 0.15306726694107056
epoch: 14 training_loss 0.15672021064907313 test_loss: 0.12133598327636719
epoch: 15 training_loss 0.14521090164780617 test_loss: 0.1516924738883972
epoch: 16 training_loss 0.15136802185326814 test_loss: 0.15825059413909912
epoch: 17 training_loss 0.15608082063496112 test_loss: 0.14761631488800048
epoch: 18 training_loss 0.15050304014235735 test_loss: 0.1247105598449707
epoch: 19 training_loss 0.1461099398881197 test_loss: 0.14465447664260864
epoch: 20 training_loss 0.14075486909598112 test_loss: 0.15002872943878173
epoch: 21 training_loss 0.150284471064806 test_loss: 0.12723126411437988
epoch: 22 training_loss 0.13802165038883685 test_loss: 0.13834422826766968
epoch: 23 training_loss 0.13597689397633075 test_loss: 0.1427881598472595
epoch: 24 training_loss 0.13715503513813018 test_loss: 0.15119400024414062
epoch: 25 training_loss 0.14426931466907264 test_loss: 0.13035181760787964
epoch: 26 training_loss 0.14420836966484785 test_loss: 0.13980660438537598
epoch: 27 training_loss 0.14561990238726139 test_loss: 0.12799750566482543
epoch: 28 training_loss 0.13811976760625838 test_loss: 0.16664750576019288
epoch: 29 training_loss 0.14131706722080709 test_loss: 0.1472496509552002
epoch: 30 training_loss 0.14486279614269734 test_loss: 0.13238749504089356
epoch: 31 training_loss 0.137183931581676 test_loss: 0.13794502019882202
epoch: 32 training_loss 0.13929280884563922 test_loss: 0.1435190796852112
epoch: 33 training_loss 0.14153720811009407 test_loss: 0.12880643606185913
epoch: 34 training_loss 0.13880179788917302 test_loss: 0.13726861476898194
epoch: 35 training_loss 0.13355308335274457 test_loss: 0.15327103137969972
epoch: 36 training_loss 0.1444219582155347 test_loss: 0.13271671533584595
epoch: 37 training_loss 0.13277363412082197 test_loss: 0.1311112403869629
epoch: 38 training_loss 0.13649173725396394 test_loss: 0.13238476514816283
epoch: 39 training_loss 0.13500958867371082 test_loss: 0.12354414463043213
epoch: 40 training_loss 0.13806990277022124 test_loss: 0.14400638341903688
epoch: 41 training_loss 0.1424270175769925 test_loss: 0.13825536966323854
epoch: 42 training_loss 0.13918386083096265 test_loss: 0.12930809259414672
epoch: 43 training_loss 0.1354613458365202 test_loss: 0.12833890914916993
epoch: 44 training_loss 0.1438853895291686 test_loss: 0.14668353796005248
epoch: 45 training_loss 0.1356076655909419 test_loss: 0.12849510908126832
epoch: 46 training_loss 0.12926490064710378 test_loss: 0.10116671323776245
epoch: 47 training_loss 0.13031239625066518 test_loss: 0.13564784526824952
epoch: 48 training_loss 0.13714778874069453 test_loss: 0.1456989526748657
epoch: 49 training_loss 0.1342132594808936 test_loss: 0.12512396574020385
epoch: 50 training_loss 0.13283757098019122 test_loss: 0.16161099672317505
epoch: 51 training_loss 0.14005671598017216 test_loss: 0.13187661170959472
epoch: 52 training_loss 0.12775272760540246 test_loss: 0.14291528463363648
epoch: 53 training_loss 0.13221719779074192 test_loss: 0.1373789429664612
epoch: 54 training_loss 0.13579662311822177 test_loss: 0.12416256666183471
epoch: 55 training_loss 0.1349970119446516 test_loss: 0.13188495635986328
epoch: 56 training_loss 0.13925707798451184 test_loss: 0.12222534418106079
epoch: 57 training_loss 0.13643339816480876 test_loss: 0.1335572361946106
epoch: 58 training_loss 0.13431501373648644 test_loss: 0.1364728331565857
epoch: 59 training_loss 0.1367498255893588 test_loss: 0.12946653366088867
epoch: 60 training_loss 0.13484171237796544 test_loss: 0.1279977798461914
epoch: 61 training_loss 0.1419487274438143 test_loss: 0.12868053913116456
epoch: 62 training_loss 0.12880453608930112 test_loss: 0.13610491752624512
epoch: 63 training_loss 0.1320111107081175 test_loss: 0.1399467706680298
epoch: 64 training_loss 0.1454650992900133 test_loss: 0.1270171046257019
epoch: 65 training_loss 0.13657561801373957 test_loss: 0.14215450286865233
epoch: 66 training_loss 0.14417456306517124 test_loss: 0.13342797756195068
epoch: 67 training_loss 0.13551207683980465 test_loss: 0.123356032371521
epoch: 68 training_loss 0.13703112002462148 test_loss: 0.13965339660644532
epoch: 69 training_loss 0.13801089212298392 test_loss: 0.12213308811187744
epoch: 70 training_loss 0.14183185156434774 test_loss: 0.14578192234039306
epoch: 71 training_loss 0.13726560857146977 test_loss: 0.14252203702926636
epoch: 72 training_loss 0.1445331934466958 test_loss: 0.12580922842025757
epoch: 73 training_loss 0.139770205616951 test_loss: 0.12587149143218995
epoch: 74 training_loss 0.14058327469974757 test_loss: 0.13703540563583375
epoch: 75 training_loss 0.1255106210336089 test_loss: 0.1508987307548523
epoch: 76 training_loss 0.13311538882553578 test_loss: 0.12750285863876343
epoch: 77 training_loss 0.1340460466220975 test_loss: 0.1305910348892212
epoch: 78 training_loss 0.1372574445977807 test_loss: 0.1276087999343872
epoch: 79 training_loss 0.14065813045948744 test_loss: 0.12390801906585694
epoch: 80 training_loss 0.14372068479657174 test_loss: 0.1411522388458252
epoch: 81 training_loss 0.1317835007980466 test_loss: 0.11758174896240234
epoch: 82 training_loss 0.1342202140763402 test_loss: 0.1259207844734192
epoch: 83 training_loss 0.13363765265792607 test_loss: 0.1266098737716675
epoch: 84 training_loss 0.1342751307412982 test_loss: 0.14306910037994386
epoch: 85 training_loss 0.13182584267109632 test_loss: 0.13392376899719238
epoch: 86 training_loss 0.1375200980529189 test_loss: 0.13292450904846193
epoch: 87 training_loss 0.1375010136142373 test_loss: 0.13862541913986207
epoch: 88 training_loss 0.14061365615576504 test_loss: 0.12918275594711304
epoch: 89 training_loss 0.1389813081920147 test_loss: 0.13698315620422363
epoch: 90 training_loss 0.1342907076328993 test_loss: 0.14313125610351562
epoch: 91 training_loss 0.13303168870508672 test_loss: 0.13682500123977662
epoch: 92 training_loss 0.13452507205307485 test_loss: 0.13023972511291504
epoch: 93 training_loss 0.14399135284125805 test_loss: 0.12955422401428224
epoch: 94 training_loss 0.12707939401268958 test_loss: 0.1368135929107666
epoch: 95 training_loss 0.13428431320935488 test_loss: 0.13386154174804688
epoch: 96 training_loss 0.1393783098831773 test_loss: 0.12226226329803466
epoch: 97 training_loss 0.13732741225510836 test_loss: 0.12475136518478394
epoch: 98 training_loss 0.13543589778244494 test_loss: 0.13906025886535645
epoch: 99 training_loss 0.1280647414177656 test_loss: 0.12366403341293335
epoch: 100 training_loss 0.14080115877091884 test_loss: 0.12866568565368652
epoch: 101 training_loss 0.1281714679300785 test_loss: 0.12984408140182496
epoch: 102 training_loss 0.12956516403704882 test_loss: 0.14183194637298585
epoch: 103 training_loss 0.1409586026892066 test_loss: 0.13064466714859008
epoch: 104 training_loss 0.13451114527881144 test_loss: 0.13111836910247804
epoch: 105 training_loss 0.13667227976024152 test_loss: 0.12494090795516968
epoch: 106 training_loss 0.13243456691503525 test_loss: 0.13584028482437133
epoch: 107 training_loss 0.1343923693522811 test_loss: 0.15789581537246705
epoch: 108 training_loss 0.13369032103568315 test_loss: 0.12915573120117188
epoch: 109 training_loss 0.12900784336030482 test_loss: 0.1319062352180481
epoch: 110 training_loss 0.130589886829257 test_loss: 0.12561367750167846
epoch: 111 training_loss 0.1346882700547576 test_loss: 0.12339125871658325
epoch: 112 training_loss 0.1307748070731759 test_loss: 0.12102581262588501
epoch: 113 training_loss 0.1300914017483592 test_loss: 0.12353631258010864
epoch: 114 training_loss 0.13071891717612744 test_loss: 0.14295886754989623
epoch: 115 training_loss 0.13237599089741706 test_loss: 0.13820239305496215
epoch: 116 training_loss 0.13561021558940412 test_loss: 0.13215405941009523
epoch: 117 training_loss 0.12384306352585554 test_loss: 0.13472598791122437
epoch: 118 training_loss 0.13081436369568109 test_loss: 0.13240965604782104
epoch: 119 training_loss 0.1307012091949582 test_loss: 0.12789336442947388
epoch: 120 training_loss 0.13687573172152043 test_loss: 0.129041850566864
epoch: 121 training_loss 0.1368157124519348 test_loss: 0.12588683366775513
epoch: 122 training_loss 0.13569824665784835 test_loss: 0.1438591480255127
epoch: 123 training_loss 0.13727991841733456 test_loss: 0.13318969011306764
epoch: 124 training_loss 0.13173656843602657 test_loss: 0.11654406785964966
epoch: 125 training_loss 0.13477290906012057 test_loss: 0.12008082866668701
epoch: 126 training_loss 0.13358587846159936 test_loss: 0.1361555576324463
epoch: 127 training_loss 0.13218023244291544 test_loss: 0.1238286852836609
epoch: 128 training_loss 0.14080346822738649 test_loss: 0.12831270694732666
epoch: 129 training_loss 0.12890703234821557 test_loss: 0.12867451906204225
epoch: 130 training_loss 0.12952222727239132 test_loss: 0.12147986888885498
epoch: 131 training_loss 0.1360717809200287 test_loss: 0.13150300979614257
epoch: 132 training_loss 0.13035436429083347 test_loss: 0.1331043004989624
epoch: 133 training_loss 0.13349068388342858 test_loss: 0.13010650873184204
epoch: 134 training_loss 0.1323394149914384 test_loss: 0.14018229246139527
epoch: 135 training_loss 0.13180144488811493 test_loss: 0.13490285873413085
epoch: 136 training_loss 0.1303789933770895 test_loss: 0.12849284410476686
epoch: 137 training_loss 0.13475383456796408 test_loss: 0.13952115774154664
epoch: 138 training_loss 0.12981973871588706 test_loss: 0.12297557592391968
epoch: 139 training_loss 0.1291423267312348 test_loss: 0.12109158039093018
epoch: 140 training_loss 0.12993911266326905 test_loss: 0.1254921317100525
epoch: 141 training_loss 0.1338151829689741 test_loss: 0.1255352735519409
epoch: 142 training_loss 0.1328589732758701 test_loss: 0.12451202869415283
epoch: 143 training_loss 0.1361022875085473 test_loss: 0.12327001094818116
epoch: 144 training_loss 0.1380919958651066 test_loss: 0.11784662008285522
epoch: 145 training_loss 0.13569570280611515 test_loss: 0.11640194654464722
epoch: 146 training_loss 0.139078666344285 test_loss: 0.12204456329345703
epoch: 147 training_loss 0.1330284008011222 test_loss: 0.1348847270011902
epoch: 148 training_loss 0.13601456973701714 test_loss: 0.13712310791015625
epoch: 149 training_loss 0.1313418135046959 test_loss: 0.12100964784622192
epoch: 0 training_loss 0.2756933878362179 test_loss: 0.17943331003189086
epoch: 1 training_loss 0.18088497079908847 test_loss: 0.15723943710327148
epoch: 2 training_loss 0.16159780137240887 test_loss: 0.15698095560073852
epoch: 3 training_loss 0.17186025567352772 test_loss: 0.14411627054214476
epoch: 4 training_loss 0.15140250116586684 test_loss: 0.171949303150177
epoch: 5 training_loss 0.146797831133008 test_loss: 0.13888955116271973
epoch: 6 training_loss 0.1435696253180504 test_loss: 0.16881223917007446
epoch: 7 training_loss 0.1517191630601883 test_loss: 0.1452440142631531
epoch: 8 training_loss 0.14540941469371318 test_loss: 0.14771586656570435
epoch: 9 training_loss 0.14378755774348975 test_loss: 0.13831111192703247
epoch: 10 training_loss 0.15410977371037007 test_loss: 0.1420229434967041
epoch: 11 training_loss 0.1492338240891695 test_loss: 0.15718566179275512
epoch: 12 training_loss 0.14671086825430393 test_loss: 0.16314696073532103
epoch: 13 training_loss 0.13598035898059607 test_loss: 0.18965370655059816
epoch: 14 training_loss 0.1427043327316642 test_loss: 0.1324986457824707
epoch: 15 training_loss 0.133658402338624 test_loss: 0.11678520441055298
epoch: 16 training_loss 0.13213133189827203 test_loss: 0.11843504905700683
epoch: 17 training_loss 0.14349020812660457 test_loss: 0.14480142593383788
epoch: 18 training_loss 0.13771143179386855 test_loss: 0.14732086658477783
epoch: 19 training_loss 0.1327747331932187 test_loss: 0.12685987949371338
epoch: 20 training_loss 0.14213789884001016 test_loss: 0.1381931185722351
epoch: 21 training_loss 0.13785984955728053 test_loss: 0.13905956745147705
epoch: 22 training_loss 0.14708648525178433 test_loss: 0.14224909543991088
epoch: 23 training_loss 0.13816997746005655 test_loss: 0.1583584189414978
epoch: 24 training_loss 0.13817020725458862 test_loss: 0.14351093769073486
epoch: 25 training_loss 0.1387625147216022 test_loss: 0.12602369785308837
epoch: 26 training_loss 0.13938766174018383 test_loss: 0.1431918978691101
epoch: 27 training_loss 0.143290351703763 test_loss: 0.1347719192504883
epoch: 28 training_loss 0.13437740832567216 test_loss: 0.13725608587265015
epoch: 29 training_loss 0.13924088306725024 test_loss: 0.15158432722091675
epoch: 30 training_loss 0.14263008389621973 test_loss: 0.13014599084854125
epoch: 31 training_loss 0.1363879641145468 test_loss: 0.1591088056564331
epoch: 32 training_loss 0.13267496079206467 test_loss: 0.13672319650650025
epoch: 33 training_loss 0.12760616451501847 test_loss: 0.11480038166046143
epoch: 34 training_loss 0.1293746306374669 test_loss: 0.13637418746948243
epoch: 35 training_loss 0.1263441067561507 test_loss: 0.15168088674545288
epoch: 36 training_loss 0.13770334776490928 test_loss: 0.13945587873458862
epoch: 37 training_loss 0.14249161329120397 test_loss: 0.1508554458618164
epoch: 38 training_loss 0.13487103309482337 test_loss: 0.1267338991165161
epoch: 39 training_loss 0.13423759132623672 test_loss: 0.12395541667938233
epoch: 40 training_loss 0.13312903769314288 test_loss: 0.13540579080581666
epoch: 41 training_loss 0.14142921101301908 test_loss: 0.1320374846458435
epoch: 42 training_loss 0.13683566451072693 test_loss: 0.11946234703063965
epoch: 43 training_loss 0.13423995159566401 test_loss: 0.13345783948898315
epoch: 44 training_loss 0.1353739585727453 test_loss: 0.11884993314743042
epoch: 45 training_loss 0.13965892869979143 test_loss: 0.13443334102630616
epoch: 46 training_loss 0.1317829764634371 test_loss: 0.11393576860427856
epoch: 47 training_loss 0.13399673216044902 test_loss: 0.13055096864700316
epoch: 48 training_loss 0.12811752621084452 test_loss: 0.14198741912841797
epoch: 49 training_loss 0.14119087889790535 test_loss: 0.1411768078804016
epoch: 50 training_loss 0.1315452716499567 test_loss: 0.1311987519264221
epoch: 51 training_loss 0.13932431414723395 test_loss: 0.13382322788238527
epoch: 52 training_loss 0.1344654047116637 test_loss: 0.11692323684692382
epoch: 53 training_loss 0.13533638041466475 test_loss: 0.13146522045135497
epoch: 54 training_loss 0.12717887431383132 test_loss: 0.14022428989410402
epoch: 55 training_loss 0.13409250795841218 test_loss: 0.13191115856170654
epoch: 56 training_loss 0.132083584330976 test_loss: 0.12367407083511353
epoch: 57 training_loss 0.13342320319265127 test_loss: 0.12421227693557739
epoch: 58 training_loss 0.1364188326895237 test_loss: 0.12586257457733155
epoch: 59 training_loss 0.13548568297177555 test_loss: 0.1449047803878784
epoch: 60 training_loss 0.13319060679525138 test_loss: 0.12631598711013795
epoch: 61 training_loss 0.12792595818638802 test_loss: 0.1368715524673462
epoch: 62 training_loss 0.1302055461332202 test_loss: 0.14088282585144044
epoch: 63 training_loss 0.12879394147545098 test_loss: 0.12426066398620605
epoch: 64 training_loss 0.1326475501060486 test_loss: 0.14985477924346924
epoch: 65 training_loss 0.13081260912120343 test_loss: 0.1397389531135559
epoch: 66 training_loss 0.12096130974590778 test_loss: 0.13821150064468385
epoch: 67 training_loss 0.13676042031496763 test_loss: 0.12629420757293702
epoch: 68 training_loss 0.13152340814471244 test_loss: 0.14146575927734376
epoch: 69 training_loss 0.1301173746958375 test_loss: 0.12184344530105591
epoch: 70 training_loss 0.12921331517398357 test_loss: 0.12591991424560547
epoch: 71 training_loss 0.1231471524015069 test_loss: 0.1142653226852417
epoch: 72 training_loss 0.14203178219497203 test_loss: 0.13499119281768798
epoch: 73 training_loss 0.13743297770619392 test_loss: 0.13295735120773317
epoch: 74 training_loss 0.12585150327533484 test_loss: 0.12098357677459717
epoch: 75 training_loss 0.12947933312505483 test_loss: 0.11773180961608887
epoch: 76 training_loss 0.13250865455716848 test_loss: 0.14631683826446534
epoch: 77 training_loss 0.1275540404394269 test_loss: 0.13298732042312622
epoch: 78 training_loss 0.13162618439644574 test_loss: 0.1184763789176941
epoch: 79 training_loss 0.13173675511032343 test_loss: 0.11621867418289185
epoch: 80 training_loss 0.13524571869522334 test_loss: 0.1335180878639221
epoch: 81 training_loss 0.13112280275672675 test_loss: 0.1341518998146057
epoch: 82 training_loss 0.12555981561541557 test_loss: 0.14293086528778076
epoch: 83 training_loss 0.1396225256472826 test_loss: 0.1277616262435913
epoch: 84 training_loss 0.13372192669659852 test_loss: 0.15108226537704467
epoch: 85 training_loss 0.12607691410928965 test_loss: 0.12798081636428832
epoch: 86 training_loss 0.12877973694354294 test_loss: 0.13198820352554322
epoch: 87 training_loss 0.1388223510980606 test_loss: 0.11730078458786011
epoch: 88 training_loss 0.13857440557330847 test_loss: 0.13059628009796143
epoch: 89 training_loss 0.1312417034432292 test_loss: 0.13021703958511352
epoch: 90 training_loss 0.1295295311883092 test_loss: 0.11185373067855835
epoch: 91 training_loss 0.12355460152029991 test_loss: 0.133034610748291
epoch: 92 training_loss 0.12904397148638963 test_loss: 0.12721422910690308
epoch: 93 training_loss 0.13750230485573411 test_loss: 0.13816440105438232
epoch: 94 training_loss 0.12863827861845492 test_loss: 0.1367817997932434
epoch: 95 training_loss 0.12942919105291367 test_loss: 0.12819186449050904
epoch: 96 training_loss 0.12371394000947475 test_loss: 0.14029266834259033
epoch: 97 training_loss 0.12868910809978842 test_loss: 0.12964580059051514
epoch: 98 training_loss 0.12577967673540116 test_loss: 0.1378719449043274
epoch: 99 training_loss 0.13867021698504686 test_loss: 0.13799004554748534
epoch: 100 training_loss 0.12851355846971274 test_loss: 0.13242659568786622
epoch: 101 training_loss 0.12808350736275315 test_loss: 0.13429487943649293
epoch: 102 training_loss 0.12825020922347904 test_loss: 0.13627078533172607
epoch: 103 training_loss 0.12826568111777306 test_loss: 0.13254956007003785
epoch: 104 training_loss 0.12834185265004636 test_loss: 0.13267470598220826
epoch: 105 training_loss 0.1366272472962737 test_loss: 0.12585821151733398
epoch: 106 training_loss 0.13244380872696637 test_loss: 0.12075423002243042
epoch: 107 training_loss 0.13249314818531274 test_loss: 0.1255699634552002
epoch: 108 training_loss 0.13307060293853282 test_loss: 0.13751668930053712
epoch: 109 training_loss 0.13231709379702805 test_loss: 0.12055059671401977
epoch: 110 training_loss 0.12395753022283315 test_loss: 0.11021809577941895
epoch: 111 training_loss 0.1270441646873951 test_loss: 0.13406057357788087
epoch: 112 training_loss 0.13049573000520467 test_loss: 0.140541410446167
epoch: 113 training_loss 0.13431246498599647 test_loss: 0.14645278453826904
epoch: 114 training_loss 0.1294351715222001 test_loss: 0.12950586080551146
epoch: 115 training_loss 0.12904021255671977 test_loss: 0.12646645307540894
epoch: 116 training_loss 0.13079186279326677 test_loss: 0.13173954486846923
epoch: 117 training_loss 0.14363684363663196 test_loss: 0.15947966575622557
epoch: 118 training_loss 0.12720854241400958 test_loss: 0.12560750246047975
epoch: 119 training_loss 0.1264064324274659 test_loss: 0.12576078176498412
epoch: 120 training_loss 0.1354440041258931 test_loss: 0.15503748655319213
epoch: 121 training_loss 0.11912003692239523 test_loss: 0.12281122207641601
epoch: 122 training_loss 0.12845216032117604 test_loss: 0.16567527055740355
epoch: 123 training_loss 0.1373391167446971 test_loss: 0.12373625040054322
epoch: 124 training_loss 0.13254265602678061 test_loss: 0.12476826906204223
epoch: 125 training_loss 0.1268458042293787 test_loss: 0.12067341804504395
epoch: 126 training_loss 0.12750603038817643 test_loss: 0.13501665592193604
epoch: 127 training_loss 0.13207826778292656 test_loss: 0.1315619945526123
epoch: 128 training_loss 0.1317028847709298 test_loss: 0.14566428661346437
epoch: 129 training_loss 0.11810778420418501 test_loss: 0.125260591506958
epoch: 130 training_loss 0.12518966808915138 test_loss: 0.13775310516357422
epoch: 131 training_loss 0.12156861949712038 test_loss: 0.13423224687576293
epoch: 132 training_loss 0.128406417183578 test_loss: 0.13581355810165405
epoch: 133 training_loss 0.12601428641006351 test_loss: 0.12429554462432861
epoch: 134 training_loss 0.1351082321628928 test_loss: 0.13957576751708983
epoch: 135 training_loss 0.12208659134805203 test_loss: 0.12867846488952636
epoch: 136 training_loss 0.1197431180626154 test_loss: 0.15314196348190307
epoch: 137 training_loss 0.12919001758098603 test_loss: 0.11252378225326538
epoch: 138 training_loss 0.12879265028983355 test_loss: 0.13981986045837402
epoch: 139 training_loss 0.1338835709169507 test_loss: 0.14839638471603395
epoch: 140 training_loss 0.12135556844994426 test_loss: 0.12487690448760987
epoch: 141 training_loss 0.13631692975759507 test_loss: 0.12488113641738892
epoch: 142 training_loss 0.12908223763108254 test_loss: 0.12050886154174804
epoch: 143 training_loss 0.13010531216859816 test_loss: 0.14010463953018187
epoch: 144 training_loss 0.1286078642681241 test_loss: 0.14259181022644044
epoch: 145 training_loss 0.12402896467596293 test_loss: 0.13960424661636353
epoch: 146 training_loss 0.12255516599863768 test_loss: 0.12739144563674926
epoch: 147 training_loss 0.13060854256153107 test_loss: 0.14199234247207643
epoch: 148 training_loss 0.1310439206659794 test_loss: 0.13043346405029296
epoch: 149 training_loss 0.12826274951919914 test_loss: 0.1330920696258545
epoch: 0 training_loss 0.28525336310267446 test_loss: 0.19685815572738646
epoch: 1 training_loss 0.18648875035345555 test_loss: 0.18489128351211548
epoch: 2 training_loss 0.17170728728175164 test_loss: 0.19066934585571288
epoch: 3 training_loss 0.16604485996067525 test_loss: 0.17577604055404664
epoch: 4 training_loss 0.1699431712180376 test_loss: 0.17345455884933472
epoch: 5 training_loss 0.16265309937298297 test_loss: 0.17093874216079713
epoch: 6 training_loss 0.15752529323101044 test_loss: 0.14271171092987062
epoch: 7 training_loss 0.1484201805666089 test_loss: 0.14043768644332885
epoch: 8 training_loss 0.14611612737178803 test_loss: 0.125325608253479
epoch: 9 training_loss 0.14603634554892778 test_loss: 0.172443687915802
epoch: 10 training_loss 0.1504062605649233 test_loss: 0.1672034502029419
epoch: 11 training_loss 0.15425634082406758 test_loss: 0.16617774963378906
epoch: 12 training_loss 0.14208033464848996 test_loss: 0.16088985204696654
epoch: 13 training_loss 0.14954243235290052 test_loss: 0.1576189398765564
epoch: 14 training_loss 0.15174901768565177 test_loss: 0.14042798280715943
epoch: 15 training_loss 0.1410587926954031 test_loss: 0.1541467308998108
epoch: 16 training_loss 0.15125125041231513 test_loss: 0.1496274471282959
epoch: 17 training_loss 0.14655391719192268 test_loss: 0.15974924564361573
epoch: 18 training_loss 0.140656568557024 test_loss: 0.1168362021446228
epoch: 19 training_loss 0.15043152153491973 test_loss: 0.16361621618270875
epoch: 20 training_loss 0.1409556222334504 test_loss: 0.17054113149642944
epoch: 21 training_loss 0.1485907307267189 test_loss: 0.16207749843597413
epoch: 22 training_loss 0.14289827179163694 test_loss: 0.14631177186965943
epoch: 23 training_loss 0.14693781156092883 test_loss: 0.1337994694709778
epoch: 24 training_loss 0.14040387228131293 test_loss: 0.15108364820480347
epoch: 25 training_loss 0.14729353122413158 test_loss: 0.1441033124923706
epoch: 26 training_loss 0.13606616608798505 test_loss: 0.14472267627716065
epoch: 27 training_loss 0.14048058997839688 test_loss: 0.16040499210357667
epoch: 28 training_loss 0.14038965839892625 test_loss: 0.15323621034622192
epoch: 29 training_loss 0.14110370934009553 test_loss: 0.15556461811065675
epoch: 30 training_loss 0.1315507499501109 test_loss: 0.15944572687149047
epoch: 31 training_loss 0.1507481650263071 test_loss: 0.14200032949447633
epoch: 32 training_loss 0.1377136081829667 test_loss: 0.1560894012451172
epoch: 33 training_loss 0.13576000314205885 test_loss: 0.1376117944717407
epoch: 34 training_loss 0.13660797584801912 test_loss: 0.1339867115020752
epoch: 35 training_loss 0.1404629909992218 test_loss: 0.1516236186027527
epoch: 36 training_loss 0.13827363248914482 test_loss: 0.14555799961090088
epoch: 37 training_loss 0.13679179791361093 test_loss: 0.15334361791610718
epoch: 38 training_loss 0.13935794699937104 test_loss: 0.12868764400482177
epoch: 39 training_loss 0.13796943858265875 test_loss: 0.14863934516906738
epoch: 40 training_loss 0.1428276776522398 test_loss: 0.13451286554336547
epoch: 41 training_loss 0.14166810464113952 test_loss: 0.14004687070846558
epoch: 42 training_loss 0.132936498709023 test_loss: 0.14684518575668334
epoch: 43 training_loss 0.13754655800759794 test_loss: 0.1481965184211731
epoch: 44 training_loss 0.14103086885064842 test_loss: 0.13557262420654298
epoch: 45 training_loss 0.14108561657369137 test_loss: 0.14908127784729003
epoch: 46 training_loss 0.1384740836918354 test_loss: 0.1305690050125122
epoch: 47 training_loss 0.13857549756765367 test_loss: 0.132338285446167
epoch: 48 training_loss 0.1331211394071579 test_loss: 0.15959089994430542
epoch: 49 training_loss 0.14029775211587547 test_loss: 0.1458868145942688
epoch: 50 training_loss 0.13439044188708066 test_loss: 0.14367239475250243
epoch: 51 training_loss 0.127373283803463 test_loss: 0.14562852382659913
epoch: 52 training_loss 0.13170406742021443 test_loss: 0.14027528762817382
epoch: 53 training_loss 0.13579122394323348 test_loss: 0.1588142156600952
epoch: 54 training_loss 0.13210030540823936 test_loss: 0.14415196180343628
epoch: 55 training_loss 0.13137445297092198 test_loss: 0.15044810771942138
epoch: 56 training_loss 0.13555085007101297 test_loss: 0.13938331604003906
epoch: 57 training_loss 0.13253167416900397 test_loss: 0.13633127212524415
epoch: 58 training_loss 0.13665154531598092 test_loss: 0.14245843887329102
epoch: 59 training_loss 0.14068420175462962 test_loss: 0.1426914095878601
epoch: 60 training_loss 0.13468339804559945 test_loss: 0.14556994438171386
epoch: 61 training_loss 0.13985092379152775 test_loss: 0.14389771223068237
epoch: 62 training_loss 0.1416772797703743 test_loss: 0.12346895933151245
epoch: 63 training_loss 0.13681897979229687 test_loss: 0.14233431816101075
epoch: 64 training_loss 0.13308132003992795 test_loss: 0.1260363221168518
epoch: 65 training_loss 0.12686833776533604 test_loss: 0.14326664209365844
epoch: 66 training_loss 0.1342277704179287 test_loss: 0.15674058198928834
epoch: 67 training_loss 0.13889326840639116 test_loss: 0.12362006902694703
epoch: 68 training_loss 0.1442306024581194 test_loss: 0.13695541620254517
epoch: 69 training_loss 0.130734209343791 test_loss: 0.12902183532714845
epoch: 70 training_loss 0.13528707284480335 test_loss: 0.13812735080718994
epoch: 71 training_loss 0.1315698627009988 test_loss: 0.14299540519714354
epoch: 72 training_loss 0.13874910581856967 test_loss: 0.15067799091339112
epoch: 73 training_loss 0.13040372967720032 test_loss: 0.12563079595565796
epoch: 74 training_loss 0.13758148103952408 test_loss: 0.14611750841140747
epoch: 75 training_loss 0.13859764274209738 test_loss: 0.15182541608810424
epoch: 76 training_loss 0.1300172223523259 test_loss: 0.1405022144317627
epoch: 77 training_loss 0.1419971513748169 test_loss: 0.1463489532470703
epoch: 78 training_loss 0.13700386341661214 test_loss: 0.14048056602478026
epoch: 79 training_loss 0.12911685831844807 test_loss: 0.13120795488357545
epoch: 80 training_loss 0.1295352413877845 test_loss: 0.13241294622421265
epoch: 81 training_loss 0.13201095443218946 test_loss: 0.13642210960388185
epoch: 82 training_loss 0.13525640580803155 test_loss: 0.13567827939987182
epoch: 83 training_loss 0.14150190189480782 test_loss: 0.14925615787506102
epoch: 84 training_loss 0.13422195617109536 test_loss: 0.13411531448364258
epoch: 85 training_loss 0.13438584040850401 test_loss: 0.13705610036849974
epoch: 86 training_loss 0.12383664298802614 test_loss: 0.1246124267578125
epoch: 87 training_loss 0.14245364479720593 test_loss: 0.16226730346679688
epoch: 88 training_loss 0.13662910502403974 test_loss: 0.1378333568572998
epoch: 89 training_loss 0.13133344646543266 test_loss: 0.15407638549804686
epoch: 90 training_loss 0.13949933364987374 test_loss: 0.14043084383010865
epoch: 91 training_loss 0.13638913691043852 test_loss: 0.12905580997467042
epoch: 92 training_loss 0.1313006567955017 test_loss: 0.14662176370620728
epoch: 93 training_loss 0.13964224349707366 test_loss: 0.13077952861785888
epoch: 94 training_loss 0.1329343681782484 test_loss: 0.14847766160964965
epoch: 95 training_loss 0.12971008874475956 test_loss: 0.12491101026535034
epoch: 96 training_loss 0.13062616031616925 test_loss: 0.1331969380378723
epoch: 97 training_loss 0.136695202216506 test_loss: 0.1410541296005249
epoch: 98 training_loss 0.14063226237893103 test_loss: 0.14871612787246705
epoch: 99 training_loss 0.1392531729862094 test_loss: 0.1380153179168701
epoch: 100 training_loss 0.13195062573999167 test_loss: 0.13706870079040528
epoch: 101 training_loss 0.13215449154376985 test_loss: 0.12714534997940063
epoch: 102 training_loss 0.13085942193865777 test_loss: 0.14190374612808226
epoch: 103 training_loss 0.13617203470319508 test_loss: 0.14030356407165528
epoch: 104 training_loss 0.13406490493565798 test_loss: 0.13216062784194946
epoch: 105 training_loss 0.14068939816206694 test_loss: 0.12815481424331665
epoch: 106 training_loss 0.12661182668060064 test_loss: 0.12550359964370728
epoch: 107 training_loss 0.13578758921474218 test_loss: 0.15975024700164794
epoch: 108 training_loss 0.1354172693938017 test_loss: 0.14939190149307252
epoch: 109 training_loss 0.1349849659949541 test_loss: 0.13255369663238525
epoch: 110 training_loss 0.13427911181002855 test_loss: 0.13007402420043945
epoch: 111 training_loss 0.1310991406068206 test_loss: 0.13226784467697145
epoch: 112 training_loss 0.13530755907297135 test_loss: 0.1522218704223633
epoch: 113 training_loss 0.13155055832117796 test_loss: 0.126591956615448
epoch: 114 training_loss 0.1264009539783001 test_loss: 0.13645462989807128
epoch: 115 training_loss 0.13207076888531447 test_loss: 0.13308829069137573
epoch: 116 training_loss 0.1281862948089838 test_loss: 0.12336422204971313
epoch: 117 training_loss 0.13700870521366595 test_loss: 0.13504968881607055
epoch: 118 training_loss 0.13974083550274372 test_loss: 0.13329973220825195
epoch: 119 training_loss 0.14147664349526168 test_loss: 0.15106486082077025
epoch: 120 training_loss 0.13139000706374646 test_loss: 0.12400418519973755
epoch: 121 training_loss 0.12654626660048962 test_loss: 0.1482999563217163
epoch: 122 training_loss 0.13497734177857637 test_loss: 0.12519659996032714
epoch: 123 training_loss 0.1304521816968918 test_loss: 0.1385716199874878
epoch: 124 training_loss 0.12347707089036702 test_loss: 0.13500900268554689
epoch: 125 training_loss 0.13097280578687787 test_loss: 0.13908827304840088
epoch: 126 training_loss 0.1314021309092641 test_loss: 0.13047428131103517
epoch: 127 training_loss 0.13640736473724246 test_loss: 0.1387072205543518
epoch: 128 training_loss 0.13661297861486674 test_loss: 0.13491441011428834
epoch: 129 training_loss 0.1364646252244711 test_loss: 0.12957212924957276
epoch: 130 training_loss 0.12198585104197264 test_loss: 0.12751386165618897
epoch: 131 training_loss 0.13345737934112548 test_loss: 0.13777984380722047
epoch: 132 training_loss 0.13204995181411505 test_loss: 0.1367521643638611
epoch: 133 training_loss 0.12466514341533184 test_loss: 0.14081698656082153
epoch: 134 training_loss 0.12634566862136126 test_loss: 0.13661258220672606
epoch: 135 training_loss 0.1356529888883233 test_loss: 0.12823456525802612
epoch: 136 training_loss 0.12765931069850922 test_loss: 0.13219791650772095
epoch: 137 training_loss 0.1309371218457818 test_loss: 0.13148348331451415
epoch: 138 training_loss 0.12892246682196856 test_loss: 0.12677334547042846
epoch: 139 training_loss 0.13307966843247412 test_loss: 0.14336622953414918
epoch: 140 training_loss 0.1295459549129009 test_loss: 0.1378175973892212
epoch: 141 training_loss 0.13238389071077108 test_loss: 0.13802467584609984
epoch: 142 training_loss 0.12802613267675042 test_loss: 0.12367961406707764
epoch: 143 training_loss 0.12991171289235354 test_loss: 0.14121268987655639
epoch: 144 training_loss 0.1341766133531928 test_loss: 0.13719946146011353
epoch: 145 training_loss 0.13409969713538886 test_loss: 0.14286863803863525
epoch: 146 training_loss 0.13575210040435196 test_loss: 0.13867664337158203
epoch: 147 training_loss 0.1293204341828823 test_loss: 0.14108505249023437
epoch: 148 training_loss 0.13271347288042307 test_loss: 0.1419545292854309
epoch: 149 training_loss 0.13328669518232344 test_loss: 0.14035903215408324
episode: 0 training return: -1000.5869286575586
episode: 1 training return: -1005.0047259691921
episode: 2 training return: -1000.2406606988357
episode: 3 training return: -1000.7277615499787
epoch: 1 test_true_pfm: -74.01757358477295 sim_pfm: -981.1990510158251
episode: 4 training return: -1040.9067002135384
episode: 5 training return: -1008.6279881172773
episode: 6 training return: -986.1511224518047
episode: 7 training return: -996.7626593610606
epoch: 2 test_true_pfm: 159.7359526412888 sim_pfm: -962.5964662133321
episode: 8 training return: -1041.6025756363044
episode: 9 training return: -988.7354411406822
episode: 10 training return: -971.3816208133696
episode: 11 training return: -966.7718477370446
epoch: 3 test_true_pfm: 213.90305719478928 sim_pfm: -978.3797217979967
episode: 12 training return: -1022.5760518799539
episode: 13 training return: -987.30532754866
episode: 14 training return: -955.9981228958447
episode: 15 training return: -943.2371539558544
epoch: 4 test_true_pfm: 228.74455771637795 sim_pfm: -928.0909380905309
episode: 16 training return: -964.3957321680681
episode: 17 training return: -939.3644216373532
episode: 18 training return: -931.5777460910821
episode: 19 training return: -931.4485799058629
epoch: 5 test_true_pfm: 245.21192555491402 sim_pfm: -939.8658996948425
episode: 20 training return: -944.0177842006003
episode: 21 training return: -932.9317539229654
episode: 22 training return: -967.2108434493057
episode: 23 training return: -925.1564291491591
epoch: 6 test_true_pfm: 249.44402141019864 sim_pfm: -926.0644264820811
episode: 24 training return: -923.7371427238652
episode: 25 training return: -947.166713252346
episode: 26 training return: -905.7052238951977
episode: 27 training return: -915.8425420910015
epoch: 7 test_true_pfm: 204.1176307020012 sim_pfm: -906.354711248006
episode: 28 training return: -934.2384727905171
episode: 29 training return: -929.0105721318401
episode: 30 training return: -936.5504136494201
episode: 31 training return: -942.9332686653808
epoch: 8 test_true_pfm: 248.7268100931329 sim_pfm: -898.384114474827
episode: 32 training return: -928.7013892111113
episode: 33 training return: -911.9025568828644
episode: 34 training return: -902.627042024155
episode: 35 training return: -929.9930427495437
epoch: 9 test_true_pfm: 229.88591859942582 sim_pfm: -911.3412899267681
episode: 36 training return: -921.1181352951443
episode: 37 training return: -902.3344320397097
episode: 38 training return: -933.1334354981008
episode: 39 training return: -913.2097667575906
epoch: 10 test_true_pfm: 248.8983947735475 sim_pfm: -894.7068797101178
episode: 40 training return: -893.8823410821454
episode: 41 training return: -887.9019767715296
episode: 42 training return: -903.4065061309836
episode: 43 training return: -893.561344831662
epoch: 11 test_true_pfm: 240.9040635903394 sim_pfm: -895.9107640482783
episode: 44 training return: -914.6415412432649
episode: 45 training return: -928.9617872406868
episode: 46 training return: -901.4588203716961
episode: 47 training return: -915.2020115311552
epoch: 12 test_true_pfm: 182.97124623589897 sim_pfm: -902.546488813174
episode: 48 training return: -903.0444683381621
episode: 49 training return: -907.5553406964553
episode: 50 training return: -898.427877150782
episode: 51 training return: -897.299808085217
epoch: 13 test_true_pfm: 213.0174645533099 sim_pfm: -887.1863658734916
episode: 52 training return: -896.1595368646722
episode: 53 training return: -891.5907354821395
episode: 54 training return: -898.1482521743411
episode: 55 training return: -883.4422016244616
epoch: 14 test_true_pfm: 248.1658617351156 sim_pfm: -868.0666619442918
episode: 56 training return: -894.5905118436182
episode: 57 training return: -889.8426932416737
episode: 58 training return: -893.0773969760827
episode: 59 training return: -878.6712823997879
epoch: 15 test_true_pfm: 207.02835685643706 sim_pfm: -870.9303644318394
episode: 60 training return: -887.7688279688003
episode: 61 training return: -889.5404247392075
episode: 62 training return: -897.9033260955105
episode: 63 training return: -900.7567809410627
epoch: 16 test_true_pfm: 190.15368926221467 sim_pfm: -889.9599645708055
episode: 64 training return: -892.0586436442516
episode: 65 training return: -888.5419087739706
episode: 66 training return: -887.6592299437702
episode: 67 training return: -896.9531338380015
epoch: 17 test_true_pfm: 213.9239723644523 sim_pfm: -876.8468696013292
episode: 68 training return: -898.4606187252075
episode: 69 training return: -891.9311329676779
episode: 70 training return: -876.9521088658707
episode: 71 training return: -880.7024211854964
epoch: 18 test_true_pfm: 253.01215425302135 sim_pfm: -861.1383270956208
episode: 72 training return: -883.0508321986318
episode: 73 training return: -886.07188381381
episode: 74 training return: -878.3899164247962
episode: 75 training return: -877.8402484259368
epoch: 19 test_true_pfm: 218.4211476225141 sim_pfm: -863.4329009819229
episode: 76 training return: -935.9072709111068
episode: 77 training return: -882.339944277269
episode: 78 training return: -885.6498584973544
episode: 79 training return: -880.5587145307924
epoch: 20 test_true_pfm: 234.67180860719384 sim_pfm: -868.4387537896573
episode: 80 training return: -887.9267805496469
episode: 81 training return: -884.4765182322365
episode: 82 training return: -798.3598966103152
episode: 83 training return: -878.3126287939373
epoch: 21 test_true_pfm: 263.3989531834514 sim_pfm: -866.3130778468749
episode: 84 training return: -868.5741986704805
episode: 85 training return: -853.1595585811257
episode: 86 training return: -892.1175552042041
episode: 87 training return: -871.3581096847059
epoch: 22 test_true_pfm: 225.14051711706645 sim_pfm: -863.3195758478165
episode: 88 training return: -880.8961991899977
episode: 89 training return: -919.7480009837757
episode: 90 training return: -889.1975688015656
episode: 91 training return: -827.1353262280724
epoch: 23 test_true_pfm: 225.94615683756362 sim_pfm: -858.9393244955482
episode: 92 training return: -872.3689270827816
episode: 93 training return: -870.7768262561315
episode: 94 training return: -878.130542583327
episode: 95 training return: -880.5772092158818
epoch: 24 test_true_pfm: 220.93884559943035 sim_pfm: -864.3111379886914
episode: 96 training return: -872.4055975274049
episode: 97 training return: -879.0122306083473
episode: 98 training return: -894.9223514237364
episode: 99 training return: -880.1514467301008
epoch: 25 test_true_pfm: 222.94985753141611 sim_pfm: -861.8032479200841
episode: 100 training return: -882.0819575356284
episode: 101 training return: -893.3179584009717
episode: 102 training return: -876.9159036651372
episode: 103 training return: -867.1733387531865
epoch: 26 test_true_pfm: 247.64008407256787 sim_pfm: -849.6786769884892
episode: 104 training return: -867.348059511486
episode: 105 training return: -865.0899747150223
episode: 106 training return: -870.1546138496649
episode: 107 training return: -860.069564534502
epoch: 27 test_true_pfm: 222.01715427451936 sim_pfm: -857.582074948445
episode: 108 training return: -876.6195806686894
episode: 109 training return: -870.3603121222194
episode: 110 training return: -857.0147486091357
episode: 111 training return: -870.9208325195772
epoch: 28 test_true_pfm: 265.5160847172919 sim_pfm: -856.0382653115889
episode: 112 training return: -866.7684021470734
episode: 113 training return: -863.828426142733
episode: 114 training return: -859.2161408403798
episode: 115 training return: -871.0561604499803
epoch: 29 test_true_pfm: 238.88683375691258 sim_pfm: -849.5503880828701
episode: 116 training return: -867.2573553229432
episode: 117 training return: -879.0662186330193
episode: 118 training return: -859.5924097532568
episode: 119 training return: -860.7253134714373
epoch: 30 test_true_pfm: 230.01716498240526 sim_pfm: -856.1704742872589
episode: 120 training return: -881.6004313199853
episode: 121 training return: -863.9563360600306
episode: 122 training return: -858.3199211670308
episode: 123 training return: -860.4801587488038
epoch: 31 test_true_pfm: 238.89976057885494 sim_pfm: -848.7027867376211
episode: 124 training return: -854.9974577930666
episode: 125 training return: -864.5275239755877
episode: 126 training return: -874.3893602465499
episode: 127 training return: -859.7722935082786
epoch: 32 test_true_pfm: 248.43381675000407 sim_pfm: -842.7130548097915
episode: 128 training return: -851.738037435523
episode: 129 training return: -866.6554576944711
episode: 130 training return: -862.2417607721056
episode: 131 training return: -850.2777481542721
epoch: 33 test_true_pfm: 266.67530842682464 sim_pfm: -854.9952364675491
episode: 132 training return: -861.4092859412129
episode: 133 training return: -857.3355807023693
episode: 134 training return: -867.4790797037205
episode: 135 training return: -863.1297793144313
epoch: 34 test_true_pfm: 235.2178643511605 sim_pfm: -854.446336826335
episode: 136 training return: -863.7028222435833
episode: 137 training return: -861.0374664202525
episode: 138 training return: -861.534372963291
episode: 139 training return: -852.9310605639306
epoch: 35 test_true_pfm: 274.0832750915786 sim_pfm: -853.2007343141622
episode: 140 training return: -850.3795196847416
episode: 141 training return: -883.1155521624493
episode: 142 training return: -866.4063673090649
episode: 143 training return: -846.039056530554
epoch: 36 test_true_pfm: 248.7339406246292 sim_pfm: -850.4953689115056
episode: 144 training return: -854.1084091160614
episode: 145 training return: -852.1356904745581
episode: 146 training return: -872.3332987269087
episode: 147 training return: -860.1401665353375
epoch: 37 test_true_pfm: 248.94589245529082 sim_pfm: -841.3210466315486
episode: 148 training return: -860.0753348684822
episode: 149 training return: -864.1494451557769
episode: 150 training return: -848.0028948184952
episode: 151 training return: -862.9016510081166
epoch: 38 test_true_pfm: 245.6988344615077 sim_pfm: -846.2954652519899
episode: 152 training return: -885.0961777462006
episode: 153 training return: -867.3930979320462
episode: 154 training return: -861.2558384611565
episode: 155 training return: -863.7966339548748
epoch: 39 test_true_pfm: 239.62900310256202 sim_pfm: -847.2791144648481
episode: 156 training return: -857.1221179615183
episode: 157 training return: -860.6398997237094
episode: 158 training return: -851.9732254482942
episode: 159 training return: -854.5824753832115
epoch: 40 test_true_pfm: 238.30067130915617 sim_pfm: -850.7925394124574
episode: 160 training return: -868.0367885036299
episode: 161 training return: -861.6963673534451
episode: 162 training return: -861.8242862391398
episode: 163 training return: -851.2368043887836
epoch: 41 test_true_pfm: 267.663027440541 sim_pfm: -845.8731215683703
episode: 164 training return: -867.1973244427735
episode: 165 training return: -859.4433228787885
episode: 166 training return: -866.4261997485582
episode: 167 training return: -852.8960395058077
epoch: 42 test_true_pfm: 242.97068496872626 sim_pfm: -844.8130407246002
episode: 168 training return: -851.6584028815882
episode: 169 training return: -846.6691978889669
episode: 170 training return: -862.1727309726746
episode: 171 training return: -851.814564523041
epoch: 43 test_true_pfm: 246.2159018147444 sim_pfm: -845.908427669723
episode: 172 training return: -848.2334232294996
episode: 173 training return: -861.5412492752595
episode: 174 training return: -859.6663655494691
episode: 175 training return: -848.0504086746502
epoch: 44 test_true_pfm: 243.45144911230196 sim_pfm: -839.8473608022141
episode: 176 training return: -850.3145982782459
episode: 177 training return: -853.7254285453515
episode: 178 training return: -847.8347439791403
episode: 179 training return: -851.4439588220221
epoch: 45 test_true_pfm: 249.96595190850925 sim_pfm: -847.8944350286765
episode: 180 training return: -849.2298661052039
episode: 181 training return: -851.2257596347239
episode: 182 training return: -853.513428672283
episode: 183 training return: -851.6895840404854
epoch: 46 test_true_pfm: 252.0171017861795 sim_pfm: -845.5179637876321
episode: 184 training return: -860.6025083576183
episode: 185 training return: -851.7192503066509
episode: 186 training return: -855.7149827728352
episode: 187 training return: -852.065016159755
epoch: 47 test_true_pfm: 246.68635646641494 sim_pfm: -848.3689649502143
episode: 188 training return: -862.4793924919327
episode: 189 training return: -859.0744690853076
episode: 190 training return: -849.0960162323728
episode: 191 training return: -859.0613899794903
epoch: 48 test_true_pfm: 260.5335785338448 sim_pfm: -837.805619055256
episode: 192 training return: -856.7987405104304
episode: 193 training return: -848.9750331026462
episode: 194 training return: -860.9380917353528
episode: 195 training return: -870.682302903312
epoch: 49 test_true_pfm: 277.4555580970054 sim_pfm: -844.7609944126262
episode: 196 training return: -861.8960932891003
episode: 197 training return: -851.0660819306911
episode: 198 training return: -845.3465733305949
episode: 199 training return: -856.0661094924754
epoch: 50 test_true_pfm: 254.11937228804834 sim_pfm: -844.0795123585409
episode: 200 training return: -845.8943430293918
episode: 201 training return: -865.5861173880294
episode: 202 training return: -865.1436723384356
episode: 203 training return: -853.4712346456432
epoch: 51 test_true_pfm: 246.7040504421333 sim_pfm: -835.1706746726635
episode: 204 training return: -846.1244940242218
episode: 205 training return: -854.0058462554108
episode: 206 training return: -856.2826234047377
episode: 207 training return: -858.3842839244893
epoch: 52 test_true_pfm: 252.0777600388108 sim_pfm: -837.4046269601255
episode: 208 training return: -861.6569258217037
episode: 209 training return: -849.163570924939
episode: 210 training return: -852.9575842548267
episode: 211 training return: -858.7996843901694
epoch: 53 test_true_pfm: 254.49765000485286 sim_pfm: -837.8336752443862
episode: 212 training return: -857.9669111350393
episode: 213 training return: -845.799562628162
episode: 214 training return: -864.1613672013066
episode: 215 training return: -847.9568415070016
epoch: 54 test_true_pfm: 274.5609459781021 sim_pfm: -845.5874183894626
episode: 216 training return: -848.39006084887
episode: 217 training return: -860.3198665230688
episode: 218 training return: -855.9120864903614
episode: 219 training return: -855.8680992563548
epoch: 55 test_true_pfm: 267.5809101997429 sim_pfm: -845.9554658275061
episode: 220 training return: -856.3332286247928
episode: 221 training return: -860.0893721195858
episode: 222 training return: -854.5070642800273
episode: 223 training return: -856.0719970809399
epoch: 56 test_true_pfm: 263.32613851187284 sim_pfm: -838.2524931709155
episode: 224 training return: -840.1663359662476
episode: 225 training return: -839.0444188024077
episode: 226 training return: -847.3404120157677
episode: 227 training return: -848.6698648312889
epoch: 57 test_true_pfm: 249.2533302114704 sim_pfm: -837.0645919262084
episode: 228 training return: -856.8119839061908
episode: 229 training return: -855.4007438127776
episode: 230 training return: -853.0005860671068
episode: 231 training return: -849.376121646094
epoch: 58 test_true_pfm: 242.9274025163611 sim_pfm: -841.3957913364065
episode: 232 training return: -844.0397255672889
episode: 233 training return: -847.8961532017146
episode: 234 training return: -847.7052405009996
episode: 235 training return: -848.7997221874647
epoch: 59 test_true_pfm: 263.0880364561461 sim_pfm: -838.7225759014542
episode: 236 training return: -855.0958905606672
episode: 237 training return: -862.0099706303959
episode: 238 training return: -848.0711832256637
episode: 239 training return: -852.3553462948068
epoch: 60 test_true_pfm: 266.3395091630072 sim_pfm: -832.6590599921716
episode: 240 training return: -849.3088872429914
episode: 241 training return: -852.0622329774466
episode: 242 training return: -859.1577034345869
episode: 243 training return: -851.0741478361641
epoch: 61 test_true_pfm: 258.5928190404748 sim_pfm: -834.8677645954326
episode: 244 training return: -844.091096450308
episode: 245 training return: -844.4474563905828
episode: 246 training return: -848.4801984651102
episode: 247 training return: -860.7403636295438
epoch: 62 test_true_pfm: 257.24423667406444 sim_pfm: -835.8042929918082
episode: 248 training return: -850.8216755713473
episode: 249 training return: -851.7393983230874
episode: 250 training return: -851.4819027793479
episode: 251 training return: -856.1540490233684
epoch: 63 test_true_pfm: 251.4791003813548 sim_pfm: -839.1235716312227
episode: 252 training return: -841.354713412405
episode: 253 training return: -847.1766186602874
episode: 254 training return: -854.84840652626
episode: 255 training return: -853.935682173745
epoch: 64 test_true_pfm: 258.68179535378596 sim_pfm: -834.6335770634145
episode: 256 training return: -870.5088568377838
episode: 257 training return: -853.7901775582122
episode: 258 training return: -859.3395716556553
episode: 259 training return: -859.0405242761652
epoch: 65 test_true_pfm: 261.0123137718811 sim_pfm: -838.8010567578549
episode: 260 training return: -857.6291246527084
episode: 261 training return: -839.7365464357101
episode: 262 training return: -855.7742142560168
episode: 263 training return: -871.1607172212495
epoch: 66 test_true_pfm: 235.99450048656465 sim_pfm: -844.1795912715735
episode: 264 training return: -863.1861864998859
episode: 265 training return: -862.4132493356367
episode: 266 training return: -848.0929328555911
episode: 267 training return: -852.9150648167339
epoch: 67 test_true_pfm: 276.5201450138013 sim_pfm: -842.4764595099942
episode: 268 training return: -848.0419357716655
episode: 269 training return: -863.5738065204124
episode: 270 training return: -850.400465892835
episode: 271 training return: -851.4558825271105
epoch: 68 test_true_pfm: 253.52965767252 sim_pfm: -843.2912664976274
episode: 272 training return: -856.4396452691867
episode: 273 training return: -861.6377368302459
episode: 274 training return: -879.4720509144078
episode: 275 training return: -864.164386128003
epoch: 69 test_true_pfm: 249.97129335220657 sim_pfm: -842.3533333437699
episode: 276 training return: -863.3186034456749
episode: 277 training return: -861.5128052252984
episode: 278 training return: -864.7838903427696
episode: 279 training return: -847.5136300249864
epoch: 70 test_true_pfm: 248.30583362706184 sim_pfm: -841.1927727035187
episode: 280 training return: -861.3809848370015
episode: 281 training return: -858.8608582845978
episode: 282 training return: -860.6402330685978
episode: 283 training return: -861.9287718309479
epoch: 71 test_true_pfm: 212.23328651487228 sim_pfm: -850.8249666816622
episode: 284 training return: -873.052528153618
episode: 285 training return: -866.2042189468855
episode: 286 training return: -887.3940710963184
episode: 287 training return: -860.1484215835362
epoch: 72 test_true_pfm: 219.9583047247385 sim_pfm: -846.365564208732
episode: 288 training return: -873.9817662288259
episode: 289 training return: -851.2444301261265
episode: 290 training return: -853.2304922299062
episode: 291 training return: -861.2992938531828
epoch: 73 test_true_pfm: 224.47594418501146 sim_pfm: -844.4699560047825
episode: 292 training return: -848.4685316051307
episode: 293 training return: -853.4270890334002
episode: 294 training return: -850.5678087561564
episode: 295 training return: -854.2825267834776
epoch: 74 test_true_pfm: 252.6795844013577 sim_pfm: -849.8458611184689
episode: 296 training return: -851.9626978350024
episode: 297 training return: -848.3201869668511
episode: 298 training return: -852.3137128564407
episode: 299 training return: -856.9676600053732
epoch: 75 test_true_pfm: 232.45474634189227 sim_pfm: -841.3329622415869
episode: 300 training return: -858.3897332050498
episode: 301 training return: -855.1463620010907
episode: 302 training return: -841.8821649420321
episode: 303 training return: -852.450438875564
epoch: 76 test_true_pfm: 252.07498352588186 sim_pfm: -842.5534549775134
episode: 304 training return: -849.6272473379559
episode: 305 training return: -872.6242146297151
episode: 306 training return: -852.2344618910369
episode: 307 training return: -850.1552600146804
epoch: 77 test_true_pfm: 245.21445192905216 sim_pfm: -841.0260610941259
episode: 308 training return: -852.0370177245683
episode: 309 training return: -852.5616904499914
episode: 310 training return: -847.9830112396205
episode: 311 training return: -854.3404653492703
epoch: 78 test_true_pfm: 229.4974365431783 sim_pfm: -847.7452289809593
episode: 312 training return: -843.7031797506156
episode: 313 training return: -859.8866813094149
episode: 314 training return: -856.4895515001028
episode: 315 training return: -847.833793467056
epoch: 79 test_true_pfm: 228.54602199456374 sim_pfm: -842.2581392691742
episode: 316 training return: -856.8743775057183
episode: 317 training return: -847.5978126139489
episode: 318 training return: -842.5086712319812
episode: 319 training return: -845.2562925118955
epoch: 80 test_true_pfm: 231.7427964452073 sim_pfm: -842.7797456910904
episode: 320 training return: -852.2500451589677
episode: 321 training return: -849.3478184470031
episode: 322 training return: -851.2806390792985
episode: 323 training return: -843.9072916308373
epoch: 81 test_true_pfm: 251.00113068106702 sim_pfm: -838.6143757056525
episode: 324 training return: -851.6290658279023
episode: 325 training return: -846.5589373443116
episode: 326 training return: -856.2427552998686
episode: 327 training return: -845.871980619846
epoch: 82 test_true_pfm: 246.16438677546378 sim_pfm: -835.2702238208755
episode: 328 training return: -854.2054334883479
episode: 329 training return: -841.9482047811488
episode: 330 training return: -850.9707226234098
episode: 331 training return: -856.9793712925566
epoch: 83 test_true_pfm: 254.60678119008674 sim_pfm: -838.8810262444778
episode: 332 training return: -854.823948326245
episode: 333 training return: -848.2777537174252
episode: 334 training return: -854.1779350842213
episode: 335 training return: -849.8785534368599
epoch: 84 test_true_pfm: 255.47445661092175 sim_pfm: -834.6988121046497
episode: 336 training return: -851.5027716732553
episode: 337 training return: -852.6111600255967
episode: 338 training return: -851.1578512342476
episode: 339 training return: -849.6662991238478
epoch: 85 test_true_pfm: 247.62953403896765 sim_pfm: -836.4540618860893
episode: 340 training return: -848.9494933404326
episode: 341 training return: -857.6310334478682
episode: 342 training return: -853.2193563073729
episode: 343 training return: -856.9702269121269
epoch: 86 test_true_pfm: 250.28776650238038 sim_pfm: -835.8761197392259
episode: 344 training return: -860.1166832432408
episode: 345 training return: -852.704309706905
episode: 346 training return: -848.9901056327288
episode: 347 training return: -850.800826242559
epoch: 87 test_true_pfm: 240.2852938313115 sim_pfm: -839.2137982509006
episode: 348 training return: -848.3051511812428
episode: 349 training return: -845.9035875882711
episode: 350 training return: -851.601659722324
episode: 351 training return: -854.2983877234035
epoch: 88 test_true_pfm: 228.97077640648527 sim_pfm: -847.5862452469856
episode: 352 training return: -846.0981243997039
episode: 353 training return: -856.3539399291351
episode: 354 training return: -862.062231555079
episode: 355 training return: -853.5880954207187
epoch: 89 test_true_pfm: 241.82578085577697 sim_pfm: -843.0343775135576
episode: 356 training return: -846.7091314721814
episode: 357 training return: -857.3429200548162
episode: 358 training return: -853.7693838243097
episode: 359 training return: -857.6124937137125
epoch: 90 test_true_pfm: 237.31238687530615 sim_pfm: -838.9711857205493
episode: 360 training return: -855.7401695788438
episode: 361 training return: -844.8361613875377
episode: 362 training return: -846.731700911893
episode: 363 training return: -848.3916447277205
epoch: 91 test_true_pfm: 251.20110160714424 sim_pfm: -835.3666274670953
episode: 364 training return: -855.3027530543362
episode: 365 training return: -842.9087919360747
episode: 366 training return: -847.4115827668614
episode: 367 training return: -851.1911553249224
epoch: 92 test_true_pfm: 236.6305680122225 sim_pfm: -844.6618083778931
episode: 368 training return: -851.8849248089136
episode: 369 training return: -849.0292831017051
episode: 370 training return: -845.850753600364
episode: 371 training return: -842.2356276388722
epoch: 93 test_true_pfm: 266.65804788245026 sim_pfm: -837.556073462402
episode: 372 training return: -848.9793937907586
episode: 373 training return: -852.6649109025402
episode: 374 training return: -851.7384829774813
episode: 375 training return: -843.7247383661817
epoch: 94 test_true_pfm: 243.44425345846028 sim_pfm: -841.3514423845609
episode: 376 training return: -842.611962848279
episode: 377 training return: -859.7865458672456
episode: 378 training return: -855.9247414541644
episode: 379 training return: -848.8520413284049
epoch: 95 test_true_pfm: 239.51767410755136 sim_pfm: -836.9102794721695
episode: 380 training return: -846.3429663232591
episode: 381 training return: -850.700742155759
episode: 382 training return: -858.1477415246658
episode: 383 training return: -843.4112008886542
epoch: 96 test_true_pfm: 259.8149404146701 sim_pfm: -837.8606753744695
episode: 384 training return: -850.9584338632735
episode: 385 training return: -855.1403081284985
episode: 386 training return: -855.1848132514843
episode: 387 training return: -844.2235015740038
epoch: 97 test_true_pfm: 265.8883169508312 sim_pfm: -836.313913940037
episode: 388 training return: -848.3509382397484
episode: 389 training return: -848.5989824167942
episode: 390 training return: -847.8816005320017
episode: 391 training return: -848.5158152997288
epoch: 98 test_true_pfm: 257.7376090239344 sim_pfm: -837.497687942
episode: 392 training return: -853.1535975666307
episode: 393 training return: -854.6779710044863
episode: 394 training return: -848.5246567856515
episode: 395 training return: -854.6953389068814
epoch: 99 test_true_pfm: 270.0179691393171 sim_pfm: -842.4326990945325
episode: 396 training return: -845.3657164054457
episode: 397 training return: -844.5974406060411
episode: 398 training return: -840.8528702879181
episode: 399 training return: -846.5212224907149
epoch: 100 test_true_pfm: 275.87547559869705 sim_pfm: -835.0763898325154
episode: 400 training return: -840.5521923903714
episode: 401 training return: -855.3838369300436
episode: 402 training return: -850.2813628526626
episode: 403 training return: -859.5196539903534
epoch: 101 test_true_pfm: 260.19148942635775 sim_pfm: -841.2839905525975
episode: 404 training return: -848.3469375496379
episode: 405 training return: -837.3088205818685
episode: 406 training return: -842.4962557580207
episode: 407 training return: -834.3191865368428
epoch: 102 test_true_pfm: 259.70626894472747 sim_pfm: -837.6702968879814
episode: 408 training return: -849.6806687336388
episode: 409 training return: -848.6082746504177
episode: 410 training return: -853.1096240161795
episode: 411 training return: -845.5206753765999
epoch: 103 test_true_pfm: 267.0834249560575 sim_pfm: -839.0791151347686
episode: 412 training return: -848.307458997184
episode: 413 training return: -852.7211959732957
episode: 414 training return: -852.488931653388
episode: 415 training return: -842.9080470796346
epoch: 104 test_true_pfm: 260.82748940176265 sim_pfm: -840.3946219962596
episode: 416 training return: -851.2850914333728
episode: 417 training return: -854.2525100046158
episode: 418 training return: -850.12861421349
episode: 419 training return: -854.6374302214946
epoch: 105 test_true_pfm: 262.75845856611 sim_pfm: -841.1719783710737
episode: 420 training return: -846.2696345765444
episode: 421 training return: -845.8120153166219
episode: 422 training return: -853.1526975464241
episode: 423 training return: -844.5745800046902
epoch: 106 test_true_pfm: 270.95398024134715 sim_pfm: -835.3903992063401
episode: 424 training return: -844.339469929744
episode: 425 training return: -853.1740356592902
episode: 426 training return: -855.9350376441934
episode: 427 training return: -852.4632704850686
epoch: 107 test_true_pfm: 251.1017127244733 sim_pfm: -839.6065501852441
episode: 428 training return: -854.0007481982965
episode: 429 training return: -840.3609262979996
episode: 430 training return: -852.0240767832147
episode: 431 training return: -840.6389541087794
epoch: 108 test_true_pfm: 262.5681589692286 sim_pfm: -837.5194123409298
episode: 432 training return: -847.688029710954
episode: 433 training return: -843.4183975304298
episode: 434 training return: -870.2821050453288
episode: 435 training return: -852.5450840335407
epoch: 109 test_true_pfm: 247.39396300854557 sim_pfm: -837.9987681240087
episode: 436 training return: -843.8354784445486
episode: 437 training return: -853.1240481891399
episode: 438 training return: -850.4532142407736
episode: 439 training return: -853.4809603574358
epoch: 110 test_true_pfm: 238.4847809679843 sim_pfm: -836.3088595411991
episode: 440 training return: -844.8349841942842
episode: 441 training return: -850.138121589341
episode: 442 training return: -850.5521796288059
episode: 443 training return: -841.3126316243789
epoch: 111 test_true_pfm: 263.581697993141 sim_pfm: -837.5974093142413
episode: 444 training return: -849.5565614783849
episode: 445 training return: -851.2956793080164
episode: 446 training return: -852.0687914765427
episode: 447 training return: -845.0108646236168
epoch: 112 test_true_pfm: 268.77356447725793 sim_pfm: -834.7351009949392
episode: 448 training return: -842.6768774158909
episode: 449 training return: -854.6654282124259
episode: 450 training return: -851.9881360118886
episode: 451 training return: -844.4826453116873
epoch: 113 test_true_pfm: 267.1441520359376 sim_pfm: -839.2221475577498
episode: 452 training return: -844.2293170379337
episode: 453 training return: -851.4376180409197
episode: 454 training return: -847.416372990843
episode: 455 training return: -845.3121562074517
epoch: 114 test_true_pfm: 241.64843798933668 sim_pfm: -831.596141657741
episode: 456 training return: -847.1328651170932
episode: 457 training return: -845.7985142397387
episode: 458 training return: -854.6541930792072
episode: 459 training return: -840.6451068004499
epoch: 115 test_true_pfm: 271.5784729808808 sim_pfm: -831.9123597489898
episode: 460 training return: -840.6167208226201
episode: 461 training return: -841.7283921967274
episode: 462 training return: -850.6423937983101
episode: 463 training return: -853.5427370593392
epoch: 116 test_true_pfm: 261.62316345084605 sim_pfm: -834.8036703561514
episode: 464 training return: -855.7125859537318
episode: 465 training return: -854.5000045405915
episode: 466 training return: -842.9619797388019
episode: 467 training return: -851.7270753010035
epoch: 117 test_true_pfm: 269.9778476251769 sim_pfm: -831.669795998325
episode: 468 training return: -842.4114361748572
episode: 469 training return: -846.9023985755742
episode: 470 training return: -845.60192565628
episode: 471 training return: -849.0381355338219
epoch: 118 test_true_pfm: 264.2411707751647 sim_pfm: -833.366797562143
episode: 472 training return: -855.2585591433993
episode: 473 training return: -851.1677290711366
episode: 474 training return: -856.4471085375187
episode: 475 training return: -848.1236838918434
epoch: 119 test_true_pfm: 259.2181431272714 sim_pfm: -832.638686963363
episode: 476 training return: -851.717642234884
episode: 477 training return: -845.8919411578004
episode: 478 training return: -833.2248486838687
episode: 479 training return: -850.4035047996249
epoch: 120 test_true_pfm: 263.38010217088583 sim_pfm: -837.9142276924453
episode: 480 training return: -849.3453989999502
episode: 481 training return: -846.4209838401684
episode: 482 training return: -858.227878328594
episode: 483 training return: -841.8954132658466
epoch: 121 test_true_pfm: 255.64091812006714 sim_pfm: -834.8868983468378
episode: 484 training return: -865.2501392972472
episode: 485 training return: -836.5795346611394
episode: 486 training return: -841.8317536973989
episode: 487 training return: -845.0777016949039
epoch: 122 test_true_pfm: 261.4147269419491 sim_pfm: -834.3028690671387
episode: 488 training return: -841.3546632084526
episode: 489 training return: -842.1756220712119
episode: 490 training return: -855.1118666303024
episode: 491 training return: -836.8390213072902
epoch: 123 test_true_pfm: 235.524266402701 sim_pfm: -829.1504819677734
episode: 492 training return: -836.6788897733046
episode: 493 training return: -842.544859047847
episode: 494 training return: -845.198324272439
episode: 495 training return: -846.3830774933689
epoch: 124 test_true_pfm: 270.5666317263245 sim_pfm: -830.4231502948236
episode: 496 training return: -852.2975290787223
episode: 497 training return: -849.9354696385963
episode: 498 training return: -843.1888085185086
episode: 499 training return: -846.6416408787762
epoch: 125 test_true_pfm: 271.7201289091072 sim_pfm: -832.7678744563045
episode: 500 training return: -844.2342965051046
episode: 501 training return: -852.792904813376
episode: 502 training return: -849.5399097915737
episode: 503 training return: -846.0095301324502
epoch: 126 test_true_pfm: 269.05631552559015 sim_pfm: -838.8454239748012
episode: 504 training return: -850.4323884278084
episode: 505 training return: -854.52146083626
episode: 506 training return: -849.1907014428448
episode: 507 training return: -838.4746684376107
epoch: 127 test_true_pfm: 265.9274406088584 sim_pfm: -834.1510023805931
episode: 508 training return: -847.5023510647774
episode: 509 training return: -843.2809905216916
episode: 510 training return: -853.4733584395418
episode: 511 training return: -845.2223389158377
epoch: 128 test_true_pfm: 257.39226294907064 sim_pfm: -834.2252546337166
episode: 512 training return: -849.6349398383051
episode: 513 training return: -848.2402464707147
episode: 514 training return: -853.8702992488525
episode: 515 training return: -841.2753999787975
epoch: 129 test_true_pfm: 247.6041365437475 sim_pfm: -837.6057435474764
episode: 516 training return: -848.9195147694419
episode: 517 training return: -859.1981525822245
episode: 518 training return: -851.2608720058445
episode: 519 training return: -848.2697768567866
epoch: 130 test_true_pfm: 266.1904297165799 sim_pfm: -836.568820851382
episode: 520 training return: -846.9286680003814
episode: 521 training return: -843.8514336562553
episode: 522 training return: -848.8764845466987
episode: 523 training return: -838.1551699675994
epoch: 131 test_true_pfm: 267.19344571172445 sim_pfm: -834.5466237905981
episode: 524 training return: -843.261605836553
episode: 525 training return: -840.8735670507876
episode: 526 training return: -847.8299341875007
episode: 527 training return: -843.7268003211701
epoch: 132 test_true_pfm: 271.7238317665917 sim_pfm: -832.319018991887
episode: 528 training return: -845.4528305834081
episode: 529 training return: -842.3586587343191
episode: 530 training return: -842.4157347746843
episode: 531 training return: -835.4283628322142
epoch: 133 test_true_pfm: 269.4969538213822 sim_pfm: -825.2932450040717
episode: 532 training return: -844.1466798818008
episode: 533 training return: -838.0683947139415
episode: 534 training return: -859.830991261875
episode: 535 training return: -839.8644718027713
epoch: 134 test_true_pfm: 278.70119864723245 sim_pfm: -830.469166565049
episode: 536 training return: -837.6440479704597
episode: 537 training return: -843.0134831808849
episode: 538 training return: -834.7460535972996
episode: 539 training return: -845.1446571088775
epoch: 135 test_true_pfm: 270.7455059464541 sim_pfm: -829.4461365454167
episode: 540 training return: -842.6905005665445
episode: 541 training return: -847.202795496357
episode: 542 training return: -851.1717250369138
episode: 543 training return: -854.7986906388754
epoch: 136 test_true_pfm: 279.2057800949522 sim_pfm: -831.5607046601408
episode: 544 training return: -841.6476510681046
episode: 545 training return: -849.0102692969666
episode: 546 training return: -845.7675630386344
episode: 547 training return: -841.4261570844579
epoch: 137 test_true_pfm: 267.1158350437363 sim_pfm: -832.9512186182204
episode: 548 training return: -846.3662341386695
episode: 549 training return: -845.8117485364808
episode: 550 training return: -844.438775492545
episode: 551 training return: -848.711719630707
epoch: 138 test_true_pfm: 274.59234077748823 sim_pfm: -830.7575125616604
episode: 552 training return: -854.537499157826
episode: 553 training return: -851.5416213334277
episode: 554 training return: -854.628079874135
episode: 555 training return: -841.535210281316
epoch: 139 test_true_pfm: 279.5395483715742 sim_pfm: -833.8706101510717
episode: 556 training return: -852.2057769466753
episode: 557 training return: -842.4651031679755
episode: 558 training return: -836.1316722304908
episode: 559 training return: -842.3566567100339
epoch: 140 test_true_pfm: 272.9194126763131 sim_pfm: -828.6047072136854
episode: 560 training return: -844.4758745772656
episode: 561 training return: -840.7236106987476
episode: 562 training return: -843.256694994834
episode: 563 training return: -836.4290636169555
epoch: 141 test_true_pfm: 260.2603590565869 sim_pfm: -838.8565223441647
episode: 564 training return: -846.377264635233
episode: 565 training return: -848.6508268458525
episode: 566 training return: -852.101688320255
episode: 567 training return: -849.0498610386533
epoch: 142 test_true_pfm: 268.8762411436405 sim_pfm: -836.1089069039296
episode: 568 training return: -835.909093732243
episode: 569 training return: -845.5751406695812
episode: 570 training return: -845.1446508801597
episode: 571 training return: -844.4115374395797
epoch: 143 test_true_pfm: 270.4930617734674 sim_pfm: -833.372727225946
episode: 572 training return: -841.6815045615165
episode: 573 training return: -843.2112596210316
episode: 574 training return: -835.6532643843299
episode: 575 training return: -840.1851138772704
epoch: 144 test_true_pfm: 255.26903702010827 sim_pfm: -839.849385580797
episode: 576 training return: -845.8103338238109
episode: 577 training return: -846.0981950785106
episode: 578 training return: -841.1690798795962
episode: 579 training return: -860.1241159641946
epoch: 145 test_true_pfm: 268.27204701300064 sim_pfm: -833.6521551011466
episode: 580 training return: -833.1635637587773
episode: 581 training return: -842.8379071357544
episode: 582 training return: -839.6280732052134
episode: 583 training return: -845.4167643579786
epoch: 146 test_true_pfm: 258.59101278006926 sim_pfm: -837.1698359511341
episode: 584 training return: -832.1486659420136
episode: 585 training return: -847.7380365213834
episode: 586 training return: -846.8588567534807
episode: 587 training return: -853.0168145040922
epoch: 147 test_true_pfm: 261.0348040197127 sim_pfm: -833.2487634393668
episode: 588 training return: -841.0160149432132
episode: 589 training return: -842.9166268935223
episode: 590 training return: -846.2994220941513
episode: 591 training return: -843.5108625987376
epoch: 148 test_true_pfm: 255.51956854996715 sim_pfm: -829.2636759892995
episode: 592 training return: -855.765508081763
episode: 593 training return: -852.1828982410407
episode: 594 training return: -849.5848861309324
episode: 595 training return: -847.3630902285188
epoch: 149 test_true_pfm: 269.94149237540597 sim_pfm: -830.5010671387408
episode: 596 training return: -839.9595375914715
episode: 597 training return: -843.894483079775
episode: 598 training return: -847.0468543944361
episode: 599 training return: -844.1692692379828
epoch: 150 test_true_pfm: 275.63510506605763 sim_pfm: -833.4634777951396
