['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 0.2828696747124195 test_loss: 0.18836476802825927
epoch: 1 training_loss 0.19644387796521187 test_loss: 0.17638741731643676
epoch: 2 training_loss 0.18482761405408382 test_loss: 0.17398425340652465
epoch: 3 training_loss 0.15842234946787356 test_loss: 0.17539693117141725
epoch: 4 training_loss 0.15313568782061338 test_loss: 0.12810832262039185
epoch: 5 training_loss 0.14899721808731556 test_loss: 0.1353186249732971
epoch: 6 training_loss 0.14753053445369005 test_loss: 0.12855093479156493
epoch: 7 training_loss 0.14843507308512927 test_loss: 0.13801074028015137
epoch: 8 training_loss 0.1426023457944393 test_loss: 0.13597679138183594
epoch: 9 training_loss 0.14223269280046225 test_loss: 0.13544437885284424
epoch: 10 training_loss 0.13318301886320114 test_loss: 0.12525995969772338
epoch: 11 training_loss 0.14542782183736563 test_loss: 0.12384635210037231
epoch: 12 training_loss 0.13691573856398465 test_loss: 0.13207876682281494
epoch: 13 training_loss 0.12021724071353673 test_loss: 0.16775617599487305
epoch: 14 training_loss 0.13498780757188797 test_loss: 0.13651769161224364
epoch: 15 training_loss 0.12830221794545651 test_loss: 0.13504128456115722
epoch: 16 training_loss 0.12342320013791323 test_loss: 0.1271411657333374
epoch: 17 training_loss 0.12512200966477394 test_loss: 0.12195154428482055
epoch: 18 training_loss 0.13370916461572052 test_loss: 0.12297441959381103
epoch: 19 training_loss 0.12961392160505056 test_loss: 0.1588199257850647
epoch: 20 training_loss 0.12187476571649312 test_loss: 0.12306027412414551
epoch: 21 training_loss 0.11983440648764372 test_loss: 0.14324746131896973
epoch: 22 training_loss 0.12649784598499536 test_loss: 0.11417559385299683
epoch: 23 training_loss 0.12703533310443163 test_loss: 0.11769882440567017
epoch: 24 training_loss 0.12382217593491078 test_loss: 0.11743696928024291
epoch: 25 training_loss 0.12668922815471886 test_loss: 0.12974084615707399
epoch: 26 training_loss 0.12568338818848132 test_loss: 0.12381180524826049
epoch: 27 training_loss 0.12819822872057557 test_loss: 0.16098423004150392
epoch: 28 training_loss 0.126079073920846 test_loss: 0.13533997535705566
epoch: 29 training_loss 0.11865684363991022 test_loss: 0.10183229446411132
epoch: 30 training_loss 0.11966217894107103 test_loss: 0.1281450867652893
epoch: 31 training_loss 0.13324319388717412 test_loss: 0.11658360958099365
epoch: 32 training_loss 0.12338378679007292 test_loss: 0.12809937000274657
epoch: 33 training_loss 0.11845740780234337 test_loss: 0.12236082553863525
epoch: 34 training_loss 0.11822866778820754 test_loss: 0.11168483495712281
epoch: 35 training_loss 0.12159701526165008 test_loss: 0.120323646068573
epoch: 36 training_loss 0.12558024208992719 test_loss: 0.1237074851989746
epoch: 37 training_loss 0.1258786167576909 test_loss: 0.1132317066192627
epoch: 38 training_loss 0.12173537395894528 test_loss: 0.12799986600875854
epoch: 39 training_loss 0.13020540181547402 test_loss: 0.11669580936431885
epoch: 40 training_loss 0.12336908686906099 test_loss: 0.120071280002594
epoch: 41 training_loss 0.12457722567021846 test_loss: 0.11866083145141601
epoch: 42 training_loss 0.12941492315381764 test_loss: 0.12486603260040283
epoch: 43 training_loss 0.12667137760668992 test_loss: 0.1378351092338562
epoch: 44 training_loss 0.11749015994369984 test_loss: 0.12436752319335938
epoch: 45 training_loss 0.11956610213965177 test_loss: 0.1296900987625122
epoch: 46 training_loss 0.12306759618222714 test_loss: 0.1224210023880005
epoch: 47 training_loss 0.11735378608107566 test_loss: 0.1256782293319702
epoch: 48 training_loss 0.12388163790106774 test_loss: 0.11663398742675782
epoch: 49 training_loss 0.12202128164470195 test_loss: 0.12256513833999634
epoch: 50 training_loss 0.10922979082912207 test_loss: 0.12597339153289794
epoch: 51 training_loss 0.11795288000255823 test_loss: 0.11301985979080201
epoch: 52 training_loss 0.1315062826499343 test_loss: 0.12109098434448243
epoch: 53 training_loss 0.12082595515996218 test_loss: 0.1112676978111267
epoch: 54 training_loss 0.11633541472256184 test_loss: 0.12815052270889282
epoch: 55 training_loss 0.11462744396179915 test_loss: 0.12463194131851196
epoch: 56 training_loss 0.12510728526860476 test_loss: 0.11850042343139648
epoch: 57 training_loss 0.11635204143822193 test_loss: 0.13383544683456422
epoch: 58 training_loss 0.12189322784543037 test_loss: 0.12141116857528686
epoch: 59 training_loss 0.11783672926947475 test_loss: 0.11309267282485962
epoch: 60 training_loss 0.12173978682607413 test_loss: 0.12252411842346192
epoch: 61 training_loss 0.11934944447129965 test_loss: 0.11111226081848144
epoch: 62 training_loss 0.11793523438274861 test_loss: 0.11058593988418579
epoch: 63 training_loss 0.11569757029414177 test_loss: 0.11696869134902954
epoch: 64 training_loss 0.12044514194130898 test_loss: 0.10356309413909912
epoch: 65 training_loss 0.12213082738220692 test_loss: 0.13960912227630615
epoch: 66 training_loss 0.11837304772809149 test_loss: 0.12392456531524658
epoch: 67 training_loss 0.11045288845896721 test_loss: 0.11882983446121216
epoch: 68 training_loss 0.12139797221869231 test_loss: 0.13194442987442018
epoch: 69 training_loss 0.11856924824416637 test_loss: 0.11103736162185669
epoch: 70 training_loss 0.12595064736902714 test_loss: 0.10156546831130982
epoch: 71 training_loss 0.11584070976823568 test_loss: 0.11013714075088502
epoch: 72 training_loss 0.11707958035171032 test_loss: 0.12698110342025756
epoch: 73 training_loss 0.11900847122073173 test_loss: 0.10596057176589965
epoch: 74 training_loss 0.1147392751649022 test_loss: 0.10261116027832032
epoch: 75 training_loss 0.11517711265012621 test_loss: 0.12828608751296997
epoch: 76 training_loss 0.11934482280164957 test_loss: 0.10707311630249024
epoch: 77 training_loss 0.11816359715536237 test_loss: 0.12117249965667724
epoch: 78 training_loss 0.11104424025863409 test_loss: 0.1307705044746399
epoch: 79 training_loss 0.12367476746439934 test_loss: 0.12369203567504883
epoch: 80 training_loss 0.11114384975284337 test_loss: 0.10325977802276612
epoch: 81 training_loss 0.11364971313625574 test_loss: 0.12130495309829711
epoch: 82 training_loss 0.12387581679970026 test_loss: 0.10846232175827027
epoch: 83 training_loss 0.11307530134916305 test_loss: 0.1357309937477112
epoch: 84 training_loss 0.11434934318065643 test_loss: 0.10936356782913208
epoch: 85 training_loss 0.11224895384162664 test_loss: 0.11686402559280396
epoch: 86 training_loss 0.11686117369681596 test_loss: 0.1189980149269104
epoch: 87 training_loss 0.11148616284132004 test_loss: 0.11657605171203614
epoch: 88 training_loss 0.11775489831343293 test_loss: 0.10719048976898193
epoch: 89 training_loss 0.12094150928780437 test_loss: 0.12877215147018434
epoch: 90 training_loss 0.11694900281727313 test_loss: 0.12137730121612549
epoch: 91 training_loss 0.12397329196333885 test_loss: 0.11477483510971069
epoch: 92 training_loss 0.11612236399203539 test_loss: 0.10614876747131348
epoch: 93 training_loss 0.11310251095332205 test_loss: 0.1411316514015198
epoch: 94 training_loss 0.11873297236859798 test_loss: 0.12564142942428588
epoch: 95 training_loss 0.11331928256899118 test_loss: 0.12065378427505494
epoch: 96 training_loss 0.11810918107628822 test_loss: 0.12877230644226073
epoch: 97 training_loss 0.1183330088853836 test_loss: 0.10192285776138306
epoch: 98 training_loss 0.116365119330585 test_loss: 0.11665374040603638
epoch: 99 training_loss 0.12075112294405699 test_loss: 0.11660571098327636
epoch: 100 training_loss 0.124075350984931 test_loss: 0.1220920205116272
epoch: 101 training_loss 0.11552977297455072 test_loss: 0.1303353190422058
epoch: 102 training_loss 0.10963481683284045 test_loss: 0.10313479900360108
epoch: 103 training_loss 0.11354172356426716 test_loss: 0.1083109736442566
epoch: 104 training_loss 0.11794770397245884 test_loss: 0.11231328248977661
epoch: 105 training_loss 0.11339991401880979 test_loss: 0.12908726930618286
epoch: 106 training_loss 0.12023320101201534 test_loss: 0.11663663387298584
epoch: 107 training_loss 0.11866357043385506 test_loss: 0.10338904857635497
epoch: 108 training_loss 0.11850238118320704 test_loss: 0.1188862681388855
epoch: 109 training_loss 0.11667522378265857 test_loss: 0.11309187412261963
epoch: 110 training_loss 0.11332359161227941 test_loss: 0.10890738964080811
epoch: 111 training_loss 0.1078808656334877 test_loss: 0.11268750429153443
epoch: 112 training_loss 0.11969192147254944 test_loss: 0.11552165746688843
epoch: 113 training_loss 0.10805403362959623 test_loss: 0.12916252613067628
epoch: 114 training_loss 0.11258281361311674 test_loss: 0.11870267391204833
epoch: 115 training_loss 0.1165647923015058 test_loss: 0.14209219217300414
epoch: 116 training_loss 0.11747538682073355 test_loss: 0.12104291915893554
epoch: 117 training_loss 0.1142213201150298 test_loss: 0.1096491813659668
epoch: 118 training_loss 0.10970696773380041 test_loss: 0.12166429758071899
epoch: 119 training_loss 0.12068976525217295 test_loss: 0.12582405805587768
epoch: 120 training_loss 0.11288537349551916 test_loss: 0.11488466262817383
epoch: 121 training_loss 0.1271699006482959 test_loss: 0.10959024429321289
epoch: 122 training_loss 0.11320000179111958 test_loss: 0.11644861698150635
epoch: 123 training_loss 0.11122261624783278 test_loss: 0.11814752817153931
epoch: 124 training_loss 0.12268360622227192 test_loss: 0.12195484638214112
epoch: 125 training_loss 0.11841865114867688 test_loss: 0.10645115375518799
epoch: 126 training_loss 0.11819213550537824 test_loss: 0.11009970903396607
epoch: 127 training_loss 0.11566132836043835 test_loss: 0.11083195209503174
epoch: 128 training_loss 0.1186264780536294 test_loss: 0.0993737518787384
epoch: 129 training_loss 0.1060732320509851 test_loss: 0.10904568433761597
epoch: 130 training_loss 0.11345880437642336 test_loss: 0.12236157655715943
epoch: 131 training_loss 0.11432722020894288 test_loss: 0.11935555934906006
epoch: 132 training_loss 0.11261453250423074 test_loss: 0.10054645538330079
epoch: 133 training_loss 0.11156123645603656 test_loss: 0.11249618530273438
epoch: 134 training_loss 0.1204937994480133 test_loss: 0.1258581042289734
epoch: 135 training_loss 0.11391283743083477 test_loss: 0.10569249391555786
epoch: 136 training_loss 0.11365521267056465 test_loss: 0.10511070489883423
epoch: 137 training_loss 0.11462836436927319 test_loss: 0.11378417015075684
epoch: 138 training_loss 0.11364529330283403 test_loss: 0.11255772113800049
epoch: 139 training_loss 0.11318285726010799 test_loss: 0.10073862075805665
epoch: 140 training_loss 0.11843830026686192 test_loss: 0.1103296160697937
epoch: 141 training_loss 0.11433751415461302 test_loss: 0.1026574969291687
epoch: 142 training_loss 0.10845085294917226 test_loss: 0.11912400722503662
epoch: 143 training_loss 0.11233659535646438 test_loss: 0.11305761337280273
epoch: 144 training_loss 0.10885333515703678 test_loss: 0.11797657012939453
epoch: 145 training_loss 0.11847330048680306 test_loss: 0.11110130548477173
epoch: 146 training_loss 0.11058640133589506 test_loss: 0.11812553405761719
epoch: 147 training_loss 0.11277805916965007 test_loss: 0.11397457122802734
epoch: 148 training_loss 0.10160459335893393 test_loss: 0.12347389459609985
epoch: 149 training_loss 0.12340985748916865 test_loss: 0.12132903337478637
epoch: 0 training_loss 0.2930651428550482 test_loss: 0.19799129962921141
epoch: 1 training_loss 0.18467137537896633 test_loss: 0.17602620124816895
epoch: 2 training_loss 0.1731666150689125 test_loss: 0.14718127250671387
epoch: 3 training_loss 0.15161689706146717 test_loss: 0.1469592571258545
epoch: 4 training_loss 0.1531186592951417 test_loss: 0.18954910039901735
epoch: 5 training_loss 0.1664681138843298 test_loss: 0.14044848680496216
epoch: 6 training_loss 0.13320157874375582 test_loss: 0.13618295192718505
epoch: 7 training_loss 0.13988590013235808 test_loss: 0.16070330142974854
epoch: 8 training_loss 0.15217705797404052 test_loss: 0.12892524003982545
epoch: 9 training_loss 0.13755758099257945 test_loss: 0.15631126165390014
epoch: 10 training_loss 0.14061750389635563 test_loss: 0.14679704904556273
epoch: 11 training_loss 0.14049632735550405 test_loss: 0.14244104623794557
epoch: 12 training_loss 0.1382585132867098 test_loss: 0.15851556062698363
epoch: 13 training_loss 0.1330662553757429 test_loss: 0.1250002384185791
epoch: 14 training_loss 0.12895037392154335 test_loss: 0.14470969438552855
epoch: 15 training_loss 0.13697533529251815 test_loss: 0.11465517282485962
epoch: 16 training_loss 0.13301804814487694 test_loss: 0.11556371450424194
epoch: 17 training_loss 0.12864135831594467 test_loss: 0.1316434144973755
epoch: 18 training_loss 0.12578019697219134 test_loss: 0.11052367687225342
epoch: 19 training_loss 0.12781059343367815 test_loss: 0.11837681531906127
epoch: 20 training_loss 0.12167734801769256 test_loss: 0.11718471050262451
epoch: 21 training_loss 0.12865538951009511 test_loss: 0.12416609525680541
epoch: 22 training_loss 0.12559775784611701 test_loss: 0.12316792011260987
epoch: 23 training_loss 0.13315737340599298 test_loss: 0.11364631652832032
epoch: 24 training_loss 0.12389092970639468 test_loss: 0.11577101945877075
epoch: 25 training_loss 0.1253565955720842 test_loss: 0.12558480501174926
epoch: 26 training_loss 0.1224261910840869 test_loss: 0.11765419244766236
epoch: 27 training_loss 0.11773105001077055 test_loss: 0.1285393238067627
epoch: 28 training_loss 0.13210596561431884 test_loss: 0.10922234058380127
epoch: 29 training_loss 0.12754319388419388 test_loss: 0.10881773233413697
epoch: 30 training_loss 0.1229205172508955 test_loss: 0.11198886632919311
epoch: 31 training_loss 0.12344273168593645 test_loss: 0.11918931007385254
epoch: 32 training_loss 0.11924419350922108 test_loss: 0.14097769260406495
epoch: 33 training_loss 0.13104409113526344 test_loss: 0.12893372774124146
epoch: 34 training_loss 0.12800113074481487 test_loss: 0.11165913343429565
epoch: 35 training_loss 0.12021522173658013 test_loss: 0.12041116952896118
epoch: 36 training_loss 0.12734914153814317 test_loss: 0.11763616800308227
epoch: 37 training_loss 0.11814118780195713 test_loss: 0.10861732959747314
epoch: 38 training_loss 0.1204773024842143 test_loss: 0.12191412448883057
epoch: 39 training_loss 0.12104125302284956 test_loss: 0.12451934814453125
epoch: 40 training_loss 0.1327664241567254 test_loss: 0.10689927339553833
epoch: 41 training_loss 0.1164415429159999 test_loss: 0.10496013164520264
epoch: 42 training_loss 0.11825098503381014 test_loss: 0.125304651260376
epoch: 43 training_loss 0.11783992126584053 test_loss: 0.08579102158546448
epoch: 44 training_loss 0.12308872140944004 test_loss: 0.11841830015182495
epoch: 45 training_loss 0.11741178754717112 test_loss: 0.12043449878692628
epoch: 46 training_loss 0.1219604256376624 test_loss: 0.11716556549072266
epoch: 47 training_loss 0.11802751936018467 test_loss: 0.10881394147872925
epoch: 48 training_loss 0.12097879504784942 test_loss: 0.10383098125457764
epoch: 49 training_loss 0.12606641873717309 test_loss: 0.10129616260528565
epoch: 50 training_loss 0.12936597034335137 test_loss: 0.08933099508285522
epoch: 51 training_loss 0.1208348547667265 test_loss: 0.11668188571929931
epoch: 52 training_loss 0.1135946799069643 test_loss: 0.1264101028442383
epoch: 53 training_loss 0.12074006251990795 test_loss: 0.10823599100112916
epoch: 54 training_loss 0.11833103805780411 test_loss: 0.12413941621780396
epoch: 55 training_loss 0.12939984602853655 test_loss: 0.11776405572891235
epoch: 56 training_loss 0.11814843062311411 test_loss: 0.11761225461959839
epoch: 57 training_loss 0.11988934248685837 test_loss: 0.11088143587112427
epoch: 58 training_loss 0.12222683358937501 test_loss: 0.10685470104217529
epoch: 59 training_loss 0.12544481545686723 test_loss: 0.12485051155090332
epoch: 60 training_loss 0.12495829174295067 test_loss: 0.10485873222351075
epoch: 61 training_loss 0.11721656497567892 test_loss: 0.13159064054489136
epoch: 62 training_loss 0.11513871977105737 test_loss: 0.11285613775253296
epoch: 63 training_loss 0.11357944203540683 test_loss: 0.11563135385513305
epoch: 64 training_loss 0.11659575279802084 test_loss: 0.10983351469039918
epoch: 65 training_loss 0.11838972821831703 test_loss: 0.10861310958862305
epoch: 66 training_loss 0.12426381487399339 test_loss: 0.10588222742080688
epoch: 67 training_loss 0.12663261257112027 test_loss: 0.09973650574684143
epoch: 68 training_loss 0.12370159864425659 test_loss: 0.12197154760360718
epoch: 69 training_loss 0.10974978432059287 test_loss: 0.0911274254322052
epoch: 70 training_loss 0.11864048901945352 test_loss: 0.11220576763153076
epoch: 71 training_loss 0.12111992064863443 test_loss: 0.10866377353668213
epoch: 72 training_loss 0.12413453824818134 test_loss: 0.11074242591857911
epoch: 73 training_loss 0.11175973672419787 test_loss: 0.12014477252960205
epoch: 74 training_loss 0.11563886199146509 test_loss: 0.11638226509094238
epoch: 75 training_loss 0.12614181691780688 test_loss: 0.11319490671157836
epoch: 76 training_loss 0.11455211577937006 test_loss: 0.12541059255599976
epoch: 77 training_loss 0.12846671564504505 test_loss: 0.1127442479133606
epoch: 78 training_loss 0.11188519509509205 test_loss: 0.11897033452987671
epoch: 79 training_loss 0.11701829038560391 test_loss: 0.11521496772766113
epoch: 80 training_loss 0.12546865459531545 test_loss: 0.1323950171470642
epoch: 81 training_loss 0.11754904061555863 test_loss: 0.0998788058757782
epoch: 82 training_loss 0.1155816038697958 test_loss: 0.10816798210144044
epoch: 83 training_loss 0.10591496504843236 test_loss: 0.11501744985580445
epoch: 84 training_loss 0.11844874517992139 test_loss: 0.11181684732437133
epoch: 85 training_loss 0.12273521732538939 test_loss: 0.11489484310150147
epoch: 86 training_loss 0.11136582681909203 test_loss: 0.12213603258132935
epoch: 87 training_loss 0.11970706770196557 test_loss: 0.12408566474914551
epoch: 88 training_loss 0.1271031870506704 test_loss: 0.10739848613739014
epoch: 89 training_loss 0.1177323205024004 test_loss: 0.1047750473022461
epoch: 90 training_loss 0.12122043142095208 test_loss: 0.10841541290283203
epoch: 91 training_loss 0.10916750410571695 test_loss: 0.12034727334976196
epoch: 92 training_loss 0.11877613075077534 test_loss: 0.11777380704879761
epoch: 93 training_loss 0.11735160134732724 test_loss: 0.10500655174255372
epoch: 94 training_loss 0.118409883081913 test_loss: 0.10754280090332032
epoch: 95 training_loss 0.12080869521945715 test_loss: 0.10413678884506225
epoch: 96 training_loss 0.12264402884989976 test_loss: 0.13547084331512452
epoch: 97 training_loss 0.12161468852311373 test_loss: 0.11696274280548095
epoch: 98 training_loss 0.11778621405363082 test_loss: 0.11000889539718628
epoch: 99 training_loss 0.11092279337346554 test_loss: 0.13040177822113036
epoch: 100 training_loss 0.12281734578311443 test_loss: 0.09372705817222596
epoch: 101 training_loss 0.11954075094312429 test_loss: 0.11195671558380127
epoch: 102 training_loss 0.11176399249583482 test_loss: 0.10719695091247558
epoch: 103 training_loss 0.11334479121491313 test_loss: 0.10999770164489746
epoch: 104 training_loss 0.11842764191329479 test_loss: 0.10853945016860962
epoch: 105 training_loss 0.12351068630814552 test_loss: 0.09985556602478027
epoch: 106 training_loss 0.12025414511561394 test_loss: 0.10781292915344239
epoch: 107 training_loss 0.11871330741792917 test_loss: 0.10962601900100707
epoch: 108 training_loss 0.11072596922516822 test_loss: 0.10958856344223022
epoch: 109 training_loss 0.1128633476048708 test_loss: 0.11956291198730469
epoch: 110 training_loss 0.12241736322641372 test_loss: 0.11173822879791259
epoch: 111 training_loss 0.1188988560065627 test_loss: 0.1065034031867981
epoch: 112 training_loss 0.12880317274481057 test_loss: 0.11997911930084229
epoch: 113 training_loss 0.12142675887793303 test_loss: 0.1029498815536499
epoch: 114 training_loss 0.11719929955899716 test_loss: 0.10919312238693238
epoch: 115 training_loss 0.12174935027956962 test_loss: 0.11542687416076661
epoch: 116 training_loss 0.11008055444806814 test_loss: 0.12052693367004394
epoch: 117 training_loss 0.11627035800367594 test_loss: 0.09780850410461425
epoch: 118 training_loss 0.11191039111465216 test_loss: 0.11162292957305908
epoch: 119 training_loss 0.11555874977260829 test_loss: 0.11834152936935424
epoch: 120 training_loss 0.11491943866014481 test_loss: 0.11780987977981568
epoch: 121 training_loss 0.1130686654150486 test_loss: 0.10733734369277954
epoch: 122 training_loss 0.11549374461174011 test_loss: 0.11090373992919922
epoch: 123 training_loss 0.11542327288538218 test_loss: 0.09945042133331299
epoch: 124 training_loss 0.12851907659322023 test_loss: 0.11059359312057496
epoch: 125 training_loss 0.12390983607620001 test_loss: 0.10862725973129272
epoch: 126 training_loss 0.11895088817924261 test_loss: 0.11828258037567138
epoch: 127 training_loss 0.12371716482564807 test_loss: 0.11150166988372803
epoch: 128 training_loss 0.11513778388500213 test_loss: 0.11237449645996093
epoch: 129 training_loss 0.11359084699302911 test_loss: 0.10472818613052368
epoch: 130 training_loss 0.11085148319602013 test_loss: 0.12477611303329468
epoch: 131 training_loss 0.12191052880138159 test_loss: 0.12154338359832764
epoch: 132 training_loss 0.12411231592297554 test_loss: 0.13992068767547608
epoch: 133 training_loss 0.12269754137843847 test_loss: 0.12977118492126466
epoch: 134 training_loss 0.11609293531626463 test_loss: 0.12759181261062622
epoch: 135 training_loss 0.1188535474613309 test_loss: 0.11733273267745972
epoch: 136 training_loss 0.11381133048795164 test_loss: 0.09618373513221741
epoch: 137 training_loss 0.11830261955037713 test_loss: 0.11450926065444947
epoch: 138 training_loss 0.12443065863102674 test_loss: 0.10150156021118165
epoch: 139 training_loss 0.11357398070394993 test_loss: 0.11214680671691894
epoch: 140 training_loss 0.114927121065557 test_loss: 0.09224790930747986
epoch: 141 training_loss 0.10911762274801731 test_loss: 0.11111314296722412
epoch: 142 training_loss 0.11454944487661123 test_loss: 0.09518164396286011
epoch: 143 training_loss 0.11459069844335318 test_loss: 0.10745880603790284
epoch: 144 training_loss 0.12552918035537006 test_loss: 0.10894432067871093
epoch: 145 training_loss 0.11150321006774902 test_loss: 0.10471065044403076
epoch: 146 training_loss 0.12770498532801866 test_loss: 0.113155198097229
epoch: 147 training_loss 0.13160553354769944 test_loss: 0.12146972417831421
epoch: 148 training_loss 0.11678063180297613 test_loss: 0.11673557758331299
epoch: 149 training_loss 0.11159049363806844 test_loss: 0.1277247667312622
epoch: 0 training_loss 0.28006818622350693 test_loss: 0.25182766914367677
epoch: 1 training_loss 0.2157645571231842 test_loss: 0.213164758682251
epoch: 2 training_loss 0.17843126364052295 test_loss: 0.20374047756195068
epoch: 3 training_loss 0.1659385822713375 test_loss: 0.20441603660583496
epoch: 4 training_loss 0.17302666015923024 test_loss: 0.17927076816558837
epoch: 5 training_loss 0.15194572042673826 test_loss: 0.17130790948867797
epoch: 6 training_loss 0.1580003099888563 test_loss: 0.15932562351226806
epoch: 7 training_loss 0.14162458918988705 test_loss: 0.17097716331481932
epoch: 8 training_loss 0.15375552870333195 test_loss: 0.14714733362197877
epoch: 9 training_loss 0.14000065125524996 test_loss: 0.1375072956085205
epoch: 10 training_loss 0.13359635997563601 test_loss: 0.14575949907302857
epoch: 11 training_loss 0.1296293954923749 test_loss: 0.12579810619354248
epoch: 12 training_loss 0.1285375376045704 test_loss: 0.14907950162887573
epoch: 13 training_loss 0.13665484193712474 test_loss: 0.1471081256866455
epoch: 14 training_loss 0.12879991553723813 test_loss: 0.1356447696685791
epoch: 15 training_loss 0.13330033974722028 test_loss: 0.131450355052948
epoch: 16 training_loss 0.13111238097772002 test_loss: 0.18301920890808104
epoch: 17 training_loss 0.1263787994533777 test_loss: 0.16258547306060792
epoch: 18 training_loss 0.13128736279904843 test_loss: 0.14900639057159423
epoch: 19 training_loss 0.1330203526839614 test_loss: 0.13352147340774537
epoch: 20 training_loss 0.1257346659526229 test_loss: 0.12062677145004272
epoch: 21 training_loss 0.12426956173032522 test_loss: 0.13916115760803222
epoch: 22 training_loss 0.126810194440186 test_loss: 0.10931082963943481
epoch: 23 training_loss 0.11480889689177275 test_loss: 0.13731353282928466
epoch: 24 training_loss 0.12271871160715818 test_loss: 0.16053880453109742
epoch: 25 training_loss 0.11526305556297302 test_loss: 0.14085744619369506
epoch: 26 training_loss 0.12251759521663189 test_loss: 0.14652731418609619
epoch: 27 training_loss 0.13296698816120625 test_loss: 0.13765337467193603
epoch: 28 training_loss 0.12254603758454323 test_loss: 0.14043169021606444
epoch: 29 training_loss 0.11492973849177361 test_loss: 0.13538023233413696
epoch: 30 training_loss 0.1262038241326809 test_loss: 0.13575005531311035
epoch: 31 training_loss 0.11992973018437623 test_loss: 0.15151370763778688
epoch: 32 training_loss 0.12764709793031215 test_loss: 0.1354658603668213
epoch: 33 training_loss 0.11619476012885571 test_loss: 0.14764275550842285
epoch: 34 training_loss 0.1235573561117053 test_loss: 0.141831111907959
epoch: 35 training_loss 0.13194271840155125 test_loss: 0.12819544076919556
epoch: 36 training_loss 0.11545394226908684 test_loss: 0.1323573112487793
epoch: 37 training_loss 0.12466232664883137 test_loss: 0.12128474712371826
epoch: 38 training_loss 0.1314222815260291 test_loss: 0.12747386693954468
epoch: 39 training_loss 0.12099807173013687 test_loss: 0.13246674537658693
epoch: 40 training_loss 0.12854172091931104 test_loss: 0.13128553628921508
epoch: 41 training_loss 0.12328723970800638 test_loss: 0.15063620805740358
epoch: 42 training_loss 0.12312245815992355 test_loss: 0.12547775506973266
epoch: 43 training_loss 0.12149881314486265 test_loss: 0.12639365196228028
epoch: 44 training_loss 0.11861061502248049 test_loss: 0.13584824800491332
epoch: 45 training_loss 0.12072675466537476 test_loss: 0.15379681587219238
epoch: 46 training_loss 0.11354376927018166 test_loss: 0.13057368993759155
epoch: 47 training_loss 0.12068140469491481 test_loss: 0.12844256162643433
epoch: 48 training_loss 0.10950471591204405 test_loss: 0.13537805080413817
epoch: 49 training_loss 0.12431154556572438 test_loss: 0.12156285047531128
epoch: 50 training_loss 0.12035545613616705 test_loss: 0.10385034084320069
epoch: 51 training_loss 0.11321741249412298 test_loss: 0.14842458963394164
epoch: 52 training_loss 0.12486284647136926 test_loss: 0.1266286253929138
epoch: 53 training_loss 0.1218732962012291 test_loss: 0.15361273288726807
epoch: 54 training_loss 0.11774534128606319 test_loss: 0.13301080465316772
epoch: 55 training_loss 0.1202059156447649 test_loss: 0.1343549370765686
epoch: 56 training_loss 0.11809166677296162 test_loss: 0.1457399845123291
epoch: 57 training_loss 0.12245319522917271 test_loss: 0.12444977760314942
epoch: 58 training_loss 0.12048337403684854 test_loss: 0.12919037342071532
epoch: 59 training_loss 0.1241061644256115 test_loss: 0.11724612712860108
epoch: 60 training_loss 0.11746221847832203 test_loss: 0.12731024026870727
epoch: 61 training_loss 0.11485846322029829 test_loss: 0.1282261848449707
epoch: 62 training_loss 0.1273412737995386 test_loss: 0.14019367694854737
epoch: 63 training_loss 0.11176094008609652 test_loss: 0.12917611598968506
epoch: 64 training_loss 0.1211479565873742 test_loss: 0.12930647134780884
epoch: 65 training_loss 0.12279288370162249 test_loss: 0.1334560990333557
epoch: 66 training_loss 0.11452115930616856 test_loss: 0.12829477787017823
epoch: 67 training_loss 0.11125589344650506 test_loss: 0.12565016746520996
epoch: 68 training_loss 0.12083825204521417 test_loss: 0.124571692943573
epoch: 69 training_loss 0.11236205795779824 test_loss: 0.1321135401725769
epoch: 70 training_loss 0.11665923193097115 test_loss: 0.13483501672744752
epoch: 71 training_loss 0.11391497697681188 test_loss: 0.12466714382171631
epoch: 72 training_loss 0.11372243463993073 test_loss: 0.16202337741851808
epoch: 73 training_loss 0.11485197808593511 test_loss: 0.11879608631134034
epoch: 74 training_loss 0.1127925674058497 test_loss: 0.1393808364868164
epoch: 75 training_loss 0.11933323454111815 test_loss: 0.1324460506439209
epoch: 76 training_loss 0.11369528457522392 test_loss: 0.13375164270401002
epoch: 77 training_loss 0.12356079068034888 test_loss: 0.115775728225708
epoch: 78 training_loss 0.1177354983985424 test_loss: 0.12392616271972656
epoch: 79 training_loss 0.1082065636664629 test_loss: 0.15333229303359985
epoch: 80 training_loss 0.12115752190351486 test_loss: 0.13006181716918946
epoch: 81 training_loss 0.11390404364094138 test_loss: 0.14679633378982543
epoch: 82 training_loss 0.11687593152746559 test_loss: 0.1260589838027954
epoch: 83 training_loss 0.10991775445640087 test_loss: 0.1145139455795288
epoch: 84 training_loss 0.11643130611628294 test_loss: 0.1264794111251831
epoch: 85 training_loss 0.1145076822116971 test_loss: 0.12248824834823609
epoch: 86 training_loss 0.11908295008353889 test_loss: 0.13530161380767822
epoch: 87 training_loss 0.12192417345941067 test_loss: 0.12086670398712158
epoch: 88 training_loss 0.10916190840303898 test_loss: 0.13789167404174804
epoch: 89 training_loss 0.11444721896201372 test_loss: 0.11164928674697876
epoch: 90 training_loss 0.11375791747123003 test_loss: 0.12159562110900879
epoch: 91 training_loss 0.1091072754561901 test_loss: 0.13486040830612184
epoch: 92 training_loss 0.11425316084176301 test_loss: 0.12492475509643555
epoch: 93 training_loss 0.1232968582585454 test_loss: 0.1384197235107422
epoch: 94 training_loss 0.10581915214657783 test_loss: 0.13431389331817628
epoch: 95 training_loss 0.11072420058771967 test_loss: 0.11896713972091674
epoch: 96 training_loss 0.114398336969316 test_loss: 0.12846405506134034
epoch: 97 training_loss 0.12410334270447493 test_loss: 0.11717804670333862
epoch: 98 training_loss 0.11648061845451593 test_loss: 0.14476864337921141
epoch: 99 training_loss 0.11146317522972822 test_loss: 0.12744519710540772
epoch: 100 training_loss 0.10899118814617395 test_loss: 0.13094285726547242
epoch: 101 training_loss 0.11352747095748782 test_loss: 0.1246866226196289
epoch: 102 training_loss 0.11362511474639177 test_loss: 0.1289731740951538
epoch: 103 training_loss 0.12004305707290769 test_loss: 0.13137079477310182
epoch: 104 training_loss 0.1106918354332447 test_loss: 0.1329986572265625
epoch: 105 training_loss 0.12418152414262294 test_loss: 0.13640683889389038
epoch: 106 training_loss 0.11679994309321046 test_loss: 0.11585334539413453
epoch: 107 training_loss 0.11660776797682047 test_loss: 0.14748376607894897
epoch: 108 training_loss 0.11426435200497508 test_loss: 0.10705850124359131
epoch: 109 training_loss 0.10623747797682881 test_loss: 0.12046854496002198
epoch: 110 training_loss 0.11718629036098718 test_loss: 0.10757284164428711
epoch: 111 training_loss 0.10622449634596705 test_loss: 0.13732377290725709
epoch: 112 training_loss 0.11154940892010927 test_loss: 0.139097261428833
epoch: 113 training_loss 0.1128285487741232 test_loss: 0.12977955341339112
epoch: 114 training_loss 0.10730851147323847 test_loss: 0.11411391496658325
epoch: 115 training_loss 0.10700761877000332 test_loss: 0.1352722764015198
epoch: 116 training_loss 0.1124918576143682 test_loss: 0.12749744653701783
epoch: 117 training_loss 0.11329211357980967 test_loss: 0.10660392045974731
epoch: 118 training_loss 0.10355942098423838 test_loss: 0.1312793731689453
epoch: 119 training_loss 0.1206565110012889 test_loss: 0.11747287511825562
epoch: 120 training_loss 0.10729856453835965 test_loss: 0.13837578296661376
epoch: 121 training_loss 0.11258189897984267 test_loss: 0.11823385953903198
epoch: 122 training_loss 0.1194594157487154 test_loss: 0.12862117290496827
epoch: 123 training_loss 0.10527088258415461 test_loss: 0.1410342574119568
epoch: 124 training_loss 0.11360843367874622 test_loss: 0.1647404193878174
epoch: 125 training_loss 0.1181356155872345 test_loss: 0.12499604225158692
epoch: 126 training_loss 0.10877728339284659 test_loss: 0.144581401348114
epoch: 127 training_loss 0.11323014870285988 test_loss: 0.12854171991348268
epoch: 128 training_loss 0.11168580494821072 test_loss: 0.1347069263458252
epoch: 129 training_loss 0.09952466782182455 test_loss: 0.12033119201660156
epoch: 130 training_loss 0.11392962280660868 test_loss: 0.11681708097457885
epoch: 131 training_loss 0.11338982295244932 test_loss: 0.12923624515533447
epoch: 132 training_loss 0.11682176014408469 test_loss: 0.1099552035331726
epoch: 133 training_loss 0.10857085939496755 test_loss: 0.14285149574279785
epoch: 134 training_loss 0.11071241818368435 test_loss: 0.13261110782623292
epoch: 135 training_loss 0.11187823932617903 test_loss: 0.11983349323272705
epoch: 136 training_loss 0.10754910081624985 test_loss: 0.1301171064376831
epoch: 137 training_loss 0.11536832302808761 test_loss: 0.14326732158660888
epoch: 138 training_loss 0.11017165889963508 test_loss: 0.12838921546936036
epoch: 139 training_loss 0.11196820732206106 test_loss: 0.14840141534805298
epoch: 140 training_loss 0.11917048869654537 test_loss: 0.13245614767074584
epoch: 141 training_loss 0.11693485666066408 test_loss: 0.10009033679962158
epoch: 142 training_loss 0.10387322064489127 test_loss: 0.1322989583015442
epoch: 143 training_loss 0.10568295396864415 test_loss: 0.11555540561676025
epoch: 144 training_loss 0.11568785216659308 test_loss: 0.11653547286987305
epoch: 145 training_loss 0.10930735774338246 test_loss: 0.13291478157043457
epoch: 146 training_loss 0.11343216829001904 test_loss: 0.11575049161911011
epoch: 147 training_loss 0.11930123217403889 test_loss: 0.12390880584716797
epoch: 148 training_loss 0.11557586427778005 test_loss: 0.13096944093704224
epoch: 149 training_loss 0.10920157618820667 test_loss: 0.12873483896255494
epoch: 0 training_loss 0.27847768023610114 test_loss: 0.17955691814422609
epoch: 1 training_loss 0.18782283678650857 test_loss: 0.1598479151725769
epoch: 2 training_loss 0.17613110557198525 test_loss: 0.17742806673049927
epoch: 3 training_loss 0.1591504316776991 test_loss: 0.14658199548721312
epoch: 4 training_loss 0.15358527399599553 test_loss: 0.21927943229675292
epoch: 5 training_loss 0.14239766221493483 test_loss: 0.16188356876373292
epoch: 6 training_loss 0.13937613423913717 test_loss: 0.17592209577560425
epoch: 7 training_loss 0.14578809574246407 test_loss: 0.132859206199646
epoch: 8 training_loss 0.1400722199678421 test_loss: 0.15079174041748047
epoch: 9 training_loss 0.1403729984909296 test_loss: 0.15670887231826783
epoch: 10 training_loss 0.13913202092051505 test_loss: 0.14138689041137695
epoch: 11 training_loss 0.133236562050879 test_loss: 0.14697425365447997
epoch: 12 training_loss 0.1254789074510336 test_loss: 0.12871143817901612
epoch: 13 training_loss 0.12632320094853638 test_loss: 0.12167226076126099
epoch: 14 training_loss 0.12833634518086912 test_loss: 0.13564082384109497
epoch: 15 training_loss 0.13060642562806607 test_loss: 0.1345863699913025
epoch: 16 training_loss 0.13149958822876215 test_loss: 0.14025635719299318
epoch: 17 training_loss 0.1266481939330697 test_loss: 0.12181696891784669
epoch: 18 training_loss 0.1221688000112772 test_loss: 0.11049131155014039
epoch: 19 training_loss 0.12839939001947642 test_loss: 0.1349273443222046
epoch: 20 training_loss 0.13166764106601478 test_loss: 0.13280260562896729
epoch: 21 training_loss 0.12662516750395297 test_loss: 0.10076570510864258
epoch: 22 training_loss 0.12121235121041536 test_loss: 0.12585711479187012
epoch: 23 training_loss 0.11409983932971954 test_loss: 0.12630802392959595
epoch: 24 training_loss 0.12938809785991906 test_loss: 0.12325156927108764
epoch: 25 training_loss 0.1264545999467373 test_loss: 0.12183374166488647
epoch: 26 training_loss 0.11928804790601134 test_loss: 0.12324903011322022
epoch: 27 training_loss 0.12247088544070721 test_loss: 0.13242424726486207
epoch: 28 training_loss 0.12685052879154682 test_loss: 0.13623180389404296
epoch: 29 training_loss 0.11848802180960774 test_loss: 0.12572669982910156
epoch: 30 training_loss 0.12011792276054621 test_loss: 0.11933027505874634
epoch: 31 training_loss 0.12029049443081022 test_loss: 0.12782278060913085
epoch: 32 training_loss 0.12262059535831213 test_loss: 0.11220327615737916
epoch: 33 training_loss 0.11988380359485745 test_loss: 0.1423782229423523
epoch: 34 training_loss 0.12159790374338626 test_loss: 0.1081990122795105
epoch: 35 training_loss 0.118521898239851 test_loss: 0.12097375392913819
epoch: 36 training_loss 0.11547967918217182 test_loss: 0.1321433663368225
epoch: 37 training_loss 0.12630964532494546 test_loss: 0.1337273120880127
epoch: 38 training_loss 0.12472194310277701 test_loss: 0.12470718622207641
epoch: 39 training_loss 0.12755413444712757 test_loss: 0.11076689958572387
epoch: 40 training_loss 0.12005841936916113 test_loss: 0.1325306177139282
epoch: 41 training_loss 0.12138365268707275 test_loss: 0.12145709991455078
epoch: 42 training_loss 0.11436093639582395 test_loss: 0.13252538442611694
epoch: 43 training_loss 0.13003632389008998 test_loss: 0.09600906372070313
epoch: 44 training_loss 0.12320881590247154 test_loss: 0.1243477702140808
epoch: 45 training_loss 0.11833509851247072 test_loss: 0.1261041522026062
epoch: 46 training_loss 0.12447140930220485 test_loss: 0.11961542367935181
epoch: 47 training_loss 0.11677329409867525 test_loss: 0.12281732559204102
epoch: 48 training_loss 0.11783426694571972 test_loss: 0.10778948068618774
epoch: 49 training_loss 0.11218502463772893 test_loss: 0.119674813747406
epoch: 50 training_loss 0.11352010909467936 test_loss: 0.14421299695968628
epoch: 51 training_loss 0.12091221710667015 test_loss: 0.15315024852752684
epoch: 52 training_loss 0.11804005440324544 test_loss: 0.11791125535964966
epoch: 53 training_loss 0.11190946727991104 test_loss: 0.1078916311264038
epoch: 54 training_loss 0.12280738623812795 test_loss: 0.12726448774337767
epoch: 55 training_loss 0.12102942986413837 test_loss: 0.11782532930374146
epoch: 56 training_loss 0.12317970283329487 test_loss: 0.11539232730865479
epoch: 57 training_loss 0.11625440867617726 test_loss: 0.1332099199295044
epoch: 58 training_loss 0.12211724560707808 test_loss: 0.11885313987731934
epoch: 59 training_loss 0.12067366279661655 test_loss: 0.1170650601387024
epoch: 60 training_loss 0.12264657530933619 test_loss: 0.11355888843536377
epoch: 61 training_loss 0.11522585809230805 test_loss: 0.11795583963394166
epoch: 62 training_loss 0.11662644378840924 test_loss: 0.12067258358001709
epoch: 63 training_loss 0.11884233912453056 test_loss: 0.11563223600387573
epoch: 64 training_loss 0.12650619328022003 test_loss: 0.1143418312072754
epoch: 65 training_loss 0.12189165368676186 test_loss: 0.11811524629592896
epoch: 66 training_loss 0.12358360156416894 test_loss: 0.1295330047607422
epoch: 67 training_loss 0.11694318365305661 test_loss: 0.11972916126251221
epoch: 68 training_loss 0.12062524046748876 test_loss: 0.1300410032272339
epoch: 69 training_loss 0.1234680151566863 test_loss: 0.11580085754394531
epoch: 70 training_loss 0.10933842005208134 test_loss: 0.12264041900634766
epoch: 71 training_loss 0.11812173314392567 test_loss: 0.1262570023536682
epoch: 72 training_loss 0.12642900977283716 test_loss: 0.13672493696212767
epoch: 73 training_loss 0.11644017640501261 test_loss: 0.1296866536140442
epoch: 74 training_loss 0.1222871938161552 test_loss: 0.11082239151000976
epoch: 75 training_loss 0.11427470430731773 test_loss: 0.11815797090530396
epoch: 76 training_loss 0.11847409456968308 test_loss: 0.12415235042572022
epoch: 77 training_loss 0.12286005049943924 test_loss: 0.11709729433059693
epoch: 78 training_loss 0.11591851051896811 test_loss: 0.10542051792144776
epoch: 79 training_loss 0.12512002412229775 test_loss: 0.11836988925933838
epoch: 80 training_loss 0.11783763784915209 test_loss: 0.11888227462768555
epoch: 81 training_loss 0.11807450968772173 test_loss: 0.11525496244430541
epoch: 82 training_loss 0.11473574861884117 test_loss: 0.11531133651733398
epoch: 83 training_loss 0.12290923394262791 test_loss: 0.10839383602142334
epoch: 84 training_loss 0.11716479204595089 test_loss: 0.11319890022277831
epoch: 85 training_loss 0.1216678660362959 test_loss: 0.14016847610473632
epoch: 86 training_loss 0.11480918457731605 test_loss: 0.11975698471069336
epoch: 87 training_loss 0.11774245813488961 test_loss: 0.1306077480316162
epoch: 88 training_loss 0.10679481629282236 test_loss: 0.1242340087890625
epoch: 89 training_loss 0.1130385759100318 test_loss: 0.1300489902496338
epoch: 90 training_loss 0.11447856912389398 test_loss: 0.11645619869232178
epoch: 91 training_loss 0.11918235704302788 test_loss: 0.12032495737075806
epoch: 92 training_loss 0.11932959623634815 test_loss: 0.1388583779335022
epoch: 93 training_loss 0.1196856564283371 test_loss: 0.12057801485061645
epoch: 94 training_loss 0.11575420670211316 test_loss: 0.11353809833526611
epoch: 95 training_loss 0.11439515616744757 test_loss: 0.12325091361999511
epoch: 96 training_loss 0.11316328259184956 test_loss: 0.14196521043777466
epoch: 97 training_loss 0.11737384542822837 test_loss: 0.11751005649566651
epoch: 98 training_loss 0.10865295395255088 test_loss: 0.12159187793731689
epoch: 99 training_loss 0.12631934020668267 test_loss: 0.13546570539474487
epoch: 100 training_loss 0.11782717455178499 test_loss: 0.11395224332809448
epoch: 101 training_loss 0.11368636632338167 test_loss: 0.11369277238845825
epoch: 102 training_loss 0.11698353987187148 test_loss: 0.1133884072303772
epoch: 103 training_loss 0.1185043510980904 test_loss: 0.11273603439331055
epoch: 104 training_loss 0.11199286550283433 test_loss: 0.1099744200706482
epoch: 105 training_loss 0.1184688950330019 test_loss: 0.12699202299118043
epoch: 106 training_loss 0.1206524776853621 test_loss: 0.13816207647323608
epoch: 107 training_loss 0.11497485613450407 test_loss: 0.12163090705871582
epoch: 108 training_loss 0.11752717513591052 test_loss: 0.12686712741851808
epoch: 109 training_loss 0.11778449576348066 test_loss: 0.10311720371246338
epoch: 110 training_loss 0.11208396539092064 test_loss: 0.11782197952270508
epoch: 111 training_loss 0.1112083619646728 test_loss: 0.10510373115539551
epoch: 112 training_loss 0.11928728967905045 test_loss: 0.13290860652923583
epoch: 113 training_loss 0.10113004134967923 test_loss: 0.12775710821151734
epoch: 114 training_loss 0.11498767506331205 test_loss: 0.11169291734695434
epoch: 115 training_loss 0.11943086855113506 test_loss: 0.13164300918579103
epoch: 116 training_loss 0.12311233852058649 test_loss: 0.12328546047210694
epoch: 117 training_loss 0.11422036238014698 test_loss: 0.1269212245941162
epoch: 118 training_loss 0.11807279352098704 test_loss: 0.12857095003128052
epoch: 119 training_loss 0.11694887870922685 test_loss: 0.13111492395401
epoch: 120 training_loss 0.11747916348278523 test_loss: 0.10822750329971313
epoch: 121 training_loss 0.1144883025251329 test_loss: 0.11622006893157959
epoch: 122 training_loss 0.11278084145858884 test_loss: 0.10650825500488281
epoch: 123 training_loss 0.10999264420941472 test_loss: 0.11715741157531738
epoch: 124 training_loss 0.11125120190903544 test_loss: 0.11036868095397949
epoch: 125 training_loss 0.11431156676262617 test_loss: 0.11359587907791138
epoch: 126 training_loss 0.12364992927759885 test_loss: 0.11117717027664184
epoch: 127 training_loss 0.11329441139474511 test_loss: 0.15472702980041503
epoch: 128 training_loss 0.12710215475410222 test_loss: 0.13838971853256227
epoch: 129 training_loss 0.11999208796769381 test_loss: 0.1322633981704712
epoch: 130 training_loss 0.11406982567161322 test_loss: 0.11694611310958862
epoch: 131 training_loss 0.11324068920686842 test_loss: 0.13373569250106812
epoch: 132 training_loss 0.12102958431467414 test_loss: 0.136606764793396
epoch: 133 training_loss 0.11435457618907094 test_loss: 0.11718785762786865
epoch: 134 training_loss 0.11874204840511084 test_loss: 0.09951881170272828
epoch: 135 training_loss 0.1079861275292933 test_loss: 0.1263163924217224
epoch: 136 training_loss 0.11805915741249919 test_loss: 0.12297927141189575
epoch: 137 training_loss 0.11341707065701484 test_loss: 0.1404150128364563
epoch: 138 training_loss 0.11011072246357799 test_loss: 0.13498427867889404
epoch: 139 training_loss 0.11387527212500573 test_loss: 0.10791282653808594
epoch: 140 training_loss 0.11456406511366367 test_loss: 0.13047034740448
epoch: 141 training_loss 0.10996272169053554 test_loss: 0.11893532276153565
epoch: 142 training_loss 0.12188336126506329 test_loss: 0.11496529579162598
epoch: 143 training_loss 0.12134757902473212 test_loss: 0.11495420932769776
epoch: 144 training_loss 0.11073288384824992 test_loss: 0.1260067343711853
epoch: 145 training_loss 0.11668702989816665 test_loss: 0.11107771396636963
epoch: 146 training_loss 0.10764985224232078 test_loss: 0.12706176042556763
epoch: 147 training_loss 0.1098253507912159 test_loss: 0.11091722249984741
epoch: 148 training_loss 0.10105442013591528 test_loss: 0.1366346597671509
epoch: 149 training_loss 0.11530867600813507 test_loss: 0.1235511302947998
episode: 0 training return: -1151.0496274819893
episode: 1 training return: -1147.5295284618626
episode: 2 training return: -1249.4909790647644
episode: 3 training return: -1286.8737518310609
epoch: 1 test_true_pfm: 82.69600035034075 sim_pfm: -986.630818332609
episode: 4 training return: -1183.8645193590248
episode: 5 training return: -1038.8390182377382
episode: 6 training return: -1133.8043203993739
episode: 7 training return: -1044.6514524020872
epoch: 2 test_true_pfm: -71.54503155273262 sim_pfm: -1004.2325202575754
episode: 8 training return: -1049.9157945293712
episode: 9 training return: -1099.9700787940778
episode: 10 training return: -1007.5341320546404
episode: 11 training return: -1256.8056948385865
epoch: 3 test_true_pfm: 186.70619983596353 sim_pfm: -1169.2166897828808
episode: 12 training return: -1039.96201452035
episode: 13 training return: -1031.7949247568308
episode: 14 training return: -1074.039698798138
episode: 15 training return: -997.6988476532393
epoch: 4 test_true_pfm: -194.07343630334466 sim_pfm: -1014.5951369366849
episode: 16 training return: -989.5703310401786
episode: 17 training return: -1006.9798713747489
episode: 18 training return: -1151.8327233277118
episode: 19 training return: -1097.9323007619573
epoch: 5 test_true_pfm: 236.92814459034662 sim_pfm: -917.2417817977656
episode: 20 training return: -1069.444134607333
episode: 21 training return: -992.4602316012757
episode: 22 training return: -1006.3485231638855
episode: 23 training return: -1003.6156616915217
epoch: 6 test_true_pfm: 7.751375348819136 sim_pfm: -983.5119335154959
episode: 24 training return: -995.6257308734621
episode: 25 training return: -1009.6107519100538
episode: 26 training return: -1109.5605127314848
episode: 27 training return: -1062.8901249390256
epoch: 7 test_true_pfm: 70.41805725718984 sim_pfm: -1033.9305278910388
episode: 28 training return: -1061.2854388794356
episode: 29 training return: -1006.2374370660348
episode: 30 training return: -1008.0771358745466
episode: 31 training return: -996.484691954986
epoch: 8 test_true_pfm: 10.501806815791545 sim_pfm: -1050.2899458739437
episode: 32 training return: -1034.3075488357426
episode: 33 training return: -1324.4796444133058
episode: 34 training return: -1025.9665055959658
episode: 35 training return: -999.0143060132996
epoch: 9 test_true_pfm: -70.625598350225 sim_pfm: -957.820505674706
episode: 36 training return: -1018.207217856374
episode: 37 training return: -1032.9213199857138
episode: 38 training return: -1010.1853872220055
episode: 39 training return: -986.5139233363071
epoch: 10 test_true_pfm: 61.4172063113279 sim_pfm: -981.1654113520614
episode: 40 training return: -1007.4727961288934
episode: 41 training return: -999.472110505472
episode: 42 training return: -998.6942411708878
episode: 43 training return: -994.0024094479138
epoch: 11 test_true_pfm: 79.54654798559488 sim_pfm: -981.1354607670538
episode: 44 training return: -1007.463244299621
episode: 45 training return: -988.5072399515496
episode: 46 training return: -1016.1656319173803
episode: 47 training return: -1000.8315941306608
epoch: 12 test_true_pfm: 7.437938031821442 sim_pfm: -955.3600048279322
episode: 48 training return: -1005.2128198312225
episode: 49 training return: -1004.0950275581342
episode: 50 training return: -995.3215206034665
episode: 51 training return: -1002.9139544775862
epoch: 13 test_true_pfm: 53.76506192810942 sim_pfm: -980.7959435164538
episode: 52 training return: -983.6952631263975
episode: 53 training return: -999.0681432342258
episode: 54 training return: -1005.9217585617199
episode: 55 training return: -985.9669474732091
epoch: 14 test_true_pfm: -35.56441151089161 sim_pfm: -976.2293003776366
episode: 56 training return: -999.7623999285726
episode: 57 training return: -998.1758756019399
episode: 58 training return: -993.2560491962836
episode: 59 training return: -978.3048591018363
epoch: 15 test_true_pfm: 23.38102508221534 sim_pfm: -980.1874907372179
episode: 60 training return: -980.9533180906135
episode: 61 training return: -986.5164120376706
episode: 62 training return: -1006.7879404044795
episode: 63 training return: -1001.9426463203821
epoch: 16 test_true_pfm: -6.041256640858223 sim_pfm: -973.0505620991877
episode: 64 training return: -985.7784315424738
episode: 65 training return: -1001.3697300210287
episode: 66 training return: -994.0033703481947
episode: 67 training return: -986.6188976732547
epoch: 17 test_true_pfm: 8.260301399414905 sim_pfm: -980.3865655687147
episode: 68 training return: -988.4820287480024
episode: 69 training return: -961.8737651094407
episode: 70 training return: -985.0118066410682
episode: 71 training return: -981.8350016257646
epoch: 18 test_true_pfm: 11.106042562052323 sim_pfm: -976.9512109866547
episode: 72 training return: -987.7153883853598
episode: 73 training return: -993.4000806793551
episode: 74 training return: -986.539939785441
episode: 75 training return: -974.1844265417868
epoch: 19 test_true_pfm: 13.49625132985217 sim_pfm: -981.353194987625
episode: 76 training return: -977.632037561257
episode: 77 training return: -978.2313601815537
episode: 78 training return: -961.0217705238041
episode: 79 training return: -975.1410798513136
epoch: 20 test_true_pfm: 45.36761716186627 sim_pfm: -967.0272824346144
episode: 80 training return: -1004.1483689280152
episode: 81 training return: -1000.2372558347678
episode: 82 training return: -958.8193109818031
episode: 83 training return: -993.5147657123645
epoch: 21 test_true_pfm: 34.80133838274245 sim_pfm: -976.6269266853377
episode: 84 training return: -986.9253356242318
episode: 85 training return: -977.6823349357879
episode: 86 training return: -979.1093572020322
episode: 87 training return: -979.5829752836576
epoch: 22 test_true_pfm: 54.64328081216937 sim_pfm: -960.3980434540791
episode: 88 training return: -976.7370613603239
episode: 89 training return: -956.552343090864
episode: 90 training return: -973.7567867457004
episode: 91 training return: -972.338952682629
epoch: 23 test_true_pfm: 79.24762383881053 sim_pfm: -963.1170214692282
episode: 92 training return: -969.6100702696507
episode: 93 training return: -936.3934864321172
episode: 94 training return: -1012.0163848605866
episode: 95 training return: -925.4862961552946
epoch: 24 test_true_pfm: 75.08203746443473 sim_pfm: -955.3783634701671
episode: 96 training return: -965.3170888320216
episode: 97 training return: -986.0226721568995
episode: 98 training return: -939.1387425564837
episode: 99 training return: -961.9980938714175
epoch: 25 test_true_pfm: 35.18361266408824 sim_pfm: -971.7160274150741
episode: 100 training return: -963.6878247863001
episode: 101 training return: -986.7682268557666
episode: 102 training return: -982.8921595280115
episode: 103 training return: -957.6237638095475
epoch: 26 test_true_pfm: 4.152153219370475 sim_pfm: -961.3955936630961
episode: 104 training return: -983.4895431924479
episode: 105 training return: -977.8562126628206
episode: 106 training return: -989.9654875478598
episode: 107 training return: -962.3886099144783
epoch: 27 test_true_pfm: 24.62819941678946 sim_pfm: -975.0800324417379
episode: 108 training return: -958.2820172313094
episode: 109 training return: -959.9254767125509
episode: 110 training return: -967.3573450452883
episode: 111 training return: -976.4200227731635
epoch: 28 test_true_pfm: 50.259763848050945 sim_pfm: -927.2579882170495
episode: 112 training return: -996.7879915172323
episode: 113 training return: -931.3173926754689
episode: 114 training return: -953.7817158476216
episode: 115 training return: -932.8628904316053
epoch: 29 test_true_pfm: 24.239595162625477 sim_pfm: -934.1667061993134
episode: 116 training return: -904.1955900554874
episode: 117 training return: -987.3713820791597
episode: 118 training return: -950.1034951320308
episode: 119 training return: -968.1634084123335
epoch: 30 test_true_pfm: -0.20470227492837031 sim_pfm: -941.2446179417458
episode: 120 training return: -929.7233444420076
episode: 121 training return: -923.3539661702365
episode: 122 training return: -949.867397827109
episode: 123 training return: -965.3913545402253
epoch: 31 test_true_pfm: 21.487284810311753 sim_pfm: -903.7407819139675
episode: 124 training return: -978.8708620771241
episode: 125 training return: -936.6376321865291
episode: 126 training return: -956.5282630550715
episode: 127 training return: -964.4601884768591
epoch: 32 test_true_pfm: -14.719847372856814 sim_pfm: -897.4217198544396
episode: 128 training return: -975.7751898884985
episode: 129 training return: -936.0567048867811
episode: 130 training return: -911.3994673154884
episode: 131 training return: -984.335116493311
epoch: 33 test_true_pfm: -22.602099011128093 sim_pfm: -942.039580124895
episode: 132 training return: -984.4421189502417
episode: 133 training return: -908.012400856308
episode: 134 training return: -892.9102807604845
episode: 135 training return: -938.2761272895756
epoch: 34 test_true_pfm: -77.29231662344093 sim_pfm: -830.2126567735295
episode: 136 training return: -961.8096908613494
episode: 137 training return: -917.6598856230412
episode: 138 training return: -962.0750505057251
episode: 139 training return: -994.5405171475298
epoch: 35 test_true_pfm: -106.16810058627765 sim_pfm: -917.7256996580786
episode: 140 training return: -968.907084474486
episode: 141 training return: -945.6923584797306
episode: 142 training return: -962.299171716328
episode: 143 training return: -917.5569640890254
epoch: 36 test_true_pfm: -90.76551690940522 sim_pfm: -867.8114005234237
episode: 144 training return: -976.3838272723481
episode: 145 training return: -931.5902232009039
episode: 146 training return: -977.272155173921
episode: 147 training return: -973.4748940684862
epoch: 37 test_true_pfm: 47.91247826546701 sim_pfm: -938.5818375560133
episode: 148 training return: -959.6192072158126
episode: 149 training return: -986.497174809742
episode: 150 training return: -953.0789498496098
episode: 151 training return: -975.5647342111753
epoch: 38 test_true_pfm: -46.28891037281042 sim_pfm: -859.022502842549
episode: 152 training return: -963.929946380294
episode: 153 training return: -980.726484346096
episode: 154 training return: -937.2954836343324
episode: 155 training return: -872.567825837787
epoch: 39 test_true_pfm: 122.42443992741578 sim_pfm: -953.9385347444619
episode: 156 training return: -964.5943913381336
episode: 157 training return: -907.9347625473492
episode: 158 training return: -916.3785429747454
episode: 159 training return: -984.8322386200903
epoch: 40 test_true_pfm: -112.42501434022925 sim_pfm: -919.1582995276021
episode: 160 training return: -940.4361582596389
episode: 161 training return: -922.8355049811755
episode: 162 training return: -980.4280007347755
episode: 163 training return: -969.883694597195
epoch: 41 test_true_pfm: -21.894569755201598 sim_pfm: -963.4860015562754
episode: 164 training return: -970.0734997247836
episode: 165 training return: -990.8482814596857
episode: 166 training return: -976.794377900991
episode: 167 training return: -980.7575193582841
epoch: 42 test_true_pfm: -114.85022734355302 sim_pfm: -835.0974389336058
episode: 168 training return: -934.8403304781502
episode: 169 training return: -866.1563667609868
episode: 170 training return: -945.2752237268058
episode: 171 training return: -974.1659348194102
epoch: 43 test_true_pfm: 114.05321252187836 sim_pfm: -962.3666258541602
episode: 172 training return: -887.0232889178678
episode: 173 training return: -916.8243443046375
episode: 174 training return: -954.9054849597536
episode: 175 training return: -878.3721660276449
epoch: 44 test_true_pfm: -29.353542498081783 sim_pfm: -880.6386367213512
episode: 176 training return: -878.659420833633
episode: 177 training return: -898.8405429797617
episode: 178 training return: -870.1053083662192
episode: 179 training return: -970.8667329924822
epoch: 45 test_true_pfm: 127.17956825527153 sim_pfm: -958.5656675504066
episode: 180 training return: -976.2375410795613
episode: 181 training return: -985.49522888133
episode: 182 training return: -938.5374761703292
episode: 183 training return: -985.7820259691018
epoch: 46 test_true_pfm: -107.88783379922738 sim_pfm: -915.7773043272897
episode: 184 training return: -880.8034471969338
episode: 185 training return: -893.0634419724477
episode: 186 training return: -975.8767519014256
episode: 187 training return: -952.0356545484193
epoch: 47 test_true_pfm: 112.4308842094368 sim_pfm: -959.2476660156993
episode: 188 training return: -989.6283155772849
episode: 189 training return: -976.9539708191853
episode: 190 training return: -909.2212774871554
episode: 191 training return: -980.6155357331123
epoch: 48 test_true_pfm: 82.04919542230704 sim_pfm: -860.031428173741
episode: 192 training return: -973.5569168311507
episode: 193 training return: -911.6325809729999
episode: 194 training return: -956.6873115386547
episode: 195 training return: -859.888709745199
epoch: 49 test_true_pfm: -28.538705705711877 sim_pfm: -836.997719180069
episode: 196 training return: -971.6018639573209
episode: 197 training return: -906.1078548383746
episode: 198 training return: -916.2495468192426
episode: 199 training return: -921.4974016682299
epoch: 50 test_true_pfm: -38.19045615409632 sim_pfm: -810.4692402688148
episode: 200 training return: -990.2285454433304
episode: 201 training return: -940.1021868079913
episode: 202 training return: -977.1963582002355
episode: 203 training return: -845.9155285204919
epoch: 51 test_true_pfm: -101.7658527071951 sim_pfm: -794.8511104776975
episode: 204 training return: -965.1914000293513
episode: 205 training return: -986.5110924391198
episode: 206 training return: -985.2931354370542
episode: 207 training return: -918.3391209369203
epoch: 52 test_true_pfm: 4.97808101437973 sim_pfm: -894.9726743225938
episode: 208 training return: -951.8078767952089
episode: 209 training return: -982.7800134933358
episode: 210 training return: -922.2834181946217
episode: 211 training return: -982.2348969309611
epoch: 53 test_true_pfm: -87.28601695124767 sim_pfm: -806.9017689169106
episode: 212 training return: -969.0217239956175
episode: 213 training return: -863.5830518344393
episode: 214 training return: -951.0907989895593
episode: 215 training return: -951.5714061693672
epoch: 54 test_true_pfm: -57.75337193300686 sim_pfm: -898.6417914965629
episode: 216 training return: -947.4003938379384
episode: 217 training return: -954.051547264581
episode: 218 training return: -969.2464583726064
episode: 219 training return: -927.1324226552039
epoch: 55 test_true_pfm: -190.20519494201838 sim_pfm: -784.2390364363085
episode: 220 training return: -851.660277120184
episode: 221 training return: -974.2936010191851
episode: 222 training return: -905.2193797380643
episode: 223 training return: -971.5307104054571
epoch: 56 test_true_pfm: -81.02573577478968 sim_pfm: -773.2581243268179
episode: 224 training return: -948.5205615161768
episode: 225 training return: -961.8707823818155
episode: 226 training return: -874.7161392866522
episode: 227 training return: -979.414630832593
epoch: 57 test_true_pfm: -156.64305294187255 sim_pfm: -791.3322989500921
episode: 228 training return: -966.485226585853
episode: 229 training return: -947.3408200540442
episode: 230 training return: -930.7215176379714
episode: 231 training return: -949.4060019720446
epoch: 58 test_true_pfm: 120.03258059571151 sim_pfm: -825.8261332188031
episode: 232 training return: -889.1765005816002
episode: 233 training return: -962.0934838263194
episode: 234 training return: -974.3145350912748
episode: 235 training return: -880.4250087691804
epoch: 59 test_true_pfm: 56.46271656378778 sim_pfm: -852.4598473597416
episode: 236 training return: -940.2327085003549
episode: 237 training return: -945.4069269345122
episode: 238 training return: -974.4001786416586
episode: 239 training return: -930.3255827279627
epoch: 60 test_true_pfm: -105.5632791142242 sim_pfm: -920.3588650509024
episode: 240 training return: -951.0116677247142
episode: 241 training return: -923.1647110498332
episode: 242 training return: -865.7015015762985
episode: 243 training return: -892.3680539502398
epoch: 61 test_true_pfm: -115.26327743719258 sim_pfm: -855.5724350634074
episode: 244 training return: -995.4121815058351
episode: 245 training return: -984.3216563683428
episode: 246 training return: -962.4858140033506
episode: 247 training return: -971.1317053887083
epoch: 62 test_true_pfm: -7.141602736534544 sim_pfm: -847.9352035695342
episode: 248 training return: -874.021433963959
episode: 249 training return: -981.7090722230841
episode: 250 training return: -879.7381505663147
episode: 251 training return: -966.0400476602654
epoch: 63 test_true_pfm: -82.15372093103964 sim_pfm: -852.3160436997517
episode: 252 training return: -985.3139262045274
episode: 253 training return: -839.6104901631505
episode: 254 training return: -925.1358154129247
episode: 255 training return: -953.0999831420075
epoch: 64 test_true_pfm: -117.14626312762054 sim_pfm: -820.1722479583955
episode: 256 training return: -973.0701689125041
episode: 257 training return: -984.3232376066135
episode: 258 training return: -955.9922942239647
episode: 259 training return: -971.2208886247622
epoch: 65 test_true_pfm: 40.46128674335581 sim_pfm: -917.3981557556957
episode: 260 training return: -957.1939133118958
episode: 261 training return: -962.1673356046107
episode: 262 training return: -889.7195759699447
episode: 263 training return: -933.4742617008692
epoch: 66 test_true_pfm: -75.89787044788824 sim_pfm: -775.2252018599688
episode: 264 training return: -871.370236276544
episode: 265 training return: -934.7376291969583
episode: 266 training return: -932.2533753974118
episode: 267 training return: -895.7457454148048
epoch: 67 test_true_pfm: -58.34942481008566 sim_pfm: -765.8517804429342
episode: 268 training return: -985.6594362524943
episode: 269 training return: -959.5891027341473
episode: 270 training return: -989.7373688901022
episode: 271 training return: -978.5817850673445
epoch: 68 test_true_pfm: 73.13453126228062 sim_pfm: -843.2223205851409
episode: 272 training return: -991.3007439569001
episode: 273 training return: -965.7991214335956
episode: 274 training return: -958.6503539007668
episode: 275 training return: -970.3876378615043
epoch: 69 test_true_pfm: -160.37999016594014 sim_pfm: -798.75790646105
episode: 276 training return: -916.1628229637879
episode: 277 training return: -899.0713145910951
episode: 278 training return: -889.3000164212184
episode: 279 training return: -871.601829258857
epoch: 70 test_true_pfm: 50.54112157224895 sim_pfm: -801.3675924464069
episode: 280 training return: -912.1219257044736
episode: 281 training return: -955.3807607734861
episode: 282 training return: -943.1373487286626
episode: 283 training return: -925.4914019323228
epoch: 71 test_true_pfm: -142.83847944222944 sim_pfm: -761.9446495087119
episode: 284 training return: -856.2493436245844
episode: 285 training return: -876.1681276039645
episode: 286 training return: -970.664871511663
episode: 287 training return: -896.7999384891764
epoch: 72 test_true_pfm: -102.57912402902781 sim_pfm: -763.3484805321192
episode: 288 training return: -861.4663417288331
episode: 289 training return: -938.8059322329976
episode: 290 training return: -884.7528031685907
episode: 291 training return: -976.3610440965523
epoch: 73 test_true_pfm: 114.32123624611542 sim_pfm: -948.9941304759855
episode: 292 training return: -985.5269450746848
episode: 293 training return: -982.3894798701717
episode: 294 training return: -967.7754417787603
episode: 295 training return: -974.499138645279
epoch: 74 test_true_pfm: -151.3338723696468 sim_pfm: -760.3605345415821
episode: 296 training return: -977.4141711147913
episode: 297 training return: -981.3282582854873
episode: 298 training return: -914.3331403909777
episode: 299 training return: -936.5581531817371
epoch: 75 test_true_pfm: -179.68126734333455 sim_pfm: -751.4604332130457
episode: 300 training return: -978.8914071220307
episode: 301 training return: -966.4301975208349
episode: 302 training return: -938.2220878956421
episode: 303 training return: -990.2896736923233
epoch: 76 test_true_pfm: -108.19922176116141 sim_pfm: -748.9716741499483
episode: 304 training return: -851.76859904541
episode: 305 training return: -980.4235311044399
episode: 306 training return: -841.2584364782458
episode: 307 training return: -899.1081580081681
epoch: 77 test_true_pfm: -24.283433769779965 sim_pfm: -882.6214251455458
episode: 308 training return: -867.6091115047012
episode: 309 training return: -962.5882645048795
episode: 310 training return: -942.815846480911
episode: 311 training return: -864.0058607054858
epoch: 78 test_true_pfm: 68.13272954403334 sim_pfm: -961.5207045330853
episode: 312 training return: -988.5086817685816
episode: 313 training return: -972.1934883182363
episode: 314 training return: -873.3014387702962
episode: 315 training return: -972.729375243126
epoch: 79 test_true_pfm: 38.09261019350054 sim_pfm: -788.8113617760482
episode: 316 training return: -958.1405190831334
episode: 317 training return: -978.3259630812827
episode: 318 training return: -939.2208330167533
episode: 319 training return: -921.8201677401477
epoch: 80 test_true_pfm: -136.83041470150337 sim_pfm: -773.4977196486406
episode: 320 training return: -968.9538522249851
episode: 321 training return: -984.9313152775493
episode: 322 training return: -983.5213613977289
episode: 323 training return: -939.6773953980453
epoch: 81 test_true_pfm: -61.18244145088509 sim_pfm: -758.1026487145747
episode: 324 training return: -955.645936602861
episode: 325 training return: -980.4670611631718
episode: 326 training return: -929.0820890633572
episode: 327 training return: -985.2611527276945
epoch: 82 test_true_pfm: -6.295071767296274 sim_pfm: -784.8640878797987
episode: 328 training return: -965.6344791320804
episode: 329 training return: -872.2091877950878
episode: 330 training return: -982.7801572738972
episode: 331 training return: -884.695319373633
epoch: 83 test_true_pfm: -84.67193400139668 sim_pfm: -770.0117172407078
episode: 332 training return: -859.1909415697153
episode: 333 training return: -933.3262861802568
episode: 334 training return: -851.0682864515857
episode: 335 training return: -977.5209016373194
epoch: 84 test_true_pfm: -120.84997336259812 sim_pfm: -778.7421574688414
episode: 336 training return: -873.3479211181199
episode: 337 training return: -924.6163696789052
episode: 338 training return: -895.7113484893287
episode: 339 training return: -845.8833791274438
epoch: 85 test_true_pfm: 79.44176710087662 sim_pfm: -769.717114133725
episode: 340 training return: -986.44960934433
episode: 341 training return: -941.0295874316505
episode: 342 training return: -948.7685356627627
episode: 343 training return: -865.5086918097398
epoch: 86 test_true_pfm: 39.35842475313839 sim_pfm: -891.4481465239975
episode: 344 training return: -868.1946392358101
episode: 345 training return: -985.095424319444
episode: 346 training return: -971.6917265095276
episode: 347 training return: -888.7600073268566
epoch: 87 test_true_pfm: -74.46571258717279 sim_pfm: -818.3157666387764
episode: 348 training return: -982.0959347999509
episode: 349 training return: -957.0820819684993
episode: 350 training return: -951.1511471211329
episode: 351 training return: -843.6130715568623
epoch: 88 test_true_pfm: 30.104945194820882 sim_pfm: -813.8637395361861
episode: 352 training return: -946.1924910296583
episode: 353 training return: -928.6145567036641
episode: 354 training return: -956.4285538036929
episode: 355 training return: -894.9166260024638
epoch: 89 test_true_pfm: 52.23018583108757 sim_pfm: -883.4193391166618
episode: 356 training return: -911.4491247544162
episode: 357 training return: -967.9737204761674
episode: 358 training return: -933.8261021413547
episode: 359 training return: -970.789953904812
epoch: 90 test_true_pfm: -36.79941042718528 sim_pfm: -786.6184363700864
episode: 360 training return: -945.4597262522299
episode: 361 training return: -980.033442561852
episode: 362 training return: -859.132404757136
episode: 363 training return: -960.2570096274875
epoch: 91 test_true_pfm: -111.35867381140436 sim_pfm: -795.1937736438364
episode: 364 training return: -984.9806061946149
episode: 365 training return: -978.5730816588988
episode: 366 training return: -980.6147457165707
episode: 367 training return: -974.7730272891083
epoch: 92 test_true_pfm: -183.50357524644653 sim_pfm: -747.6649533216829
episode: 368 training return: -987.2277541180476
episode: 369 training return: -981.4758584363096
episode: 370 training return: -939.0361441848377
episode: 371 training return: -969.7585754724643
epoch: 93 test_true_pfm: 19.139244443172018 sim_pfm: -821.2861327041104
episode: 372 training return: -982.559656803817
episode: 373 training return: -982.4588399360442
episode: 374 training return: -866.5410220384931
episode: 375 training return: -941.6491198477762
epoch: 94 test_true_pfm: 53.92549567834505 sim_pfm: -765.9749053516281
episode: 376 training return: -969.1955356023778
episode: 377 training return: -977.664564499206
episode: 378 training return: -984.6345705238695
episode: 379 training return: -833.845669843125
epoch: 95 test_true_pfm: -203.06426702567137 sim_pfm: -762.0856322170226
episode: 380 training return: -967.3193323262308
episode: 381 training return: -986.7061925018355
episode: 382 training return: -961.6252088967389
episode: 383 training return: -957.9701149709339
epoch: 96 test_true_pfm: -167.22073678449343 sim_pfm: -781.7653447893072
episode: 384 training return: -991.7169563140789
episode: 385 training return: -937.3250614229287
episode: 386 training return: -827.2290069424389
episode: 387 training return: -863.2020268322265
epoch: 97 test_true_pfm: -136.4264028549301 sim_pfm: -831.3480966227775
episode: 388 training return: -967.2835266834974
episode: 389 training return: -979.3466843425333
episode: 390 training return: -845.6907601882097
episode: 391 training return: -980.7852633676449
epoch: 98 test_true_pfm: -162.04239104283243 sim_pfm: -746.9033568071004
episode: 392 training return: -923.0501912571987
episode: 393 training return: -982.2729100091505
episode: 394 training return: -881.7803094445941
episode: 395 training return: -856.6911886018679
epoch: 99 test_true_pfm: 139.58088409404806 sim_pfm: -955.4848418001098
episode: 396 training return: -858.3972277434293
episode: 397 training return: -849.577936024123
episode: 398 training return: -886.8378727256933
episode: 399 training return: -987.6769223396565
epoch: 100 test_true_pfm: -252.90065605763536 sim_pfm: -750.1986548401624
episode: 400 training return: -827.7112563075568
episode: 401 training return: -948.8121625049646
episode: 402 training return: -949.1810005246389
episode: 403 training return: -982.1380522635235
epoch: 101 test_true_pfm: -203.35134303583868 sim_pfm: -747.0902181511043
episode: 404 training return: -949.8294276700802
episode: 405 training return: -857.9339415415042
episode: 406 training return: -882.1049266642946
episode: 407 training return: -967.3505012692391
epoch: 102 test_true_pfm: -134.4421547494253 sim_pfm: -733.7276092202119
episode: 408 training return: -859.5371602525712
episode: 409 training return: -895.4427094904303
episode: 410 training return: -979.664476351805
episode: 411 training return: -984.9855956464619
epoch: 103 test_true_pfm: 64.07759596495173 sim_pfm: -917.2290193440716
episode: 412 training return: -891.6492050941642
episode: 413 training return: -898.7651948352188
episode: 414 training return: -936.7784998200123
episode: 415 training return: -984.441557379517
epoch: 104 test_true_pfm: -123.84082910533249 sim_pfm: -760.9317826898041
episode: 416 training return: -900.0825488379543
episode: 417 training return: -834.5840723245096
episode: 418 training return: -956.8810922478593
episode: 419 training return: -988.6608510036253
epoch: 105 test_true_pfm: -9.00981134764511 sim_pfm: -736.5230953048739
episode: 420 training return: -980.9295617102553
episode: 421 training return: -861.1089435954202
episode: 422 training return: -974.5028957381093
episode: 423 training return: -857.109248186881
epoch: 106 test_true_pfm: -109.9675298311107 sim_pfm: -766.1533413648399
episode: 424 training return: -969.818944118862
episode: 425 training return: -934.7042482368662
episode: 426 training return: -972.1774542622369
episode: 427 training return: -931.8040798396713
epoch: 107 test_true_pfm: -62.69831035376271 sim_pfm: -796.5378428680409
episode: 428 training return: -867.9555535967237
episode: 429 training return: -935.8197675025868
episode: 430 training return: -965.5979331560668
episode: 431 training return: -968.9621537634985
epoch: 108 test_true_pfm: -72.8337731995328 sim_pfm: -741.5411867042536
episode: 432 training return: -985.7024226042282
episode: 433 training return: -954.8983182489337
episode: 434 training return: -842.3337289377876
episode: 435 training return: -853.359897251949
epoch: 109 test_true_pfm: -213.724274736196 sim_pfm: -745.2455335430664
episode: 436 training return: -926.047289626575
episode: 437 training return: -934.855242287082
episode: 438 training return: -879.5040486868363
episode: 439 training return: -967.2729109121664
epoch: 110 test_true_pfm: -10.61454765881713 sim_pfm: -767.932780900395
episode: 440 training return: -951.9410268392498
episode: 441 training return: -907.2835569254953
episode: 442 training return: -902.9407587607791
episode: 443 training return: -892.1956645198969
epoch: 111 test_true_pfm: -85.72825176242827 sim_pfm: -803.4393610023781
episode: 444 training return: -986.6713395984821
episode: 445 training return: -851.1390326308145
episode: 446 training return: -874.3295028661219
episode: 447 training return: -966.8660156920644
epoch: 112 test_true_pfm: -78.97789591137273 sim_pfm: -735.0910400430221
episode: 448 training return: -878.5702830479171
episode: 449 training return: -958.2159460805162
episode: 450 training return: -850.4940338471769
episode: 451 training return: -985.2303720030502
epoch: 113 test_true_pfm: 24.19945432943642 sim_pfm: -796.2750242105559
episode: 452 training return: -890.7379713421964
episode: 453 training return: -896.0486554392139
episode: 454 training return: -852.8929740724992
episode: 455 training return: -902.8274361958279
epoch: 114 test_true_pfm: 132.2139424558312 sim_pfm: -772.6527177066981
episode: 456 training return: -969.9702972095595
episode: 457 training return: -946.1545156083585
episode: 458 training return: -957.9607409235091
episode: 459 training return: -912.3567614533519
epoch: 115 test_true_pfm: -307.90140356371086 sim_pfm: -759.1183098083144
episode: 460 training return: -968.2600261469522
episode: 461 training return: -879.2771681023107
episode: 462 training return: -820.6438431926366
episode: 463 training return: -968.7354284372808
epoch: 116 test_true_pfm: 17.053584807299643 sim_pfm: -799.2699275645815
episode: 464 training return: -852.1066158018784
episode: 465 training return: -862.7967619133816
episode: 466 training return: -940.80562160505
episode: 467 training return: -965.0227172959784
epoch: 117 test_true_pfm: -207.48910865677507 sim_pfm: -736.4778730045464
episode: 468 training return: -979.0914309395287
episode: 469 training return: -878.7038606995493
episode: 470 training return: -958.7100582381369
episode: 471 training return: -937.2489672300236
epoch: 118 test_true_pfm: -3.8108715057738713 sim_pfm: -695.6225942977335
episode: 472 training return: -935.7398149050691
episode: 473 training return: -979.1735735662635
episode: 474 training return: -914.7556550085257
episode: 475 training return: -955.7618142961074
epoch: 119 test_true_pfm: -267.1505963104209 sim_pfm: -783.3826431361849
episode: 476 training return: -936.4536514655626
episode: 477 training return: -862.6551324282598
episode: 478 training return: -970.2783107568036
episode: 479 training return: -833.4460085436021
epoch: 120 test_true_pfm: -14.660153166473068 sim_pfm: -749.6393213617584
episode: 480 training return: -988.702393151737
episode: 481 training return: -955.3588813613515
episode: 482 training return: -964.816534776949
episode: 483 training return: -948.5090327028494
epoch: 121 test_true_pfm: -20.99495039737674 sim_pfm: -717.6849878501131
episode: 484 training return: -986.5367056860798
episode: 485 training return: -908.8243705758517
episode: 486 training return: -898.8124712332803
episode: 487 training return: -990.5809348408394
epoch: 122 test_true_pfm: -285.042645045116 sim_pfm: -764.7163797420252
episode: 488 training return: -834.1002324650378
episode: 489 training return: -977.9836792158478
episode: 490 training return: -832.3409737014193
episode: 491 training return: -955.7503735324511
epoch: 123 test_true_pfm: -149.4016246583947 sim_pfm: -726.9319329827163
episode: 492 training return: -938.1954986278198
episode: 493 training return: -913.4073313180415
episode: 494 training return: -985.2151011159057
episode: 495 training return: -892.4897205678287
epoch: 124 test_true_pfm: -56.434502996822836 sim_pfm: -758.5081078612025
episode: 496 training return: -813.9081415926229
episode: 497 training return: -920.6349676933238
episode: 498 training return: -975.4750600949069
episode: 499 training return: -855.5044699296279
epoch: 125 test_true_pfm: -207.80353979049013 sim_pfm: -732.4866441056996
episode: 500 training return: -947.9346735290836
episode: 501 training return: -968.2061434964266
episode: 502 training return: -940.8930495953316
episode: 503 training return: -869.4116763411497
epoch: 126 test_true_pfm: -75.43661926060666 sim_pfm: -716.2996767550738
episode: 504 training return: -975.6524683206061
episode: 505 training return: -919.2667580040337
episode: 506 training return: -837.6498481537349
episode: 507 training return: -848.6467881144517
epoch: 127 test_true_pfm: -375.73700091189517 sim_pfm: -769.4327209037095
episode: 508 training return: -959.8234245785749
episode: 509 training return: -790.6776279076483
episode: 510 training return: -948.2335781396981
episode: 511 training return: -883.2629935893973
epoch: 128 test_true_pfm: 13.843899571306709 sim_pfm: -777.4989443729859
episode: 512 training return: -931.1410905396366
episode: 513 training return: -947.0095448019079
episode: 514 training return: -976.7408989420801
episode: 515 training return: -913.6556374411069
epoch: 129 test_true_pfm: -17.529806401721306 sim_pfm: -710.1432442107874
episode: 516 training return: -973.1259920110376
episode: 517 training return: -843.7687774783209
episode: 518 training return: -963.011189804289
episode: 519 training return: -918.735769602482
epoch: 130 test_true_pfm: -48.80380581641915 sim_pfm: -729.7859061005807
episode: 520 training return: -886.7604501860161
episode: 521 training return: -979.5295876543685
episode: 522 training return: -819.7828153194379
episode: 523 training return: -966.9213755513293
epoch: 131 test_true_pfm: -54.31801647117815 sim_pfm: -847.0027162620478
episode: 524 training return: -939.0787754254296
episode: 525 training return: -964.0696654999672
episode: 526 training return: -967.6521526382264
episode: 527 training return: -922.2838386229648
epoch: 132 test_true_pfm: -33.39855521917185 sim_pfm: -719.9701243502695
episode: 528 training return: -965.0606227293537
episode: 529 training return: -959.6927623390985
episode: 530 training return: -933.4043262208148
episode: 531 training return: -842.4719928930031
epoch: 133 test_true_pfm: -48.558621521460566 sim_pfm: -750.8831106679194
episode: 532 training return: -892.4364930391881
episode: 533 training return: -863.734039318607
episode: 534 training return: -848.461911442239
episode: 535 training return: -978.4699924891497
epoch: 134 test_true_pfm: -221.75514981369778 sim_pfm: -735.2924634128794
episode: 536 training return: -985.3973370648304
episode: 537 training return: -831.2997810466939
episode: 538 training return: -867.3910870483119
episode: 539 training return: -946.0457030069642
epoch: 135 test_true_pfm: -63.38262406113406 sim_pfm: -741.108488345278
episode: 540 training return: -947.850139378422
episode: 541 training return: -965.9903420373722
episode: 542 training return: -944.9675271858548
episode: 543 training return: -819.3576413037229
epoch: 136 test_true_pfm: 133.55828966600882 sim_pfm: -713.8001916330946
episode: 544 training return: -928.438103424465
episode: 545 training return: -903.8380587201528
episode: 546 training return: -984.848695348548
episode: 547 training return: -853.8378537238646
epoch: 137 test_true_pfm: -198.1769958938055 sim_pfm: -736.8157800200551
episode: 548 training return: -917.4111702230756
episode: 549 training return: -973.5177976464847
episode: 550 training return: -905.6194455384633
episode: 551 training return: -955.8427084656028
epoch: 138 test_true_pfm: -106.608574349112 sim_pfm: -806.4426013817641
episode: 552 training return: -887.3620811424215
episode: 553 training return: -843.2577687769272
episode: 554 training return: -879.6544990571307
episode: 555 training return: -819.0835080763349
epoch: 139 test_true_pfm: -11.94787759534943 sim_pfm: -714.2203856994405
episode: 556 training return: -945.5649518974271
episode: 557 training return: -937.8509669120659
episode: 558 training return: -920.8631918763281
episode: 559 training return: -943.2290861627608
epoch: 140 test_true_pfm: -140.1107236407244 sim_pfm: -771.5153047569344
episode: 560 training return: -862.2674211937474
episode: 561 training return: -957.1808267840267
episode: 562 training return: -949.4950295224537
episode: 563 training return: -926.2589561598891
epoch: 141 test_true_pfm: -150.23752482533556 sim_pfm: -763.9774819654108
episode: 564 training return: -972.8160983915895
episode: 565 training return: -836.6564331200543
episode: 566 training return: -918.8542854053093
episode: 567 training return: -840.7474759919321
epoch: 142 test_true_pfm: 41.06794779679051 sim_pfm: -728.4183991737223
episode: 568 training return: -921.8024346606173
episode: 569 training return: -957.3004182388595
episode: 570 training return: -965.8587844291173
episode: 571 training return: -855.5924198762984
epoch: 143 test_true_pfm: 4.147223726559293 sim_pfm: -830.9402775947261
episode: 572 training return: -948.6427533569038
episode: 573 training return: -978.9685045192725
episode: 574 training return: -866.482535404941
episode: 575 training return: -939.138708507365
epoch: 144 test_true_pfm: -98.9955934411247 sim_pfm: -732.5295587225628
episode: 576 training return: -881.9775984873489
episode: 577 training return: -944.6589389951555
episode: 578 training return: -968.7209759522967
episode: 579 training return: -968.0354011674107
epoch: 145 test_true_pfm: -56.40055521574584 sim_pfm: -696.8536813855491
episode: 580 training return: -987.2675978737335
episode: 581 training return: -828.0747543637383
episode: 582 training return: -963.4818607482366
episode: 583 training return: -849.7977581387057
epoch: 146 test_true_pfm: -59.657915695433275 sim_pfm: -742.609907288772
episode: 584 training return: -832.863704119827
episode: 585 training return: -957.7050618369133
episode: 586 training return: -965.6166573537294
episode: 587 training return: -960.4672055921287
epoch: 147 test_true_pfm: -178.60389157221303 sim_pfm: -749.588350417303
episode: 588 training return: -925.1710077044442
episode: 589 training return: -843.4124676906602
episode: 590 training return: -971.0503648574866
episode: 591 training return: -911.6480063020181
epoch: 148 test_true_pfm: -74.10596487772928 sim_pfm: -826.9605182785435
episode: 592 training return: -898.0240441177052
episode: 593 training return: -889.4698342686205
episode: 594 training return: -878.6735382550634
episode: 595 training return: -967.3667974571445
epoch: 149 test_true_pfm: -27.190511821676406 sim_pfm: -741.7583889829871
episode: 596 training return: -904.9638647255554
episode: 597 training return: -841.3220218533862
episode: 598 training return: -961.444500013623
episode: 599 training return: -841.6424473395956
epoch: 150 test_true_pfm: -68.4205447075392 sim_pfm: -832.805897358608
