['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 0.3054397536814213 test_loss: 0.18521149158477784
epoch: 1 training_loss 0.17232680410146714 test_loss: 0.17848434448242187
epoch: 2 training_loss 0.14642247438430786 test_loss: 0.1525844931602478
epoch: 3 training_loss 0.14192908056080342 test_loss: 0.12631521224975586
epoch: 4 training_loss 0.12583598803728818 test_loss: 0.12062642574310303
epoch: 5 training_loss 0.11618867516517639 test_loss: 0.1300421953201294
epoch: 6 training_loss 0.12218217682093382 test_loss: 0.12921338081359862
epoch: 7 training_loss 0.11924146935343742 test_loss: 0.12880569696426392
epoch: 8 training_loss 0.11321782320737839 test_loss: 0.10966620445251465
epoch: 9 training_loss 0.11338638830929995 test_loss: 0.12268646955490112
epoch: 10 training_loss 0.1205143577978015 test_loss: 0.12021931409835815
epoch: 11 training_loss 0.11060746263712645 test_loss: 0.1189875841140747
epoch: 12 training_loss 0.11002912562340499 test_loss: 0.1182621955871582
epoch: 13 training_loss 0.11613550085574388 test_loss: 0.11383228302001953
epoch: 14 training_loss 0.10103790190070867 test_loss: 0.12348517179489135
epoch: 15 training_loss 0.10881953509524464 test_loss: 0.12732505798339844
epoch: 16 training_loss 0.10750906944274902 test_loss: 0.11706197261810303
epoch: 17 training_loss 0.11595606483519078 test_loss: 0.1281752109527588
epoch: 18 training_loss 0.10151753777638078 test_loss: 0.11238465309143067
epoch: 19 training_loss 0.10565783765166997 test_loss: 0.1284019947052002
epoch: 20 training_loss 0.10684418773278594 test_loss: 0.11328811645507812
epoch: 21 training_loss 0.10864629616960883 test_loss: 0.1190147876739502
epoch: 22 training_loss 0.1156108495593071 test_loss: 0.10459054708480835
epoch: 23 training_loss 0.10709304528310895 test_loss: 0.12251235246658325
epoch: 24 training_loss 0.1041118298470974 test_loss: 0.10774381160736084
epoch: 25 training_loss 0.10024622414261103 test_loss: 0.11420549154281616
epoch: 26 training_loss 0.102262588981539 test_loss: 0.11427631378173828
epoch: 27 training_loss 0.11085775103420019 test_loss: 0.11372112035751343
epoch: 28 training_loss 0.10345942992717028 test_loss: 0.11018890142440796
epoch: 29 training_loss 0.11181296963244676 test_loss: 0.11983097791671753
epoch: 30 training_loss 0.10443432409316301 test_loss: 0.1255962610244751
epoch: 31 training_loss 0.10003897031769156 test_loss: 0.10290733575820923
epoch: 32 training_loss 0.112545295227319 test_loss: 0.10187513828277588
epoch: 33 training_loss 0.10263376362621784 test_loss: 0.12400212287902831
epoch: 34 training_loss 0.10459936287254096 test_loss: 0.10226480960845948
epoch: 35 training_loss 0.1090467993170023 test_loss: 0.08962206840515137
epoch: 36 training_loss 0.09846593875437976 test_loss: 0.10225813388824463
epoch: 37 training_loss 0.10202200662344695 test_loss: 0.1141289234161377
epoch: 38 training_loss 0.1044893866032362 test_loss: 0.13106316328048706
epoch: 39 training_loss 0.10064925668761134 test_loss: 0.1283905029296875
epoch: 40 training_loss 0.10130564955994487 test_loss: 0.10579640865325927
epoch: 41 training_loss 0.09501043643802404 test_loss: 0.11808619499206544
epoch: 42 training_loss 0.10129147939383984 test_loss: 0.11498399972915649
epoch: 43 training_loss 0.10220350798219442 test_loss: 0.10902518033981323
epoch: 44 training_loss 0.09369000546634197 test_loss: 0.11150338649749755
epoch: 45 training_loss 0.099248973056674 test_loss: 0.10816494226455689
epoch: 46 training_loss 0.10867056455463171 test_loss: 0.1154940128326416
epoch: 47 training_loss 0.0955822211317718 test_loss: 0.12362565994262695
epoch: 48 training_loss 0.1066563681140542 test_loss: 0.0980340838432312
epoch: 49 training_loss 0.09710771577432752 test_loss: 0.1022032380104065
epoch: 50 training_loss 0.09952508095651864 test_loss: 0.10951130390167237
epoch: 51 training_loss 0.09310761954635381 test_loss: 0.10598090887069703
epoch: 52 training_loss 0.10026072872802615 test_loss: 0.10917800664901733
epoch: 53 training_loss 0.1007026006653905 test_loss: 0.10336861610412598
epoch: 54 training_loss 0.10230809140950442 test_loss: 0.10496720075607299
epoch: 55 training_loss 0.09829825147986412 test_loss: 0.1093598484992981
epoch: 56 training_loss 0.09869805358350277 test_loss: 0.11903916597366333
epoch: 57 training_loss 0.09683173391968011 test_loss: 0.09828090071678161
epoch: 58 training_loss 0.10795515798032283 test_loss: 0.09823038578033447
epoch: 59 training_loss 0.10094945820979774 test_loss: 0.11151440143585205
epoch: 60 training_loss 0.09645762564614416 test_loss: 0.10865180492401123
epoch: 61 training_loss 0.09713910557329655 test_loss: 0.0972593069076538
epoch: 62 training_loss 0.10435494476929306 test_loss: 0.11039851903915406
epoch: 63 training_loss 0.09952150097116828 test_loss: 0.09191784262657166
epoch: 64 training_loss 0.10417514164000749 test_loss: 0.11131531000137329
epoch: 65 training_loss 0.09914026096463203 test_loss: 0.11388463973999023
epoch: 66 training_loss 0.09650539331138135 test_loss: 0.11748833656311035
epoch: 67 training_loss 0.10002434678375721 test_loss: 0.09909635186195373
epoch: 68 training_loss 0.10030170537531376 test_loss: 0.10803570747375488
epoch: 69 training_loss 0.09477858938276768 test_loss: 0.10665806531906127
epoch: 70 training_loss 0.09886048372834921 test_loss: 0.10601528882980346
epoch: 71 training_loss 0.09624719666317105 test_loss: 0.08862547278404236
epoch: 72 training_loss 0.10287597825750709 test_loss: 0.10970479249954224
epoch: 73 training_loss 0.09768962631002069 test_loss: 0.10801331996917725
epoch: 74 training_loss 0.09680286921560764 test_loss: 0.11342102289199829
epoch: 75 training_loss 0.09745269941166043 test_loss: 0.10154576301574707
epoch: 76 training_loss 0.10045253606513142 test_loss: 0.11925631761550903
epoch: 77 training_loss 0.09624919088557363 test_loss: 0.12169719934463501
epoch: 78 training_loss 0.09831674380227923 test_loss: 0.11886422634124756
epoch: 79 training_loss 0.09936883682385086 test_loss: 0.12846661806106568
epoch: 80 training_loss 0.09736208014190197 test_loss: 0.10612555742263793
epoch: 81 training_loss 0.10112515792250633 test_loss: 0.1008790373802185
epoch: 82 training_loss 0.09551051938906312 test_loss: 0.11154656410217285
epoch: 83 training_loss 0.10289478594437242 test_loss: 0.11841816902160644
epoch: 84 training_loss 0.09565214522182941 test_loss: 0.10780135393142701
epoch: 85 training_loss 0.09606891077011824 test_loss: 0.11665947437286377
epoch: 86 training_loss 0.10385739285498857 test_loss: 0.11626946926116943
epoch: 87 training_loss 0.10437476933002472 test_loss: 0.11229506731033326
epoch: 88 training_loss 0.1030671938508749 test_loss: 0.09782560467720032
epoch: 89 training_loss 0.09728794775903225 test_loss: 0.10545598268508911
epoch: 90 training_loss 0.08955967884510756 test_loss: 0.10398091077804565
epoch: 91 training_loss 0.09687713584862649 test_loss: 0.12903493642807007
epoch: 92 training_loss 0.09800604285672307 test_loss: 0.11623759269714355
epoch: 93 training_loss 0.09284347340464592 test_loss: 0.10991724729537963
epoch: 94 training_loss 0.09719868127256631 test_loss: 0.0948108971118927
epoch: 95 training_loss 0.10122267920523882 test_loss: 0.11599128246307373
epoch: 96 training_loss 0.09911116171628237 test_loss: 0.11247031688690186
epoch: 97 training_loss 0.09850168511271477 test_loss: 0.09594342112541199
epoch: 98 training_loss 0.09913339041173458 test_loss: 0.10896531343460084
epoch: 99 training_loss 0.09717438776046038 test_loss: 0.11791107654571534
epoch: 100 training_loss 0.09798661898821592 test_loss: 0.10365406274795533
epoch: 101 training_loss 0.10063899334520102 test_loss: 0.10991978645324707
epoch: 102 training_loss 0.09147905606776476 test_loss: 0.1092564582824707
epoch: 103 training_loss 0.09994920937344431 test_loss: 0.1079828381538391
epoch: 104 training_loss 0.09640003360807896 test_loss: 0.10357478857040406
epoch: 105 training_loss 0.09303756665438413 test_loss: 0.11966254711151122
epoch: 106 training_loss 0.09461724815890193 test_loss: 0.11155967712402344
epoch: 107 training_loss 0.10031297374516726 test_loss: 0.12381186485290527
epoch: 108 training_loss 0.09828814385458827 test_loss: 0.09865167140960693
epoch: 109 training_loss 0.10244847554713488 test_loss: 0.128653085231781
epoch: 110 training_loss 0.10382615525275468 test_loss: 0.10241998434066772
epoch: 111 training_loss 0.09657146336510777 test_loss: 0.10418630838394165
epoch: 112 training_loss 0.09385404514148832 test_loss: 0.10865609645843506
epoch: 113 training_loss 0.08633634679019452 test_loss: 0.10777599811553955
epoch: 114 training_loss 0.09943041034042835 test_loss: 0.11618489027023315
epoch: 115 training_loss 0.09752648549154401 test_loss: 0.10494819879531861
epoch: 116 training_loss 0.09551063634455204 test_loss: 0.11436142921447753
epoch: 117 training_loss 0.09152779607102275 test_loss: 0.10856399536132813
epoch: 118 training_loss 0.08796390190720559 test_loss: 0.1059144377708435
epoch: 119 training_loss 0.09447116128169 test_loss: 0.11573574542999268
epoch: 120 training_loss 0.09564721282571555 test_loss: 0.12139233350753784
epoch: 121 training_loss 0.09644719798117876 test_loss: 0.1100061297416687
epoch: 122 training_loss 0.08627913739532232 test_loss: 0.09952291250228881
epoch: 123 training_loss 0.10109896028414368 test_loss: 0.12492474317550659
epoch: 124 training_loss 0.09465832084417343 test_loss: 0.10238009691238403
epoch: 125 training_loss 0.09923130730167032 test_loss: 0.10595968961715699
epoch: 126 training_loss 0.09198706996627153 test_loss: 0.11322821378707885
epoch: 127 training_loss 0.09603850997984409 test_loss: 0.12478708028793335
epoch: 128 training_loss 0.09818832179531455 test_loss: 0.10120630264282227
epoch: 129 training_loss 0.09446276469156147 test_loss: 0.11656070947647094
epoch: 130 training_loss 0.09522595278918743 test_loss: 0.10506579875946045
epoch: 131 training_loss 0.09219153905287385 test_loss: 0.11480801105499268
epoch: 132 training_loss 0.09804717708379031 test_loss: 0.10529309511184692
epoch: 133 training_loss 0.09290539130568504 test_loss: 0.10603495836257934
epoch: 134 training_loss 0.09414272658526897 test_loss: 0.1287860631942749
epoch: 135 training_loss 0.1016977532953024 test_loss: 0.10890140533447265
epoch: 136 training_loss 0.09526379033923149 test_loss: 0.10630853176116943
epoch: 137 training_loss 0.09434823136776686 test_loss: 0.1093135952949524
epoch: 138 training_loss 0.0937532844208181 test_loss: 0.11587642431259156
epoch: 139 training_loss 0.09695327576249838 test_loss: 0.09763081669807434
epoch: 140 training_loss 0.0994167897105217 test_loss: 0.11663674116134644
epoch: 141 training_loss 0.09310627935454249 test_loss: 0.10750378370285034
epoch: 142 training_loss 0.09502039548009635 test_loss: 0.12181941270828248
epoch: 143 training_loss 0.09923496056348086 test_loss: 0.12098817825317383
epoch: 144 training_loss 0.09794915795326233 test_loss: 0.10000680685043335
epoch: 145 training_loss 0.09219964044168591 test_loss: 0.1054917335510254
epoch: 146 training_loss 0.09308325737714768 test_loss: 0.1025971531867981
epoch: 147 training_loss 0.08751096960157156 test_loss: 0.11478338241577149
epoch: 148 training_loss 0.0932845713570714 test_loss: 0.08725412487983704
epoch: 149 training_loss 0.08776096567511558 test_loss: 0.12024120092391968
epoch: 0 training_loss 0.3045532040297985 test_loss: 0.2023935556411743
epoch: 1 training_loss 0.1753853616863489 test_loss: 0.15436614751815797
epoch: 2 training_loss 0.13017755826935173 test_loss: 0.11969003677368165
epoch: 3 training_loss 0.13285449046641587 test_loss: 0.14472384452819825
epoch: 4 training_loss 0.12749186977744104 test_loss: 0.12168618440628051
epoch: 5 training_loss 0.12841606374830009 test_loss: 0.1291965126991272
epoch: 6 training_loss 0.12659234065562486 test_loss: 0.1083989143371582
epoch: 7 training_loss 0.11448397245258093 test_loss: 0.12541728019714354
epoch: 8 training_loss 0.11840996317565441 test_loss: 0.13149234056472778
epoch: 9 training_loss 0.10949156567454338 test_loss: 0.12883733510971068
epoch: 10 training_loss 0.11394220657646656 test_loss: 0.12174034118652344
epoch: 11 training_loss 0.11363862048834562 test_loss: 0.12928100824356079
epoch: 12 training_loss 0.11161540837958456 test_loss: 0.10622216463088989
epoch: 13 training_loss 0.11331395173445344 test_loss: 0.11189157962799072
epoch: 14 training_loss 0.1118396633118391 test_loss: 0.09563504457473755
epoch: 15 training_loss 0.11098494369536638 test_loss: 0.1118040680885315
epoch: 16 training_loss 0.10269368920475244 test_loss: 0.11741119623184204
epoch: 17 training_loss 0.10874320043250918 test_loss: 0.12155604362487793
epoch: 18 training_loss 0.09862440636381507 test_loss: 0.11793138980865478
epoch: 19 training_loss 0.10994500130414962 test_loss: 0.12017925977706909
epoch: 20 training_loss 0.11189338829368353 test_loss: 0.14507437944412233
epoch: 21 training_loss 0.10573075465857983 test_loss: 0.1279102921485901
epoch: 22 training_loss 0.11353297855705023 test_loss: 0.09963972568511963
epoch: 23 training_loss 0.10963958226144314 test_loss: 0.12955193519592284
epoch: 24 training_loss 0.10812550317496061 test_loss: 0.11487442255020142
epoch: 25 training_loss 0.10247351869940757 test_loss: 0.11418689489364624
epoch: 26 training_loss 0.09064777493476868 test_loss: 0.10238173007965087
epoch: 27 training_loss 0.10642875552177429 test_loss: 0.12186205387115479
epoch: 28 training_loss 0.09549709860235453 test_loss: 0.11268887519836426
epoch: 29 training_loss 0.11062749695032835 test_loss: 0.12892106771469117
epoch: 30 training_loss 0.1076219168677926 test_loss: 0.11467400789260865
epoch: 31 training_loss 0.10185845490545034 test_loss: 0.10649405717849732
epoch: 32 training_loss 0.10248394753783942 test_loss: 0.09956731796264648
epoch: 33 training_loss 0.10079865537583828 test_loss: 0.1297743558883667
epoch: 34 training_loss 0.10572873868048191 test_loss: 0.11748826503753662
epoch: 35 training_loss 0.10301254525780677 test_loss: 0.12424339056015014
epoch: 36 training_loss 0.1073823587037623 test_loss: 0.1021512508392334
epoch: 37 training_loss 0.10671807017177343 test_loss: 0.0965575933456421
epoch: 38 training_loss 0.10154257293790579 test_loss: 0.10218238830566406
epoch: 39 training_loss 0.10459291663020849 test_loss: 0.10651962757110596
epoch: 40 training_loss 0.10790756352245807 test_loss: 0.13269811868667603
epoch: 41 training_loss 0.10170723725110292 test_loss: 0.10079667568206788
epoch: 42 training_loss 0.10508441284298897 test_loss: 0.13022044897079468
epoch: 43 training_loss 0.10674250289797783 test_loss: 0.11466712951660156
epoch: 44 training_loss 0.10399634391069412 test_loss: 0.12908564805984496
epoch: 45 training_loss 0.09964991718530655 test_loss: 0.12194091081619263
epoch: 46 training_loss 0.10276704125106334 test_loss: 0.09952969551086426
epoch: 47 training_loss 0.10168895322829485 test_loss: 0.11330993175506592
epoch: 48 training_loss 0.10554895853623748 test_loss: 0.10986068248748779
epoch: 49 training_loss 0.10283283591270446 test_loss: 0.09550879001617432
epoch: 50 training_loss 0.0997403797134757 test_loss: 0.11287603378295899
epoch: 51 training_loss 0.0934363353997469 test_loss: 0.1177066445350647
epoch: 52 training_loss 0.10781811847351491 test_loss: 0.10228166580200196
epoch: 53 training_loss 0.10629399010911583 test_loss: 0.11030656099319458
epoch: 54 training_loss 0.10623113095760345 test_loss: 0.1076618790626526
epoch: 55 training_loss 0.10618853833526373 test_loss: 0.09648571610450744
epoch: 56 training_loss 0.10500188544392586 test_loss: 0.11292614936828613
epoch: 57 training_loss 0.10531056860461831 test_loss: 0.13061299324035644
epoch: 58 training_loss 0.11086495343595743 test_loss: 0.11596585512161255
epoch: 59 training_loss 0.10183995993807912 test_loss: 0.10143998861312867
epoch: 60 training_loss 0.0963687832094729 test_loss: 0.10173987150192261
epoch: 61 training_loss 0.09843607239425183 test_loss: 0.10734951496124268
epoch: 62 training_loss 0.09795679800212383 test_loss: 0.11087061166763305
epoch: 63 training_loss 0.10331090623512865 test_loss: 0.11723699569702148
epoch: 64 training_loss 0.10052474601194263 test_loss: 0.11418979167938233
epoch: 65 training_loss 0.10155596099793911 test_loss: 0.09784509539604187
epoch: 66 training_loss 0.10487464485689997 test_loss: 0.09567316174507141
epoch: 67 training_loss 0.09783396068960429 test_loss: 0.1064801573753357
epoch: 68 training_loss 0.10041613629087806 test_loss: 0.09903613328933716
epoch: 69 training_loss 0.09469355497509241 test_loss: 0.11696501970291137
epoch: 70 training_loss 0.10038693450391292 test_loss: 0.10952715873718262
epoch: 71 training_loss 0.0897763510234654 test_loss: 0.11852409839630126
epoch: 72 training_loss 0.10095856996253133 test_loss: 0.1073183536529541
epoch: 73 training_loss 0.10048910662531853 test_loss: 0.1073300838470459
epoch: 74 training_loss 0.09847243348136545 test_loss: 0.09922323226928711
epoch: 75 training_loss 0.09680794224143029 test_loss: 0.10511242151260376
epoch: 76 training_loss 0.09850605130195618 test_loss: 0.11257281303405761
epoch: 77 training_loss 0.0975287481956184 test_loss: 0.11661484241485595
epoch: 78 training_loss 0.0949482323601842 test_loss: 0.11224578619003296
epoch: 79 training_loss 0.10360161371529103 test_loss: 0.11954265832901001
epoch: 80 training_loss 0.09828402370214462 test_loss: 0.10285719633102416
epoch: 81 training_loss 0.09743284543976188 test_loss: 0.11529169082641602
epoch: 82 training_loss 0.09564925000071525 test_loss: 0.10804637670516967
epoch: 83 training_loss 0.09964058294892311 test_loss: 0.12134257555007935
epoch: 84 training_loss 0.09759534873068333 test_loss: 0.1166413426399231
epoch: 85 training_loss 0.0959477598592639 test_loss: 0.10183660984039307
epoch: 86 training_loss 0.10014157444238662 test_loss: 0.10925393104553223
epoch: 87 training_loss 0.10302944520488382 test_loss: 0.11193058490753174
epoch: 88 training_loss 0.09832519171759486 test_loss: 0.10311719179153442
epoch: 89 training_loss 0.09786866767331957 test_loss: 0.10815254449844361
epoch: 90 training_loss 0.09876747589558363 test_loss: 0.11155205965042114
epoch: 91 training_loss 0.09263589683920145 test_loss: 0.10987062454223633
epoch: 92 training_loss 0.0906732364371419 test_loss: 0.12310453653335571
epoch: 93 training_loss 0.10049471825361252 test_loss: 0.11304454803466797
epoch: 94 training_loss 0.09502934036776424 test_loss: 0.10999336242675781
epoch: 95 training_loss 0.09835982754826546 test_loss: 0.11810070276260376
epoch: 96 training_loss 0.09465118635445834 test_loss: 0.10218936204910278
epoch: 97 training_loss 0.10315179070457817 test_loss: 0.11966415643692016
epoch: 98 training_loss 0.09706335140392185 test_loss: 0.10940089225769042
epoch: 99 training_loss 0.09593030702322722 test_loss: 0.12630906105041503
epoch: 100 training_loss 0.09347827155143022 test_loss: 0.11888524293899536
epoch: 101 training_loss 0.1019892941787839 test_loss: 0.11585623025894165
epoch: 102 training_loss 0.09919426798820495 test_loss: 0.10565879344940185
epoch: 103 training_loss 0.09356064511463046 test_loss: 0.10945889949798585
epoch: 104 training_loss 0.09333036713302136 test_loss: 0.11500530242919922
epoch: 105 training_loss 0.09933117020875215 test_loss: 0.11201061010360717
epoch: 106 training_loss 0.10002957969903946 test_loss: 0.11231685876846313
epoch: 107 training_loss 0.09388507261872292 test_loss: 0.11312308311462402
epoch: 108 training_loss 0.08917767181992531 test_loss: 0.10613617897033692
epoch: 109 training_loss 0.09447662891820073 test_loss: 0.11420751810073852
epoch: 110 training_loss 0.10121984504163266 test_loss: 0.1251434087753296
epoch: 111 training_loss 0.09771720424294472 test_loss: 0.10971384048461914
epoch: 112 training_loss 0.09550636431202292 test_loss: 0.10385870933532715
epoch: 113 training_loss 0.09642355766147376 test_loss: 0.13275673389434814
epoch: 114 training_loss 0.09304173281416298 test_loss: 0.11316852569580078
epoch: 115 training_loss 0.09764448637142778 test_loss: 0.11266521215438843
epoch: 116 training_loss 0.09286696635186673 test_loss: 0.12433081865310669
epoch: 117 training_loss 0.094512013848871 test_loss: 0.11849846839904785
epoch: 118 training_loss 0.0971353539545089 test_loss: 0.09827198386192322
epoch: 119 training_loss 0.0924175919406116 test_loss: 0.10572905540466308
epoch: 120 training_loss 0.0904458480887115 test_loss: 0.1270171284675598
epoch: 121 training_loss 0.09131863648071885 test_loss: 0.11650810241699219
epoch: 122 training_loss 0.10036550229415298 test_loss: 0.11658245325088501
epoch: 123 training_loss 0.09139205381274224 test_loss: 0.1052888035774231
epoch: 124 training_loss 0.09699429586529731 test_loss: 0.11483530998229981
epoch: 125 training_loss 0.09278427351266146 test_loss: 0.10956133604049682
epoch: 126 training_loss 0.0935083766002208 test_loss: 0.10455976724624634
epoch: 127 training_loss 0.08921246184036136 test_loss: 0.11025353670120239
epoch: 128 training_loss 0.09156077276915311 test_loss: 0.10892635583877563
epoch: 129 training_loss 0.09039702508598566 test_loss: 0.12123377323150634
epoch: 130 training_loss 0.0973876891285181 test_loss: 0.1250733733177185
epoch: 131 training_loss 0.0947120787575841 test_loss: 0.13195656538009642
epoch: 132 training_loss 0.08446782793849707 test_loss: 0.11786324977874756
epoch: 133 training_loss 0.0893018502742052 test_loss: 0.11837167739868164
epoch: 134 training_loss 0.09217035146430134 test_loss: 0.13489258289337158
epoch: 135 training_loss 0.09217210600152612 test_loss: 0.12236921787261963
epoch: 136 training_loss 0.09710907215252519 test_loss: 0.11858202219009399
epoch: 137 training_loss 0.09122329998761415 test_loss: 0.11959080696105957
epoch: 138 training_loss 0.0993383014947176 test_loss: 0.12116782665252686
epoch: 139 training_loss 0.09322347212582827 test_loss: 0.13545113801956177
epoch: 140 training_loss 0.09674953643232584 test_loss: 0.13291040658950806
epoch: 141 training_loss 0.08803091868758202 test_loss: 0.11208411455154418
epoch: 142 training_loss 0.10241558026522397 test_loss: 0.1303157925605774
epoch: 143 training_loss 0.0908385755866766 test_loss: 0.1268152713775635
epoch: 144 training_loss 0.09389652123674751 test_loss: 0.09259950518608093
epoch: 145 training_loss 0.08643348885700107 test_loss: 0.12269457578659057
epoch: 146 training_loss 0.09439614906907082 test_loss: 0.12390096187591552
epoch: 147 training_loss 0.09722005477175116 test_loss: 0.1114094614982605
epoch: 148 training_loss 0.0937209565937519 test_loss: 0.10497806072235108
epoch: 149 training_loss 0.08372082654386759 test_loss: 0.13116331100463868
epoch: 0 training_loss 0.3317456811666489 test_loss: 0.2220924139022827
epoch: 1 training_loss 0.18088540889322757 test_loss: 0.14766122102737428
epoch: 2 training_loss 0.15401149459183217 test_loss: 0.1647806406021118
epoch: 3 training_loss 0.1423650983721018 test_loss: 0.12818948030471802
epoch: 4 training_loss 0.12786706462502478 test_loss: 0.12666847705841064
epoch: 5 training_loss 0.12858041528612374 test_loss: 0.1194029688835144
epoch: 6 training_loss 0.13102810993790626 test_loss: 0.11808121204376221
epoch: 7 training_loss 0.1276827510818839 test_loss: 0.1205458402633667
epoch: 8 training_loss 0.12055419437587261 test_loss: 0.12975676059722902
epoch: 9 training_loss 0.11927776288241149 test_loss: 0.1208154320716858
epoch: 10 training_loss 0.10963819209486246 test_loss: 0.1226474404335022
epoch: 11 training_loss 0.11313812965527177 test_loss: 0.1173632025718689
epoch: 12 training_loss 0.10755444552749395 test_loss: 0.11388740539550782
epoch: 13 training_loss 0.10229711070656776 test_loss: 0.12038241624832154
epoch: 14 training_loss 0.11139404717832804 test_loss: 0.122572922706604
epoch: 15 training_loss 0.10887913431972265 test_loss: 0.11012935638427734
epoch: 16 training_loss 0.10254179403185844 test_loss: 0.10438858270645142
epoch: 17 training_loss 0.1041605857387185 test_loss: 0.11206214427947998
epoch: 18 training_loss 0.10924107134342194 test_loss: 0.11603972911834717
epoch: 19 training_loss 0.11105166394263506 test_loss: 0.10656099319458008
epoch: 20 training_loss 0.10539225177839398 test_loss: 0.10147306919097901
epoch: 21 training_loss 0.10504017230123282 test_loss: 0.12375121116638184
epoch: 22 training_loss 0.11012293875217438 test_loss: 0.10837982892990113
epoch: 23 training_loss 0.10284368032589555 test_loss: 0.10088287591934204
epoch: 24 training_loss 0.10277683248743415 test_loss: 0.1189935564994812
epoch: 25 training_loss 0.10763178117573262 test_loss: 0.10797288417816162
epoch: 26 training_loss 0.10696827702224254 test_loss: 0.10635548830032349
epoch: 27 training_loss 0.10689203236252069 test_loss: 0.12436939477920532
epoch: 28 training_loss 0.11095444489270449 test_loss: 0.10586854219436645
epoch: 29 training_loss 0.10013639006763697 test_loss: 0.10179961919784546
epoch: 30 training_loss 0.10057237092405558 test_loss: 0.1223455786705017
epoch: 31 training_loss 0.10121359977871179 test_loss: 0.1120952010154724
epoch: 32 training_loss 0.10370523249730468 test_loss: 0.10260322093963622
epoch: 33 training_loss 0.10104775979183614 test_loss: 0.10782536268234252
epoch: 34 training_loss 0.10346811570227146 test_loss: 0.11219650506973267
epoch: 35 training_loss 0.1006887411698699 test_loss: 0.11219251155853271
epoch: 36 training_loss 0.10838214736431837 test_loss: 0.10711252689361572
epoch: 37 training_loss 0.11102729335427285 test_loss: 0.12286005020141602
epoch: 38 training_loss 0.10800422061234713 test_loss: 0.12117496728897095
epoch: 39 training_loss 0.10882157739251852 test_loss: 0.12158862352371216
epoch: 40 training_loss 0.10369101151823998 test_loss: 0.1075674295425415
epoch: 41 training_loss 0.10337032727897168 test_loss: 0.11320266723632813
epoch: 42 training_loss 0.10288743801414967 test_loss: 0.11194689273834228
epoch: 43 training_loss 0.09934489235281944 test_loss: 0.11328458786010742
epoch: 44 training_loss 0.10127391615882515 test_loss: 0.106289541721344
epoch: 45 training_loss 0.10193044571205974 test_loss: 0.11013356447219849
epoch: 46 training_loss 0.1024928190279752 test_loss: 0.11217901706695557
epoch: 47 training_loss 0.10343121349811554 test_loss: 0.1059568166732788
epoch: 48 training_loss 0.10226274838671089 test_loss: 0.10845050811767579
epoch: 49 training_loss 0.10010757017880678 test_loss: 0.10315499305725098
epoch: 50 training_loss 0.09753809437155724 test_loss: 0.10961406230926514
epoch: 51 training_loss 0.09856926754117012 test_loss: 0.10887227058410645
epoch: 52 training_loss 0.10232719851657748 test_loss: 0.11355001926422119
epoch: 53 training_loss 0.09904443595558404 test_loss: 0.12208176851272583
epoch: 54 training_loss 0.10159612614661455 test_loss: 0.12042844295501709
epoch: 55 training_loss 0.1096351007372141 test_loss: 0.11694511175155639
epoch: 56 training_loss 0.0984886428900063 test_loss: 0.1051832914352417
epoch: 57 training_loss 0.10033320359885693 test_loss: 0.10194029808044433
epoch: 58 training_loss 0.10315929951146245 test_loss: 0.1062042236328125
epoch: 59 training_loss 0.10228365950286389 test_loss: 0.10204493999481201
epoch: 60 training_loss 0.10025047477334738 test_loss: 0.1090649962425232
epoch: 61 training_loss 0.10222217205911875 test_loss: 0.1080561637878418
epoch: 62 training_loss 0.10130798723548651 test_loss: 0.10226452350616455
epoch: 63 training_loss 0.10282641658559441 test_loss: 0.11255275011062622
epoch: 64 training_loss 0.095554358959198 test_loss: 0.10471988916397094
epoch: 65 training_loss 0.10200435072183608 test_loss: 0.10256527662277222
epoch: 66 training_loss 0.0980471407622099 test_loss: 0.10223805904388428
epoch: 67 training_loss 0.10064261619001627 test_loss: 0.09293306469917298
epoch: 68 training_loss 0.09519145995378495 test_loss: 0.10052086114883423
epoch: 69 training_loss 0.09729813430458308 test_loss: 0.11085795164108277
epoch: 70 training_loss 0.09859368316829205 test_loss: 0.09069127440452576
epoch: 71 training_loss 0.09357656953856348 test_loss: 0.11075639724731445
epoch: 72 training_loss 0.10175258230417966 test_loss: 0.10853209495544433
epoch: 73 training_loss 0.09821725318208337 test_loss: 0.10772106647491456
epoch: 74 training_loss 0.10095980789512396 test_loss: 0.11293936967849731
epoch: 75 training_loss 0.09633038958534598 test_loss: 0.11099294424057007
epoch: 76 training_loss 0.102945461217314 test_loss: 0.09956841468811035
epoch: 77 training_loss 0.10293097887188196 test_loss: 0.11919337511062622
epoch: 78 training_loss 0.10296456161886454 test_loss: 0.10969030857086182
epoch: 79 training_loss 0.09854388827458024 test_loss: 0.11701798439025879
epoch: 80 training_loss 0.10066330481320619 test_loss: 0.11396844387054443
epoch: 81 training_loss 0.09971765337511897 test_loss: 0.12932804822921753
epoch: 82 training_loss 0.09624221082776785 test_loss: 0.12055870294570922
epoch: 83 training_loss 0.09682322632521391 test_loss: 0.11922129392623901
epoch: 84 training_loss 0.10174886122345925 test_loss: 0.10156022310256958
epoch: 85 training_loss 0.0940167598798871 test_loss: 0.11050870418548583
epoch: 86 training_loss 0.09468605367466808 test_loss: 0.11587152481079102
epoch: 87 training_loss 0.09799649022519588 test_loss: 0.11062672138214111
epoch: 88 training_loss 0.10082445077598096 test_loss: 0.10074348449707031
epoch: 89 training_loss 0.09576900560408831 test_loss: 0.12223678827285767
epoch: 90 training_loss 0.09426198715344071 test_loss: 0.13047871589660645
epoch: 91 training_loss 0.10221128914505244 test_loss: 0.0936832845211029
epoch: 92 training_loss 0.10469782199710607 test_loss: 0.11519134044647217
epoch: 93 training_loss 0.10224952194839716 test_loss: 0.10360418558120728
epoch: 94 training_loss 0.09849966323003173 test_loss: 0.09739388823509217
epoch: 95 training_loss 0.0976725061237812 test_loss: 0.10924949645996093
epoch: 96 training_loss 0.09897192668169737 test_loss: 0.09541053771972656
epoch: 97 training_loss 0.10109403742477298 test_loss: 0.10564919710159301
epoch: 98 training_loss 0.10022234123200179 test_loss: 0.10954214334487915
epoch: 99 training_loss 0.09521111238747836 test_loss: 0.11006271839141846
epoch: 100 training_loss 0.09527270067483187 test_loss: 0.10553443431854248
epoch: 101 training_loss 0.0961943114735186 test_loss: 0.11221786737442016
epoch: 102 training_loss 0.09594060838222504 test_loss: 0.10969144105911255
epoch: 103 training_loss 0.10060845714062452 test_loss: 0.10368634462356567
epoch: 104 training_loss 0.10432862360030412 test_loss: 0.10512592792510986
epoch: 105 training_loss 0.0919955651089549 test_loss: 0.1139490008354187
epoch: 106 training_loss 0.1042886503599584 test_loss: 0.1203792929649353
epoch: 107 training_loss 0.09628687962889672 test_loss: 0.11335593461990356
epoch: 108 training_loss 0.09460787497460842 test_loss: 0.10348196029663086
epoch: 109 training_loss 0.08604055667296052 test_loss: 0.10535072088241577
epoch: 110 training_loss 0.09456354606896639 test_loss: 0.11269235610961914
epoch: 111 training_loss 0.09986383240669966 test_loss: 0.09710811376571656
epoch: 112 training_loss 0.09052647901698947 test_loss: 0.11602728366851807
epoch: 113 training_loss 0.0957705095782876 test_loss: 0.10331602096557617
epoch: 114 training_loss 0.09470226019620895 test_loss: 0.09003558158874511
epoch: 115 training_loss 0.08682683747261763 test_loss: 0.12530317306518554
epoch: 116 training_loss 0.08651713883504271 test_loss: 0.08569761514663696
epoch: 117 training_loss 0.09728042341768742 test_loss: 0.10851399898529053
epoch: 118 training_loss 0.09965923987329006 test_loss: 0.11146587133407593
epoch: 119 training_loss 0.09525259468704463 test_loss: 0.10125257968902587
epoch: 120 training_loss 0.09361564010381698 test_loss: 0.09634776711463929
epoch: 121 training_loss 0.09601169990375638 test_loss: 0.09449538588523865
epoch: 122 training_loss 0.0917968257702887 test_loss: 0.10788743495941162
epoch: 123 training_loss 0.09295053727924824 test_loss: 0.12979769706726074
epoch: 124 training_loss 0.09059606648981572 test_loss: 0.12297033071517945
epoch: 125 training_loss 0.09165967963635921 test_loss: 0.11116114854812623
epoch: 126 training_loss 0.09329176742583513 test_loss: 0.11470439434051513
epoch: 127 training_loss 0.09215203909203411 test_loss: 0.10501617193222046
epoch: 128 training_loss 0.08587630961090326 test_loss: 0.11116586923599243
epoch: 129 training_loss 0.0898593707382679 test_loss: 0.10085593461990357
epoch: 130 training_loss 0.09675092168152333 test_loss: 0.10580021142959595
epoch: 131 training_loss 0.09476666131988168 test_loss: 0.134332537651062
epoch: 132 training_loss 0.09514107327908278 test_loss: 0.1060674548149109
epoch: 133 training_loss 0.09191533047705888 test_loss: 0.08797690868377686
epoch: 134 training_loss 0.09594158988445997 test_loss: 0.11542153358459473
epoch: 135 training_loss 0.09163731925189494 test_loss: 0.11050820350646973
epoch: 136 training_loss 0.09525996789336205 test_loss: 0.10483241081237793
epoch: 137 training_loss 0.09631506633013487 test_loss: 0.10721999406814575
epoch: 138 training_loss 0.0919758484698832 test_loss: 0.10909733772277833
epoch: 139 training_loss 0.09157437410205603 test_loss: 0.10837731361389161
epoch: 140 training_loss 0.0901151306182146 test_loss: 0.1180773138999939
epoch: 141 training_loss 0.08972511120140553 test_loss: 0.112224543094635
epoch: 142 training_loss 0.08692144544795155 test_loss: 0.1156505823135376
epoch: 143 training_loss 0.08782577313482762 test_loss: 0.11156233549118041
epoch: 144 training_loss 0.09324430981650948 test_loss: 0.1009833812713623
epoch: 145 training_loss 0.08898067433387041 test_loss: 0.09790652990341187
epoch: 146 training_loss 0.09254719771444797 test_loss: 0.12480143308639527
epoch: 147 training_loss 0.09192219583317637 test_loss: 0.12139745950698852
epoch: 148 training_loss 0.09424804169684649 test_loss: 0.09594098329544068
epoch: 149 training_loss 0.09216528862714768 test_loss: 0.08991946578025818
epoch: 0 training_loss 0.3106690291315317 test_loss: 0.22554969787597656
epoch: 1 training_loss 0.1755863729864359 test_loss: 0.15239793062210083
epoch: 2 training_loss 0.1511929713189602 test_loss: 0.19173227548599242
epoch: 3 training_loss 0.12470545917749405 test_loss: 0.14456428289413453
epoch: 4 training_loss 0.1325469544902444 test_loss: 0.12787507772445678
epoch: 5 training_loss 0.11721459040418267 test_loss: 0.1201432228088379
epoch: 6 training_loss 0.12375684969127178 test_loss: 0.12439343929290772
epoch: 7 training_loss 0.12291329175233841 test_loss: 0.1282700777053833
epoch: 8 training_loss 0.12060286778956651 test_loss: 0.14532699584960937
epoch: 9 training_loss 0.11467796735465527 test_loss: 0.13004451990127563
epoch: 10 training_loss 0.11309113077819348 test_loss: 0.1322277307510376
epoch: 11 training_loss 0.11703144535422325 test_loss: 0.11297715902328491
epoch: 12 training_loss 0.11522384282201528 test_loss: 0.10071104764938354
epoch: 13 training_loss 0.10297547854483127 test_loss: 0.1283651828765869
epoch: 14 training_loss 0.10691064439713954 test_loss: 0.12901972532272338
epoch: 15 training_loss 0.10937287505716085 test_loss: 0.12281750440597534
epoch: 16 training_loss 0.10893302144482732 test_loss: 0.12241063117980958
epoch: 17 training_loss 0.10789958380162716 test_loss: 0.11095080375671387
epoch: 18 training_loss 0.11161233132705092 test_loss: 0.10547120571136474
epoch: 19 training_loss 0.10258814504370094 test_loss: 0.11402528285980225
epoch: 20 training_loss 0.1035442671366036 test_loss: 0.1182450532913208
epoch: 21 training_loss 0.1038883052393794 test_loss: 0.13978774547576905
epoch: 22 training_loss 0.10373083326965571 test_loss: 0.11005996465682984
epoch: 23 training_loss 0.10342008609324693 test_loss: 0.11728180646896362
epoch: 24 training_loss 0.10207825884222985 test_loss: 0.1279548168182373
epoch: 25 training_loss 0.10910906881093979 test_loss: 0.12029896974563599
epoch: 26 training_loss 0.11367718098685145 test_loss: 0.0973521888256073
epoch: 27 training_loss 0.10966287326067686 test_loss: 0.13259276151657104
epoch: 28 training_loss 0.10947847485542297 test_loss: 0.11336590051651001
epoch: 29 training_loss 0.11143572945147753 test_loss: 0.13873302936553955
epoch: 30 training_loss 0.11025339994579554 test_loss: 0.11365407705307007
epoch: 31 training_loss 0.10497800774872303 test_loss: 0.10539858341217041
epoch: 32 training_loss 0.10170455321669579 test_loss: 0.10713318586349488
epoch: 33 training_loss 0.10867269337177277 test_loss: 0.1122360348701477
epoch: 34 training_loss 0.10829077476635575 test_loss: 0.11452338695526124
epoch: 35 training_loss 0.10164804883301258 test_loss: 0.11872823238372802
epoch: 36 training_loss 0.10157264748588204 test_loss: 0.09220876693725585
epoch: 37 training_loss 0.09861858684569597 test_loss: 0.0994622528553009
epoch: 38 training_loss 0.10808833088725806 test_loss: 0.1275973677635193
epoch: 39 training_loss 0.11029511615633965 test_loss: 0.11283299922943116
epoch: 40 training_loss 0.09959952810779214 test_loss: 0.11444617509841919
epoch: 41 training_loss 0.10381325218826533 test_loss: 0.09865913987159729
epoch: 42 training_loss 0.10159357141703368 test_loss: 0.1209253191947937
epoch: 43 training_loss 0.09794376865029335 test_loss: 0.1266513228416443
epoch: 44 training_loss 0.10596807081252337 test_loss: 0.1297472357749939
epoch: 45 training_loss 0.0980949492380023 test_loss: 0.10685162544250489
epoch: 46 training_loss 0.09387539125047624 test_loss: 0.145820689201355
epoch: 47 training_loss 0.10376546939834952 test_loss: 0.10777112245559692
epoch: 48 training_loss 0.09725008402019739 test_loss: 0.10187124013900757
epoch: 49 training_loss 0.09988381192088128 test_loss: 0.12587511539459229
epoch: 50 training_loss 0.09936378950253129 test_loss: 0.10437339544296265
epoch: 51 training_loss 0.10763017695397138 test_loss: 0.10534944534301757
epoch: 52 training_loss 0.0981313681602478 test_loss: 0.12712702751159669
epoch: 53 training_loss 0.09486607119441032 test_loss: 0.11848076581954955
epoch: 54 training_loss 0.09734357319772244 test_loss: 0.10826066732406617
epoch: 55 training_loss 0.0973227546364069 test_loss: 0.11590276956558228
epoch: 56 training_loss 0.10350110495463014 test_loss: 0.13310171365737916
epoch: 57 training_loss 0.09242316758260131 test_loss: 0.13696955442428588
epoch: 58 training_loss 0.10198631636798382 test_loss: 0.11257021427154541
epoch: 59 training_loss 0.10078758653253317 test_loss: 0.11950063705444336
epoch: 60 training_loss 0.09969308845698834 test_loss: 0.11033761501312256
epoch: 61 training_loss 0.09471398016437887 test_loss: 0.11041851043701172
epoch: 62 training_loss 0.0978996229544282 test_loss: 0.12202687263488769
epoch: 63 training_loss 0.09866456933319569 test_loss: 0.12287520170211792
epoch: 64 training_loss 0.10510876677930354 test_loss: 0.12740226984024047
epoch: 65 training_loss 0.10340141758322716 test_loss: 0.13565410375595094
epoch: 66 training_loss 0.09421404169872404 test_loss: 0.11447200775146485
epoch: 67 training_loss 0.09806545678526163 test_loss: 0.11440167427062989
epoch: 68 training_loss 0.09302798982709647 test_loss: 0.13006436824798584
epoch: 69 training_loss 0.10118483833968639 test_loss: 0.11127375364303589
epoch: 70 training_loss 0.10242509599775076 test_loss: 0.11194156408309937
epoch: 71 training_loss 0.09641010448336601 test_loss: 0.09993036389350891
epoch: 72 training_loss 0.10251110900193453 test_loss: 0.11853505373001098
epoch: 73 training_loss 0.10160660352557897 test_loss: 0.10782065391540527
epoch: 74 training_loss 0.09210036154836417 test_loss: 0.11136276721954345
epoch: 75 training_loss 0.09953473806381226 test_loss: 0.11307276487350464
epoch: 76 training_loss 0.10286586876958609 test_loss: 0.11221257448196412
epoch: 77 training_loss 0.09502235099673272 test_loss: 0.11627799272537231
epoch: 78 training_loss 0.09759271837770939 test_loss: 0.1232447862625122
epoch: 79 training_loss 0.09985020238906145 test_loss: 0.11881978511810302
epoch: 80 training_loss 0.10344133453443646 test_loss: 0.13066357374191284
epoch: 81 training_loss 0.09895035047084093 test_loss: 0.11429164409637452
epoch: 82 training_loss 0.09454287219792605 test_loss: 0.11224949359893799
epoch: 83 training_loss 0.08985416993498802 test_loss: 0.12762657403945923
epoch: 84 training_loss 0.10385173484683037 test_loss: 0.12610046863555907
epoch: 85 training_loss 0.10114813756197691 test_loss: 0.12882190942764282
epoch: 86 training_loss 0.1016476857289672 test_loss: 0.10621777772903443
epoch: 87 training_loss 0.09794665105640889 test_loss: 0.12088847160339355
epoch: 88 training_loss 0.10061414301395416 test_loss: 0.11941921710968018
epoch: 89 training_loss 0.10177725797519088 test_loss: 0.13049477338790894
epoch: 90 training_loss 0.09127996837720274 test_loss: 0.12127501964569092
epoch: 91 training_loss 0.09020158274099231 test_loss: 0.12435237169265748
epoch: 92 training_loss 0.09677788931876422 test_loss: 0.1194608211517334
epoch: 93 training_loss 0.10426742309704423 test_loss: 0.12278707027435302
epoch: 94 training_loss 0.091963977124542 test_loss: 0.1210809588432312
epoch: 95 training_loss 0.09237919114530087 test_loss: 0.1010326623916626
epoch: 96 training_loss 0.09620810728520154 test_loss: 0.09745188951492309
epoch: 97 training_loss 0.1008793567866087 test_loss: 0.10202623605728149
epoch: 98 training_loss 0.09406148970127105 test_loss: 0.1363563895225525
epoch: 99 training_loss 0.09434022130444646 test_loss: 0.1236747145652771
epoch: 100 training_loss 0.08763382509350777 test_loss: 0.14111680984497071
epoch: 101 training_loss 0.09432515878230334 test_loss: 0.10624451637268066
epoch: 102 training_loss 0.09814027151092887 test_loss: 0.11133072376251221
epoch: 103 training_loss 0.09229085488244891 test_loss: 0.12396055459976196
epoch: 104 training_loss 0.09003327623009681 test_loss: 0.12099366188049317
epoch: 105 training_loss 0.09196322809904814 test_loss: 0.14013388156890869
epoch: 106 training_loss 0.10404300840571523 test_loss: 0.13429067134857178
epoch: 107 training_loss 0.09007565839216113 test_loss: 0.09787668585777283
epoch: 108 training_loss 0.10673772411420941 test_loss: 0.11260532140731812
epoch: 109 training_loss 0.09381282581016422 test_loss: 0.1289254307746887
epoch: 110 training_loss 0.09886149458587169 test_loss: 0.11432887315750122
epoch: 111 training_loss 0.09439438672736288 test_loss: 0.11367923021316528
epoch: 112 training_loss 0.09955221818760038 test_loss: 0.1101873278617859
epoch: 113 training_loss 0.09582014933228493 test_loss: 0.10607926845550537
epoch: 114 training_loss 0.0975274907425046 test_loss: 0.11469743251800538
epoch: 115 training_loss 0.09481481943279504 test_loss: 0.11739048957824708
epoch: 116 training_loss 0.09142991356551647 test_loss: 0.1282718300819397
epoch: 117 training_loss 0.09023686949163676 test_loss: 0.10901156663894654
epoch: 118 training_loss 0.09239606009796261 test_loss: 0.130838942527771
epoch: 119 training_loss 0.09777518693357706 test_loss: 0.11051729917526246
epoch: 120 training_loss 0.08563633661717177 test_loss: 0.10786622762680054
epoch: 121 training_loss 0.09734261082485318 test_loss: 0.13299182653427125
epoch: 122 training_loss 0.1007146286778152 test_loss: 0.108523428440094
epoch: 123 training_loss 0.09158474626019597 test_loss: 0.1022726058959961
epoch: 124 training_loss 0.09818921655416489 test_loss: 0.11948924064636231
epoch: 125 training_loss 0.0919740559719503 test_loss: 0.127729594707489
epoch: 126 training_loss 0.09900131169706583 test_loss: 0.11764720678329468
epoch: 127 training_loss 0.09196264557540416 test_loss: 0.1084779977798462
epoch: 128 training_loss 0.09931076754815876 test_loss: 0.14045698642730714
epoch: 129 training_loss 0.09071667833253741 test_loss: 0.10901881456375122
epoch: 130 training_loss 0.09208699718117713 test_loss: 0.12832627296447754
epoch: 131 training_loss 0.09550881883129478 test_loss: 0.11917346715927124
epoch: 132 training_loss 0.09037442456930876 test_loss: 0.12486419677734376
epoch: 133 training_loss 0.09064272236078978 test_loss: 0.10979729890823364
epoch: 134 training_loss 0.09081624187529087 test_loss: 0.11498466730117798
epoch: 135 training_loss 0.0928041074424982 test_loss: 0.12994685173034667
epoch: 136 training_loss 0.0865670084953308 test_loss: 0.10959415435791016
epoch: 137 training_loss 0.09338393876329064 test_loss: 0.13121644258499146
epoch: 138 training_loss 0.09219482814893126 test_loss: 0.10445525646209716
epoch: 139 training_loss 0.09199726905673743 test_loss: 0.13229594230651856
epoch: 140 training_loss 0.08687636224552989 test_loss: 0.12747331857681274
epoch: 141 training_loss 0.09182791005820036 test_loss: 0.12355210781097412
epoch: 142 training_loss 0.0928357902355492 test_loss: 0.12095122337341309
epoch: 143 training_loss 0.09003050338476896 test_loss: 0.15740236043930053
epoch: 144 training_loss 0.08814859250560403 test_loss: 0.10204029083251953
epoch: 145 training_loss 0.08787859336473047 test_loss: 0.13199564218521118
epoch: 146 training_loss 0.09306000992655754 test_loss: 0.12322269678115845
epoch: 147 training_loss 0.09534996222704649 test_loss: 0.1068886399269104
epoch: 148 training_loss 0.09140554302372038 test_loss: 0.1282223105430603
epoch: 149 training_loss 0.0864190816320479 test_loss: 0.10860050916671753
episode: 0 training return: -999.9824502478015
episode: 1 training return: -999.9797729745105
episode: 2 training return: -999.9799334867558
episode: 3 training return: -999.9821663965485
epoch: 1 test_true_pfm: -0.23344987933392947 sim_pfm: -999.8420219758337
episode: 4 training return: -999.9471840725763
episode: 5 training return: -999.9436176842927
episode: 6 training return: -999.9838182455283
episode: 7 training return: -999.9817827878776
epoch: 2 test_true_pfm: -0.7191933138943739 sim_pfm: -999.8418838800408
episode: 8 training return: -999.9835877563079
episode: 9 training return: -999.9836832901676
episode: 10 training return: -999.9795063519132
episode: 11 training return: -999.9849234271511
epoch: 3 test_true_pfm: -0.5719161471568589 sim_pfm: -999.8409844602829
episode: 12 training return: -999.9256159023955
episode: 13 training return: -999.9826289222549
episode: 14 training return: -999.9840552607544
episode: 15 training return: -999.9842271712832
epoch: 4 test_true_pfm: -0.707623646770234 sim_pfm: -999.8411045761574
episode: 16 training return: -999.9823712631563
episode: 17 training return: -999.9614954149731
episode: 18 training return: -999.9822498185971
episode: 19 training return: -999.9765697115924
epoch: 5 test_true_pfm: -0.287490900211587 sim_pfm: -999.8382992881603
episode: 20 training return: -999.9719195578521
episode: 21 training return: -999.9474558707725
episode: 22 training return: -999.9816371712109
episode: 23 training return: -999.9858369211471
epoch: 6 test_true_pfm: -0.45040594320128097 sim_pfm: -999.8429055627203
episode: 24 training return: -999.9846749954696
episode: 25 training return: -999.921037620048
episode: 26 training return: -999.9790620223556
episode: 27 training return: -999.9819152414099
epoch: 7 test_true_pfm: -0.4226312581957372 sim_pfm: -999.8433288929035
episode: 28 training return: -999.9792952868487
episode: 29 training return: -999.9814132128205
episode: 30 training return: -999.9566972451611
episode: 31 training return: -999.9832584638649
epoch: 8 test_true_pfm: 0.5830747108066522 sim_pfm: -999.8404212510169
episode: 32 training return: -1000.8641142097365
episode: 33 training return: -999.9750601581458
episode: 34 training return: -999.980601119921
episode: 35 training return: -999.9869233830893
epoch: 9 test_true_pfm: 0.4134103652866255 sim_pfm: -999.8413020574045
episode: 36 training return: -999.9844999713392
episode: 37 training return: -999.9819459606241
episode: 38 training return: -999.9350681185946
episode: 39 training return: -999.9820157799021
epoch: 10 test_true_pfm: -0.523513853010517 sim_pfm: -999.8437682305612
episode: 40 training return: -999.9846033706696
episode: 41 training return: -999.9832875776332
episode: 42 training return: -999.9787050297468
episode: 43 training return: -999.9840485030809
epoch: 11 test_true_pfm: -0.6865536275889701 sim_pfm: -999.8411527870518
episode: 44 training return: -999.9829389230666
episode: 45 training return: -999.9646265494006
episode: 46 training return: -999.9836192224236
episode: 47 training return: -999.9842408700882
epoch: 12 test_true_pfm: 0.21529591900758352 sim_pfm: -999.8427503481139
episode: 48 training return: -999.9247878439306
episode: 49 training return: -999.9820125048075
episode: 50 training return: -999.9376438261199
episode: 51 training return: -999.984773589563
epoch: 13 test_true_pfm: 0.17517540427850942 sim_pfm: -999.8414978986362
episode: 52 training return: -999.9805131210748
episode: 53 training return: -999.9850865113657
episode: 54 training return: -999.9842008651207
episode: 55 training return: -999.9413381105479
epoch: 14 test_true_pfm: 0.06047274918214613 sim_pfm: -999.8403099139035
episode: 56 training return: -999.9277373843931
episode: 57 training return: -999.9806793088254
episode: 58 training return: -999.9856187267636
episode: 59 training return: -999.9803178501462
epoch: 15 test_true_pfm: -0.07493102238677668 sim_pfm: -999.8406904352211
episode: 60 training return: -999.9874604307699
episode: 61 training return: -999.9075194586194
episode: 62 training return: -999.9811950308825
episode: 63 training return: -999.9464989999838
epoch: 16 test_true_pfm: 0.513177446376682 sim_pfm: -999.8407608514966
episode: 64 training return: -999.9863307014001
episode: 65 training return: -999.9865335954876
episode: 66 training return: -999.9557767609086
episode: 67 training return: -999.983799965241
epoch: 17 test_true_pfm: 0.019361750722059607 sim_pfm: -999.8426972045631
episode: 68 training return: -999.9814208335149
episode: 69 training return: -999.9217322054245
episode: 70 training return: -999.9825667326593
episode: 71 training return: -999.9815050386023
epoch: 18 test_true_pfm: -0.5262389107935889 sim_pfm: -999.8416651665735
episode: 72 training return: -999.9810395295943
episode: 73 training return: -999.9804806936978
episode: 74 training return: -1000.435183338204
episode: 75 training return: -999.9837750323893
epoch: 19 test_true_pfm: 0.4302489886765651 sim_pfm: -999.8410574499898
episode: 76 training return: -999.9837386641818
episode: 77 training return: -999.9816129815341
episode: 78 training return: -999.9816606325513
episode: 79 training return: -999.9855981268142
epoch: 20 test_true_pfm: 0.003298626228982323 sim_pfm: -999.8418655392555
episode: 80 training return: -999.9537150708508
episode: 81 training return: -999.982203098315
episode: 82 training return: -999.9568485116506
episode: 83 training return: -999.9823943537242
epoch: 21 test_true_pfm: -0.7060690801228295 sim_pfm: -999.8443527142122
episode: 84 training return: -999.9847192472384
episode: 85 training return: -999.984351180447
episode: 86 training return: -999.9736360300836
episode: 87 training return: -999.9848298572132
epoch: 22 test_true_pfm: -0.7024696181244448 sim_pfm: -999.8411192894131
episode: 88 training return: -999.9772100227133
episode: 89 training return: -999.9847480936149
episode: 90 training return: -999.9838045283809
episode: 91 training return: -999.983662679354
epoch: 23 test_true_pfm: -0.6074278306527922 sim_pfm: -999.8424451967944
episode: 92 training return: -999.9862061678571
episode: 93 training return: -999.9816727789774
episode: 94 training return: -999.9822013648608
episode: 95 training return: -999.9852336886258
epoch: 24 test_true_pfm: -0.20041351471007526 sim_pfm: -999.8443737427039
episode: 96 training return: -999.9627741536096
episode: 97 training return: -999.9822886020228
episode: 98 training return: -999.9843674958415
episode: 99 training return: -999.9545454901752
epoch: 25 test_true_pfm: -0.06105974235874608 sim_pfm: -999.8417301207725
episode: 100 training return: -999.9784894382318
episode: 101 training return: -999.9809260961315
episode: 102 training return: -999.9821251983583
episode: 103 training return: -999.9445901855267
epoch: 26 test_true_pfm: -0.3385193846267203 sim_pfm: -999.8422878323755
episode: 104 training return: -999.9763694632975
episode: 105 training return: -999.9817587778331
episode: 106 training return: -999.9291444859307
episode: 107 training return: -999.9853493578512
epoch: 27 test_true_pfm: -0.6052755807205377 sim_pfm: -999.8416355872919
episode: 108 training return: -999.9518113267097
episode: 109 training return: -999.9835280321175
episode: 110 training return: -999.9813266030427
episode: 111 training return: -999.9816052790678
epoch: 28 test_true_pfm: -0.29901438800831664 sim_pfm: -999.8433501981386
episode: 112 training return: -999.9828335938072
episode: 113 training return: -999.9827851932087
episode: 114 training return: -999.9813906665192
episode: 115 training return: -999.984077536983
epoch: 29 test_true_pfm: 0.09656234417947114 sim_pfm: -999.8404774817656
episode: 116 training return: -999.9151536024278
episode: 117 training return: -999.9723415283958
episode: 118 training return: -999.9809632321354
episode: 119 training return: -999.9850608257836
epoch: 30 test_true_pfm: 0.08564956737772794 sim_pfm: -999.8403706804105
episode: 120 training return: -999.9798640772342
episode: 121 training return: -999.9844564025449
episode: 122 training return: -999.9622221778297
episode: 123 training return: -999.9827665604715
epoch: 31 test_true_pfm: 0.08394102606785957 sim_pfm: -999.8417738587062
episode: 124 training return: -999.9679619694373
episode: 125 training return: -999.9802981668694
episode: 126 training return: -999.9834464415358
episode: 127 training return: -999.9168020695739
epoch: 32 test_true_pfm: -0.19925394890533477 sim_pfm: -999.841868559169
episode: 128 training return: -999.9821699678434
episode: 129 training return: -999.9834352126663
episode: 130 training return: -999.9385567800621
episode: 131 training return: -999.9694509024969
epoch: 33 test_true_pfm: -0.9990656416058251 sim_pfm: -999.8407055102792
episode: 132 training return: -999.9667683510896
episode: 133 training return: -999.9734010758873
episode: 134 training return: -999.9860183195622
episode: 135 training return: -999.9397538126037
epoch: 34 test_true_pfm: -0.1741788936420513 sim_pfm: -999.8417299707352
episode: 136 training return: -999.9859666466506
episode: 137 training return: -999.9817079318061
episode: 138 training return: -999.9837846466625
episode: 139 training return: -999.9837625480949
epoch: 35 test_true_pfm: -0.006575668511585715 sim_pfm: -999.8424152451425
episode: 140 training return: -999.9823147554227
episode: 141 training return: -999.9844745982808
episode: 142 training return: -999.9539331172085
episode: 143 training return: -999.986044355245
epoch: 36 test_true_pfm: 0.3827481925579089 sim_pfm: -999.8432163001609
episode: 144 training return: -1000.1557131832374
episode: 145 training return: -999.9865675246999
episode: 146 training return: -999.9685588519473
episode: 147 training return: -999.9842535166061
epoch: 37 test_true_pfm: -0.6374229027833617 sim_pfm: -999.8415015157935
episode: 148 training return: -999.9722749310076
episode: 149 training return: -999.9522061833264
episode: 150 training return: -999.9858546773933
episode: 151 training return: -999.9723018908388
epoch: 38 test_true_pfm: 0.28718167618953466 sim_pfm: -999.8407205152004
episode: 152 training return: -999.9819239260995
episode: 153 training return: -999.9839739523617
episode: 154 training return: -999.9671778778113
episode: 155 training return: -999.9852705009799
epoch: 39 test_true_pfm: -0.12045664954547868 sim_pfm: -999.8416339418867
episode: 156 training return: -999.9859839544916
episode: 157 training return: -999.9669434518008
episode: 158 training return: -999.9856168700467
episode: 159 training return: -999.9797316381942
epoch: 40 test_true_pfm: -0.5810405271473529 sim_pfm: -999.843221909131
episode: 160 training return: -999.9808617349723
episode: 161 training return: -999.9786080421251
episode: 162 training return: -999.9554933452887
episode: 163 training return: -999.9843028270124
epoch: 41 test_true_pfm: -0.512886254972642 sim_pfm: -999.8414191454973
episode: 164 training return: -999.9787929753378
episode: 165 training return: -999.9852299960421
episode: 166 training return: -999.996903372088
episode: 167 training return: -999.9796159295436
epoch: 42 test_true_pfm: -0.49114168593208624 sim_pfm: -999.8416234473151
episode: 168 training return: -999.9827894075836
episode: 169 training return: -999.9242256572605
episode: 170 training return: -999.9831710135678
episode: 171 training return: -999.9856092177276
epoch: 43 test_true_pfm: -0.530538287591215 sim_pfm: -999.8428571048547
episode: 172 training return: -999.937502972136
episode: 173 training return: -999.9460754107647
episode: 174 training return: -999.9789833621278
episode: 175 training return: -999.9839126849863
epoch: 44 test_true_pfm: -0.02813256932920527 sim_pfm: -999.8414045186029
episode: 176 training return: -999.9876569336794
episode: 177 training return: -999.9437120248868
episode: 178 training return: -999.9845027173889
episode: 179 training return: -999.9716445549637
epoch: 45 test_true_pfm: -0.3573074331925696 sim_pfm: -999.8407523754455
episode: 180 training return: -1000.0605533671328
episode: 181 training return: -999.9317185868136
episode: 182 training return: -999.9754635869082
episode: 183 training return: -999.9793783136915
epoch: 46 test_true_pfm: -0.21463770713705496 sim_pfm: -999.8419181152367
episode: 184 training return: -999.9842431773827
episode: 185 training return: -999.9648930588439
episode: 186 training return: -999.9844804091485
episode: 187 training return: -999.9814299139035
epoch: 47 test_true_pfm: 0.11566471763442165 sim_pfm: -999.8416389135288
episode: 188 training return: -1000.2419157229784
episode: 189 training return: -999.9297056051047
episode: 190 training return: -999.9853505148172
episode: 191 training return: -1000.2580275160998
epoch: 48 test_true_pfm: 0.4173007112405913 sim_pfm: -999.8418124711496
episode: 192 training return: -999.931131792981
episode: 193 training return: -999.9428260282428
episode: 194 training return: -999.9838781595563
episode: 195 training return: -999.981381305051
epoch: 49 test_true_pfm: 0.48970487039063576 sim_pfm: -999.8413024997002
episode: 196 training return: -999.9816064678688
episode: 197 training return: -999.9797276101812
episode: 198 training return: -999.9842523936983
episode: 199 training return: -999.9704906805069
epoch: 50 test_true_pfm: -0.006099646873100088 sim_pfm: -999.84075070436
episode: 200 training return: -999.9428336132266
episode: 201 training return: -999.9809396893669
episode: 202 training return: -999.982381294628
episode: 203 training return: -999.9602562968796
epoch: 51 test_true_pfm: -0.4189889231615241 sim_pfm: -999.84039694287
episode: 204 training return: -999.9843527606804
episode: 205 training return: -999.9830970918783
episode: 206 training return: -1000.0337320936891
episode: 207 training return: -999.9821788346064
epoch: 52 test_true_pfm: -0.6559346110982432 sim_pfm: -999.8407630290886
episode: 208 training return: -999.9767126101684
episode: 209 training return: -999.9037401697685
episode: 210 training return: -999.9833143642537
episode: 211 training return: -999.9789800490181
epoch: 53 test_true_pfm: -0.6044528986214371 sim_pfm: -999.8415825250963
episode: 212 training return: -999.9697642056559
episode: 213 training return: -999.9861834562595
episode: 214 training return: -999.9859090121435
episode: 215 training return: -999.8759557980568
epoch: 54 test_true_pfm: -0.1841491531700936 sim_pfm: -999.8418376783488
episode: 216 training return: -999.9859534043721
episode: 217 training return: -999.9836368168649
episode: 218 training return: -999.9789495690374
episode: 219 training return: -999.9848243285745
epoch: 55 test_true_pfm: -0.6063943045441049 sim_pfm: -999.8403003353033
episode: 220 training return: -999.9829993417648
episode: 221 training return: -999.9833809608572
episode: 222 training return: -999.9631024370236
episode: 223 training return: -999.9845597934333
epoch: 56 test_true_pfm: -0.3871080800127325 sim_pfm: -999.8422059447861
episode: 224 training return: -999.9833077895787
episode: 225 training return: -999.9582967414963
episode: 226 training return: -999.9849931726976
episode: 227 training return: -999.9816123019092
epoch: 57 test_true_pfm: -0.5155964969763337 sim_pfm: -999.8414907770311
episode: 228 training return: -999.9841646356142
episode: 229 training return: -999.9837777866361
episode: 230 training return: -999.9838078128424
episode: 231 training return: -999.9842984810538
epoch: 58 test_true_pfm: -0.43767163709780027 sim_pfm: -999.8413604528138
episode: 232 training return: -999.9295103143156
episode: 233 training return: -999.9781429795316
episode: 234 training return: -999.9834415225986
episode: 235 training return: -999.9819064680617
epoch: 59 test_true_pfm: -0.7680992711936794 sim_pfm: -999.8411023734874
episode: 236 training return: -999.9832994271472
episode: 237 training return: -999.9827374571236
episode: 238 training return: -999.957088463874
episode: 239 training return: -999.9584771322666
epoch: 60 test_true_pfm: -0.3549249334844977 sim_pfm: -999.8415984779925
episode: 240 training return: -999.9706107460677
episode: 241 training return: -999.9835879322455
episode: 242 training return: -999.9848982640631
episode: 243 training return: -999.9858519131594
epoch: 61 test_true_pfm: -0.3134188878490503 sim_pfm: -999.8420879988576
episode: 244 training return: -999.9849085804492
episode: 245 training return: -999.9322058695441
episode: 246 training return: -999.9552583708603
episode: 247 training return: -999.934316204255
epoch: 62 test_true_pfm: 0.2592265640876394 sim_pfm: -999.8428454880412
episode: 248 training return: -999.9531940298518
episode: 249 training return: -999.9772259994886
episode: 250 training return: -999.9847583578348
episode: 251 training return: -999.9878744168506
epoch: 63 test_true_pfm: -0.31966561204198024 sim_pfm: -999.8402720640069
episode: 252 training return: -999.9835017426927
episode: 253 training return: -999.9570386787241
episode: 254 training return: -999.982892984237
episode: 255 training return: -999.9857593908299
epoch: 64 test_true_pfm: 0.31327881552184883 sim_pfm: -999.8420382220132
episode: 256 training return: -1000.0261528055437
episode: 257 training return: -999.982051441106
episode: 258 training return: -999.9696408930472
episode: 259 training return: -999.915973476014
epoch: 65 test_true_pfm: -1.1955042764695925 sim_pfm: -999.8414421811812
episode: 260 training return: -999.9334558950471
episode: 261 training return: -999.9819728789793
episode: 262 training return: -999.9482458512592
episode: 263 training return: -999.94763388007
epoch: 66 test_true_pfm: -0.18395116786140497 sim_pfm: -999.8415962477308
episode: 264 training return: -999.9852863718016
episode: 265 training return: -999.983928249871
episode: 266 training return: -999.9470712635913
episode: 267 training return: -999.983578227321
epoch: 67 test_true_pfm: 0.5726154558565729 sim_pfm: -999.8423120815987
episode: 268 training return: -999.9248601455947
episode: 269 training return: -999.9781607161016
episode: 270 training return: -999.9636407798436
episode: 271 training return: -999.9827655823898
epoch: 68 test_true_pfm: -0.47507349047132813 sim_pfm: -999.841483679218
episode: 272 training return: -999.9819773539472
episode: 273 training return: -999.9795183014453
episode: 274 training return: -999.9771830160787
episode: 275 training return: -999.9645897787741
epoch: 69 test_true_pfm: -0.19229459752520772 sim_pfm: -999.8421730397757
episode: 276 training return: -999.9411550127079
episode: 277 training return: -999.9837251874605
episode: 278 training return: -999.9802661450799
episode: 279 training return: -999.9855708099578
epoch: 70 test_true_pfm: -0.14644752614493728 sim_pfm: -999.8428265313408
episode: 280 training return: -999.9400348359289
episode: 281 training return: -999.9853042132368
episode: 282 training return: -999.9800730798918
episode: 283 training return: -999.9824198582687
epoch: 71 test_true_pfm: 0.4287697466133315 sim_pfm: -999.8421685931784
episode: 284 training return: -999.985251798314
episode: 285 training return: -999.9540805655477
episode: 286 training return: -999.9828006414779
episode: 287 training return: -999.9449511409655
epoch: 72 test_true_pfm: 0.04709054578314881 sim_pfm: -999.8411128856129
episode: 288 training return: -999.9695706089398
episode: 289 training return: -999.985813004305
episode: 290 training return: -999.9868202585536
episode: 291 training return: -999.9811059225722
epoch: 73 test_true_pfm: -0.5152713198373069 sim_pfm: -999.8410719486407
episode: 292 training return: -999.9797354302252
episode: 293 training return: -999.98052006769
episode: 294 training return: -999.9779733244998
episode: 295 training return: -999.944343171084
epoch: 74 test_true_pfm: -0.04404276847567814 sim_pfm: -999.8433490768552
episode: 296 training return: -999.9845863287512
episode: 297 training return: -999.9843683290701
episode: 298 training return: -999.9854672954071
episode: 299 training return: -999.9626821858335
epoch: 75 test_true_pfm: -0.8210214706936253 sim_pfm: -999.8415326486205
episode: 300 training return: -999.9317648802028
episode: 301 training return: -999.9739295543941
episode: 302 training return: -999.9442539717098
episode: 303 training return: -999.9803066669068
epoch: 76 test_true_pfm: -0.8472110356050754 sim_pfm: -999.8417467149694
episode: 304 training return: -999.9799064775475
episode: 305 training return: -999.9402526841866
episode: 306 training return: -999.973674963797
episode: 307 training return: -999.9342039930723
epoch: 77 test_true_pfm: 0.6257569920058543 sim_pfm: -999.8411244683149
episode: 308 training return: -999.9739370382387
episode: 309 training return: -999.983035872777
episode: 310 training return: -999.9806566779226
episode: 311 training return: -999.9349959502238
epoch: 78 test_true_pfm: -0.3533506855855643 sim_pfm: -999.8411079786976
episode: 312 training return: -999.9439032005936
episode: 313 training return: -999.9265644625038
episode: 314 training return: -999.973051276135
episode: 315 training return: -999.9728549035331
epoch: 79 test_true_pfm: -1.1714251662647284 sim_pfm: -999.8415650248031
episode: 316 training return: -999.9248918599408
episode: 317 training return: -999.9512217595006
episode: 318 training return: -1001.1797385893957
episode: 319 training return: -999.9831689784548
epoch: 80 test_true_pfm: -0.03754331909823049 sim_pfm: -999.8425513238657
episode: 320 training return: -999.9710820479838
episode: 321 training return: -999.9863525789866
episode: 322 training return: -999.9849144796714
episode: 323 training return: -999.9573853999829
epoch: 81 test_true_pfm: 0.3264887485162924 sim_pfm: -999.8414702987876
episode: 324 training return: -999.9488402328111
episode: 325 training return: -999.9835246503679
episode: 326 training return: -999.9480601499583
episode: 327 training return: -999.9620820524141
epoch: 82 test_true_pfm: -0.05995721632965878 sim_pfm: -999.8429282867231
episode: 328 training return: -999.9790833161862
episode: 329 training return: -999.974886332736
episode: 330 training return: -999.9871446764423
episode: 331 training return: -999.9813691854403
epoch: 83 test_true_pfm: -0.40463949164665913 sim_pfm: -999.8400755117258
episode: 332 training return: -999.982565235299
episode: 333 training return: -999.9837621412336
episode: 334 training return: -999.9819816746821
episode: 335 training return: -999.9619587985786
epoch: 84 test_true_pfm: -0.3301177128056823 sim_pfm: -999.8414517904504
episode: 336 training return: -999.9620473172655
episode: 337 training return: -999.9174390547014
episode: 338 training return: -999.9412500445118
episode: 339 training return: -999.9868240113033
epoch: 85 test_true_pfm: -0.28718795589137364 sim_pfm: -999.841474040914
episode: 340 training return: -999.9870229730058
episode: 341 training return: -999.96857312955
episode: 342 training return: -999.984570096251
episode: 343 training return: -999.9313229217128
epoch: 86 test_true_pfm: -0.19781622685503297 sim_pfm: -999.842350517724
episode: 344 training return: -999.9821015119973
episode: 345 training return: -999.9362381133175
episode: 346 training return: -999.9848925081209
episode: 347 training return: -999.9838290441342
epoch: 87 test_true_pfm: 0.8949483101300766 sim_pfm: -999.8418997017521
episode: 348 training return: -999.9825738025759
episode: 349 training return: -999.9749524198086
episode: 350 training return: -999.9862934232067
episode: 351 training return: -999.981335533757
epoch: 88 test_true_pfm: 1.3750095967620215 sim_pfm: -999.8411801693345
episode: 352 training return: -999.9740876481823
episode: 353 training return: -999.9834719052142
episode: 354 training return: -999.984962425312
episode: 355 training return: -999.982796080158
epoch: 89 test_true_pfm: 0.007935467396893686 sim_pfm: -999.8418788113942
episode: 356 training return: -999.956601668712
episode: 357 training return: -999.9532498421088
episode: 358 training return: -999.9246172857082
episode: 359 training return: -999.9818283170147
epoch: 90 test_true_pfm: -0.011379256313069741 sim_pfm: -999.842467191104
episode: 360 training return: -999.9804863091497
episode: 361 training return: -999.9602319161374
episode: 362 training return: -999.9793075372498
episode: 363 training return: -999.9844359007377
epoch: 91 test_true_pfm: -0.02479766927853976 sim_pfm: -999.8408715777759
episode: 364 training return: -999.9583514325655
episode: 365 training return: -999.9851308149499
episode: 366 training return: -999.9838336841894
episode: 367 training return: -999.9827004498127
epoch: 92 test_true_pfm: -0.0008098641270244261 sim_pfm: -999.8405355308441
episode: 368 training return: -999.9722001924334
episode: 369 training return: -999.9805688250076
episode: 370 training return: -999.9822644439909
episode: 371 training return: -999.9911268279717
epoch: 93 test_true_pfm: -0.4172412453085946 sim_pfm: -999.8427879641707
episode: 372 training return: -999.9690201627913
episode: 373 training return: -999.9883715040896
episode: 374 training return: -999.9136404651069
episode: 375 training return: -999.9442715548603
epoch: 94 test_true_pfm: 0.43945689188952414 sim_pfm: -999.8415627783252
episode: 376 training return: -999.9756835401383
episode: 377 training return: -999.9826242842352
episode: 378 training return: -999.9837956325703
episode: 379 training return: -999.9819342876347
epoch: 95 test_true_pfm: 0.004586768384204539 sim_pfm: -999.8420690448621
episode: 380 training return: -999.9819172012939
episode: 381 training return: -999.9341593636289
episode: 382 training return: -999.9818366589775
episode: 383 training return: -999.9199236602769
epoch: 96 test_true_pfm: 0.17554674314512134 sim_pfm: -999.8407972065752
episode: 384 training return: -999.9696032917535
episode: 385 training return: -999.9836235637753
episode: 386 training return: -999.9212450780666
episode: 387 training return: -999.9863152106191
epoch: 97 test_true_pfm: 0.19061587469896454 sim_pfm: -999.8394449722282
episode: 388 training return: -999.9843328523245
episode: 389 training return: -999.9857675939608
episode: 390 training return: -999.9835348737728
episode: 391 training return: -999.9851550533785
epoch: 98 test_true_pfm: -1.361626566151129 sim_pfm: -999.8396703127067
episode: 392 training return: -999.9830502836431
episode: 393 training return: -999.9846802487278
episode: 394 training return: -999.9444930150443
episode: 395 training return: -999.9673722338077
epoch: 99 test_true_pfm: -0.5448027933678189 sim_pfm: -999.840175793444
episode: 396 training return: -999.9842385746439
episode: 397 training return: -999.9861526003767
episode: 398 training return: -999.9843417973851
episode: 399 training return: -999.9794620256885
epoch: 100 test_true_pfm: -0.09639189037347477 sim_pfm: -999.8426422676592
episode: 400 training return: -999.9463002750192
episode: 401 training return: -999.9666462688286
episode: 402 training return: -999.9820379587942
episode: 403 training return: -999.9638470118376
epoch: 101 test_true_pfm: -0.3360178085392726 sim_pfm: -999.8391224066639
episode: 404 training return: -999.9823893083501
episode: 405 training return: -999.9857325023225
episode: 406 training return: -999.9827017746504
episode: 407 training return: -999.9743915200117
epoch: 102 test_true_pfm: 0.07312865132824027 sim_pfm: -999.8414046358521
episode: 408 training return: -999.9312647291898
episode: 409 training return: -999.9585065800837
episode: 410 training return: -999.9249919190871
episode: 411 training return: -999.9815001005456
epoch: 103 test_true_pfm: 0.10562019086703489 sim_pfm: -999.8399470826718
episode: 412 training return: -999.9359784306271
episode: 413 training return: -999.9711585248127
episode: 414 training return: -999.9784548247371
episode: 415 training return: -999.9395600317857
epoch: 104 test_true_pfm: 0.21718935840588152 sim_pfm: -999.8403166293614
episode: 416 training return: -999.9877578386742
episode: 417 training return: -999.9848474982058
episode: 418 training return: -999.8710168832636
episode: 419 training return: -999.9823626848954
epoch: 105 test_true_pfm: -0.6265226424050655 sim_pfm: -999.8403779915003
episode: 420 training return: -999.9627336823563
episode: 421 training return: -999.9562785033464
episode: 422 training return: -999.9806101236552
episode: 423 training return: -999.9387176895656
epoch: 106 test_true_pfm: 0.1320596344254571 sim_pfm: -999.83893346468
episode: 424 training return: -999.9838692475583
episode: 425 training return: -999.9342282596732
episode: 426 training return: -999.9798419958688
episode: 427 training return: -999.9089396101348
epoch: 107 test_true_pfm: -0.6884557956548311 sim_pfm: -999.8415617680088
episode: 428 training return: -999.9421712173535
episode: 429 training return: -999.9820591925109
episode: 430 training return: -999.97566270014
episode: 431 training return: -999.9853496218058
epoch: 108 test_true_pfm: -0.7496512403244511 sim_pfm: -999.8409728118986
episode: 432 training return: -999.9699563977792
episode: 433 training return: -999.9835303708951
episode: 434 training return: -999.9810648533432
episode: 435 training return: -999.9589506054117
epoch: 109 test_true_pfm: -0.594180249166712 sim_pfm: -999.8398589825669
episode: 436 training return: -999.9835849718891
episode: 437 training return: -999.9845554856768
episode: 438 training return: -999.985813574268
episode: 439 training return: -999.9839852095805
epoch: 110 test_true_pfm: -0.021058990148970464 sim_pfm: -999.8415652502664
episode: 440 training return: -999.9857636566264
episode: 441 training return: -999.9840005957548
episode: 442 training return: -999.9817318469939
episode: 443 training return: -999.9471102607288
epoch: 111 test_true_pfm: 0.4037023737917507 sim_pfm: -999.8417924472125
episode: 444 training return: -999.9530569148717
episode: 445 training return: -999.9641029304283
episode: 446 training return: -999.9642367063815
episode: 447 training return: -999.9820597545008
epoch: 112 test_true_pfm: -0.2600045677299276 sim_pfm: -999.8414803612991
episode: 448 training return: -999.9337846310493
episode: 449 training return: -999.9824149497044
episode: 450 training return: -999.9791500081667
episode: 451 training return: -999.9855731891332
epoch: 113 test_true_pfm: -0.30149738631513096 sim_pfm: -999.8426121708038
episode: 452 training return: -999.9827038771073
episode: 453 training return: -999.9807940752306
episode: 454 training return: -999.9847662308721
episode: 455 training return: -999.9849563552366
epoch: 114 test_true_pfm: -0.11535678611946705 sim_pfm: -999.8413460341926
episode: 456 training return: -999.9835399168563
episode: 457 training return: -999.9493527033786
episode: 458 training return: -999.984097749031
episode: 459 training return: -999.9849814148254
epoch: 115 test_true_pfm: -0.4807361949537863 sim_pfm: -999.8414609446628
episode: 460 training return: -999.9847478729165
episode: 461 training return: -999.981594589644
episode: 462 training return: -999.9833910658742
episode: 463 training return: -999.9265393294805
epoch: 116 test_true_pfm: 0.1666755949090962 sim_pfm: -999.840936718027
episode: 464 training return: -999.9423001867121
episode: 465 training return: -999.9840629116195
episode: 466 training return: -999.9801730111875
episode: 467 training return: -999.9752983270363
epoch: 117 test_true_pfm: 0.567855734524101 sim_pfm: -999.8411644965732
episode: 468 training return: -999.9841143634338
episode: 469 training return: -999.9817241233242
episode: 470 training return: -999.9811150306987
episode: 471 training return: -999.9847554756236
epoch: 118 test_true_pfm: 0.14230856442375328 sim_pfm: -999.8412298564544
episode: 472 training return: -999.954529226826
episode: 473 training return: -999.9776948526543
episode: 474 training return: -999.9781534793518
episode: 475 training return: -999.9837003371434
epoch: 119 test_true_pfm: -0.3004824636236666 sim_pfm: -999.843843804562
episode: 476 training return: -999.9240019279708
episode: 477 training return: -999.9828566674706
episode: 478 training return: -999.9822578518548
episode: 479 training return: -999.9790788730736
epoch: 120 test_true_pfm: 0.2709228113132018 sim_pfm: -999.8409464409556
episode: 480 training return: -999.9818856701061
episode: 481 training return: -999.9797620976093
episode: 482 training return: -999.9242257104261
episode: 483 training return: -999.9820222407191
epoch: 121 test_true_pfm: -0.278537647192218 sim_pfm: -999.8427165155007
episode: 484 training return: -999.9250896734922
episode: 485 training return: -999.9830320331807
episode: 486 training return: -999.9847152598526
episode: 487 training return: -999.9563076129698
epoch: 122 test_true_pfm: -0.6943271242025157 sim_pfm: -999.8425830367141
episode: 488 training return: -999.9822672163226
episode: 489 training return: -999.9745274197335
episode: 490 training return: -999.9453642041483
episode: 491 training return: -999.9851922567436
epoch: 123 test_true_pfm: -0.027163633915229527 sim_pfm: -999.8410993769606
episode: 492 training return: -999.9843335940235
episode: 493 training return: -999.9838385474493
episode: 494 training return: -999.9871742041263
episode: 495 training return: -999.9697841339628
epoch: 124 test_true_pfm: -0.675321846412829 sim_pfm: -999.842576822073
episode: 496 training return: -999.9579872328419
episode: 497 training return: -999.9557493261499
episode: 498 training return: -999.9809621169419
episode: 499 training return: -999.9370827551868
epoch: 125 test_true_pfm: 0.021207453222922872 sim_pfm: -999.8407777551188
episode: 500 training return: -999.9810489285258
episode: 501 training return: -999.9676715145358
episode: 502 training return: -999.929706819309
episode: 503 training return: -999.9809263786857
epoch: 126 test_true_pfm: 0.1485735562531456 sim_pfm: -999.8417210602369
episode: 504 training return: -999.9827750138924
episode: 505 training return: -999.9855289010211
episode: 506 training return: -999.9678994144593
episode: 507 training return: -999.9810116422235
epoch: 127 test_true_pfm: -0.21909817304939141 sim_pfm: -999.8430710518055
episode: 508 training return: -999.9866232915251
episode: 509 training return: -999.985627151151
episode: 510 training return: -999.9469525733708
episode: 511 training return: -999.956451556907
epoch: 128 test_true_pfm: 0.679677754425197 sim_pfm: -999.8404494083319
episode: 512 training return: -999.9693585478011
episode: 513 training return: -999.9854457085895
episode: 514 training return: -999.9841739986203
episode: 515 training return: -999.9690296016224
epoch: 129 test_true_pfm: 0.1693170933852566 sim_pfm: -999.8420036837173
episode: 516 training return: -999.9798126514331
episode: 517 training return: -999.9834039075442
episode: 518 training return: -999.9492338761294
episode: 519 training return: -999.9493753423264
epoch: 130 test_true_pfm: -0.10612256491950516 sim_pfm: -999.8401986841849
episode: 520 training return: -999.9377013135988
episode: 521 training return: -999.9856565407374
episode: 522 training return: -999.987524916288
episode: 523 training return: -999.9615453446613
epoch: 131 test_true_pfm: -0.40238081619827576 sim_pfm: -999.8409399839755
episode: 524 training return: -999.9784646417642
episode: 525 training return: -999.9860241674492
episode: 526 training return: -999.9795080099371
episode: 527 training return: -999.9836122534217
epoch: 132 test_true_pfm: -0.18739337885914123 sim_pfm: -999.8405029878558
episode: 528 training return: -999.9504811961679
episode: 529 training return: -999.9808387533266
episode: 530 training return: -999.9864405193662
episode: 531 training return: -999.9755288413198
epoch: 133 test_true_pfm: -0.15730936356949532 sim_pfm: -999.8421093924626
episode: 532 training return: -999.9806371263546
episode: 533 training return: -999.9853956094015
episode: 534 training return: -999.9763083501169
episode: 535 training return: -999.9845506748557
epoch: 134 test_true_pfm: 0.16061276572273014 sim_pfm: -999.8400319034208
episode: 536 training return: -999.9821888864487
episode: 537 training return: -999.9827550590867
episode: 538 training return: -999.9812934533015
episode: 539 training return: -999.9772855029818
epoch: 135 test_true_pfm: -0.13569189866332693 sim_pfm: -999.8423797753353
episode: 540 training return: -999.9307926899752
episode: 541 training return: -999.9858243908131
episode: 542 training return: -999.9750886354598
episode: 543 training return: -999.9826888142115
epoch: 136 test_true_pfm: -0.019464383752574237 sim_pfm: -999.8421024003161
episode: 544 training return: -999.9838257147294
episode: 545 training return: -999.9814082809944
episode: 546 training return: -999.8983207798501
episode: 547 training return: -999.9557610903794
epoch: 137 test_true_pfm: 0.2985838934103447 sim_pfm: -999.8404160095132
episode: 548 training return: -999.9853926083575
episode: 549 training return: -999.9796851627003
episode: 550 training return: -999.9786870392866
episode: 551 training return: -999.9791845826368
epoch: 138 test_true_pfm: 0.002496633954404951 sim_pfm: -999.8402410401951
episode: 552 training return: -999.9843825613528
episode: 553 training return: -999.9720686857698
episode: 554 training return: -999.9503158328126
episode: 555 training return: -999.9826672536007
epoch: 139 test_true_pfm: -0.9375449618158654 sim_pfm: -999.8421677140881
episode: 556 training return: -999.9307828133271
episode: 557 training return: -999.9314279213933
episode: 558 training return: -999.9457713510303
episode: 559 training return: -999.9145469773089
epoch: 140 test_true_pfm: -0.5123155747755542 sim_pfm: -999.8416609064949
episode: 560 training return: -999.9772710824337
episode: 561 training return: -999.9105724023121
episode: 562 training return: -1000.0098234296593
episode: 563 training return: -999.9825242865113
epoch: 141 test_true_pfm: -0.26671054075620365 sim_pfm: -999.8419061755767
episode: 564 training return: -999.9499255672936
episode: 565 training return: -999.9818664649829
episode: 566 training return: -999.9827308191126
episode: 567 training return: -999.9862176624176
epoch: 142 test_true_pfm: -0.5407716507926242 sim_pfm: -999.840890570152
episode: 568 training return: -999.960121184209
episode: 569 training return: -999.983289351693
episode: 570 training return: -999.9835088478354
episode: 571 training return: -999.9836715405844
epoch: 143 test_true_pfm: -0.08599304431742245 sim_pfm: -999.8420840460462
episode: 572 training return: -999.9341442734515
episode: 573 training return: -999.9861341904784
episode: 574 training return: -999.9529995748406
episode: 575 training return: -999.984631956024
epoch: 144 test_true_pfm: 0.0029116216082266666 sim_pfm: -999.8414777430731
episode: 576 training return: -999.9622415868123
episode: 577 training return: -999.9763785952073
episode: 578 training return: -999.980200659171
episode: 579 training return: -999.9386052681539
epoch: 145 test_true_pfm: -1.2518087226010004 sim_pfm: -999.8417939592728
episode: 580 training return: -999.911316083746
episode: 581 training return: -999.9844073506665
episode: 582 training return: -999.9660267544126
episode: 583 training return: -999.9841856394567
epoch: 146 test_true_pfm: -0.3178460902412493 sim_pfm: -999.8407929232638
episode: 584 training return: -999.9840067800818
episode: 585 training return: -999.9835754773542
episode: 586 training return: -999.9664378973184
episode: 587 training return: -999.9818064000932
epoch: 147 test_true_pfm: -0.2210282457605799 sim_pfm: -999.8408717831718
episode: 588 training return: -999.9819547795164
episode: 589 training return: -999.9816255188538
episode: 590 training return: -999.9500414712331
episode: 591 training return: -999.912973915582
epoch: 148 test_true_pfm: -0.6312409926337063 sim_pfm: -999.8410448345227
episode: 592 training return: -999.9806971834475
episode: 593 training return: -999.9710465890344
episode: 594 training return: -999.9855867026369
episode: 595 training return: -999.9825780170455
epoch: 149 test_true_pfm: 0.00870130197297462 sim_pfm: -999.8410586122477
episode: 596 training return: -999.9800022222291
episode: 597 training return: -999.982928352045
episode: 598 training return: -999.9713496561078
episode: 599 training return: -999.9825306732654
epoch: 150 test_true_pfm: -0.9621278789322952 sim_pfm: -999.8412113163655
