['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 0.34071128964424136 test_loss: 0.23545615673065184
epoch: 1 training_loss 0.20830506950616837 test_loss: 0.18501136302947999
epoch: 2 training_loss 0.19517010129988194 test_loss: 0.1873571515083313
epoch: 3 training_loss 0.17192883789539337 test_loss: 0.16566829681396483
epoch: 4 training_loss 0.15346001543104648 test_loss: 0.1683003306388855
epoch: 5 training_loss 0.14846046250313522 test_loss: 0.15828113555908202
epoch: 6 training_loss 0.133968640640378 test_loss: 0.14104121923446655
epoch: 7 training_loss 0.14164565600454807 test_loss: 0.13767944574356078
epoch: 8 training_loss 0.14175014942884445 test_loss: 0.15075533390045165
epoch: 9 training_loss 0.1341593648865819 test_loss: 0.13937177658081054
epoch: 10 training_loss 0.1264553526043892 test_loss: 0.1292169451713562
epoch: 11 training_loss 0.12936007525771856 test_loss: 0.1376807451248169
epoch: 12 training_loss 0.12958275984972714 test_loss: 0.12572906017303467
epoch: 13 training_loss 0.12781284995377062 test_loss: 0.11624982357025146
epoch: 14 training_loss 0.12112857233732939 test_loss: 0.11495851278305054
epoch: 15 training_loss 0.13494255434721708 test_loss: 0.12121973037719727
epoch: 16 training_loss 0.1239077603071928 test_loss: 0.12309485673904419
epoch: 17 training_loss 0.11893135625869036 test_loss: 0.12694940567016602
epoch: 18 training_loss 0.1109453447163105 test_loss: 0.11186830997467041
epoch: 19 training_loss 0.1164126194640994 test_loss: 0.11931487321853637
epoch: 20 training_loss 0.11217898722738028 test_loss: 0.12062370777130127
epoch: 21 training_loss 0.11917647518217564 test_loss: 0.12749319076538085
epoch: 22 training_loss 0.12174042124301195 test_loss: 0.1297762393951416
epoch: 23 training_loss 0.11852641630917787 test_loss: 0.1512550711631775
epoch: 24 training_loss 0.11932477343827486 test_loss: 0.11222696304321289
epoch: 25 training_loss 0.11053768668323755 test_loss: 0.15205384492874147
epoch: 26 training_loss 0.12420974964275956 test_loss: 0.13539477586746215
epoch: 27 training_loss 0.11779643915593624 test_loss: 0.11151773929595947
epoch: 28 training_loss 0.11113953683525324 test_loss: 0.12245863676071167
epoch: 29 training_loss 0.11978922795504332 test_loss: 0.11904369592666626
epoch: 30 training_loss 0.11350417450070381 test_loss: 0.12021013498306274
epoch: 31 training_loss 0.11290239103138447 test_loss: 0.12663743495941163
epoch: 32 training_loss 0.11175112079828978 test_loss: 0.12408593893051148
epoch: 33 training_loss 0.11748113315552473 test_loss: 0.11842076778411866
epoch: 34 training_loss 0.11115128256380558 test_loss: 0.11945379972457885
epoch: 35 training_loss 0.12098688878118992 test_loss: 0.12538440227508546
epoch: 36 training_loss 0.1086433457955718 test_loss: 0.11097028255462646
epoch: 37 training_loss 0.11101069334894419 test_loss: 0.12972075939178468
epoch: 38 training_loss 0.1113604474440217 test_loss: 0.11028443574905396
epoch: 39 training_loss 0.1143303780630231 test_loss: 0.11330081224441528
epoch: 40 training_loss 0.11448624357581139 test_loss: 0.1169702410697937
epoch: 41 training_loss 0.11664331421256065 test_loss: 0.11261688470840454
epoch: 42 training_loss 0.11269109543412924 test_loss: 0.1232038140296936
epoch: 43 training_loss 0.1110184021666646 test_loss: 0.11651054620742798
epoch: 44 training_loss 0.11402718454599381 test_loss: 0.11593677997589111
epoch: 45 training_loss 0.11949424587190151 test_loss: 0.12077786922454833
epoch: 46 training_loss 0.11337625974789262 test_loss: 0.11587531566619873
epoch: 47 training_loss 0.10829355007037521 test_loss: 0.11127310991287231
epoch: 48 training_loss 0.11454321824014187 test_loss: 0.12218215465545654
epoch: 49 training_loss 0.1063121835514903 test_loss: 0.10639044046401977
epoch: 50 training_loss 0.11262515604496003 test_loss: 0.12310898303985596
epoch: 51 training_loss 0.11373938050121069 test_loss: 0.10981539487838746
epoch: 52 training_loss 0.0971982817351818 test_loss: 0.13704328536987304
epoch: 53 training_loss 0.11378319272771478 test_loss: 0.09827117323875427
epoch: 54 training_loss 0.11218780752271414 test_loss: 0.1022109866142273
epoch: 55 training_loss 0.10836388792842627 test_loss: 0.11189992427825927
epoch: 56 training_loss 0.1102365966513753 test_loss: 0.11366950273513794
epoch: 57 training_loss 0.10804093271493911 test_loss: 0.10529154539108276
epoch: 58 training_loss 0.11593634624034166 test_loss: 0.12104778289794922
epoch: 59 training_loss 0.10455644302070141 test_loss: 0.11021667718887329
epoch: 60 training_loss 0.10720193795859814 test_loss: 0.10125776529312133
epoch: 61 training_loss 0.10964159592986107 test_loss: 0.11391900777816773
epoch: 62 training_loss 0.1158427969366312 test_loss: 0.11324795484542846
epoch: 63 training_loss 0.11135631877928973 test_loss: 0.11067740917205811
epoch: 64 training_loss 0.12047413490712643 test_loss: 0.10717247724533081
epoch: 65 training_loss 0.11090815015137195 test_loss: 0.11621102094650268
epoch: 66 training_loss 0.10847388550639153 test_loss: 0.11336443424224854
epoch: 67 training_loss 0.11394208781421185 test_loss: 0.11305595636367798
epoch: 68 training_loss 0.10380176015198231 test_loss: 0.10998190641403198
epoch: 69 training_loss 0.11042440328747034 test_loss: 0.1159018874168396
epoch: 70 training_loss 0.11128030262887478 test_loss: 0.10994213819503784
epoch: 71 training_loss 0.1094395463168621 test_loss: 0.11669594049453735
epoch: 72 training_loss 0.1116393206641078 test_loss: 0.11416325569152833
epoch: 73 training_loss 0.11181290943175554 test_loss: 0.12590487003326417
epoch: 74 training_loss 0.11498025350272656 test_loss: 0.10784401893615722
epoch: 75 training_loss 0.11122587921097875 test_loss: 0.11795427799224853
epoch: 76 training_loss 0.10554783152416349 test_loss: 0.1077283501625061
epoch: 77 training_loss 0.11001182602718472 test_loss: 0.11715772151947021
epoch: 78 training_loss 0.10831087198108434 test_loss: 0.1111456274986267
epoch: 79 training_loss 0.11002625074237585 test_loss: 0.10272995233535767
epoch: 80 training_loss 0.11636475872248411 test_loss: 0.12296454906463623
epoch: 81 training_loss 0.11346454787999391 test_loss: 0.11481033563613892
epoch: 82 training_loss 0.11875194638967514 test_loss: 0.10459104776382447
epoch: 83 training_loss 0.11241984907537698 test_loss: 0.1123551607131958
epoch: 84 training_loss 0.10801570445299148 test_loss: 0.1234081506729126
epoch: 85 training_loss 0.10362405829131603 test_loss: 0.11671900749206543
epoch: 86 training_loss 0.10714324694126845 test_loss: 0.10621782541275024
epoch: 87 training_loss 0.10093144755810499 test_loss: 0.11000978946685791
epoch: 88 training_loss 0.11293728664517402 test_loss: 0.09790469408035278
epoch: 89 training_loss 0.10879482958465815 test_loss: 0.10162732601165772
epoch: 90 training_loss 0.11254542879760265 test_loss: 0.09039456248283387
epoch: 91 training_loss 0.11665857519954442 test_loss: 0.10245254039764404
epoch: 92 training_loss 0.10672517497092486 test_loss: 0.12365374565124512
epoch: 93 training_loss 0.11092343553900719 test_loss: 0.11783884763717652
epoch: 94 training_loss 0.11101837020367383 test_loss: 0.11437489986419677
epoch: 95 training_loss 0.10825383855029941 test_loss: 0.10792176723480225
epoch: 96 training_loss 0.1125854378566146 test_loss: 0.13078588247299194
epoch: 97 training_loss 0.11185057239606977 test_loss: 0.11222511529922485
epoch: 98 training_loss 0.10675122756510973 test_loss: 0.10476078987121581
epoch: 99 training_loss 0.10783222902566195 test_loss: 0.1121062159538269
epoch: 100 training_loss 0.1047784006409347 test_loss: 0.12315948009490967
epoch: 101 training_loss 0.10447226003743708 test_loss: 0.10720115900039673
epoch: 102 training_loss 0.10516306567937135 test_loss: 0.10444194078445435
epoch: 103 training_loss 0.10916171511635184 test_loss: 0.12148628234863282
epoch: 104 training_loss 0.10879928205162287 test_loss: 0.11244794130325317
epoch: 105 training_loss 0.10683686386793852 test_loss: 0.11760810613632203
epoch: 106 training_loss 0.11116935722529889 test_loss: 0.11142077445983886
epoch: 107 training_loss 0.1002807160653174 test_loss: 0.10117647647857667
epoch: 108 training_loss 0.10899626851081848 test_loss: 0.10120667219161987
epoch: 109 training_loss 0.1105442699417472 test_loss: 0.11669588088989258
epoch: 110 training_loss 0.11262174803763628 test_loss: 0.10336672067642212
epoch: 111 training_loss 0.11088514711707831 test_loss: 0.10909503698348999
epoch: 112 training_loss 0.11302349131554365 test_loss: 0.11876522302627564
epoch: 113 training_loss 0.10510167613625526 test_loss: 0.11611088514328002
epoch: 114 training_loss 0.10458480183035135 test_loss: 0.11693370342254639
epoch: 115 training_loss 0.11008542779833079 test_loss: 0.12655091285705566
epoch: 116 training_loss 0.10240400906652213 test_loss: 0.10404018163681031
epoch: 117 training_loss 0.10914646174758673 test_loss: 0.12511557340621948
epoch: 118 training_loss 0.10360563758760691 test_loss: 0.11322653293609619
epoch: 119 training_loss 0.10733508329838515 test_loss: 0.1074091911315918
epoch: 120 training_loss 0.10563796309754253 test_loss: 0.09721453785896302
epoch: 121 training_loss 0.10410454235970974 test_loss: 0.12123360633850097
epoch: 122 training_loss 0.11341473642736673 test_loss: 0.11958645582199097
epoch: 123 training_loss 0.11130531009286643 test_loss: 0.1139491081237793
epoch: 124 training_loss 0.09930594239383936 test_loss: 0.11804869174957275
epoch: 125 training_loss 0.10727597359567881 test_loss: 0.09984126091003417
epoch: 126 training_loss 0.11419739421457052 test_loss: 0.1211700439453125
epoch: 127 training_loss 0.10639326009899377 test_loss: 0.12906816005706787
epoch: 128 training_loss 0.10472964745014907 test_loss: 0.1169777274131775
epoch: 129 training_loss 0.10472249824553728 test_loss: 0.10822509527206421
epoch: 130 training_loss 0.10582294084131717 test_loss: 0.09634562730789184
epoch: 131 training_loss 0.09667311785742641 test_loss: 0.11495825052261352
epoch: 132 training_loss 0.10214608324691653 test_loss: 0.10596493482589722
epoch: 133 training_loss 0.10965176995843649 test_loss: 0.12476596832275391
epoch: 134 training_loss 0.10472586818039417 test_loss: 0.11649664640426635
epoch: 135 training_loss 0.10641470685601234 test_loss: 0.10802570581436158
epoch: 136 training_loss 0.10147125706076622 test_loss: 0.11683335304260253
epoch: 137 training_loss 0.10458617795258761 test_loss: 0.11767569780349732
epoch: 138 training_loss 0.09612287253141404 test_loss: 0.1152532458305359
epoch: 139 training_loss 0.1041920805722475 test_loss: 0.10503165721893311
epoch: 140 training_loss 0.10154124343767762 test_loss: 0.12842499017715453
epoch: 141 training_loss 0.10046696782112122 test_loss: 0.11669366359710694
epoch: 142 training_loss 0.10880529440939427 test_loss: 0.1312986731529236
epoch: 143 training_loss 0.10519634744152427 test_loss: 0.121579110622406
epoch: 144 training_loss 0.11291979011148215 test_loss: 0.10966413021087647
epoch: 145 training_loss 0.10077540909871459 test_loss: 0.11505624055862426
epoch: 146 training_loss 0.10589280603453517 test_loss: 0.11040160655975342
epoch: 147 training_loss 0.10309623206034303 test_loss: 0.11920474767684937
epoch: 148 training_loss 0.10724340014159679 test_loss: 0.11511937379837037
epoch: 149 training_loss 0.1037421934492886 test_loss: 0.10446958541870117
epoch: 0 training_loss 0.33575271964073183 test_loss: 0.25085813999176027
epoch: 1 training_loss 0.22287760138511659 test_loss: 0.20196831226348877
epoch: 2 training_loss 0.19640881203114988 test_loss: 0.18877832889556884
epoch: 3 training_loss 0.1726003098487854 test_loss: 0.15714826583862304
epoch: 4 training_loss 0.1558152875304222 test_loss: 0.16392451524734497
epoch: 5 training_loss 0.14426221154630184 test_loss: 0.15950947999954224
epoch: 6 training_loss 0.13979353941977024 test_loss: 0.1458030343055725
epoch: 7 training_loss 0.1454855367541313 test_loss: 0.14345871210098265
epoch: 8 training_loss 0.13383015386760236 test_loss: 0.1349789619445801
epoch: 9 training_loss 0.13211459521204233 test_loss: 0.13077878952026367
epoch: 10 training_loss 0.1182245821878314 test_loss: 0.11427544355392456
epoch: 11 training_loss 0.1354563956707716 test_loss: 0.14964851140975952
epoch: 12 training_loss 0.13087252013385295 test_loss: 0.12089754343032837
epoch: 13 training_loss 0.12218758508563042 test_loss: 0.1415459394454956
epoch: 14 training_loss 0.1268960640206933 test_loss: 0.13906381130218506
epoch: 15 training_loss 0.1146683294326067 test_loss: 0.12500326633453368
epoch: 16 training_loss 0.13391913797706365 test_loss: 0.1261378288269043
epoch: 17 training_loss 0.12201107699424028 test_loss: 0.133962082862854
epoch: 18 training_loss 0.11518583696335555 test_loss: 0.1259823203086853
epoch: 19 training_loss 0.11709033522754908 test_loss: 0.12724519968032838
epoch: 20 training_loss 0.12189712800085545 test_loss: 0.10591864585876465
epoch: 21 training_loss 0.11783836361020804 test_loss: 0.10477685928344727
epoch: 22 training_loss 0.11381552800536156 test_loss: 0.12598341703414917
epoch: 23 training_loss 0.11936681225895882 test_loss: 0.13333117961883545
epoch: 24 training_loss 0.11559392195194959 test_loss: 0.11811162233352661
epoch: 25 training_loss 0.11777208860963582 test_loss: 0.12510055303573608
epoch: 26 training_loss 0.11093658547848463 test_loss: 0.12164013385772705
epoch: 27 training_loss 0.11453115019947291 test_loss: 0.12493401765823364
epoch: 28 training_loss 0.10602227356284857 test_loss: 0.10124688148498535
epoch: 29 training_loss 0.11671228107064963 test_loss: 0.12891217470169067
epoch: 30 training_loss 0.1146472954377532 test_loss: 0.11827353239059449
epoch: 31 training_loss 0.11465737011283636 test_loss: 0.12792073488235473
epoch: 32 training_loss 0.11549899287521839 test_loss: 0.12247458696365357
epoch: 33 training_loss 0.11212353393435479 test_loss: 0.10051034688949585
epoch: 34 training_loss 0.12055381882935762 test_loss: 0.12303839921951294
epoch: 35 training_loss 0.11555203352123498 test_loss: 0.1169252872467041
epoch: 36 training_loss 0.11240836400538683 test_loss: 0.11858969926834106
epoch: 37 training_loss 0.10856978690251708 test_loss: 0.11180949211120605
epoch: 38 training_loss 0.1122067591547966 test_loss: 0.12408697605133057
epoch: 39 training_loss 0.10864828500896692 test_loss: 0.11214789152145385
epoch: 40 training_loss 0.11303170885890722 test_loss: 0.11004823446273804
epoch: 41 training_loss 0.10953974120318889 test_loss: 0.11116735935211182
epoch: 42 training_loss 0.11416399355977774 test_loss: 0.138433575630188
epoch: 43 training_loss 0.11522981986403465 test_loss: 0.11308951377868652
epoch: 44 training_loss 0.11301014993339777 test_loss: 0.11268452405929566
epoch: 45 training_loss 0.11919997334480285 test_loss: 0.12095508575439454
epoch: 46 training_loss 0.10797091662883758 test_loss: 0.11693402528762817
epoch: 47 training_loss 0.11419393667951226 test_loss: 0.11963399648666381
epoch: 48 training_loss 0.10655380621552467 test_loss: 0.12160413265228272
epoch: 49 training_loss 0.11124032841995359 test_loss: 0.11407517194747925
epoch: 50 training_loss 0.11171904753893613 test_loss: 0.10229181051254273
epoch: 51 training_loss 0.10633916864171625 test_loss: 0.1079149603843689
epoch: 52 training_loss 0.11727508064359426 test_loss: 0.12383285760879517
epoch: 53 training_loss 0.10652887817472219 test_loss: 0.11426103115081787
epoch: 54 training_loss 0.11652026664465666 test_loss: 0.11022251844406128
epoch: 55 training_loss 0.1120646259561181 test_loss: 0.11417737007141113
epoch: 56 training_loss 0.11287626251578331 test_loss: 0.11432089805603027
epoch: 57 training_loss 0.10819254964590072 test_loss: 0.11058549880981446
epoch: 58 training_loss 0.10915199097245931 test_loss: 0.1302490234375
epoch: 59 training_loss 0.11399695195257664 test_loss: 0.10478572845458985
epoch: 60 training_loss 0.11194432262331247 test_loss: 0.11423811912536622
epoch: 61 training_loss 0.1029176715016365 test_loss: 0.11877777576446533
epoch: 62 training_loss 0.1055821860767901 test_loss: 0.12277354001998901
epoch: 63 training_loss 0.10498789861798287 test_loss: 0.10931259393692017
epoch: 64 training_loss 0.10675304140895606 test_loss: 0.10789215564727783
epoch: 65 training_loss 0.1058960234746337 test_loss: 0.10145770311355591
epoch: 66 training_loss 0.11030441829934716 test_loss: 0.11011008024215699
epoch: 67 training_loss 0.1145549015700817 test_loss: 0.1077069640159607
epoch: 68 training_loss 0.10981625416316092 test_loss: 0.13611438274383544
epoch: 69 training_loss 0.11034777417778968 test_loss: 0.11942600011825562
epoch: 70 training_loss 0.11295962691307068 test_loss: 0.11827707290649414
epoch: 71 training_loss 0.1113124749995768 test_loss: 0.11775000095367431
epoch: 72 training_loss 0.10903332345187663 test_loss: 0.10148053169250489
epoch: 73 training_loss 0.10908106211572885 test_loss: 0.10384743213653565
epoch: 74 training_loss 0.10894503708928824 test_loss: 0.11256904602050781
epoch: 75 training_loss 0.10181333484128118 test_loss: 0.12243187427520752
epoch: 76 training_loss 0.10589838288724422 test_loss: 0.11052740812301635
epoch: 77 training_loss 0.10798173325136304 test_loss: 0.12174732685089111
epoch: 78 training_loss 0.10093574896454811 test_loss: 0.1275730848312378
epoch: 79 training_loss 0.1049740444123745 test_loss: 0.1131919264793396
epoch: 80 training_loss 0.106729187797755 test_loss: 0.1172408938407898
epoch: 81 training_loss 0.1059277592971921 test_loss: 0.09752142429351807
epoch: 82 training_loss 0.10164994953200221 test_loss: 0.11425513029098511
epoch: 83 training_loss 0.10776650194078684 test_loss: 0.10784109830856323
epoch: 84 training_loss 0.10634352311491967 test_loss: 0.10947967767715454
epoch: 85 training_loss 0.10578670002520084 test_loss: 0.10353050231933594
epoch: 86 training_loss 0.11196138244122267 test_loss: 0.10731742382049561
epoch: 87 training_loss 0.10800064422190189 test_loss: 0.09611313343048096
epoch: 88 training_loss 0.10295705877244472 test_loss: 0.11736389398574829
epoch: 89 training_loss 0.1109912845864892 test_loss: 0.11045013666152954
epoch: 90 training_loss 0.10883800216019153 test_loss: 0.11339924335479737
epoch: 91 training_loss 0.10365337584167719 test_loss: 0.10504362583160401
epoch: 92 training_loss 0.10832288350909948 test_loss: 0.11400352716445923
epoch: 93 training_loss 0.11433874614536763 test_loss: 0.11431303024291992
epoch: 94 training_loss 0.10676587140187621 test_loss: 0.11290450096130371
epoch: 95 training_loss 0.11077691819518805 test_loss: 0.11405261754989623
epoch: 96 training_loss 0.10118241900578141 test_loss: 0.11328622102737426
epoch: 97 training_loss 0.10940508726984262 test_loss: 0.1112341046333313
epoch: 98 training_loss 0.1104762114211917 test_loss: 0.10710728168487549
epoch: 99 training_loss 0.1119468142837286 test_loss: 0.12261849641799927
epoch: 100 training_loss 0.10153982672840357 test_loss: 0.10560346841812134
epoch: 101 training_loss 0.11350727047771215 test_loss: 0.10213736295700074
epoch: 102 training_loss 0.10026916704140604 test_loss: 0.11102052927017211
epoch: 103 training_loss 0.10803608607500792 test_loss: 0.13318054676055907
epoch: 104 training_loss 0.09786571964621543 test_loss: 0.10091724395751953
epoch: 105 training_loss 0.11359048958867789 test_loss: 0.11881034374237061
epoch: 106 training_loss 0.10882563123479486 test_loss: 0.09684860706329346
epoch: 107 training_loss 0.10251040700823069 test_loss: 0.10957839488983154
epoch: 108 training_loss 0.10614994943141937 test_loss: 0.12229294776916504
epoch: 109 training_loss 0.10216654499992728 test_loss: 0.09053051471710205
epoch: 110 training_loss 0.10894005462527274 test_loss: 0.1096218466758728
epoch: 111 training_loss 0.11016527332365512 test_loss: 0.11986898183822632
epoch: 112 training_loss 0.10917020685970784 test_loss: 0.11755101680755616
epoch: 113 training_loss 0.10378103353083133 test_loss: 0.10435707569122314
epoch: 114 training_loss 0.10209912236779928 test_loss: 0.12516366243362426
epoch: 115 training_loss 0.09788897763937712 test_loss: 0.10792348384857178
epoch: 116 training_loss 0.10564267419278622 test_loss: 0.1047980785369873
epoch: 117 training_loss 0.11054737083613872 test_loss: 0.11381610631942748
epoch: 118 training_loss 0.10636787597090006 test_loss: 0.10225539207458496
epoch: 119 training_loss 0.10530333630740643 test_loss: 0.12151831388473511
epoch: 120 training_loss 0.1008062632009387 test_loss: 0.11401245594024659
epoch: 121 training_loss 0.10702495368197561 test_loss: 0.11291677951812744
epoch: 122 training_loss 0.1063289438188076 test_loss: 0.10751142501831054
epoch: 123 training_loss 0.11349852956831455 test_loss: 0.11743472814559937
epoch: 124 training_loss 0.10231229655444622 test_loss: 0.11698883771896362
epoch: 125 training_loss 0.10702476002275944 test_loss: 0.11312940120697021
epoch: 126 training_loss 0.1027301663160324 test_loss: 0.10982590913772583
epoch: 127 training_loss 0.10238740790635348 test_loss: 0.10091423988342285
epoch: 128 training_loss 0.10167590329423547 test_loss: 0.11147969961166382
epoch: 129 training_loss 0.1093336227722466 test_loss: 0.11149458885192871
epoch: 130 training_loss 0.104122793097049 test_loss: 0.1065687894821167
epoch: 131 training_loss 0.1070285126566887 test_loss: 0.11789156198501587
epoch: 132 training_loss 0.1003154319524765 test_loss: 0.12780710458755493
epoch: 133 training_loss 0.09405887676402927 test_loss: 0.1124537706375122
epoch: 134 training_loss 0.10879069652408362 test_loss: 0.10786831378936768
epoch: 135 training_loss 0.10086180509999394 test_loss: 0.11070719957351685
epoch: 136 training_loss 0.10436019558459521 test_loss: 0.1083184003829956
epoch: 137 training_loss 0.10720079874619842 test_loss: 0.1014103889465332
epoch: 138 training_loss 0.10137754490599037 test_loss: 0.11039633750915527
epoch: 139 training_loss 0.09767115652561188 test_loss: 0.1110960602760315
epoch: 140 training_loss 0.09543034888803958 test_loss: 0.11167477369308472
epoch: 141 training_loss 0.1089915032684803 test_loss: 0.11597466468811035
epoch: 142 training_loss 0.09936013840138912 test_loss: 0.1343638300895691
epoch: 143 training_loss 0.10159519154578447 test_loss: 0.12177896499633789
epoch: 144 training_loss 0.11098292101174594 test_loss: 0.12058970928192139
epoch: 145 training_loss 0.10112835990265012 test_loss: 0.09976812601089477
epoch: 146 training_loss 0.09583776856772602 test_loss: 0.10564596652984619
epoch: 147 training_loss 0.0980499679222703 test_loss: 0.10638023614883423
epoch: 148 training_loss 0.09992264833301306 test_loss: 0.10879149436950683
epoch: 149 training_loss 0.09866743806749582 test_loss: 0.11028261184692383
epoch: 0 training_loss 0.3258692120015621 test_loss: 0.22687675952911376
epoch: 1 training_loss 0.20922227159142495 test_loss: 0.19244104623794556
epoch: 2 training_loss 0.18222570236772298 test_loss: 0.14686801433563232
epoch: 3 training_loss 0.1634075789153576 test_loss: 0.17532211542129517
epoch: 4 training_loss 0.1531111367791891 test_loss: 0.17683597803115844
epoch: 5 training_loss 0.15334157541394233 test_loss: 0.13221347332000732
epoch: 6 training_loss 0.1346388266608119 test_loss: 0.13391112089157103
epoch: 7 training_loss 0.1345473790541291 test_loss: 0.13107575178146363
epoch: 8 training_loss 0.1333274557814002 test_loss: 0.12168456315994262
epoch: 9 training_loss 0.12917818263173103 test_loss: 0.12320842742919921
epoch: 10 training_loss 0.1258508998900652 test_loss: 0.12389360666275025
epoch: 11 training_loss 0.13194012582302095 test_loss: 0.11253869533538818
epoch: 12 training_loss 0.12701962824910878 test_loss: 0.11382569074630737
epoch: 13 training_loss 0.12054911367595196 test_loss: 0.11565467119216918
epoch: 14 training_loss 0.12073067311197519 test_loss: 0.13292521238327026
epoch: 15 training_loss 0.12866743575781583 test_loss: 0.12324156761169433
epoch: 16 training_loss 0.114012137837708 test_loss: 0.11926606893539429
epoch: 17 training_loss 0.11720391813665629 test_loss: 0.106556236743927
epoch: 18 training_loss 0.12483616583049298 test_loss: 0.10645904541015624
epoch: 19 training_loss 0.11586994379758835 test_loss: 0.11337058544158936
epoch: 20 training_loss 0.11247965943068267 test_loss: 0.12215231657028199
epoch: 21 training_loss 0.11433585129678249 test_loss: 0.10584815740585327
epoch: 22 training_loss 0.11307510901242494 test_loss: 0.11820000410079956
epoch: 23 training_loss 0.1101682860404253 test_loss: 0.1022455334663391
epoch: 24 training_loss 0.1120121981576085 test_loss: 0.10368211269378662
epoch: 25 training_loss 0.11174319084733725 test_loss: 0.09859283566474915
epoch: 26 training_loss 0.11235129028558731 test_loss: 0.12059756517410278
epoch: 27 training_loss 0.10896578945219516 test_loss: 0.11417610645294189
epoch: 28 training_loss 0.11051063995808363 test_loss: 0.12340273857116699
epoch: 29 training_loss 0.1207669111341238 test_loss: 0.11116691827774047
epoch: 30 training_loss 0.10734545387327671 test_loss: 0.10788447856903076
epoch: 31 training_loss 0.11274031452834606 test_loss: 0.11546657085418702
epoch: 32 training_loss 0.12259753085672856 test_loss: 0.10650076866149902
epoch: 33 training_loss 0.10480724116787314 test_loss: 0.1165440559387207
epoch: 34 training_loss 0.1085295457020402 test_loss: 0.105270516872406
epoch: 35 training_loss 0.11021758187562228 test_loss: 0.10939421653747558
epoch: 36 training_loss 0.11160027965903282 test_loss: 0.13162016868591309
epoch: 37 training_loss 0.10420746713876725 test_loss: 0.11426944732666015
epoch: 38 training_loss 0.10655183244496584 test_loss: 0.11500774621963501
epoch: 39 training_loss 0.0994215039908886 test_loss: 0.11348817348480225
epoch: 40 training_loss 0.11100805858150124 test_loss: 0.11301178932189941
epoch: 41 training_loss 0.10465870318934321 test_loss: 0.11882115602493286
epoch: 42 training_loss 0.10992832776159048 test_loss: 0.11159442663192749
epoch: 43 training_loss 0.10647465072572232 test_loss: 0.11518867015838623
epoch: 44 training_loss 0.11790873073041439 test_loss: 0.10414597988128663
epoch: 45 training_loss 0.11543010162189603 test_loss: 0.12788547277450563
epoch: 46 training_loss 0.10832892883569002 test_loss: 0.1048853874206543
epoch: 47 training_loss 0.10743746651336551 test_loss: 0.10207756757736205
epoch: 48 training_loss 0.10681665159761905 test_loss: 0.10112273693084717
epoch: 49 training_loss 0.10262735649943351 test_loss: 0.09106591939926148
epoch: 50 training_loss 0.10773745056241751 test_loss: 0.10164395570755005
epoch: 51 training_loss 0.1109799775481224 test_loss: 0.12345466613769532
epoch: 52 training_loss 0.10719932116568089 test_loss: 0.10850759744644164
epoch: 53 training_loss 0.10857040107250214 test_loss: 0.10538054704666137
epoch: 54 training_loss 0.11205098889768124 test_loss: 0.10325925350189209
epoch: 55 training_loss 0.10667177319526672 test_loss: 0.10527539253234863
epoch: 56 training_loss 0.10319536749273539 test_loss: 0.1068204402923584
epoch: 57 training_loss 0.1081638978421688 test_loss: 0.1089705228805542
epoch: 58 training_loss 0.10771265733987093 test_loss: 0.09214279651641846
epoch: 59 training_loss 0.10417931526899338 test_loss: 0.10418254137039185
epoch: 60 training_loss 0.10341191451996565 test_loss: 0.10925495624542236
epoch: 61 training_loss 0.1066060164384544 test_loss: 0.09772472381591797
epoch: 62 training_loss 0.11032350689172744 test_loss: 0.11323368549346924
epoch: 63 training_loss 0.10549942879006266 test_loss: 0.10598483085632324
epoch: 64 training_loss 0.10294856991618871 test_loss: 0.1121991753578186
epoch: 65 training_loss 0.1085348086990416 test_loss: 0.11151540279388428
epoch: 66 training_loss 0.10479827679693698 test_loss: 0.10634490251541137
epoch: 67 training_loss 0.10757046621292829 test_loss: 0.10684267282485962
epoch: 68 training_loss 0.10591384977102279 test_loss: 0.11022502183914185
epoch: 69 training_loss 0.09895303200930357 test_loss: 0.10540652275085449
epoch: 70 training_loss 0.10247050948441029 test_loss: 0.10235110521316529
epoch: 71 training_loss 0.10031848624348641 test_loss: 0.11091606616973877
epoch: 72 training_loss 0.10947095561772585 test_loss: 0.10507333278656006
epoch: 73 training_loss 0.10775779895484447 test_loss: 0.10683848857879638
epoch: 74 training_loss 0.10440831191837788 test_loss: 0.09736320972442628
epoch: 75 training_loss 0.11597879704087972 test_loss: 0.1052091360092163
epoch: 76 training_loss 0.10183824941515923 test_loss: 0.09836971759796143
epoch: 77 training_loss 0.10113138761371374 test_loss: 0.1115103006362915
epoch: 78 training_loss 0.10130953645333647 test_loss: 0.12371773719787597
epoch: 79 training_loss 0.10586643693968653 test_loss: 0.10003228187561035
epoch: 80 training_loss 0.10327578246593476 test_loss: 0.11258450746536255
epoch: 81 training_loss 0.10493940826505423 test_loss: 0.09912198781967163
epoch: 82 training_loss 0.10721159942448139 test_loss: 0.10361007452011109
epoch: 83 training_loss 0.10623726695775985 test_loss: 0.10661686658859253
epoch: 84 training_loss 0.1020275966450572 test_loss: 0.09929031729698182
epoch: 85 training_loss 0.10016930066049098 test_loss: 0.11440293788909912
epoch: 86 training_loss 0.09713548390194773 test_loss: 0.12476409673690796
epoch: 87 training_loss 0.10461396828293801 test_loss: 0.11171616315841675
epoch: 88 training_loss 0.10695905227214098 test_loss: 0.14019283056259155
epoch: 89 training_loss 0.10583283795043825 test_loss: 0.10830752849578858
epoch: 90 training_loss 0.10193380400538445 test_loss: 0.10325173139572144
epoch: 91 training_loss 0.10308598190546035 test_loss: 0.11284117698669434
epoch: 92 training_loss 0.1072224449366331 test_loss: 0.11137763261795045
epoch: 93 training_loss 0.09889367604628205 test_loss: 0.10304728746414185
epoch: 94 training_loss 0.1077751548960805 test_loss: 0.10881665945053101
epoch: 95 training_loss 0.10725825771689415 test_loss: 0.10787674188613891
epoch: 96 training_loss 0.1062743430584669 test_loss: 0.0997007131576538
epoch: 97 training_loss 0.11281074952334165 test_loss: 0.09776625633239747
epoch: 98 training_loss 0.10330777447670698 test_loss: 0.0971967875957489
epoch: 99 training_loss 0.10061093792319298 test_loss: 0.0964985430240631
epoch: 100 training_loss 0.11487723592668772 test_loss: 0.10023210048675538
epoch: 101 training_loss 0.10748751558363438 test_loss: 0.09581565260887145
epoch: 102 training_loss 0.10301756933331489 test_loss: 0.09928233623504638
epoch: 103 training_loss 0.10708914190530777 test_loss: 0.09222477674484253
epoch: 104 training_loss 0.10250046379864215 test_loss: 0.09389130473136902
epoch: 105 training_loss 0.10127614066004753 test_loss: 0.10836262702941894
epoch: 106 training_loss 0.10268342293798924 test_loss: 0.09031862020492554
epoch: 107 training_loss 0.09474085345864296 test_loss: 0.10875104665756226
epoch: 108 training_loss 0.10229299705475568 test_loss: 0.11429206132888795
epoch: 109 training_loss 0.10708171935752034 test_loss: 0.1087486743927002
epoch: 110 training_loss 0.10410745471715926 test_loss: 0.09966642260551453
epoch: 111 training_loss 0.10021066436544061 test_loss: 0.09094183444976807
epoch: 112 training_loss 0.10699118416756391 test_loss: 0.10422217845916748
epoch: 113 training_loss 0.10411621626466512 test_loss: 0.10391814708709717
epoch: 114 training_loss 0.09304936356842518 test_loss: 0.10020743608474732
epoch: 115 training_loss 0.10081978069618344 test_loss: 0.1162909984588623
epoch: 116 training_loss 0.1044010192900896 test_loss: 0.10921690464019776
epoch: 117 training_loss 0.09938850488513708 test_loss: 0.12616820335388185
epoch: 118 training_loss 0.09880531456321479 test_loss: 0.11084269285202027
epoch: 119 training_loss 0.1037479486130178 test_loss: 0.09872108101844787
epoch: 120 training_loss 0.10692078743129968 test_loss: 0.11800012588500977
epoch: 121 training_loss 0.09761555798351765 test_loss: 0.09876174926757812
epoch: 122 training_loss 0.09792214278131724 test_loss: 0.09654165506362915
epoch: 123 training_loss 0.09293635332956911 test_loss: 0.11539077758789062
epoch: 124 training_loss 0.10406229309737683 test_loss: 0.10930603742599487
epoch: 125 training_loss 0.10707060778513551 test_loss: 0.11424273252487183
epoch: 126 training_loss 0.10305840577930211 test_loss: 0.10136516094207763
epoch: 127 training_loss 0.0994260660558939 test_loss: 0.09894118905067444
epoch: 128 training_loss 0.10306093268096447 test_loss: 0.11751114130020142
epoch: 129 training_loss 0.10245016600936652 test_loss: 0.10361846685409545
epoch: 130 training_loss 0.1042483895085752 test_loss: 0.09401332736015319
epoch: 131 training_loss 0.09631304897367954 test_loss: 0.11408286094665528
epoch: 132 training_loss 0.09986069638282061 test_loss: 0.10343462228775024
epoch: 133 training_loss 0.09798737049102783 test_loss: 0.10054985284805298
epoch: 134 training_loss 0.10193322269245982 test_loss: 0.10093080997467041
epoch: 135 training_loss 0.09245241899043322 test_loss: 0.1063989281654358
epoch: 136 training_loss 0.10211697997525335 test_loss: 0.11424466371536254
epoch: 137 training_loss 0.09879244271665812 test_loss: 0.12284411191940307
epoch: 138 training_loss 0.11667748166248203 test_loss: 0.09445436000823974
epoch: 139 training_loss 0.09867119934409857 test_loss: 0.10779846906661987
epoch: 140 training_loss 0.09368300525471568 test_loss: 0.10486315488815308
epoch: 141 training_loss 0.10503626067191363 test_loss: 0.12098238468170167
epoch: 142 training_loss 0.09968278244137764 test_loss: 0.110908842086792
epoch: 143 training_loss 0.10360233945772052 test_loss: 0.09936567544937133
epoch: 144 training_loss 0.1023970764875412 test_loss: 0.08542485237121582
epoch: 145 training_loss 0.11051992941647767 test_loss: 0.09442903399467469
epoch: 146 training_loss 0.10868648216128349 test_loss: 0.10488996505737305
epoch: 147 training_loss 0.0936919859610498 test_loss: 0.10044596195220948
epoch: 148 training_loss 0.102282560095191 test_loss: 0.11587637662887573
epoch: 149 training_loss 0.0993924088589847 test_loss: 0.09849178194999694
epoch: 0 training_loss 0.3472326546907425 test_loss: 0.23228874206542968
epoch: 1 training_loss 0.23155892863869668 test_loss: 0.19507657289505004
epoch: 2 training_loss 0.1927274303138256 test_loss: 0.1803956985473633
epoch: 3 training_loss 0.17038736134767532 test_loss: 0.14790632724761962
epoch: 4 training_loss 0.16278997160494327 test_loss: 0.14710254669189454
epoch: 5 training_loss 0.150392633266747 test_loss: 0.13311187028884888
epoch: 6 training_loss 0.1396368218958378 test_loss: 0.1588873505592346
epoch: 7 training_loss 0.1390059158205986 test_loss: 0.14395973682403565
epoch: 8 training_loss 0.13961955841630697 test_loss: 0.13849260807037353
epoch: 9 training_loss 0.1345879500359297 test_loss: 0.12405169010162354
epoch: 10 training_loss 0.13124911408871412 test_loss: 0.14347426891326903
epoch: 11 training_loss 0.12382391154766083 test_loss: 0.14453500509262085
epoch: 12 training_loss 0.1346891900524497 test_loss: 0.11430486440658569
epoch: 13 training_loss 0.11483516234904528 test_loss: 0.11545614004135132
epoch: 14 training_loss 0.12448412530124188 test_loss: 0.13741850852966309
epoch: 15 training_loss 0.11880013845860958 test_loss: 0.12604172229766847
epoch: 16 training_loss 0.12475370675325394 test_loss: 0.12564806938171386
epoch: 17 training_loss 0.11200380627065896 test_loss: 0.12707871198654175
epoch: 18 training_loss 0.12438629869371652 test_loss: 0.10024770498275756
epoch: 19 training_loss 0.11688325136899948 test_loss: 0.11543717384338378
epoch: 20 training_loss 0.11847787369042635 test_loss: 0.11711958646774293
epoch: 21 training_loss 0.11832096029073 test_loss: 0.12373613119125366
epoch: 22 training_loss 0.1145415822416544 test_loss: 0.12394857406616211
epoch: 23 training_loss 0.11372134862467646 test_loss: 0.13095082044601442
epoch: 24 training_loss 0.11224604669958353 test_loss: 0.12201509475708008
epoch: 25 training_loss 0.12042000938206911 test_loss: 0.10879647731781006
epoch: 26 training_loss 0.12401043102145196 test_loss: 0.1106713891029358
epoch: 27 training_loss 0.11101788718253375 test_loss: 0.11530663967132568
epoch: 28 training_loss 0.11213731899857521 test_loss: 0.10943233966827393
epoch: 29 training_loss 0.11509933989495039 test_loss: 0.1232046365737915
epoch: 30 training_loss 0.1131965072453022 test_loss: 0.119694983959198
epoch: 31 training_loss 0.1118671290576458 test_loss: 0.12054630517959594
epoch: 32 training_loss 0.10897649485617876 test_loss: 0.12438738346099854
epoch: 33 training_loss 0.11707803100347519 test_loss: 0.11145050525665283
epoch: 34 training_loss 0.10897839646786452 test_loss: 0.12362964153289795
epoch: 35 training_loss 0.11425411146134139 test_loss: 0.11363259553909302
epoch: 36 training_loss 0.10740852238610386 test_loss: 0.10875344276428223
epoch: 37 training_loss 0.10994213171303273 test_loss: 0.10483334064483643
epoch: 38 training_loss 0.11161017183214426 test_loss: 0.10525052547454834
epoch: 39 training_loss 0.10216835163533687 test_loss: 0.1134046196937561
epoch: 40 training_loss 0.10729719577357173 test_loss: 0.12290487289428711
epoch: 41 training_loss 0.11229476561769842 test_loss: 0.09970084428787232
epoch: 42 training_loss 0.10981079425662756 test_loss: 0.11305338144302368
epoch: 43 training_loss 0.11859087083488702 test_loss: 0.10882036685943604
epoch: 44 training_loss 0.10276397317647934 test_loss: 0.11686856746673584
epoch: 45 training_loss 0.10709480080753565 test_loss: 0.11167888641357422
epoch: 46 training_loss 0.10916082292795182 test_loss: 0.1253356456756592
epoch: 47 training_loss 0.10755502332001925 test_loss: 0.10998325347900391
epoch: 48 training_loss 0.10797419276088477 test_loss: 0.1280913233757019
epoch: 49 training_loss 0.10276558574289084 test_loss: 0.11870158910751342
epoch: 50 training_loss 0.10507370864972472 test_loss: 0.11244707107543946
epoch: 51 training_loss 0.11315376564860344 test_loss: 0.11573832035064698
epoch: 52 training_loss 0.10616653054952621 test_loss: 0.10797570943832398
epoch: 53 training_loss 0.1141415386646986 test_loss: 0.11995476484298706
epoch: 54 training_loss 0.10614223927259445 test_loss: 0.09658629298210145
epoch: 55 training_loss 0.11454947777092457 test_loss: 0.11874040365219116
epoch: 56 training_loss 0.10810739304870368 test_loss: 0.09839140176773072
epoch: 57 training_loss 0.10215613003820181 test_loss: 0.10603375434875488
epoch: 58 training_loss 0.10514200087636709 test_loss: 0.1126009464263916
epoch: 59 training_loss 0.10737035304307938 test_loss: 0.11503446102142334
epoch: 60 training_loss 0.11024203345179558 test_loss: 0.09876357316970825
epoch: 61 training_loss 0.11431184135377408 test_loss: 0.11653846502304077
epoch: 62 training_loss 0.11249545868486166 test_loss: 0.11700240373611451
epoch: 63 training_loss 0.10371797103434802 test_loss: 0.11218286752700805
epoch: 64 training_loss 0.10931596353650093 test_loss: 0.11886646747589111
epoch: 65 training_loss 0.11985315345227718 test_loss: 0.09621571302413941
epoch: 66 training_loss 0.11517082124948502 test_loss: 0.09679297208786011
epoch: 67 training_loss 0.11611035611480475 test_loss: 0.10479836463928223
epoch: 68 training_loss 0.10640003869310022 test_loss: 0.12002582550048828
epoch: 69 training_loss 0.10372202839702367 test_loss: 0.11126112937927246
epoch: 70 training_loss 0.1025379059650004 test_loss: 0.11301295757293701
epoch: 71 training_loss 0.11111540928483009 test_loss: 0.12389405965805053
epoch: 72 training_loss 0.1068862383812666 test_loss: 0.1036196231842041
epoch: 73 training_loss 0.10944207966327667 test_loss: 0.11044782400131226
epoch: 74 training_loss 0.10685908399522305 test_loss: 0.10685627460479737
epoch: 75 training_loss 0.10658210799098015 test_loss: 0.10546348094940186
epoch: 76 training_loss 0.1081986746750772 test_loss: 0.12873295545578003
epoch: 77 training_loss 0.1090355321019888 test_loss: 0.11056851148605347
epoch: 78 training_loss 0.10746644027531146 test_loss: 0.10280430316925049
epoch: 79 training_loss 0.10810850508511066 test_loss: 0.11798300743103027
epoch: 80 training_loss 0.1123728171736002 test_loss: 0.1229629397392273
epoch: 81 training_loss 0.11350765313953161 test_loss: 0.10835919380187989
epoch: 82 training_loss 0.10653794325888157 test_loss: 0.10637590885162354
epoch: 83 training_loss 0.10446203267201781 test_loss: 0.10296422243118286
epoch: 84 training_loss 0.10337619617581367 test_loss: 0.11192014217376708
epoch: 85 training_loss 0.10490345891565084 test_loss: 0.10739226341247558
epoch: 86 training_loss 0.10772584889084101 test_loss: 0.10785616636276245
epoch: 87 training_loss 0.10951469920575618 test_loss: 0.10238161087036132
epoch: 88 training_loss 0.11171038579195738 test_loss: 0.09343959093093872
epoch: 89 training_loss 0.10946225501596928 test_loss: 0.11834746599197388
epoch: 90 training_loss 0.11272890787571668 test_loss: 0.10173490047454833
epoch: 91 training_loss 0.11140573048964143 test_loss: 0.11403623819351197
epoch: 92 training_loss 0.1078227436542511 test_loss: 0.10743898153305054
epoch: 93 training_loss 0.10650108521804214 test_loss: 0.11346818208694458
epoch: 94 training_loss 0.10510185860097408 test_loss: 0.10374540090560913
epoch: 95 training_loss 0.10864154182374477 test_loss: 0.127697217464447
epoch: 96 training_loss 0.1014681545086205 test_loss: 0.11690623760223388
epoch: 97 training_loss 0.10791496243327856 test_loss: 0.11258273124694824
epoch: 98 training_loss 0.10558343131095171 test_loss: 0.11449605226516724
epoch: 99 training_loss 0.1049297270923853 test_loss: 0.11724528074264526
epoch: 100 training_loss 0.10830322328954935 test_loss: 0.11754870414733887
epoch: 101 training_loss 0.10758059602230788 test_loss: 0.09907816648483277
epoch: 102 training_loss 0.10319802686572074 test_loss: 0.1287718176841736
epoch: 103 training_loss 0.10395636521279812 test_loss: 0.10198140144348145
epoch: 104 training_loss 0.10062443319708109 test_loss: 0.12148420810699463
epoch: 105 training_loss 0.10892081523314118 test_loss: 0.10733267068862914
epoch: 106 training_loss 0.10641196887940169 test_loss: 0.11175044775009155
epoch: 107 training_loss 0.10815227333456277 test_loss: 0.11098415851593017
epoch: 108 training_loss 0.1088966479152441 test_loss: 0.11693236827850342
epoch: 109 training_loss 0.11053584095090628 test_loss: 0.116363263130188
epoch: 110 training_loss 0.10454232200980186 test_loss: 0.11177386045455932
epoch: 111 training_loss 0.09574795104563236 test_loss: 0.12562819719314575
epoch: 112 training_loss 0.1098038737103343 test_loss: 0.09947096705436706
epoch: 113 training_loss 0.10303698811680079 test_loss: 0.10232720375061036
epoch: 114 training_loss 0.10870210070163011 test_loss: 0.12007962465286255
epoch: 115 training_loss 0.10497177662327886 test_loss: 0.12648248672485352
epoch: 116 training_loss 0.11096704296767712 test_loss: 0.10978507995605469
epoch: 117 training_loss 0.10627837989479304 test_loss: 0.11348798274993896
epoch: 118 training_loss 0.10519827971234917 test_loss: 0.1061358094215393
epoch: 119 training_loss 0.09738708853721618 test_loss: 0.09198179841041565
epoch: 120 training_loss 0.10617506137117744 test_loss: 0.12031688690185546
epoch: 121 training_loss 0.09991499677300453 test_loss: 0.09707279205322265
epoch: 122 training_loss 0.11093859273940325 test_loss: 0.10735958814620972
epoch: 123 training_loss 0.10211997281759977 test_loss: 0.10480420589447022
epoch: 124 training_loss 0.1128032935038209 test_loss: 0.09543015360832215
epoch: 125 training_loss 0.10158139266073704 test_loss: 0.12399241924285889
epoch: 126 training_loss 0.10708529353141785 test_loss: 0.11971464157104492
epoch: 127 training_loss 0.10473829861730337 test_loss: 0.11630030870437622
epoch: 128 training_loss 0.10367347706109285 test_loss: 0.09997358322143554
epoch: 129 training_loss 0.10465218974277377 test_loss: 0.11916074752807618
epoch: 130 training_loss 0.09193589974194766 test_loss: 0.12143696546554565
epoch: 131 training_loss 0.10881841260939837 test_loss: 0.11324765682220458
epoch: 132 training_loss 0.10674481296911836 test_loss: 0.10188699960708618
epoch: 133 training_loss 0.09990465825423599 test_loss: 0.10625452995300293
epoch: 134 training_loss 0.1024349520355463 test_loss: 0.11767536401748657
epoch: 135 training_loss 0.1057799944281578 test_loss: 0.11869659423828124
epoch: 136 training_loss 0.10403293449431658 test_loss: 0.12077738046646118
epoch: 137 training_loss 0.10067386358976364 test_loss: 0.10949119329452514
epoch: 138 training_loss 0.10068437688052655 test_loss: 0.12330702543258668
epoch: 139 training_loss 0.11128116644918919 test_loss: 0.12838664054870605
epoch: 140 training_loss 0.103350980412215 test_loss: 0.10476341247558593
epoch: 141 training_loss 0.10200085179880261 test_loss: 0.10515036582946777
epoch: 142 training_loss 0.1048288494348526 test_loss: 0.12348557710647583
epoch: 143 training_loss 0.1030992615967989 test_loss: 0.10424357652664185
epoch: 144 training_loss 0.10834153076633811 test_loss: 0.10327483415603637
epoch: 145 training_loss 0.11037781067192555 test_loss: 0.10734493732452392
epoch: 146 training_loss 0.10314657280221581 test_loss: 0.10958245992660523
epoch: 147 training_loss 0.111124911531806 test_loss: 0.11245591640472412
epoch: 148 training_loss 0.10010505307465792 test_loss: 0.09456539154052734
epoch: 149 training_loss 0.09864533632993698 test_loss: 0.09097689986228943
episode: 0 training return: -1017.8414251077562
episode: 1 training return: -1014.22348670505
episode: 2 training return: -1015.5357477548665
episode: 3 training return: -1033.2204389904307
epoch: 1 test_true_pfm: -1.150153590347964 sim_pfm: -999.9593034615851
episode: 4 training return: -1012.3828884583254
episode: 5 training return: -1028.481325190326
episode: 6 training return: -1025.7238003995246
episode: 7 training return: -1011.4343266211544
epoch: 2 test_true_pfm: -11.934662829905468 sim_pfm: -999.9782228171092
episode: 8 training return: -1006.7270631988913
episode: 9 training return: -1019.9684365809588
episode: 10 training return: -1013.7550754398237
episode: 11 training return: -1008.9545296716979
epoch: 3 test_true_pfm: -1.6810015829158145 sim_pfm: -999.9606394711427
episode: 12 training return: -1008.1424101632015
episode: 13 training return: -1014.3041397785254
episode: 14 training return: -1004.4970654687634
episode: 15 training return: -1004.9739755088061
epoch: 4 test_true_pfm: -0.9592053410825908 sim_pfm: -999.960896838265
episode: 16 training return: -1002.3946464381223
episode: 17 training return: -1004.9649484673358
episode: 18 training return: -1000.4838962370553
episode: 19 training return: -1008.0375605841947
epoch: 5 test_true_pfm: -0.5797346247095668 sim_pfm: -999.9495522844163
episode: 20 training return: -1004.1360156467539
episode: 21 training return: -1001.1268844394439
episode: 22 training return: -1003.0421256558608
episode: 23 training return: -1009.541694859053
epoch: 6 test_true_pfm: -0.5422712842726577 sim_pfm: -999.9479852482951
episode: 24 training return: -1002.5589121703182
episode: 25 training return: -1003.9425757142824
episode: 26 training return: -1002.4886718975598
episode: 27 training return: -1000.876236073473
epoch: 7 test_true_pfm: -0.10282614113775346 sim_pfm: -999.9470626169091
episode: 28 training return: -1009.0296286643803
episode: 29 training return: -1008.9413328878506
episode: 30 training return: -1009.1719396775823
episode: 31 training return: -1001.9086419108816
epoch: 8 test_true_pfm: -0.39142960361673756 sim_pfm: -999.9481577175003
episode: 32 training return: -1005.4828437511904
episode: 33 training return: -1003.5199096220401
episode: 34 training return: -1010.1842874986872
episode: 35 training return: -1002.9644394001143
epoch: 9 test_true_pfm: -0.620557482623496 sim_pfm: -999.9466197164733
episode: 36 training return: -1017.5454702032227
episode: 37 training return: -1005.5681385684795
episode: 38 training return: -1010.1522489677624
episode: 39 training return: -1011.6187710292999
epoch: 10 test_true_pfm: -0.21000837801950023 sim_pfm: -999.9438516511264
episode: 40 training return: -1008.6486467975975
episode: 41 training return: -1010.2565328705716
episode: 42 training return: -1005.4397445151652
episode: 43 training return: -1007.0766145276033
epoch: 11 test_true_pfm: -0.9408410005960306 sim_pfm: -999.9439581936784
episode: 44 training return: -1006.4905217601859
episode: 45 training return: -1002.5592107197238
episode: 46 training return: -1000.1041058907755
episode: 47 training return: -1004.6311123192309
epoch: 12 test_true_pfm: -0.003346408897326255 sim_pfm: -999.9460788405312
episode: 48 training return: -1006.8319225702166
episode: 49 training return: -1003.864385797541
episode: 50 training return: -1004.1857482001219
episode: 51 training return: -1009.1300152835026
epoch: 13 test_true_pfm: -0.8064259542508142 sim_pfm: -999.9406706834883
episode: 52 training return: -1001.4892002107662
episode: 53 training return: -1005.0241972629503
episode: 54 training return: -1006.7213747910215
episode: 55 training return: -1010.1910727101249
epoch: 14 test_true_pfm: -0.34672448483190116 sim_pfm: -999.9468384114019
episode: 56 training return: -1010.8093182949908
episode: 57 training return: -1005.594022190082
episode: 58 training return: -1001.3895317146447
episode: 59 training return: -1005.9751720564657
epoch: 15 test_true_pfm: 0.13680020961346648 sim_pfm: -999.9421409663897
episode: 60 training return: -1005.3364707402784
episode: 61 training return: -1002.6091312099196
episode: 62 training return: -1003.8306930093197
episode: 63 training return: -1004.8383891883623
epoch: 16 test_true_pfm: -0.21079054160422964 sim_pfm: -999.9446652921857
episode: 64 training return: -1004.7935563113424
episode: 65 training return: -1001.6524120383592
episode: 66 training return: -1008.5037299847849
episode: 67 training return: -1004.5170754689901
epoch: 17 test_true_pfm: -0.49283436257815555 sim_pfm: -999.943018465498
episode: 68 training return: -1000.0339726378847
episode: 69 training return: -1008.6288296830621
episode: 70 training return: -1001.7982070459088
episode: 71 training return: -1008.9476841373161
epoch: 18 test_true_pfm: -0.861171311192691 sim_pfm: -999.9432783423166
episode: 72 training return: -1001.9371878867718
episode: 73 training return: -1008.4250714361248
episode: 74 training return: -1005.3817113955338
episode: 75 training return: -1005.3629502644793
epoch: 19 test_true_pfm: -0.10980079430909945 sim_pfm: -999.9464311237483
episode: 76 training return: -1003.7845082491195
episode: 77 training return: -1008.4584390199112
episode: 78 training return: -1001.8088068123045
episode: 79 training return: -1004.3030502850985
epoch: 20 test_true_pfm: -0.45376563092107536 sim_pfm: -999.948067576444
episode: 80 training return: -1008.1764213402045
episode: 81 training return: -1003.6750444236471
episode: 82 training return: -1002.6908762760543
episode: 83 training return: -1006.4370366752818
epoch: 21 test_true_pfm: -0.15932599646141646 sim_pfm: -999.944628866518
episode: 84 training return: -1003.271348192907
episode: 85 training return: -1001.3804114495198
episode: 86 training return: -1007.4981893926152
episode: 87 training return: -1006.2220572844628
epoch: 22 test_true_pfm: -0.6053497317731934 sim_pfm: -999.9434028525483
episode: 88 training return: -1003.5504406310806
episode: 89 training return: -1010.938272171058
episode: 90 training return: -1003.5673930633893
episode: 91 training return: -1009.0036327078213
epoch: 23 test_true_pfm: -0.25991344991939785 sim_pfm: -999.9424748150653
episode: 92 training return: -1003.6848416267308
episode: 93 training return: -1002.9762762519857
episode: 94 training return: -1004.224550402564
episode: 95 training return: -1002.2365150696774
epoch: 24 test_true_pfm: -0.6446554538205183 sim_pfm: -999.943747404198
episode: 96 training return: -1005.4914266923863
episode: 97 training return: -1000.1652310441872
episode: 98 training return: -1006.7412885886243
episode: 99 training return: -1015.1093201927947
epoch: 25 test_true_pfm: -0.5543305422364763 sim_pfm: -999.9431215596136
episode: 100 training return: -1005.0418389951898
episode: 101 training return: -1008.6278094725635
episode: 102 training return: -1005.0668880414939
episode: 103 training return: -1003.5041153106208
epoch: 26 test_true_pfm: -0.29993999898882534 sim_pfm: -999.942741289139
episode: 104 training return: -1005.5711893326328
episode: 105 training return: -1001.0397821970884
episode: 106 training return: -1002.3661311808366
episode: 107 training return: -1001.7647010708121
epoch: 27 test_true_pfm: -0.25138178499456254 sim_pfm: -999.9439573312217
episode: 108 training return: -1006.3204670395413
episode: 109 training return: -1004.5385420207008
episode: 110 training return: -1002.2012766334711
episode: 111 training return: -1013.7684792886149
epoch: 28 test_true_pfm: 0.07464272043283865 sim_pfm: -999.9412807699879
episode: 112 training return: -1003.2611878714296
episode: 113 training return: -1008.6018505626417
episode: 114 training return: -1003.9254610813272
episode: 115 training return: -1006.0349558854778
epoch: 29 test_true_pfm: -0.14139860390804893 sim_pfm: -999.941840512171
episode: 116 training return: -1000.4937499505465
episode: 117 training return: -1011.5180315807395
episode: 118 training return: -1004.8971356674095
episode: 119 training return: -1003.9804318493821
epoch: 30 test_true_pfm: -0.6982792090800899 sim_pfm: -999.9433796806543
episode: 120 training return: -1007.9158777025975
episode: 121 training return: -1003.1584102156901
episode: 122 training return: -1002.4975077520439
episode: 123 training return: -1000.5786059563267
epoch: 31 test_true_pfm: -0.6498152473806883 sim_pfm: -999.9442319810656
episode: 124 training return: -1000.4923073564862
episode: 125 training return: -1000.8607135390405
episode: 126 training return: -1008.9790712782526
episode: 127 training return: -1003.3841285147034
epoch: 32 test_true_pfm: -1.2793710774121234 sim_pfm: -999.9404515669329
episode: 128 training return: -1000.8521202964313
episode: 129 training return: -1009.9637112885172
episode: 130 training return: -1000.6014455544695
episode: 131 training return: -1000.2745220144359
epoch: 33 test_true_pfm: -0.44630685194979947 sim_pfm: -999.9452720688491
episode: 132 training return: -1006.4033049511406
episode: 133 training return: -1001.0123390147593
episode: 134 training return: -1007.9002567987709
episode: 135 training return: -1003.5488976637338
epoch: 34 test_true_pfm: -1.1983924483556427 sim_pfm: -999.9446264567995
episode: 136 training return: -1003.1327633184713
episode: 137 training return: -1005.0747305230171
episode: 138 training return: -1006.9387302405767
episode: 139 training return: -1009.7633757076563
epoch: 35 test_true_pfm: 0.17745496444192835 sim_pfm: -999.9435305010447
episode: 140 training return: -1014.3462158449915
episode: 141 training return: -999.8581079900658
episode: 142 training return: -1003.5926663347242
episode: 143 training return: -1001.1067359478777
epoch: 36 test_true_pfm: -0.07979509106724003 sim_pfm: -999.9452491632895
episode: 144 training return: -1011.8836610793163
episode: 145 training return: -1000.0674635575726
episode: 146 training return: -1005.6915375363278
episode: 147 training return: -1003.0492466888164
epoch: 37 test_true_pfm: -0.6287076222501629 sim_pfm: -999.944317860818
episode: 148 training return: -1004.3585074501776
episode: 149 training return: -1003.8019469276845
episode: 150 training return: -1003.8174109548856
episode: 151 training return: -1004.5164797058649
epoch: 38 test_true_pfm: -0.823329044147004 sim_pfm: -999.942797559468
episode: 152 training return: -1002.0889113996441
episode: 153 training return: -1004.2315882536967
episode: 154 training return: -1001.1847966871392
episode: 155 training return: -1004.6673287101662
epoch: 39 test_true_pfm: -1.0794210264237039 sim_pfm: -999.942360172978
episode: 156 training return: -999.9248252639455
episode: 157 training return: -1012.8181778700919
episode: 158 training return: -1004.1344118143787
episode: 159 training return: -1000.5998157294614
epoch: 40 test_true_pfm: -0.2921098130034362 sim_pfm: -999.9427410747625
episode: 160 training return: -1005.1711807714352
episode: 161 training return: -1000.08314053076
episode: 162 training return: -1003.8457616466453
episode: 163 training return: -1009.4313119682192
epoch: 41 test_true_pfm: -0.94738815948467 sim_pfm: -999.9436227925745
episode: 164 training return: -1007.762790507986
episode: 165 training return: -1005.8976462228826
episode: 166 training return: -1008.9658090683739
episode: 167 training return: -1000.5753161029442
epoch: 42 test_true_pfm: -0.8224696137742219 sim_pfm: -999.9421246262045
episode: 168 training return: -1003.6750829574489
episode: 169 training return: -1002.2445698474022
episode: 170 training return: -1004.7535614987714
episode: 171 training return: -1003.5550005180205
epoch: 43 test_true_pfm: -0.24514101850531125 sim_pfm: -999.939258592075
episode: 172 training return: -1002.1832034554101
episode: 173 training return: -1010.2451573411277
episode: 174 training return: -1008.3613250296773
episode: 175 training return: -1006.8584646920547
epoch: 44 test_true_pfm: -0.5346259252735763 sim_pfm: -999.9451532261081
episode: 176 training return: -1007.2089899125384
episode: 177 training return: -1004.0478781523706
episode: 178 training return: -1003.3539759420958
episode: 179 training return: -1008.6899043695358
epoch: 45 test_true_pfm: -0.20395616799640906 sim_pfm: -999.9447420002693
episode: 180 training return: -1003.505133030952
episode: 181 training return: -1006.1366216982893
episode: 182 training return: -1001.4575460871475
episode: 183 training return: -1006.750440224462
epoch: 46 test_true_pfm: -1.1173467681721074 sim_pfm: -999.9448808842803
episode: 184 training return: -1003.4836415175417
episode: 185 training return: -1004.1235076514197
episode: 186 training return: -1001.9074927826399
episode: 187 training return: -1001.0988301405761
epoch: 47 test_true_pfm: 0.011814908930737514 sim_pfm: -999.9394662931683
episode: 188 training return: -1002.6021778404927
episode: 189 training return: -1000.176818473941
episode: 190 training return: -1005.9805491473247
episode: 191 training return: -1004.8522514209794
epoch: 48 test_true_pfm: -0.696369380703236 sim_pfm: -999.9459890727508
episode: 192 training return: -999.9125265062672
episode: 193 training return: -1003.2377873122904
episode: 194 training return: -1002.1512923438199
episode: 195 training return: -1005.4325948209867
epoch: 49 test_true_pfm: -0.09689837776800934 sim_pfm: -999.9441516066281
episode: 196 training return: -1004.2205030840643
episode: 197 training return: -1004.005087838349
episode: 198 training return: -1002.8547944607135
episode: 199 training return: -999.917118345713
epoch: 50 test_true_pfm: -0.5751999642492821 sim_pfm: -999.9412861329212
episode: 200 training return: -1003.2041783584642
episode: 201 training return: -1003.4589345611261
episode: 202 training return: -1003.591196063031
episode: 203 training return: -1003.6873213024926
epoch: 51 test_true_pfm: -0.2553191282932716 sim_pfm: -999.9457668246623
episode: 204 training return: -1006.1322754849971
episode: 205 training return: -1004.2867062796716
episode: 206 training return: -1005.8097266561285
episode: 207 training return: -1004.4147513649071
epoch: 52 test_true_pfm: -0.23190639283024747 sim_pfm: -999.9426482263951
episode: 208 training return: -1009.8515764828701
episode: 209 training return: -1000.3380769840415
episode: 210 training return: -1009.0836382149662
episode: 211 training return: -1000.2062617345994
epoch: 53 test_true_pfm: -1.5265456402404027 sim_pfm: -999.9429978896878
episode: 212 training return: -1002.8629108707408
episode: 213 training return: -1001.8745584041556
episode: 214 training return: -1002.4948712315106
episode: 215 training return: -1002.0293010625519
epoch: 54 test_true_pfm: -0.14786347951631054 sim_pfm: -999.9430440722122
episode: 216 training return: -1003.8234475486546
episode: 217 training return: -1006.8963096698453
episode: 218 training return: -1005.5416692332684
episode: 219 training return: -1011.5827106640899
epoch: 55 test_true_pfm: -1.5079415449911793 sim_pfm: -999.9446721065063
episode: 220 training return: -1000.2447383486157
episode: 221 training return: -1002.3139459443488
episode: 222 training return: -1004.046931304492
episode: 223 training return: -1006.7462926141261
epoch: 56 test_true_pfm: -0.10154022160133098 sim_pfm: -999.943325743582
episode: 224 training return: -1009.7123429284312
episode: 225 training return: -1001.1657239981113
episode: 226 training return: -1005.3613885985196
episode: 227 training return: -1005.1897102117533
epoch: 57 test_true_pfm: -0.2674194374324366 sim_pfm: -999.9465266715296
episode: 228 training return: -1003.2263523274285
episode: 229 training return: -1008.9824210081796
episode: 230 training return: -999.9824041363443
episode: 231 training return: -1003.8842691170937
epoch: 58 test_true_pfm: -0.5296636869640706 sim_pfm: -999.9453548585288
episode: 232 training return: -1005.0153776671139
episode: 233 training return: -1004.1049262493672
episode: 234 training return: -1004.1299184843817
episode: 235 training return: -1002.5378223386932
epoch: 59 test_true_pfm: -0.847721331212698 sim_pfm: -999.9458009052729
episode: 236 training return: -1009.4297690870102
episode: 237 training return: -1006.3351575906729
episode: 238 training return: -1005.0901748833373
episode: 239 training return: -1000.917807509071
epoch: 60 test_true_pfm: 0.6135654386921899 sim_pfm: -999.9434187785131
episode: 240 training return: -1010.7865931750239
episode: 241 training return: -1003.8939324471578
episode: 242 training return: -1005.6080729859913
episode: 243 training return: -1000.4531061177094
epoch: 61 test_true_pfm: -0.790611013142076 sim_pfm: -999.9417544366687
episode: 244 training return: -999.8933009789531
episode: 245 training return: -1006.7586638969051
episode: 246 training return: -1002.2577538434692
episode: 247 training return: -1005.9824540768335
epoch: 62 test_true_pfm: 0.004531876935520744 sim_pfm: -999.9427757491795
episode: 248 training return: -1008.4650466520386
episode: 249 training return: -1001.6242982443681
episode: 250 training return: -1000.3382475455984
episode: 251 training return: -1005.643711242617
epoch: 63 test_true_pfm: -0.7734404648461847 sim_pfm: -999.9469009554633
episode: 252 training return: -1000.4547895202098
episode: 253 training return: -1009.5432877571382
episode: 254 training return: -1002.5702757021221
episode: 255 training return: -1002.923823390587
epoch: 64 test_true_pfm: -0.7659714595415054 sim_pfm: -999.9452587197328
episode: 256 training return: -1005.4177897568678
episode: 257 training return: -1002.5882406374437
episode: 258 training return: -1002.3174178891811
episode: 259 training return: -1002.2569371385152
epoch: 65 test_true_pfm: -0.2234172424397991 sim_pfm: -999.9440138119453
episode: 260 training return: -1005.9221521611277
episode: 261 training return: -1004.0424628125718
episode: 262 training return: -1000.2119494647785
episode: 263 training return: -1004.4934397284883
epoch: 66 test_true_pfm: -0.6495172127595948 sim_pfm: -999.9429879377977
episode: 264 training return: -1002.5913486405399
episode: 265 training return: -1002.249604611687
episode: 266 training return: -1005.7980159003297
episode: 267 training return: -1002.4758706786339
epoch: 67 test_true_pfm: 0.08810132794976973 sim_pfm: -999.9445653434228
episode: 268 training return: -1006.5396449424287
episode: 269 training return: -1001.1314731896769
episode: 270 training return: -1005.2875132702354
episode: 271 training return: -1000.4510840936725
epoch: 68 test_true_pfm: 0.38488657570595813 sim_pfm: -999.9403528172492
episode: 272 training return: -1009.2109515439844
episode: 273 training return: -1002.6773696467651
episode: 274 training return: -1002.2988402137448
episode: 275 training return: -1002.5341890568893
epoch: 69 test_true_pfm: -0.4551643016350014 sim_pfm: -999.94331448879
episode: 276 training return: -1000.0639434973449
episode: 277 training return: -1004.9762844502715
episode: 278 training return: -1006.1901415424984
episode: 279 training return: -1001.391744264733
epoch: 70 test_true_pfm: -0.6882138895281708 sim_pfm: -999.9451757776355
episode: 280 training return: -1012.0265525451418
episode: 281 training return: -1008.2350344103959
episode: 282 training return: -1004.751122747735
episode: 283 training return: -1002.6136334581035
epoch: 71 test_true_pfm: -0.2394873683612354 sim_pfm: -999.9440716335703
episode: 284 training return: -1005.7501201514973
episode: 285 training return: -1003.5991920872146
episode: 286 training return: -1005.4881405392059
episode: 287 training return: -1005.9183321869212
epoch: 72 test_true_pfm: -0.13519896868850684 sim_pfm: -999.944064461728
episode: 288 training return: -1006.6696754234188
episode: 289 training return: -1001.0462871776054
episode: 290 training return: -1004.0211398997084
episode: 291 training return: -1006.7059549019598
epoch: 73 test_true_pfm: -0.14999408120379037 sim_pfm: -999.9455654029606
episode: 292 training return: -1004.6783973980896
episode: 293 training return: -1008.7825940579504
episode: 294 training return: -1007.0906084871767
episode: 295 training return: -1006.7188453808484
epoch: 74 test_true_pfm: -0.050445648474806336 sim_pfm: -999.9427544318447
episode: 296 training return: -1003.691384426065
episode: 297 training return: -1006.3084428660846
episode: 298 training return: -1002.0607812536413
episode: 299 training return: -1000.6160878765938
epoch: 75 test_true_pfm: -0.9039274750356113 sim_pfm: -999.9453088631698
episode: 300 training return: -1002.1157887347653
episode: 301 training return: -1000.7343218855157
episode: 302 training return: -999.8573930323126
episode: 303 training return: -1002.8269282549948
epoch: 76 test_true_pfm: -0.678773379414281 sim_pfm: -999.9450323799183
episode: 304 training return: -1002.922548608594
episode: 305 training return: -1001.8548787181752
episode: 306 training return: -1005.3987911407118
episode: 307 training return: -1001.989783097561
epoch: 77 test_true_pfm: -0.6723833999965391 sim_pfm: -999.9439003073898
episode: 308 training return: -1002.7614217333584
episode: 309 training return: -1001.619419359706
episode: 310 training return: -1016.4723597054402
episode: 311 training return: -1007.4584620503994
epoch: 78 test_true_pfm: -0.28863391045540315 sim_pfm: -999.9462825265623
episode: 312 training return: -1004.215334298566
episode: 313 training return: -1002.9886411964344
episode: 314 training return: -1003.0838987417594
episode: 315 training return: -1004.102159459111
epoch: 79 test_true_pfm: -0.44310381322841863 sim_pfm: -999.9448971637494
episode: 316 training return: -1000.5590094574534
episode: 317 training return: -1002.6327563384722
episode: 318 training return: -999.901926503237
episode: 319 training return: -1009.3632484909857
epoch: 80 test_true_pfm: -0.476743512200876 sim_pfm: -999.9446132118534
episode: 320 training return: -1004.3927509332498
episode: 321 training return: -1004.4527927872433
episode: 322 training return: -1000.7687140900905
episode: 323 training return: -1006.1937224285243
epoch: 81 test_true_pfm: -0.42769240082351717 sim_pfm: -999.9459406329335
episode: 324 training return: -1006.1297063553643
episode: 325 training return: -1001.1087857402823
episode: 326 training return: -1006.0477811149335
episode: 327 training return: -1005.4206488189042
epoch: 82 test_true_pfm: 0.32669363063846596 sim_pfm: -999.9436180783428
episode: 328 training return: -1002.2957285755822
episode: 329 training return: -1004.0677063345345
episode: 330 training return: -1005.8256764384731
episode: 331 training return: -1013.2205256872727
epoch: 83 test_true_pfm: -0.5120875154787604 sim_pfm: -999.9435652418188
episode: 332 training return: -1007.4593169549202
episode: 333 training return: -1001.1729601259774
episode: 334 training return: -1004.9220839120361
episode: 335 training return: -1001.3408895050757
epoch: 84 test_true_pfm: -0.5595821879359112 sim_pfm: -999.9445635114407
episode: 336 training return: -1005.05290745243
episode: 337 training return: -1001.6787463612532
episode: 338 training return: -1003.0088333655134
episode: 339 training return: -1010.153121016821
epoch: 85 test_true_pfm: -0.004090263949543853 sim_pfm: -999.9450182364907
episode: 340 training return: -1003.6731632482063
episode: 341 training return: -999.9057529847705
episode: 342 training return: -1004.0106876391923
episode: 343 training return: -1005.3066556114436
epoch: 86 test_true_pfm: -0.5116969704149398 sim_pfm: -999.9446487271167
episode: 344 training return: -1003.1687724957077
episode: 345 training return: -1005.9743433330093
episode: 346 training return: -1002.0385773897634
episode: 347 training return: -1002.8503102938546
epoch: 87 test_true_pfm: 0.20156296193718878 sim_pfm: -999.9478243639413
episode: 348 training return: -1005.944720657465
episode: 349 training return: -1004.7023091664624
episode: 350 training return: -1002.429645935249
episode: 351 training return: -1003.522675470038
epoch: 88 test_true_pfm: -0.16164663398487855 sim_pfm: -999.9462238415799
episode: 352 training return: -1008.6342698642447
episode: 353 training return: -1003.0456765661662
episode: 354 training return: -1001.2313322087662
episode: 355 training return: -1003.9923471125949
epoch: 89 test_true_pfm: -0.37067573570603257 sim_pfm: -999.9460941505677
episode: 356 training return: -1004.4169493483812
episode: 357 training return: -1004.0768814044609
episode: 358 training return: -1003.7329918395327
episode: 359 training return: -1003.339135303775
epoch: 90 test_true_pfm: -0.4708842780349319 sim_pfm: -999.945348620486
episode: 360 training return: -1001.1318155457525
episode: 361 training return: -1002.4572106129133
episode: 362 training return: -1001.1283689647943
episode: 363 training return: -1002.0977983431967
epoch: 91 test_true_pfm: -0.591698011839289 sim_pfm: -999.9450646744775
episode: 364 training return: -1006.7123407836533
episode: 365 training return: -1005.8628258163216
episode: 366 training return: -1006.3755635163523
episode: 367 training return: -1003.7453790385092
epoch: 92 test_true_pfm: -0.7250474165694986 sim_pfm: -999.9447882961202
episode: 368 training return: -1009.5439835237596
episode: 369 training return: -1007.8184213743522
episode: 370 training return: -1002.4576600092179
episode: 371 training return: -1003.2588063065735
epoch: 93 test_true_pfm: -0.6885258309872978 sim_pfm: -999.9457263240515
episode: 372 training return: -1001.6351715919229
episode: 373 training return: -1002.8323160503139
episode: 374 training return: -1005.6418371603147
episode: 375 training return: -1001.507540811021
epoch: 94 test_true_pfm: -0.4019626479783594 sim_pfm: -999.9470090454619
episode: 376 training return: -1002.3129251819994
episode: 377 training return: -1007.6857078105731
episode: 378 training return: -1007.4920476781346
episode: 379 training return: -1004.9873217217521
epoch: 95 test_true_pfm: -0.48750735193903477 sim_pfm: -999.9436079606445
episode: 380 training return: -999.9870769044956
episode: 381 training return: -1002.3117675027266
episode: 382 training return: -1003.6895248993466
episode: 383 training return: -1006.5198558503629
epoch: 96 test_true_pfm: 0.43632648704931426 sim_pfm: -999.9488940825162
episode: 384 training return: -1002.7586079111178
episode: 385 training return: -1005.2989042653873
episode: 386 training return: -1001.3789309994867
episode: 387 training return: -1002.4857607520125
epoch: 97 test_true_pfm: -0.27280340629216987 sim_pfm: -999.9458761797297
episode: 388 training return: -1002.7940801744958
episode: 389 training return: -1001.8503558253728
episode: 390 training return: -1002.85052252922
episode: 391 training return: -1001.3750595178107
epoch: 98 test_true_pfm: -0.7252531632572149 sim_pfm: -999.9437983736689
episode: 392 training return: -1009.0800126618993
episode: 393 training return: -1003.6153191021973
episode: 394 training return: -1001.3982571714575
episode: 395 training return: -1001.2670497211349
epoch: 99 test_true_pfm: -0.8296151829008807 sim_pfm: -999.9467591891761
episode: 396 training return: -1005.8119171725584
episode: 397 training return: -1007.0093838041496
episode: 398 training return: -1000.1851413175966
episode: 399 training return: -1003.9281927323269
epoch: 100 test_true_pfm: -0.661800111768526 sim_pfm: -999.9451660248302
episode: 400 training return: -1003.4974294063704
episode: 401 training return: -1001.7302178337129
episode: 402 training return: -1006.151064242274
episode: 403 training return: -1003.5447577272319
epoch: 101 test_true_pfm: -0.3844450796847829 sim_pfm: -999.9441288615685
episode: 404 training return: -1002.0052823979664
episode: 405 training return: -1007.3270438081139
episode: 406 training return: -1003.2235993038507
episode: 407 training return: -1003.896350639067
epoch: 102 test_true_pfm: -0.6455903105829478 sim_pfm: -999.9452332680099
episode: 408 training return: -1004.7846951425048
episode: 409 training return: -1004.0460756443504
episode: 410 training return: -1005.5037615678567
episode: 411 training return: -1001.3054034515911
epoch: 103 test_true_pfm: -0.5824949085799672 sim_pfm: -999.9455318633803
episode: 412 training return: -1006.0980050870666
episode: 413 training return: -1000.9363922272142
episode: 414 training return: -1004.8511838200984
episode: 415 training return: -1005.8957949320404
epoch: 104 test_true_pfm: -0.39530539883013277 sim_pfm: -999.9457522118969
episode: 416 training return: -1003.8561650117584
episode: 417 training return: -1000.2036864872974
episode: 418 training return: -1001.5982666815236
episode: 419 training return: -1001.6167474768664
epoch: 105 test_true_pfm: -1.0683993199227853 sim_pfm: -999.9464914171135
episode: 420 training return: -1000.5900407163193
episode: 421 training return: -1005.783528246292
episode: 422 training return: -1004.9287753282534
episode: 423 training return: -1001.8481549365473
epoch: 106 test_true_pfm: -0.21597754064457 sim_pfm: -999.9482551473383
episode: 424 training return: -1003.4193537546837
episode: 425 training return: -1006.2419901356153
episode: 426 training return: -1002.2101420292422
episode: 427 training return: -1005.8211324331367
epoch: 107 test_true_pfm: 0.1474533658824446 sim_pfm: -999.9434474705737
episode: 428 training return: -1000.496533582148
episode: 429 training return: -1002.7486636883906
episode: 430 training return: -1001.6265672494545
episode: 431 training return: -1008.7568808517112
epoch: 108 test_true_pfm: 0.013016546052515253 sim_pfm: -999.9449388513302
episode: 432 training return: -1003.3897655632705
episode: 433 training return: -1005.5890174484608
episode: 434 training return: -1008.9280071106327
episode: 435 training return: -1004.3727071846376
epoch: 109 test_true_pfm: -0.5219592509311347 sim_pfm: -999.9435799239694
episode: 436 training return: -1003.4259516281733
episode: 437 training return: -1011.0031652203414
episode: 438 training return: -1008.9792002927527
episode: 439 training return: -1000.782558934091
epoch: 110 test_true_pfm: -0.3357347494314232 sim_pfm: -999.9470642141736
episode: 440 training return: -1000.7284710219126
episode: 441 training return: -1005.0505907059488
episode: 442 training return: -1004.2058831474111
episode: 443 training return: -1006.2775322369864
epoch: 111 test_true_pfm: -0.9030742100442054 sim_pfm: -999.9464473480938
episode: 444 training return: -1005.147214812594
episode: 445 training return: -1002.2716438048637
episode: 446 training return: -1002.0098883881384
episode: 447 training return: -1004.2602053036871
epoch: 112 test_true_pfm: 0.1652521183897813 sim_pfm: -999.9486902002185
episode: 448 training return: -1009.0185460576846
episode: 449 training return: -1000.9141829835089
episode: 450 training return: -1003.4215056378961
episode: 451 training return: -1000.1819286987492
epoch: 113 test_true_pfm: -0.5093811973370895 sim_pfm: -999.945752270588
episode: 452 training return: -1000.5300072448587
episode: 453 training return: -1003.2484586586734
episode: 454 training return: -1000.5272295018044
episode: 455 training return: -1005.4234945410354
epoch: 114 test_true_pfm: -0.7833706740314886 sim_pfm: -999.9453477121102
episode: 456 training return: -1001.1079385445165
episode: 457 training return: -999.7967237708909
episode: 458 training return: -1008.102496187159
episode: 459 training return: -1007.6269972928477
epoch: 115 test_true_pfm: -0.061994983194040336 sim_pfm: -999.9467796977864
episode: 460 training return: -1008.164621607612
episode: 461 training return: -1001.9730024537412
episode: 462 training return: -1000.9447535534741
episode: 463 training return: -1000.7038915112105
epoch: 116 test_true_pfm: -0.5091765357275646 sim_pfm: -999.9449865023889
episode: 464 training return: -1005.2079195526294
episode: 465 training return: -1001.4230161709687
episode: 466 training return: -999.9053917569612
episode: 467 training return: -1005.2577372686728
epoch: 117 test_true_pfm: 0.2610397410384116 sim_pfm: -999.9475506982071
episode: 468 training return: -1000.0041377611493
episode: 469 training return: -1005.6449800410041
episode: 470 training return: -1004.7485813670918
episode: 471 training return: -1002.20458962727
epoch: 118 test_true_pfm: -0.131217229866463 sim_pfm: -999.9439055766483
episode: 472 training return: -1004.7766887532533
episode: 473 training return: -1002.0506925235343
episode: 474 training return: -1001.4061383172442
episode: 475 training return: -1002.8458452003019
epoch: 119 test_true_pfm: -0.6758748778953662 sim_pfm: -999.9465274501862
episode: 476 training return: -1002.6271287330072
episode: 477 training return: -999.9325391265256
episode: 478 training return: -1003.6506143889225
episode: 479 training return: -1002.3580412918801
epoch: 120 test_true_pfm: 0.16559361207856996 sim_pfm: -999.9464738532514
episode: 480 training return: -1004.8044940382497
episode: 481 training return: -1005.2905249584393
episode: 482 training return: -1006.1314174817219
episode: 483 training return: -1001.1952557920798
epoch: 121 test_true_pfm: -0.985548848191956 sim_pfm: -999.9479970905473
episode: 484 training return: -1008.1135500723457
episode: 485 training return: -1003.7815991735472
episode: 486 training return: -1006.733894584226
episode: 487 training return: -1001.0017528776897
epoch: 122 test_true_pfm: -1.1421327182821637 sim_pfm: -999.9469476780938
episode: 488 training return: -1001.1036857843453
episode: 489 training return: -1001.2579672770408
episode: 490 training return: -1006.0393445890594
episode: 491 training return: -1008.8624388583323
epoch: 123 test_true_pfm: -0.557518737391833 sim_pfm: -999.9444309468759
episode: 492 training return: -1003.3993276985963
episode: 493 training return: -1000.028288592751
episode: 494 training return: -1002.1728682476369
episode: 495 training return: -1001.4780696025579
epoch: 124 test_true_pfm: -0.005605640135383005 sim_pfm: -999.9481371791886
episode: 496 training return: -1003.012142977704
episode: 497 training return: -1008.93559890976
episode: 498 training return: -1003.9834938577128
episode: 499 training return: -1007.8442956360252
epoch: 125 test_true_pfm: 0.3742810833302244 sim_pfm: -999.9456768988078
episode: 500 training return: -1005.8237238256239
episode: 501 training return: -1002.2483804946796
episode: 502 training return: -1009.1240144522793
episode: 503 training return: -1001.7146230636413
epoch: 126 test_true_pfm: -0.1992037265785751 sim_pfm: -999.9450541372822
episode: 504 training return: -1002.2405720931948
episode: 505 training return: -1000.700249457093
episode: 506 training return: -1005.099656179257
episode: 507 training return: -1002.7012270067452
epoch: 127 test_true_pfm: 0.07856945798923924 sim_pfm: -999.9466356007612
episode: 508 training return: -1000.8656373171602
episode: 509 training return: -1005.692480801701
episode: 510 training return: -1000.9954661329335
episode: 511 training return: -1004.1772968965562
epoch: 128 test_true_pfm: -0.6498619956588408 sim_pfm: -999.9461412534782
episode: 512 training return: -1002.966467988683
episode: 513 training return: -1003.4156546689102
episode: 514 training return: -1002.5825690156772
episode: 515 training return: -1003.5102946876926
epoch: 129 test_true_pfm: -0.32353352856743056 sim_pfm: -999.9458075875937
episode: 516 training return: -1006.0034813751469
episode: 517 training return: -1002.1470739021602
episode: 518 training return: -1009.9795803858831
episode: 519 training return: -1004.1683636091385
epoch: 130 test_true_pfm: -0.8712864866955837 sim_pfm: -999.9459918282968
episode: 520 training return: -1005.9377994955998
episode: 521 training return: -1008.0241803976103
episode: 522 training return: -1004.275503869577
episode: 523 training return: -1005.8418882143447
epoch: 131 test_true_pfm: 0.274929879312535 sim_pfm: -999.9464898510514
episode: 524 training return: -1002.5830371778238
episode: 525 training return: -1000.0383383806656
episode: 526 training return: -1005.2304686558064
episode: 527 training return: -1005.6206458071102
epoch: 132 test_true_pfm: 0.33569203529450475 sim_pfm: -999.9467787780277
episode: 528 training return: -1003.8715575774324
episode: 529 training return: -1004.022679455578
episode: 530 training return: -1004.1182593602125
episode: 531 training return: -1004.9602786302859
epoch: 133 test_true_pfm: -0.6815942224801352 sim_pfm: -999.945881122697
episode: 532 training return: -1001.0756078383203
episode: 533 training return: -1006.0070005397893
episode: 534 training return: -1003.0250288286278
episode: 535 training return: -1001.852706775518
epoch: 134 test_true_pfm: -0.5516987549018094 sim_pfm: -999.9481043025579
episode: 536 training return: -1003.3727927440553
episode: 537 training return: -1003.7353952055155
episode: 538 training return: -1001.4626528245627
episode: 539 training return: -1007.1539985238375
epoch: 135 test_true_pfm: 0.25863030280464755 sim_pfm: -999.9467910214177
episode: 540 training return: -1003.0220824466711
episode: 541 training return: -1003.8947356219035
episode: 542 training return: -1004.7689624256993
episode: 543 training return: -1006.1892050815836
epoch: 136 test_true_pfm: 0.0806213260847624 sim_pfm: -999.9452133969497
episode: 544 training return: -1002.9818224193224
episode: 545 training return: -1006.0205379194239
episode: 546 training return: -1004.1744414431214
episode: 547 training return: -1007.9571589995237
epoch: 137 test_true_pfm: -0.5349775242037387 sim_pfm: -999.9441944548058
episode: 548 training return: -1004.5599276720772
episode: 549 training return: -1006.0982215364863
episode: 550 training return: -1002.5952822948999
episode: 551 training return: -1002.8558616300772
epoch: 138 test_true_pfm: -0.8825786121274826 sim_pfm: -999.9494271248444
episode: 552 training return: -1002.0536311131218
episode: 553 training return: -1004.9221280430862
episode: 554 training return: -1001.8090473129175
episode: 555 training return: -1000.157118222019
epoch: 139 test_true_pfm: -0.8125912994946468 sim_pfm: -999.9473487180145
episode: 556 training return: -1007.1912637533065
episode: 557 training return: -1009.0166703386867
episode: 558 training return: -1008.5703754083592
episode: 559 training return: -1000.709243938012
epoch: 140 test_true_pfm: -0.4902210745050337 sim_pfm: -999.9473969369451
episode: 560 training return: -1003.8912305035491
episode: 561 training return: -1001.0618673410208
episode: 562 training return: -1004.4117813549163
episode: 563 training return: -1004.0475716526108
epoch: 141 test_true_pfm: -0.589428220605461 sim_pfm: -999.9466797677333
episode: 564 training return: -1009.8417150194904
episode: 565 training return: -1003.8023392184219
episode: 566 training return: -1008.0851609070592
episode: 567 training return: -1005.6000196314321
epoch: 142 test_true_pfm: -0.3984058331854135 sim_pfm: -999.9477229967961
episode: 568 training return: -1009.550348054505
episode: 569 training return: -1005.1970051537213
episode: 570 training return: -1000.9850691177635
episode: 571 training return: -1001.3172329672442
epoch: 143 test_true_pfm: -0.952841050365319 sim_pfm: -999.9459173599479
episode: 572 training return: -1014.6327764105617
episode: 573 training return: -1004.7715384434517
episode: 574 training return: -1002.4740405547363
episode: 575 training return: -1005.0109806413546
epoch: 144 test_true_pfm: -0.49535157801056173 sim_pfm: -999.9441947773676
episode: 576 training return: -1008.731086081501
episode: 577 training return: -1005.7971437076277
episode: 578 training return: -1007.1818672086555
episode: 579 training return: -1003.102930374532
epoch: 145 test_true_pfm: -1.3829265302186646 sim_pfm: -999.9434344952323
episode: 580 training return: -1007.5830465209008
episode: 581 training return: -1003.7619640281597
episode: 582 training return: -1003.3162540812795
episode: 583 training return: -1000.2231309869227
epoch: 146 test_true_pfm: -0.6467072047958146 sim_pfm: -999.9424261108483
episode: 584 training return: -999.9569341828071
episode: 585 training return: -1009.7228751993008
episode: 586 training return: -1001.1241477279613
episode: 587 training return: -1002.7211725256485
epoch: 147 test_true_pfm: -0.3810098502870631 sim_pfm: -999.9467558931583
episode: 588 training return: -1003.2852653928924
episode: 589 training return: -1001.0260152616389
episode: 590 training return: -1007.7735547642978
episode: 591 training return: -1007.0997099022014
epoch: 148 test_true_pfm: -0.16131771480581789 sim_pfm: -999.9471638451208
episode: 592 training return: -1000.0128184720966
episode: 593 training return: -1006.6033937332447
episode: 594 training return: -1006.055751443289
episode: 595 training return: -1003.2629904684072
epoch: 149 test_true_pfm: -0.25643777344278795 sim_pfm: -999.9449276419592
episode: 596 training return: -1003.5576989795321
episode: 597 training return: -1002.3835919282832
episode: 598 training return: -1004.2673717033455
episode: 599 training return: -1003.1412732998288
epoch: 150 test_true_pfm: 0.5200845178143771 sim_pfm: -999.9517050220629
