['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'mixed', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 0.2091319140791893 test_loss: 0.1614466667175293
epoch: 1 training_loss 0.1426605112105608 test_loss: 0.13351874351501464
epoch: 2 training_loss 0.13030347365885972 test_loss: 0.11002386808395385
epoch: 3 training_loss 0.12525577459484338 test_loss: 0.12331368923187255
epoch: 4 training_loss 0.12147266168147325 test_loss: 0.13785213232040405
epoch: 5 training_loss 0.11558829072862864 test_loss: 0.11960045099258423
epoch: 6 training_loss 0.11197584256529808 test_loss: 0.11683051586151123
epoch: 7 training_loss 0.11229086872190237 test_loss: 0.12359025478363037
epoch: 8 training_loss 0.10924154553562403 test_loss: 0.118235182762146
epoch: 9 training_loss 0.11383379895240069 test_loss: 0.10216710567474366
epoch: 10 training_loss 0.10844451949000358 test_loss: 0.10481452941894531
epoch: 11 training_loss 0.11242247559130192 test_loss: 0.10467950105667115
epoch: 12 training_loss 0.10492826364934445 test_loss: 0.11190609931945801
epoch: 13 training_loss 0.10679836012423038 test_loss: 0.1034460186958313
epoch: 14 training_loss 0.10239479664713144 test_loss: 0.1124683141708374
epoch: 15 training_loss 0.10114064652472735 test_loss: 0.10816624164581298
epoch: 16 training_loss 0.10485490251332522 test_loss: 0.10159045457839966
epoch: 17 training_loss 0.09762970481067895 test_loss: 0.10119434595108032
epoch: 18 training_loss 0.09834458947181701 test_loss: 0.09937052726745606
epoch: 19 training_loss 0.09041496830061078 test_loss: 0.09699608087539673
epoch: 20 training_loss 0.09618818391114474 test_loss: 0.0894711434841156
epoch: 21 training_loss 0.09733037926256656 test_loss: 0.09967620372772217
epoch: 22 training_loss 0.09362045515328646 test_loss: 0.09927623271942139
epoch: 23 training_loss 0.08887068003416061 test_loss: 0.10408581495285034
epoch: 24 training_loss 0.09581190997734666 test_loss: 0.09145304560661316
epoch: 25 training_loss 0.0884341536089778 test_loss: 0.10376414060592651
epoch: 26 training_loss 0.09338783986866474 test_loss: 0.10030151605606079
epoch: 27 training_loss 0.08773354060947895 test_loss: 0.08643590211868286
epoch: 28 training_loss 0.08953644659370184 test_loss: 0.06591115593910217
epoch: 29 training_loss 0.08515982473269106 test_loss: 0.08326843976974488
epoch: 30 training_loss 0.08558058328926563 test_loss: 0.10938684940338135
epoch: 31 training_loss 0.07690985338762403 test_loss: 0.08109772205352783
epoch: 32 training_loss 0.07660325072705745 test_loss: 0.07864629626274108
epoch: 33 training_loss 0.07983486624434591 test_loss: 0.07294175028800964
epoch: 34 training_loss 0.07745606657117605 test_loss: 0.086235511302948
epoch: 35 training_loss 0.08006550915539265 test_loss: 0.09571297168731689
epoch: 36 training_loss 0.08414523659273981 test_loss: 0.07108311057090759
epoch: 37 training_loss 0.07697556575760245 test_loss: 0.07899249196052552
epoch: 38 training_loss 0.07677813120186329 test_loss: 0.08661499619483948
epoch: 39 training_loss 0.0747318721935153 test_loss: 0.08042742013931274
epoch: 40 training_loss 0.0728834879398346 test_loss: 0.07111547589302063
epoch: 41 training_loss 0.077467095637694 test_loss: 0.0838126540184021
epoch: 42 training_loss 0.07773183017969132 test_loss: 0.08528289198875427
epoch: 43 training_loss 0.08160665709525347 test_loss: 0.07599663138389587
epoch: 44 training_loss 0.07578267009928823 test_loss: 0.0781169056892395
epoch: 45 training_loss 0.07065232891589403 test_loss: 0.06902859210968018
epoch: 46 training_loss 0.07122386141680181 test_loss: 0.07859990000724792
epoch: 47 training_loss 0.07300931382924318 test_loss: 0.07398416996002197
epoch: 48 training_loss 0.07516007451340556 test_loss: 0.07469695806503296
epoch: 49 training_loss 0.07552437780424953 test_loss: 0.08123286366462708
epoch: 50 training_loss 0.07002794531174004 test_loss: 0.06791669130325317
epoch: 51 training_loss 0.07259462030604481 test_loss: 0.0740941345691681
epoch: 52 training_loss 0.07171489356085659 test_loss: 0.08602164387702942
epoch: 53 training_loss 0.07143620692193509 test_loss: 0.08145512342453003
epoch: 54 training_loss 0.0662866041995585 test_loss: 0.07858827114105224
epoch: 55 training_loss 0.0729706466384232 test_loss: 0.06392067074775695
epoch: 56 training_loss 0.07679300116375089 test_loss: 0.06980550885200501
epoch: 57 training_loss 0.07040895802900195 test_loss: 0.062202101945877074
epoch: 58 training_loss 0.07131402546539903 test_loss: 0.07605612277984619
epoch: 59 training_loss 0.06872049640864134 test_loss: 0.06143571138381958
epoch: 60 training_loss 0.07147270367480814 test_loss: 0.07495539784431457
epoch: 61 training_loss 0.06449873266741633 test_loss: 0.06602969169616699
epoch: 62 training_loss 0.06930148063693195 test_loss: 0.06022595763206482
epoch: 63 training_loss 0.07025665566325187 test_loss: 0.07892664670944213
epoch: 64 training_loss 0.072462017275393 test_loss: 0.07244798541069031
epoch: 65 training_loss 0.06838773816823959 test_loss: 0.06529403924942016
epoch: 66 training_loss 0.06704616943374277 test_loss: 0.07268146276474
epoch: 67 training_loss 0.07268852898851037 test_loss: 0.07346411943435668
epoch: 68 training_loss 0.07149250488728284 test_loss: 0.07465718984603882
epoch: 69 training_loss 0.06792904347181321 test_loss: 0.06686009764671326
epoch: 70 training_loss 0.0680818421766162 test_loss: 0.07527354955673218
epoch: 71 training_loss 0.07428311310708523 test_loss: 0.06342816352844238
epoch: 72 training_loss 0.06842573557049036 test_loss: 0.0702245593070984
epoch: 73 training_loss 0.06533599263988435 test_loss: 0.06798115372657776
epoch: 74 training_loss 0.07103083333000541 test_loss: 0.06336516737937928
epoch: 75 training_loss 0.07222973331809043 test_loss: 0.08142303228378296
epoch: 76 training_loss 0.07428706664592027 test_loss: 0.08080232739448548
epoch: 77 training_loss 0.06744367429986596 test_loss: 0.06681566834449768
epoch: 78 training_loss 0.06838982725515962 test_loss: 0.0753325343132019
epoch: 79 training_loss 0.0750456758402288 test_loss: 0.0800609529018402
epoch: 80 training_loss 0.06552472166717052 test_loss: 0.07343099117279053
epoch: 81 training_loss 0.06902194192633032 test_loss: 0.07162773609161377
epoch: 82 training_loss 0.061458844020962716 test_loss: 0.06434055566787719
epoch: 83 training_loss 0.06620914538390935 test_loss: 0.06535977721214295
epoch: 84 training_loss 0.0662002969905734 test_loss: 0.07990944981575013
epoch: 85 training_loss 0.06748943451792001 test_loss: 0.05946750044822693
epoch: 86 training_loss 0.07100420771166682 test_loss: 0.06479811668395996
epoch: 87 training_loss 0.06328859694302082 test_loss: 0.061523228883743286
epoch: 88 training_loss 0.06659335993230343 test_loss: 0.06874129176139832
epoch: 89 training_loss 0.06640336994081736 test_loss: 0.07616626024246216
epoch: 90 training_loss 0.06293784281238914 test_loss: 0.06858777403831481
epoch: 91 training_loss 0.06809883292764425 test_loss: 0.08569232821464538
epoch: 92 training_loss 0.07078890299424528 test_loss: 0.07056448459625245
epoch: 93 training_loss 0.07592947350814938 test_loss: 0.0720298707485199
epoch: 94 training_loss 0.06991817813366652 test_loss: 0.07421641349792481
epoch: 95 training_loss 0.0643147887662053 test_loss: 0.07845276594161987
epoch: 96 training_loss 0.06799531605094672 test_loss: 0.06418071389198303
epoch: 97 training_loss 0.0643992929533124 test_loss: 0.06334955096244813
epoch: 98 training_loss 0.06544622365385294 test_loss: 0.062195616960525515
epoch: 99 training_loss 0.06764208430424333 test_loss: 0.06253890991210938
epoch: 100 training_loss 0.06687100315466524 test_loss: 0.05745335817337036
epoch: 101 training_loss 0.06415931433439255 test_loss: 0.06794726848602295
epoch: 102 training_loss 0.06554423335939646 test_loss: 0.0757525086402893
epoch: 103 training_loss 0.06744603836908937 test_loss: 0.06811600923538208
epoch: 104 training_loss 0.07141235483810306 test_loss: 0.07404212951660157
epoch: 105 training_loss 0.06358161026611925 test_loss: 0.06714032888412476
epoch: 106 training_loss 0.06813029589131475 test_loss: 0.06646718978881835
epoch: 107 training_loss 0.06210113544017076 test_loss: 0.06541173458099366
epoch: 108 training_loss 0.06678862763568759 test_loss: 0.07247356176376343
epoch: 109 training_loss 0.0653708801791072 test_loss: 0.05947850346565246
epoch: 110 training_loss 0.07233817998319864 test_loss: 0.07356684803962707
epoch: 111 training_loss 0.06719457361847163 test_loss: 0.06241651773452759
epoch: 112 training_loss 0.06154343629255891 test_loss: 0.05799562931060791
epoch: 113 training_loss 0.06440456688404084 test_loss: 0.0691942572593689
epoch: 114 training_loss 0.06436082564294338 test_loss: 0.07849194407463074
epoch: 115 training_loss 0.06424312280490994 test_loss: 0.06483901143074036
epoch: 116 training_loss 0.06320561917498707 test_loss: 0.07109286785125732
epoch: 117 training_loss 0.06633962114341557 test_loss: 0.06585822105407715
epoch: 118 training_loss 0.0676563212648034 test_loss: 0.0635818064212799
epoch: 119 training_loss 0.06310092955827713 test_loss: 0.06531329154968261
epoch: 120 training_loss 0.06529805237427354 test_loss: 0.05791916847229004
epoch: 121 training_loss 0.06163949819281697 test_loss: 0.06735621690750122
epoch: 122 training_loss 0.058985097091645004 test_loss: 0.06431838274002075
epoch: 123 training_loss 0.06772553538903595 test_loss: 0.06487029790878296
epoch: 124 training_loss 0.06266743243671953 test_loss: 0.06266697645187377
epoch: 125 training_loss 0.06292480563744902 test_loss: 0.06820790767669678
epoch: 126 training_loss 0.06447165683843195 test_loss: 0.07362126111984253
epoch: 127 training_loss 0.06980795461684465 test_loss: 0.06496786475181579
epoch: 128 training_loss 0.06363773580640554 test_loss: 0.0645987868309021
epoch: 129 training_loss 0.06337003689259291 test_loss: 0.07172710299491883
epoch: 130 training_loss 0.0653670178540051 test_loss: 0.07153933644294738
epoch: 131 training_loss 0.06438591619953513 test_loss: 0.0659312665462494
epoch: 132 training_loss 0.0633721860870719 test_loss: 0.06233828067779541
epoch: 133 training_loss 0.06724813982844352 test_loss: 0.07034512758255004
epoch: 134 training_loss 0.06872974624857306 test_loss: 0.06004843711853027
epoch: 135 training_loss 0.0627516614831984 test_loss: 0.08254477381706238
epoch: 136 training_loss 0.06382447872310877 test_loss: 0.06949605345726013
epoch: 137 training_loss 0.057667624615132806 test_loss: 0.055745828151702884
epoch: 138 training_loss 0.06680573828518391 test_loss: 0.06343652606010437
epoch: 139 training_loss 0.0663540948741138 test_loss: 0.07387155294418335
epoch: 140 training_loss 0.07138638339936733 test_loss: 0.060917609930038454
epoch: 141 training_loss 0.06314026407897472 test_loss: 0.06332379579544067
epoch: 142 training_loss 0.05917169605847448 test_loss: 0.06493066549301148
epoch: 143 training_loss 0.06519324036315083 test_loss: 0.06951599717140197
epoch: 144 training_loss 0.06622636893764139 test_loss: 0.07011460661888122
epoch: 145 training_loss 0.06548609733581542 test_loss: 0.07177430391311646
epoch: 146 training_loss 0.061104152174666526 test_loss: 0.06181589365005493
epoch: 147 training_loss 0.06747928587719798 test_loss: 0.06779608130455017
epoch: 148 training_loss 0.0667875330336392 test_loss: 0.07539662718772888
epoch: 149 training_loss 0.06402254343964159 test_loss: 0.05890015959739685
epoch: 0 training_loss 57.81740016937256 test_loss: 34.97917175292969
epoch: 1 training_loss 27.548159713745118 test_loss: 22.229986572265624
epoch: 2 training_loss 20.12455360412598 test_loss: 17.91014862060547
epoch: 3 training_loss 16.8963175201416 test_loss: 15.716604614257813
epoch: 4 training_loss 14.869716005325317 test_loss: 13.628634643554687
epoch: 5 training_loss 13.236947402954101 test_loss: 12.47965316772461
epoch: 6 training_loss 11.92580753326416 test_loss: 11.253738403320312
epoch: 7 training_loss 10.865573568344116 test_loss: 10.474013519287109
epoch: 8 training_loss 10.153060150146484 test_loss: 9.918390655517578
epoch: 9 training_loss 9.497174444198608 test_loss: 9.310111999511719
epoch: 10 training_loss 8.926614527702332 test_loss: 8.52072982788086
epoch: 11 training_loss 8.498393468856811 test_loss: 8.12340850830078
epoch: 12 training_loss 8.151852140426636 test_loss: 8.082753753662109
epoch: 13 training_loss 7.772881393432617 test_loss: 7.471502685546875
epoch: 14 training_loss 7.334716944694519 test_loss: 7.248944091796875
epoch: 15 training_loss 7.240610280036926 test_loss: 6.9521339416503904
epoch: 16 training_loss 6.983521952629089 test_loss: 6.990213012695312
epoch: 17 training_loss 6.670203261375427 test_loss: 6.618659973144531
epoch: 18 training_loss 6.442225580215454 test_loss: 6.360775756835937
epoch: 19 training_loss 6.164955487251282 test_loss: 5.971854782104492
epoch: 20 training_loss 6.041740894317627 test_loss: 6.078263854980468
epoch: 21 training_loss 6.033527297973633 test_loss: 5.792947006225586
epoch: 22 training_loss 5.9156169605255124 test_loss: 6.045511627197266
epoch: 23 training_loss 5.625773916244507 test_loss: 5.687407684326172
epoch: 24 training_loss 5.671800394058227 test_loss: 5.358766174316406
epoch: 25 training_loss 5.4767547130584715 test_loss: 5.5457908630371096
epoch: 26 training_loss 5.456395554542541 test_loss: 5.351204681396484
epoch: 27 training_loss 5.295155024528503 test_loss: 5.498760986328125
epoch: 28 training_loss 5.2678515815734865 test_loss: 5.291650390625
epoch: 29 training_loss 5.1421502161026 test_loss: 5.249425888061523
epoch: 30 training_loss 5.165304388999939 test_loss: 5.311355209350586
epoch: 31 training_loss 5.033147501945495 test_loss: 5.247093200683594
epoch: 32 training_loss 4.991663012504578 test_loss: 5.298638153076172
epoch: 33 training_loss 4.9090311503410335 test_loss: 5.055742645263672
epoch: 34 training_loss 4.814825837612152 test_loss: 4.7659149169921875
epoch: 35 training_loss 4.752681264877319 test_loss: 4.7061115264892575
epoch: 36 training_loss 4.664643828868866 test_loss: 4.581660461425781
epoch: 37 training_loss 4.588010365962982 test_loss: 4.862402725219726
epoch: 38 training_loss 4.551738760471344 test_loss: 4.449509811401367
epoch: 39 training_loss 4.531237003803253 test_loss: 4.582562637329102
epoch: 40 training_loss 4.447129890918732 test_loss: 4.460814666748047
epoch: 41 training_loss 4.516005961894989 test_loss: 4.630503845214844
epoch: 42 training_loss 4.533226094245911 test_loss: 4.578551483154297
epoch: 43 training_loss 4.345119633674622 test_loss: 4.512550354003906
epoch: 44 training_loss 4.362364325523377 test_loss: 4.394916534423828
epoch: 45 training_loss 4.319092085361481 test_loss: 4.3963359832763675
epoch: 46 training_loss 4.247637825012207 test_loss: 4.0632282257080075
epoch: 47 training_loss 4.299534115791321 test_loss: 4.313798141479492
epoch: 48 training_loss 4.158179633617401 test_loss: 4.232576370239258
epoch: 49 training_loss 4.31048017501831 test_loss: 4.113598251342774
epoch: 50 training_loss 4.200080893039703 test_loss: 4.341866683959961
epoch: 51 training_loss 4.14995965719223 test_loss: 4.1659706115722654
epoch: 52 training_loss 4.038795685768127 test_loss: 4.209161758422852
epoch: 53 training_loss 4.154811573028565 test_loss: 3.980762481689453
epoch: 54 training_loss 4.005295000076294 test_loss: 4.0680694580078125
epoch: 55 training_loss 4.014410970211029 test_loss: 3.935546875
epoch: 56 training_loss 3.969383707046509 test_loss: 4.030059814453125
epoch: 57 training_loss 3.9330518555641176 test_loss: 3.8012920379638673
epoch: 58 training_loss 3.930417149066925 test_loss: 3.909226989746094
epoch: 59 training_loss 3.8339042258262634 test_loss: 3.9361804962158202
epoch: 60 training_loss 3.838501765727997 test_loss: 3.775595474243164
epoch: 61 training_loss 3.758317108154297 test_loss: 3.8551212310791017
epoch: 62 training_loss 3.896380269527435 test_loss: 3.8096561431884766
epoch: 63 training_loss 3.8724335646629333 test_loss: 4.011629486083985
epoch: 64 training_loss 3.775768768787384 test_loss: 3.908525085449219
epoch: 65 training_loss 3.771876254081726 test_loss: 3.9099700927734373
epoch: 66 training_loss 3.7198979687690734 test_loss: 3.814646911621094
epoch: 67 training_loss 3.70697114944458 test_loss: 3.88865966796875
epoch: 68 training_loss 3.6982098746299745 test_loss: 3.68126220703125
epoch: 69 training_loss 3.6787776279449464 test_loss: 3.681521987915039
epoch: 70 training_loss 3.6419464993476867 test_loss: 3.624772644042969
epoch: 71 training_loss 3.6371687841415405 test_loss: 3.661042404174805
epoch: 72 training_loss 3.572794933319092 test_loss: 3.636025619506836
epoch: 73 training_loss 3.550468022823334 test_loss: 3.37799072265625
epoch: 74 training_loss 3.5753692603111267 test_loss: 3.590592956542969
epoch: 75 training_loss 3.533645236492157 test_loss: 3.440326690673828
epoch: 76 training_loss 3.61384250164032 test_loss: 3.500564193725586
epoch: 77 training_loss 3.5306277203559877 test_loss: 3.51251106262207
epoch: 78 training_loss 3.4581251764297485 test_loss: 3.581212615966797
epoch: 79 training_loss 3.535179946422577 test_loss: 3.608380126953125
epoch: 80 training_loss 3.4892164301872253 test_loss: 3.5705448150634767
epoch: 81 training_loss 3.501075255870819 test_loss: 3.35157470703125
epoch: 82 training_loss 3.45350280046463 test_loss: 3.5643821716308595
epoch: 83 training_loss 3.362551920413971 test_loss: 3.411315155029297
epoch: 84 training_loss 3.461954696178436 test_loss: 3.4112258911132813
epoch: 85 training_loss 3.4091871643066405 test_loss: 3.3813545227050783
epoch: 86 training_loss 3.3266745162010194 test_loss: 3.3051753997802735
epoch: 87 training_loss 3.3280225086212156 test_loss: 3.3540348052978515
epoch: 88 training_loss 3.3671872854232787 test_loss: 3.2958797454833983
epoch: 89 training_loss 3.337875769138336 test_loss: 3.4411136627197267
epoch: 90 training_loss 3.3564573884010316 test_loss: 3.396442413330078
epoch: 91 training_loss 3.297189960479736 test_loss: 3.361104965209961
epoch: 92 training_loss 3.2641362237930296 test_loss: 3.284316635131836
epoch: 93 training_loss 3.336477119922638 test_loss: 3.219044494628906
epoch: 94 training_loss 3.3643349862098693 test_loss: 3.247750091552734
epoch: 95 training_loss 3.337170956134796 test_loss: 3.271708297729492
epoch: 96 training_loss 3.2943348574638365 test_loss: 3.3160140991210936
epoch: 97 training_loss 3.2317861104011536 test_loss: 3.3359378814697265
epoch: 98 training_loss 3.2689471578598024 test_loss: 3.3451053619384767
epoch: 99 training_loss 3.2840452218055725 test_loss: 3.15950870513916
epoch: 100 training_loss 3.2720378375053407 test_loss: 3.258567047119141
epoch: 101 training_loss 3.149371042251587 test_loss: 3.1787614822387695
epoch: 102 training_loss 3.1901643204689027 test_loss: 3.311506652832031
epoch: 103 training_loss 3.1553226637840273 test_loss: 3.251201629638672
epoch: 104 training_loss 3.262144138813019 test_loss: 3.2350112915039064
epoch: 105 training_loss 3.1464458441734315 test_loss: 3.3183853149414064
epoch: 106 training_loss 3.241713922023773 test_loss: 3.3418880462646485
epoch: 107 training_loss 3.1112447810173034 test_loss: 3.232502746582031
epoch: 108 training_loss 3.180597114562988 test_loss: 3.11559944152832
epoch: 109 training_loss 3.1198569178581237 test_loss: 3.177836608886719
epoch: 110 training_loss 3.1061101269721987 test_loss: 3.1708534240722654
epoch: 111 training_loss 3.0903924012184145 test_loss: 3.052469825744629
epoch: 112 training_loss 3.121738061904907 test_loss: 3.174843978881836
epoch: 113 training_loss 3.080105757713318 test_loss: 3.2255020141601562
epoch: 114 training_loss 3.171064009666443 test_loss: 3.1392412185668945
epoch: 115 training_loss 3.1341919016838076 test_loss: 3.023700141906738
epoch: 116 training_loss 3.047170193195343 test_loss: 3.033732032775879
epoch: 117 training_loss 3.1214179158210755 test_loss: 3.1895069122314452
epoch: 118 training_loss 3.0039651823043823 test_loss: 3.0999141693115235
epoch: 119 training_loss 3.056813473701477 test_loss: 3.1875385284423827
epoch: 120 training_loss 3.083623263835907 test_loss: 3.052474784851074
epoch: 121 training_loss 3.089632017612457 test_loss: 2.935292625427246
epoch: 122 training_loss 3.044492015838623 test_loss: 3.0606996536254885
epoch: 123 training_loss 3.0212870144844057 test_loss: 2.8847360610961914
epoch: 124 training_loss 2.9910319662094116 test_loss: 3.0288824081420898
epoch: 125 training_loss 3.0212724208831787 test_loss: 3.0753337860107424
epoch: 126 training_loss 2.998156180381775 test_loss: 3.1515878677368163
epoch: 127 training_loss 2.985908796787262 test_loss: 2.910934257507324
epoch: 128 training_loss 2.987549982070923 test_loss: 3.008602523803711
epoch: 129 training_loss 2.967762632369995 test_loss: 2.8669179916381835
epoch: 130 training_loss 3.0219355702400206 test_loss: 2.928157424926758
epoch: 131 training_loss 2.965368468761444 test_loss: 2.9499637603759767
epoch: 132 training_loss 3.010872664451599 test_loss: 2.9959480285644533
epoch: 133 training_loss 2.9686906266212465 test_loss: 3.10728759765625
epoch: 134 training_loss 2.896766781806946 test_loss: 2.968741035461426
epoch: 135 training_loss 2.983874943256378 test_loss: 2.882196807861328
epoch: 136 training_loss 2.971815390586853 test_loss: 2.9291080474853515
epoch: 137 training_loss 2.9327906799316406 test_loss: 2.9053449630737305
epoch: 138 training_loss 2.923967480659485 test_loss: 2.970640754699707
epoch: 139 training_loss 2.948107216358185 test_loss: 2.8540056228637694
epoch: 140 training_loss 2.887821280956268 test_loss: 2.900403594970703
epoch: 141 training_loss 2.8952981781959535 test_loss: 3.1324031829833983
epoch: 142 training_loss 2.885429654121399 test_loss: 2.9323368072509766
epoch: 143 training_loss 2.839115891456604 test_loss: 3.018845558166504
epoch: 144 training_loss 2.8852857756614685 test_loss: 2.9544286727905273
epoch: 145 training_loss 2.8685040187835695 test_loss: 2.8196168899536134
epoch: 146 training_loss 2.854861602783203 test_loss: 2.8474489212036134
epoch: 147 training_loss 2.875040934085846 test_loss: 2.9321353912353514
epoch: 148 training_loss 2.830094909667969 test_loss: 2.9247840881347655
epoch: 149 training_loss 2.849394817352295 test_loss: 2.8610342025756834
5130.552595747268
episode: 0 training return: tensor(-434.5534, device='cuda:0')
episode: 1 training return: tensor(-416.5078, device='cuda:0')
episode: 2 training return: tensor(-463.3267, device='cuda:0')
episode: 3 training return: tensor(-494.1910, device='cuda:0')
epoch: 1 test_true_pfm: 5082.209807508567 sim_pfm: -418.3096895321699
episode: 4 training return: tensor(-471.5408, device='cuda:0')
episode: 5 training return: tensor(-525.7104, device='cuda:0')
episode: 6 training return: tensor(-439.7060, device='cuda:0')
episode: 7 training return: tensor(-468.0965, device='cuda:0')
epoch: 2 test_true_pfm: 5090.808635718801 sim_pfm: -513.9807238596259
episode: 8 training return: tensor(-442.6812, device='cuda:0')
episode: 9 training return: tensor(-400.8893, device='cuda:0')
episode: 10 training return: tensor(-491.7189, device='cuda:0')
episode: 11 training return: tensor(-430.3857, device='cuda:0')
epoch: 3 test_true_pfm: 5119.3786375683385 sim_pfm: -434.258556312571
episode: 12 training return: tensor(-487.9883, device='cuda:0')
episode: 13 training return: tensor(-386.9393, device='cuda:0')
episode: 14 training return: tensor(-398.4436, device='cuda:0')
episode: 15 training return: tensor(-528.5173, device='cuda:0')
epoch: 4 test_true_pfm: 5096.228131600129 sim_pfm: -408.9662342952021
episode: 16 training return: tensor(-502.7836, device='cuda:0')
episode: 17 training return: tensor(-509.2834, device='cuda:0')
episode: 18 training return: tensor(-549.5099, device='cuda:0')
episode: 19 training return: tensor(-465.6685, device='cuda:0')
epoch: 5 test_true_pfm: 4993.857071105119 sim_pfm: -457.74501147136715
episode: 20 training return: tensor(-544.7521, device='cuda:0')
episode: 21 training return: tensor(-397.4296, device='cuda:0')
episode: 22 training return: tensor(-503.9216, device='cuda:0')
episode: 23 training return: tensor(-392.6901, device='cuda:0')
epoch: 6 test_true_pfm: 5106.434886080028 sim_pfm: -481.65751646687085
episode: 24 training return: tensor(-469.3581, device='cuda:0')
episode: 25 training return: tensor(-470.9779, device='cuda:0')
episode: 26 training return: tensor(-435.6400, device='cuda:0')
episode: 27 training return: tensor(-496.3920, device='cuda:0')
epoch: 7 test_true_pfm: 5150.59852879156 sim_pfm: -454.8999162327964
episode: 28 training return: tensor(-560.5767, device='cuda:0')
episode: 29 training return: tensor(-435.6906, device='cuda:0')
episode: 30 training return: tensor(-389.9083, device='cuda:0')
episode: 31 training return: tensor(-488.6194, device='cuda:0')
epoch: 8 test_true_pfm: 5074.792970917206 sim_pfm: -424.49836000988336
episode: 32 training return: tensor(-555.9589, device='cuda:0')
episode: 33 training return: tensor(-372.2629, device='cuda:0')
episode: 34 training return: tensor(-508.1493, device='cuda:0')
episode: 35 training return: tensor(-519.8046, device='cuda:0')
epoch: 9 test_true_pfm: 4210.285788529087 sim_pfm: -410.9906529039242
episode: 36 training return: tensor(-477.2135, device='cuda:0')
episode: 37 training return: tensor(-379.3903, device='cuda:0')
episode: 38 training return: tensor(-461.4984, device='cuda:0')
episode: 39 training return: tensor(-488.6787, device='cuda:0')
epoch: 10 test_true_pfm: 5023.0479650508505 sim_pfm: -461.82209504550946
episode: 40 training return: tensor(-415.6055, device='cuda:0')
episode: 41 training return: tensor(-421.2475, device='cuda:0')
episode: 42 training return: tensor(-546.8407, device='cuda:0')
episode: 43 training return: tensor(-463.6688, device='cuda:0')
epoch: 11 test_true_pfm: 5173.142963053331 sim_pfm: -402.8918684535117
episode: 44 training return: tensor(-448.4568, device='cuda:0')
episode: 45 training return: tensor(-410.4429, device='cuda:0')
episode: 46 training return: tensor(-425.1573, device='cuda:0')
episode: 47 training return: tensor(-459.9785, device='cuda:0')
epoch: 12 test_true_pfm: 5247.425506182008 sim_pfm: -381.53136908604455
episode: 48 training return: tensor(-551.0909, device='cuda:0')
episode: 49 training return: tensor(-476.1756, device='cuda:0')
episode: 50 training return: tensor(-365.7747, device='cuda:0')
episode: 51 training return: tensor(-385.9832, device='cuda:0')
epoch: 13 test_true_pfm: 5136.990613004308 sim_pfm: -451.975444334986
episode: 52 training return: tensor(-454.2605, device='cuda:0')
episode: 53 training return: tensor(-423.6335, device='cuda:0')
episode: 54 training return: tensor(-438.6973, device='cuda:0')
episode: 55 training return: tensor(-475.6667, device='cuda:0')
epoch: 14 test_true_pfm: 5828.052135416314 sim_pfm: -400.13275990006514
episode: 56 training return: tensor(-453.5990, device='cuda:0')
episode: 57 training return: tensor(-406.0715, device='cuda:0')
episode: 58 training return: tensor(-476.4218, device='cuda:0')
episode: 59 training return: tensor(-427.0546, device='cuda:0')
epoch: 15 test_true_pfm: 5320.503847897213 sim_pfm: -389.1154548189758
episode: 60 training return: tensor(-425.4670, device='cuda:0')
episode: 61 training return: tensor(-424.3305, device='cuda:0')
episode: 62 training return: tensor(-529.3575, device='cuda:0')
episode: 63 training return: tensor(-484.4407, device='cuda:0')
epoch: 16 test_true_pfm: 5154.650875986187 sim_pfm: -362.4863984387969
episode: 64 training return: tensor(-482.8632, device='cuda:0')
episode: 65 training return: tensor(-471.8330, device='cuda:0')
episode: 66 training return: tensor(-449.1668, device='cuda:0')
episode: 67 training return: tensor(-491.9414, device='cuda:0')
epoch: 17 test_true_pfm: 4729.411740578581 sim_pfm: -377.738873091992
episode: 68 training return: tensor(-452.4524, device='cuda:0')
episode: 69 training return: tensor(-355.1530, device='cuda:0')
episode: 70 training return: tensor(-466.5867, device='cuda:0')
episode: 71 training return: tensor(-455.4914, device='cuda:0')
epoch: 18 test_true_pfm: 5266.324469542441 sim_pfm: -356.37405471562914
episode: 72 training return: tensor(-527.8613, device='cuda:0')
episode: 73 training return: tensor(-438.2479, device='cuda:0')
episode: 74 training return: tensor(-447.3148, device='cuda:0')
episode: 75 training return: tensor(-393.4177, device='cuda:0')
epoch: 19 test_true_pfm: 5705.32451049121 sim_pfm: -360.26893574097386
episode: 76 training return: tensor(-367.1145, device='cuda:0')
episode: 77 training return: tensor(-369.3361, device='cuda:0')
episode: 78 training return: tensor(-475.4046, device='cuda:0')
episode: 79 training return: tensor(-441.5335, device='cuda:0')
epoch: 20 test_true_pfm: 5364.033503393449 sim_pfm: -375.72059461131965
episode: 80 training return: tensor(-505.1902, device='cuda:0')
episode: 81 training return: tensor(-451.5390, device='cuda:0')
episode: 82 training return: tensor(-420.3593, device='cuda:0')
episode: 83 training return: tensor(-607.3775, device='cuda:0')
epoch: 21 test_true_pfm: 5276.409296306694 sim_pfm: -371.93290956902393
episode: 84 training return: tensor(-500.1473, device='cuda:0')
episode: 85 training return: tensor(-397.7823, device='cuda:0')
episode: 86 training return: tensor(-462.3215, device='cuda:0')
episode: 87 training return: tensor(-487.1916, device='cuda:0')
epoch: 22 test_true_pfm: 5155.366832700524 sim_pfm: -361.41502867403324
episode: 88 training return: tensor(-376.3055, device='cuda:0')
episode: 89 training return: tensor(-349.8945, device='cuda:0')
episode: 90 training return: tensor(-461.7949, device='cuda:0')
episode: 91 training return: tensor(-502.0969, device='cuda:0')
epoch: 23 test_true_pfm: 5226.332303651417 sim_pfm: -362.9859740081204
episode: 92 training return: tensor(-451.8604, device='cuda:0')
episode: 93 training return: tensor(-432.0321, device='cuda:0')
episode: 94 training return: tensor(-414.2784, device='cuda:0')
episode: 95 training return: tensor(-261.2681, device='cuda:0')
epoch: 24 test_true_pfm: 3897.311754143855 sim_pfm: -344.26635432559607
episode: 96 training return: tensor(-405.2939, device='cuda:0')
episode: 97 training return: tensor(-444.4840, device='cuda:0')
episode: 98 training return: tensor(-346.0417, device='cuda:0')
episode: 99 training return: tensor(-531.5005, device='cuda:0')
epoch: 25 test_true_pfm: 4890.989935628936 sim_pfm: -351.1247311673651
episode: 100 training return: tensor(-435.1849, device='cuda:0')
episode: 101 training return: tensor(-404.1167, device='cuda:0')
episode: 102 training return: tensor(-581.3019, device='cuda:0')
episode: 103 training return: tensor(-505.7773, device='cuda:0')
epoch: 26 test_true_pfm: 4378.876776469021 sim_pfm: -332.615028439662
episode: 104 training return: tensor(-349.8169, device='cuda:0')
episode: 105 training return: tensor(-483.5572, device='cuda:0')
episode: 106 training return: tensor(-316.0717, device='cuda:0')
episode: 107 training return: tensor(-439.5062, device='cuda:0')
epoch: 27 test_true_pfm: 5334.6331579839 sim_pfm: -365.73741169207887
episode: 108 training return: tensor(-368.7179, device='cuda:0')
episode: 109 training return: tensor(-368.0631, device='cuda:0')
episode: 110 training return: tensor(-425.6737, device='cuda:0')
episode: 111 training return: tensor(-355.4933, device='cuda:0')
epoch: 28 test_true_pfm: 5401.256698260554 sim_pfm: -271.4081996339567
episode: 112 training return: tensor(-502.6578, device='cuda:0')
episode: 113 training return: tensor(-418.4343, device='cuda:0')
episode: 114 training return: tensor(-461.5032, device='cuda:0')
episode: 115 training return: tensor(-426.0274, device='cuda:0')
epoch: 29 test_true_pfm: 5389.358382436721 sim_pfm: -314.5793154782344
episode: 116 training return: tensor(-457.2782, device='cuda:0')
episode: 117 training return: tensor(-436.3025, device='cuda:0')
episode: 118 training return: tensor(-423.1689, device='cuda:0')
episode: 119 training return: tensor(-344.4853, device='cuda:0')
epoch: 30 test_true_pfm: 5194.081835329304 sim_pfm: -332.0038841342903
episode: 120 training return: tensor(-408.1735, device='cuda:0')
episode: 121 training return: tensor(-452.7567, device='cuda:0')
episode: 122 training return: tensor(-405.5009, device='cuda:0')
episode: 123 training return: tensor(-307.1173, device='cuda:0')
epoch: 31 test_true_pfm: 5248.201509826881 sim_pfm: -339.7066889385266
episode: 124 training return: tensor(-447.2436, device='cuda:0')
episode: 125 training return: tensor(-421.2638, device='cuda:0')
episode: 126 training return: tensor(-391.0020, device='cuda:0')
episode: 127 training return: tensor(-612.2436, device='cuda:0')
epoch: 32 test_true_pfm: 5419.736366398595 sim_pfm: -303.39755706609384
episode: 128 training return: tensor(-353.8802, device='cuda:0')
episode: 129 training return: tensor(-445.9852, device='cuda:0')
episode: 130 training return: tensor(-445.2099, device='cuda:0')
episode: 131 training return: tensor(-437.6313, device='cuda:0')
epoch: 33 test_true_pfm: 5307.457088992243 sim_pfm: -331.7145202641259
episode: 132 training return: tensor(-354.8492, device='cuda:0')
episode: 133 training return: tensor(-373.9729, device='cuda:0')
episode: 134 training return: tensor(-387.4310, device='cuda:0')
episode: 135 training return: tensor(-508.4436, device='cuda:0')
epoch: 34 test_true_pfm: 5378.709428312541 sim_pfm: -336.2453085895977
episode: 136 training return: tensor(-476.2102, device='cuda:0')
episode: 137 training return: tensor(-441.5805, device='cuda:0')
episode: 138 training return: tensor(-454.7657, device='cuda:0')
episode: 139 training return: tensor(-458.0869, device='cuda:0')
epoch: 35 test_true_pfm: 5369.1629460474915 sim_pfm: -397.5799523851213
episode: 140 training return: tensor(-392.0234, device='cuda:0')
episode: 141 training return: tensor(-393.4438, device='cuda:0')
episode: 142 training return: tensor(-395.6312, device='cuda:0')
episode: 143 training return: tensor(-460.7498, device='cuda:0')
epoch: 36 test_true_pfm: 5341.147503867263 sim_pfm: -350.7196920440377
episode: 144 training return: tensor(-407.5142, device='cuda:0')
episode: 145 training return: tensor(-373.5759, device='cuda:0')
episode: 146 training return: tensor(-524.0640, device='cuda:0')
episode: 147 training return: tensor(-397.8395, device='cuda:0')
epoch: 37 test_true_pfm: 5392.033481315025 sim_pfm: -282.07243384285056
episode: 148 training return: tensor(-366.9713, device='cuda:0')
episode: 149 training return: tensor(-437.5984, device='cuda:0')
episode: 150 training return: tensor(-401.9767, device='cuda:0')
episode: 151 training return: tensor(-375.9721, device='cuda:0')
epoch: 38 test_true_pfm: 5367.499308178857 sim_pfm: -299.3088851517144
episode: 152 training return: tensor(-391.8470, device='cuda:0')
episode: 153 training return: tensor(-429.1404, device='cuda:0')
episode: 154 training return: tensor(-499.2302, device='cuda:0')
episode: 155 training return: tensor(-340.7130, device='cuda:0')
epoch: 39 test_true_pfm: 5319.468702496702 sim_pfm: -303.78651165035745
episode: 156 training return: tensor(-432.7898, device='cuda:0')
episode: 157 training return: tensor(-324.2509, device='cuda:0')
episode: 158 training return: tensor(-409.5963, device='cuda:0')
episode: 159 training return: tensor(-475.0940, device='cuda:0')
epoch: 40 test_true_pfm: 5330.7655103553325 sim_pfm: -358.76210097350605
episode: 160 training return: tensor(-367.5022, device='cuda:0')
episode: 161 training return: tensor(-405.3931, device='cuda:0')
episode: 162 training return: tensor(-386.4617, device='cuda:0')
episode: 163 training return: tensor(-789.2703, device='cuda:0')
epoch: 41 test_true_pfm: 5332.8845611657025 sim_pfm: -300.7420794444236
episode: 164 training return: tensor(-368.3592, device='cuda:0')
episode: 165 training return: tensor(-477.1090, device='cuda:0')
episode: 166 training return: tensor(-632.3787, device='cuda:0')
episode: 167 training return: tensor(-436.4413, device='cuda:0')
epoch: 42 test_true_pfm: 5280.146358626811 sim_pfm: -304.4540439432215
episode: 168 training return: tensor(-430.6709, device='cuda:0')
episode: 169 training return: tensor(-404.7791, device='cuda:0')
episode: 170 training return: tensor(-379.2908, device='cuda:0')
episode: 171 training return: tensor(-528.0434, device='cuda:0')
epoch: 43 test_true_pfm: 5406.261555831402 sim_pfm: -367.1098089754814
episode: 172 training return: tensor(-384.2505, device='cuda:0')
episode: 173 training return: tensor(-407.5536, device='cuda:0')
episode: 174 training return: tensor(-400.6781, device='cuda:0')
episode: 175 training return: tensor(-400.4993, device='cuda:0')
epoch: 44 test_true_pfm: 5305.2956366993685 sim_pfm: -283.9562011371211
episode: 176 training return: tensor(-393.0659, device='cuda:0')
episode: 177 training return: tensor(-352.1709, device='cuda:0')
episode: 178 training return: tensor(-616.1525, device='cuda:0')
episode: 179 training return: tensor(-526.3710, device='cuda:0')
epoch: 45 test_true_pfm: 5416.008088347776 sim_pfm: -331.7237265330623
episode: 180 training return: tensor(-394.9903, device='cuda:0')
episode: 181 training return: tensor(-424.3719, device='cuda:0')
episode: 182 training return: tensor(-348.2869, device='cuda:0')
episode: 183 training return: tensor(-408.5918, device='cuda:0')
epoch: 46 test_true_pfm: 5423.539504197367 sim_pfm: -306.4375531527039
episode: 184 training return: tensor(-394.6761, device='cuda:0')
episode: 185 training return: tensor(-371.4713, device='cuda:0')
episode: 186 training return: tensor(-384.7877, device='cuda:0')
episode: 187 training return: tensor(-376.0593, device='cuda:0')
epoch: 47 test_true_pfm: 5428.972575345327 sim_pfm: -329.64358301080455
episode: 188 training return: tensor(-400.0241, device='cuda:0')
episode: 189 training return: tensor(-454.2362, device='cuda:0')
episode: 190 training return: tensor(-370.6039, device='cuda:0')
episode: 191 training return: tensor(-404.8004, device='cuda:0')
epoch: 48 test_true_pfm: 5408.737055651394 sim_pfm: -319.0359081258066
episode: 192 training return: tensor(-441.3928, device='cuda:0')
episode: 193 training return: tensor(-293.4887, device='cuda:0')
episode: 194 training return: tensor(-336.1988, device='cuda:0')
episode: 195 training return: tensor(-359.7542, device='cuda:0')
epoch: 49 test_true_pfm: 5420.110874905185 sim_pfm: -356.183067299445
episode: 196 training return: tensor(-433.6812, device='cuda:0')
episode: 197 training return: tensor(-448.9792, device='cuda:0')
episode: 198 training return: tensor(-434.3134, device='cuda:0')
episode: 199 training return: tensor(-469.8061, device='cuda:0')
epoch: 50 test_true_pfm: 5464.462175244287 sim_pfm: -321.05883548441733
episode: 200 training return: tensor(-434.7495, device='cuda:0')
episode: 201 training return: tensor(-374.6857, device='cuda:0')
episode: 202 training return: tensor(-371.0457, device='cuda:0')
episode: 203 training return: tensor(-383.8658, device='cuda:0')
epoch: 51 test_true_pfm: 5507.15679513224 sim_pfm: -322.4020092872088
episode: 204 training return: tensor(-387.3827, device='cuda:0')
episode: 205 training return: tensor(-303.5486, device='cuda:0')
episode: 206 training return: tensor(-363.7314, device='cuda:0')
episode: 207 training return: tensor(-444.0540, device='cuda:0')
epoch: 52 test_true_pfm: 5423.181278236664 sim_pfm: -289.23846258513123
episode: 208 training return: tensor(-424.9160, device='cuda:0')
episode: 209 training return: tensor(-465.5156, device='cuda:0')
episode: 210 training return: tensor(-490.2472, device='cuda:0')
episode: 211 training return: tensor(-353.1793, device='cuda:0')
epoch: 53 test_true_pfm: 5450.392424771911 sim_pfm: -367.9546752892299
episode: 212 training return: tensor(-400.5306, device='cuda:0')
episode: 213 training return: tensor(-425.8687, device='cuda:0')
episode: 214 training return: tensor(-447.6567, device='cuda:0')
episode: 215 training return: tensor(-305.7440, device='cuda:0')
epoch: 54 test_true_pfm: 5488.160511994575 sim_pfm: -323.68476923070074
episode: 216 training return: tensor(-354.4144, device='cuda:0')
episode: 217 training return: tensor(-407.1753, device='cuda:0')
episode: 218 training return: tensor(-362.4412, device='cuda:0')
episode: 219 training return: tensor(-363.4711, device='cuda:0')
epoch: 55 test_true_pfm: 5318.564496381222 sim_pfm: -295.2943020860451
episode: 220 training return: tensor(-352.5978, device='cuda:0')
episode: 221 training return: tensor(-394.3539, device='cuda:0')
episode: 222 training return: tensor(-439.8070, device='cuda:0')
episode: 223 training return: tensor(-350.1616, device='cuda:0')
epoch: 56 test_true_pfm: 5389.462937255947 sim_pfm: -291.3666335666979
episode: 224 training return: tensor(-412.9179, device='cuda:0')
episode: 225 training return: tensor(-344.7756, device='cuda:0')
episode: 226 training return: tensor(-377.9386, device='cuda:0')
episode: 227 training return: tensor(-383.3147, device='cuda:0')
epoch: 57 test_true_pfm: 5460.070255526275 sim_pfm: -362.9774699779712
episode: 228 training return: tensor(-462.2554, device='cuda:0')
episode: 229 training return: tensor(-467.0478, device='cuda:0')
episode: 230 training return: tensor(-455.5618, device='cuda:0')
episode: 231 training return: tensor(-883.6494, device='cuda:0')
epoch: 58 test_true_pfm: 5401.796426405268 sim_pfm: -307.1095112214195
episode: 232 training return: tensor(-390.2899, device='cuda:0')
episode: 233 training return: tensor(-410.6481, device='cuda:0')
episode: 234 training return: tensor(-351.8648, device='cuda:0')
episode: 235 training return: tensor(-407.2895, device='cuda:0')
epoch: 59 test_true_pfm: 5487.173657336549 sim_pfm: -297.4525602111632
episode: 236 training return: tensor(-424.0634, device='cuda:0')
episode: 237 training return: tensor(-422.0031, device='cuda:0')
episode: 238 training return: tensor(-430.6068, device='cuda:0')
episode: 239 training return: tensor(-356.2787, device='cuda:0')
epoch: 60 test_true_pfm: 5356.891042658656 sim_pfm: -336.5458686449371
episode: 240 training return: tensor(-482.5912, device='cuda:0')
episode: 241 training return: tensor(-310.3101, device='cuda:0')
episode: 242 training return: tensor(-312.8337, device='cuda:0')
episode: 243 training return: tensor(-402.2105, device='cuda:0')
epoch: 61 test_true_pfm: 5456.71027517266 sim_pfm: -346.94517689194373
episode: 244 training return: tensor(-476.7109, device='cuda:0')
episode: 245 training return: tensor(-371.2361, device='cuda:0')
episode: 246 training return: tensor(-413.0756, device='cuda:0')
episode: 247 training return: tensor(-416.6247, device='cuda:0')
epoch: 62 test_true_pfm: 5351.515622253705 sim_pfm: -295.04492092485697
episode: 248 training return: tensor(-423.1881, device='cuda:0')
episode: 249 training return: tensor(-330.3760, device='cuda:0')
episode: 250 training return: tensor(-372.1122, device='cuda:0')
episode: 251 training return: tensor(-328.6747, device='cuda:0')
epoch: 63 test_true_pfm: 5597.442819361159 sim_pfm: -284.37781580091297
episode: 252 training return: tensor(-433.6655, device='cuda:0')
episode: 253 training return: tensor(-320.5040, device='cuda:0')
episode: 254 training return: tensor(-406.0921, device='cuda:0')
episode: 255 training return: tensor(-292.7885, device='cuda:0')
epoch: 64 test_true_pfm: 5538.0970060769905 sim_pfm: -268.4202425602125
episode: 256 training return: tensor(-374.2000, device='cuda:0')
episode: 257 training return: tensor(-487.8608, device='cuda:0')
episode: 258 training return: tensor(-339.5572, device='cuda:0')
episode: 259 training return: tensor(-336.4018, device='cuda:0')
epoch: 65 test_true_pfm: 4480.525173407121 sim_pfm: -299.2880950610852
episode: 260 training return: tensor(-399.6182, device='cuda:0')
episode: 261 training return: tensor(-224.5372, device='cuda:0')
episode: 262 training return: tensor(-378.8978, device='cuda:0')
episode: 263 training return: tensor(-385.3705, device='cuda:0')
epoch: 66 test_true_pfm: 5411.274646875575 sim_pfm: -282.5384813862426
episode: 264 training return: tensor(-437.9799, device='cuda:0')
episode: 265 training return: tensor(-370.9078, device='cuda:0')
episode: 266 training return: tensor(-445.6155, device='cuda:0')
episode: 267 training return: tensor(-433.6443, device='cuda:0')
epoch: 67 test_true_pfm: 5401.1777539896 sim_pfm: -279.4017609219688
episode: 268 training return: tensor(-436.7155, device='cuda:0')
episode: 269 training return: tensor(-463.7436, device='cuda:0')
episode: 270 training return: tensor(-338.6771, device='cuda:0')
episode: 271 training return: tensor(-329.8244, device='cuda:0')
epoch: 68 test_true_pfm: 5355.860134501924 sim_pfm: -280.80298465471907
episode: 272 training return: tensor(-277.4724, device='cuda:0')
episode: 273 training return: tensor(-369.6514, device='cuda:0')
episode: 274 training return: tensor(-365.1209, device='cuda:0')
episode: 275 training return: tensor(-448.0440, device='cuda:0')
epoch: 69 test_true_pfm: 5535.373439861648 sim_pfm: -292.163602706433
episode: 276 training return: tensor(-350.1967, device='cuda:0')
episode: 277 training return: tensor(-388.0061, device='cuda:0')
episode: 278 training return: tensor(-405.4315, device='cuda:0')
episode: 279 training return: tensor(-455.4730, device='cuda:0')
epoch: 70 test_true_pfm: 5402.55082411275 sim_pfm: -281.00822787241003
episode: 280 training return: tensor(-371.3371, device='cuda:0')
episode: 281 training return: tensor(-390.2488, device='cuda:0')
episode: 282 training return: tensor(-452.7497, device='cuda:0')
episode: 283 training return: tensor(-437.7929, device='cuda:0')
epoch: 71 test_true_pfm: 5363.771284897005 sim_pfm: -297.13496641306364
episode: 284 training return: tensor(-400.9163, device='cuda:0')
episode: 285 training return: tensor(-306.5506, device='cuda:0')
episode: 286 training return: tensor(-363.3162, device='cuda:0')
episode: 287 training return: tensor(-438.2508, device='cuda:0')
epoch: 72 test_true_pfm: 5428.609462805024 sim_pfm: -279.93780376701034
episode: 288 training return: tensor(-391.7318, device='cuda:0')
episode: 289 training return: tensor(-352.7116, device='cuda:0')
episode: 290 training return: tensor(-367.4511, device='cuda:0')
episode: 291 training return: tensor(-253.2915, device='cuda:0')
epoch: 73 test_true_pfm: 5394.852870935603 sim_pfm: -301.2156922729143
episode: 292 training return: tensor(-289.2314, device='cuda:0')
episode: 293 training return: tensor(-371.8172, device='cuda:0')
episode: 294 training return: tensor(-369.9961, device='cuda:0')
episode: 295 training return: tensor(-399.1008, device='cuda:0')
epoch: 74 test_true_pfm: 5448.013956112868 sim_pfm: -322.14901144981076
episode: 296 training return: tensor(-538.1227, device='cuda:0')
episode: 297 training return: tensor(-326.4035, device='cuda:0')
episode: 298 training return: tensor(-353.4095, device='cuda:0')
episode: 299 training return: tensor(-309.9353, device='cuda:0')
epoch: 75 test_true_pfm: 5477.328785952734 sim_pfm: -246.235375388409
episode: 300 training return: tensor(-402.5235, device='cuda:0')
episode: 301 training return: tensor(-404.0731, device='cuda:0')
episode: 302 training return: tensor(-382.1869, device='cuda:0')
episode: 303 training return: tensor(-367.3622, device='cuda:0')
epoch: 76 test_true_pfm: 5484.846810524199 sim_pfm: -275.19613259247853
episode: 304 training return: tensor(-460.3224, device='cuda:0')
episode: 305 training return: tensor(-409.3563, device='cuda:0')
episode: 306 training return: tensor(-363.3315, device='cuda:0')
episode: 307 training return: tensor(-376.5702, device='cuda:0')
epoch: 77 test_true_pfm: 5458.604585377314 sim_pfm: -298.7773958671799
episode: 308 training return: tensor(-369.8477, device='cuda:0')
episode: 309 training return: tensor(-460.4590, device='cuda:0')
episode: 310 training return: tensor(-369.8773, device='cuda:0')
episode: 311 training return: tensor(-383.2107, device='cuda:0')
epoch: 78 test_true_pfm: 5578.435855034273 sim_pfm: -234.5694391070865
episode: 312 training return: tensor(-429.6832, device='cuda:0')
episode: 313 training return: tensor(-471.1488, device='cuda:0')
episode: 314 training return: tensor(-434.9826, device='cuda:0')
episode: 315 training return: tensor(-318.7071, device='cuda:0')
epoch: 79 test_true_pfm: 5638.405872145682 sim_pfm: -263.08873400753754
episode: 316 training return: tensor(-363.2797, device='cuda:0')
episode: 317 training return: tensor(-451.2853, device='cuda:0')
episode: 318 training return: tensor(-354.4733, device='cuda:0')
episode: 319 training return: tensor(-349.9814, device='cuda:0')
epoch: 80 test_true_pfm: 5486.322900623733 sim_pfm: -250.85075035743648
episode: 320 training return: tensor(-332.8294, device='cuda:0')
episode: 321 training return: tensor(-264.6458, device='cuda:0')
episode: 322 training return: tensor(-329.4576, device='cuda:0')
episode: 323 training return: tensor(-397.7084, device='cuda:0')
epoch: 81 test_true_pfm: 5473.830580111132 sim_pfm: -223.02237953663766
episode: 324 training return: tensor(-294.7971, device='cuda:0')
episode: 325 training return: tensor(-379.6957, device='cuda:0')
episode: 326 training return: tensor(-419.5995, device='cuda:0')
episode: 327 training return: tensor(-397.7464, device='cuda:0')
epoch: 82 test_true_pfm: 5564.415360670219 sim_pfm: -239.45316120576658
episode: 328 training return: tensor(-439.8026, device='cuda:0')
episode: 329 training return: tensor(-383.2013, device='cuda:0')
episode: 330 training return: tensor(-348.3522, device='cuda:0')
episode: 331 training return: tensor(-373.4191, device='cuda:0')
epoch: 83 test_true_pfm: 5466.963298289782 sim_pfm: -287.2710539893208
episode: 332 training return: tensor(-437.8314, device='cuda:0')
episode: 333 training return: tensor(-394.1458, device='cuda:0')
episode: 334 training return: tensor(-454.4518, device='cuda:0')
episode: 335 training return: tensor(-442.5931, device='cuda:0')
epoch: 84 test_true_pfm: 5626.03445811689 sim_pfm: -301.4594626586477
episode: 336 training return: tensor(-353.0585, device='cuda:0')
episode: 337 training return: tensor(-422.5078, device='cuda:0')
episode: 338 training return: tensor(-302.4108, device='cuda:0')
episode: 339 training return: tensor(-360.7561, device='cuda:0')
epoch: 85 test_true_pfm: 5447.135454837738 sim_pfm: -277.4956349899876
episode: 340 training return: tensor(-378.7034, device='cuda:0')
episode: 341 training return: tensor(-354.0436, device='cuda:0')
episode: 342 training return: tensor(-298.1322, device='cuda:0')
episode: 343 training return: tensor(-366.3279, device='cuda:0')
epoch: 86 test_true_pfm: 5509.059861274279 sim_pfm: -277.7967119932194
episode: 344 training return: tensor(-337.5818, device='cuda:0')
episode: 345 training return: tensor(-293.9285, device='cuda:0')
episode: 346 training return: tensor(-322.4925, device='cuda:0')
episode: 347 training return: tensor(-348.4675, device='cuda:0')
epoch: 87 test_true_pfm: 5408.960086202297 sim_pfm: -235.7400441012578
episode: 348 training return: tensor(-359.4607, device='cuda:0')
episode: 349 training return: tensor(-479.9570, device='cuda:0')
episode: 350 training return: tensor(-383.2880, device='cuda:0')
episode: 351 training return: tensor(-332.7703, device='cuda:0')
epoch: 88 test_true_pfm: 5584.937293350243 sim_pfm: -283.505763479751
episode: 352 training return: tensor(-323.6247, device='cuda:0')
episode: 353 training return: tensor(-316.2926, device='cuda:0')
episode: 354 training return: tensor(-384.9453, device='cuda:0')
episode: 355 training return: tensor(-326.7968, device='cuda:0')
epoch: 89 test_true_pfm: 5406.208276147668 sim_pfm: -268.21570898828213
episode: 356 training return: tensor(-386.1631, device='cuda:0')
episode: 357 training return: tensor(-375.9899, device='cuda:0')
episode: 358 training return: tensor(-413.2220, device='cuda:0')
episode: 359 training return: tensor(-360.5950, device='cuda:0')
epoch: 90 test_true_pfm: 5472.307120228005 sim_pfm: -244.4121292254422
episode: 360 training return: tensor(-361.7603, device='cuda:0')
episode: 361 training return: tensor(-363.6214, device='cuda:0')
episode: 362 training return: tensor(-349.3526, device='cuda:0')
episode: 363 training return: tensor(-229.8300, device='cuda:0')
epoch: 91 test_true_pfm: 5636.204216737505 sim_pfm: -271.2895101264391
episode: 364 training return: tensor(-414.1311, device='cuda:0')
episode: 365 training return: tensor(-365.5139, device='cuda:0')
episode: 366 training return: tensor(-341.7743, device='cuda:0')
episode: 367 training return: tensor(-385.2857, device='cuda:0')
epoch: 92 test_true_pfm: 5570.473579625439 sim_pfm: -236.49492018529176
episode: 368 training return: tensor(-375.3951, device='cuda:0')
episode: 369 training return: tensor(-379.2950, device='cuda:0')
episode: 370 training return: tensor(-290.6516, device='cuda:0')
episode: 371 training return: tensor(-254.0399, device='cuda:0')
epoch: 93 test_true_pfm: 5588.94550597487 sim_pfm: -213.47186533998078
episode: 372 training return: tensor(-360.3690, device='cuda:0')
episode: 373 training return: tensor(-344.7314, device='cuda:0')
episode: 374 training return: tensor(-311.8126, device='cuda:0')
episode: 375 training return: tensor(-384.9613, device='cuda:0')
epoch: 94 test_true_pfm: 5362.204504875655 sim_pfm: -256.7075629699975
episode: 376 training return: tensor(-440.2791, device='cuda:0')
episode: 377 training return: tensor(-397.5074, device='cuda:0')
episode: 378 training return: tensor(-355.2071, device='cuda:0')
episode: 379 training return: tensor(-355.5533, device='cuda:0')
epoch: 95 test_true_pfm: 5649.7566864969585 sim_pfm: -295.8863280406513
episode: 380 training return: tensor(-309.9874, device='cuda:0')
episode: 381 training return: tensor(-436.5429, device='cuda:0')
episode: 382 training return: tensor(-266.0318, device='cuda:0')
episode: 383 training return: tensor(-475.9315, device='cuda:0')
epoch: 96 test_true_pfm: 5380.146661040798 sim_pfm: -356.228618684089
episode: 384 training return: tensor(-294.6820, device='cuda:0')
episode: 385 training return: tensor(-594.7892, device='cuda:0')
episode: 386 training return: tensor(-386.0443, device='cuda:0')
episode: 387 training return: tensor(-322.9507, device='cuda:0')
epoch: 97 test_true_pfm: 5496.353063811631 sim_pfm: -318.98246917146025
episode: 388 training return: tensor(-297.5230, device='cuda:0')
episode: 389 training return: tensor(-353.0649, device='cuda:0')
episode: 390 training return: tensor(-411.8870, device='cuda:0')
episode: 391 training return: tensor(-398.3299, device='cuda:0')
epoch: 98 test_true_pfm: 5528.269713391276 sim_pfm: -241.44268543321718
episode: 392 training return: tensor(-441.5016, device='cuda:0')
episode: 393 training return: tensor(-301.1629, device='cuda:0')
episode: 394 training return: tensor(-296.4747, device='cuda:0')
episode: 395 training return: tensor(-498.4017, device='cuda:0')
epoch: 99 test_true_pfm: 6264.250584322842 sim_pfm: -233.17065560074602
episode: 396 training return: tensor(-468.2999, device='cuda:0')
episode: 397 training return: tensor(-285.0703, device='cuda:0')
episode: 398 training return: tensor(-354.1835, device='cuda:0')
episode: 399 training return: tensor(-330.3154, device='cuda:0')
epoch: 100 test_true_pfm: 5536.272543013892 sim_pfm: -234.6472057775827
episode: 400 training return: tensor(-462.9433, device='cuda:0')
episode: 401 training return: tensor(-422.4650, device='cuda:0')
episode: 402 training return: tensor(-342.6112, device='cuda:0')
episode: 403 training return: tensor(-311.0027, device='cuda:0')
epoch: 101 test_true_pfm: 5565.468672355244 sim_pfm: -217.42068604906672
episode: 404 training return: tensor(-311.5520, device='cuda:0')
episode: 405 training return: tensor(-462.1355, device='cuda:0')
episode: 406 training return: tensor(-369.2941, device='cuda:0')
episode: 407 training return: tensor(-312.0636, device='cuda:0')
epoch: 102 test_true_pfm: 5503.357043185002 sim_pfm: -213.85030421147044
episode: 408 training return: tensor(-346.3899, device='cuda:0')
episode: 409 training return: tensor(-280.4845, device='cuda:0')
episode: 410 training return: tensor(-364.5392, device='cuda:0')
episode: 411 training return: tensor(-315.7653, device='cuda:0')
epoch: 103 test_true_pfm: 5605.536681076045 sim_pfm: -220.23576333065284
episode: 412 training return: tensor(-376.1082, device='cuda:0')
episode: 413 training return: tensor(-346.2027, device='cuda:0')
episode: 414 training return: tensor(-304.8214, device='cuda:0')
episode: 415 training return: tensor(-343.8872, device='cuda:0')
epoch: 104 test_true_pfm: 5507.883720424413 sim_pfm: -225.29155327306944
episode: 416 training return: tensor(-358.9836, device='cuda:0')
episode: 417 training return: tensor(-372.8405, device='cuda:0')
episode: 418 training return: tensor(-351.5967, device='cuda:0')
episode: 419 training return: tensor(-366.9729, device='cuda:0')
epoch: 105 test_true_pfm: 5491.304308225405 sim_pfm: -238.88402179741146
episode: 420 training return: tensor(-411.4278, device='cuda:0')
episode: 421 training return: tensor(-389.5979, device='cuda:0')
episode: 422 training return: tensor(-395.2619, device='cuda:0')
episode: 423 training return: tensor(-365.0727, device='cuda:0')
epoch: 106 test_true_pfm: 5650.076778251579 sim_pfm: -272.40879872697406
episode: 424 training return: tensor(-336.3894, device='cuda:0')
episode: 425 training return: tensor(-315.5732, device='cuda:0')
episode: 426 training return: tensor(-441.9265, device='cuda:0')
episode: 427 training return: tensor(-406.3803, device='cuda:0')
epoch: 107 test_true_pfm: 5515.512223827362 sim_pfm: -247.68307665062216
episode: 428 training return: tensor(-371.4144, device='cuda:0')
episode: 429 training return: tensor(-344.3576, device='cuda:0')
episode: 430 training return: tensor(-306.2984, device='cuda:0')
episode: 431 training return: tensor(-442.0639, device='cuda:0')
epoch: 108 test_true_pfm: 5549.8775969929 sim_pfm: -258.8571865094612
episode: 432 training return: tensor(-312.9689, device='cuda:0')
episode: 433 training return: tensor(-327.3123, device='cuda:0')
episode: 434 training return: tensor(-307.5882, device='cuda:0')
episode: 435 training return: tensor(-341.6421, device='cuda:0')
epoch: 109 test_true_pfm: 5533.426452550113 sim_pfm: -301.31656481302343
episode: 436 training return: tensor(-347.6901, device='cuda:0')
episode: 437 training return: tensor(-417.8962, device='cuda:0')
episode: 438 training return: tensor(-352.8411, device='cuda:0')
episode: 439 training return: tensor(-258.6283, device='cuda:0')
epoch: 110 test_true_pfm: 5605.571309950649 sim_pfm: -281.49130017488886
episode: 440 training return: tensor(-323.2469, device='cuda:0')
episode: 441 training return: tensor(-395.3088, device='cuda:0')
episode: 442 training return: tensor(-359.8256, device='cuda:0')
episode: 443 training return: tensor(-267.3000, device='cuda:0')
epoch: 111 test_true_pfm: 5577.951808615129 sim_pfm: -258.7987255778086
episode: 444 training return: tensor(-313.8410, device='cuda:0')
episode: 445 training return: tensor(-303.9425, device='cuda:0')
episode: 446 training return: tensor(-305.4364, device='cuda:0')
episode: 447 training return: tensor(-401.4224, device='cuda:0')
epoch: 112 test_true_pfm: 5505.3247580479 sim_pfm: -265.27583788963966
episode: 448 training return: tensor(-373.3648, device='cuda:0')
episode: 449 training return: tensor(-391.7215, device='cuda:0')
episode: 450 training return: tensor(-374.1012, device='cuda:0')
episode: 451 training return: tensor(-413.3869, device='cuda:0')
epoch: 113 test_true_pfm: 5500.671418925697 sim_pfm: -230.17087223482667
episode: 452 training return: tensor(-300.0004, device='cuda:0')
episode: 453 training return: tensor(-384.2783, device='cuda:0')
episode: 454 training return: tensor(-351.3943, device='cuda:0')
episode: 455 training return: tensor(-323.5620, device='cuda:0')
epoch: 114 test_true_pfm: 5491.359353829882 sim_pfm: -246.00036549626384
episode: 456 training return: tensor(-351.4762, device='cuda:0')
episode: 457 training return: tensor(-398.5038, device='cuda:0')
episode: 458 training return: tensor(-272.5087, device='cuda:0')
episode: 459 training return: tensor(-320.9875, device='cuda:0')
epoch: 115 test_true_pfm: 5595.2258826901925 sim_pfm: -237.92120659981933
episode: 460 training return: tensor(-317.0471, device='cuda:0')
episode: 461 training return: tensor(-361.7370, device='cuda:0')
episode: 462 training return: tensor(-336.5225, device='cuda:0')
episode: 463 training return: tensor(-354.7794, device='cuda:0')
epoch: 116 test_true_pfm: 5447.609611409229 sim_pfm: -248.84940496286922
episode: 464 training return: tensor(-319.1879, device='cuda:0')
episode: 465 training return: tensor(-362.3881, device='cuda:0')
episode: 466 training return: tensor(-358.6211, device='cuda:0')
episode: 467 training return: tensor(-369.3199, device='cuda:0')
epoch: 117 test_true_pfm: 5555.311453688402 sim_pfm: -231.25171195919393
episode: 468 training return: tensor(-298.7875, device='cuda:0')
episode: 469 training return: tensor(-266.5329, device='cuda:0')
episode: 470 training return: tensor(-420.4242, device='cuda:0')
episode: 471 training return: tensor(-310.5763, device='cuda:0')
epoch: 118 test_true_pfm: 5571.613718121066 sim_pfm: -275.8324632940348
episode: 472 training return: tensor(-309.5358, device='cuda:0')
episode: 473 training return: tensor(-370.5802, device='cuda:0')
episode: 474 training return: tensor(-280.1113, device='cuda:0')
episode: 475 training return: tensor(-393.5678, device='cuda:0')
epoch: 119 test_true_pfm: 5463.768722187929 sim_pfm: -205.21559511332694
episode: 476 training return: tensor(-291.1037, device='cuda:0')
episode: 477 training return: tensor(-348.2201, device='cuda:0')
episode: 478 training return: tensor(-418.2347, device='cuda:0')
episode: 479 training return: tensor(-251.5969, device='cuda:0')
epoch: 120 test_true_pfm: 5526.660629814162 sim_pfm: -232.56283193688918
episode: 480 training return: tensor(-252.8640, device='cuda:0')
episode: 481 training return: tensor(-293.8103, device='cuda:0')
episode: 482 training return: tensor(-315.0654, device='cuda:0')
episode: 483 training return: tensor(-342.7610, device='cuda:0')
epoch: 121 test_true_pfm: 5589.774247096085 sim_pfm: -202.24486690672347
episode: 484 training return: tensor(-348.2131, device='cuda:0')
episode: 485 training return: tensor(-415.2353, device='cuda:0')
episode: 486 training return: tensor(-297.4409, device='cuda:0')
episode: 487 training return: tensor(-312.6686, device='cuda:0')
epoch: 122 test_true_pfm: 5576.504450896176 sim_pfm: -256.27385221699177
episode: 488 training return: tensor(-309.8191, device='cuda:0')
episode: 489 training return: tensor(-273.8294, device='cuda:0')
episode: 490 training return: tensor(-375.7321, device='cuda:0')
episode: 491 training return: tensor(-329.6648, device='cuda:0')
epoch: 123 test_true_pfm: 5601.575863723231 sim_pfm: -236.84391806080626
episode: 492 training return: tensor(-692.1496, device='cuda:0')
episode: 493 training return: tensor(-308.5540, device='cuda:0')
episode: 494 training return: tensor(-372.8435, device='cuda:0')
episode: 495 training return: tensor(-407.6486, device='cuda:0')
epoch: 124 test_true_pfm: 5588.541574625859 sim_pfm: -264.7645171305242
episode: 496 training return: tensor(-307.5621, device='cuda:0')
episode: 497 training return: tensor(-266.4958, device='cuda:0')
episode: 498 training return: tensor(-279.8051, device='cuda:0')
episode: 499 training return: tensor(-348.1031, device='cuda:0')
epoch: 125 test_true_pfm: 5545.428766151687 sim_pfm: -193.6143150785647
episode: 500 training return: tensor(-394.3913, device='cuda:0')
episode: 501 training return: tensor(-363.0251, device='cuda:0')
episode: 502 training return: tensor(-364.7160, device='cuda:0')
episode: 503 training return: tensor(-322.9245, device='cuda:0')
epoch: 126 test_true_pfm: 5523.7245672034405 sim_pfm: -295.92970312569133
episode: 504 training return: tensor(-250.1819, device='cuda:0')
episode: 505 training return: tensor(-354.2701, device='cuda:0')
episode: 506 training return: tensor(-292.2833, device='cuda:0')
episode: 507 training return: tensor(-410.7329, device='cuda:0')
epoch: 127 test_true_pfm: 5575.92344765347 sim_pfm: -196.5677381733452
episode: 508 training return: tensor(-287.5223, device='cuda:0')
episode: 509 training return: tensor(-281.8935, device='cuda:0')
episode: 510 training return: tensor(-357.8855, device='cuda:0')
episode: 511 training return: tensor(-313.2726, device='cuda:0')
epoch: 128 test_true_pfm: 5595.082907902367 sim_pfm: -254.6221762528488
episode: 512 training return: tensor(-307.5587, device='cuda:0')
episode: 513 training return: tensor(-280.7492, device='cuda:0')
episode: 514 training return: tensor(-372.9906, device='cuda:0')
episode: 515 training return: tensor(-310.6980, device='cuda:0')
epoch: 129 test_true_pfm: 5422.677376966961 sim_pfm: -245.2616821230913
episode: 516 training return: tensor(-283.0090, device='cuda:0')
episode: 517 training return: tensor(-338.4020, device='cuda:0')
episode: 518 training return: tensor(-311.6870, device='cuda:0')
episode: 519 training return: tensor(-322.0418, device='cuda:0')
epoch: 130 test_true_pfm: 5644.035775562967 sim_pfm: -240.80575616830416
episode: 520 training return: tensor(-358.9128, device='cuda:0')
episode: 521 training return: tensor(-297.7703, device='cuda:0')
episode: 522 training return: tensor(-314.2901, device='cuda:0')
episode: 523 training return: tensor(-389.0923, device='cuda:0')
epoch: 131 test_true_pfm: 5562.8464273142245 sim_pfm: -276.4357512065035
episode: 524 training return: tensor(-384.9077, device='cuda:0')
episode: 525 training return: tensor(-265.2935, device='cuda:0')
episode: 526 training return: tensor(-348.9344, device='cuda:0')
episode: 527 training return: tensor(-308.0446, device='cuda:0')
epoch: 132 test_true_pfm: 5484.168128524162 sim_pfm: -224.0671490902896
episode: 528 training return: tensor(-376.4647, device='cuda:0')
episode: 529 training return: tensor(-353.2894, device='cuda:0')
episode: 530 training return: tensor(-391.3713, device='cuda:0')
episode: 531 training return: tensor(-320.4220, device='cuda:0')
epoch: 133 test_true_pfm: 5599.81322603067 sim_pfm: -254.0315117656719
episode: 532 training return: tensor(-324.8031, device='cuda:0')
episode: 533 training return: tensor(-452.9693, device='cuda:0')
episode: 534 training return: tensor(-411.2038, device='cuda:0')
episode: 535 training return: tensor(-463.5660, device='cuda:0')
epoch: 134 test_true_pfm: 5556.469430882999 sim_pfm: -247.84721187026784
episode: 536 training return: tensor(-302.3150, device='cuda:0')
episode: 537 training return: tensor(-359.8498, device='cuda:0')
episode: 538 training return: tensor(-327.1212, device='cuda:0')
episode: 539 training return: tensor(-319.5337, device='cuda:0')
epoch: 135 test_true_pfm: 5561.465709763424 sim_pfm: -236.84345700762546
episode: 540 training return: tensor(-251.9961, device='cuda:0')
episode: 541 training return: tensor(-326.6625, device='cuda:0')
episode: 542 training return: tensor(-320.9321, device='cuda:0')
episode: 543 training return: tensor(-377.9262, device='cuda:0')
epoch: 136 test_true_pfm: 5527.650890097699 sim_pfm: -186.84688718169733
episode: 544 training return: tensor(-388.9200, device='cuda:0')
episode: 545 training return: tensor(-310.0736, device='cuda:0')
episode: 546 training return: tensor(-360.5246, device='cuda:0')
episode: 547 training return: tensor(-228.4165, device='cuda:0')
epoch: 137 test_true_pfm: 5610.012487969189 sim_pfm: -277.1136160574388
episode: 548 training return: tensor(-363.8506, device='cuda:0')
episode: 549 training return: tensor(-395.0676, device='cuda:0')
episode: 550 training return: tensor(-365.5580, device='cuda:0')
episode: 551 training return: tensor(-280.1127, device='cuda:0')
epoch: 138 test_true_pfm: 5649.274299135151 sim_pfm: -247.8318844899574
episode: 552 training return: tensor(-308.8279, device='cuda:0')
episode: 553 training return: tensor(-377.5884, device='cuda:0')
episode: 554 training return: tensor(-292.0869, device='cuda:0')
episode: 555 training return: tensor(-331.9065, device='cuda:0')
epoch: 139 test_true_pfm: 5609.854981304282 sim_pfm: -252.47494488041653
episode: 556 training return: tensor(-334.3855, device='cuda:0')
episode: 557 training return: tensor(-298.2593, device='cuda:0')
episode: 558 training return: tensor(-346.9914, device='cuda:0')
episode: 559 training return: tensor(-401.0910, device='cuda:0')
epoch: 140 test_true_pfm: 5602.716607326397 sim_pfm: -236.26536044473565
episode: 560 training return: tensor(-307.7674, device='cuda:0')
episode: 561 training return: tensor(-342.3057, device='cuda:0')
episode: 562 training return: tensor(-367.6778, device='cuda:0')
episode: 563 training return: tensor(-393.5158, device='cuda:0')
epoch: 141 test_true_pfm: 5557.262671804336 sim_pfm: -234.85226352348886
episode: 564 training return: tensor(-386.8655, device='cuda:0')
episode: 565 training return: tensor(-252.5450, device='cuda:0')
episode: 566 training return: tensor(-326.4533, device='cuda:0')
episode: 567 training return: tensor(-319.5612, device='cuda:0')
epoch: 142 test_true_pfm: 5502.998089019559 sim_pfm: -243.53262943864684
episode: 568 training return: tensor(-367.1864, device='cuda:0')
episode: 569 training return: tensor(-325.8130, device='cuda:0')
episode: 570 training return: tensor(-254.1523, device='cuda:0')
episode: 571 training return: tensor(-386.8988, device='cuda:0')
epoch: 143 test_true_pfm: 5603.087752538327 sim_pfm: -254.8476206715762
episode: 572 training return: tensor(-283.9227, device='cuda:0')
episode: 573 training return: tensor(-271.1320, device='cuda:0')
episode: 574 training return: tensor(-330.9785, device='cuda:0')
episode: 575 training return: tensor(-343.6610, device='cuda:0')
epoch: 144 test_true_pfm: 5585.871986383218 sim_pfm: -209.36331074710083
episode: 576 training return: tensor(-351.8089, device='cuda:0')
episode: 577 training return: tensor(-318.0438, device='cuda:0')
episode: 578 training return: tensor(-376.1841, device='cuda:0')
episode: 579 training return: tensor(-449.0414, device='cuda:0')
epoch: 145 test_true_pfm: 5559.155711602497 sim_pfm: -250.31625121169296
episode: 580 training return: tensor(-306.9770, device='cuda:0')
episode: 581 training return: tensor(-243.5510, device='cuda:0')
episode: 582 training return: tensor(-358.8269, device='cuda:0')
episode: 583 training return: tensor(-340.9809, device='cuda:0')
epoch: 146 test_true_pfm: 5589.5931070466895 sim_pfm: -226.6049423852431
episode: 584 training return: tensor(-461.3812, device='cuda:0')
episode: 585 training return: tensor(-326.0401, device='cuda:0')
episode: 586 training return: tensor(-337.3136, device='cuda:0')
episode: 587 training return: tensor(-283.9237, device='cuda:0')
epoch: 147 test_true_pfm: 5591.653142835029 sim_pfm: -256.23314174812793
episode: 588 training return: tensor(-304.9432, device='cuda:0')
episode: 589 training return: tensor(-254.3893, device='cuda:0')
episode: 590 training return: tensor(-263.2729, device='cuda:0')
episode: 591 training return: tensor(-256.2984, device='cuda:0')
epoch: 148 test_true_pfm: 5665.125049923285 sim_pfm: -249.26616278213137
episode: 592 training return: tensor(-312.4455, device='cuda:0')
episode: 593 training return: tensor(-322.8357, device='cuda:0')
episode: 594 training return: tensor(-301.4570, device='cuda:0')
episode: 595 training return: tensor(-291.6172, device='cuda:0')
epoch: 149 test_true_pfm: 5569.731626529225 sim_pfm: -285.3687978759796
episode: 596 training return: tensor(-306.9280, device='cuda:0')
episode: 597 training return: tensor(-381.0793, device='cuda:0')
episode: 598 training return: tensor(-280.7812, device='cuda:0')
episode: 599 training return: tensor(-316.0086, device='cuda:0')
epoch: 150 test_true_pfm: 5502.915943003455 sim_pfm: -282.1699594517316
