['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'behavior', '--traj', 'mixed', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 0.40921601936221125 test_loss: 0.39804782867431643
epoch: 1 training_loss 0.36097625076770784 test_loss: 0.35844275951385496
epoch: 2 training_loss 0.36727493375539777 test_loss: 0.3463705062866211
epoch: 3 training_loss 0.3433723060786724 test_loss: 0.372418475151062
epoch: 4 training_loss 0.3577382865548134 test_loss: 0.36216328144073484
epoch: 5 training_loss 0.3555959242582321 test_loss: 0.32338519096374513
epoch: 6 training_loss 0.35529580518603326 test_loss: 0.34308762550354005
epoch: 7 training_loss 0.34043742299079893 test_loss: 0.35590567588806155
epoch: 8 training_loss 0.3593254017829895 test_loss: 0.3585461139678955
epoch: 9 training_loss 0.35348990112543105 test_loss: 0.351385760307312
epoch: 10 training_loss 0.3432000383734703 test_loss: 0.34640960693359374
epoch: 11 training_loss 0.3340568122267723 test_loss: 0.36652774810791017
epoch: 12 training_loss 0.34930042415857315 test_loss: 0.3450861215591431
epoch: 13 training_loss 0.3424211472272873 test_loss: 0.34070847034454343
epoch: 14 training_loss 0.34569077447056773 test_loss: 0.3556759595870972
epoch: 15 training_loss 0.33854964673519133 test_loss: 0.35757362842559814
epoch: 16 training_loss 0.34258357077836993 test_loss: 0.3333817720413208
epoch: 17 training_loss 0.35171639621257783 test_loss: 0.3560843229293823
epoch: 18 training_loss 0.34467410057783127 test_loss: 0.34545063972473145
epoch: 19 training_loss 0.3405589272081852 test_loss: 0.3690673589706421
epoch: 20 training_loss 0.33750646814703944 test_loss: 0.3885409593582153
epoch: 21 training_loss 0.33460057571530344 test_loss: 0.3645353078842163
epoch: 22 training_loss 0.34232223525643346 test_loss: 0.34689817428588865
epoch: 23 training_loss 0.3437545210123062 test_loss: 0.3655911207199097
epoch: 24 training_loss 0.3398437625169754 test_loss: 0.34189610481262206
epoch: 25 training_loss 0.3533768980205059 test_loss: 0.3405597448348999
epoch: 26 training_loss 0.34392244517803194 test_loss: 0.34637298583984377
epoch: 27 training_loss 0.3398949944972992 test_loss: 0.328432297706604
epoch: 28 training_loss 0.3433487895131111 test_loss: 0.35836052894592285
epoch: 29 training_loss 0.34952846869826315 test_loss: 0.32535104751586913
epoch: 30 training_loss 0.3381326296925545 test_loss: 0.353656268119812
epoch: 31 training_loss 0.34806115344166755 test_loss: 0.35137310028076174
epoch: 32 training_loss 0.346203420907259 test_loss: 0.3292132139205933
epoch: 33 training_loss 0.3341454426944256 test_loss: 0.3509513854980469
epoch: 34 training_loss 0.3434058938920498 test_loss: 0.3445206880569458
epoch: 35 training_loss 0.34258477479219435 test_loss: 0.31967408657073976
epoch: 36 training_loss 0.34208443865180016 test_loss: 0.34696216583251954
epoch: 37 training_loss 0.3398785062134266 test_loss: 0.3273088216781616
epoch: 38 training_loss 0.34040491566061976 test_loss: 0.3560492992401123
epoch: 39 training_loss 0.3328073087334633 test_loss: 0.3565077781677246
epoch: 40 training_loss 0.32667418584227564 test_loss: 0.34893093109130857
epoch: 41 training_loss 0.33072797939181325 test_loss: 0.35506582260131836
epoch: 42 training_loss 0.3437680377066135 test_loss: 0.36085236072540283
epoch: 43 training_loss 0.3427616718411446 test_loss: 0.3111231565475464
epoch: 44 training_loss 0.3363607130944729 test_loss: 0.33306639194488524
epoch: 45 training_loss 0.3370124737918377 test_loss: 0.3532984495162964
epoch: 46 training_loss 0.34483891636133196 test_loss: 0.33550856113433836
epoch: 47 training_loss 0.3457983860373497 test_loss: 0.3489236831665039
epoch: 48 training_loss 0.34406020894646644 test_loss: 0.32103695869445803
epoch: 49 training_loss 0.338666545599699 test_loss: 0.3310281038284302
epoch: 50 training_loss 0.3413446009159088 test_loss: 0.3340418577194214
epoch: 51 training_loss 0.3585173961520195 test_loss: 0.35836913585662844
epoch: 52 training_loss 0.3478902304172516 test_loss: 0.35153243541717527
epoch: 53 training_loss 0.34141445979475976 test_loss: 0.33201329708099364
epoch: 54 training_loss 0.3469475443661213 test_loss: 0.3392614841461182
epoch: 55 training_loss 0.3412262028455734 test_loss: 0.34995326995849607
epoch: 56 training_loss 0.3430403985083103 test_loss: 0.32828693389892577
epoch: 57 training_loss 0.3389865846931934 test_loss: 0.348524284362793
epoch: 58 training_loss 0.3376235334575176 test_loss: 0.347147798538208
epoch: 59 training_loss 0.33342310041189194 test_loss: 0.35505897998809816
epoch: 60 training_loss 0.33683097511529925 test_loss: 0.34380857944488524
epoch: 61 training_loss 0.3334967961907387 test_loss: 0.3678027391433716
epoch: 62 training_loss 0.3403860719501972 test_loss: 0.3431380033493042
epoch: 63 training_loss 0.34201275154948235 test_loss: 0.3220071315765381
epoch: 64 training_loss 0.3442519898712635 test_loss: 0.3399446725845337
epoch: 65 training_loss 0.34154275447130206 test_loss: 0.36601905822753905
epoch: 66 training_loss 0.3338700357079506 test_loss: 0.3316666841506958
epoch: 67 training_loss 0.3403225687146187 test_loss: 0.31085500717163084
epoch: 68 training_loss 0.334619667083025 test_loss: 0.3686178922653198
epoch: 69 training_loss 0.3334816916286945 test_loss: 0.34945068359375
epoch: 70 training_loss 0.3340009154379368 test_loss: 0.3375631093978882
epoch: 71 training_loss 0.33774712786078454 test_loss: 0.3438882827758789
epoch: 72 training_loss 0.3378780768811703 test_loss: 0.34919564723968505
epoch: 73 training_loss 0.34113177672028544 test_loss: 0.35713355541229247
epoch: 74 training_loss 0.3330861546099186 test_loss: 0.34000041484832766
epoch: 75 training_loss 0.32963852494955065 test_loss: 0.31982419490814207
epoch: 76 training_loss 0.33034002885222435 test_loss: 0.3544776201248169
epoch: 77 training_loss 0.3341346964240074 test_loss: 0.36436150074005125
epoch: 78 training_loss 0.34833166509866714 test_loss: 0.3462237358093262
epoch: 79 training_loss 0.3397745674848556 test_loss: 0.317610764503479
epoch: 80 training_loss 0.33668839395046235 test_loss: 0.33741686344146726
epoch: 81 training_loss 0.34745368629693985 test_loss: 0.34994001388549806
epoch: 82 training_loss 0.34566237971186636 test_loss: 0.35105094909667967
epoch: 83 training_loss 0.3399538236856461 test_loss: 0.345028018951416
epoch: 84 training_loss 0.340158753246069 test_loss: 0.343707799911499
epoch: 85 training_loss 0.34422903403639793 test_loss: 0.3515281677246094
epoch: 86 training_loss 0.3410755677521229 test_loss: 0.3251294374465942
epoch: 87 training_loss 0.3388777121901512 test_loss: 0.3324683427810669
epoch: 88 training_loss 0.3402339819073677 test_loss: 0.3507507562637329
epoch: 89 training_loss 0.34908162951469424 test_loss: 0.32805964946746824
epoch: 90 training_loss 0.34357502847909926 test_loss: 0.33167271614074706
epoch: 91 training_loss 0.32963063061237335 test_loss: 0.34635062217712403
epoch: 92 training_loss 0.3382984045147896 test_loss: 0.33833601474761965
epoch: 93 training_loss 0.3383768084645271 test_loss: 0.33576240539550783
epoch: 94 training_loss 0.3342933413386345 test_loss: 0.35310826301574705
epoch: 95 training_loss 0.3400080817937851 test_loss: 0.33123347759246824
epoch: 96 training_loss 0.34210359513759614 test_loss: 0.35306107997894287
epoch: 97 training_loss 0.3487635062634945 test_loss: 0.3338047504425049
epoch: 98 training_loss 0.35142053380608557 test_loss: 0.34930431842803955
epoch: 99 training_loss 0.3393389093875885 test_loss: 0.34910705089569094
epoch: 100 training_loss 0.3330009678006172 test_loss: 0.3386970043182373
epoch: 101 training_loss 0.33559242844581605 test_loss: 0.3382575273513794
epoch: 102 training_loss 0.3445275889337063 test_loss: 0.33440101146698
epoch: 103 training_loss 0.3465080666542053 test_loss: 0.35515618324279785
epoch: 104 training_loss 0.33650349780917166 test_loss: 0.3185628890991211
epoch: 105 training_loss 0.3419752667844296 test_loss: 0.33861231803894043
epoch: 106 training_loss 0.32518089398741723 test_loss: 0.35988483428955076
epoch: 107 training_loss 0.33796164140105245 test_loss: 0.3454607963562012
epoch: 108 training_loss 0.3380244742333889 test_loss: 0.3400815486907959
epoch: 109 training_loss 0.3320781488716602 test_loss: 0.3394617557525635
epoch: 110 training_loss 0.33220286935567855 test_loss: 0.33545784950256347
epoch: 111 training_loss 0.3292324161529541 test_loss: 0.34439830780029296
epoch: 112 training_loss 0.3371651792526245 test_loss: 0.3614001750946045
epoch: 113 training_loss 0.3471051688492298 test_loss: 0.3337740659713745
epoch: 114 training_loss 0.34265883937478064 test_loss: 0.35281240940093994
epoch: 115 training_loss 0.3301875641942024 test_loss: 0.34843409061431885
epoch: 116 training_loss 0.3363528661429882 test_loss: 0.33257856369018557
epoch: 117 training_loss 0.3367832015454769 test_loss: 0.34149789810180664
epoch: 118 training_loss 0.3456165526807308 test_loss: 0.3229890584945679
epoch: 119 training_loss 0.34113181844353674 test_loss: 0.31545062065124513
epoch: 120 training_loss 0.3442280514538288 test_loss: 0.34212279319763184
epoch: 121 training_loss 0.3353232312202454 test_loss: 0.34596028327941897
epoch: 122 training_loss 0.3392454390227795 test_loss: 0.3594348907470703
epoch: 123 training_loss 0.34555674403905867 test_loss: 0.35931994915008547
epoch: 124 training_loss 0.3357913732528687 test_loss: 0.3079927682876587
epoch: 125 training_loss 0.3374473966658115 test_loss: 0.3516183137893677
epoch: 126 training_loss 0.34021260008215903 test_loss: 0.3414206266403198
epoch: 127 training_loss 0.3347973103821278 test_loss: 0.3573646306991577
epoch: 128 training_loss 0.33591620638966563 test_loss: 0.33281779289245605
epoch: 129 training_loss 0.32371626183390617 test_loss: 0.3222837448120117
epoch: 130 training_loss 0.33292662233114245 test_loss: 0.3257495164871216
epoch: 131 training_loss 0.33927787020802497 test_loss: 0.3633860111236572
epoch: 132 training_loss 0.34783159047365186 test_loss: 0.35682704448699953
epoch: 133 training_loss 0.34824050068855283 test_loss: 0.3278160333633423
epoch: 134 training_loss 0.33600000575184824 test_loss: 0.36207332611083987
epoch: 135 training_loss 0.3269547866284847 test_loss: 0.3371375322341919
epoch: 136 training_loss 0.33261751487851143 test_loss: 0.3283299684524536
epoch: 137 training_loss 0.33860732927918435 test_loss: 0.35193595886230467
epoch: 138 training_loss 0.34199312075972554 test_loss: 0.3483868598937988
epoch: 139 training_loss 0.3357354173064232 test_loss: 0.3575885772705078
epoch: 140 training_loss 0.3351187226176262 test_loss: 0.3375707149505615
epoch: 141 training_loss 0.34037518575787545 test_loss: 0.31948602199554443
epoch: 142 training_loss 0.3336139735579491 test_loss: 0.3321811676025391
epoch: 143 training_loss 0.3335445526242256 test_loss: 0.35505475997924807
epoch: 144 training_loss 0.3494306568801403 test_loss: 0.3561686038970947
epoch: 145 training_loss 0.33928268238902093 test_loss: 0.3391650438308716
epoch: 146 training_loss 0.33510488986968995 test_loss: 0.3457270383834839
epoch: 147 training_loss 0.33513991311192515 test_loss: 0.3377847671508789
epoch: 148 training_loss 0.32904492437839505 test_loss: 0.32587556838989257
epoch: 149 training_loss 0.3374213108420372 test_loss: 0.3491663932800293
epoch: 0 training_loss 39.01853902816772 test_loss: 28.293768310546874
epoch: 1 training_loss 23.531154689788817 test_loss: 20.399678039550782
epoch: 2 training_loss 18.048778924942017 test_loss: 16.865208435058594
epoch: 3 training_loss 15.807412042617798 test_loss: 15.26672821044922
epoch: 4 training_loss 14.07274227142334 test_loss: 13.351866149902344
epoch: 5 training_loss 12.97299292564392 test_loss: 12.623625946044921
epoch: 6 training_loss 12.144215049743652 test_loss: 12.286157989501953
epoch: 7 training_loss 11.6340771484375 test_loss: 11.37154312133789
epoch: 8 training_loss 11.017019844055175 test_loss: 11.137336730957031
epoch: 9 training_loss 10.57522629737854 test_loss: 10.35608139038086
epoch: 10 training_loss 9.974988527297974 test_loss: 10.359271240234374
epoch: 11 training_loss 9.918191499710083 test_loss: 9.831991577148438
epoch: 12 training_loss 9.480584473609925 test_loss: 9.779840850830078
epoch: 13 training_loss 9.278757243156432 test_loss: 9.590351867675782
epoch: 14 training_loss 9.17810655117035 test_loss: 9.164850616455078
epoch: 15 training_loss 9.082207612991333 test_loss: 8.544380950927735
epoch: 16 training_loss 8.850316591262818 test_loss: 8.895321655273438
epoch: 17 training_loss 8.738932404518128 test_loss: 9.099811553955078
epoch: 18 training_loss 8.32608503818512 test_loss: 8.359114837646484
epoch: 19 training_loss 8.340977745056152 test_loss: 8.171131134033203
epoch: 20 training_loss 8.156575117111206 test_loss: 8.476412963867187
epoch: 21 training_loss 8.125439205169677 test_loss: 8.24381332397461
epoch: 22 training_loss 7.858315372467041 test_loss: 8.383531951904297
epoch: 23 training_loss 7.883670482635498 test_loss: 7.804891967773438
epoch: 24 training_loss 7.656805529594421 test_loss: 7.820860290527344
epoch: 25 training_loss 7.682106986045837 test_loss: 7.612734985351563
epoch: 26 training_loss 7.548391089439392 test_loss: 7.631455993652343
epoch: 27 training_loss 7.486996631622315 test_loss: 7.2215019226074215
epoch: 28 training_loss 7.363928184509278 test_loss: 7.134024810791016
epoch: 29 training_loss 7.337432703971863 test_loss: 7.677154541015625
epoch: 30 training_loss 7.291112112998962 test_loss: 7.313578796386719
epoch: 31 training_loss 7.140670294761658 test_loss: 7.037761688232422
epoch: 32 training_loss 6.97068943977356 test_loss: 6.789826202392578
epoch: 33 training_loss 7.008440089225769 test_loss: 7.073280334472656
epoch: 34 training_loss 7.093336334228516 test_loss: 7.466325378417968
epoch: 35 training_loss 6.735345516204834 test_loss: 6.847981262207031
epoch: 36 training_loss 6.753816905021668 test_loss: 6.908196258544922
epoch: 37 training_loss 6.642699484825134 test_loss: 6.540323638916016
epoch: 38 training_loss 6.49533323764801 test_loss: 6.435662841796875
epoch: 39 training_loss 6.458944630622864 test_loss: 6.186261367797852
epoch: 40 training_loss 6.445875024795532 test_loss: 6.284395217895508
epoch: 41 training_loss 6.458150625228882 test_loss: 6.3082939147949215
epoch: 42 training_loss 6.2332790660858155 test_loss: 6.451854705810547
epoch: 43 training_loss 6.260052018165588 test_loss: 6.423037719726563
epoch: 44 training_loss 6.066085829734802 test_loss: 5.9770549774169925
epoch: 45 training_loss 6.001798801422119 test_loss: 5.937236404418945
epoch: 46 training_loss 5.914109673500061 test_loss: 5.963188934326172
epoch: 47 training_loss 6.005041699409485 test_loss: 6.385031127929688
epoch: 48 training_loss 6.159355020523071 test_loss: 6.016242599487304
epoch: 49 training_loss 5.727702941894531 test_loss: 5.6566017150878904
epoch: 50 training_loss 5.780379071235656 test_loss: 5.5199932098388675
epoch: 51 training_loss 5.650622720718384 test_loss: 5.851080703735351
epoch: 52 training_loss 5.738255896568298 test_loss: 5.949073791503906
epoch: 53 training_loss 5.590823140144348 test_loss: 5.439794921875
epoch: 54 training_loss 5.529624228477478 test_loss: 5.827293395996094
epoch: 55 training_loss 5.690092329978943 test_loss: 5.704536819458008
epoch: 56 training_loss 5.428794856071472 test_loss: 5.626879119873047
epoch: 57 training_loss 5.358166754245758 test_loss: 5.173457336425781
epoch: 58 training_loss 5.218739595413208 test_loss: 5.416464614868164
epoch: 59 training_loss 5.327368240356446 test_loss: 5.642013168334961
epoch: 60 training_loss 5.385763983726502 test_loss: 6.518039703369141
epoch: 61 training_loss 5.3244229245185855 test_loss: 5.660090637207031
epoch: 62 training_loss 5.317717409133911 test_loss: 5.2206169128417965
epoch: 63 training_loss 5.152540254592895 test_loss: 5.149711990356446
epoch: 64 training_loss 5.103883829116821 test_loss: 4.988341522216797
epoch: 65 training_loss 5.067744855880737 test_loss: 5.149779510498047
epoch: 66 training_loss 5.196794300079346 test_loss: 5.219632720947265
epoch: 67 training_loss 4.9546108818054195 test_loss: 5.059322738647461
epoch: 68 training_loss 5.028979341983796 test_loss: 5.257261657714844
epoch: 69 training_loss 4.894353337287903 test_loss: 4.835359573364258
epoch: 70 training_loss 4.90770450592041 test_loss: 4.920143890380859
epoch: 71 training_loss 4.872431018352509 test_loss: 4.885043334960938
epoch: 72 training_loss 4.8595895981788635 test_loss: 5.3403167724609375
epoch: 73 training_loss 4.88770298242569 test_loss: 4.955304336547852
epoch: 74 training_loss 5.0382218527793885 test_loss: 5.246308135986328
epoch: 75 training_loss 4.806905837059021 test_loss: 5.018992233276367
epoch: 76 training_loss 4.6792650890350345 test_loss: 4.562849807739258
epoch: 77 training_loss 4.673930435180664 test_loss: 4.756104278564453
epoch: 78 training_loss 4.690579826831818 test_loss: 4.997640991210938
epoch: 79 training_loss 4.6780444169044495 test_loss: 4.413419342041015
epoch: 80 training_loss 4.616053111553192 test_loss: 4.730420684814453
epoch: 81 training_loss 4.6382571530342105 test_loss: 5.398720550537109
epoch: 82 training_loss 4.607728614807129 test_loss: 4.3907733917236325
epoch: 83 training_loss 4.622827770709992 test_loss: 4.562917327880859
epoch: 84 training_loss 4.566806828975677 test_loss: 4.645018768310547
epoch: 85 training_loss 4.434145481586456 test_loss: 4.529180526733398
epoch: 86 training_loss 4.3719497132301335 test_loss: 4.2964622497558596
epoch: 87 training_loss 4.308919804096222 test_loss: 4.368342590332031
epoch: 88 training_loss 4.337364485263825 test_loss: 4.529347991943359
epoch: 89 training_loss 4.369620320796966 test_loss: 4.196250915527344
epoch: 90 training_loss 4.342769989967346 test_loss: 4.29940185546875
epoch: 91 training_loss 4.373778555393219 test_loss: 4.436763763427734
epoch: 92 training_loss 4.330459668636322 test_loss: 4.112977600097656
epoch: 93 training_loss 4.43113079071045 test_loss: 4.302392578125
epoch: 94 training_loss 4.281502048969269 test_loss: 4.235943984985352
epoch: 95 training_loss 4.402955601215362 test_loss: 4.151259994506836
epoch: 96 training_loss 4.130966911315918 test_loss: 4.214898681640625
epoch: 97 training_loss 4.19233283996582 test_loss: 4.434152603149414
epoch: 98 training_loss 4.160419254302979 test_loss: 4.3451484680175785
epoch: 99 training_loss 4.10404851436615 test_loss: 3.964996337890625
epoch: 100 training_loss 4.181134521961212 test_loss: 4.174318313598633
epoch: 101 training_loss 4.102513935565948 test_loss: 4.0270122528076175
epoch: 102 training_loss 4.0946897745132445 test_loss: 4.5480304718017575
epoch: 103 training_loss 4.116374907493591 test_loss: 4.1219970703125
epoch: 104 training_loss 4.044121465682983 test_loss: 4.447673416137695
epoch: 105 training_loss 4.084927020072937 test_loss: 4.080446243286133
epoch: 106 training_loss 4.0848215246200565 test_loss: 3.9714927673339844
epoch: 107 training_loss 4.05333250284195 test_loss: 3.9554676055908202
epoch: 108 training_loss 4.016115250587464 test_loss: 4.154111099243164
epoch: 109 training_loss 3.9494353246688845 test_loss: 4.210539627075195
epoch: 110 training_loss 3.9494375085830686 test_loss: 4.060822296142578
epoch: 111 training_loss 3.931058015823364 test_loss: 3.8452667236328124
epoch: 112 training_loss 3.984490396976471 test_loss: 3.8040355682373046
epoch: 113 training_loss 3.9878147554397585 test_loss: 4.35328254699707
epoch: 114 training_loss 4.023223092556 test_loss: 4.197283935546875
epoch: 115 training_loss 3.856690742969513 test_loss: 3.944713592529297
epoch: 116 training_loss 3.8313406348228454 test_loss: 3.8141517639160156
epoch: 117 training_loss 3.9335091042518617 test_loss: 3.931096649169922
epoch: 118 training_loss 3.8007982921600343 test_loss: 3.9614799499511717
epoch: 119 training_loss 3.8619101905822752 test_loss: 4.031852722167969
epoch: 120 training_loss 3.8036123275756837 test_loss: 4.090860366821289
epoch: 121 training_loss 3.8562807607650758 test_loss: 3.831399917602539
epoch: 122 training_loss 3.768563709259033 test_loss: 3.7331233978271485
epoch: 123 training_loss 3.852221155166626 test_loss: 3.615968704223633
epoch: 124 training_loss 3.796080086231232 test_loss: 3.7716415405273436
epoch: 125 training_loss 3.850113413333893 test_loss: 3.925531768798828
epoch: 126 training_loss 3.8365533351898193 test_loss: 3.71131591796875
epoch: 127 training_loss 3.7229148030281065 test_loss: 3.502032470703125
epoch: 128 training_loss 3.8794072914123534 test_loss: 3.9322948455810547
epoch: 129 training_loss 3.778928816318512 test_loss: 3.816332244873047
epoch: 130 training_loss 3.8334122347831725 test_loss: 4.279177856445313
epoch: 131 training_loss 3.7351333594322202 test_loss: 4.059132766723633
epoch: 132 training_loss 3.7686497163772583 test_loss: 3.604702377319336
epoch: 133 training_loss 3.727532250881195 test_loss: 3.7694889068603517
epoch: 134 training_loss 3.7845657062530518 test_loss: 3.72880859375
epoch: 135 training_loss 3.651130475997925 test_loss: 3.556132507324219
epoch: 136 training_loss 3.668842244148254 test_loss: 3.5057018280029295
epoch: 137 training_loss 3.5884153747558596 test_loss: 3.6428722381591796
epoch: 138 training_loss 3.594100308418274 test_loss: 3.7283382415771484
epoch: 139 training_loss 3.5484082674980164 test_loss: 3.8637069702148437
epoch: 140 training_loss 3.5146463465690614 test_loss: 3.592479705810547
epoch: 141 training_loss 3.5859002590179445 test_loss: 3.236466979980469
epoch: 142 training_loss 3.616297001838684 test_loss: 3.6920665740966796
epoch: 143 training_loss 3.5409712028503417 test_loss: 3.921556854248047
epoch: 144 training_loss 3.413630435466766 test_loss: 3.6038349151611326
epoch: 145 training_loss 3.5489696288108825 test_loss: 3.458592987060547
epoch: 146 training_loss 3.4723376202583314 test_loss: 3.4056400299072265
epoch: 147 training_loss 3.4066853952407836 test_loss: 3.514975738525391
epoch: 148 training_loss 3.4722877812385557 test_loss: 3.4977081298828123
epoch: 149 training_loss 3.461868784427643 test_loss: 3.6132686614990233
56.523175001612984
episode: 0 training return: tensor(-10.0152, device='cuda:0')
episode: 1 training return: tensor(-23.2493, device='cuda:0')
episode: 2 training return: tensor(-19.9278, device='cuda:0')
episode: 3 training return: tensor(-43.6107, device='cuda:0')
epoch: 1 test_true_pfm: 57.572236588368796 sim_pfm: -28.171091510285624
episode: 4 training return: tensor(-30.2560, device='cuda:0')
episode: 5 training return: tensor(-30.1054, device='cuda:0')
episode: 6 training return: tensor(-12.3775, device='cuda:0')
episode: 7 training return: tensor(-29.7025, device='cuda:0')
epoch: 2 test_true_pfm: 68.75056430484145 sim_pfm: -3.2368278767564336
episode: 8 training return: tensor(-37.0544, device='cuda:0')
episode: 9 training return: tensor(-2.6470, device='cuda:0')
episode: 10 training return: tensor(-18.4329, device='cuda:0')
episode: 11 training return: tensor(-20.8737, device='cuda:0')
epoch: 3 test_true_pfm: 57.75681070502084 sim_pfm: -26.548569318657975
episode: 12 training return: tensor(-34.9792, device='cuda:0')
episode: 13 training return: tensor(-9.5349, device='cuda:0')
episode: 14 training return: tensor(-25.9658, device='cuda:0')
episode: 15 training return: tensor(-10.9548, device='cuda:0')
epoch: 4 test_true_pfm: 65.53056866140822 sim_pfm: -26.018286134727532
episode: 16 training return: tensor(-18.7560, device='cuda:0')
episode: 17 training return: tensor(-7.4913, device='cuda:0')
episode: 18 training return: tensor(-12.4233, device='cuda:0')
episode: 19 training return: tensor(-21.4302, device='cuda:0')
epoch: 5 test_true_pfm: 72.06113460940014 sim_pfm: -20.41758454045048
episode: 20 training return: tensor(-5.6696, device='cuda:0')
episode: 21 training return: tensor(-8.3346, device='cuda:0')
episode: 22 training return: tensor(22.1162, device='cuda:0')
episode: 23 training return: tensor(-29.6129, device='cuda:0')
epoch: 6 test_true_pfm: 60.24036818633152 sim_pfm: -4.363351410877658
episode: 24 training return: tensor(-36.5170, device='cuda:0')
episode: 25 training return: tensor(-25.5708, device='cuda:0')
episode: 26 training return: tensor(-33.1208, device='cuda:0')
episode: 27 training return: tensor(-30.6732, device='cuda:0')
epoch: 7 test_true_pfm: 56.442009917321535 sim_pfm: -28.93203694318072
episode: 28 training return: tensor(-4.8975, device='cuda:0')
episode: 29 training return: tensor(-8.7287, device='cuda:0')
episode: 30 training return: tensor(-29.3350, device='cuda:0')
episode: 31 training return: tensor(-31.7622, device='cuda:0')
epoch: 8 test_true_pfm: 70.62289722342987 sim_pfm: -10.061670491861879
episode: 32 training return: tensor(-15.9758, device='cuda:0')
episode: 33 training return: tensor(23.4794, device='cuda:0')
episode: 34 training return: tensor(14.8347, device='cuda:0')
episode: 35 training return: tensor(-22.4846, device='cuda:0')
epoch: 9 test_true_pfm: 69.67340692161483 sim_pfm: -15.67383278789348
episode: 36 training return: tensor(-5.0243, device='cuda:0')
episode: 37 training return: tensor(-13.2486, device='cuda:0')
episode: 38 training return: tensor(-37.1078, device='cuda:0')
episode: 39 training return: tensor(-34.8419, device='cuda:0')
epoch: 10 test_true_pfm: 68.73803214679037 sim_pfm: 3.719836628576741
episode: 40 training return: tensor(-33.0517, device='cuda:0')
episode: 41 training return: tensor(-28.7976, device='cuda:0')
episode: 42 training return: tensor(25.2157, device='cuda:0')
episode: 43 training return: tensor(15.6538, device='cuda:0')
epoch: 11 test_true_pfm: 86.34464792651595 sim_pfm: 1.24068056717515
episode: 44 training return: tensor(29.2163, device='cuda:0')
episode: 45 training return: tensor(-28.9906, device='cuda:0')
episode: 46 training return: tensor(-28.1272, device='cuda:0')
episode: 47 training return: tensor(-25.1598, device='cuda:0')
epoch: 12 test_true_pfm: 52.94789304329048 sim_pfm: -26.87391816478339
episode: 48 training return: tensor(-28.9163, device='cuda:0')
episode: 49 training return: tensor(-11.4358, device='cuda:0')
episode: 50 training return: tensor(-28.4447, device='cuda:0')
episode: 51 training return: tensor(-19.5455, device='cuda:0')
epoch: 13 test_true_pfm: 69.19481437012064 sim_pfm: -7.622371302975807
episode: 52 training return: tensor(-2.0990, device='cuda:0')
episode: 53 training return: tensor(11.8032, device='cuda:0')
episode: 54 training return: tensor(25.8972, device='cuda:0')
episode: 55 training return: tensor(23.2280, device='cuda:0')
epoch: 14 test_true_pfm: 103.04585828808938 sim_pfm: -0.7232468703645282
episode: 56 training return: tensor(26.3136, device='cuda:0')
episode: 57 training return: tensor(32.4601, device='cuda:0')
episode: 58 training return: tensor(27.2523, device='cuda:0')
episode: 59 training return: tensor(-26.5541, device='cuda:0')
epoch: 15 test_true_pfm: 107.92957426199216 sim_pfm: -14.106523855315753
episode: 60 training return: tensor(-26.8495, device='cuda:0')
episode: 61 training return: tensor(-12.0577, device='cuda:0')
episode: 62 training return: tensor(-28.3735, device='cuda:0')
episode: 63 training return: tensor(-30.0952, device='cuda:0')
epoch: 16 test_true_pfm: 90.15321457002393 sim_pfm: -10.565715041110526
episode: 64 training return: tensor(-23.6306, device='cuda:0')
episode: 65 training return: tensor(24.5175, device='cuda:0')
episode: 66 training return: tensor(-9.6098, device='cuda:0')
episode: 67 training return: tensor(31.2984, device='cuda:0')
epoch: 17 test_true_pfm: 112.4075823671553 sim_pfm: -3.0290233451407405
episode: 68 training return: tensor(-12.7005, device='cuda:0')
episode: 69 training return: tensor(12.4885, device='cuda:0')
episode: 70 training return: tensor(1.1815, device='cuda:0')
episode: 71 training return: tensor(-36.4941, device='cuda:0')
epoch: 18 test_true_pfm: 66.39099811380751 sim_pfm: -14.829113189975033
episode: 72 training return: tensor(17.6622, device='cuda:0')
episode: 73 training return: tensor(27.8037, device='cuda:0')
episode: 74 training return: tensor(1.9841, device='cuda:0')
episode: 75 training return: tensor(-33.9999, device='cuda:0')
epoch: 19 test_true_pfm: 94.68394473789996 sim_pfm: -16.416720943088876
episode: 76 training return: tensor(-23.7000, device='cuda:0')
episode: 77 training return: tensor(-17.9334, device='cuda:0')
episode: 78 training return: tensor(-44.7691, device='cuda:0')
episode: 79 training return: tensor(-26.9456, device='cuda:0')
epoch: 20 test_true_pfm: 95.50820853262734 sim_pfm: -8.16551345376647
episode: 80 training return: tensor(-5.4192, device='cuda:0')
episode: 81 training return: tensor(-28.4663, device='cuda:0')
episode: 82 training return: tensor(-19.6741, device='cuda:0')
episode: 83 training return: tensor(-23.1161, device='cuda:0')
epoch: 21 test_true_pfm: 73.11187180570094 sim_pfm: -7.128599438088713
episode: 84 training return: tensor(-29.5501, device='cuda:0')
episode: 85 training return: tensor(-20.6391, device='cuda:0')
episode: 86 training return: tensor(-30.2201, device='cuda:0')
episode: 87 training return: tensor(-27.1961, device='cuda:0')
epoch: 22 test_true_pfm: 80.67788374496996 sim_pfm: -4.4006825658725575
episode: 88 training return: tensor(-27.9381, device='cuda:0')
episode: 89 training return: tensor(-7.2911, device='cuda:0')
episode: 90 training return: tensor(-9.5077, device='cuda:0')
episode: 91 training return: tensor(-2.3141, device='cuda:0')
epoch: 23 test_true_pfm: 88.38495354581059 sim_pfm: 5.183184713678202
episode: 92 training return: tensor(26.6361, device='cuda:0')
episode: 93 training return: tensor(-2.1233, device='cuda:0')
episode: 94 training return: tensor(30.6178, device='cuda:0')
episode: 95 training return: tensor(-28.6458, device='cuda:0')
epoch: 24 test_true_pfm: 100.1162544664141 sim_pfm: 10.511177847970975
episode: 96 training return: tensor(24.8930, device='cuda:0')
episode: 97 training return: tensor(-9.9639, device='cuda:0')
episode: 98 training return: tensor(-11.4237, device='cuda:0')
episode: 99 training return: tensor(-25.1596, device='cuda:0')
epoch: 25 test_true_pfm: 95.22843281593592 sim_pfm: -1.221920256636804
episode: 100 training return: tensor(-26.2630, device='cuda:0')
episode: 101 training return: tensor(-38.1453, device='cuda:0')
episode: 102 training return: tensor(-30.3377, device='cuda:0')
episode: 103 training return: tensor(23.1344, device='cuda:0')
epoch: 26 test_true_pfm: 72.53648021982158 sim_pfm: 1.0463118357816712
episode: 104 training return: tensor(-27.6908, device='cuda:0')
episode: 105 training return: tensor(-22.4512, device='cuda:0')
episode: 106 training return: tensor(16.0083, device='cuda:0')
episode: 107 training return: tensor(-31.2968, device='cuda:0')
epoch: 27 test_true_pfm: 91.13391737879849 sim_pfm: -21.383389776508558
episode: 108 training return: tensor(-14.5800, device='cuda:0')
episode: 109 training return: tensor(-31.0214, device='cuda:0')
episode: 110 training return: tensor(27.9232, device='cuda:0')
episode: 111 training return: tensor(-15.2156, device='cuda:0')
epoch: 28 test_true_pfm: 88.22026558091288 sim_pfm: 13.730550363170915
episode: 112 training return: tensor(-30.6815, device='cuda:0')
episode: 113 training return: tensor(-1.5012, device='cuda:0')
episode: 114 training return: tensor(31.7441, device='cuda:0')
episode: 115 training return: tensor(-30.6767, device='cuda:0')
epoch: 29 test_true_pfm: 58.696729354638 sim_pfm: -24.215200482221555
episode: 116 training return: tensor(20.2501, device='cuda:0')
episode: 117 training return: tensor(26.7376, device='cuda:0')
episode: 118 training return: tensor(-27.3585, device='cuda:0')
episode: 119 training return: tensor(-29.8850, device='cuda:0')
epoch: 30 test_true_pfm: 93.69162033562556 sim_pfm: 9.639027024293318
episode: 120 training return: tensor(10.1581, device='cuda:0')
episode: 121 training return: tensor(-19.2873, device='cuda:0')
episode: 122 training return: tensor(-37.5615, device='cuda:0')
episode: 123 training return: tensor(33.0955, device='cuda:0')
epoch: 31 test_true_pfm: 54.48666633492886 sim_pfm: -10.967646942328429
episode: 124 training return: tensor(-4.0853, device='cuda:0')
episode: 125 training return: tensor(-18.8308, device='cuda:0')
episode: 126 training return: tensor(-3.4801, device='cuda:0')
episode: 127 training return: tensor(-32.2109, device='cuda:0')
epoch: 32 test_true_pfm: 73.43224638781405 sim_pfm: 16.741646196396324
episode: 128 training return: tensor(-10.9157, device='cuda:0')
episode: 129 training return: tensor(6.5771, device='cuda:0')
episode: 130 training return: tensor(0.9658, device='cuda:0')
episode: 131 training return: tensor(-32.5324, device='cuda:0')
epoch: 33 test_true_pfm: 77.1635783845601 sim_pfm: 3.957520414632745
episode: 132 training return: tensor(-27.8617, device='cuda:0')
episode: 133 training return: tensor(-5.4283, device='cuda:0')
episode: 134 training return: tensor(31.6595, device='cuda:0')
episode: 135 training return: tensor(11.0873, device='cuda:0')
epoch: 34 test_true_pfm: 112.52022737699838 sim_pfm: -11.264109111583092
episode: 136 training return: tensor(-10.7851, device='cuda:0')
episode: 137 training return: tensor(-32.2033, device='cuda:0')
episode: 138 training return: tensor(-29.6337, device='cuda:0')
episode: 139 training return: tensor(-11.3003, device='cuda:0')
epoch: 35 test_true_pfm: 57.572042693168 sim_pfm: -10.591749012225772
episode: 140 training return: tensor(-6.3039, device='cuda:0')
episode: 141 training return: tensor(28.7193, device='cuda:0')
episode: 142 training return: tensor(7.7241, device='cuda:0')
episode: 143 training return: tensor(17.0355, device='cuda:0')
epoch: 36 test_true_pfm: 70.24087844327464 sim_pfm: -20.983457880234347
episode: 144 training return: tensor(23.8793, device='cuda:0')
episode: 145 training return: tensor(-27.9669, device='cuda:0')
episode: 146 training return: tensor(-31.8019, device='cuda:0')
episode: 147 training return: tensor(-12.9506, device='cuda:0')
epoch: 37 test_true_pfm: 83.65851659689909 sim_pfm: -19.27738816049532
episode: 148 training return: tensor(-45.3463, device='cuda:0')
episode: 149 training return: tensor(-20.5367, device='cuda:0')
episode: 150 training return: tensor(-32.3550, device='cuda:0')
episode: 151 training return: tensor(-18.2056, device='cuda:0')
epoch: 38 test_true_pfm: 110.70149232646956 sim_pfm: -10.338036573276622
episode: 152 training return: tensor(-17.0832, device='cuda:0')
episode: 153 training return: tensor(-32.3580, device='cuda:0')
episode: 154 training return: tensor(-3.0613, device='cuda:0')
episode: 155 training return: tensor(-6.9328, device='cuda:0')
epoch: 39 test_true_pfm: 77.98448971236901 sim_pfm: -6.879119773389538
episode: 156 training return: tensor(-17.0338, device='cuda:0')
episode: 157 training return: tensor(24.9746, device='cuda:0')
episode: 158 training return: tensor(28.5362, device='cuda:0')
episode: 159 training return: tensor(-31.3562, device='cuda:0')
epoch: 40 test_true_pfm: 84.42389249013036 sim_pfm: 3.451954312744783
episode: 160 training return: tensor(-27.2285, device='cuda:0')
episode: 161 training return: tensor(2.6155, device='cuda:0')
episode: 162 training return: tensor(29.7154, device='cuda:0')
episode: 163 training return: tensor(-26.3349, device='cuda:0')
epoch: 41 test_true_pfm: 52.036846052108864 sim_pfm: 2.8129640898492654
episode: 164 training return: tensor(-23.9344, device='cuda:0')
episode: 165 training return: tensor(22.8210, device='cuda:0')
episode: 166 training return: tensor(26.0683, device='cuda:0')
episode: 167 training return: tensor(20.1120, device='cuda:0')
epoch: 42 test_true_pfm: 68.12015794067801 sim_pfm: 5.42248600552557
episode: 168 training return: tensor(-32.4128, device='cuda:0')
episode: 169 training return: tensor(7.4448, device='cuda:0')
episode: 170 training return: tensor(29.6541, device='cuda:0')
episode: 171 training return: tensor(-22.8670, device='cuda:0')
epoch: 43 test_true_pfm: 89.75900705335839 sim_pfm: -5.311802998837083
episode: 172 training return: tensor(31.8793, device='cuda:0')
episode: 173 training return: tensor(23.7823, device='cuda:0')
episode: 174 training return: tensor(-5.7750, device='cuda:0')
episode: 175 training return: tensor(-31.8113, device='cuda:0')
epoch: 44 test_true_pfm: 86.97113482967897 sim_pfm: -19.618138321611333
episode: 176 training return: tensor(-3.6080, device='cuda:0')
episode: 177 training return: tensor(-4.2339, device='cuda:0')
episode: 178 training return: tensor(7.0274, device='cuda:0')
episode: 179 training return: tensor(14.2867, device='cuda:0')
epoch: 45 test_true_pfm: 57.37628105027146 sim_pfm: -18.169146129622824
episode: 180 training return: tensor(-14.1299, device='cuda:0')
episode: 181 training return: tensor(-29.1634, device='cuda:0')
episode: 182 training return: tensor(26.1770, device='cuda:0')
episode: 183 training return: tensor(-11.5269, device='cuda:0')
epoch: 46 test_true_pfm: 84.07595395998997 sim_pfm: -6.51836931356811
episode: 184 training return: tensor(-14.5177, device='cuda:0')
episode: 185 training return: tensor(-29.6192, device='cuda:0')
episode: 186 training return: tensor(19.0810, device='cuda:0')
episode: 187 training return: tensor(-29.3408, device='cuda:0')
epoch: 47 test_true_pfm: 50.648082402260556 sim_pfm: -10.951898707129294
episode: 188 training return: tensor(-29.9882, device='cuda:0')
episode: 189 training return: tensor(-0.4621, device='cuda:0')
episode: 190 training return: tensor(29.8054, device='cuda:0')
episode: 191 training return: tensor(-29.3809, device='cuda:0')
epoch: 48 test_true_pfm: 98.89620632148963 sim_pfm: -5.523919378023129
episode: 192 training return: tensor(34.4422, device='cuda:0')
episode: 193 training return: tensor(-20.2806, device='cuda:0')
episode: 194 training return: tensor(-3.5978, device='cuda:0')
episode: 195 training return: tensor(26.3315, device='cuda:0')
epoch: 49 test_true_pfm: 69.15692396905038 sim_pfm: -4.22852621524944
episode: 196 training return: tensor(-3.1390, device='cuda:0')
episode: 197 training return: tensor(10.4160, device='cuda:0')
episode: 198 training return: tensor(28.0459, device='cuda:0')
episode: 199 training return: tensor(-22.1271, device='cuda:0')
epoch: 50 test_true_pfm: 103.06467171713834 sim_pfm: -17.034478130989008
episode: 200 training return: tensor(28.5403, device='cuda:0')
episode: 201 training return: tensor(-31.9490, device='cuda:0')
episode: 202 training return: tensor(24.8469, device='cuda:0')
episode: 203 training return: tensor(-19.3955, device='cuda:0')
epoch: 51 test_true_pfm: 76.37495648882495 sim_pfm: -22.216444181394763
episode: 204 training return: tensor(17.6335, device='cuda:0')
episode: 205 training return: tensor(15.9252, device='cuda:0')
episode: 206 training return: tensor(-2.5930, device='cuda:0')
episode: 207 training return: tensor(-28.5120, device='cuda:0')
epoch: 52 test_true_pfm: 97.37880090802805 sim_pfm: -24.69025521686417
episode: 208 training return: tensor(-0.1994, device='cuda:0')
episode: 209 training return: tensor(18.8506, device='cuda:0')
episode: 210 training return: tensor(-17.6849, device='cuda:0')
episode: 211 training return: tensor(-20.3876, device='cuda:0')
epoch: 53 test_true_pfm: 59.51038683052404 sim_pfm: -12.317669719230617
episode: 212 training return: tensor(29.8438, device='cuda:0')
episode: 213 training return: tensor(-28.5078, device='cuda:0')
episode: 214 training return: tensor(-33.6153, device='cuda:0')
episode: 215 training return: tensor(-4.3076, device='cuda:0')
epoch: 54 test_true_pfm: 68.72065313965737 sim_pfm: -4.27487396618235
episode: 216 training return: tensor(-10.9219, device='cuda:0')
episode: 217 training return: tensor(-21.0599, device='cuda:0')
episode: 218 training return: tensor(-20.6602, device='cuda:0')
episode: 219 training return: tensor(-30.4573, device='cuda:0')
epoch: 55 test_true_pfm: 101.8759661392939 sim_pfm: -25.271767281292703
episode: 220 training return: tensor(-25.3686, device='cuda:0')
episode: 221 training return: tensor(-6.5383, device='cuda:0')
episode: 222 training return: tensor(-13.0338, device='cuda:0')
episode: 223 training return: tensor(-30.1488, device='cuda:0')
epoch: 56 test_true_pfm: 89.02820000995459 sim_pfm: -7.75756846870645
episode: 224 training return: tensor(27.4807, device='cuda:0')
episode: 225 training return: tensor(-24.4999, device='cuda:0')
episode: 226 training return: tensor(32.6118, device='cuda:0')
episode: 227 training return: tensor(31.8513, device='cuda:0')
epoch: 57 test_true_pfm: 100.24157574683815 sim_pfm: -3.5425115171005017
episode: 228 training return: tensor(-16.0616, device='cuda:0')
episode: 229 training return: tensor(-18.2842, device='cuda:0')
episode: 230 training return: tensor(17.7659, device='cuda:0')
episode: 231 training return: tensor(-11.2025, device='cuda:0')
epoch: 58 test_true_pfm: 64.6903626546411 sim_pfm: -21.88706280441256
episode: 232 training return: tensor(-29.0820, device='cuda:0')
episode: 233 training return: tensor(-28.7946, device='cuda:0')
episode: 234 training return: tensor(-33.1307, device='cuda:0')
episode: 235 training return: tensor(-28.9746, device='cuda:0')
epoch: 59 test_true_pfm: 78.66356188045222 sim_pfm: -27.89818941137637
episode: 236 training return: tensor(-29.2669, device='cuda:0')
episode: 237 training return: tensor(-28.4734, device='cuda:0')
episode: 238 training return: tensor(-32.1454, device='cuda:0')
episode: 239 training return: tensor(-8.0648, device='cuda:0')
epoch: 60 test_true_pfm: 55.00934941477343 sim_pfm: -4.252785810793284
episode: 240 training return: tensor(-28.0649, device='cuda:0')
episode: 241 training return: tensor(30.3063, device='cuda:0')
episode: 242 training return: tensor(-28.2038, device='cuda:0')
episode: 243 training return: tensor(-26.9237, device='cuda:0')
epoch: 61 test_true_pfm: 52.2754542909752 sim_pfm: -26.216778874350712
episode: 244 training return: tensor(29.5916, device='cuda:0')
episode: 245 training return: tensor(-9.0913, device='cuda:0')
episode: 246 training return: tensor(8.9175, device='cuda:0')
episode: 247 training return: tensor(-17.1006, device='cuda:0')
epoch: 62 test_true_pfm: 52.43324744689265 sim_pfm: -3.6844850759312977
episode: 248 training return: tensor(-6.2822, device='cuda:0')
episode: 249 training return: tensor(28.9688, device='cuda:0')
episode: 250 training return: tensor(-18.0263, device='cuda:0')
episode: 251 training return: tensor(-31.5268, device='cuda:0')
epoch: 63 test_true_pfm: 85.7137711465822 sim_pfm: -33.33893010544125
episode: 252 training return: tensor(19.3028, device='cuda:0')
episode: 253 training return: tensor(-25.9201, device='cuda:0')
episode: 254 training return: tensor(27.1970, device='cuda:0')
episode: 255 training return: tensor(-2.5505, device='cuda:0')
epoch: 64 test_true_pfm: 51.567004937095774 sim_pfm: -31.2824401985039
episode: 256 training return: tensor(-28.1775, device='cuda:0')
episode: 257 training return: tensor(-28.6168, device='cuda:0')
episode: 258 training return: tensor(28.3242, device='cuda:0')
episode: 259 training return: tensor(-29.0403, device='cuda:0')
epoch: 65 test_true_pfm: 88.05381729028845 sim_pfm: 5.562268987274729
episode: 260 training return: tensor(-13.3957, device='cuda:0')
episode: 261 training return: tensor(-7.3841, device='cuda:0')
episode: 262 training return: tensor(-7.6699, device='cuda:0')
episode: 263 training return: tensor(-6.2331, device='cuda:0')
epoch: 66 test_true_pfm: 84.0964061958739 sim_pfm: -16.21758872332284
episode: 264 training return: tensor(-3.0748, device='cuda:0')
episode: 265 training return: tensor(-27.2098, device='cuda:0')
episode: 266 training return: tensor(-33.0015, device='cuda:0')
episode: 267 training return: tensor(-29.5889, device='cuda:0')
epoch: 67 test_true_pfm: 68.91470889804037 sim_pfm: 6.245341402519261
episode: 268 training return: tensor(0.1135, device='cuda:0')
episode: 269 training return: tensor(-14.5590, device='cuda:0')
episode: 270 training return: tensor(-6.5836, device='cuda:0')
episode: 271 training return: tensor(-32.4430, device='cuda:0')
epoch: 68 test_true_pfm: 58.11942156212101 sim_pfm: 14.349576406815322
episode: 272 training return: tensor(-3.1153, device='cuda:0')
episode: 273 training return: tensor(-28.6671, device='cuda:0')
episode: 274 training return: tensor(-0.3139, device='cuda:0')
episode: 275 training return: tensor(27.1195, device='cuda:0')
epoch: 69 test_true_pfm: 63.723422431813745 sim_pfm: -9.235797318123513
episode: 276 training return: tensor(-0.1055, device='cuda:0')
episode: 277 training return: tensor(23.8241, device='cuda:0')
episode: 278 training return: tensor(-10.0187, device='cuda:0')
episode: 279 training return: tensor(-28.9962, device='cuda:0')
epoch: 70 test_true_pfm: 92.53072205798439 sim_pfm: -2.674919206893537
episode: 280 training return: tensor(-34.4119, device='cuda:0')
episode: 281 training return: tensor(-35.0775, device='cuda:0')
episode: 282 training return: tensor(-6.0857, device='cuda:0')
episode: 283 training return: tensor(-28.4063, device='cuda:0')
epoch: 71 test_true_pfm: 89.3989317086608 sim_pfm: 20.046350562525912
episode: 284 training return: tensor(-5.0215, device='cuda:0')
episode: 285 training return: tensor(-29.0964, device='cuda:0')
episode: 286 training return: tensor(15.1972, device='cuda:0')
episode: 287 training return: tensor(17.1825, device='cuda:0')
epoch: 72 test_true_pfm: 91.29698667650447 sim_pfm: -3.2964250191813336
episode: 288 training return: tensor(-27.4800, device='cuda:0')
episode: 289 training return: tensor(-3.3489, device='cuda:0')
episode: 290 training return: tensor(-25.5834, device='cuda:0')
episode: 291 training return: tensor(28.5220, device='cuda:0')
epoch: 73 test_true_pfm: 96.69604129551807 sim_pfm: -18.429290969314753
episode: 292 training return: tensor(-30.0559, device='cuda:0')
episode: 293 training return: tensor(-32.4723, device='cuda:0')
episode: 294 training return: tensor(23.6885, device='cuda:0')
episode: 295 training return: tensor(24.7243, device='cuda:0')
epoch: 74 test_true_pfm: 81.77115255338722 sim_pfm: -27.27836286483216
episode: 296 training return: tensor(-15.5919, device='cuda:0')
episode: 297 training return: tensor(24.3296, device='cuda:0')
episode: 298 training return: tensor(-32.6927, device='cuda:0')
episode: 299 training return: tensor(-23.9430, device='cuda:0')
epoch: 75 test_true_pfm: 50.47981423592936 sim_pfm: -32.98284077036078
episode: 300 training return: tensor(-30.3642, device='cuda:0')
episode: 301 training return: tensor(25.1095, device='cuda:0')
episode: 302 training return: tensor(-28.2634, device='cuda:0')
episode: 303 training return: tensor(28.6278, device='cuda:0')
epoch: 76 test_true_pfm: 116.349605022816 sim_pfm: 7.266865330393193
episode: 304 training return: tensor(29.2694, device='cuda:0')
episode: 305 training return: tensor(-7.7038, device='cuda:0')
episode: 306 training return: tensor(-28.9400, device='cuda:0')
episode: 307 training return: tensor(-27.6251, device='cuda:0')
epoch: 77 test_true_pfm: 62.61318531201052 sim_pfm: 3.6099748120002912
episode: 308 training return: tensor(-3.5510, device='cuda:0')
episode: 309 training return: tensor(-29.8418, device='cuda:0')
episode: 310 training return: tensor(13.2615, device='cuda:0')
episode: 311 training return: tensor(-27.1024, device='cuda:0')
epoch: 78 test_true_pfm: 63.526979058671564 sim_pfm: -17.221973586903186
episode: 312 training return: tensor(-5.0110, device='cuda:0')
episode: 313 training return: tensor(22.8495, device='cuda:0')
episode: 314 training return: tensor(-33.2971, device='cuda:0')
episode: 315 training return: tensor(-33.3706, device='cuda:0')
epoch: 79 test_true_pfm: 65.1052307686796 sim_pfm: -22.656871689105174
episode: 316 training return: tensor(-33.5798, device='cuda:0')
episode: 317 training return: tensor(-33.0712, device='cuda:0')
episode: 318 training return: tensor(-28.3381, device='cuda:0')
episode: 319 training return: tensor(-30.2654, device='cuda:0')
epoch: 80 test_true_pfm: 56.98272404323872 sim_pfm: -13.388815058459295
episode: 320 training return: tensor(-28.9090, device='cuda:0')
episode: 321 training return: tensor(-28.9686, device='cuda:0')
episode: 322 training return: tensor(23.2634, device='cuda:0')
episode: 323 training return: tensor(-28.2138, device='cuda:0')
epoch: 81 test_true_pfm: 59.39086425063299 sim_pfm: -14.395489093271317
episode: 324 training return: tensor(-30.2123, device='cuda:0')
episode: 325 training return: tensor(-31.4303, device='cuda:0')
episode: 326 training return: tensor(6.6737, device='cuda:0')
episode: 327 training return: tensor(-28.0444, device='cuda:0')
epoch: 82 test_true_pfm: 68.97642468109336 sim_pfm: -8.287892920325977
episode: 328 training return: tensor(-28.1658, device='cuda:0')
episode: 329 training return: tensor(-7.2426, device='cuda:0')
episode: 330 training return: tensor(-14.5231, device='cuda:0')
episode: 331 training return: tensor(25.4810, device='cuda:0')
epoch: 83 test_true_pfm: 56.71558963649801 sim_pfm: -26.140159182681238
episode: 332 training return: tensor(-29.7840, device='cuda:0')
episode: 333 training return: tensor(-28.3889, device='cuda:0')
episode: 334 training return: tensor(-27.2802, device='cuda:0')
episode: 335 training return: tensor(-30.5020, device='cuda:0')
epoch: 84 test_true_pfm: 52.21184613440731 sim_pfm: -30.938942113128725
episode: 336 training return: tensor(-27.8164, device='cuda:0')
episode: 337 training return: tensor(-3.6516, device='cuda:0')
episode: 338 training return: tensor(-27.1821, device='cuda:0')
episode: 339 training return: tensor(-4.3449, device='cuda:0')
epoch: 85 test_true_pfm: 85.79236987703466 sim_pfm: -9.289938267419348
episode: 340 training return: tensor(13.9135, device='cuda:0')
episode: 341 training return: tensor(-30.9544, device='cuda:0')
episode: 342 training return: tensor(-12.5325, device='cuda:0')
episode: 343 training return: tensor(-33.4192, device='cuda:0')
epoch: 86 test_true_pfm: 75.72267913258756 sim_pfm: -9.563931376003893
episode: 344 training return: tensor(26.6745, device='cuda:0')
episode: 345 training return: tensor(-28.1853, device='cuda:0')
episode: 346 training return: tensor(-30.0461, device='cuda:0')
episode: 347 training return: tensor(-9.8386, device='cuda:0')
epoch: 87 test_true_pfm: 60.785884569191865 sim_pfm: -3.779450633312808
episode: 348 training return: tensor(-32.6977, device='cuda:0')
episode: 349 training return: tensor(-33.3260, device='cuda:0')
episode: 350 training return: tensor(30.1459, device='cuda:0')
episode: 351 training return: tensor(-29.7474, device='cuda:0')
epoch: 88 test_true_pfm: 80.8860490133594 sim_pfm: -22.726961308007596
episode: 352 training return: tensor(-25.0500, device='cuda:0')
episode: 353 training return: tensor(-22.1271, device='cuda:0')
episode: 354 training return: tensor(-24.8893, device='cuda:0')
episode: 355 training return: tensor(-20.7851, device='cuda:0')
epoch: 89 test_true_pfm: 93.36214248294957 sim_pfm: -5.281294476578478
episode: 356 training return: tensor(26.0206, device='cuda:0')
episode: 357 training return: tensor(-31.2810, device='cuda:0')
episode: 358 training return: tensor(-31.9725, device='cuda:0')
episode: 359 training return: tensor(-21.5180, device='cuda:0')
epoch: 90 test_true_pfm: 74.42682631066066 sim_pfm: -26.297177157585974
episode: 360 training return: tensor(-14.0547, device='cuda:0')
episode: 361 training return: tensor(-28.7412, device='cuda:0')
episode: 362 training return: tensor(-3.0987, device='cuda:0')
episode: 363 training return: tensor(-6.7414, device='cuda:0')
epoch: 91 test_true_pfm: 62.69849083780921 sim_pfm: -26.986675771995216
episode: 364 training return: tensor(-21.6046, device='cuda:0')
episode: 365 training return: tensor(-29.1122, device='cuda:0')
episode: 366 training return: tensor(-7.1332, device='cuda:0')
episode: 367 training return: tensor(-29.4484, device='cuda:0')
epoch: 92 test_true_pfm: 88.88607647333022 sim_pfm: -16.566321868728846
episode: 368 training return: tensor(-32.2956, device='cuda:0')
episode: 369 training return: tensor(-29.9092, device='cuda:0')
episode: 370 training return: tensor(27.3498, device='cuda:0')
episode: 371 training return: tensor(-30.7489, device='cuda:0')
epoch: 93 test_true_pfm: 77.60173500925234 sim_pfm: -18.029838958417532
episode: 372 training return: tensor(-20.4130, device='cuda:0')
episode: 373 training return: tensor(-16.7396, device='cuda:0')
episode: 374 training return: tensor(-28.4757, device='cuda:0')
episode: 375 training return: tensor(25.7768, device='cuda:0')
epoch: 94 test_true_pfm: 93.95995458249374 sim_pfm: -11.630630971945356
episode: 376 training return: tensor(-10.3207, device='cuda:0')
episode: 377 training return: tensor(21.1552, device='cuda:0')
episode: 378 training return: tensor(-21.4420, device='cuda:0')
episode: 379 training return: tensor(-2.0953, device='cuda:0')
epoch: 95 test_true_pfm: 60.34989177245084 sim_pfm: -19.824794831161853
episode: 380 training return: tensor(-27.1245, device='cuda:0')
episode: 381 training return: tensor(-30.9654, device='cuda:0')
episode: 382 training return: tensor(-33.4886, device='cuda:0')
episode: 383 training return: tensor(-44.2862, device='cuda:0')
epoch: 96 test_true_pfm: 73.948602211412 sim_pfm: -8.201879406208173
episode: 384 training return: tensor(-31.1268, device='cuda:0')
episode: 385 training return: tensor(-7.9098, device='cuda:0')
episode: 386 training return: tensor(-22.2138, device='cuda:0')
episode: 387 training return: tensor(26.7431, device='cuda:0')
epoch: 97 test_true_pfm: 71.82720722659879 sim_pfm: -16.231127453321825
episode: 388 training return: tensor(28.8293, device='cuda:0')
episode: 389 training return: tensor(-19.9233, device='cuda:0')
episode: 390 training return: tensor(-13.7351, device='cuda:0')
episode: 391 training return: tensor(-4.9980, device='cuda:0')
epoch: 98 test_true_pfm: 77.02494042152611 sim_pfm: -15.963991575303954
episode: 392 training return: tensor(-30.6817, device='cuda:0')
episode: 393 training return: tensor(-29.5548, device='cuda:0')
episode: 394 training return: tensor(9.4471, device='cuda:0')
episode: 395 training return: tensor(-29.4710, device='cuda:0')
epoch: 99 test_true_pfm: 84.02225000487039 sim_pfm: -14.170256878255168
episode: 396 training return: tensor(-30.5148, device='cuda:0')
episode: 397 training return: tensor(-30.7292, device='cuda:0')
episode: 398 training return: tensor(-4.8890, device='cuda:0')
episode: 399 training return: tensor(-39.1009, device='cuda:0')
epoch: 100 test_true_pfm: 72.42247341903172 sim_pfm: -9.646196648024489
episode: 400 training return: tensor(-12.5298, device='cuda:0')
episode: 401 training return: tensor(-28.2154, device='cuda:0')
episode: 402 training return: tensor(26.1783, device='cuda:0')
episode: 403 training return: tensor(-33.1139, device='cuda:0')
epoch: 101 test_true_pfm: 86.65761320149193 sim_pfm: -7.350510601268615
episode: 404 training return: tensor(-29.8053, device='cuda:0')
episode: 405 training return: tensor(-7.3593, device='cuda:0')
episode: 406 training return: tensor(-4.6043, device='cuda:0')
episode: 407 training return: tensor(-30.3752, device='cuda:0')
epoch: 102 test_true_pfm: 79.85980321574564 sim_pfm: -1.5817643453134225
episode: 408 training return: tensor(3.0065, device='cuda:0')
episode: 409 training return: tensor(-6.0353, device='cuda:0')
episode: 410 training return: tensor(-27.2682, device='cuda:0')
episode: 411 training return: tensor(-12.3539, device='cuda:0')
epoch: 103 test_true_pfm: 88.48492968164655 sim_pfm: -23.92487314027967
episode: 412 training return: tensor(26.3871, device='cuda:0')
episode: 413 training return: tensor(23.3994, device='cuda:0')
episode: 414 training return: tensor(27.6789, device='cuda:0')
episode: 415 training return: tensor(-22.3757, device='cuda:0')
epoch: 104 test_true_pfm: 92.46958410307245 sim_pfm: -20.064211858052296
episode: 416 training return: tensor(22.6769, device='cuda:0')
episode: 417 training return: tensor(-30.0751, device='cuda:0')
episode: 418 training return: tensor(-31.2158, device='cuda:0')
episode: 419 training return: tensor(-30.7903, device='cuda:0')
epoch: 105 test_true_pfm: 73.49854410044638 sim_pfm: -21.941829485900236
episode: 420 training return: tensor(-1.9890, device='cuda:0')
episode: 421 training return: tensor(-10.7762, device='cuda:0')
episode: 422 training return: tensor(-33.4142, device='cuda:0')
episode: 423 training return: tensor(-28.9576, device='cuda:0')
epoch: 106 test_true_pfm: 74.04982094446235 sim_pfm: -19.877480860543436
episode: 424 training return: tensor(-7.2349, device='cuda:0')
episode: 425 training return: tensor(25.4265, device='cuda:0')
episode: 426 training return: tensor(-29.7963, device='cuda:0')
episode: 427 training return: tensor(-13.8032, device='cuda:0')
epoch: 107 test_true_pfm: 58.35956719669715 sim_pfm: -14.854280438902787
episode: 428 training return: tensor(30.2514, device='cuda:0')
episode: 429 training return: tensor(30.5828, device='cuda:0')
episode: 430 training return: tensor(-8.3848, device='cuda:0')
episode: 431 training return: tensor(28.5018, device='cuda:0')
epoch: 108 test_true_pfm: 74.88053402158637 sim_pfm: 2.2979393082263413
episode: 432 training return: tensor(-3.4780, device='cuda:0')
episode: 433 training return: tensor(-32.2139, device='cuda:0')
episode: 434 training return: tensor(-8.1665, device='cuda:0')
episode: 435 training return: tensor(-15.1410, device='cuda:0')
epoch: 109 test_true_pfm: 89.44793925799183 sim_pfm: -17.386260899913033
episode: 436 training return: tensor(30.7359, device='cuda:0')
episode: 437 training return: tensor(-19.2387, device='cuda:0')
episode: 438 training return: tensor(24.8117, device='cuda:0')
episode: 439 training return: tensor(-21.3352, device='cuda:0')
epoch: 110 test_true_pfm: 87.74276286234041 sim_pfm: -5.907445231301244
episode: 440 training return: tensor(24.3872, device='cuda:0')
episode: 441 training return: tensor(-8.5242, device='cuda:0')
episode: 442 training return: tensor(6.2350, device='cuda:0')
episode: 443 training return: tensor(-5.6753, device='cuda:0')
epoch: 111 test_true_pfm: 73.25115260535509 sim_pfm: -23.790672004432416
episode: 444 training return: tensor(-26.9446, device='cuda:0')
episode: 445 training return: tensor(-9.3934, device='cuda:0')
episode: 446 training return: tensor(47.9171, device='cuda:0')
episode: 447 training return: tensor(-28.9434, device='cuda:0')
epoch: 112 test_true_pfm: 95.80455181128929 sim_pfm: -12.44053066681372
episode: 448 training return: tensor(-17.3778, device='cuda:0')
episode: 449 training return: tensor(-9.2215, device='cuda:0')
episode: 450 training return: tensor(-28.4433, device='cuda:0')
episode: 451 training return: tensor(-8.3305, device='cuda:0')
epoch: 113 test_true_pfm: 90.78039227699705 sim_pfm: 2.533927734417375
episode: 452 training return: tensor(7.4022, device='cuda:0')
episode: 453 training return: tensor(-3.6095, device='cuda:0')
episode: 454 training return: tensor(-5.8003, device='cuda:0')
episode: 455 training return: tensor(30.9195, device='cuda:0')
epoch: 114 test_true_pfm: 72.80248429132098 sim_pfm: -10.822318415244808
episode: 456 training return: tensor(42.6709, device='cuda:0')
episode: 457 training return: tensor(-32.3450, device='cuda:0')
episode: 458 training return: tensor(-22.0103, device='cuda:0')
episode: 459 training return: tensor(18.1382, device='cuda:0')
epoch: 115 test_true_pfm: 64.38918722538267 sim_pfm: 3.4348928757593966
episode: 460 training return: tensor(-39.1406, device='cuda:0')
episode: 461 training return: tensor(-25.0299, device='cuda:0')
episode: 462 training return: tensor(-11.8500, device='cuda:0')
episode: 463 training return: tensor(19.8444, device='cuda:0')
epoch: 116 test_true_pfm: 54.21222479948286 sim_pfm: -2.3746419109171257
episode: 464 training return: tensor(24.4001, device='cuda:0')
episode: 465 training return: tensor(14.1875, device='cuda:0')
episode: 466 training return: tensor(-8.3089, device='cuda:0')
episode: 467 training return: tensor(-18.7815, device='cuda:0')
epoch: 117 test_true_pfm: 77.63908383846156 sim_pfm: -25.962460128450765
episode: 468 training return: tensor(22.2073, device='cuda:0')
episode: 469 training return: tensor(23.9483, device='cuda:0')
episode: 470 training return: tensor(-17.4680, device='cuda:0')
episode: 471 training return: tensor(-37.7244, device='cuda:0')
epoch: 118 test_true_pfm: 87.0851722459186 sim_pfm: 0.5945215526444372
episode: 472 training return: tensor(-1.3346, device='cuda:0')
episode: 473 training return: tensor(-10.8718, device='cuda:0')
episode: 474 training return: tensor(-28.1824, device='cuda:0')
episode: 475 training return: tensor(-7.3011, device='cuda:0')
epoch: 119 test_true_pfm: 83.14404695887818 sim_pfm: 1.540159296517959
episode: 476 training return: tensor(-33.9211, device='cuda:0')
episode: 477 training return: tensor(-32.8347, device='cuda:0')
episode: 478 training return: tensor(-19.2294, device='cuda:0')
episode: 479 training return: tensor(-22.8106, device='cuda:0')
epoch: 120 test_true_pfm: 106.32453581819303 sim_pfm: 8.680453576816944
episode: 480 training return: tensor(13.2675, device='cuda:0')
episode: 481 training return: tensor(-15.2056, device='cuda:0')
episode: 482 training return: tensor(30.9349, device='cuda:0')
episode: 483 training return: tensor(-29.1215, device='cuda:0')
epoch: 121 test_true_pfm: 96.35798358721452 sim_pfm: -4.969522953714477
episode: 484 training return: tensor(-33.0880, device='cuda:0')
episode: 485 training return: tensor(27.8117, device='cuda:0')
episode: 486 training return: tensor(-3.4908, device='cuda:0')
episode: 487 training return: tensor(35.7433, device='cuda:0')
epoch: 122 test_true_pfm: 89.30159334730052 sim_pfm: -6.691846501431428
episode: 488 training return: tensor(-12.5561, device='cuda:0')
episode: 489 training return: tensor(29.8211, device='cuda:0')
episode: 490 training return: tensor(-31.3562, device='cuda:0')
episode: 491 training return: tensor(30.6271, device='cuda:0')
epoch: 123 test_true_pfm: 102.44314152587803 sim_pfm: 15.229811272380175
episode: 492 training return: tensor(22.3221, device='cuda:0')
episode: 493 training return: tensor(-37.7176, device='cuda:0')
episode: 494 training return: tensor(-31.3402, device='cuda:0')
episode: 495 training return: tensor(-6.4942, device='cuda:0')
epoch: 124 test_true_pfm: 101.901035805524 sim_pfm: 7.771187515271595
episode: 496 training return: tensor(-33.9193, device='cuda:0')
episode: 497 training return: tensor(-12.0296, device='cuda:0')
episode: 498 training return: tensor(13.4684, device='cuda:0')
episode: 499 training return: tensor(-43.1517, device='cuda:0')
epoch: 125 test_true_pfm: 87.7155019264246 sim_pfm: -2.658620576444082
episode: 500 training return: tensor(-29.5714, device='cuda:0')
episode: 501 training return: tensor(-28.4649, device='cuda:0')
episode: 502 training return: tensor(-31.1844, device='cuda:0')
episode: 503 training return: tensor(-31.5476, device='cuda:0')
epoch: 126 test_true_pfm: 81.10063873001572 sim_pfm: 4.1956212627119385
episode: 504 training return: tensor(-32.2306, device='cuda:0')
episode: 505 training return: tensor(-39.6004, device='cuda:0')
episode: 506 training return: tensor(-31.7543, device='cuda:0')
episode: 507 training return: tensor(-34.8573, device='cuda:0')
epoch: 127 test_true_pfm: 69.55327917862508 sim_pfm: -31.96516623214702
episode: 508 training return: tensor(30.1075, device='cuda:0')
episode: 509 training return: tensor(-30.8509, device='cuda:0')
episode: 510 training return: tensor(-13.4768, device='cuda:0')
episode: 511 training return: tensor(-24.6920, device='cuda:0')
epoch: 128 test_true_pfm: 90.71133563311574 sim_pfm: -5.997712512494763
episode: 512 training return: tensor(-32.0982, device='cuda:0')
episode: 513 training return: tensor(-28.8753, device='cuda:0')
episode: 514 training return: tensor(19.6766, device='cuda:0')
episode: 515 training return: tensor(28.7803, device='cuda:0')
epoch: 129 test_true_pfm: 67.73067950812668 sim_pfm: -7.751681367581478
episode: 516 training return: tensor(-33.8546, device='cuda:0')
episode: 517 training return: tensor(-29.8131, device='cuda:0')
episode: 518 training return: tensor(-23.3113, device='cuda:0')
episode: 519 training return: tensor(-29.7753, device='cuda:0')
epoch: 130 test_true_pfm: 50.00187751354151 sim_pfm: -8.420006931677927
episode: 520 training return: tensor(-32.8819, device='cuda:0')
episode: 521 training return: tensor(-5.5338, device='cuda:0')
episode: 522 training return: tensor(-29.9940, device='cuda:0')
episode: 523 training return: tensor(21.2474, device='cuda:0')
epoch: 131 test_true_pfm: 71.13382548303368 sim_pfm: -13.799344642687355
episode: 524 training return: tensor(-28.1561, device='cuda:0')
episode: 525 training return: tensor(-28.6363, device='cuda:0')
episode: 526 training return: tensor(-29.6957, device='cuda:0')
episode: 527 training return: tensor(-29.6371, device='cuda:0')
epoch: 132 test_true_pfm: 95.98870124934659 sim_pfm: -11.82755906835664
episode: 528 training return: tensor(-28.8925, device='cuda:0')
episode: 529 training return: tensor(-4.3280, device='cuda:0')
episode: 530 training return: tensor(-13.9776, device='cuda:0')
episode: 531 training return: tensor(-16.2531, device='cuda:0')
epoch: 133 test_true_pfm: 73.8966261251438 sim_pfm: -21.295403558755062
episode: 532 training return: tensor(28.1825, device='cuda:0')
episode: 533 training return: tensor(-20.8731, device='cuda:0')
episode: 534 training return: tensor(-5.2720, device='cuda:0')
episode: 535 training return: tensor(0.3814, device='cuda:0')
epoch: 134 test_true_pfm: 51.76358874516619 sim_pfm: -18.74914696837659
episode: 536 training return: tensor(-30.0593, device='cuda:0')
episode: 537 training return: tensor(-27.4655, device='cuda:0')
episode: 538 training return: tensor(27.0944, device='cuda:0')
episode: 539 training return: tensor(-26.3427, device='cuda:0')
epoch: 135 test_true_pfm: 73.60247265804954 sim_pfm: -19.87831627356354
episode: 540 training return: tensor(-32.1819, device='cuda:0')
episode: 541 training return: tensor(-12.5745, device='cuda:0')
episode: 542 training return: tensor(-31.3690, device='cuda:0')
episode: 543 training return: tensor(-31.8926, device='cuda:0')
epoch: 136 test_true_pfm: 72.45669320066497 sim_pfm: -8.940369058266516
episode: 544 training return: tensor(-27.9495, device='cuda:0')
episode: 545 training return: tensor(-31.6783, device='cuda:0')
episode: 546 training return: tensor(-3.8557, device='cuda:0')
episode: 547 training return: tensor(18.8970, device='cuda:0')
epoch: 137 test_true_pfm: 83.996609336038 sim_pfm: -2.291197233495768
episode: 548 training return: tensor(5.7353, device='cuda:0')
episode: 549 training return: tensor(-33.3979, device='cuda:0')
episode: 550 training return: tensor(-3.1505, device='cuda:0')
episode: 551 training return: tensor(-34.5199, device='cuda:0')
epoch: 138 test_true_pfm: 82.48222589895299 sim_pfm: -10.07755864833016
episode: 552 training return: tensor(-24.6628, device='cuda:0')
episode: 553 training return: tensor(-8.5847, device='cuda:0')
episode: 554 training return: tensor(12.2701, device='cuda:0')
episode: 555 training return: tensor(-3.0588, device='cuda:0')
epoch: 139 test_true_pfm: 69.50716261487136 sim_pfm: 14.132745458191494
episode: 556 training return: tensor(26.5656, device='cuda:0')
episode: 557 training return: tensor(-41.6577, device='cuda:0')
episode: 558 training return: tensor(-30.8292, device='cuda:0')
episode: 559 training return: tensor(-34.8836, device='cuda:0')
epoch: 140 test_true_pfm: 75.94751337630271 sim_pfm: -20.092802347696853
episode: 560 training return: tensor(-32.4614, device='cuda:0')
episode: 561 training return: tensor(10.2754, device='cuda:0')
episode: 562 training return: tensor(-3.9222, device='cuda:0')
episode: 563 training return: tensor(-32.2564, device='cuda:0')
epoch: 141 test_true_pfm: 86.80484800516332 sim_pfm: -21.158994199999142
episode: 564 training return: tensor(-6.1719, device='cuda:0')
episode: 565 training return: tensor(-38.6437, device='cuda:0')
episode: 566 training return: tensor(-40.7780, device='cuda:0')
episode: 567 training return: tensor(-30.5334, device='cuda:0')
epoch: 142 test_true_pfm: 72.31008642699703 sim_pfm: -12.618513819068903
episode: 568 training return: tensor(-30.1419, device='cuda:0')
episode: 569 training return: tensor(-5.5765, device='cuda:0')
episode: 570 training return: tensor(-4.2986, device='cuda:0')
episode: 571 training return: tensor(-6.4351, device='cuda:0')
epoch: 143 test_true_pfm: 52.87333342012543 sim_pfm: -11.122226521157426
episode: 572 training return: tensor(-4.0140, device='cuda:0')
episode: 573 training return: tensor(11.2993, device='cuda:0')
episode: 574 training return: tensor(-42.6661, device='cuda:0')
episode: 575 training return: tensor(-29.8841, device='cuda:0')
epoch: 144 test_true_pfm: 73.10110911915056 sim_pfm: -22.370538428571308
episode: 576 training return: tensor(-2.5709, device='cuda:0')
episode: 577 training return: tensor(-26.9595, device='cuda:0')
episode: 578 training return: tensor(-41.4589, device='cuda:0')
episode: 579 training return: tensor(-5.0105, device='cuda:0')
epoch: 145 test_true_pfm: 76.61998263746125 sim_pfm: -17.39961753588286
episode: 580 training return: tensor(2.3681, device='cuda:0')
episode: 581 training return: tensor(-41.4087, device='cuda:0')
episode: 582 training return: tensor(-29.7227, device='cuda:0')
episode: 583 training return: tensor(-33.1848, device='cuda:0')
epoch: 146 test_true_pfm: 58.528081742658834 sim_pfm: -35.439341059618165
episode: 584 training return: tensor(1.1061, device='cuda:0')
episode: 585 training return: tensor(-29.1990, device='cuda:0')
episode: 586 training return: tensor(-31.5216, device='cuda:0')
episode: 587 training return: tensor(-7.9335, device='cuda:0')
epoch: 147 test_true_pfm: 74.36474357320287 sim_pfm: -20.23913923613145
episode: 588 training return: tensor(-31.7273, device='cuda:0')
episode: 589 training return: tensor(-13.0237, device='cuda:0')
episode: 590 training return: tensor(-0.3985, device='cuda:0')
episode: 591 training return: tensor(-6.8404, device='cuda:0')
epoch: 148 test_true_pfm: 60.160352801469436 sim_pfm: -13.57267022356391
episode: 592 training return: tensor(10.3161, device='cuda:0')
episode: 593 training return: tensor(-4.8910, device='cuda:0')
episode: 594 training return: tensor(-31.2509, device='cuda:0')
episode: 595 training return: tensor(-31.9464, device='cuda:0')
epoch: 149 test_true_pfm: 73.92816788334618 sim_pfm: -27.36355433819699
episode: 596 training return: tensor(-5.8319, device='cuda:0')
episode: 597 training return: tensor(-31.9779, device='cuda:0')
episode: 598 training return: tensor(-24.7062, device='cuda:0')
episode: 599 training return: tensor(8.0159, device='cuda:0')
epoch: 150 test_true_pfm: 67.22400679997148 sim_pfm: -11.520053457294125
