['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '0']
epoch: 0 training_loss 0.23518588725477457 test_loss: 0.17870265245437622
epoch: 1 training_loss 0.1539712255075574 test_loss: 0.14170054197311402
epoch: 2 training_loss 0.1305276934430003 test_loss: 0.12111036777496338
epoch: 3 training_loss 0.12315855499356985 test_loss: 0.11632008552551269
epoch: 4 training_loss 0.10987950894981623 test_loss: 0.14693331718444824
epoch: 5 training_loss 0.11744012307375669 test_loss: 0.11390123367309571
epoch: 6 training_loss 0.1257886005938053 test_loss: 0.12244621515274048
epoch: 7 training_loss 0.10847465064376592 test_loss: 0.10030660629272461
epoch: 8 training_loss 0.10851467866450548 test_loss: 0.10958195924758911
epoch: 9 training_loss 0.10774207722395658 test_loss: 0.10884331464767456
epoch: 10 training_loss 0.10314897397533059 test_loss: 0.10963865518569946
epoch: 11 training_loss 0.10863614875823259 test_loss: 0.10841823816299438
epoch: 12 training_loss 0.10523096786811947 test_loss: 0.11223996877670288
epoch: 13 training_loss 0.10538839293643833 test_loss: 0.1269892930984497
epoch: 14 training_loss 0.10018921121954918 test_loss: 0.09125838875770569
epoch: 15 training_loss 0.0928510383889079 test_loss: 0.11004341840744018
epoch: 16 training_loss 0.1095073913410306 test_loss: 0.11090915203094483
epoch: 17 training_loss 0.10830478137359023 test_loss: 0.10037626028060913
epoch: 18 training_loss 0.0997441976889968 test_loss: 0.12680790424346924
epoch: 19 training_loss 0.10579613072797656 test_loss: 0.11141936779022217
epoch: 20 training_loss 0.1025890677422285 test_loss: 0.11776057481765748
epoch: 21 training_loss 0.09271963872015476 test_loss: 0.10565314292907715
epoch: 22 training_loss 0.09503487112000585 test_loss: 0.09739959836006165
epoch: 23 training_loss 0.09489216100424529 test_loss: 0.09486696720123292
epoch: 24 training_loss 0.09346702221781016 test_loss: 0.11623897552490234
epoch: 25 training_loss 0.09860911089926958 test_loss: 0.11140670776367187
epoch: 26 training_loss 0.09590963071212172 test_loss: 0.10338823795318604
epoch: 27 training_loss 0.09818214453756809 test_loss: 0.11300997734069824
epoch: 28 training_loss 0.09795305667445063 test_loss: 0.09608436822891235
epoch: 29 training_loss 0.08950754012912512 test_loss: 0.1057101845741272
epoch: 30 training_loss 0.09935075217857957 test_loss: 0.08248257637023926
epoch: 31 training_loss 0.10013501059263945 test_loss: 0.11384755373001099
epoch: 32 training_loss 0.10007124353200197 test_loss: 0.09741548895835876
epoch: 33 training_loss 0.09916039686650038 test_loss: 0.10409945249557495
epoch: 34 training_loss 0.09579955834895372 test_loss: 0.09590166211128234
epoch: 35 training_loss 0.08858343649655581 test_loss: 0.10480191707611083
epoch: 36 training_loss 0.09455861020833253 test_loss: 0.09216406345367431
epoch: 37 training_loss 0.09209073428064585 test_loss: 0.10859179496765137
epoch: 38 training_loss 0.10233985155820846 test_loss: 0.09411437511444092
epoch: 39 training_loss 0.10020935939624906 test_loss: 0.09399778842926025
epoch: 40 training_loss 0.09326063001528383 test_loss: 0.09830323457717896
epoch: 41 training_loss 0.09125367065891624 test_loss: 0.10600597858428955
epoch: 42 training_loss 0.09408369965851307 test_loss: 0.10515105724334717
epoch: 43 training_loss 0.10014376565814018 test_loss: 0.0953327775001526
epoch: 44 training_loss 0.09598027447238565 test_loss: 0.09375925064086914
epoch: 45 training_loss 0.09508237907662988 test_loss: 0.1104328989982605
epoch: 46 training_loss 0.09636533811688423 test_loss: 0.10049196481704711
epoch: 47 training_loss 0.0920629858598113 test_loss: 0.10677995681762695
epoch: 48 training_loss 0.10079245705157519 test_loss: 0.09255328178405761
epoch: 49 training_loss 0.0962680614553392 test_loss: 0.0815801739692688
epoch: 50 training_loss 0.08594806961715222 test_loss: 0.09840738177299499
epoch: 51 training_loss 0.09405375588685275 test_loss: 0.09632488489151
epoch: 52 training_loss 0.09602194579318166 test_loss: 0.10609102249145508
epoch: 53 training_loss 0.09469967735931277 test_loss: 0.09464524984359741
epoch: 54 training_loss 0.08941649217158557 test_loss: 0.09710015058517456
epoch: 55 training_loss 0.09227671960368752 test_loss: 0.09251140356063843
epoch: 56 training_loss 0.09193746823817492 test_loss: 0.10569257736206054
epoch: 57 training_loss 0.08787143411114812 test_loss: 0.08686349391937256
epoch: 58 training_loss 0.09253031192347408 test_loss: 0.10109866857528686
epoch: 59 training_loss 0.09333715425804258 test_loss: 0.10344709157943725
epoch: 60 training_loss 0.09409424452111124 test_loss: 0.10346764326095581
epoch: 61 training_loss 0.08755052879452706 test_loss: 0.08904477953910828
epoch: 62 training_loss 0.09257129717618227 test_loss: 0.096737539768219
epoch: 63 training_loss 0.09733733171597123 test_loss: 0.09296544790267944
epoch: 64 training_loss 0.08718637472018599 test_loss: 0.10225793123245239
epoch: 65 training_loss 0.09501013826578855 test_loss: 0.08514023423194886
epoch: 66 training_loss 0.09175433410331607 test_loss: 0.11445748805999756
epoch: 67 training_loss 0.08702792268246412 test_loss: 0.10571188926696777
epoch: 68 training_loss 0.09360302425920963 test_loss: 0.10157642364501954
epoch: 69 training_loss 0.09105296049267053 test_loss: 0.08514562249183655
epoch: 70 training_loss 0.09935321224853397 test_loss: 0.07395791411399841
epoch: 71 training_loss 0.09473461203277111 test_loss: 0.10142512321472168
epoch: 72 training_loss 0.09067294841632247 test_loss: 0.08432934284210206
epoch: 73 training_loss 0.0833250630274415 test_loss: 0.09365010261535645
epoch: 74 training_loss 0.09408728651702404 test_loss: 0.079509037733078
epoch: 75 training_loss 0.09216911828145385 test_loss: 0.08872148990631104
epoch: 76 training_loss 0.09390071839094162 test_loss: 0.08811028599739075
epoch: 77 training_loss 0.08691758310422301 test_loss: 0.10600612163543702
epoch: 78 training_loss 0.08882776398211717 test_loss: 0.09677653908729553
epoch: 79 training_loss 0.09554370179772377 test_loss: 0.10940380096435547
epoch: 80 training_loss 0.09525863744318486 test_loss: 0.08580031991004944
epoch: 81 training_loss 0.09278244461864232 test_loss: 0.10141420364379883
epoch: 82 training_loss 0.09706013701856137 test_loss: 0.10233793258666993
epoch: 83 training_loss 0.08742074802517891 test_loss: 0.08955250978469849
epoch: 84 training_loss 0.09248030237853527 test_loss: 0.08841062188148499
epoch: 85 training_loss 0.09042171837761998 test_loss: 0.09559559226036071
epoch: 86 training_loss 0.08654989317059517 test_loss: 0.07796132564544678
epoch: 87 training_loss 0.09031527439132332 test_loss: 0.09378337860107422
epoch: 88 training_loss 0.09432164007797837 test_loss: 0.08919838070869446
epoch: 89 training_loss 0.09072736477479339 test_loss: 0.08812674283981323
epoch: 90 training_loss 0.0936414778418839 test_loss: 0.09070870280265808
epoch: 91 training_loss 0.09206204457208514 test_loss: 0.09600569009780884
epoch: 92 training_loss 0.08485196368768812 test_loss: 0.09812315702438354
epoch: 93 training_loss 0.09000686381012202 test_loss: 0.0775565505027771
epoch: 94 training_loss 0.08556864526122808 test_loss: 0.09828541874885559
epoch: 95 training_loss 0.09036589624360204 test_loss: 0.1058154582977295
epoch: 96 training_loss 0.09664966825395822 test_loss: 0.08491604924201965
epoch: 97 training_loss 0.0904640487395227 test_loss: 0.11378792524337769
epoch: 98 training_loss 0.09369668073952198 test_loss: 0.07946698665618897
epoch: 99 training_loss 0.08899717275053262 test_loss: 0.1106842041015625
epoch: 100 training_loss 0.08600011760368943 test_loss: 0.10784380435943604
epoch: 101 training_loss 0.08934592694044113 test_loss: 0.07766746282577515
epoch: 102 training_loss 0.09064155070111156 test_loss: 0.08882607221603393
epoch: 103 training_loss 0.08570242650806904 test_loss: 0.09527732133865356
epoch: 104 training_loss 0.09559721145778895 test_loss: 0.09463870525360107
epoch: 105 training_loss 0.08371963895857334 test_loss: 0.0991489827632904
epoch: 106 training_loss 0.08678997453302145 test_loss: 0.09046007394790649
epoch: 107 training_loss 0.08446788810193538 test_loss: 0.08858378529548645
epoch: 108 training_loss 0.08736899022012949 test_loss: 0.08877180218696594
epoch: 109 training_loss 0.08940028052777052 test_loss: 0.10710690021514893
epoch: 110 training_loss 0.08867700571194291 test_loss: 0.08628027439117432
epoch: 111 training_loss 0.08920327274128795 test_loss: 0.07311106324195862
epoch: 112 training_loss 0.08374826518818736 test_loss: 0.09916484355926514
epoch: 113 training_loss 0.09491788674145937 test_loss: 0.09865870475769042
epoch: 114 training_loss 0.09224222863093018 test_loss: 0.10643165111541748
epoch: 115 training_loss 0.08965255023911595 test_loss: 0.10198549032211304
epoch: 116 training_loss 0.08448385475203395 test_loss: 0.09209757447242736
epoch: 117 training_loss 0.09219082338735461 test_loss: 0.09311594367027283
epoch: 118 training_loss 0.0862514302507043 test_loss: 0.09512755870819092
epoch: 119 training_loss 0.08061427097767591 test_loss: 0.07673872113227845
epoch: 120 training_loss 0.08903653081506491 test_loss: 0.0962869644165039
epoch: 121 training_loss 0.08760073320940137 test_loss: 0.09629455208778381
epoch: 122 training_loss 0.0867871730402112 test_loss: 0.10318701267242432
epoch: 123 training_loss 0.09290569759905339 test_loss: 0.09356355667114258
epoch: 124 training_loss 0.0906967568397522 test_loss: 0.0875848412513733
epoch: 125 training_loss 0.08680126324295998 test_loss: 0.10165727138519287
epoch: 126 training_loss 0.08871140129864216 test_loss: 0.09494156241416932
epoch: 127 training_loss 0.08499218373559415 test_loss: 0.07907945513725281
epoch: 128 training_loss 0.08510947186499834 test_loss: 0.08222972750663757
epoch: 129 training_loss 0.0870251153409481 test_loss: 0.07759596705436707
epoch: 130 training_loss 0.08483010238036513 test_loss: 0.09853867292404175
epoch: 131 training_loss 0.08723446574062109 test_loss: 0.06932188272476196
epoch: 132 training_loss 0.08801682643592358 test_loss: 0.10909798145294189
epoch: 133 training_loss 0.08953866235911846 test_loss: 0.09159178137779236
epoch: 134 training_loss 0.08972224723547698 test_loss: 0.0924305260181427
epoch: 135 training_loss 0.08396340511739254 test_loss: 0.09601705074310303
epoch: 136 training_loss 0.08545169459655881 test_loss: 0.08595571517944336
epoch: 137 training_loss 0.09178798591718078 test_loss: 0.08414870500564575
epoch: 138 training_loss 0.08207933541387319 test_loss: 0.08339099884033203
epoch: 139 training_loss 0.08263609793037176 test_loss: 0.08777335286140442
epoch: 140 training_loss 0.0814110384322703 test_loss: 0.10173614025115967
epoch: 141 training_loss 0.09211089063435793 test_loss: 0.08813376426696777
epoch: 142 training_loss 0.08623021678067744 test_loss: 0.08973585367202759
epoch: 143 training_loss 0.0871771408058703 test_loss: 0.09840558171272278
epoch: 144 training_loss 0.08805493609979749 test_loss: 0.10249059200286866
epoch: 145 training_loss 0.08431797018274664 test_loss: 0.0837094247341156
epoch: 146 training_loss 0.08224727496504784 test_loss: 0.08145892024040222
epoch: 147 training_loss 0.0875768282264471 test_loss: 0.09254472255706787
epoch: 148 training_loss 0.09501198828220367 test_loss: 0.09072446227073669
epoch: 149 training_loss 0.0905230220220983 test_loss: 0.10389096736907959
epoch: 0 training_loss 41.12234878540039 test_loss: 23.240281677246095
epoch: 1 training_loss 18.193716354370117 test_loss: 14.926493835449218
epoch: 2 training_loss 12.940742435455322 test_loss: 11.624124908447266
epoch: 3 training_loss 10.748729982376098 test_loss: 9.950679779052734
epoch: 4 training_loss 9.496813201904297 test_loss: 8.964698791503906
epoch: 5 training_loss 8.699378709793091 test_loss: 8.206961059570313
epoch: 6 training_loss 7.909968118667603 test_loss: 7.44892578125
epoch: 7 training_loss 7.391903481483459 test_loss: 7.313331604003906
epoch: 8 training_loss 6.88979974269867 test_loss: 6.4975135803222654
epoch: 9 training_loss 6.562105631828308 test_loss: 6.24676284790039
epoch: 10 training_loss 6.220392651557923 test_loss: 6.09190788269043
epoch: 11 training_loss 5.997318563461303 test_loss: 5.878644180297852
epoch: 12 training_loss 5.754948091506958 test_loss: 5.516830825805664
epoch: 13 training_loss 5.526987428665161 test_loss: 5.525053787231445
epoch: 14 training_loss 5.3977089643478395 test_loss: 5.177533340454102
epoch: 15 training_loss 5.21995350599289 test_loss: 5.112622451782227
epoch: 16 training_loss 4.975071721076965 test_loss: 5.078705215454102
epoch: 17 training_loss 4.819161190986633 test_loss: 4.925476837158203
epoch: 18 training_loss 4.75775961637497 test_loss: 4.748839569091797
epoch: 19 training_loss 4.5822259092330935 test_loss: 4.576986312866211
epoch: 20 training_loss 4.539160206317901 test_loss: 4.5434425354003904
epoch: 21 training_loss 4.435584499835968 test_loss: 4.418195724487305
epoch: 22 training_loss 4.31787787437439 test_loss: 4.365658950805664
epoch: 23 training_loss 4.342589936256409 test_loss: 4.145240402221679
epoch: 24 training_loss 4.198919866085053 test_loss: 4.114693069458008
epoch: 25 training_loss 4.140730266571045 test_loss: 4.18889274597168
epoch: 26 training_loss 4.037065942287445 test_loss: 4.131549453735351
epoch: 27 training_loss 3.944571433067322 test_loss: 3.8524322509765625
epoch: 28 training_loss 3.8864192366600037 test_loss: 3.8853931427001953
epoch: 29 training_loss 3.8489917135238647 test_loss: 3.8449554443359375
epoch: 30 training_loss 3.703069059848785 test_loss: 3.7695556640625
epoch: 31 training_loss 3.8142141008377077 test_loss: 3.559963607788086
epoch: 32 training_loss 3.679918420314789 test_loss: 3.558905029296875
epoch: 33 training_loss 3.6481439590454103 test_loss: 3.74329833984375
epoch: 34 training_loss 3.544466187953949 test_loss: 3.6175521850585937
epoch: 35 training_loss 3.58242623090744 test_loss: 3.5720149993896486
epoch: 36 training_loss 3.4714808344841 test_loss: 3.5056407928466795
epoch: 37 training_loss 3.5375286531448364 test_loss: 3.6199420928955077
epoch: 38 training_loss 3.496400773525238 test_loss: 3.307582473754883
epoch: 39 training_loss 3.416968836784363 test_loss: 3.1816030502319337
epoch: 40 training_loss 3.4111356687545777 test_loss: 3.212553787231445
epoch: 41 training_loss 3.3522366952896117 test_loss: 3.193490982055664
epoch: 42 training_loss 3.3009860205650328 test_loss: 3.458110809326172
epoch: 43 training_loss 3.230595290660858 test_loss: 3.293595886230469
epoch: 44 training_loss 3.2249305939674375 test_loss: 3.2534034729003904
epoch: 45 training_loss 3.2091057753562926 test_loss: 3.2192943572998045
epoch: 46 training_loss 3.247077486515045 test_loss: 3.138257598876953
epoch: 47 training_loss 3.1587633228302003 test_loss: 3.1757755279541016
epoch: 48 training_loss 3.0804351663589475 test_loss: 3.1965356826782227
epoch: 49 training_loss 3.1390205144882204 test_loss: 3.1209856033325196
epoch: 50 training_loss 3.081073486804962 test_loss: 3.1462635040283202
epoch: 51 training_loss 3.1246447706222535 test_loss: 2.911300468444824
epoch: 52 training_loss 3.0288010692596434 test_loss: 2.9898454666137697
epoch: 53 training_loss 3.0225801968574526 test_loss: 3.0041709899902345
epoch: 54 training_loss 3.001463921070099 test_loss: 2.810605049133301
epoch: 55 training_loss 2.9432507085800172 test_loss: 2.920802688598633
epoch: 56 training_loss 2.930229811668396 test_loss: 2.77222900390625
epoch: 57 training_loss 2.95508065700531 test_loss: 2.9578105926513674
epoch: 58 training_loss 2.8837051510810854 test_loss: 2.943441390991211
epoch: 59 training_loss 2.894607496261597 test_loss: 2.8400094985961912
epoch: 60 training_loss 2.7915908217430117 test_loss: 2.8766098022460938
epoch: 61 training_loss 2.9195618653297424 test_loss: 2.778530502319336
epoch: 62 training_loss 2.8376715683937075 test_loss: 2.8435897827148438
epoch: 63 training_loss 2.7845041871070864 test_loss: 2.8302818298339845
epoch: 64 training_loss 2.8363031125068665 test_loss: 2.9008390426635744
epoch: 65 training_loss 2.739874187707901 test_loss: 2.9963136672973634
epoch: 66 training_loss 2.8531298565864565 test_loss: 2.7129207611083985
epoch: 67 training_loss 2.8266151213645934 test_loss: 2.7853086471557615
epoch: 68 training_loss 2.796773540973663 test_loss: 2.648800086975098
epoch: 69 training_loss 2.6536894154548647 test_loss: 2.6803632736206056
epoch: 70 training_loss 2.741875698566437 test_loss: 2.595777702331543
epoch: 71 training_loss 2.6974339509010314 test_loss: 2.8021722793579102
epoch: 72 training_loss 2.6368178260326385 test_loss: 2.546965408325195
epoch: 73 training_loss 2.7072379064559935 test_loss: 2.739779472351074
epoch: 74 training_loss 2.727953609228134 test_loss: 2.6364917755126953
epoch: 75 training_loss 2.7157343149185182 test_loss: 2.6662378311157227
epoch: 76 training_loss 2.715659532546997 test_loss: 2.670528221130371
epoch: 77 training_loss 2.627140928506851 test_loss: 2.636266326904297
epoch: 78 training_loss 2.6269745016098023 test_loss: 2.6152509689331054
epoch: 79 training_loss 2.607685394287109 test_loss: 2.6196908950805664
epoch: 80 training_loss 2.642383463382721 test_loss: 2.551111602783203
epoch: 81 training_loss 2.5968345403671265 test_loss: 2.7395708084106447
epoch: 82 training_loss 2.61204936504364 test_loss: 2.688363456726074
epoch: 83 training_loss 2.6222542321681974 test_loss: 2.591901969909668
epoch: 84 training_loss 2.605803223848343 test_loss: 2.5801515579223633
epoch: 85 training_loss 2.5149674153327943 test_loss: 2.6079690933227537
epoch: 86 training_loss 2.545657465457916 test_loss: 2.5480804443359375
epoch: 87 training_loss 2.610887440443039 test_loss: 2.562269401550293
epoch: 88 training_loss 2.584580543041229 test_loss: 2.6418701171875
epoch: 89 training_loss 2.5704570031166076 test_loss: 2.604454231262207
epoch: 90 training_loss 2.5142588925361635 test_loss: 2.507583236694336
epoch: 91 training_loss 2.522815909385681 test_loss: 2.3564731597900392
epoch: 92 training_loss 2.462444919347763 test_loss: 2.5109075546264648
epoch: 93 training_loss 2.544085694551468 test_loss: 2.452892875671387
epoch: 94 training_loss 2.5181954598426817 test_loss: 2.4293645858764648
epoch: 95 training_loss 2.473246178627014 test_loss: 2.534489059448242
epoch: 96 training_loss 2.497546011209488 test_loss: 2.5764892578125
epoch: 97 training_loss 2.483892798423767 test_loss: 2.426823043823242
epoch: 98 training_loss 2.441197701692581 test_loss: 2.4288331985473635
epoch: 99 training_loss 2.433414286375046 test_loss: 2.5642230987548826
epoch: 100 training_loss 2.4305956745147705 test_loss: 2.5179283142089846
epoch: 101 training_loss 2.413690025806427 test_loss: 2.268144989013672
epoch: 102 training_loss 2.4042491841316225 test_loss: 2.4263797760009767
epoch: 103 training_loss 2.442529684305191 test_loss: 2.303915023803711
epoch: 104 training_loss 2.474354764223099 test_loss: 2.5597202301025392
epoch: 105 training_loss 2.4548151898384094 test_loss: 2.380641555786133
epoch: 106 training_loss 2.407843043804169 test_loss: 2.436552619934082
epoch: 107 training_loss 2.3656793212890626 test_loss: 2.4098928451538084
epoch: 108 training_loss 2.411960161924362 test_loss: 2.28420352935791
epoch: 109 training_loss 2.3871885788440705 test_loss: 2.4581268310546873
epoch: 110 training_loss 2.3830128705501554 test_loss: 2.3663963317871093
epoch: 111 training_loss 2.3807454919815063 test_loss: 2.4060415267944335
epoch: 112 training_loss 2.385904219150543 test_loss: 2.4268815994262694
epoch: 113 training_loss 2.3592907226085664 test_loss: 2.3141250610351562
epoch: 114 training_loss 2.3361765909194947 test_loss: 2.387677001953125
epoch: 115 training_loss 2.352926548719406 test_loss: 2.3062005996704102
epoch: 116 training_loss 2.394460695981979 test_loss: 2.424336242675781
epoch: 117 training_loss 2.3534138119220733 test_loss: 2.340610122680664
epoch: 118 training_loss 2.342995833158493 test_loss: 2.342305564880371
epoch: 119 training_loss 2.316561442613602 test_loss: 2.326688766479492
epoch: 120 training_loss 2.2945087158679964 test_loss: 2.3363027572631836
epoch: 121 training_loss 2.3542901992797853 test_loss: 2.4408740997314453
epoch: 122 training_loss 2.291118139028549 test_loss: 2.3158788681030273
epoch: 123 training_loss 2.3157632994651793 test_loss: 2.377671241760254
epoch: 124 training_loss 2.2851528215408323 test_loss: 2.2006330490112305
epoch: 125 training_loss 2.282084324359894 test_loss: 2.273603820800781
epoch: 126 training_loss 2.315842125415802 test_loss: 2.3083356857299804
epoch: 127 training_loss 2.299482271671295 test_loss: 2.19924259185791
epoch: 128 training_loss 2.272300074100494 test_loss: 2.3306594848632813
epoch: 129 training_loss 2.2819224202632906 test_loss: 2.253236198425293
epoch: 130 training_loss 2.3008714973926545 test_loss: 2.248080825805664
epoch: 131 training_loss 2.234534124135971 test_loss: 2.3412282943725584
epoch: 132 training_loss 2.260232089757919 test_loss: 2.3522216796875
epoch: 133 training_loss 2.303682119846344 test_loss: 2.254916191101074
epoch: 134 training_loss 2.2607939267158508 test_loss: 2.322805976867676
epoch: 135 training_loss 2.250507688522339 test_loss: 2.18309326171875
epoch: 136 training_loss 2.2398645055294035 test_loss: 2.221976470947266
epoch: 137 training_loss 2.2434934401512145 test_loss: 2.339982604980469
epoch: 138 training_loss 2.190534738302231 test_loss: 2.26358642578125
epoch: 139 training_loss 2.2618780720233915 test_loss: 2.249446487426758
epoch: 140 training_loss 2.216862701177597 test_loss: 2.251659965515137
epoch: 141 training_loss 2.21611398935318 test_loss: 2.2373985290527343
epoch: 142 training_loss 2.2401222765445707 test_loss: 2.201396369934082
epoch: 143 training_loss 2.2849983620643615 test_loss: 2.222700500488281
epoch: 144 training_loss 2.2207976293563845 test_loss: 2.1718177795410156
epoch: 145 training_loss 2.196370075941086 test_loss: 2.28619499206543
epoch: 146 training_loss 2.213102784156799 test_loss: 2.125489616394043
epoch: 147 training_loss 2.207883982658386 test_loss: 2.393451690673828
epoch: 148 training_loss 2.2333057844638824 test_loss: 2.2393329620361326
epoch: 149 training_loss 2.172337745428085 test_loss: 2.1490509033203127
2007.9692388737149
episode: 0 training return: tensor(373.5644, device='cuda:0')
episode: 1 training return: tensor(285.0263, device='cuda:0')
episode: 2 training return: tensor(276.5045, device='cuda:0')
episode: 3 training return: tensor(-102.1176, device='cuda:0')
epoch: 1 test_true_pfm: 3108.5923335714974 sim_pfm: 215.47055257705506
episode: 4 training return: tensor(338.5572, device='cuda:0')
episode: 5 training return: tensor(341.0420, device='cuda:0')
episode: 6 training return: tensor(-62.2824, device='cuda:0')
episode: 7 training return: tensor(290.7316, device='cuda:0')
epoch: 2 test_true_pfm: 3395.808772002161 sim_pfm: 376.5589206472505
episode: 8 training return: tensor(-148.2896, device='cuda:0')
episode: 9 training return: tensor(3.9841, device='cuda:0')
episode: 10 training return: tensor(310.4886, device='cuda:0')
episode: 11 training return: tensor(343.6284, device='cuda:0')
epoch: 3 test_true_pfm: 2973.543449892328 sim_pfm: 123.0071088715146
episode: 12 training return: tensor(31.4270, device='cuda:0')
episode: 13 training return: tensor(351.1243, device='cuda:0')
episode: 14 training return: tensor(217.9230, device='cuda:0')
episode: 15 training return: tensor(-218.1870, device='cuda:0')
epoch: 4 test_true_pfm: 3393.965734601082 sim_pfm: 179.44586960263163
episode: 16 training return: tensor(146.9723, device='cuda:0')
episode: 17 training return: tensor(309.0756, device='cuda:0')
episode: 18 training return: tensor(205.6417, device='cuda:0')
episode: 19 training return: tensor(-170.0166, device='cuda:0')
epoch: 5 test_true_pfm: 2900.8572980430527 sim_pfm: 198.63149658774879
episode: 20 training return: tensor(-186.0788, device='cuda:0')
episode: 21 training return: tensor(355.2422, device='cuda:0')
episode: 22 training return: tensor(443.7320, device='cuda:0')
episode: 23 training return: tensor(304.8181, device='cuda:0')
epoch: 6 test_true_pfm: 2458.324790094648 sim_pfm: 162.17943446993982
episode: 24 training return: tensor(168.1411, device='cuda:0')
episode: 25 training return: tensor(-44.9343, device='cuda:0')
episode: 26 training return: tensor(370.7032, device='cuda:0')
episode: 27 training return: tensor(74.8820, device='cuda:0')
epoch: 7 test_true_pfm: 3048.876574013683 sim_pfm: 234.19189673229508
episode: 28 training return: tensor(-77.8682, device='cuda:0')
episode: 29 training return: tensor(268.6940, device='cuda:0')
episode: 30 training return: tensor(350.3256, device='cuda:0')
episode: 31 training return: tensor(342.0732, device='cuda:0')
epoch: 8 test_true_pfm: 2490.077617718018 sim_pfm: 66.75217183385394
episode: 32 training return: tensor(292.7974, device='cuda:0')
episode: 33 training return: tensor(-109.9423, device='cuda:0')
episode: 34 training return: tensor(316.7189, device='cuda:0')
episode: 35 training return: tensor(306.7642, device='cuda:0')
epoch: 9 test_true_pfm: 3376.1574883844273 sim_pfm: 94.1418723576741
episode: 36 training return: tensor(93.9917, device='cuda:0')
episode: 37 training return: tensor(-188.3647, device='cuda:0')
episode: 38 training return: tensor(-269.1365, device='cuda:0')
episode: 39 training return: tensor(-160.6563, device='cuda:0')
epoch: 10 test_true_pfm: 3043.1453510356973 sim_pfm: 241.44752165844935
episode: 40 training return: tensor(279.6255, device='cuda:0')
episode: 41 training return: tensor(363.9178, device='cuda:0')
episode: 42 training return: tensor(368.4786, device='cuda:0')
episode: 43 training return: tensor(365.8067, device='cuda:0')
epoch: 11 test_true_pfm: 3403.384735292588 sim_pfm: 284.47390660082846
episode: 44 training return: tensor(311.5605, device='cuda:0')
episode: 45 training return: tensor(287.3653, device='cuda:0')
episode: 46 training return: tensor(-90.7743, device='cuda:0')
episode: 47 training return: tensor(2.0234, device='cuda:0')
epoch: 12 test_true_pfm: 3054.189799471665 sim_pfm: 352.1018649788069
episode: 48 training return: tensor(371.6459, device='cuda:0')
episode: 49 training return: tensor(302.8828, device='cuda:0')
episode: 50 training return: tensor(284.2932, device='cuda:0')
episode: 51 training return: tensor(318.9845, device='cuda:0')
epoch: 13 test_true_pfm: 2484.1844089923534 sim_pfm: 279.8293847115613
episode: 52 training return: tensor(395.6469, device='cuda:0')
episode: 53 training return: tensor(142.5857, device='cuda:0')
episode: 54 training return: tensor(-278.3321, device='cuda:0')
episode: 55 training return: tensor(346.3635, device='cuda:0')
epoch: 14 test_true_pfm: 2868.276132584227 sim_pfm: -5.786592211399693
episode: 56 training return: tensor(359.3552, device='cuda:0')
episode: 57 training return: tensor(316.3889, device='cuda:0')
episode: 58 training return: tensor(355.7830, device='cuda:0')
episode: 59 training return: tensor(304.3936, device='cuda:0')
epoch: 15 test_true_pfm: 3257.9380553174055 sim_pfm: 273.26061755235423
episode: 60 training return: tensor(357.8529, device='cuda:0')
episode: 61 training return: tensor(318.8582, device='cuda:0')
episode: 62 training return: tensor(274.5792, device='cuda:0')
episode: 63 training return: tensor(-25.8969, device='cuda:0')
epoch: 16 test_true_pfm: 3368.3522891183807 sim_pfm: 257.5584324588029
episode: 64 training return: tensor(329.7621, device='cuda:0')
episode: 65 training return: tensor(352.4751, device='cuda:0')
episode: 66 training return: tensor(363.2490, device='cuda:0')
episode: 67 training return: tensor(377.9792, device='cuda:0')
epoch: 17 test_true_pfm: 3389.987418409197 sim_pfm: 338.40858769465314
episode: 68 training return: tensor(367.6742, device='cuda:0')
episode: 69 training return: tensor(374.3833, device='cuda:0')
episode: 70 training return: tensor(347.5208, device='cuda:0')
episode: 71 training return: tensor(285.0913, device='cuda:0')
epoch: 18 test_true_pfm: 3219.4889957306136 sim_pfm: 278.51762420649175
episode: 72 training return: tensor(261.4058, device='cuda:0')
episode: 73 training return: tensor(173.8333, device='cuda:0')
episode: 74 training return: tensor(435.6698, device='cuda:0')
episode: 75 training return: tensor(368.5772, device='cuda:0')
epoch: 19 test_true_pfm: 3354.3950893110705 sim_pfm: 352.8378743250699
episode: 76 training return: tensor(377.0583, device='cuda:0')
episode: 77 training return: tensor(-78.6377, device='cuda:0')
episode: 78 training return: tensor(279.9371, device='cuda:0')
episode: 79 training return: tensor(-173.0048, device='cuda:0')
epoch: 20 test_true_pfm: 3360.528970675532 sim_pfm: 182.12270005109409
episode: 80 training return: tensor(387.7898, device='cuda:0')
episode: 81 training return: tensor(340.9517, device='cuda:0')
episode: 82 training return: tensor(349.3918, device='cuda:0')
episode: 83 training return: tensor(295.4353, device='cuda:0')
epoch: 21 test_true_pfm: 3363.046935768072 sim_pfm: 145.81023965372393
episode: 84 training return: tensor(374.6019, device='cuda:0')
episode: 85 training return: tensor(220.1211, device='cuda:0')
episode: 86 training return: tensor(288.3615, device='cuda:0')
episode: 87 training return: tensor(384.0955, device='cuda:0')
epoch: 22 test_true_pfm: 3290.6537742437954 sim_pfm: 332.9533675833566
episode: 88 training return: tensor(-285.9424, device='cuda:0')
episode: 89 training return: tensor(311.8750, device='cuda:0')
episode: 90 training return: tensor(323.7772, device='cuda:0')
episode: 91 training return: tensor(233.7388, device='cuda:0')
epoch: 23 test_true_pfm: 3374.252861024979 sim_pfm: 345.80530829188257
episode: 92 training return: tensor(424.6631, device='cuda:0')
episode: 93 training return: tensor(147.8635, device='cuda:0')
episode: 94 training return: tensor(363.6464, device='cuda:0')
episode: 95 training return: tensor(327.3456, device='cuda:0')
epoch: 24 test_true_pfm: 2678.3360458220627 sim_pfm: 322.9106147818384
episode: 96 training return: tensor(267.7743, device='cuda:0')
episode: 97 training return: tensor(362.1397, device='cuda:0')
episode: 98 training return: tensor(324.6736, device='cuda:0')
episode: 99 training return: tensor(330.0468, device='cuda:0')
epoch: 25 test_true_pfm: 3371.699606471442 sim_pfm: 328.65582921582
episode: 100 training return: tensor(356.6588, device='cuda:0')
episode: 101 training return: tensor(272.3519, device='cuda:0')
episode: 102 training return: tensor(125.3810, device='cuda:0')
episode: 103 training return: tensor(285.4863, device='cuda:0')
epoch: 26 test_true_pfm: 3273.9259499597415 sim_pfm: 156.0211770715347
episode: 104 training return: tensor(382.3464, device='cuda:0')
episode: 105 training return: tensor(392.1757, device='cuda:0')
episode: 106 training return: tensor(433.4186, device='cuda:0')
episode: 107 training return: tensor(-118.5557, device='cuda:0')
epoch: 27 test_true_pfm: 3362.7601313498076 sim_pfm: 340.24390413311386
episode: 108 training return: tensor(-52.0268, device='cuda:0')
episode: 109 training return: tensor(317.1773, device='cuda:0')
episode: 110 training return: tensor(-131.7146, device='cuda:0')
episode: 111 training return: tensor(393.4763, device='cuda:0')
epoch: 28 test_true_pfm: 3347.585065748188 sim_pfm: 319.2773470119573
episode: 112 training return: tensor(257.0752, device='cuda:0')
episode: 113 training return: tensor(348.7985, device='cuda:0')
episode: 114 training return: tensor(408.9400, device='cuda:0')
episode: 115 training return: tensor(327.9687, device='cuda:0')
epoch: 29 test_true_pfm: 3383.6287132974635 sim_pfm: 339.59336484840605
episode: 116 training return: tensor(346.6572, device='cuda:0')
episode: 117 training return: tensor(320.3350, device='cuda:0')
episode: 118 training return: tensor(418.5268, device='cuda:0')
episode: 119 training return: tensor(414.6316, device='cuda:0')
epoch: 30 test_true_pfm: 3354.0088604064927 sim_pfm: 324.75741182346246
episode: 120 training return: tensor(352.5064, device='cuda:0')
episode: 121 training return: tensor(377.3114, device='cuda:0')
episode: 122 training return: tensor(37.2177, device='cuda:0')
episode: 123 training return: tensor(354.8086, device='cuda:0')
epoch: 31 test_true_pfm: 3389.7831127457594 sim_pfm: 380.58962267220096
episode: 124 training return: tensor(388.3060, device='cuda:0')
episode: 125 training return: tensor(435.6488, device='cuda:0')
episode: 126 training return: tensor(362.2772, device='cuda:0')
episode: 127 training return: tensor(396.0653, device='cuda:0')
epoch: 32 test_true_pfm: 3379.732323550021 sim_pfm: 374.0112329586215
episode: 128 training return: tensor(331.3799, device='cuda:0')
episode: 129 training return: tensor(315.0630, device='cuda:0')
episode: 130 training return: tensor(17.0472, device='cuda:0')
episode: 131 training return: tensor(346.1297, device='cuda:0')
epoch: 33 test_true_pfm: 3293.5031777695626 sim_pfm: 219.08041395367277
episode: 132 training return: tensor(406.9675, device='cuda:0')
episode: 133 training return: tensor(381.7010, device='cuda:0')
episode: 134 training return: tensor(166.5227, device='cuda:0')
episode: 135 training return: tensor(350.6193, device='cuda:0')
epoch: 34 test_true_pfm: 3325.03616203156 sim_pfm: 391.12980973578914
episode: 136 training return: tensor(413.8382, device='cuda:0')
episode: 137 training return: tensor(361.6504, device='cuda:0')
episode: 138 training return: tensor(323.1552, device='cuda:0')
episode: 139 training return: tensor(312.8828, device='cuda:0')
epoch: 35 test_true_pfm: 3417.8180713491406 sim_pfm: 416.430471506513
episode: 140 training return: tensor(354.5986, device='cuda:0')
episode: 141 training return: tensor(351.7508, device='cuda:0')
episode: 142 training return: tensor(340.7213, device='cuda:0')
episode: 143 training return: tensor(417.7632, device='cuda:0')
epoch: 36 test_true_pfm: 2872.5786690395835 sim_pfm: 155.96579792230236
episode: 144 training return: tensor(349.6671, device='cuda:0')
episode: 145 training return: tensor(190.4376, device='cuda:0')
episode: 146 training return: tensor(360.4553, device='cuda:0')
episode: 147 training return: tensor(358.4396, device='cuda:0')
epoch: 37 test_true_pfm: 3394.251055212488 sim_pfm: 304.5308663947896
episode: 148 training return: tensor(346.3796, device='cuda:0')
episode: 149 training return: tensor(-14.8420, device='cuda:0')
episode: 150 training return: tensor(374.5859, device='cuda:0')
episode: 151 training return: tensor(429.2885, device='cuda:0')
epoch: 38 test_true_pfm: 3426.7261614689637 sim_pfm: 392.2886840600113
episode: 152 training return: tensor(349.6306, device='cuda:0')
episode: 153 training return: tensor(308.2899, device='cuda:0')
episode: 154 training return: tensor(343.1892, device='cuda:0')
episode: 155 training return: tensor(404.4724, device='cuda:0')
epoch: 39 test_true_pfm: 3186.2296607164076 sim_pfm: 367.5336060819488
episode: 156 training return: tensor(40.5366, device='cuda:0')
episode: 157 training return: tensor(386.0026, device='cuda:0')
episode: 158 training return: tensor(388.9948, device='cuda:0')
episode: 159 training return: tensor(356.5820, device='cuda:0')
epoch: 40 test_true_pfm: 3432.389155365169 sim_pfm: 287.34880717637134
episode: 160 training return: tensor(429.6836, device='cuda:0')
episode: 161 training return: tensor(357.7955, device='cuda:0')
episode: 162 training return: tensor(401.3095, device='cuda:0')
episode: 163 training return: tensor(391.3535, device='cuda:0')
epoch: 41 test_true_pfm: 3364.1315803929174 sim_pfm: 333.50489278931246
episode: 164 training return: tensor(438.7182, device='cuda:0')
episode: 165 training return: tensor(381.9475, device='cuda:0')
episode: 166 training return: tensor(348.5489, device='cuda:0')
episode: 167 training return: tensor(446.8961, device='cuda:0')
epoch: 42 test_true_pfm: 3431.5529215906913 sim_pfm: 377.85193977322587
episode: 168 training return: tensor(404.6641, device='cuda:0')
episode: 169 training return: tensor(355.8627, device='cuda:0')
episode: 170 training return: tensor(290.5607, device='cuda:0')
episode: 171 training return: tensor(326.3327, device='cuda:0')
epoch: 43 test_true_pfm: 3123.9150274442727 sim_pfm: 374.74796389002586
episode: 172 training return: tensor(296.9105, device='cuda:0')
episode: 173 training return: tensor(383.0275, device='cuda:0')
episode: 174 training return: tensor(339.3574, device='cuda:0')
episode: 175 training return: tensor(338.6454, device='cuda:0')
epoch: 44 test_true_pfm: 3019.0944856844617 sim_pfm: 368.78989793612465
episode: 176 training return: tensor(-88.6529, device='cuda:0')
episode: 177 training return: tensor(407.0762, device='cuda:0')
episode: 178 training return: tensor(-123.6983, device='cuda:0')
episode: 179 training return: tensor(386.3456, device='cuda:0')
epoch: 45 test_true_pfm: 3464.8954648406484 sim_pfm: 375.93930190526106
episode: 180 training return: tensor(345.6942, device='cuda:0')
episode: 181 training return: tensor(325.0861, device='cuda:0')
episode: 182 training return: tensor(414.4589, device='cuda:0')
episode: 183 training return: tensor(397.5001, device='cuda:0')
epoch: 46 test_true_pfm: 2934.59525792494 sim_pfm: 393.8693122633267
episode: 184 training return: tensor(387.3817, device='cuda:0')
episode: 185 training return: tensor(292.8219, device='cuda:0')
episode: 186 training return: tensor(432.8190, device='cuda:0')
episode: 187 training return: tensor(190.1784, device='cuda:0')
epoch: 47 test_true_pfm: 3434.7189795106 sim_pfm: 425.34701597568346
episode: 188 training return: tensor(343.5044, device='cuda:0')
episode: 189 training return: tensor(352.5634, device='cuda:0')
episode: 190 training return: tensor(407.7531, device='cuda:0')
episode: 191 training return: tensor(353.7689, device='cuda:0')
epoch: 48 test_true_pfm: 3377.9560734373113 sim_pfm: 370.8677325938188
episode: 192 training return: tensor(403.5521, device='cuda:0')
episode: 193 training return: tensor(280.0594, device='cuda:0')
episode: 194 training return: tensor(333.8176, device='cuda:0')
episode: 195 training return: tensor(383.6044, device='cuda:0')
epoch: 49 test_true_pfm: 3459.945609632245 sim_pfm: 388.46886128152255
episode: 196 training return: tensor(137.5585, device='cuda:0')
episode: 197 training return: tensor(370.3443, device='cuda:0')
episode: 198 training return: tensor(401.1612, device='cuda:0')
episode: 199 training return: tensor(328.4771, device='cuda:0')
epoch: 50 test_true_pfm: 3491.3872415288747 sim_pfm: 402.4514684841076
episode: 200 training return: tensor(421.3850, device='cuda:0')
episode: 201 training return: tensor(383.3112, device='cuda:0')
episode: 202 training return: tensor(351.8456, device='cuda:0')
episode: 203 training return: tensor(451.2328, device='cuda:0')
epoch: 51 test_true_pfm: 3411.0083027272867 sim_pfm: 397.5832877282325
episode: 204 training return: tensor(-76.3384, device='cuda:0')
episode: 205 training return: tensor(400.5786, device='cuda:0')
episode: 206 training return: tensor(426.6708, device='cuda:0')
episode: 207 training return: tensor(421.4401, device='cuda:0')
epoch: 52 test_true_pfm: 3057.57449167378 sim_pfm: 383.31564731352654
episode: 208 training return: tensor(-37.6874, device='cuda:0')
episode: 209 training return: tensor(-34.7836, device='cuda:0')
episode: 210 training return: tensor(379.2518, device='cuda:0')
episode: 211 training return: tensor(358.7083, device='cuda:0')
epoch: 53 test_true_pfm: 3400.2053749804004 sim_pfm: 394.74024157803314
episode: 212 training return: tensor(398.0666, device='cuda:0')
episode: 213 training return: tensor(390.8368, device='cuda:0')
episode: 214 training return: tensor(359.9594, device='cuda:0')
episode: 215 training return: tensor(363.5182, device='cuda:0')
epoch: 54 test_true_pfm: 2912.425736278325 sim_pfm: 388.76087996373343
episode: 216 training return: tensor(424.9170, device='cuda:0')
episode: 217 training return: tensor(432.1648, device='cuda:0')
episode: 218 training return: tensor(407.9202, device='cuda:0')
episode: 219 training return: tensor(349.8557, device='cuda:0')
epoch: 55 test_true_pfm: 3446.2295398012034 sim_pfm: 384.47205505455105
episode: 220 training return: tensor(370.8079, device='cuda:0')
episode: 221 training return: tensor(373.5651, device='cuda:0')
episode: 222 training return: tensor(410.3616, device='cuda:0')
episode: 223 training return: tensor(312.0182, device='cuda:0')
epoch: 56 test_true_pfm: 3392.3625086070356 sim_pfm: 375.02465874162345
episode: 224 training return: tensor(409.7461, device='cuda:0')
episode: 225 training return: tensor(319.6611, device='cuda:0')
episode: 226 training return: tensor(402.1504, device='cuda:0')
episode: 227 training return: tensor(357.4119, device='cuda:0')
epoch: 57 test_true_pfm: 3399.9990017729883 sim_pfm: 398.00900932545
episode: 228 training return: tensor(375.4843, device='cuda:0')
episode: 229 training return: tensor(405.0360, device='cuda:0')
episode: 230 training return: tensor(348.0446, device='cuda:0')
episode: 231 training return: tensor(420.3826, device='cuda:0')
epoch: 58 test_true_pfm: 3409.4259695439737 sim_pfm: 347.8642442944595
episode: 232 training return: tensor(357.9068, device='cuda:0')
episode: 233 training return: tensor(320.8081, device='cuda:0')
episode: 234 training return: tensor(421.2167, device='cuda:0')
episode: 235 training return: tensor(-3.8373, device='cuda:0')
epoch: 59 test_true_pfm: 3289.81133734021 sim_pfm: 384.39338739660644
episode: 236 training return: tensor(399.1073, device='cuda:0')
episode: 237 training return: tensor(485.6859, device='cuda:0')
episode: 238 training return: tensor(351.4238, device='cuda:0')
episode: 239 training return: tensor(422.4366, device='cuda:0')
epoch: 60 test_true_pfm: 3410.1799698085542 sim_pfm: 321.8693145195721
episode: 240 training return: tensor(29.2860, device='cuda:0')
episode: 241 training return: tensor(384.8765, device='cuda:0')
episode: 242 training return: tensor(386.3250, device='cuda:0')
episode: 243 training return: tensor(408.5764, device='cuda:0')
epoch: 61 test_true_pfm: 3420.3428821077046 sim_pfm: 378.96465969628963
episode: 244 training return: tensor(382.7023, device='cuda:0')
episode: 245 training return: tensor(371.3130, device='cuda:0')
episode: 246 training return: tensor(330.5528, device='cuda:0')
episode: 247 training return: tensor(382.1825, device='cuda:0')
epoch: 62 test_true_pfm: 3440.762384516763 sim_pfm: 397.6892749435501
episode: 248 training return: tensor(-35.7484, device='cuda:0')
episode: 249 training return: tensor(389.2402, device='cuda:0')
episode: 250 training return: tensor(366.1105, device='cuda:0')
episode: 251 training return: tensor(320.9323, device='cuda:0')
epoch: 63 test_true_pfm: 3380.870140138721 sim_pfm: 375.46847010578495
episode: 252 training return: tensor(295.3932, device='cuda:0')
episode: 253 training return: tensor(396.3651, device='cuda:0')
episode: 254 training return: tensor(322.9533, device='cuda:0')
episode: 255 training return: tensor(410.4330, device='cuda:0')
epoch: 64 test_true_pfm: 3391.827711626991 sim_pfm: 368.51308708121843
episode: 256 training return: tensor(410.6112, device='cuda:0')
episode: 257 training return: tensor(462.5042, device='cuda:0')
episode: 258 training return: tensor(334.1933, device='cuda:0')
episode: 259 training return: tensor(365.0924, device='cuda:0')
epoch: 65 test_true_pfm: 3480.327294127673 sim_pfm: 343.4385496819062
episode: 260 training return: tensor(396.7561, device='cuda:0')
episode: 261 training return: tensor(380.2885, device='cuda:0')
episode: 262 training return: tensor(319.4014, device='cuda:0')
episode: 263 training return: tensor(412.1091, device='cuda:0')
epoch: 66 test_true_pfm: 3499.572397266224 sim_pfm: 377.492795420374
episode: 264 training return: tensor(389.1610, device='cuda:0')
episode: 265 training return: tensor(303.4521, device='cuda:0')
episode: 266 training return: tensor(391.7362, device='cuda:0')
episode: 267 training return: tensor(358.8519, device='cuda:0')
epoch: 67 test_true_pfm: 3460.8180461408624 sim_pfm: 373.3219028141563
episode: 268 training return: tensor(363.1783, device='cuda:0')
episode: 269 training return: tensor(304.7599, device='cuda:0')
episode: 270 training return: tensor(324.4658, device='cuda:0')
episode: 271 training return: tensor(351.8812, device='cuda:0')
epoch: 68 test_true_pfm: 3298.682867492364 sim_pfm: 220.77777965564746
episode: 272 training return: tensor(387.1583, device='cuda:0')
episode: 273 training return: tensor(333.8986, device='cuda:0')
episode: 274 training return: tensor(324.3955, device='cuda:0')
episode: 275 training return: tensor(402.5601, device='cuda:0')
epoch: 69 test_true_pfm: 3273.3859448871044 sim_pfm: 387.42130789514823
episode: 276 training return: tensor(55.7859, device='cuda:0')
episode: 277 training return: tensor(380.6268, device='cuda:0')
episode: 278 training return: tensor(346.3608, device='cuda:0')
episode: 279 training return: tensor(292.2470, device='cuda:0')
epoch: 70 test_true_pfm: 3427.4489201978395 sim_pfm: 364.4635562400411
episode: 280 training return: tensor(375.3478, device='cuda:0')
episode: 281 training return: tensor(313.7937, device='cuda:0')
episode: 282 training return: tensor(362.7366, device='cuda:0')
episode: 283 training return: tensor(338.6193, device='cuda:0')
epoch: 71 test_true_pfm: 3458.3022150785605 sim_pfm: 360.20931108358974
episode: 284 training return: tensor(365.1110, device='cuda:0')
episode: 285 training return: tensor(388.1122, device='cuda:0')
episode: 286 training return: tensor(-194.5206, device='cuda:0')
episode: 287 training return: tensor(327.1516, device='cuda:0')
epoch: 72 test_true_pfm: 3440.2643528780286 sim_pfm: 356.89825974951964
episode: 288 training return: tensor(345.6015, device='cuda:0')
episode: 289 training return: tensor(363.1289, device='cuda:0')
episode: 290 training return: tensor(347.2757, device='cuda:0')
episode: 291 training return: tensor(444.8759, device='cuda:0')
epoch: 73 test_true_pfm: 3459.4387601342683 sim_pfm: 402.624818649434
episode: 292 training return: tensor(395.1528, device='cuda:0')
episode: 293 training return: tensor(361.1289, device='cuda:0')
episode: 294 training return: tensor(-114.1166, device='cuda:0')
episode: 295 training return: tensor(340.0380, device='cuda:0')
epoch: 74 test_true_pfm: 3435.9601815458705 sim_pfm: 360.51269756952144
episode: 296 training return: tensor(392.7966, device='cuda:0')
episode: 297 training return: tensor(179.1802, device='cuda:0')
episode: 298 training return: tensor(324.5895, device='cuda:0')
episode: 299 training return: tensor(350.0156, device='cuda:0')
epoch: 75 test_true_pfm: 3392.71923789508 sim_pfm: 289.5091585834064
episode: 300 training return: tensor(101.3564, device='cuda:0')
episode: 301 training return: tensor(26.2183, device='cuda:0')
episode: 302 training return: tensor(349.5216, device='cuda:0')
episode: 303 training return: tensor(352.3122, device='cuda:0')
epoch: 76 test_true_pfm: 3355.4485771141335 sim_pfm: 396.84203966447967
episode: 304 training return: tensor(348.5621, device='cuda:0')
episode: 305 training return: tensor(394.8058, device='cuda:0')
episode: 306 training return: tensor(404.5573, device='cuda:0')
episode: 307 training return: tensor(447.7927, device='cuda:0')
epoch: 77 test_true_pfm: 3426.063534607678 sim_pfm: 367.9626471390172
episode: 308 training return: tensor(436.7417, device='cuda:0')
episode: 309 training return: tensor(390.9763, device='cuda:0')
episode: 310 training return: tensor(-20.9021, device='cuda:0')
episode: 311 training return: tensor(398.1297, device='cuda:0')
epoch: 78 test_true_pfm: 3498.194389782942 sim_pfm: 286.70470279069076
episode: 312 training return: tensor(322.1008, device='cuda:0')
episode: 313 training return: tensor(404.6487, device='cuda:0')
episode: 314 training return: tensor(424.5422, device='cuda:0')
episode: 315 training return: tensor(384.4725, device='cuda:0')
epoch: 79 test_true_pfm: 3386.0115372894666 sim_pfm: 274.1518547446467
episode: 316 training return: tensor(371.9662, device='cuda:0')
episode: 317 training return: tensor(459.1093, device='cuda:0')
episode: 318 training return: tensor(-87.9916, device='cuda:0')
episode: 319 training return: tensor(328.5588, device='cuda:0')
epoch: 80 test_true_pfm: 3454.1458056659503 sim_pfm: 414.9346495794792
episode: 320 training return: tensor(420.5424, device='cuda:0')
episode: 321 training return: tensor(350.4602, device='cuda:0')
episode: 322 training return: tensor(344.1109, device='cuda:0')
episode: 323 training return: tensor(116.6112, device='cuda:0')
epoch: 81 test_true_pfm: 3378.4588300528485 sim_pfm: 369.9721593108649
episode: 324 training return: tensor(376.8473, device='cuda:0')
episode: 325 training return: tensor(-95.6335, device='cuda:0')
episode: 326 training return: tensor(368.6205, device='cuda:0')
episode: 327 training return: tensor(395.2315, device='cuda:0')
epoch: 82 test_true_pfm: 3403.3264847144696 sim_pfm: 388.03516311910545
episode: 328 training return: tensor(406.8026, device='cuda:0')
episode: 329 training return: tensor(399.6693, device='cuda:0')
episode: 330 training return: tensor(418.2194, device='cuda:0')
episode: 331 training return: tensor(376.7427, device='cuda:0')
epoch: 83 test_true_pfm: 3388.526164470919 sim_pfm: 409.22053206329775
episode: 332 training return: tensor(354.7724, device='cuda:0')
episode: 333 training return: tensor(326.8300, device='cuda:0')
episode: 334 training return: tensor(430.8652, device='cuda:0')
episode: 335 training return: tensor(347.0600, device='cuda:0')
epoch: 84 test_true_pfm: 3425.386810924813 sim_pfm: 383.27479071426205
episode: 336 training return: tensor(390.0697, device='cuda:0')
episode: 337 training return: tensor(311.7805, device='cuda:0')
episode: 338 training return: tensor(380.2499, device='cuda:0')
episode: 339 training return: tensor(375.6666, device='cuda:0')
epoch: 85 test_true_pfm: 3438.628873885545 sim_pfm: 390.53191149703343
episode: 340 training return: tensor(390.5097, device='cuda:0')
episode: 341 training return: tensor(429.1341, device='cuda:0')
episode: 342 training return: tensor(513.1804, device='cuda:0')
episode: 343 training return: tensor(325.8956, device='cuda:0')
epoch: 86 test_true_pfm: 3439.6398400589787 sim_pfm: 416.99414811677224
episode: 344 training return: tensor(-18.7646, device='cuda:0')
episode: 345 training return: tensor(384.6667, device='cuda:0')
episode: 346 training return: tensor(473.7271, device='cuda:0')
episode: 347 training return: tensor(395.2177, device='cuda:0')
epoch: 87 test_true_pfm: 3445.201340571517 sim_pfm: 392.88198221016984
episode: 348 training return: tensor(309.4635, device='cuda:0')
episode: 349 training return: tensor(368.7230, device='cuda:0')
episode: 350 training return: tensor(407.6935, device='cuda:0')
episode: 351 training return: tensor(406.2688, device='cuda:0')
epoch: 88 test_true_pfm: 3403.654502145926 sim_pfm: 392.56147522563697
episode: 352 training return: tensor(124.9257, device='cuda:0')
episode: 353 training return: tensor(79.4744, device='cuda:0')
episode: 354 training return: tensor(409.0723, device='cuda:0')
episode: 355 training return: tensor(355.8556, device='cuda:0')
epoch: 89 test_true_pfm: 3440.838491226625 sim_pfm: 399.3764820294843
episode: 356 training return: tensor(133.0697, device='cuda:0')
episode: 357 training return: tensor(384.8576, device='cuda:0')
episode: 358 training return: tensor(330.5403, device='cuda:0')
episode: 359 training return: tensor(424.6614, device='cuda:0')
epoch: 90 test_true_pfm: 3371.9607842075347 sim_pfm: 424.9993658530293
episode: 360 training return: tensor(361.8839, device='cuda:0')
episode: 361 training return: tensor(422.6288, device='cuda:0')
episode: 362 training return: tensor(382.4098, device='cuda:0')
episode: 363 training return: tensor(352.7695, device='cuda:0')
epoch: 91 test_true_pfm: 3417.207091618958 sim_pfm: 422.8857030403257
episode: 364 training return: tensor(368.9915, device='cuda:0')
episode: 365 training return: tensor(437.2028, device='cuda:0')
episode: 366 training return: tensor(364.0692, device='cuda:0')
episode: 367 training return: tensor(342.3608, device='cuda:0')
epoch: 92 test_true_pfm: 3147.2170793967766 sim_pfm: 343.9279541380626
episode: 368 training return: tensor(369.0299, device='cuda:0')
episode: 369 training return: tensor(471.8566, device='cuda:0')
episode: 370 training return: tensor(413.6315, device='cuda:0')
episode: 371 training return: tensor(461.2517, device='cuda:0')
epoch: 93 test_true_pfm: 3513.531599100293 sim_pfm: 272.9324074370088
episode: 372 training return: tensor(12.4393, device='cuda:0')
episode: 373 training return: tensor(463.4180, device='cuda:0')
episode: 374 training return: tensor(423.5680, device='cuda:0')
episode: 375 training return: tensor(356.5351, device='cuda:0')
epoch: 94 test_true_pfm: 3431.108625082938 sim_pfm: 381.56664277335705
episode: 376 training return: tensor(400.2240, device='cuda:0')
episode: 377 training return: tensor(394.1353, device='cuda:0')
episode: 378 training return: tensor(397.1510, device='cuda:0')
episode: 379 training return: tensor(291.1620, device='cuda:0')
epoch: 95 test_true_pfm: 3459.065877664065 sim_pfm: 362.5307657062464
episode: 380 training return: tensor(404.9286, device='cuda:0')
episode: 381 training return: tensor(377.1822, device='cuda:0')
episode: 382 training return: tensor(250.9397, device='cuda:0')
episode: 383 training return: tensor(415.2170, device='cuda:0')
epoch: 96 test_true_pfm: 3375.7758977126946 sim_pfm: 401.52437241546187
episode: 384 training return: tensor(-56.6155, device='cuda:0')
episode: 385 training return: tensor(364.0620, device='cuda:0')
episode: 386 training return: tensor(351.3202, device='cuda:0')
episode: 387 training return: tensor(219.8120, device='cuda:0')
epoch: 97 test_true_pfm: 3415.8627722664605 sim_pfm: 387.69170164791285
episode: 388 training return: tensor(50.6791, device='cuda:0')
episode: 389 training return: tensor(362.3270, device='cuda:0')
episode: 390 training return: tensor(278.9926, device='cuda:0')
episode: 391 training return: tensor(283.3571, device='cuda:0')
epoch: 98 test_true_pfm: 3399.0011660374025 sim_pfm: 385.57576898823027
episode: 392 training return: tensor(376.2157, device='cuda:0')
episode: 393 training return: tensor(-8.2357, device='cuda:0')
episode: 394 training return: tensor(387.8240, device='cuda:0')
episode: 395 training return: tensor(444.4315, device='cuda:0')
epoch: 99 test_true_pfm: 3025.113285821712 sim_pfm: 411.57659348156693
episode: 396 training return: tensor(354.6248, device='cuda:0')
episode: 397 training return: tensor(377.3771, device='cuda:0')
episode: 398 training return: tensor(340.9943, device='cuda:0')
episode: 399 training return: tensor(30.3188, device='cuda:0')
epoch: 100 test_true_pfm: 3422.541636225041 sim_pfm: 395.74823101805913
episode: 400 training return: tensor(306.7017, device='cuda:0')
episode: 401 training return: tensor(416.1458, device='cuda:0')
episode: 402 training return: tensor(392.9453, device='cuda:0')
episode: 403 training return: tensor(393.4885, device='cuda:0')
epoch: 101 test_true_pfm: 3621.0626675326384 sim_pfm: 425.36432123580016
episode: 404 training return: tensor(340.4854, device='cuda:0')
episode: 405 training return: tensor(208.6420, device='cuda:0')
episode: 406 training return: tensor(393.8471, device='cuda:0')
episode: 407 training return: tensor(375.9333, device='cuda:0')
epoch: 102 test_true_pfm: 3447.856645820682 sim_pfm: 356.8811063967199
episode: 408 training return: tensor(382.3533, device='cuda:0')
episode: 409 training return: tensor(417.2277, device='cuda:0')
episode: 410 training return: tensor(427.0617, device='cuda:0')
episode: 411 training return: tensor(347.6364, device='cuda:0')
epoch: 103 test_true_pfm: 3468.6108564466026 sim_pfm: 380.0041116368666
episode: 412 training return: tensor(401.2577, device='cuda:0')
episode: 413 training return: tensor(259.5582, device='cuda:0')
episode: 414 training return: tensor(414.9545, device='cuda:0')
episode: 415 training return: tensor(355.7696, device='cuda:0')
epoch: 104 test_true_pfm: 3495.5903603023794 sim_pfm: 318.5737019442604
episode: 416 training return: tensor(445.3535, device='cuda:0')
episode: 417 training return: tensor(350.4029, device='cuda:0')
episode: 418 training return: tensor(190.9778, device='cuda:0')
episode: 419 training return: tensor(347.8169, device='cuda:0')
epoch: 105 test_true_pfm: 3412.7776720117017 sim_pfm: 383.3008990605983
episode: 420 training return: tensor(458.7342, device='cuda:0')
episode: 421 training return: tensor(405.5639, device='cuda:0')
episode: 422 training return: tensor(356.7673, device='cuda:0')
episode: 423 training return: tensor(380.8128, device='cuda:0')
epoch: 106 test_true_pfm: 3455.112948161545 sim_pfm: 403.22157168753137
episode: 424 training return: tensor(425.7176, device='cuda:0')
episode: 425 training return: tensor(397.3374, device='cuda:0')
episode: 426 training return: tensor(377.8058, device='cuda:0')
episode: 427 training return: tensor(33.1317, device='cuda:0')
epoch: 107 test_true_pfm: 3025.2156290109087 sim_pfm: 293.4374061897397
episode: 428 training return: tensor(361.9804, device='cuda:0')
episode: 429 training return: tensor(-95.8530, device='cuda:0')
episode: 430 training return: tensor(389.0402, device='cuda:0')
episode: 431 training return: tensor(398.3710, device='cuda:0')
epoch: 108 test_true_pfm: 3478.844739236938 sim_pfm: 296.95534708045307
episode: 432 training return: tensor(362.8506, device='cuda:0')
episode: 433 training return: tensor(408.0255, device='cuda:0')
episode: 434 training return: tensor(-131.1918, device='cuda:0')
episode: 435 training return: tensor(412.4184, device='cuda:0')
epoch: 109 test_true_pfm: 3429.828130463901 sim_pfm: 322.19374117895495
episode: 436 training return: tensor(434.4365, device='cuda:0')
episode: 437 training return: tensor(428.3470, device='cuda:0')
episode: 438 training return: tensor(481.3067, device='cuda:0')
episode: 439 training return: tensor(446.4578, device='cuda:0')
epoch: 110 test_true_pfm: 3369.299190247719 sim_pfm: 394.4659159242777
episode: 440 training return: tensor(365.6584, device='cuda:0')
episode: 441 training return: tensor(374.9259, device='cuda:0')
episode: 442 training return: tensor(405.9511, device='cuda:0')
episode: 443 training return: tensor(352.2025, device='cuda:0')
epoch: 111 test_true_pfm: 3426.2173271821016 sim_pfm: 405.3202212606072
episode: 444 training return: tensor(389.4653, device='cuda:0')
episode: 445 training return: tensor(435.1353, device='cuda:0')
episode: 446 training return: tensor(378.8654, device='cuda:0')
episode: 447 training return: tensor(443.8779, device='cuda:0')
epoch: 112 test_true_pfm: 3456.9794579491195 sim_pfm: 398.55408206109615
episode: 448 training return: tensor(324.0047, device='cuda:0')
episode: 449 training return: tensor(348.0594, device='cuda:0')
episode: 450 training return: tensor(363.0877, device='cuda:0')
episode: 451 training return: tensor(455.6628, device='cuda:0')
epoch: 113 test_true_pfm: 3475.2670462812493 sim_pfm: 327.9285898906528
episode: 452 training return: tensor(411.6305, device='cuda:0')
episode: 453 training return: tensor(377.5536, device='cuda:0')
episode: 454 training return: tensor(215.1879, device='cuda:0')
episode: 455 training return: tensor(438.8879, device='cuda:0')
epoch: 114 test_true_pfm: 3431.098546275172 sim_pfm: 395.6524413486962
episode: 456 training return: tensor(369.3035, device='cuda:0')
episode: 457 training return: tensor(397.3304, device='cuda:0')
episode: 458 training return: tensor(354.6489, device='cuda:0')
episode: 459 training return: tensor(365.5887, device='cuda:0')
epoch: 115 test_true_pfm: 3452.9877896422136 sim_pfm: 279.54788076347904
episode: 460 training return: tensor(426.5221, device='cuda:0')
episode: 461 training return: tensor(50.9831, device='cuda:0')
episode: 462 training return: tensor(403.1923, device='cuda:0')
episode: 463 training return: tensor(359.9521, device='cuda:0')
epoch: 116 test_true_pfm: 3447.6218680175807 sim_pfm: 407.2046876729776
episode: 464 training return: tensor(380.3599, device='cuda:0')
episode: 465 training return: tensor(354.3954, device='cuda:0')
episode: 466 training return: tensor(338.6515, device='cuda:0')
episode: 467 training return: tensor(381.5030, device='cuda:0')
epoch: 117 test_true_pfm: 3462.3406055165997 sim_pfm: 384.9265895291076
episode: 468 training return: tensor(277.9064, device='cuda:0')
episode: 469 training return: tensor(410.6423, device='cuda:0')
episode: 470 training return: tensor(425.8629, device='cuda:0')
episode: 471 training return: tensor(336.6461, device='cuda:0')
epoch: 118 test_true_pfm: 3417.896111894853 sim_pfm: 398.29875215210876
episode: 472 training return: tensor(385.1757, device='cuda:0')
episode: 473 training return: tensor(384.0792, device='cuda:0')
episode: 474 training return: tensor(449.8172, device='cuda:0')
episode: 475 training return: tensor(397.1953, device='cuda:0')
epoch: 119 test_true_pfm: 3497.36223065207 sim_pfm: 429.463247179888
episode: 476 training return: tensor(390.2429, device='cuda:0')
episode: 477 training return: tensor(406.6043, device='cuda:0')
episode: 478 training return: tensor(383.6651, device='cuda:0')
episode: 479 training return: tensor(475.7272, device='cuda:0')
epoch: 120 test_true_pfm: 3500.85547757307 sim_pfm: 409.72305437316146
episode: 480 training return: tensor(363.0172, device='cuda:0')
episode: 481 training return: tensor(432.7255, device='cuda:0')
episode: 482 training return: tensor(361.1968, device='cuda:0')
episode: 483 training return: tensor(341.2925, device='cuda:0')
epoch: 121 test_true_pfm: 3524.1279643540115 sim_pfm: 352.43854754645145
episode: 484 training return: tensor(369.3861, device='cuda:0')
episode: 485 training return: tensor(418.8044, device='cuda:0')
episode: 486 training return: tensor(402.2665, device='cuda:0')
episode: 487 training return: tensor(360.8877, device='cuda:0')
epoch: 122 test_true_pfm: 3490.442312814205 sim_pfm: 388.3630946908961
episode: 488 training return: tensor(391.2266, device='cuda:0')
episode: 489 training return: tensor(396.0508, device='cuda:0')
episode: 490 training return: tensor(369.9401, device='cuda:0')
episode: 491 training return: tensor(457.4277, device='cuda:0')
epoch: 123 test_true_pfm: 3492.385067985426 sim_pfm: 397.7514271127681
episode: 492 training return: tensor(384.6109, device='cuda:0')
episode: 493 training return: tensor(217.5738, device='cuda:0')
episode: 494 training return: tensor(395.3257, device='cuda:0')
episode: 495 training return: tensor(393.1112, device='cuda:0')
epoch: 124 test_true_pfm: 3535.572583931314 sim_pfm: 435.9508804896711
episode: 496 training return: tensor(423.5374, device='cuda:0')
episode: 497 training return: tensor(414.5616, device='cuda:0')
episode: 498 training return: tensor(465.1073, device='cuda:0')
episode: 499 training return: tensor(445.2675, device='cuda:0')
epoch: 125 test_true_pfm: 3483.9216208021153 sim_pfm: 408.9813044061302
episode: 500 training return: tensor(440.5376, device='cuda:0')
episode: 501 training return: tensor(396.5950, device='cuda:0')
episode: 502 training return: tensor(393.1959, device='cuda:0')
episode: 503 training return: tensor(365.5756, device='cuda:0')
epoch: 126 test_true_pfm: 3508.0617605479347 sim_pfm: 307.3941043759696
episode: 504 training return: tensor(401.3010, device='cuda:0')
episode: 505 training return: tensor(435.9349, device='cuda:0')
episode: 506 training return: tensor(346.0678, device='cuda:0')
episode: 507 training return: tensor(455.6252, device='cuda:0')
epoch: 127 test_true_pfm: 3494.6087634776322 sim_pfm: 360.4616721012474
episode: 508 training return: tensor(422.8671, device='cuda:0')
episode: 509 training return: tensor(407.5223, device='cuda:0')
episode: 510 training return: tensor(363.1479, device='cuda:0')
episode: 511 training return: tensor(360.7383, device='cuda:0')
epoch: 128 test_true_pfm: 3453.6704851255076 sim_pfm: 431.27862002251396
episode: 512 training return: tensor(410.5753, device='cuda:0')
episode: 513 training return: tensor(406.7505, device='cuda:0')
episode: 514 training return: tensor(447.2882, device='cuda:0')
episode: 515 training return: tensor(373.7169, device='cuda:0')
epoch: 129 test_true_pfm: 3473.464730101716 sim_pfm: 457.6823810940259
episode: 516 training return: tensor(485.2302, device='cuda:0')
episode: 517 training return: tensor(420.0278, device='cuda:0')
episode: 518 training return: tensor(416.8601, device='cuda:0')
episode: 519 training return: tensor(419.1033, device='cuda:0')
epoch: 130 test_true_pfm: 3440.2636672857275 sim_pfm: 393.8148622511071
episode: 520 training return: tensor(263.8043, device='cuda:0')
episode: 521 training return: tensor(442.2743, device='cuda:0')
episode: 522 training return: tensor(410.2915, device='cuda:0')
episode: 523 training return: tensor(367.6156, device='cuda:0')
epoch: 131 test_true_pfm: 3504.839993362281 sim_pfm: 425.2266779721249
episode: 524 training return: tensor(425.3502, device='cuda:0')
episode: 525 training return: tensor(374.8991, device='cuda:0')
episode: 526 training return: tensor(387.0311, device='cuda:0')
episode: 527 training return: tensor(415.2609, device='cuda:0')
epoch: 132 test_true_pfm: 3442.7518900052273 sim_pfm: 415.73410260533757
episode: 528 training return: tensor(352.8548, device='cuda:0')
episode: 529 training return: tensor(388.5482, device='cuda:0')
episode: 530 training return: tensor(138.0547, device='cuda:0')
episode: 531 training return: tensor(436.1574, device='cuda:0')
epoch: 133 test_true_pfm: 3440.853581786523 sim_pfm: 388.76340017500723
episode: 532 training return: tensor(398.6767, device='cuda:0')
episode: 533 training return: tensor(360.0742, device='cuda:0')
episode: 534 training return: tensor(359.5323, device='cuda:0')
episode: 535 training return: tensor(429.7661, device='cuda:0')
epoch: 134 test_true_pfm: 3436.812699006198 sim_pfm: 396.54091811895097
episode: 536 training return: tensor(346.7262, device='cuda:0')
episode: 537 training return: tensor(388.1421, device='cuda:0')
episode: 538 training return: tensor(383.4419, device='cuda:0')
episode: 539 training return: tensor(374.6351, device='cuda:0')
epoch: 135 test_true_pfm: 3442.680176053205 sim_pfm: 405.0224146495263
episode: 540 training return: tensor(391.9487, device='cuda:0')
episode: 541 training return: tensor(386.9429, device='cuda:0')
episode: 542 training return: tensor(390.3965, device='cuda:0')
episode: 543 training return: tensor(406.4267, device='cuda:0')
epoch: 136 test_true_pfm: 3433.5885630836788 sim_pfm: 437.95675801410107
episode: 544 training return: tensor(419.7371, device='cuda:0')
episode: 545 training return: tensor(446.7182, device='cuda:0')
episode: 546 training return: tensor(399.3503, device='cuda:0')
episode: 547 training return: tensor(366.6845, device='cuda:0')
epoch: 137 test_true_pfm: 3471.229982037767 sim_pfm: 362.94402731567
episode: 548 training return: tensor(364.3517, device='cuda:0')
episode: 549 training return: tensor(356.1778, device='cuda:0')
episode: 550 training return: tensor(366.8302, device='cuda:0')
episode: 551 training return: tensor(404.0856, device='cuda:0')
epoch: 138 test_true_pfm: 3456.002710905165 sim_pfm: 398.63361516495934
episode: 552 training return: tensor(455.7351, device='cuda:0')
episode: 553 training return: tensor(397.3212, device='cuda:0')
episode: 554 training return: tensor(119.2181, device='cuda:0')
episode: 555 training return: tensor(350.4376, device='cuda:0')
epoch: 139 test_true_pfm: 3251.072633337997 sim_pfm: 430.00513492252986
episode: 556 training return: tensor(212.3599, device='cuda:0')
episode: 557 training return: tensor(321.4843, device='cuda:0')
episode: 558 training return: tensor(404.0289, device='cuda:0')
episode: 559 training return: tensor(447.1033, device='cuda:0')
epoch: 140 test_true_pfm: 3457.4206209105464 sim_pfm: 393.8292693620897
episode: 560 training return: tensor(404.6414, device='cuda:0')
episode: 561 training return: tensor(333.4982, device='cuda:0')
episode: 562 training return: tensor(393.6497, device='cuda:0')
episode: 563 training return: tensor(418.1081, device='cuda:0')
epoch: 141 test_true_pfm: 3449.1422636045204 sim_pfm: 365.9252919657738
episode: 564 training return: tensor(352.2521, device='cuda:0')
episode: 565 training return: tensor(393.6442, device='cuda:0')
episode: 566 training return: tensor(381.3262, device='cuda:0')
episode: 567 training return: tensor(317.5979, device='cuda:0')
epoch: 142 test_true_pfm: 3426.0053650936256 sim_pfm: 385.30476577647886
episode: 568 training return: tensor(361.7259, device='cuda:0')
episode: 569 training return: tensor(421.5259, device='cuda:0')
episode: 570 training return: tensor(457.1274, device='cuda:0')
episode: 571 training return: tensor(402.4568, device='cuda:0')
epoch: 143 test_true_pfm: 3462.9519196617453 sim_pfm: 401.28426271028974
episode: 572 training return: tensor(330.7076, device='cuda:0')
episode: 573 training return: tensor(378.6461, device='cuda:0')
episode: 574 training return: tensor(-126.0767, device='cuda:0')
episode: 575 training return: tensor(356.1662, device='cuda:0')
epoch: 144 test_true_pfm: 3494.4740214518547 sim_pfm: 415.71851532535703
episode: 576 training return: tensor(411.1484, device='cuda:0')
episode: 577 training return: tensor(320.6203, device='cuda:0')
episode: 578 training return: tensor(401.7836, device='cuda:0')
episode: 579 training return: tensor(409.4345, device='cuda:0')
epoch: 145 test_true_pfm: 3428.1478967719977 sim_pfm: 380.8320402925601
episode: 580 training return: tensor(472.9661, device='cuda:0')
episode: 581 training return: tensor(365.2193, device='cuda:0')
episode: 582 training return: tensor(372.7899, device='cuda:0')
episode: 583 training return: tensor(443.1963, device='cuda:0')
epoch: 146 test_true_pfm: 3543.527010799296 sim_pfm: 309.18606064388104
episode: 584 training return: tensor(388.2205, device='cuda:0')
episode: 585 training return: tensor(439.8882, device='cuda:0')
episode: 586 training return: tensor(394.4904, device='cuda:0')
episode: 587 training return: tensor(242.9203, device='cuda:0')
epoch: 147 test_true_pfm: 3328.450305880935 sim_pfm: 361.50269200727536
episode: 588 training return: tensor(448.9417, device='cuda:0')
episode: 589 training return: tensor(415.2756, device='cuda:0')
episode: 590 training return: tensor(415.5868, device='cuda:0')
episode: 591 training return: tensor(347.6358, device='cuda:0')
epoch: 148 test_true_pfm: 3502.1737139002676 sim_pfm: 470.3937426052871
episode: 592 training return: tensor(373.0082, device='cuda:0')
episode: 593 training return: tensor(385.0905, device='cuda:0')
episode: 594 training return: tensor(373.0320, device='cuda:0')
episode: 595 training return: tensor(412.5982, device='cuda:0')
epoch: 149 test_true_pfm: 3560.6375850770733 sim_pfm: 346.8398108732654
episode: 596 training return: tensor(392.1118, device='cuda:0')
episode: 597 training return: tensor(377.5723, device='cuda:0')
episode: 598 training return: tensor(360.1845, device='cuda:0')
episode: 599 training return: tensor(419.4243, device='cuda:0')
epoch: 150 test_true_pfm: 3460.545374059073 sim_pfm: 432.4799070014463
