['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'expert', '--seed', '0']
epoch: 0 training_loss 0.33046554908156395 test_loss: 0.23950440883636476
epoch: 1 training_loss 0.21783898450434208 test_loss: 0.182262647151947
epoch: 2 training_loss 0.18981524400413036 test_loss: 0.18756406307220458
epoch: 3 training_loss 0.17668202936649322 test_loss: 0.15547611713409423
epoch: 4 training_loss 0.16593183822929858 test_loss: 0.13970792293548584
epoch: 5 training_loss 0.15869305200874806 test_loss: 0.1471117377281189
epoch: 6 training_loss 0.1471469022333622 test_loss: 0.15807819366455078
epoch: 7 training_loss 0.13755079969763756 test_loss: 0.12879158258438111
epoch: 8 training_loss 0.1464864630997181 test_loss: 0.13691695928573608
epoch: 9 training_loss 0.13476524084806443 test_loss: 0.13936028480529786
epoch: 10 training_loss 0.13780915658921003 test_loss: 0.11253095865249634
epoch: 11 training_loss 0.13792408466339112 test_loss: 0.1341138243675232
epoch: 12 training_loss 0.12062389019876718 test_loss: 0.12349128723144531
epoch: 13 training_loss 0.13351465590298175 test_loss: 0.11954442262649537
epoch: 14 training_loss 0.11812700118869543 test_loss: 0.12184844017028809
epoch: 15 training_loss 0.11837574809789658 test_loss: 0.13373433351516723
epoch: 16 training_loss 0.12128642734140158 test_loss: 0.12177588939666747
epoch: 17 training_loss 0.12524842128157615 test_loss: 0.11918336153030396
epoch: 18 training_loss 0.12275137320160866 test_loss: 0.12367297410964966
epoch: 19 training_loss 0.12419687364250422 test_loss: 0.11177266836166382
epoch: 20 training_loss 0.11563701398670673 test_loss: 0.1095062255859375
epoch: 21 training_loss 0.12223219897598028 test_loss: 0.11160900592803955
epoch: 22 training_loss 0.11947891112416982 test_loss: 0.12227340936660766
epoch: 23 training_loss 0.1131718672439456 test_loss: 0.10976167917251586
epoch: 24 training_loss 0.11336613062769174 test_loss: 0.10758541822433472
epoch: 25 training_loss 0.1194404586777091 test_loss: 0.10694818496704102
epoch: 26 training_loss 0.11324539806693792 test_loss: 0.1112208604812622
epoch: 27 training_loss 0.1098492594063282 test_loss: 0.1050944209098816
epoch: 28 training_loss 0.11845597073435783 test_loss: 0.0992551863193512
epoch: 29 training_loss 0.11928192507475614 test_loss: 0.12103146314620972
epoch: 30 training_loss 0.11829101007431746 test_loss: 0.14287112951278685
epoch: 31 training_loss 0.11292260985821485 test_loss: 0.1120031237602234
epoch: 32 training_loss 0.11009254023432731 test_loss: 0.09982969760894775
epoch: 33 training_loss 0.11377420105040073 test_loss: 0.12816450595855713
epoch: 34 training_loss 0.1112409320473671 test_loss: 0.12628244161605834
epoch: 35 training_loss 0.11504494085907936 test_loss: 0.08874256014823914
epoch: 36 training_loss 0.11366032019257545 test_loss: 0.13360109329223632
epoch: 37 training_loss 0.10999280538409949 test_loss: 0.11598858833312989
epoch: 38 training_loss 0.11254200734198093 test_loss: 0.11537646055221558
epoch: 39 training_loss 0.11570599753409624 test_loss: 0.09871360659599304
epoch: 40 training_loss 0.10733542636036873 test_loss: 0.10668811798095704
epoch: 41 training_loss 0.1197900753095746 test_loss: 0.10766152143478394
epoch: 42 training_loss 0.12722850661724805 test_loss: 0.11296876668930053
epoch: 43 training_loss 0.11279348410665989 test_loss: 0.11491847038269043
epoch: 44 training_loss 0.11299346331506968 test_loss: 0.1231237769126892
epoch: 45 training_loss 0.11005473759025336 test_loss: 0.09136276245117188
epoch: 46 training_loss 0.11109730288386345 test_loss: 0.10223771333694458
epoch: 47 training_loss 0.11019032072275876 test_loss: 0.12167936563491821
epoch: 48 training_loss 0.11828515686094761 test_loss: 0.10094543695449829
epoch: 49 training_loss 0.11291753396391868 test_loss: 0.10500231981277466
epoch: 50 training_loss 0.11445648387074471 test_loss: 0.10341622829437255
epoch: 51 training_loss 0.1122794159501791 test_loss: 0.1199643850326538
epoch: 52 training_loss 0.11167010078206659 test_loss: 0.10640608072280884
epoch: 53 training_loss 0.11178754784166813 test_loss: 0.10326756238937378
epoch: 54 training_loss 0.1086933258920908 test_loss: 0.11256191730499268
epoch: 55 training_loss 0.11290582086890937 test_loss: 0.1080042839050293
epoch: 56 training_loss 0.11177331998944283 test_loss: 0.09297314286231995
epoch: 57 training_loss 0.11022475156933069 test_loss: 0.11829248666763306
epoch: 58 training_loss 0.11648802209645509 test_loss: 0.11736948490142822
epoch: 59 training_loss 0.11068128369748592 test_loss: 0.11508177518844605
epoch: 60 training_loss 0.1085173524916172 test_loss: 0.11049489974975586
epoch: 61 training_loss 0.10816312801092863 test_loss: 0.09424307942390442
epoch: 62 training_loss 0.10565732728689908 test_loss: 0.10593528747558593
epoch: 63 training_loss 0.11728946410119534 test_loss: 0.11960800886154174
epoch: 64 training_loss 0.11280660316348076 test_loss: 0.10866358280181884
epoch: 65 training_loss 0.11243241537362338 test_loss: 0.10555192232131957
epoch: 66 training_loss 0.1187990304082632 test_loss: 0.11087605953216553
epoch: 67 training_loss 0.11278807170689106 test_loss: 0.11547678709030151
epoch: 68 training_loss 0.1070526991598308 test_loss: 0.09207149744033813
epoch: 69 training_loss 0.11027979463338852 test_loss: 0.09357131123542786
epoch: 70 training_loss 0.11103928331285715 test_loss: 0.08280577659606933
epoch: 71 training_loss 0.10822540823370218 test_loss: 0.11691184043884277
epoch: 72 training_loss 0.10977514458820224 test_loss: 0.10098863840103149
epoch: 73 training_loss 0.11370506228879095 test_loss: 0.0972401261329651
epoch: 74 training_loss 0.10813012039288879 test_loss: 0.11568410396575927
epoch: 75 training_loss 0.10180082481354474 test_loss: 0.10911129713058472
epoch: 76 training_loss 0.10645868936553597 test_loss: 0.12340422868728637
epoch: 77 training_loss 0.1129986296966672 test_loss: 0.1035461187362671
epoch: 78 training_loss 0.11293842393904924 test_loss: 0.11333538293838501
epoch: 79 training_loss 0.10896649226546287 test_loss: 0.11805893182754516
epoch: 80 training_loss 0.11615093201398849 test_loss: 0.10703036785125733
epoch: 81 training_loss 0.1026437370851636 test_loss: 0.10411088466644287
epoch: 82 training_loss 0.10855495374649764 test_loss: 0.09747864007949829
epoch: 83 training_loss 0.10062633771449328 test_loss: 0.10969414710998535
epoch: 84 training_loss 0.10391761587932706 test_loss: 0.09734417200088501
epoch: 85 training_loss 0.1083152487128973 test_loss: 0.10929789543151855
epoch: 86 training_loss 0.10497451029717922 test_loss: 0.08896110057830811
epoch: 87 training_loss 0.10933263780549168 test_loss: 0.10945789813995362
epoch: 88 training_loss 0.1168966107070446 test_loss: 0.11408364772796631
epoch: 89 training_loss 0.11337705278769135 test_loss: 0.09857099056243897
epoch: 90 training_loss 0.10457312693819404 test_loss: 0.10047945976257325
epoch: 91 training_loss 0.10724553059786558 test_loss: 0.10594561100006103
epoch: 92 training_loss 0.10668371934443713 test_loss: 0.12141448259353638
epoch: 93 training_loss 0.1111117292009294 test_loss: 0.10891857147216796
epoch: 94 training_loss 0.10757097298279404 test_loss: 0.11006063222885132
epoch: 95 training_loss 0.1166206007823348 test_loss: 0.10487216711044312
epoch: 96 training_loss 0.1116661363095045 test_loss: 0.10074526071548462
epoch: 97 training_loss 0.1036326883174479 test_loss: 0.11254905462265015
epoch: 98 training_loss 0.10505624562501907 test_loss: 0.10753893852233887
epoch: 99 training_loss 0.10179930124431849 test_loss: 0.1177757978439331
epoch: 100 training_loss 0.10735450562089682 test_loss: 0.11675963401794434
epoch: 101 training_loss 0.11060175728052854 test_loss: 0.10904628038406372
epoch: 102 training_loss 0.10796245269477367 test_loss: 0.09902421236038209
epoch: 103 training_loss 0.11347083047032357 test_loss: 0.11599045991897583
epoch: 104 training_loss 0.11433608481660486 test_loss: 0.1007729172706604
epoch: 105 training_loss 0.10637997929006815 test_loss: 0.1024080753326416
epoch: 106 training_loss 0.10815088402479887 test_loss: 0.11827602386474609
epoch: 107 training_loss 0.10866242840886116 test_loss: 0.11340746879577637
epoch: 108 training_loss 0.10389413699507713 test_loss: 0.10063825845718384
epoch: 109 training_loss 0.11677466366440058 test_loss: 0.10611379146575928
epoch: 110 training_loss 0.11654895439743995 test_loss: 0.10212961435317994
epoch: 111 training_loss 0.1035269337054342 test_loss: 0.11917929649353028
epoch: 112 training_loss 0.11308982092887163 test_loss: 0.11494455337524415
epoch: 113 training_loss 0.1155539832636714 test_loss: 0.10626275539398193
epoch: 114 training_loss 0.10756516043096781 test_loss: 0.10895987749099731
epoch: 115 training_loss 0.10645873915404082 test_loss: 0.11730849742889404
epoch: 116 training_loss 0.10975796781480313 test_loss: 0.09068741798400878
epoch: 117 training_loss 0.11636406872421504 test_loss: 0.09303078055381775
epoch: 118 training_loss 0.10549126602709294 test_loss: 0.10980753898620606
epoch: 119 training_loss 0.10630364466458558 test_loss: 0.09466739296913147
epoch: 120 training_loss 0.104995736181736 test_loss: 0.10066714286804199
epoch: 121 training_loss 0.10783800354227424 test_loss: 0.11203888654708863
epoch: 122 training_loss 0.09818642912432551 test_loss: 0.10365973711013794
epoch: 123 training_loss 0.11188530201092363 test_loss: 0.10750287771224976
epoch: 124 training_loss 0.10218494787812232 test_loss: 0.10085074901580811
epoch: 125 training_loss 0.10712172200903297 test_loss: 0.11076545715332031
epoch: 126 training_loss 0.11462944615632295 test_loss: 0.10498180389404296
epoch: 127 training_loss 0.10071089813485741 test_loss: 0.10838751792907715
epoch: 128 training_loss 0.1061771522462368 test_loss: 0.10623044967651367
epoch: 129 training_loss 0.10396540092304349 test_loss: 0.11474467515945434
epoch: 130 training_loss 0.10901348922401667 test_loss: 0.10054813623428345
epoch: 131 training_loss 0.11360528480261564 test_loss: 0.1136327862739563
epoch: 132 training_loss 0.11366573806852102 test_loss: 0.08900567889213562
epoch: 133 training_loss 0.1069184460490942 test_loss: 0.10346932411193847
epoch: 134 training_loss 0.10941154889762401 test_loss: 0.10371863842010498
epoch: 135 training_loss 0.11159718371927738 test_loss: 0.0879982054233551
epoch: 136 training_loss 0.10702488949522376 test_loss: 0.11395341157913208
epoch: 137 training_loss 0.1144701674953103 test_loss: 0.1025797963142395
epoch: 138 training_loss 0.11244464304298163 test_loss: 0.10733139514923096
epoch: 139 training_loss 0.10047376345843077 test_loss: 0.09844939112663269
epoch: 140 training_loss 0.10362454172223806 test_loss: 0.09698222875595093
epoch: 141 training_loss 0.11536288008093834 test_loss: 0.10207276344299317
epoch: 142 training_loss 0.11313352469354868 test_loss: 0.09874112606048584
epoch: 143 training_loss 0.11220356486737729 test_loss: 0.10637766122817993
epoch: 144 training_loss 0.10601184871047735 test_loss: 0.10784518718719482
epoch: 145 training_loss 0.11229810655117035 test_loss: 0.11201437711715698
epoch: 146 training_loss 0.10777921099215745 test_loss: 0.09899271130561829
epoch: 147 training_loss 0.1082278897613287 test_loss: 0.11347541809082032
epoch: 148 training_loss 0.10625730715692043 test_loss: 0.11264561414718628
epoch: 149 training_loss 0.09994993714615702 test_loss: 0.09590770006179809
epoch: 0 training_loss 45.81309858322143 test_loss: 25.444076538085938
epoch: 1 training_loss 21.22091018676758 test_loss: 18.315708923339844
epoch: 2 training_loss 15.815691165924072 test_loss: 13.841357421875
epoch: 3 training_loss 13.17649663925171 test_loss: 12.095442962646484
epoch: 4 training_loss 11.241484308242798 test_loss: 10.389273071289063
epoch: 5 training_loss 10.00847195148468 test_loss: 9.445159149169921
epoch: 6 training_loss 9.121765956878662 test_loss: 8.508197021484374
epoch: 7 training_loss 8.239436531066895 test_loss: 8.07074203491211
epoch: 8 training_loss 7.927110099792481 test_loss: 7.333478546142578
epoch: 9 training_loss 7.164368629455566 test_loss: 7.130862426757813
epoch: 10 training_loss 6.740283932685852 test_loss: 6.966097259521485
epoch: 11 training_loss 6.333087639808655 test_loss: 6.293193435668945
epoch: 12 training_loss 6.059827833175659 test_loss: 5.67764892578125
epoch: 13 training_loss 5.735808210372925 test_loss: 5.5270332336425785
epoch: 14 training_loss 5.43177896976471 test_loss: 5.442711639404297
epoch: 15 training_loss 5.2616186571121215 test_loss: 5.1834667205810545
epoch: 16 training_loss 5.11250823020935 test_loss: 5.198978424072266
epoch: 17 training_loss 5.098358643054962 test_loss: 4.782855987548828
epoch: 18 training_loss 4.857989883422851 test_loss: 4.868508529663086
epoch: 19 training_loss 4.750469343662262 test_loss: 4.700358581542969
epoch: 20 training_loss 4.52499632358551 test_loss: 4.531837463378906
epoch: 21 training_loss 4.451638984680176 test_loss: 4.3703155517578125
epoch: 22 training_loss 4.403159391880036 test_loss: 4.432104110717773
epoch: 23 training_loss 4.293614585399627 test_loss: 4.177847290039063
epoch: 24 training_loss 4.209041187763214 test_loss: 4.216147994995117
epoch: 25 training_loss 4.102809727191925 test_loss: 4.354523849487305
epoch: 26 training_loss 4.126094236373901 test_loss: 4.091632461547851
epoch: 27 training_loss 3.969292800426483 test_loss: 3.763996124267578
epoch: 28 training_loss 3.9040143585205076 test_loss: 3.942351150512695
epoch: 29 training_loss 3.8361905169487 test_loss: 3.8319087982177735
epoch: 30 training_loss 3.783769166469574 test_loss: 3.8001663208007814
epoch: 31 training_loss 3.736840567588806 test_loss: 3.5825965881347654
epoch: 32 training_loss 3.669753222465515 test_loss: 3.590631103515625
epoch: 33 training_loss 3.587908158302307 test_loss: 3.7757339477539062
epoch: 34 training_loss 3.5171918869018555 test_loss: 3.515513229370117
epoch: 35 training_loss 3.504877052307129 test_loss: 3.6506416320800783
epoch: 36 training_loss 3.431644546985626 test_loss: 3.5966434478759766
epoch: 37 training_loss 3.3931204962730406 test_loss: 3.661376953125
epoch: 38 training_loss 3.276868040561676 test_loss: 3.3051387786865236
epoch: 39 training_loss 3.2831477546691894 test_loss: 3.3054161071777344
epoch: 40 training_loss 3.278080642223358 test_loss: 3.229180908203125
epoch: 41 training_loss 3.217376947402954 test_loss: 3.208169937133789
epoch: 42 training_loss 3.1417954993247985 test_loss: 3.3462257385253906
epoch: 43 training_loss 3.1529319429397584 test_loss: 3.006027603149414
epoch: 44 training_loss 3.0695318746566773 test_loss: 3.097493362426758
epoch: 45 training_loss 3.1732541704177857 test_loss: 3.001575469970703
epoch: 46 training_loss 3.0582525444030764 test_loss: 3.2890037536621093
epoch: 47 training_loss 3.068681826591492 test_loss: 3.027513885498047
epoch: 48 training_loss 2.9583545565605163 test_loss: 2.9602733612060548
epoch: 49 training_loss 2.9318689274787904 test_loss: 3.1719593048095702
epoch: 50 training_loss 2.88681485414505 test_loss: 2.8039445877075195
epoch: 51 training_loss 2.9146256589889528 test_loss: 2.8998445510864257
epoch: 52 training_loss 2.8418443155288697 test_loss: 2.8070449829101562
epoch: 53 training_loss 2.8170054602622985 test_loss: 2.8857666015625
epoch: 54 training_loss 2.7875536370277403 test_loss: 2.7734375
epoch: 55 training_loss 2.7559624886512757 test_loss: 2.8677946090698243
epoch: 56 training_loss 2.733410367965698 test_loss: 2.69012451171875
epoch: 57 training_loss 2.73004207611084 test_loss: 2.861747932434082
epoch: 58 training_loss 2.7720238327980042 test_loss: 2.7592794418334963
epoch: 59 training_loss 2.6928439784049987 test_loss: 2.714070129394531
epoch: 60 training_loss 2.6179540407657624 test_loss: 2.646724510192871
epoch: 61 training_loss 2.5531306791305544 test_loss: 2.754287338256836
epoch: 62 training_loss 2.647105084657669 test_loss: 2.6126287460327147
epoch: 63 training_loss 2.549066200256348 test_loss: 2.6973215103149415
epoch: 64 training_loss 2.5551016664505006 test_loss: 2.5366704940795897
epoch: 65 training_loss 2.547036107778549 test_loss: 2.5790884017944338
epoch: 66 training_loss 2.510219671726227 test_loss: 2.601734924316406
epoch: 67 training_loss 2.5510548901557923 test_loss: 2.658854103088379
epoch: 68 training_loss 2.4901675844192503 test_loss: 2.53003044128418
epoch: 69 training_loss 2.464886566400528 test_loss: 2.470110321044922
epoch: 70 training_loss 2.439260152578354 test_loss: 2.2845270156860353
epoch: 71 training_loss 2.4113281774520874 test_loss: 2.503902626037598
epoch: 72 training_loss 2.4596525406837464 test_loss: 2.395047187805176
epoch: 73 training_loss 2.4141415977478027 test_loss: 2.421242904663086
epoch: 74 training_loss 2.398743269443512 test_loss: 2.4601814270019533
epoch: 75 training_loss 2.356972453594208 test_loss: 2.444400405883789
epoch: 76 training_loss 2.4006023156642913 test_loss: 2.425291061401367
epoch: 77 training_loss 2.2979810523986814 test_loss: 2.407046890258789
epoch: 78 training_loss 2.293781940937042 test_loss: 2.4358213424682615
epoch: 79 training_loss 2.2905733716487884 test_loss: 2.519678497314453
epoch: 80 training_loss 2.263625795841217 test_loss: 2.3179161071777346
epoch: 81 training_loss 2.3346615624427796 test_loss: 2.243452262878418
epoch: 82 training_loss 2.2977990663051604 test_loss: 2.1152633666992187
epoch: 83 training_loss 2.2209172785282134 test_loss: 2.352138328552246
epoch: 84 training_loss 2.195823940038681 test_loss: 2.2964622497558596
epoch: 85 training_loss 2.1761986565589906 test_loss: 2.3789901733398438
epoch: 86 training_loss 2.1772471499443053 test_loss: 2.3483848571777344
epoch: 87 training_loss 2.230704256296158 test_loss: 2.142015266418457
epoch: 88 training_loss 2.152569795846939 test_loss: 2.220362091064453
epoch: 89 training_loss 2.200768517255783 test_loss: 2.3533403396606447
epoch: 90 training_loss 2.234392647743225 test_loss: 2.056077003479004
epoch: 91 training_loss 2.161775048971176 test_loss: 2.3386966705322267
epoch: 92 training_loss 2.157854200601578 test_loss: 2.0358810424804688
epoch: 93 training_loss 2.089743367433548 test_loss: 2.2754562377929686
epoch: 94 training_loss 2.1001597201824187 test_loss: 2.1345678329467774
epoch: 95 training_loss 2.081161187887192 test_loss: 2.153034973144531
epoch: 96 training_loss 2.109474329948425 test_loss: 2.183971977233887
epoch: 97 training_loss 2.1184079563617706 test_loss: 2.0397645950317385
epoch: 98 training_loss 2.1162395679950716 test_loss: 2.032881736755371
epoch: 99 training_loss 2.105349785089493 test_loss: 2.125742530822754
epoch: 100 training_loss 2.08924929022789 test_loss: 2.1234773635864257
epoch: 101 training_loss 2.049127073287964 test_loss: 2.087930107116699
epoch: 102 training_loss 2.0597433245182035 test_loss: 2.121559715270996
epoch: 103 training_loss 2.0211219668388365 test_loss: 1.9719507217407226
epoch: 104 training_loss 2.0061240220069885 test_loss: 1.9097383499145508
epoch: 105 training_loss 2.0237462198734284 test_loss: 2.137582778930664
epoch: 106 training_loss 2.02105424284935 test_loss: 1.9422294616699218
epoch: 107 training_loss 2.0859272623062135 test_loss: 2.129034233093262
epoch: 108 training_loss 2.0110074496269226 test_loss: 1.91474609375
epoch: 109 training_loss 1.99243048787117 test_loss: 2.1291933059692383
epoch: 110 training_loss 1.9707763874530793 test_loss: 2.038580322265625
epoch: 111 training_loss 1.9901103210449218 test_loss: 2.1734691619873048
epoch: 112 training_loss 1.914678807258606 test_loss: 2.024827575683594
epoch: 113 training_loss 1.987635463476181 test_loss: 2.082145118713379
epoch: 114 training_loss 1.9661030077934265 test_loss: 1.9515609741210938
epoch: 115 training_loss 1.9842006409168242 test_loss: 1.9330799102783203
epoch: 116 training_loss 1.9255556225776673 test_loss: 1.9477415084838867
epoch: 117 training_loss 1.9963064908981323 test_loss: 1.945572853088379
epoch: 118 training_loss 1.8951850378513335 test_loss: 1.9843511581420898
epoch: 119 training_loss 1.9087281513214112 test_loss: 2.048334503173828
epoch: 120 training_loss 1.9293001043796538 test_loss: 1.871392822265625
epoch: 121 training_loss 1.9067591035366058 test_loss: 2.0251474380493164
epoch: 122 training_loss 1.8960471665859222 test_loss: 1.9246126174926759
epoch: 123 training_loss 1.858767511844635 test_loss: 2.055814743041992
epoch: 124 training_loss 1.9468751919269562 test_loss: 1.871847152709961
epoch: 125 training_loss 1.871506175994873 test_loss: 1.889767837524414
epoch: 126 training_loss 1.9140305721759796 test_loss: 1.867789077758789
epoch: 127 training_loss 1.917888572216034 test_loss: 1.9308073043823242
epoch: 128 training_loss 1.8826857781410218 test_loss: 1.9855159759521483
epoch: 129 training_loss 1.8084425842761993 test_loss: 1.8433757781982423
epoch: 130 training_loss 1.813965425491333 test_loss: 1.9477279663085938
epoch: 131 training_loss 1.8531939566135407 test_loss: 1.9052600860595703
epoch: 132 training_loss 1.8459167432785035 test_loss: 1.769232177734375
epoch: 133 training_loss 1.8596800863742828 test_loss: 1.9368640899658203
epoch: 134 training_loss 1.7955654978752136 test_loss: 1.8166322708129883
epoch: 135 training_loss 1.8446036410331725 test_loss: 1.7908109664916991
epoch: 136 training_loss 1.8332440662384033 test_loss: 1.9526763916015626
epoch: 137 training_loss 1.7906964004039765 test_loss: 1.8076139450073243
epoch: 138 training_loss 1.7656893646717071 test_loss: 1.891880416870117
epoch: 139 training_loss 1.8428841269016265 test_loss: 1.8067945480346679
epoch: 140 training_loss 1.7691367185115814 test_loss: 1.794413185119629
epoch: 141 training_loss 1.7919085383415223 test_loss: 1.9228853225708007
epoch: 142 training_loss 1.7648475241661072 test_loss: 1.8498931884765626
epoch: 143 training_loss 1.7894001662731172 test_loss: 1.844688606262207
epoch: 144 training_loss 1.7903228640556335 test_loss: 1.9013690948486328
epoch: 145 training_loss 1.7521863389015198 test_loss: 1.8453826904296875
epoch: 146 training_loss 1.7463016080856324 test_loss: 1.7057153701782226
epoch: 147 training_loss 1.7668141281604768 test_loss: 1.7383529663085937
epoch: 148 training_loss 1.8037243330478667 test_loss: 1.7880859375
epoch: 149 training_loss 1.7711331462860107 test_loss: 1.8636150360107422
8088.037941560486
episode: 0 training return: tensor(-920.7462, device='cuda:0')
episode: 1 training return: tensor(-667.0496, device='cuda:0')
episode: 2 training return: tensor(-264.2163, device='cuda:0')
episode: 3 training return: tensor(-999.9615, device='cuda:0')
epoch: 1 test_true_pfm: 4354.76811251527 sim_pfm: -706.2054226635955
episode: 4 training return: tensor(-884.2653, device='cuda:0')
episode: 5 training return: tensor(-372.6440, device='cuda:0')
episode: 6 training return: tensor(-160.3506, device='cuda:0')
episode: 7 training return: tensor(-392.7862, device='cuda:0')
epoch: 2 test_true_pfm: 7032.054717093794 sim_pfm: -347.6279443607976
episode: 8 training return: tensor(-614.9240, device='cuda:0')
episode: 9 training return: tensor(-992.5135, device='cuda:0')
episode: 10 training return: tensor(-534.4136, device='cuda:0')
episode: 11 training return: tensor(-414.9451, device='cuda:0')
epoch: 3 test_true_pfm: 8593.69653276311 sim_pfm: -471.649570721318
episode: 12 training return: tensor(-357.6294, device='cuda:0')
episode: 13 training return: tensor(-999.9991, device='cuda:0')
episode: 14 training return: tensor(-419.0501, device='cuda:0')
episode: 15 training return: tensor(-999.9753, device='cuda:0')
epoch: 4 test_true_pfm: 6661.939519722757 sim_pfm: -613.532485648912
episode: 16 training return: tensor(-85.3891, device='cuda:0')
episode: 17 training return: tensor(-83.8045, device='cuda:0')
episode: 18 training return: tensor(-999.7617, device='cuda:0')
episode: 19 training return: tensor(-282.8987, device='cuda:0')
epoch: 5 test_true_pfm: 5085.517306658672 sim_pfm: -677.7774738890585
episode: 20 training return: tensor(-999.9764, device='cuda:0')
episode: 21 training return: tensor(-999.9992, device='cuda:0')
episode: 22 training return: tensor(-999.9327, device='cuda:0')
episode: 23 training return: tensor(-967.8385, device='cuda:0')
epoch: 6 test_true_pfm: 7096.286710167403 sim_pfm: -243.3892142131032
episode: 24 training return: tensor(-277.6726, device='cuda:0')
episode: 25 training return: tensor(-140.1917, device='cuda:0')
episode: 26 training return: tensor(-465.4546, device='cuda:0')
episode: 27 training return: tensor(-860.7435, device='cuda:0')
epoch: 7 test_true_pfm: 4788.805837092152 sim_pfm: -639.532121100657
episode: 28 training return: tensor(-494.5870, device='cuda:0')
episode: 29 training return: tensor(-152.5297, device='cuda:0')
episode: 30 training return: tensor(-761.6819, device='cuda:0')
episode: 31 training return: tensor(-185.1017, device='cuda:0')
epoch: 8 test_true_pfm: 6625.794094193545 sim_pfm: -293.3964146564152
episode: 32 training return: tensor(-999.9999, device='cuda:0')
episode: 33 training return: tensor(-545.4682, device='cuda:0')
episode: 34 training return: tensor(-232.8086, device='cuda:0')
episode: 35 training return: tensor(-999.9985, device='cuda:0')
epoch: 9 test_true_pfm: 6443.614412747429 sim_pfm: -347.87992397130193
episode: 36 training return: tensor(-755.5816, device='cuda:0')
episode: 37 training return: tensor(-409.6494, device='cuda:0')
episode: 38 training return: tensor(-118.1020, device='cuda:0')
episode: 39 training return: tensor(-335.4434, device='cuda:0')
epoch: 10 test_true_pfm: 7835.561029573658 sim_pfm: -198.79508916472938
episode: 40 training return: tensor(-216.0867, device='cuda:0')
episode: 41 training return: tensor(-547.8069, device='cuda:0')
episode: 42 training return: tensor(-332.8704, device='cuda:0')
episode: 43 training return: tensor(-999.9647, device='cuda:0')
epoch: 11 test_true_pfm: 6388.178069418689 sim_pfm: -384.0670841263297
episode: 44 training return: tensor(-279.0999, device='cuda:0')
episode: 45 training return: tensor(-259.0099, device='cuda:0')
episode: 46 training return: tensor(-191.7770, device='cuda:0')
episode: 47 training return: tensor(-401.5046, device='cuda:0')
epoch: 12 test_true_pfm: 10223.235853819888 sim_pfm: -83.17632232612232
episode: 48 training return: tensor(-999.0321, device='cuda:0')
episode: 49 training return: tensor(-230.8983, device='cuda:0')
episode: 50 training return: tensor(-399.7690, device='cuda:0')
episode: 51 training return: tensor(-892.4976, device='cuda:0')
epoch: 13 test_true_pfm: 7788.469538528621 sim_pfm: -190.21885691193165
episode: 52 training return: tensor(-287.7202, device='cuda:0')
episode: 53 training return: tensor(-883.0620, device='cuda:0')
episode: 54 training return: tensor(-694.4406, device='cuda:0')
episode: 55 training return: tensor(-463.3269, device='cuda:0')
epoch: 14 test_true_pfm: 3236.4391155319463 sim_pfm: -513.1837735004374
episode: 56 training return: tensor(-200.8772, device='cuda:0')
episode: 57 training return: tensor(-702.6133, device='cuda:0')
episode: 58 training return: tensor(-863.5214, device='cuda:0')
episode: 59 training return: tensor(-805.6903, device='cuda:0')
epoch: 15 test_true_pfm: 9970.561555375813 sim_pfm: -388.0626496040204
episode: 60 training return: tensor(-781.4310, device='cuda:0')
episode: 61 training return: tensor(-670.7084, device='cuda:0')
episode: 62 training return: tensor(-738.0638, device='cuda:0')
episode: 63 training return: tensor(-291.2944, device='cuda:0')
epoch: 16 test_true_pfm: 6098.705109787476 sim_pfm: -273.36297207364504
episode: 64 training return: tensor(-177.0933, device='cuda:0')
episode: 65 training return: tensor(-649.4156, device='cuda:0')
episode: 66 training return: tensor(-820.6755, device='cuda:0')
episode: 67 training return: tensor(-707.1394, device='cuda:0')
epoch: 17 test_true_pfm: 6914.072550022293 sim_pfm: -393.32416757941246
episode: 68 training return: tensor(-554.6272, device='cuda:0')
episode: 69 training return: tensor(-1.7927, device='cuda:0')
episode: 70 training return: tensor(-176.5441, device='cuda:0')
episode: 71 training return: tensor(-999.6729, device='cuda:0')
epoch: 18 test_true_pfm: 10129.93375930239 sim_pfm: -373.0206640589652
episode: 72 training return: tensor(-261.1678, device='cuda:0')
episode: 73 training return: tensor(-196.0947, device='cuda:0')
episode: 74 training return: tensor(-435.6475, device='cuda:0')
episode: 75 training return: tensor(-116.0850, device='cuda:0')
epoch: 19 test_true_pfm: 6504.198282285471 sim_pfm: -406.7443162903267
episode: 76 training return: tensor(-265.6550, device='cuda:0')
episode: 77 training return: tensor(-786.4905, device='cuda:0')
episode: 78 training return: tensor(-839.7822, device='cuda:0')
episode: 79 training return: tensor(-999.9965, device='cuda:0')
epoch: 20 test_true_pfm: 6647.868461007639 sim_pfm: -55.24995608224223
episode: 80 training return: tensor(-539.7520, device='cuda:0')
episode: 81 training return: tensor(-715.4705, device='cuda:0')
episode: 82 training return: tensor(-985.4507, device='cuda:0')
episode: 83 training return: tensor(-90.4526, device='cuda:0')
epoch: 21 test_true_pfm: 9148.842659935046 sim_pfm: -350.3213512476941
episode: 84 training return: tensor(-406.8976, device='cuda:0')
episode: 85 training return: tensor(-466.3994, device='cuda:0')
episode: 86 training return: tensor(-579.6947, device='cuda:0')
episode: 87 training return: tensor(-297.4775, device='cuda:0')
epoch: 22 test_true_pfm: 10051.6876849994 sim_pfm: -27.052250576593604
episode: 88 training return: tensor(-305.0380, device='cuda:0')
episode: 89 training return: tensor(-550.2374, device='cuda:0')
episode: 90 training return: tensor(-433.7615, device='cuda:0')
episode: 91 training return: tensor(-353.3649, device='cuda:0')
epoch: 23 test_true_pfm: 6669.081247503208 sim_pfm: -240.6822779493426
episode: 92 training return: tensor(-229.3697, device='cuda:0')
episode: 93 training return: tensor(-885.9621, device='cuda:0')
episode: 94 training return: tensor(-974.0615, device='cuda:0')
episode: 95 training return: tensor(-382.6551, device='cuda:0')
epoch: 24 test_true_pfm: 10369.210721885021 sim_pfm: -451.2163124811098
episode: 96 training return: tensor(-344.3679, device='cuda:0')
episode: 97 training return: tensor(-195.5918, device='cuda:0')
episode: 98 training return: tensor(-320.1738, device='cuda:0')
episode: 99 training return: tensor(-114.5776, device='cuda:0')
epoch: 25 test_true_pfm: 10373.252532000604 sim_pfm: -120.30049372365465
episode: 100 training return: tensor(-866.2648, device='cuda:0')
episode: 101 training return: tensor(-258.0522, device='cuda:0')
episode: 102 training return: tensor(-999.9583, device='cuda:0')
episode: 103 training return: tensor(-125.9675, device='cuda:0')
epoch: 26 test_true_pfm: 10077.431109990235 sim_pfm: -380.79439901109436
episode: 104 training return: tensor(-35.3048, device='cuda:0')
episode: 105 training return: tensor(-999.9988, device='cuda:0')
episode: 106 training return: tensor(-274.0266, device='cuda:0')
episode: 107 training return: tensor(-846.0929, device='cuda:0')
epoch: 27 test_true_pfm: 10240.50285455253 sim_pfm: -684.6093532111068
episode: 108 training return: tensor(-999.9988, device='cuda:0')
episode: 109 training return: tensor(-327.2132, device='cuda:0')
episode: 110 training return: tensor(-254.8839, device='cuda:0')
episode: 111 training return: tensor(-814.0602, device='cuda:0')
epoch: 28 test_true_pfm: 10048.138715616034 sim_pfm: -327.17704302615795
episode: 112 training return: tensor(-708.6760, device='cuda:0')
episode: 113 training return: tensor(-999.9988, device='cuda:0')
episode: 114 training return: tensor(-999.8735, device='cuda:0')
episode: 115 training return: tensor(-994.3190, device='cuda:0')
epoch: 29 test_true_pfm: 10249.034066552167 sim_pfm: -303.7236338228492
episode: 116 training return: tensor(-348.9007, device='cuda:0')
episode: 117 training return: tensor(-370.4689, device='cuda:0')
episode: 118 training return: tensor(-993.8634, device='cuda:0')
episode: 119 training return: tensor(-791.5388, device='cuda:0')
epoch: 30 test_true_pfm: 5355.219460876865 sim_pfm: -687.1380385840312
episode: 120 training return: tensor(-198.1505, device='cuda:0')
episode: 121 training return: tensor(-999.5695, device='cuda:0')
episode: 122 training return: tensor(-716.9043, device='cuda:0')
episode: 123 training return: tensor(-84.2706, device='cuda:0')
epoch: 31 test_true_pfm: 10202.364291986378 sim_pfm: -290.5039889498924
episode: 124 training return: tensor(-286.7824, device='cuda:0')
episode: 125 training return: tensor(-396.7168, device='cuda:0')
episode: 126 training return: tensor(-246.8377, device='cuda:0')
episode: 127 training return: tensor(-990.5822, device='cuda:0')
epoch: 32 test_true_pfm: 8662.263347648404 sim_pfm: -189.96660541696474
episode: 128 training return: tensor(-458.4748, device='cuda:0')
episode: 129 training return: tensor(-365.3576, device='cuda:0')
episode: 130 training return: tensor(-382.2237, device='cuda:0')
episode: 131 training return: tensor(-999.9990, device='cuda:0')
epoch: 33 test_true_pfm: 4380.259652175066 sim_pfm: -230.53126147016883
episode: 132 training return: tensor(-16.4427, device='cuda:0')
episode: 133 training return: tensor(-898.0273, device='cuda:0')
episode: 134 training return: tensor(-999.7997, device='cuda:0')
episode: 135 training return: tensor(-138.8993, device='cuda:0')
epoch: 34 test_true_pfm: 9177.24650848625 sim_pfm: -32.051323898330644
episode: 136 training return: tensor(-315.9828, device='cuda:0')
episode: 137 training return: tensor(-834.4396, device='cuda:0')
episode: 138 training return: tensor(-926.5171, device='cuda:0')
episode: 139 training return: tensor(-845.7048, device='cuda:0')
epoch: 35 test_true_pfm: 6729.336612241411 sim_pfm: -359.1510729420115
episode: 140 training return: tensor(-289.7744, device='cuda:0')
episode: 141 training return: tensor(-999.9991, device='cuda:0')
episode: 142 training return: tensor(-281.9225, device='cuda:0')
episode: 143 training return: tensor(-253.6597, device='cuda:0')
epoch: 36 test_true_pfm: 6828.201196149861 sim_pfm: -334.6813676066813
episode: 144 training return: tensor(-142.8797, device='cuda:0')
episode: 145 training return: tensor(-164.2909, device='cuda:0')
episode: 146 training return: tensor(-108.1703, device='cuda:0')
episode: 147 training return: tensor(-270.8956, device='cuda:0')
epoch: 37 test_true_pfm: 9916.779704245635 sim_pfm: -347.55117661009234
episode: 148 training return: tensor(-334.1091, device='cuda:0')
episode: 149 training return: tensor(-232.3898, device='cuda:0')
episode: 150 training return: tensor(-69.7232, device='cuda:0')
episode: 151 training return: tensor(-211.8241, device='cuda:0')
epoch: 38 test_true_pfm: 6892.366048164188 sim_pfm: -314.82605905812426
episode: 152 training return: tensor(-244.5688, device='cuda:0')
episode: 153 training return: tensor(-44.6239, device='cuda:0')
episode: 154 training return: tensor(-344.1753, device='cuda:0')
episode: 155 training return: tensor(-209.1531, device='cuda:0')
epoch: 39 test_true_pfm: 7938.520371310772 sim_pfm: -261.0483639873564
episode: 156 training return: tensor(-999.9897, device='cuda:0')
episode: 157 training return: tensor(-196.0227, device='cuda:0')
episode: 158 training return: tensor(-602.7297, device='cuda:0')
episode: 159 training return: tensor(-257.2642, device='cuda:0')
epoch: 40 test_true_pfm: 6747.598320141119 sim_pfm: -123.48117150769879
episode: 160 training return: tensor(-456.0811, device='cuda:0')
episode: 161 training return: tensor(-116.7596, device='cuda:0')
episode: 162 training return: tensor(-951.8320, device='cuda:0')
episode: 163 training return: tensor(-179.3183, device='cuda:0')
epoch: 41 test_true_pfm: 10316.080375419913 sim_pfm: -478.1866800367522
episode: 164 training return: tensor(-418.2751, device='cuda:0')
episode: 165 training return: tensor(-313.1754, device='cuda:0')
episode: 166 training return: tensor(-999.9993, device='cuda:0')
episode: 167 training return: tensor(-192.2713, device='cuda:0')
epoch: 42 test_true_pfm: 9269.17692988049 sim_pfm: 60.96710229783397
episode: 168 training return: tensor(-221.2657, device='cuda:0')
episode: 169 training return: tensor(-383.2615, device='cuda:0')
episode: 170 training return: tensor(-445.0345, device='cuda:0')
episode: 171 training return: tensor(-310.7202, device='cuda:0')
epoch: 43 test_true_pfm: 10522.598012010074 sim_pfm: -97.99416263762396
episode: 172 training return: tensor(-634.1125, device='cuda:0')
episode: 173 training return: tensor(-201.0149, device='cuda:0')
episode: 174 training return: tensor(-999.8583, device='cuda:0')
episode: 175 training return: tensor(-351.5801, device='cuda:0')
epoch: 44 test_true_pfm: 8041.54078671943 sim_pfm: -31.70714276144281
episode: 176 training return: tensor(-999.9995, device='cuda:0')
episode: 177 training return: tensor(-470.7783, device='cuda:0')
episode: 178 training return: tensor(-833.2218, device='cuda:0')
episode: 179 training return: tensor(-121.9200, device='cuda:0')
epoch: 45 test_true_pfm: 10119.59807286359 sim_pfm: -198.147036680845
episode: 180 training return: tensor(-387.9564, device='cuda:0')
episode: 181 training return: tensor(-874.4902, device='cuda:0')
episode: 182 training return: tensor(-9.1437, device='cuda:0')
episode: 183 training return: tensor(-268.0983, device='cuda:0')
epoch: 46 test_true_pfm: 4560.813779177576 sim_pfm: -2.2574951582161398
episode: 184 training return: tensor(-210.5163, device='cuda:0')
episode: 185 training return: tensor(-700.6702, device='cuda:0')
episode: 186 training return: tensor(-150.2852, device='cuda:0')
episode: 187 training return: tensor(-494.4674, device='cuda:0')
epoch: 47 test_true_pfm: 10367.870146172436 sim_pfm: -330.82836193568073
episode: 188 training return: tensor(4.5204, device='cuda:0')
episode: 189 training return: tensor(-542.6596, device='cuda:0')
episode: 190 training return: tensor(-26.9805, device='cuda:0')
episode: 191 training return: tensor(-161.6687, device='cuda:0')
epoch: 48 test_true_pfm: 6837.623665807652 sim_pfm: 43.09983267332427
episode: 192 training return: tensor(-316.7813, device='cuda:0')
episode: 193 training return: tensor(-230.4452, device='cuda:0')
episode: 194 training return: tensor(-484.1095, device='cuda:0')
episode: 195 training return: tensor(-131.5972, device='cuda:0')
epoch: 49 test_true_pfm: 10381.914163910158 sim_pfm: 42.251083227864
episode: 196 training return: tensor(-47.0733, device='cuda:0')
episode: 197 training return: tensor(-212.9442, device='cuda:0')
episode: 198 training return: tensor(-999.9988, device='cuda:0')
episode: 199 training return: tensor(-305.9542, device='cuda:0')
epoch: 50 test_true_pfm: 6005.3312900388655 sim_pfm: -470.04647490180406
episode: 200 training return: tensor(-63.9510, device='cuda:0')
episode: 201 training return: tensor(-93.5684, device='cuda:0')
episode: 202 training return: tensor(-222.4830, device='cuda:0')
episode: 203 training return: tensor(-310.9272, device='cuda:0')
epoch: 51 test_true_pfm: 10153.67784048312 sim_pfm: -6.049978148502608
episode: 204 training return: tensor(-990.3865, device='cuda:0')
episode: 205 training return: tensor(-79.8025, device='cuda:0')
episode: 206 training return: tensor(-226.6566, device='cuda:0')
episode: 207 training return: tensor(-240.8477, device='cuda:0')
epoch: 52 test_true_pfm: 10120.013586997547 sim_pfm: 97.8797289429931
episode: 208 training return: tensor(-297.5498, device='cuda:0')
episode: 209 training return: tensor(-999.9893, device='cuda:0')
episode: 210 training return: tensor(-739.6778, device='cuda:0')
episode: 211 training return: tensor(-233.6431, device='cuda:0')
epoch: 53 test_true_pfm: 8183.950593960758 sim_pfm: -83.10918691658298
episode: 212 training return: tensor(-256.0794, device='cuda:0')
episode: 213 training return: tensor(-343.4837, device='cuda:0')
episode: 214 training return: tensor(-128.8526, device='cuda:0')
episode: 215 training return: tensor(-122.7606, device='cuda:0')
epoch: 54 test_true_pfm: 6567.556527581367 sim_pfm: -694.4513205424882
episode: 216 training return: tensor(-26.3872, device='cuda:0')
episode: 217 training return: tensor(-652.0840, device='cuda:0')
episode: 218 training return: tensor(-71.4204, device='cuda:0')
episode: 219 training return: tensor(22.8179, device='cuda:0')
epoch: 55 test_true_pfm: 6754.102736991168 sim_pfm: -297.6334233181454
episode: 220 training return: tensor(-338.8468, device='cuda:0')
episode: 221 training return: tensor(-258.9599, device='cuda:0')
episode: 222 training return: tensor(-106.8377, device='cuda:0')
episode: 223 training return: tensor(-180.7477, device='cuda:0')
epoch: 56 test_true_pfm: 10119.895300930422 sim_pfm: -92.58352556811103
episode: 224 training return: tensor(15.4356, device='cuda:0')
episode: 225 training return: tensor(-103.7205, device='cuda:0')
episode: 226 training return: tensor(-886.6102, device='cuda:0')
episode: 227 training return: tensor(0.2613, device='cuda:0')
epoch: 57 test_true_pfm: 6973.069061111339 sim_pfm: 38.897160215807766
episode: 228 training return: tensor(-999.9774, device='cuda:0')
episode: 229 training return: tensor(-118.6143, device='cuda:0')
episode: 230 training return: tensor(-25.8144, device='cuda:0')
episode: 231 training return: tensor(-18.3705, device='cuda:0')
epoch: 58 test_true_pfm: 6912.199400715991 sim_pfm: 6.255796731275041
episode: 232 training return: tensor(-116.2274, device='cuda:0')
episode: 233 training return: tensor(-34.9786, device='cuda:0')
episode: 234 training return: tensor(-4.8111, device='cuda:0')
episode: 235 training return: tensor(-70.3858, device='cuda:0')
epoch: 59 test_true_pfm: 8130.211995483233 sim_pfm: -123.1223495050023
episode: 236 training return: tensor(-70.1976, device='cuda:0')
episode: 237 training return: tensor(-135.0910, device='cuda:0')
episode: 238 training return: tensor(-68.7635, device='cuda:0')
episode: 239 training return: tensor(-228.1457, device='cuda:0')
epoch: 60 test_true_pfm: 3970.935788006663 sim_pfm: -38.623920329594206
episode: 240 training return: tensor(-124.7618, device='cuda:0')
episode: 241 training return: tensor(-999.9988, device='cuda:0')
episode: 242 training return: tensor(-153.2395, device='cuda:0')
episode: 243 training return: tensor(-69.1286, device='cuda:0')
epoch: 61 test_true_pfm: 10267.599343293316 sim_pfm: -72.10761245584581
episode: 244 training return: tensor(-268.2986, device='cuda:0')
episode: 245 training return: tensor(-66.5257, device='cuda:0')
episode: 246 training return: tensor(-29.1524, device='cuda:0')
episode: 247 training return: tensor(-101.7128, device='cuda:0')
epoch: 62 test_true_pfm: 10237.186274698162 sim_pfm: -111.19243465127268
episode: 248 training return: tensor(-181.3164, device='cuda:0')
episode: 249 training return: tensor(-22.7834, device='cuda:0')
episode: 250 training return: tensor(-147.3827, device='cuda:0')
episode: 251 training return: tensor(-90.6441, device='cuda:0')
epoch: 63 test_true_pfm: 10280.990823602811 sim_pfm: 74.36351159287733
episode: 252 training return: tensor(82.8859, device='cuda:0')
episode: 253 training return: tensor(-314.7091, device='cuda:0')
episode: 254 training return: tensor(-673.2583, device='cuda:0')
episode: 255 training return: tensor(-999.7527, device='cuda:0')
epoch: 64 test_true_pfm: 9662.694633403366 sim_pfm: -26.091239420551574
episode: 256 training return: tensor(-184.6992, device='cuda:0')
episode: 257 training return: tensor(-165.4587, device='cuda:0')
episode: 258 training return: tensor(-797.8428, device='cuda:0')
episode: 259 training return: tensor(-91.1485, device='cuda:0')
epoch: 65 test_true_pfm: 10345.412075386266 sim_pfm: -177.67178230458134
episode: 260 training return: tensor(-73.1783, device='cuda:0')
episode: 261 training return: tensor(-196.5331, device='cuda:0')
episode: 262 training return: tensor(-469.9554, device='cuda:0')
episode: 263 training return: tensor(-67.2785, device='cuda:0')
epoch: 66 test_true_pfm: 6646.616070795196 sim_pfm: -39.52198963212626
episode: 264 training return: tensor(-289.0285, device='cuda:0')
episode: 265 training return: tensor(-199.6276, device='cuda:0')
episode: 266 training return: tensor(-46.3387, device='cuda:0')
episode: 267 training return: tensor(-125.1879, device='cuda:0')
epoch: 67 test_true_pfm: 9302.488645603999 sim_pfm: -581.893877622264
episode: 268 training return: tensor(-916.6362, device='cuda:0')
episode: 269 training return: tensor(-175.9323, device='cuda:0')
episode: 270 training return: tensor(-64.6049, device='cuda:0')
episode: 271 training return: tensor(-999.9942, device='cuda:0')
epoch: 68 test_true_pfm: 9775.27106879781 sim_pfm: -656.9052309947243
episode: 272 training return: tensor(-258.8608, device='cuda:0')
episode: 273 training return: tensor(-207.3262, device='cuda:0')
episode: 274 training return: tensor(-52.7724, device='cuda:0')
episode: 275 training return: tensor(-999.3967, device='cuda:0')
epoch: 69 test_true_pfm: 6789.818270802862 sim_pfm: -86.41176039093989
episode: 276 training return: tensor(-147.1225, device='cuda:0')
episode: 277 training return: tensor(48.7766, device='cuda:0')
episode: 278 training return: tensor(-999.9297, device='cuda:0')
episode: 279 training return: tensor(-68.0118, device='cuda:0')
epoch: 70 test_true_pfm: 10418.26638994624 sim_pfm: -21.226318464750268
episode: 280 training return: tensor(-187.6858, device='cuda:0')
episode: 281 training return: tensor(-97.9968, device='cuda:0')
episode: 282 training return: tensor(-999.9996, device='cuda:0')
episode: 283 training return: tensor(-999.9810, device='cuda:0')
epoch: 71 test_true_pfm: 10288.657938549353 sim_pfm: -308.89075766468886
episode: 284 training return: tensor(-157.4447, device='cuda:0')
episode: 285 training return: tensor(-173.8366, device='cuda:0')
episode: 286 training return: tensor(-122.4379, device='cuda:0')
episode: 287 training return: tensor(-204.8251, device='cuda:0')
epoch: 72 test_true_pfm: 9842.905078543117 sim_pfm: -498.9631358151091
episode: 288 training return: tensor(-84.6831, device='cuda:0')
episode: 289 training return: tensor(36.2576, device='cuda:0')
episode: 290 training return: tensor(-59.1261, device='cuda:0')
episode: 291 training return: tensor(-117.2891, device='cuda:0')
epoch: 73 test_true_pfm: 4708.332521596422 sim_pfm: -278.5743511335265
episode: 292 training return: tensor(47.2058, device='cuda:0')
episode: 293 training return: tensor(-124.4905, device='cuda:0')
episode: 294 training return: tensor(-84.0519, device='cuda:0')
episode: 295 training return: tensor(-1000., device='cuda:0')
epoch: 74 test_true_pfm: 6624.971171923614 sim_pfm: -286.51801238846383
episode: 296 training return: tensor(-999.9998, device='cuda:0')
episode: 297 training return: tensor(-817.9628, device='cuda:0')
episode: 298 training return: tensor(-780.2787, device='cuda:0')
episode: 299 training return: tensor(-860.0002, device='cuda:0')
epoch: 75 test_true_pfm: 10312.81749198553 sim_pfm: -89.3706717689347
episode: 300 training return: tensor(-993.1498, device='cuda:0')
episode: 301 training return: tensor(-100.2332, device='cuda:0')
episode: 302 training return: tensor(-998.7599, device='cuda:0')
episode: 303 training return: tensor(-80.2951, device='cuda:0')
epoch: 76 test_true_pfm: 6756.725963409844 sim_pfm: 30.666694099393982
episode: 304 training return: tensor(-999.9999, device='cuda:0')
episode: 305 training return: tensor(-621.4861, device='cuda:0')
episode: 306 training return: tensor(-424.8955, device='cuda:0')
episode: 307 training return: tensor(-30.7443, device='cuda:0')
epoch: 77 test_true_pfm: 10177.007750976001 sim_pfm: -670.0694239421282
episode: 308 training return: tensor(-999.9997, device='cuda:0')
episode: 309 training return: tensor(-282.5696, device='cuda:0')
episode: 310 training return: tensor(-479.6709, device='cuda:0')
episode: 311 training return: tensor(-32.2353, device='cuda:0')
epoch: 78 test_true_pfm: 10266.430156128832 sim_pfm: 60.288572889384035
episode: 312 training return: tensor(27.5774, device='cuda:0')
episode: 313 training return: tensor(-229.5696, device='cuda:0')
episode: 314 training return: tensor(-56.6785, device='cuda:0')
episode: 315 training return: tensor(-142.1151, device='cuda:0')
epoch: 79 test_true_pfm: 6977.127663508442 sim_pfm: -417.8816065638093
episode: 316 training return: tensor(-80.8922, device='cuda:0')
episode: 317 training return: tensor(-60.3708, device='cuda:0')
episode: 318 training return: tensor(-49.4653, device='cuda:0')
episode: 319 training return: tensor(-76.8062, device='cuda:0')
epoch: 80 test_true_pfm: 3328.1120970244824 sim_pfm: -28.238336254638853
episode: 320 training return: tensor(-998.8985, device='cuda:0')
episode: 321 training return: tensor(191.7501, device='cuda:0')
episode: 322 training return: tensor(-674.1322, device='cuda:0')
episode: 323 training return: tensor(-620.5532, device='cuda:0')
epoch: 81 test_true_pfm: 6942.818007821918 sim_pfm: -17.325937164821273
episode: 324 training return: tensor(-39.6161, device='cuda:0')
episode: 325 training return: tensor(-113.0719, device='cuda:0')
episode: 326 training return: tensor(-999.9703, device='cuda:0')
episode: 327 training return: tensor(-999.9998, device='cuda:0')
epoch: 82 test_true_pfm: 10287.999712947361 sim_pfm: -338.5347183584042
episode: 328 training return: tensor(-372.0791, device='cuda:0')
episode: 329 training return: tensor(-195.8161, device='cuda:0')
episode: 330 training return: tensor(-999.9786, device='cuda:0')
episode: 331 training return: tensor(-478.2962, device='cuda:0')
epoch: 83 test_true_pfm: 8187.275114597741 sim_pfm: -327.29252283686463
episode: 332 training return: tensor(-48.3619, device='cuda:0')
episode: 333 training return: tensor(-857.0013, device='cuda:0')
episode: 334 training return: tensor(-1.7138, device='cuda:0')
episode: 335 training return: tensor(-119.8284, device='cuda:0')
epoch: 84 test_true_pfm: 6685.9203793049555 sim_pfm: -251.31047946438775
episode: 336 training return: tensor(-997.8179, device='cuda:0')
episode: 337 training return: tensor(-999.9995, device='cuda:0')
episode: 338 training return: tensor(-720.5578, device='cuda:0')
episode: 339 training return: tensor(-999.9998, device='cuda:0')
epoch: 85 test_true_pfm: 3146.630071744303 sim_pfm: -325.18180327808176
episode: 340 training return: tensor(-462.8111, device='cuda:0')
episode: 341 training return: tensor(-820.8455, device='cuda:0')
episode: 342 training return: tensor(-723.0248, device='cuda:0')
episode: 343 training return: tensor(-256.0809, device='cuda:0')
epoch: 86 test_true_pfm: 9717.710933614531 sim_pfm: -144.0482568333391
episode: 344 training return: tensor(-383.9655, device='cuda:0')
episode: 345 training return: tensor(-317.3422, device='cuda:0')
episode: 346 training return: tensor(-999.9995, device='cuda:0')
episode: 347 training return: tensor(-999.9984, device='cuda:0')
epoch: 87 test_true_pfm: 6770.6728011242985 sim_pfm: -13.90199532857514
episode: 348 training return: tensor(-323.1507, device='cuda:0')
episode: 349 training return: tensor(-84.4503, device='cuda:0')
episode: 350 training return: tensor(-996.4211, device='cuda:0')
episode: 351 training return: tensor(6.7613, device='cuda:0')
epoch: 88 test_true_pfm: 4106.661796940437 sim_pfm: -643.9230558327012
episode: 352 training return: tensor(-999.9999, device='cuda:0')
episode: 353 training return: tensor(-417.9503, device='cuda:0')
episode: 354 training return: tensor(-124.8542, device='cuda:0')
episode: 355 training return: tensor(-86.7162, device='cuda:0')
epoch: 89 test_true_pfm: 8024.403662771237 sim_pfm: -404.1081883065247
episode: 356 training return: tensor(-207.8994, device='cuda:0')
episode: 357 training return: tensor(-150.7376, device='cuda:0')
episode: 358 training return: tensor(-997.8413, device='cuda:0')
episode: 359 training return: tensor(-616.9169, device='cuda:0')
epoch: 90 test_true_pfm: 6824.37968333333 sim_pfm: -358.15401636401657
episode: 360 training return: tensor(-156.6379, device='cuda:0')
episode: 361 training return: tensor(-45.8152, device='cuda:0')
episode: 362 training return: tensor(-231.9340, device='cuda:0')
episode: 363 training return: tensor(16.1846, device='cuda:0')
epoch: 91 test_true_pfm: 10365.934622541556 sim_pfm: -321.7235721145601
episode: 364 training return: tensor(-17.1042, device='cuda:0')
episode: 365 training return: tensor(-999.9993, device='cuda:0')
episode: 366 training return: tensor(-108.3668, device='cuda:0')
episode: 367 training return: tensor(-148.7031, device='cuda:0')
epoch: 92 test_true_pfm: 6825.547629793418 sim_pfm: 36.5840742738607
episode: 368 training return: tensor(-989.4008, device='cuda:0')
episode: 369 training return: tensor(4.5163, device='cuda:0')
episode: 370 training return: tensor(-114.7121, device='cuda:0')
episode: 371 training return: tensor(-313.9315, device='cuda:0')
epoch: 93 test_true_pfm: 8576.931192576145 sim_pfm: -439.47480143598904
episode: 372 training return: tensor(-123.4523, device='cuda:0')
episode: 373 training return: tensor(-245.1723, device='cuda:0')
episode: 374 training return: tensor(-999.9994, device='cuda:0')
episode: 375 training return: tensor(-999.9500, device='cuda:0')
epoch: 94 test_true_pfm: 8342.929266407378 sim_pfm: -63.96608572472663
episode: 376 training return: tensor(-325.1628, device='cuda:0')
episode: 377 training return: tensor(-59.2036, device='cuda:0')
episode: 378 training return: tensor(-136.9716, device='cuda:0')
episode: 379 training return: tensor(-253.5498, device='cuda:0')
epoch: 95 test_true_pfm: 10587.194514364293 sim_pfm: -300.40943924098974
episode: 380 training return: tensor(52.6101, device='cuda:0')
episode: 381 training return: tensor(-999.9274, device='cuda:0')
episode: 382 training return: tensor(-90.6086, device='cuda:0')
episode: 383 training return: tensor(-35.7380, device='cuda:0')
epoch: 96 test_true_pfm: 10235.897763929957 sim_pfm: 91.7445865260476
episode: 384 training return: tensor(-307.6494, device='cuda:0')
episode: 385 training return: tensor(-157.9897, device='cuda:0')
episode: 386 training return: tensor(-78.5436, device='cuda:0')
episode: 387 training return: tensor(-999.9033, device='cuda:0')
epoch: 97 test_true_pfm: 10232.08869322536 sim_pfm: 43.64898020690695
episode: 388 training return: tensor(60.0246, device='cuda:0')
episode: 389 training return: tensor(-96.5171, device='cuda:0')
episode: 390 training return: tensor(-457.6428, device='cuda:0')
episode: 391 training return: tensor(-999.9998, device='cuda:0')
epoch: 98 test_true_pfm: 10358.443678518202 sim_pfm: -266.59700975526357
episode: 392 training return: tensor(-966.9314, device='cuda:0')
episode: 393 training return: tensor(-184.5387, device='cuda:0')
episode: 394 training return: tensor(-180.0444, device='cuda:0')
episode: 395 training return: tensor(-0.9130, device='cuda:0')
epoch: 99 test_true_pfm: 10580.811184644901 sim_pfm: 73.94640102008513
episode: 396 training return: tensor(-35.1721, device='cuda:0')
episode: 397 training return: tensor(-654.0280, device='cuda:0')
episode: 398 training return: tensor(-29.8544, device='cuda:0')
episode: 399 training return: tensor(-241.2361, device='cuda:0')
epoch: 100 test_true_pfm: 10411.82771676551 sim_pfm: -16.40939243347384
episode: 400 training return: tensor(77.9889, device='cuda:0')
episode: 401 training return: tensor(-158.0818, device='cuda:0')
episode: 402 training return: tensor(-47.5081, device='cuda:0')
episode: 403 training return: tensor(-999.9995, device='cuda:0')
epoch: 101 test_true_pfm: 10667.845165234861 sim_pfm: 103.20885254696866
episode: 404 training return: tensor(-154.4818, device='cuda:0')
episode: 405 training return: tensor(-65.2828, device='cuda:0')
episode: 406 training return: tensor(-12.7777, device='cuda:0')
episode: 407 training return: tensor(83.4182, device='cuda:0')
epoch: 102 test_true_pfm: 10402.794304061577 sim_pfm: 75.440682239132
episode: 408 training return: tensor(-342.7017, device='cuda:0')
episode: 409 training return: tensor(-735.6566, device='cuda:0')
episode: 410 training return: tensor(-999.9993, device='cuda:0')
episode: 411 training return: tensor(-53.3911, device='cuda:0')
epoch: 103 test_true_pfm: 10371.946896838774 sim_pfm: -371.5259382991353
episode: 412 training return: tensor(-80.8908, device='cuda:0')
episode: 413 training return: tensor(25.7840, device='cuda:0')
episode: 414 training return: tensor(-999.9392, device='cuda:0')
episode: 415 training return: tensor(-175.9318, device='cuda:0')
epoch: 104 test_true_pfm: 10277.57542947049 sim_pfm: 60.62857672806907
episode: 416 training return: tensor(-999.9991, device='cuda:0')
episode: 417 training return: tensor(-103.6251, device='cuda:0')
episode: 418 training return: tensor(69.4292, device='cuda:0')
episode: 419 training return: tensor(-998.4291, device='cuda:0')
epoch: 105 test_true_pfm: 9426.43970896558 sim_pfm: -245.52827962691663
episode: 420 training return: tensor(58.5987, device='cuda:0')
episode: 421 training return: tensor(-46.9738, device='cuda:0')
episode: 422 training return: tensor(-162.5785, device='cuda:0')
episode: 423 training return: tensor(-18.1287, device='cuda:0')
epoch: 106 test_true_pfm: 6984.014144498348 sim_pfm: -193.44295303269368
episode: 424 training return: tensor(-200.7063, device='cuda:0')
episode: 425 training return: tensor(29.9030, device='cuda:0')
episode: 426 training return: tensor(35.0928, device='cuda:0')
episode: 427 training return: tensor(3.9770, device='cuda:0')
epoch: 107 test_true_pfm: 10451.670812456936 sim_pfm: -294.1219763197005
episode: 428 training return: tensor(-153.0909, device='cuda:0')
episode: 429 training return: tensor(-999.9720, device='cuda:0')
episode: 430 training return: tensor(-223.9931, device='cuda:0')
episode: 431 training return: tensor(-267.8172, device='cuda:0')
epoch: 108 test_true_pfm: 10307.461354066398 sim_pfm: -493.1847739692603
episode: 432 training return: tensor(-32.4627, device='cuda:0')
episode: 433 training return: tensor(-493.0567, device='cuda:0')
episode: 434 training return: tensor(-10.2792, device='cuda:0')
episode: 435 training return: tensor(85.1075, device='cuda:0')
epoch: 109 test_true_pfm: 10489.742069635931 sim_pfm: -646.166293807871
episode: 436 training return: tensor(64.5005, device='cuda:0')
episode: 437 training return: tensor(84.6332, device='cuda:0')
episode: 438 training return: tensor(-148.9478, device='cuda:0')
episode: 439 training return: tensor(-17.2431, device='cuda:0')
epoch: 110 test_true_pfm: 5889.051716669602 sim_pfm: -220.59395388614698
episode: 440 training return: tensor(-135.2322, device='cuda:0')
episode: 441 training return: tensor(-51.2969, device='cuda:0')
episode: 442 training return: tensor(-219.4366, device='cuda:0')
episode: 443 training return: tensor(-142.1489, device='cuda:0')
epoch: 111 test_true_pfm: 10451.943653492905 sim_pfm: -177.55237999131592
episode: 444 training return: tensor(-824.3485, device='cuda:0')
episode: 445 training return: tensor(-82.6586, device='cuda:0')
episode: 446 training return: tensor(79.3826, device='cuda:0')
episode: 447 training return: tensor(-14.4493, device='cuda:0')
epoch: 112 test_true_pfm: 5998.277860555035 sim_pfm: -454.44732938151964
episode: 448 training return: tensor(-15.8670, device='cuda:0')
episode: 449 training return: tensor(36.5847, device='cuda:0')
episode: 450 training return: tensor(-999.9987, device='cuda:0')
episode: 451 training return: tensor(-11.9189, device='cuda:0')
epoch: 113 test_true_pfm: 10459.025884526904 sim_pfm: -18.12836600712035
episode: 452 training return: tensor(-185.2471, device='cuda:0')
episode: 453 training return: tensor(5.5876, device='cuda:0')
episode: 454 training return: tensor(-1.0902, device='cuda:0')
episode: 455 training return: tensor(-19.2379, device='cuda:0')
epoch: 114 test_true_pfm: 6765.927013610658 sim_pfm: -272.9431532572974
episode: 456 training return: tensor(-118.5320, device='cuda:0')
episode: 457 training return: tensor(-172.5666, device='cuda:0')
episode: 458 training return: tensor(-91.8591, device='cuda:0')
episode: 459 training return: tensor(-204.2942, device='cuda:0')
epoch: 115 test_true_pfm: 6981.355229576174 sim_pfm: -185.84942613833118
episode: 460 training return: tensor(-790.4216, device='cuda:0')
episode: 461 training return: tensor(-267.0784, device='cuda:0')
episode: 462 training return: tensor(54.2640, device='cuda:0')
episode: 463 training return: tensor(-198.2185, device='cuda:0')
epoch: 116 test_true_pfm: 9830.722708724374 sim_pfm: -300.0055077241656
episode: 464 training return: tensor(-121.5685, device='cuda:0')
episode: 465 training return: tensor(-67.4566, device='cuda:0')
episode: 466 training return: tensor(-17.7592, device='cuda:0')
episode: 467 training return: tensor(-158.0656, device='cuda:0')
epoch: 117 test_true_pfm: 5808.181317293022 sim_pfm: -537.506086854885
episode: 468 training return: tensor(-999.9673, device='cuda:0')
episode: 469 training return: tensor(2.7427, device='cuda:0')
episode: 470 training return: tensor(-216.4944, device='cuda:0')
episode: 471 training return: tensor(-999.9865, device='cuda:0')
epoch: 118 test_true_pfm: 10506.170542818116 sim_pfm: 102.99530949462981
episode: 472 training return: tensor(-27.5356, device='cuda:0')
episode: 473 training return: tensor(-184.1583, device='cuda:0')
episode: 474 training return: tensor(-288.0373, device='cuda:0')
episode: 475 training return: tensor(97.0270, device='cuda:0')
epoch: 119 test_true_pfm: 10436.481211915658 sim_pfm: -26.831870087616455
episode: 476 training return: tensor(-107.5015, device='cuda:0')
episode: 477 training return: tensor(19.3961, device='cuda:0')
episode: 478 training return: tensor(-8.8407, device='cuda:0')
episode: 479 training return: tensor(-184.3198, device='cuda:0')
epoch: 120 test_true_pfm: 10335.490175941139 sim_pfm: -66.76270020372856
episode: 480 training return: tensor(-151.6582, device='cuda:0')
episode: 481 training return: tensor(-152.4549, device='cuda:0')
episode: 482 training return: tensor(-827.0591, device='cuda:0')
episode: 483 training return: tensor(132.0815, device='cuda:0')
epoch: 121 test_true_pfm: 5331.523939704282 sim_pfm: 123.0152118706028
episode: 484 training return: tensor(-631.0410, device='cuda:0')
episode: 485 training return: tensor(-141.2157, device='cuda:0')
episode: 486 training return: tensor(-975.0792, device='cuda:0')
episode: 487 training return: tensor(-999.9806, device='cuda:0')
epoch: 122 test_true_pfm: 6985.799106477035 sim_pfm: 219.46856526758833
episode: 488 training return: tensor(-260.5851, device='cuda:0')
episode: 489 training return: tensor(-999.9998, device='cuda:0')
episode: 490 training return: tensor(-79.1560, device='cuda:0')
episode: 491 training return: tensor(-94.6806, device='cuda:0')
epoch: 123 test_true_pfm: 10289.68717692158 sim_pfm: -349.5437604809219
episode: 492 training return: tensor(-382.1317, device='cuda:0')
episode: 493 training return: tensor(17.0752, device='cuda:0')
episode: 494 training return: tensor(-999.8804, device='cuda:0')
episode: 495 training return: tensor(15.4308, device='cuda:0')
epoch: 124 test_true_pfm: 3285.738534053173 sim_pfm: 111.58868630800862
episode: 496 training return: tensor(-96.4068, device='cuda:0')
episode: 497 training return: tensor(-19.5042, device='cuda:0')
episode: 498 training return: tensor(-54.0075, device='cuda:0')
episode: 499 training return: tensor(-555.6383, device='cuda:0')
epoch: 125 test_true_pfm: 6822.1087736601285 sim_pfm: -253.96336454042466
episode: 500 training return: tensor(-999.9807, device='cuda:0')
episode: 501 training return: tensor(6.8395, device='cuda:0')
episode: 502 training return: tensor(-999.9811, device='cuda:0')
episode: 503 training return: tensor(-720.4999, device='cuda:0')
epoch: 126 test_true_pfm: 9926.811419720661 sim_pfm: -182.0771248942086
episode: 504 training return: tensor(41.5793, device='cuda:0')
episode: 505 training return: tensor(17.2186, device='cuda:0')
episode: 506 training return: tensor(-154.1524, device='cuda:0')
episode: 507 training return: tensor(-135.7286, device='cuda:0')
epoch: 127 test_true_pfm: 10397.096456611645 sim_pfm: 53.74607566229921
episode: 508 training return: tensor(-47.7554, device='cuda:0')
episode: 509 training return: tensor(-76.0865, device='cuda:0')
episode: 510 training return: tensor(-23.2547, device='cuda:0')
episode: 511 training return: tensor(-117.1215, device='cuda:0')
epoch: 128 test_true_pfm: 10475.91138507817 sim_pfm: -298.9107999963259
episode: 512 training return: tensor(20.1469, device='cuda:0')
episode: 513 training return: tensor(-42.0706, device='cuda:0')
episode: 514 training return: tensor(102.4366, device='cuda:0')
episode: 515 training return: tensor(-172.1691, device='cuda:0')
epoch: 129 test_true_pfm: 3370.7596294400014 sim_pfm: 102.04735799354967
episode: 516 training return: tensor(-861.6583, device='cuda:0')
episode: 517 training return: tensor(-135.2230, device='cuda:0')
episode: 518 training return: tensor(-105.4656, device='cuda:0')
episode: 519 training return: tensor(-133.0633, device='cuda:0')
epoch: 130 test_true_pfm: 6937.688252415212 sim_pfm: -126.83787874194483
episode: 520 training return: tensor(-999.9681, device='cuda:0')
episode: 521 training return: tensor(-142.1865, device='cuda:0')
episode: 522 training return: tensor(-648.7119, device='cuda:0')
episode: 523 training return: tensor(-124.4281, device='cuda:0')
epoch: 131 test_true_pfm: 9898.19261607101 sim_pfm: 31.792266610078514
episode: 524 training return: tensor(-206.5293, device='cuda:0')
episode: 525 training return: tensor(-999.8840, device='cuda:0')
episode: 526 training return: tensor(-62.7475, device='cuda:0')
episode: 527 training return: tensor(-36.3541, device='cuda:0')
epoch: 132 test_true_pfm: 10182.229690062317 sim_pfm: -52.28167581766805
episode: 528 training return: tensor(-68.7412, device='cuda:0')
episode: 529 training return: tensor(-748.8032, device='cuda:0')
episode: 530 training return: tensor(-144.8181, device='cuda:0')
episode: 531 training return: tensor(50.0181, device='cuda:0')
epoch: 133 test_true_pfm: 8366.81625330526 sim_pfm: 66.45635171070656
episode: 532 training return: tensor(-19.2871, device='cuda:0')
episode: 533 training return: tensor(96.7191, device='cuda:0')
episode: 534 training return: tensor(-82.7429, device='cuda:0')
episode: 535 training return: tensor(-1.4231, device='cuda:0')
epoch: 134 test_true_pfm: 10157.514982972627 sim_pfm: 3.026489860387907
episode: 536 training return: tensor(-152.0570, device='cuda:0')
episode: 537 training return: tensor(-295.3379, device='cuda:0')
episode: 538 training return: tensor(-39.9626, device='cuda:0')
episode: 539 training return: tensor(-102.0206, device='cuda:0')
epoch: 135 test_true_pfm: 6859.6026660661855 sim_pfm: -25.942708744842093
episode: 540 training return: tensor(48.4777, device='cuda:0')
episode: 541 training return: tensor(-36.9851, device='cuda:0')
episode: 542 training return: tensor(58.6739, device='cuda:0')
episode: 543 training return: tensor(-91.2910, device='cuda:0')
epoch: 136 test_true_pfm: 6917.861431542083 sim_pfm: 165.03964611523165
episode: 544 training return: tensor(-131.4632, device='cuda:0')
episode: 545 training return: tensor(-622.3110, device='cuda:0')
episode: 546 training return: tensor(-934.1820, device='cuda:0')
episode: 547 training return: tensor(-83.1411, device='cuda:0')
epoch: 137 test_true_pfm: 10270.09120299812 sim_pfm: 3.1701068635932947
episode: 548 training return: tensor(24.0046, device='cuda:0')
episode: 549 training return: tensor(-52.3214, device='cuda:0')
episode: 550 training return: tensor(78.9451, device='cuda:0')
episode: 551 training return: tensor(-43.3604, device='cuda:0')
epoch: 138 test_true_pfm: 10481.281399325124 sim_pfm: -222.94143608234785
episode: 552 training return: tensor(-70.0455, device='cuda:0')
episode: 553 training return: tensor(-824.8232, device='cuda:0')
episode: 554 training return: tensor(-74.1725, device='cuda:0')
episode: 555 training return: tensor(-39.0013, device='cuda:0')
epoch: 139 test_true_pfm: 10554.34693069953 sim_pfm: -353.49692882215214
episode: 556 training return: tensor(-270.8878, device='cuda:0')
episode: 557 training return: tensor(-255.5547, device='cuda:0')
episode: 558 training return: tensor(-15.9616, device='cuda:0')
episode: 559 training return: tensor(-69.5487, device='cuda:0')
epoch: 140 test_true_pfm: 10274.733457794498 sim_pfm: 28.689728357770946
episode: 560 training return: tensor(62.1582, device='cuda:0')
episode: 561 training return: tensor(-2.4212, device='cuda:0')
episode: 562 training return: tensor(-73.4512, device='cuda:0')
episode: 563 training return: tensor(65.1496, device='cuda:0')
epoch: 141 test_true_pfm: 10412.412939499243 sim_pfm: 113.97921193261088
episode: 564 training return: tensor(-72.2955, device='cuda:0')
episode: 565 training return: tensor(94.3853, device='cuda:0')
episode: 566 training return: tensor(-482.9365, device='cuda:0')
episode: 567 training return: tensor(87.8725, device='cuda:0')
epoch: 142 test_true_pfm: 8240.042109253875 sim_pfm: -208.7242823233828
episode: 568 training return: tensor(-139.3957, device='cuda:0')
episode: 569 training return: tensor(-98.3002, device='cuda:0')
episode: 570 training return: tensor(-148.2691, device='cuda:0')
episode: 571 training return: tensor(-69.0592, device='cuda:0')
epoch: 143 test_true_pfm: 10549.158406605327 sim_pfm: 120.66726238208746
episode: 572 training return: tensor(-999.8595, device='cuda:0')
episode: 573 training return: tensor(80.2116, device='cuda:0')
episode: 574 training return: tensor(-756.3927, device='cuda:0')
episode: 575 training return: tensor(55.8388, device='cuda:0')
epoch: 144 test_true_pfm: 10495.574932331492 sim_pfm: 116.29815027756074
episode: 576 training return: tensor(-177.4251, device='cuda:0')
episode: 577 training return: tensor(81.8275, device='cuda:0')
episode: 578 training return: tensor(74.1159, device='cuda:0')
episode: 579 training return: tensor(-36.2848, device='cuda:0')
epoch: 145 test_true_pfm: 3276.25006541718 sim_pfm: -5.858458204233709
episode: 580 training return: tensor(159.9881, device='cuda:0')
episode: 581 training return: tensor(-140.6963, device='cuda:0')
episode: 582 training return: tensor(-594.4876, device='cuda:0')
episode: 583 training return: tensor(45.9686, device='cuda:0')
epoch: 146 test_true_pfm: 10371.157558062065 sim_pfm: -279.10647026094375
episode: 584 training return: tensor(-138.9700, device='cuda:0')
episode: 585 training return: tensor(-999.9938, device='cuda:0')
episode: 586 training return: tensor(-424.2687, device='cuda:0')
episode: 587 training return: tensor(111.0763, device='cuda:0')
epoch: 147 test_true_pfm: 6861.620348302524 sim_pfm: 145.66281321557472
episode: 588 training return: tensor(-183.6954, device='cuda:0')
episode: 589 training return: tensor(20.7862, device='cuda:0')
episode: 590 training return: tensor(13.3468, device='cuda:0')
episode: 591 training return: tensor(90.0845, device='cuda:0')
epoch: 148 test_true_pfm: 10576.911049083552 sim_pfm: -210.6485022100775
episode: 592 training return: tensor(85.1240, device='cuda:0')
episode: 593 training return: tensor(153.4777, device='cuda:0')
episode: 594 training return: tensor(-999.9985, device='cuda:0')
episode: 595 training return: tensor(57.2351, device='cuda:0')
epoch: 149 test_true_pfm: 6791.946444087771 sim_pfm: 131.4234576675808
episode: 596 training return: tensor(-110.5595, device='cuda:0')
episode: 597 training return: tensor(-999.9991, device='cuda:0')
episode: 598 training return: tensor(95.3724, device='cuda:0')
episode: 599 training return: tensor(-101.7027, device='cuda:0')
epoch: 150 test_true_pfm: 10390.770410157953 sim_pfm: 135.87430819108462
