['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '0', '--data', '3000']
epoch: 0 training_loss 0.26362981483340264 test_loss: 0.09703578948974609
epoch: 1 training_loss 0.20662061356008052 test_loss: 0.0886705219745636
epoch: 2 training_loss 0.1956053577363491 test_loss: 0.08891853094100952
epoch: 3 training_loss 0.1903205543011427 test_loss: 0.0827896237373352
epoch: 4 training_loss 0.194356070458889 test_loss: 0.08229140639305114
epoch: 5 training_loss 0.1939652567356825 test_loss: 0.0897104263305664
epoch: 6 training_loss 0.18873049855232238 test_loss: 0.0892018735408783
epoch: 7 training_loss 0.18885317638516427 test_loss: 0.08933298587799073
epoch: 8 training_loss 0.1896139195561409 test_loss: 0.0891667664051056
epoch: 9 training_loss 0.18532030880451203 test_loss: 0.08811768293380737
epoch: 10 training_loss 0.1868184068799019 test_loss: 0.09053857922554016
epoch: 11 training_loss 0.17509032443165778 test_loss: 0.08430757522583007
epoch: 12 training_loss 0.17047502122819425 test_loss: 0.08504504561424256
epoch: 13 training_loss 0.18002404227852822 test_loss: 0.08619914650917053
epoch: 14 training_loss 0.17465094938874245 test_loss: 0.08706227540969849
epoch: 15 training_loss 0.1756530486792326 test_loss: 0.09186530113220215
epoch: 16 training_loss 0.17748301178216935 test_loss: 0.0868959367275238
epoch: 17 training_loss 0.17461514197289943 test_loss: 0.08154486417770386
epoch: 18 training_loss 0.17889776147902012 test_loss: 0.08612650036811828
epoch: 19 training_loss 0.18166435301303863 test_loss: 0.08778215050697327
epoch: 20 training_loss 0.18166761048138141 test_loss: 0.0898050308227539
epoch: 21 training_loss 0.17452739775180817 test_loss: 0.08638433814048767
epoch: 22 training_loss 0.17646852135658264 test_loss: 0.08664281964302063
epoch: 23 training_loss 0.17803116381168366 test_loss: 0.08451870083808899
epoch: 24 training_loss 0.17659712813794612 test_loss: 0.08811814785003662
epoch: 25 training_loss 0.16839209847152234 test_loss: 0.0853920340538025
epoch: 26 training_loss 0.17196056991815567 test_loss: 0.09012079238891602
epoch: 27 training_loss 0.17482883080840111 test_loss: 0.08783329725265503
epoch: 28 training_loss 0.17159185022115708 test_loss: 0.08872777223587036
epoch: 29 training_loss 0.1761937229335308 test_loss: 0.093614262342453
epoch: 30 training_loss 0.17550778314471244 test_loss: 0.08987371921539307
epoch: 31 training_loss 0.17743263430893422 test_loss: 0.09165869355201721
epoch: 32 training_loss 0.16464227244257926 test_loss: 0.08754773736000061
epoch: 33 training_loss 0.16702378660440445 test_loss: 0.09192968010902405
epoch: 34 training_loss 0.17112216629087926 test_loss: 0.09410720467567443
epoch: 35 training_loss 0.1750627412647009 test_loss: 0.09127497673034668
epoch: 36 training_loss 0.17349188320338726 test_loss: 0.08946643471717834
epoch: 37 training_loss 0.1552376203238964 test_loss: 0.09543195962905884
epoch: 38 training_loss 0.17208253480494023 test_loss: 0.09059762954711914
epoch: 39 training_loss 0.162287305817008 test_loss: 0.09171398878097534
epoch: 40 training_loss 0.16958952322602272 test_loss: 0.09181976914405823
epoch: 41 training_loss 0.16752442702651024 test_loss: 0.09584194421768188
epoch: 42 training_loss 0.1666928091645241 test_loss: 0.10047874450683594
epoch: 43 training_loss 0.16037451058626176 test_loss: 0.09355009198188782
epoch: 44 training_loss 0.1615615803003311 test_loss: 0.09747787714004516
epoch: 45 training_loss 0.15509515151381492 test_loss: 0.0937223732471466
epoch: 46 training_loss 0.16058339051902293 test_loss: 0.10245095491409302
epoch: 47 training_loss 0.1657459856942296 test_loss: 0.09287554621696473
epoch: 48 training_loss 0.15873818092048167 test_loss: 0.09439361691474915
epoch: 49 training_loss 0.15415688086301088 test_loss: 0.09611420631408692
epoch: 50 training_loss 0.15900474451482297 test_loss: 0.09074262380599976
epoch: 51 training_loss 0.1530374463647604 test_loss: 0.0924721598625183
epoch: 52 training_loss 0.15749636605381967 test_loss: 0.09557555317878723
epoch: 53 training_loss 0.1531817204132676 test_loss: 0.09610490798950196
epoch: 54 training_loss 0.1526088049262762 test_loss: 0.09874991774559021
epoch: 55 training_loss 0.15859374955296515 test_loss: 0.09338966012001038
epoch: 56 training_loss 0.15484821498394014 test_loss: 0.09866219758987427
epoch: 57 training_loss 0.15248094879090787 test_loss: 0.09252062439918518
epoch: 58 training_loss 0.1554807084798813 test_loss: 0.09856561422348023
epoch: 59 training_loss 0.1496900526434183 test_loss: 0.09380231499671936
epoch: 60 training_loss 0.1583079531788826 test_loss: 0.092283433675766
epoch: 61 training_loss 0.15507014263421298 test_loss: 0.09553143978118897
epoch: 62 training_loss 0.14292887419462205 test_loss: 0.10257017612457275
epoch: 63 training_loss 0.15483086109161376 test_loss: 0.10194028615951538
epoch: 64 training_loss 0.1463129820674658 test_loss: 0.10061562061309814
epoch: 65 training_loss 0.14770246893167496 test_loss: 0.10310369729995728
epoch: 66 training_loss 0.1554275544732809 test_loss: 0.10418299436569214
epoch: 67 training_loss 0.1492167879268527 test_loss: 0.1093437671661377
epoch: 68 training_loss 0.13821009680628776 test_loss: 0.10007256269454956
epoch: 69 training_loss 0.13329946130514145 test_loss: 0.10544342994689941
epoch: 70 training_loss 0.15280647568404673 test_loss: 0.09518263936042785
epoch: 71 training_loss 0.1405638889223337 test_loss: 0.102066969871521
epoch: 72 training_loss 0.14279127296060323 test_loss: 0.1017304539680481
epoch: 73 training_loss 0.13375378131866456 test_loss: 0.11407173871994018
epoch: 74 training_loss 0.13700562193989754 test_loss: 0.10419033765792847
epoch: 75 training_loss 0.12877841517329217 test_loss: 0.11008509397506713
epoch: 76 training_loss 0.12288122041150927 test_loss: 0.11322267055511474
epoch: 77 training_loss 0.12046602126210929 test_loss: 0.1053086519241333
epoch: 78 training_loss 0.1258776167407632 test_loss: 0.12962031364440918
epoch: 79 training_loss 0.13051657926291227 test_loss: 0.12067936658859253
epoch: 80 training_loss 0.1287208713963628 test_loss: 0.11399775743484497
epoch: 81 training_loss 0.12385310910642147 test_loss: 0.11292548179626465
epoch: 82 training_loss 0.11542908042669296 test_loss: 0.11985450983047485
epoch: 83 training_loss 0.12077542185783387 test_loss: 0.11929627656936645
epoch: 84 training_loss 0.12207175061106681 test_loss: 0.11510179042816163
epoch: 85 training_loss 0.10590878877788783 test_loss: 0.1209947109222412
epoch: 86 training_loss 0.11288037985563278 test_loss: 0.1226650357246399
epoch: 87 training_loss 0.1116074432618916 test_loss: 0.13111701011657714
epoch: 88 training_loss 0.10767871141433716 test_loss: 0.12821612358093262
epoch: 89 training_loss 0.11130416618660093 test_loss: 0.13226735591888428
epoch: 90 training_loss 0.1141107127442956 test_loss: 0.11229121685028076
epoch: 91 training_loss 0.10970903385430575 test_loss: 0.12058330774307251
epoch: 92 training_loss 0.11121221717447043 test_loss: 0.13259475231170653
epoch: 93 training_loss 0.10657887507230043 test_loss: 0.12246071100234986
epoch: 94 training_loss 0.1051429044827819 test_loss: 0.1230736494064331
epoch: 95 training_loss 0.10515961539000272 test_loss: 0.1264133095741272
epoch: 96 training_loss 0.09504901882261038 test_loss: 0.1263316750526428
epoch: 97 training_loss 0.10250127043575048 test_loss: 0.12817834615707396
epoch: 98 training_loss 0.10244245193898678 test_loss: 0.14691743850708008
epoch: 99 training_loss 0.09884474534541368 test_loss: 0.12264840602874756
epoch: 100 training_loss 0.09189214326441288 test_loss: 0.13739114999771118
epoch: 101 training_loss 0.10104999743402004 test_loss: 0.12990130186080934
epoch: 102 training_loss 0.09381219113245606 test_loss: 0.12827068567276
epoch: 103 training_loss 0.09548073383048176 test_loss: 0.13993301391601562
epoch: 104 training_loss 0.0842294879257679 test_loss: 0.13113139867782592
epoch: 105 training_loss 0.08277293929830193 test_loss: 0.1461241364479065
epoch: 106 training_loss 0.08803679898381234 test_loss: 0.1361206889152527
epoch: 107 training_loss 0.08497425731271506 test_loss: 0.13373336791992188
epoch: 108 training_loss 0.08462057277560234 test_loss: 0.13878269195556642
epoch: 109 training_loss 0.08168217113241553 test_loss: 0.13779034614562988
epoch: 110 training_loss 0.07859938824549317 test_loss: 0.1380167245864868
epoch: 111 training_loss 0.07778217855840922 test_loss: 0.15525274276733397
epoch: 112 training_loss 0.08464138768613338 test_loss: 0.15489166975021362
epoch: 113 training_loss 0.07754370363429189 test_loss: 0.1507340908050537
epoch: 114 training_loss 0.08282959977164865 test_loss: 0.15510255098342896
epoch: 115 training_loss 0.08062540154904127 test_loss: 0.1352735161781311
epoch: 116 training_loss 0.07441476151347161 test_loss: 0.14094096422195435
epoch: 117 training_loss 0.07596752483397723 test_loss: 0.16080976724624635
epoch: 118 training_loss 0.07287140000611543 test_loss: 0.13831669092178345
epoch: 119 training_loss 0.07470352299511433 test_loss: 0.15252066850662233
epoch: 120 training_loss 0.07154245603829622 test_loss: 0.15635697841644286
epoch: 121 training_loss 0.07114198813214898 test_loss: 0.13855311870574952
epoch: 122 training_loss 0.07151938203722238 test_loss: 0.14101145267486573
epoch: 123 training_loss 0.07268579918891191 test_loss: 0.14504055976867675
epoch: 124 training_loss 0.06841526463627816 test_loss: 0.1476631283760071
epoch: 125 training_loss 0.07157921763136983 test_loss: 0.1633332133293152
epoch: 126 training_loss 0.07093474639579654 test_loss: 0.1375621199607849
epoch: 127 training_loss 0.06663304494693875 test_loss: 0.15656012296676636
epoch: 128 training_loss 0.0649227954261005 test_loss: 0.1470581293106079
epoch: 129 training_loss 0.06526197655126452 test_loss: 0.15597248077392578
epoch: 130 training_loss 0.07176141288131475 test_loss: 0.15605978965759276
epoch: 131 training_loss 0.06181698063388467 test_loss: 0.1584809899330139
epoch: 132 training_loss 0.06515579281374811 test_loss: 0.16157405376434325
epoch: 133 training_loss 0.06424084279686212 test_loss: 0.14993470907211304
epoch: 134 training_loss 0.05652243159711361 test_loss: 0.15964564085006713
epoch: 135 training_loss 0.05659226581454277 test_loss: 0.16883437633514403
epoch: 136 training_loss 0.056673661330714824 test_loss: 0.17067750692367553
epoch: 137 training_loss 0.058325633518397806 test_loss: 0.15709630250930787
epoch: 138 training_loss 0.05767428804188967 test_loss: 0.1726905584335327
epoch: 139 training_loss 0.05336293013766408 test_loss: 0.16766419410705566
epoch: 140 training_loss 0.05359903741627932 test_loss: 0.18274109363555907
epoch: 141 training_loss 0.05470445072278381 test_loss: 0.15712827444076538
epoch: 142 training_loss 0.05453574718907475 test_loss: 0.15599790811538697
epoch: 143 training_loss 0.04775261842645705 test_loss: 0.17620939016342163
epoch: 144 training_loss 0.052789602670818565 test_loss: 0.17233550548553467
epoch: 145 training_loss 0.04803057773038745 test_loss: 0.17327842712402344
epoch: 146 training_loss 0.05391968529671431 test_loss: 0.17365365028381347
epoch: 147 training_loss 0.047521197106689214 test_loss: 0.18022669553756715
epoch: 148 training_loss 0.04804774932563305 test_loss: 0.1837196707725525
epoch: 149 training_loss 0.05214979072101414 test_loss: 0.1689760684967041
epoch: 0 training_loss 7.900890374183655 test_loss: 2.342106246948242
epoch: 1 training_loss 3.9556792736053468 test_loss: 1.5198087692260742
epoch: 2 training_loss 2.6756935143470764 test_loss: 1.1680681228637695
epoch: 3 training_loss 2.137514960765839 test_loss: 0.9791297912597656
epoch: 4 training_loss 1.8470820331573485 test_loss: 0.8661494255065918
epoch: 5 training_loss 1.6428584814071656 test_loss: 0.7833809375762939
epoch: 6 training_loss 1.5215637385845184 test_loss: 0.7443910121917725
epoch: 7 training_loss 1.3910205507278441 test_loss: 0.6804612636566162
epoch: 8 training_loss 1.315638643503189 test_loss: 0.6452336788177491
epoch: 9 training_loss 1.2701492619514465 test_loss: 0.6329245090484619
epoch: 10 training_loss 1.1904459565877914 test_loss: 0.582417106628418
epoch: 11 training_loss 1.1579747194051742 test_loss: 0.5607583522796631
epoch: 12 training_loss 1.1021443700790405 test_loss: 0.5519753932952881
epoch: 13 training_loss 1.0749958342313766 test_loss: 0.5425866603851318
epoch: 14 training_loss 1.017879865169525 test_loss: 0.5073978900909424
epoch: 15 training_loss 0.9893305748701096 test_loss: 0.513702392578125
epoch: 16 training_loss 0.9720298546552658 test_loss: 0.4884451389312744
epoch: 17 training_loss 0.9406403422355651 test_loss: 0.4734638690948486
epoch: 18 training_loss 0.917296620607376 test_loss: 0.4607537269592285
epoch: 19 training_loss 0.889532555937767 test_loss: 0.44814109802246094
epoch: 20 training_loss 0.8808492398262024 test_loss: 0.44662065505981446
epoch: 21 training_loss 0.85743665933609 test_loss: 0.4401707649230957
epoch: 22 training_loss 0.8479071748256684 test_loss: 0.43625617027282715
epoch: 23 training_loss 0.8216263318061828 test_loss: 0.42118382453918457
epoch: 24 training_loss 0.8177962690591812 test_loss: 0.4469779014587402
epoch: 25 training_loss 0.8092471468448639 test_loss: 0.4084293842315674
epoch: 26 training_loss 0.7969121783971786 test_loss: 0.4109299659729004
epoch: 27 training_loss 0.7858127409219742 test_loss: 0.39270591735839844
epoch: 28 training_loss 0.7688434845209122 test_loss: 0.40150747299194334
epoch: 29 training_loss 0.7669769263267517 test_loss: 0.3919473648071289
epoch: 30 training_loss 0.7604486417770385 test_loss: 0.383324408531189
epoch: 31 training_loss 0.7561599314212799 test_loss: 0.3762191295623779
epoch: 32 training_loss 0.7469093197584152 test_loss: 0.37749786376953126
epoch: 33 training_loss 0.7307203555107117 test_loss: 0.3709003210067749
epoch: 34 training_loss 0.7144340926408768 test_loss: 0.3720918893814087
epoch: 35 training_loss 0.720585635304451 test_loss: 0.36495020389556887
epoch: 36 training_loss 0.6988247030973435 test_loss: 0.35710022449493406
epoch: 37 training_loss 0.7030464613437652 test_loss: 0.3622979879379272
epoch: 38 training_loss 0.6936035257577896 test_loss: 0.3622798204421997
epoch: 39 training_loss 0.6850247192382812 test_loss: 0.351460862159729
epoch: 40 training_loss 0.680274715423584 test_loss: 0.35744638442993165
epoch: 41 training_loss 0.6709294587373733 test_loss: 0.34607539176940916
epoch: 42 training_loss 0.6755971449613571 test_loss: 0.349918532371521
epoch: 43 training_loss 0.6618541204929351 test_loss: 0.33454074859619143
epoch: 44 training_loss 0.6582509249448776 test_loss: 0.3432797431945801
epoch: 45 training_loss 0.6679534530639648 test_loss: 0.33777267932891847
epoch: 46 training_loss 0.6532766461372376 test_loss: 0.3298803806304932
epoch: 47 training_loss 0.6521821063756943 test_loss: 0.3405898571014404
epoch: 48 training_loss 0.6510586869716645 test_loss: 0.35908286571502684
epoch: 49 training_loss 0.6379851126670837 test_loss: 0.332283616065979
epoch: 50 training_loss 0.6250724452733993 test_loss: 0.34079458713531496
epoch: 51 training_loss 0.6261879020929336 test_loss: 0.31901397705078127
epoch: 52 training_loss 0.6125907343626023 test_loss: 0.3335076332092285
epoch: 53 training_loss 0.6154626399278641 test_loss: 0.3279773235321045
epoch: 54 training_loss 0.6080753910541534 test_loss: 0.3184669971466064
epoch: 55 training_loss 0.6162057137489318 test_loss: 0.31181628704071046
epoch: 56 training_loss 0.6069371968507766 test_loss: 0.32157092094421386
epoch: 57 training_loss 0.6008727467060089 test_loss: 0.3132298946380615
epoch: 58 training_loss 0.6035938048362732 test_loss: 0.31555795669555664
epoch: 59 training_loss 0.5917619121074676 test_loss: 0.3077535629272461
epoch: 60 training_loss 0.5936911588907242 test_loss: 0.31639339923858645
epoch: 61 training_loss 0.5985521370172501 test_loss: 0.31391494274139403
epoch: 62 training_loss 0.5866949898004532 test_loss: 0.30826482772827146
epoch: 63 training_loss 0.6000367158651352 test_loss: 0.3065525531768799
epoch: 64 training_loss 0.5742632734775543 test_loss: 0.3186546564102173
epoch: 65 training_loss 0.5861265248060227 test_loss: 0.3020280122756958
epoch: 66 training_loss 0.5782873129844666 test_loss: 0.3039844989776611
epoch: 67 training_loss 0.5766914981603622 test_loss: 0.3026799440383911
epoch: 68 training_loss 0.5699943622946739 test_loss: 0.30127222537994386
epoch: 69 training_loss 0.5684063640236855 test_loss: 0.3015174388885498
epoch: 70 training_loss 0.5703361523151398 test_loss: 0.29687411785125734
epoch: 71 training_loss 0.5629486721754074 test_loss: 0.29952278137207033
epoch: 72 training_loss 0.5653451311588288 test_loss: 0.30594983100891116
epoch: 73 training_loss 0.5573029413819313 test_loss: 0.3071810007095337
epoch: 74 training_loss 0.5634815007448196 test_loss: 0.2884580850601196
epoch: 75 training_loss 0.5578948926925659 test_loss: 0.29061737060546877
epoch: 76 training_loss 0.55598852455616 test_loss: 0.28605563640594484
epoch: 77 training_loss 0.5519623538851738 test_loss: 0.2848546028137207
epoch: 78 training_loss 0.5564255747199058 test_loss: 0.2978534460067749
epoch: 79 training_loss 0.5627513822913169 test_loss: 0.29253854751586916
epoch: 80 training_loss 0.5614819940924645 test_loss: 0.30985047817230227
epoch: 81 training_loss 0.5438580334186554 test_loss: 0.2829932689666748
epoch: 82 training_loss 0.5471241894364357 test_loss: 0.28941028118133544
epoch: 83 training_loss 0.5470415014028549 test_loss: 0.28790457248687745
epoch: 84 training_loss 0.5419198396801949 test_loss: 0.28841023445129393
epoch: 85 training_loss 0.5345566159486771 test_loss: 0.2845674991607666
epoch: 86 training_loss 0.5349083665013313 test_loss: 0.27820398807525637
epoch: 87 training_loss 0.5298837888240814 test_loss: 0.2799688816070557
epoch: 88 training_loss 0.5311368852853775 test_loss: 0.2878369569778442
epoch: 89 training_loss 0.5456193453073501 test_loss: 0.2770757913589478
epoch: 90 training_loss 0.5357649508118629 test_loss: 0.28390648365020754
epoch: 91 training_loss 0.5314669466018677 test_loss: 0.2826651096343994
epoch: 92 training_loss 0.5362550911307334 test_loss: 0.28510024547576907
epoch: 93 training_loss 0.5312520509958267 test_loss: 0.2768193244934082
epoch: 94 training_loss 0.5251252526044845 test_loss: 0.2768517732620239
epoch: 95 training_loss 0.5336954516172409 test_loss: 0.28821985721588134
epoch: 96 training_loss 0.53078822016716 test_loss: 0.27569429874420165
epoch: 97 training_loss 0.5225905391573906 test_loss: 0.28091318607330323
epoch: 98 training_loss 0.5203993821144104 test_loss: 0.2750078678131104
epoch: 99 training_loss 0.5291957387328148 test_loss: 0.27378230094909667
epoch: 100 training_loss 0.5151503950357437 test_loss: 0.2801805019378662
epoch: 101 training_loss 0.5241130974888801 test_loss: 0.2738350391387939
epoch: 102 training_loss 0.5221880745887756 test_loss: 0.272767972946167
epoch: 103 training_loss 0.5246896991133689 test_loss: 0.2689556121826172
epoch: 104 training_loss 0.5118485727906227 test_loss: 0.27851808071136475
epoch: 105 training_loss 0.5213556075096131 test_loss: 0.28313169479370115
epoch: 106 training_loss 0.5105532106757164 test_loss: 0.27078967094421386
epoch: 107 training_loss 0.515688838660717 test_loss: 0.27452104091644286
epoch: 108 training_loss 0.5111271357536316 test_loss: 0.2793373346328735
epoch: 109 training_loss 0.5053746736049652 test_loss: 0.26495463848114015
epoch: 110 training_loss 0.5036467641592026 test_loss: 0.27407045364379884
epoch: 111 training_loss 0.5081633630394936 test_loss: 0.2660017490386963
epoch: 112 training_loss 0.5059611228108406 test_loss: 0.2662580728530884
epoch: 113 training_loss 0.5109377232193947 test_loss: 0.2706986665725708
epoch: 114 training_loss 0.5049386316537857 test_loss: 0.26898193359375
epoch: 115 training_loss 0.5078595471382141 test_loss: 0.27633535861968994
epoch: 116 training_loss 0.49991791844367983 test_loss: 0.265531325340271
epoch: 117 training_loss 0.5149798664450645 test_loss: 0.2653787136077881
epoch: 118 training_loss 0.5022560915350914 test_loss: 0.2676257848739624
epoch: 119 training_loss 0.5051921024918556 test_loss: 0.2791621208190918
epoch: 120 training_loss 0.5047114205360412 test_loss: 0.26458594799041746
epoch: 121 training_loss 0.4983692118525505 test_loss: 0.2641641139984131
epoch: 122 training_loss 0.50268983989954 test_loss: 0.2748945951461792
epoch: 123 training_loss 0.5026749759912491 test_loss: 0.27222399711608886
epoch: 124 training_loss 0.5024357175827027 test_loss: 0.26228985786437986
epoch: 125 training_loss 0.48961538940668103 test_loss: 0.2686378717422485
epoch: 126 training_loss 0.4924876803159714 test_loss: 0.2606555461883545
epoch: 127 training_loss 0.4901193657517433 test_loss: 0.2586733102798462
epoch: 128 training_loss 0.5066241869330406 test_loss: 0.26614110469818114
epoch: 129 training_loss 0.5041512897610665 test_loss: 0.26634094715118406
epoch: 130 training_loss 0.49679609775543215 test_loss: 0.25386059284210205
epoch: 131 training_loss 0.48790381729602816 test_loss: 0.26331729888916017
epoch: 132 training_loss 0.4906930235028267 test_loss: 0.257935094833374
epoch: 133 training_loss 0.48631051152944565 test_loss: 0.2597111701965332
epoch: 134 training_loss 0.4885063380002975 test_loss: 0.26782999038696287
epoch: 135 training_loss 0.49260016918182375 test_loss: 0.2544354677200317
epoch: 136 training_loss 0.4877087357640266 test_loss: 0.25700674057006834
epoch: 137 training_loss 0.4845805883407593 test_loss: 0.25445756912231443
epoch: 138 training_loss 0.4866270953416824 test_loss: 0.2623286247253418
epoch: 139 training_loss 0.4914205858111382 test_loss: 0.2504908561706543
epoch: 140 training_loss 0.47908599346876146 test_loss: 0.25528388023376464
epoch: 141 training_loss 0.48247284650802613 test_loss: 0.2621758937835693
epoch: 142 training_loss 0.4860350635647774 test_loss: 0.2734572172164917
epoch: 143 training_loss 0.48457745373249056 test_loss: 0.25728371143341067
epoch: 144 training_loss 0.47379841804504397 test_loss: 0.2617707490921021
epoch: 145 training_loss 0.47923070400953294 test_loss: 0.25766763687133787
epoch: 146 training_loss 0.4782264280319214 test_loss: 0.25940814018249514
epoch: 147 training_loss 0.47984403640031814 test_loss: 0.2590554475784302
epoch: 148 training_loss 0.488156818151474 test_loss: 0.2633702278137207
epoch: 149 training_loss 0.4784231919050217 test_loss: 0.249849271774292
2201.6329428008144
episode: 0 training return: tensor(224.4959, device='cuda:0')
episode: 1 training return: tensor(-78.9181, device='cuda:0')
episode: 2 training return: tensor(74.9926, device='cuda:0')
episode: 3 training return: tensor(-37.0027, device='cuda:0')
epoch: 1 test_true_pfm: 2162.737778145433 sim_pfm: 242.29541220255973
episode: 4 training return: tensor(-117.1102, device='cuda:0')
episode: 5 training return: tensor(-47.0901, device='cuda:0')
episode: 6 training return: tensor(-85.3606, device='cuda:0')
episode: 7 training return: tensor(-46.8977, device='cuda:0')
epoch: 2 test_true_pfm: 2466.7545204831113 sim_pfm: -3.561487565030499
episode: 8 training return: tensor(178.9929, device='cuda:0')
episode: 9 training return: tensor(-64.0836, device='cuda:0')
episode: 10 training return: tensor(-61.9558, device='cuda:0')
episode: 11 training return: tensor(83.1320, device='cuda:0')
epoch: 3 test_true_pfm: 1651.7921850455932 sim_pfm: -45.99308090408643
episode: 12 training return: tensor(16.7670, device='cuda:0')
episode: 13 training return: tensor(93.6148, device='cuda:0')
episode: 14 training return: tensor(60.7186, device='cuda:0')
episode: 15 training return: tensor(190.5893, device='cuda:0')
epoch: 4 test_true_pfm: 1287.4874966550954 sim_pfm: -162.56810796967088
episode: 16 training return: tensor(261.0680, device='cuda:0')
episode: 17 training return: tensor(-23.0671, device='cuda:0')
episode: 18 training return: tensor(200.8282, device='cuda:0')
episode: 19 training return: tensor(-37.3382, device='cuda:0')
epoch: 5 test_true_pfm: 2455.6606540479074 sim_pfm: 129.7101752520151
episode: 20 training return: tensor(-17.8220, device='cuda:0')
episode: 21 training return: tensor(35.2450, device='cuda:0')
episode: 22 training return: tensor(99.4498, device='cuda:0')
episode: 23 training return: tensor(-87.3385, device='cuda:0')
epoch: 6 test_true_pfm: 2199.5117155299654 sim_pfm: 97.02439201803645
episode: 24 training return: tensor(149.3492, device='cuda:0')
episode: 25 training return: tensor(-17.5240, device='cuda:0')
episode: 26 training return: tensor(-49.4495, device='cuda:0')
episode: 27 training return: tensor(204.6742, device='cuda:0')
epoch: 7 test_true_pfm: 3270.5507319362077 sim_pfm: 371.28293647702475
episode: 28 training return: tensor(-82.0043, device='cuda:0')
episode: 29 training return: tensor(210.8385, device='cuda:0')
episode: 30 training return: tensor(7.3840, device='cuda:0')
episode: 31 training return: tensor(12.5689, device='cuda:0')
epoch: 8 test_true_pfm: 2397.225784004076 sim_pfm: 191.09607757358268
episode: 32 training return: tensor(118.9676, device='cuda:0')
episode: 33 training return: tensor(79.6927, device='cuda:0')
episode: 34 training return: tensor(112.3072, device='cuda:0')
episode: 35 training return: tensor(291.6282, device='cuda:0')
epoch: 9 test_true_pfm: 2402.4198665089375 sim_pfm: 216.52834654576145
episode: 36 training return: tensor(-46.3497, device='cuda:0')
episode: 37 training return: tensor(156.6659, device='cuda:0')
episode: 38 training return: tensor(28.5972, device='cuda:0')
episode: 39 training return: tensor(220.1737, device='cuda:0')
epoch: 10 test_true_pfm: 1787.6158281208197 sim_pfm: 193.06155047670472
episode: 40 training return: tensor(-3.7836, device='cuda:0')
episode: 41 training return: tensor(-38.7566, device='cuda:0')
episode: 42 training return: tensor(11.0329, device='cuda:0')
episode: 43 training return: tensor(-58.6872, device='cuda:0')
epoch: 11 test_true_pfm: 2087.3072275279874 sim_pfm: 190.4108785225544
episode: 44 training return: tensor(41.0237, device='cuda:0')
episode: 45 training return: tensor(60.8658, device='cuda:0')
episode: 46 training return: tensor(2.1345, device='cuda:0')
episode: 47 training return: tensor(397.7114, device='cuda:0')
epoch: 12 test_true_pfm: 2585.4523597564807 sim_pfm: 427.71156503834453
episode: 48 training return: tensor(-33.3676, device='cuda:0')
episode: 49 training return: tensor(51.5311, device='cuda:0')
episode: 50 training return: tensor(25.7041, device='cuda:0')
episode: 51 training return: tensor(114.7957, device='cuda:0')
epoch: 13 test_true_pfm: 2671.008015269943 sim_pfm: 277.98698071950156
episode: 52 training return: tensor(422.6674, device='cuda:0')
episode: 53 training return: tensor(516.3978, device='cuda:0')
episode: 54 training return: tensor(427.5486, device='cuda:0')
episode: 55 training return: tensor(-23.7172, device='cuda:0')
epoch: 14 test_true_pfm: 3042.7703863216398 sim_pfm: 294.95972307305783
episode: 56 training return: tensor(28.6302, device='cuda:0')
episode: 57 training return: tensor(141.7513, device='cuda:0')
episode: 58 training return: tensor(3.1138, device='cuda:0')
episode: 59 training return: tensor(-20.2466, device='cuda:0')
epoch: 15 test_true_pfm: 2445.377376576303 sim_pfm: 299.7458317130707
episode: 60 training return: tensor(-3.4893, device='cuda:0')
episode: 61 training return: tensor(465.3037, device='cuda:0')
episode: 62 training return: tensor(471.5347, device='cuda:0')
episode: 63 training return: tensor(115.1147, device='cuda:0')
epoch: 16 test_true_pfm: 2269.8156448072878 sim_pfm: 267.2046896008348
episode: 64 training return: tensor(1.9809, device='cuda:0')
episode: 65 training return: tensor(-48.2827, device='cuda:0')
episode: 66 training return: tensor(308.9785, device='cuda:0')
episode: 67 training return: tensor(-91.8963, device='cuda:0')
epoch: 17 test_true_pfm: 2884.2540721480077 sim_pfm: 300.92212290576816
episode: 68 training return: tensor(231.0902, device='cuda:0')
episode: 69 training return: tensor(73.0814, device='cuda:0')
episode: 70 training return: tensor(69.4042, device='cuda:0')
episode: 71 training return: tensor(-41.8890, device='cuda:0')
epoch: 18 test_true_pfm: 2452.5676858713346 sim_pfm: 205.69744593215486
episode: 72 training return: tensor(156.2797, device='cuda:0')
episode: 73 training return: tensor(-4.7742, device='cuda:0')
episode: 74 training return: tensor(33.6818, device='cuda:0')
episode: 75 training return: tensor(-39.1556, device='cuda:0')
epoch: 19 test_true_pfm: 2863.45095384958 sim_pfm: 278.07952171346795
episode: 76 training return: tensor(-5.4758, device='cuda:0')
episode: 77 training return: tensor(17.7234, device='cuda:0')
episode: 78 training return: tensor(-29.4517, device='cuda:0')
episode: 79 training return: tensor(261.8482, device='cuda:0')
epoch: 20 test_true_pfm: 2373.714950782764 sim_pfm: 214.27024027643105
episode: 80 training return: tensor(212.2473, device='cuda:0')
episode: 81 training return: tensor(90.9410, device='cuda:0')
episode: 82 training return: tensor(205.3942, device='cuda:0')
episode: 83 training return: tensor(-49.6598, device='cuda:0')
epoch: 21 test_true_pfm: 2958.2607977237944 sim_pfm: 435.1675532126101
episode: 84 training return: tensor(458.3239, device='cuda:0')
episode: 85 training return: tensor(119.8130, device='cuda:0')
episode: 86 training return: tensor(123.9953, device='cuda:0')
episode: 87 training return: tensor(82.3699, device='cuda:0')
epoch: 22 test_true_pfm: 3053.430329359772 sim_pfm: 288.8924575623144
episode: 88 training return: tensor(195.7695, device='cuda:0')
episode: 89 training return: tensor(-36.5651, device='cuda:0')
episode: 90 training return: tensor(-10.7672, device='cuda:0')
episode: 91 training return: tensor(134.2989, device='cuda:0')
epoch: 23 test_true_pfm: 3144.547279433457 sim_pfm: 179.45536446963283
episode: 92 training return: tensor(284.0877, device='cuda:0')
episode: 93 training return: tensor(69.0750, device='cuda:0')
episode: 94 training return: tensor(228.8498, device='cuda:0')
episode: 95 training return: tensor(102.9388, device='cuda:0')
epoch: 24 test_true_pfm: 2850.8392964801 sim_pfm: 192.7372263999035
episode: 96 training return: tensor(166.1680, device='cuda:0')
episode: 97 training return: tensor(77.7511, device='cuda:0')
episode: 98 training return: tensor(-51.8235, device='cuda:0')
episode: 99 training return: tensor(146.5549, device='cuda:0')
epoch: 25 test_true_pfm: 2634.876278253136 sim_pfm: 268.60837978297303
episode: 100 training return: tensor(62.2013, device='cuda:0')
episode: 101 training return: tensor(-77.6050, device='cuda:0')
episode: 102 training return: tensor(-19.2271, device='cuda:0')
episode: 103 training return: tensor(359.9982, device='cuda:0')
epoch: 26 test_true_pfm: 2631.3865148793043 sim_pfm: 442.7478761311116
episode: 104 training return: tensor(273.4591, device='cuda:0')
episode: 105 training return: tensor(-19.0425, device='cuda:0')
episode: 106 training return: tensor(-5.4422, device='cuda:0')
episode: 107 training return: tensor(523.1332, device='cuda:0')
epoch: 27 test_true_pfm: 2262.045484529035 sim_pfm: 340.0714908834004
episode: 108 training return: tensor(-37.6730, device='cuda:0')
episode: 109 training return: tensor(140.8140, device='cuda:0')
episode: 110 training return: tensor(64.7849, device='cuda:0')
episode: 111 training return: tensor(-65.2525, device='cuda:0')
epoch: 28 test_true_pfm: 2052.9765059052374 sim_pfm: 341.13149027825176
episode: 112 training return: tensor(264.5601, device='cuda:0')
episode: 113 training return: tensor(-33.9757, device='cuda:0')
episode: 114 training return: tensor(194.2524, device='cuda:0')
episode: 115 training return: tensor(-27.9514, device='cuda:0')
epoch: 29 test_true_pfm: 2909.974578074602 sim_pfm: 353.1024424557302
episode: 116 training return: tensor(186.5589, device='cuda:0')
episode: 117 training return: tensor(480.9764, device='cuda:0')
episode: 118 training return: tensor(27.6609, device='cuda:0')
episode: 119 training return: tensor(28.7400, device='cuda:0')
epoch: 30 test_true_pfm: 2251.000119811262 sim_pfm: 446.6763498184834
episode: 120 training return: tensor(144.7894, device='cuda:0')
episode: 121 training return: tensor(169.1620, device='cuda:0')
episode: 122 training return: tensor(-27.3893, device='cuda:0')
episode: 123 training return: tensor(-64.3972, device='cuda:0')
epoch: 31 test_true_pfm: 2214.4440256675566 sim_pfm: 180.5403449742007
episode: 124 training return: tensor(101.4232, device='cuda:0')
episode: 125 training return: tensor(-62.1875, device='cuda:0')
episode: 126 training return: tensor(-65.5827, device='cuda:0')
episode: 127 training return: tensor(-6.9032, device='cuda:0')
epoch: 32 test_true_pfm: 2524.106374631249 sim_pfm: 442.8896108866708
episode: 128 training return: tensor(160.9639, device='cuda:0')
episode: 129 training return: tensor(-22.3455, device='cuda:0')
episode: 130 training return: tensor(126.8354, device='cuda:0')
episode: 131 training return: tensor(-7.1133, device='cuda:0')
epoch: 33 test_true_pfm: 2866.0850828952885 sim_pfm: 223.47979106445564
episode: 132 training return: tensor(9.0981, device='cuda:0')
episode: 133 training return: tensor(143.9937, device='cuda:0')
episode: 134 training return: tensor(417.2689, device='cuda:0')
episode: 135 training return: tensor(135.4269, device='cuda:0')
epoch: 34 test_true_pfm: 2935.4646462406795 sim_pfm: 282.4627243894308
episode: 136 training return: tensor(45.1906, device='cuda:0')
episode: 137 training return: tensor(120.3001, device='cuda:0')
episode: 138 training return: tensor(62.0705, device='cuda:0')
episode: 139 training return: tensor(-6.4916, device='cuda:0')
epoch: 35 test_true_pfm: 3327.0161780745234 sim_pfm: 167.36199261872875
episode: 140 training return: tensor(-15.5737, device='cuda:0')
episode: 141 training return: tensor(137.1987, device='cuda:0')
episode: 142 training return: tensor(144.0390, device='cuda:0')
episode: 143 training return: tensor(169.4545, device='cuda:0')
epoch: 36 test_true_pfm: 3248.0971951880783 sim_pfm: 344.37749987384694
episode: 144 training return: tensor(7.3205, device='cuda:0')
episode: 145 training return: tensor(13.2357, device='cuda:0')
episode: 146 training return: tensor(40.5644, device='cuda:0')
episode: 147 training return: tensor(-14.3887, device='cuda:0')
epoch: 37 test_true_pfm: 2301.66995674113 sim_pfm: 390.7723796056137
episode: 148 training return: tensor(59.8911, device='cuda:0')
episode: 149 training return: tensor(-64.7775, device='cuda:0')
episode: 150 training return: tensor(20.5975, device='cuda:0')
episode: 151 training return: tensor(-20.7584, device='cuda:0')
epoch: 38 test_true_pfm: 2356.8251760405406 sim_pfm: 263.2718473091566
episode: 152 training return: tensor(9.5108, device='cuda:0')
episode: 153 training return: tensor(82.3700, device='cuda:0')
episode: 154 training return: tensor(47.1283, device='cuda:0')
episode: 155 training return: tensor(62.2864, device='cuda:0')
epoch: 39 test_true_pfm: 2764.67818549921 sim_pfm: 320.2148628779978
episode: 156 training return: tensor(-2.3308, device='cuda:0')
episode: 157 training return: tensor(222.6311, device='cuda:0')
episode: 158 training return: tensor(467.7989, device='cuda:0')
episode: 159 training return: tensor(-9.6545, device='cuda:0')
epoch: 40 test_true_pfm: 2500.378585256824 sim_pfm: 243.0072671310821
episode: 160 training return: tensor(92.0024, device='cuda:0')
episode: 161 training return: tensor(81.6462, device='cuda:0')
episode: 162 training return: tensor(10.2835, device='cuda:0')
episode: 163 training return: tensor(52.1156, device='cuda:0')
epoch: 41 test_true_pfm: 2232.7553306025043 sim_pfm: 215.17667737341253
episode: 164 training return: tensor(65.9968, device='cuda:0')
episode: 165 training return: tensor(286.1984, device='cuda:0')
episode: 166 training return: tensor(130.0671, device='cuda:0')
episode: 167 training return: tensor(-43.5856, device='cuda:0')
epoch: 42 test_true_pfm: 2665.215060202407 sim_pfm: 140.89391276508104
episode: 168 training return: tensor(-9.3643, device='cuda:0')
episode: 169 training return: tensor(-35.3179, device='cuda:0')
episode: 170 training return: tensor(25.7458, device='cuda:0')
episode: 171 training return: tensor(4.3230, device='cuda:0')
epoch: 43 test_true_pfm: 2641.4201811634907 sim_pfm: 285.02272575659055
episode: 172 training return: tensor(-21.2784, device='cuda:0')
episode: 173 training return: tensor(81.7038, device='cuda:0')
episode: 174 training return: tensor(410.3135, device='cuda:0')
episode: 175 training return: tensor(451.5568, device='cuda:0')
epoch: 44 test_true_pfm: 2315.672252970283 sim_pfm: 402.0790321087309
episode: 176 training return: tensor(8.6209, device='cuda:0')
episode: 177 training return: tensor(290.9791, device='cuda:0')
episode: 178 training return: tensor(-70.7761, device='cuda:0')
episode: 179 training return: tensor(101.4693, device='cuda:0')
epoch: 45 test_true_pfm: 2166.236123925603 sim_pfm: 290.86281694673625
episode: 180 training return: tensor(100.8358, device='cuda:0')
episode: 181 training return: tensor(87.4381, device='cuda:0')
episode: 182 training return: tensor(54.6983, device='cuda:0')
episode: 183 training return: tensor(324.2972, device='cuda:0')
epoch: 46 test_true_pfm: 3184.2527366033864 sim_pfm: 309.48551443296793
episode: 184 training return: tensor(14.1499, device='cuda:0')
episode: 185 training return: tensor(7.7657, device='cuda:0')
episode: 186 training return: tensor(275.1218, device='cuda:0')
episode: 187 training return: tensor(-10.6189, device='cuda:0')
epoch: 47 test_true_pfm: 2962.56029125503 sim_pfm: 367.1213044661951
episode: 188 training return: tensor(-1.8518, device='cuda:0')
episode: 189 training return: tensor(420.4381, device='cuda:0')
episode: 190 training return: tensor(-38.7442, device='cuda:0')
episode: 191 training return: tensor(51.8358, device='cuda:0')
epoch: 48 test_true_pfm: 2526.7670873200814 sim_pfm: 364.42030394414905
episode: 192 training return: tensor(455.8847, device='cuda:0')
episode: 193 training return: tensor(49.4580, device='cuda:0')
episode: 194 training return: tensor(-31.6809, device='cuda:0')
episode: 195 training return: tensor(78.0435, device='cuda:0')
epoch: 49 test_true_pfm: 2442.019993787173 sim_pfm: 268.95533713061985
episode: 196 training return: tensor(361.1475, device='cuda:0')
episode: 197 training return: tensor(-25.5031, device='cuda:0')
episode: 198 training return: tensor(88.5224, device='cuda:0')
episode: 199 training return: tensor(-4.8534, device='cuda:0')
epoch: 50 test_true_pfm: 2040.117538832181 sim_pfm: 140.11803968371046
episode: 200 training return: tensor(8.9372, device='cuda:0')
episode: 201 training return: tensor(190.8280, device='cuda:0')
episode: 202 training return: tensor(-39.0144, device='cuda:0')
episode: 203 training return: tensor(51.7867, device='cuda:0')
epoch: 51 test_true_pfm: 2950.7614132083395 sim_pfm: 242.23137440821543
episode: 204 training return: tensor(29.2380, device='cuda:0')
episode: 205 training return: tensor(300.9332, device='cuda:0')
episode: 206 training return: tensor(501.6581, device='cuda:0')
episode: 207 training return: tensor(444.1551, device='cuda:0')
epoch: 52 test_true_pfm: 2020.3954135915294 sim_pfm: 163.68580513234096
episode: 208 training return: tensor(417.1263, device='cuda:0')
episode: 209 training return: tensor(182.2024, device='cuda:0')
episode: 210 training return: tensor(75.9096, device='cuda:0')
episode: 211 training return: tensor(44.4826, device='cuda:0')
epoch: 53 test_true_pfm: 2264.529303908077 sim_pfm: 257.8719337915148
episode: 212 training return: tensor(44.4903, device='cuda:0')
episode: 213 training return: tensor(198.2152, device='cuda:0')
episode: 214 training return: tensor(-37.9618, device='cuda:0')
episode: 215 training return: tensor(429.0088, device='cuda:0')
epoch: 54 test_true_pfm: 2942.5885311238885 sim_pfm: 184.06847960781306
episode: 216 training return: tensor(-24.7851, device='cuda:0')
episode: 217 training return: tensor(26.9442, device='cuda:0')
episode: 218 training return: tensor(30.1001, device='cuda:0')
episode: 219 training return: tensor(37.9677, device='cuda:0')
epoch: 55 test_true_pfm: 3073.0661766802186 sim_pfm: 447.23568697863567
episode: 220 training return: tensor(43.1102, device='cuda:0')
episode: 221 training return: tensor(414.6061, device='cuda:0')
episode: 222 training return: tensor(141.8710, device='cuda:0')
episode: 223 training return: tensor(415.9341, device='cuda:0')
epoch: 56 test_true_pfm: 2494.4743199557324 sim_pfm: 167.58708442475958
episode: 224 training return: tensor(42.5438, device='cuda:0')
episode: 225 training return: tensor(9.1353, device='cuda:0')
episode: 226 training return: tensor(95.5056, device='cuda:0')
episode: 227 training return: tensor(443.9141, device='cuda:0')
epoch: 57 test_true_pfm: 2465.9629809233725 sim_pfm: 220.26397509120093
episode: 228 training return: tensor(137.3594, device='cuda:0')
episode: 229 training return: tensor(361.0789, device='cuda:0')
episode: 230 training return: tensor(451.8305, device='cuda:0')
episode: 231 training return: tensor(241.9395, device='cuda:0')
epoch: 58 test_true_pfm: 2705.71390578233 sim_pfm: 168.06727441450735
episode: 232 training return: tensor(6.8731, device='cuda:0')
episode: 233 training return: tensor(382.6290, device='cuda:0')
episode: 234 training return: tensor(38.0979, device='cuda:0')
episode: 235 training return: tensor(18.8720, device='cuda:0')
epoch: 59 test_true_pfm: 2529.4167002128684 sim_pfm: 267.4416632973201
episode: 236 training return: tensor(20.8721, device='cuda:0')
episode: 237 training return: tensor(-37.1557, device='cuda:0')
episode: 238 training return: tensor(-21.2419, device='cuda:0')
episode: 239 training return: tensor(190.9180, device='cuda:0')
epoch: 60 test_true_pfm: 3015.1703437918695 sim_pfm: 263.04014667142957
episode: 240 training return: tensor(515.1547, device='cuda:0')
episode: 241 training return: tensor(451.9781, device='cuda:0')
episode: 242 training return: tensor(453.9754, device='cuda:0')
episode: 243 training return: tensor(2.0384, device='cuda:0')
epoch: 61 test_true_pfm: 2843.7924140808423 sim_pfm: 102.45893761583527
episode: 244 training return: tensor(221.2913, device='cuda:0')
episode: 245 training return: tensor(-8.5083, device='cuda:0')
episode: 246 training return: tensor(169.2850, device='cuda:0')
episode: 247 training return: tensor(54.1240, device='cuda:0')
epoch: 62 test_true_pfm: 2117.1418576832307 sim_pfm: 209.765757331353
episode: 248 training return: tensor(10.5277, device='cuda:0')
episode: 249 training return: tensor(94.3231, device='cuda:0')
episode: 250 training return: tensor(6.4041, device='cuda:0')
episode: 251 training return: tensor(70.2140, device='cuda:0')
epoch: 63 test_true_pfm: 2732.6855551915046 sim_pfm: 365.2329115981702
episode: 252 training return: tensor(35.9143, device='cuda:0')
episode: 253 training return: tensor(44.7518, device='cuda:0')
episode: 254 training return: tensor(343.5055, device='cuda:0')
episode: 255 training return: tensor(68.1158, device='cuda:0')
epoch: 64 test_true_pfm: 2677.0741991180807 sim_pfm: 374.75174134733
episode: 256 training return: tensor(-28.2187, device='cuda:0')
episode: 257 training return: tensor(257.7853, device='cuda:0')
episode: 258 training return: tensor(28.5828, device='cuda:0')
episode: 259 training return: tensor(367.2951, device='cuda:0')
epoch: 65 test_true_pfm: 2849.681048286005 sim_pfm: 364.47187321962946
episode: 260 training return: tensor(238.2221, device='cuda:0')
episode: 261 training return: tensor(163.4278, device='cuda:0')
episode: 262 training return: tensor(55.5419, device='cuda:0')
episode: 263 training return: tensor(173.7498, device='cuda:0')
epoch: 66 test_true_pfm: 2163.6812522730133 sim_pfm: 241.17877463404633
episode: 264 training return: tensor(63.0536, device='cuda:0')
episode: 265 training return: tensor(150.9672, device='cuda:0')
episode: 266 training return: tensor(-22.7996, device='cuda:0')
episode: 267 training return: tensor(155.1322, device='cuda:0')
epoch: 67 test_true_pfm: 2715.2756691654713 sim_pfm: 285.88165021664463
episode: 268 training return: tensor(52.1193, device='cuda:0')
episode: 269 training return: tensor(70.1674, device='cuda:0')
episode: 270 training return: tensor(33.0047, device='cuda:0')
episode: 271 training return: tensor(28.9843, device='cuda:0')
epoch: 68 test_true_pfm: 2809.639820946359 sim_pfm: 313.87078985342913
episode: 272 training return: tensor(58.5791, device='cuda:0')
episode: 273 training return: tensor(57.2925, device='cuda:0')
episode: 274 training return: tensor(102.9202, device='cuda:0')
episode: 275 training return: tensor(179.1755, device='cuda:0')
epoch: 69 test_true_pfm: 2567.601467168397 sim_pfm: 388.83918632986024
episode: 276 training return: tensor(465.1049, device='cuda:0')
episode: 277 training return: tensor(90.5213, device='cuda:0')
episode: 278 training return: tensor(128.7636, device='cuda:0')
episode: 279 training return: tensor(476.3619, device='cuda:0')
epoch: 70 test_true_pfm: 2450.3776799959774 sim_pfm: 422.89844183808117
episode: 280 training return: tensor(302.3475, device='cuda:0')
episode: 281 training return: tensor(419.8405, device='cuda:0')
episode: 282 training return: tensor(430.8230, device='cuda:0')
episode: 283 training return: tensor(10.0458, device='cuda:0')
epoch: 71 test_true_pfm: 2496.182020979247 sim_pfm: 418.8693873594166
episode: 284 training return: tensor(238.3163, device='cuda:0')
episode: 285 training return: tensor(198.4583, device='cuda:0')
episode: 286 training return: tensor(22.2612, device='cuda:0')
episode: 287 training return: tensor(183.0104, device='cuda:0')
epoch: 72 test_true_pfm: 2430.0219692676524 sim_pfm: 335.6607982755522
episode: 288 training return: tensor(53.3571, device='cuda:0')
episode: 289 training return: tensor(129.7235, device='cuda:0')
episode: 290 training return: tensor(64.4964, device='cuda:0')
episode: 291 training return: tensor(430.7935, device='cuda:0')
epoch: 73 test_true_pfm: 2030.1823508500254 sim_pfm: 240.60985229855092
episode: 292 training return: tensor(34.3378, device='cuda:0')
episode: 293 training return: tensor(89.6399, device='cuda:0')
episode: 294 training return: tensor(153.1410, device='cuda:0')
episode: 295 training return: tensor(46.1883, device='cuda:0')
epoch: 74 test_true_pfm: 2916.3228529506036 sim_pfm: 425.6836809788365
episode: 296 training return: tensor(28.8700, device='cuda:0')
episode: 297 training return: tensor(169.0487, device='cuda:0')
episode: 298 training return: tensor(224.3331, device='cuda:0')
episode: 299 training return: tensor(25.5750, device='cuda:0')
epoch: 75 test_true_pfm: 2448.5522498408895 sim_pfm: 276.358403999242
episode: 300 training return: tensor(6.5912, device='cuda:0')
episode: 301 training return: tensor(197.3119, device='cuda:0')
episode: 302 training return: tensor(40.8375, device='cuda:0')
episode: 303 training return: tensor(469.3044, device='cuda:0')
epoch: 76 test_true_pfm: 2864.4763680422257 sim_pfm: 331.0607553666147
episode: 304 training return: tensor(158.4485, device='cuda:0')
episode: 305 training return: tensor(40.0599, device='cuda:0')
episode: 306 training return: tensor(457.5360, device='cuda:0')
episode: 307 training return: tensor(328.7311, device='cuda:0')
epoch: 77 test_true_pfm: 2950.2303163729885 sim_pfm: 175.39721711028446
episode: 308 training return: tensor(79.0440, device='cuda:0')
episode: 309 training return: tensor(12.9049, device='cuda:0')
episode: 310 training return: tensor(7.7457, device='cuda:0')
episode: 311 training return: tensor(110.2479, device='cuda:0')
epoch: 78 test_true_pfm: 1999.505703321541 sim_pfm: 203.1773361892168
episode: 312 training return: tensor(146.1445, device='cuda:0')
episode: 313 training return: tensor(30.1918, device='cuda:0')
episode: 314 training return: tensor(434.6392, device='cuda:0')
episode: 315 training return: tensor(98.1370, device='cuda:0')
epoch: 79 test_true_pfm: 2994.7496757197587 sim_pfm: 346.4974605346409
episode: 316 training return: tensor(286.4713, device='cuda:0')
episode: 317 training return: tensor(-25.4738, device='cuda:0')
episode: 318 training return: tensor(10.5374, device='cuda:0')
episode: 319 training return: tensor(12.9462, device='cuda:0')
epoch: 80 test_true_pfm: 2028.4607430188444 sim_pfm: 263.2803986071958
episode: 320 training return: tensor(53.1701, device='cuda:0')
episode: 321 training return: tensor(42.4173, device='cuda:0')
episode: 322 training return: tensor(13.3500, device='cuda:0')
episode: 323 training return: tensor(69.5881, device='cuda:0')
epoch: 81 test_true_pfm: 2898.6017382996056 sim_pfm: 340.21902357885847
episode: 324 training return: tensor(-17.9954, device='cuda:0')
episode: 325 training return: tensor(83.7157, device='cuda:0')
episode: 326 training return: tensor(62.8449, device='cuda:0')
episode: 327 training return: tensor(224.1080, device='cuda:0')
epoch: 82 test_true_pfm: 2815.2036080986068 sim_pfm: 367.8155060221907
episode: 328 training return: tensor(57.2695, device='cuda:0')
episode: 329 training return: tensor(112.1711, device='cuda:0')
episode: 330 training return: tensor(41.9398, device='cuda:0')
episode: 331 training return: tensor(454.6237, device='cuda:0')
epoch: 83 test_true_pfm: 2453.351577903476 sim_pfm: 356.482103216035
episode: 332 training return: tensor(179.4550, device='cuda:0')
episode: 333 training return: tensor(163.2975, device='cuda:0')
episode: 334 training return: tensor(54.2853, device='cuda:0')
episode: 335 training return: tensor(118.6423, device='cuda:0')
epoch: 84 test_true_pfm: 3054.0902121005292 sim_pfm: 193.25684869535812
episode: 336 training return: tensor(60.5398, device='cuda:0')
episode: 337 training return: tensor(174.6490, device='cuda:0')
episode: 338 training return: tensor(329.6443, device='cuda:0')
episode: 339 training return: tensor(81.0455, device='cuda:0')
epoch: 85 test_true_pfm: 3068.5864853792614 sim_pfm: 169.02252827033712
episode: 340 training return: tensor(118.8713, device='cuda:0')
episode: 341 training return: tensor(291.0696, device='cuda:0')
episode: 342 training return: tensor(221.9571, device='cuda:0')
episode: 343 training return: tensor(51.9612, device='cuda:0')
epoch: 86 test_true_pfm: 2569.6034437288745 sim_pfm: 391.88819047342986
episode: 344 training return: tensor(-6.0988, device='cuda:0')
episode: 345 training return: tensor(8.4144, device='cuda:0')
episode: 346 training return: tensor(51.1441, device='cuda:0')
episode: 347 training return: tensor(283.2854, device='cuda:0')
epoch: 87 test_true_pfm: 2565.671143033192 sim_pfm: 252.91489679098595
episode: 348 training return: tensor(60.5272, device='cuda:0')
episode: 349 training return: tensor(353.7583, device='cuda:0')
episode: 350 training return: tensor(113.8905, device='cuda:0')
episode: 351 training return: tensor(78.2183, device='cuda:0')
epoch: 88 test_true_pfm: 2783.784139790418 sim_pfm: 328.2937962667784
episode: 352 training return: tensor(169.0823, device='cuda:0')
episode: 353 training return: tensor(308.6445, device='cuda:0')
episode: 354 training return: tensor(0.5806, device='cuda:0')
episode: 355 training return: tensor(483.7242, device='cuda:0')
epoch: 89 test_true_pfm: 2503.1721915240482 sim_pfm: 200.48218819419466
episode: 356 training return: tensor(302.1124, device='cuda:0')
episode: 357 training return: tensor(329.4396, device='cuda:0')
episode: 358 training return: tensor(108.8339, device='cuda:0')
episode: 359 training return: tensor(72.1067, device='cuda:0')
epoch: 90 test_true_pfm: 2304.951248471163 sim_pfm: 270.7873905497836
episode: 360 training return: tensor(327.2907, device='cuda:0')
episode: 361 training return: tensor(8.9168, device='cuda:0')
episode: 362 training return: tensor(236.9217, device='cuda:0')
episode: 363 training return: tensor(25.2512, device='cuda:0')
epoch: 91 test_true_pfm: 2529.35636933672 sim_pfm: 277.65356481529307
episode: 364 training return: tensor(47.3280, device='cuda:0')
episode: 365 training return: tensor(46.4839, device='cuda:0')
episode: 366 training return: tensor(1.2206, device='cuda:0')
episode: 367 training return: tensor(41.8210, device='cuda:0')
epoch: 92 test_true_pfm: 2383.4692571462638 sim_pfm: 210.6677208027492
episode: 368 training return: tensor(531.6891, device='cuda:0')
episode: 369 training return: tensor(195.4143, device='cuda:0')
episode: 370 training return: tensor(181.9964, device='cuda:0')
episode: 371 training return: tensor(37.3617, device='cuda:0')
epoch: 93 test_true_pfm: 2705.0692968441867 sim_pfm: 331.9374579380189
episode: 372 training return: tensor(64.4869, device='cuda:0')
episode: 373 training return: tensor(172.7997, device='cuda:0')
episode: 374 training return: tensor(277.4362, device='cuda:0')
episode: 375 training return: tensor(307.0614, device='cuda:0')
epoch: 94 test_true_pfm: 2833.836979283134 sim_pfm: 208.23768240842037
episode: 376 training return: tensor(444.0182, device='cuda:0')
episode: 377 training return: tensor(276.0150, device='cuda:0')
episode: 378 training return: tensor(95.3037, device='cuda:0')
episode: 379 training return: tensor(1.6653, device='cuda:0')
epoch: 95 test_true_pfm: 2401.9485795916785 sim_pfm: 262.5856371364789
episode: 380 training return: tensor(97.0117, device='cuda:0')
episode: 381 training return: tensor(38.9351, device='cuda:0')
episode: 382 training return: tensor(41.6448, device='cuda:0')
episode: 383 training return: tensor(149.7217, device='cuda:0')
epoch: 96 test_true_pfm: 2017.6753575218506 sim_pfm: 372.23379967340344
episode: 384 training return: tensor(185.9161, device='cuda:0')
episode: 385 training return: tensor(475.5674, device='cuda:0')
episode: 386 training return: tensor(52.8613, device='cuda:0')
episode: 387 training return: tensor(-8.8048, device='cuda:0')
epoch: 97 test_true_pfm: 2778.6802222886995 sim_pfm: 289.9371671919459
episode: 388 training return: tensor(186.7969, device='cuda:0')
episode: 389 training return: tensor(100.8374, device='cuda:0')
episode: 390 training return: tensor(33.4357, device='cuda:0')
episode: 391 training return: tensor(90.9707, device='cuda:0')
epoch: 98 test_true_pfm: 2636.7943397179592 sim_pfm: 357.0534504488266
episode: 392 training return: tensor(439.2666, device='cuda:0')
episode: 393 training return: tensor(72.3284, device='cuda:0')
episode: 394 training return: tensor(51.1417, device='cuda:0')
episode: 395 training return: tensor(52.0962, device='cuda:0')
epoch: 99 test_true_pfm: 2493.3646168720625 sim_pfm: 231.38435554523798
episode: 396 training return: tensor(194.4656, device='cuda:0')
episode: 397 training return: tensor(297.8085, device='cuda:0')
episode: 398 training return: tensor(-29.9388, device='cuda:0')
episode: 399 training return: tensor(-5.6502, device='cuda:0')
epoch: 100 test_true_pfm: 2885.6583106044786 sim_pfm: 279.35947476912406
episode: 400 training return: tensor(33.0827, device='cuda:0')
episode: 401 training return: tensor(24.7694, device='cuda:0')
episode: 402 training return: tensor(125.3782, device='cuda:0')
episode: 403 training return: tensor(15.2847, device='cuda:0')
epoch: 101 test_true_pfm: 2093.296058260524 sim_pfm: 279.3818170397426
episode: 404 training return: tensor(30.7713, device='cuda:0')
episode: 405 training return: tensor(177.7962, device='cuda:0')
episode: 406 training return: tensor(172.3762, device='cuda:0')
episode: 407 training return: tensor(131.7973, device='cuda:0')
epoch: 102 test_true_pfm: 2880.945330155584 sim_pfm: 174.18820028199116
episode: 408 training return: tensor(299.8137, device='cuda:0')
episode: 409 training return: tensor(167.9634, device='cuda:0')
episode: 410 training return: tensor(480.6058, device='cuda:0')
episode: 411 training return: tensor(76.6889, device='cuda:0')
epoch: 103 test_true_pfm: 2981.3335459335153 sim_pfm: 382.0345122854148
episode: 412 training return: tensor(112.9072, device='cuda:0')
episode: 413 training return: tensor(37.9765, device='cuda:0')
episode: 414 training return: tensor(18.0379, device='cuda:0')
episode: 415 training return: tensor(425.0325, device='cuda:0')
epoch: 104 test_true_pfm: 2316.947685221461 sim_pfm: 348.71219875566504
episode: 416 training return: tensor(11.9634, device='cuda:0')
episode: 417 training return: tensor(125.5285, device='cuda:0')
episode: 418 training return: tensor(108.5859, device='cuda:0')
episode: 419 training return: tensor(47.2913, device='cuda:0')
epoch: 105 test_true_pfm: 2768.152728944642 sim_pfm: 234.10941228683805
episode: 420 training return: tensor(10.8898, device='cuda:0')
episode: 421 training return: tensor(176.8777, device='cuda:0')
episode: 422 training return: tensor(155.1593, device='cuda:0')
episode: 423 training return: tensor(329.0365, device='cuda:0')
epoch: 106 test_true_pfm: 1774.0827851629108 sim_pfm: 351.6986379779798
episode: 424 training return: tensor(162.0551, device='cuda:0')
episode: 425 training return: tensor(72.0351, device='cuda:0')
episode: 426 training return: tensor(227.6587, device='cuda:0')
episode: 427 training return: tensor(465.4812, device='cuda:0')
epoch: 107 test_true_pfm: 2624.482340789253 sim_pfm: 302.47447295702295
episode: 428 training return: tensor(131.7873, device='cuda:0')
episode: 429 training return: tensor(463.4746, device='cuda:0')
episode: 430 training return: tensor(367.6700, device='cuda:0')
episode: 431 training return: tensor(60.2410, device='cuda:0')
epoch: 108 test_true_pfm: 2639.5429216240086 sim_pfm: 484.9356958093801
episode: 432 training return: tensor(221.7451, device='cuda:0')
episode: 433 training return: tensor(120.2588, device='cuda:0')
episode: 434 training return: tensor(47.6376, device='cuda:0')
episode: 435 training return: tensor(177.0175, device='cuda:0')
epoch: 109 test_true_pfm: 2240.16899863385 sim_pfm: 230.05021660590623
episode: 436 training return: tensor(26.2737, device='cuda:0')
episode: 437 training return: tensor(-7.4839, device='cuda:0')
episode: 438 training return: tensor(53.6437, device='cuda:0')
episode: 439 training return: tensor(198.8594, device='cuda:0')
epoch: 110 test_true_pfm: 2094.7675561527144 sim_pfm: 241.1783958968978
episode: 440 training return: tensor(182.4041, device='cuda:0')
episode: 441 training return: tensor(315.8962, device='cuda:0')
episode: 442 training return: tensor(50.8410, device='cuda:0')
episode: 443 training return: tensor(196.7358, device='cuda:0')
epoch: 111 test_true_pfm: 2521.3474070784364 sim_pfm: 307.22656164095196
episode: 444 training return: tensor(4.7550, device='cuda:0')
episode: 445 training return: tensor(173.5512, device='cuda:0')
episode: 446 training return: tensor(481.0461, device='cuda:0')
episode: 447 training return: tensor(141.5843, device='cuda:0')
epoch: 112 test_true_pfm: 2663.020485989539 sim_pfm: 446.05475895665586
episode: 448 training return: tensor(160.8539, device='cuda:0')
episode: 449 training return: tensor(304.2557, device='cuda:0')
episode: 450 training return: tensor(221.9503, device='cuda:0')
episode: 451 training return: tensor(341.3382, device='cuda:0')
epoch: 113 test_true_pfm: 2528.2175665744858 sim_pfm: 450.1839064167968
episode: 452 training return: tensor(49.0209, device='cuda:0')
episode: 453 training return: tensor(143.0107, device='cuda:0')
episode: 454 training return: tensor(91.6447, device='cuda:0')
episode: 455 training return: tensor(78.9863, device='cuda:0')
epoch: 114 test_true_pfm: 2539.3288456909904 sim_pfm: 258.5772008173905
episode: 456 training return: tensor(102.9318, device='cuda:0')
episode: 457 training return: tensor(23.7297, device='cuda:0')
episode: 458 training return: tensor(271.3404, device='cuda:0')
episode: 459 training return: tensor(69.5049, device='cuda:0')
epoch: 115 test_true_pfm: 2460.0026092887897 sim_pfm: 358.36336177300353
episode: 460 training return: tensor(31.1758, device='cuda:0')
episode: 461 training return: tensor(13.2969, device='cuda:0')
episode: 462 training return: tensor(60.7950, device='cuda:0')
episode: 463 training return: tensor(117.7671, device='cuda:0')
epoch: 116 test_true_pfm: 2435.179001727424 sim_pfm: 312.74770789506147
episode: 464 training return: tensor(62.7172, device='cuda:0')
episode: 465 training return: tensor(176.7248, device='cuda:0')
episode: 466 training return: tensor(149.9509, device='cuda:0')
episode: 467 training return: tensor(53.4268, device='cuda:0')
epoch: 117 test_true_pfm: 2297.0549283867863 sim_pfm: 281.2118983894761
episode: 468 training return: tensor(95.7993, device='cuda:0')
episode: 469 training return: tensor(93.6025, device='cuda:0')
episode: 470 training return: tensor(466.3054, device='cuda:0')
episode: 471 training return: tensor(471.0909, device='cuda:0')
epoch: 118 test_true_pfm: 2505.430436306593 sim_pfm: 226.1844419266563
episode: 472 training return: tensor(30.2902, device='cuda:0')
episode: 473 training return: tensor(477.1310, device='cuda:0')
episode: 474 training return: tensor(155.7329, device='cuda:0')
episode: 475 training return: tensor(92.7208, device='cuda:0')
epoch: 119 test_true_pfm: 2322.492370000342 sim_pfm: 357.4693788299337
episode: 476 training return: tensor(40.4269, device='cuda:0')
episode: 477 training return: tensor(70.8800, device='cuda:0')
episode: 478 training return: tensor(197.9551, device='cuda:0')
episode: 479 training return: tensor(326.0443, device='cuda:0')
epoch: 120 test_true_pfm: 3096.34148966172 sim_pfm: 310.60761312181904
episode: 480 training return: tensor(182.8560, device='cuda:0')
episode: 481 training return: tensor(153.7593, device='cuda:0')
episode: 482 training return: tensor(12.8771, device='cuda:0')
episode: 483 training return: tensor(9.7871, device='cuda:0')
epoch: 121 test_true_pfm: 1769.393147201599 sim_pfm: 204.1919675260433
episode: 484 training return: tensor(349.2030, device='cuda:0')
episode: 485 training return: tensor(269.7361, device='cuda:0')
episode: 486 training return: tensor(212.0192, device='cuda:0')
episode: 487 training return: tensor(90.0825, device='cuda:0')
epoch: 122 test_true_pfm: 2809.019127518601 sim_pfm: 206.6353028290905
episode: 488 training return: tensor(310.1671, device='cuda:0')
episode: 489 training return: tensor(130.5876, device='cuda:0')
episode: 490 training return: tensor(87.6968, device='cuda:0')
episode: 491 training return: tensor(323.2571, device='cuda:0')
epoch: 123 test_true_pfm: 2705.1636957206088 sim_pfm: 292.76716289117275
episode: 492 training return: tensor(286.2036, device='cuda:0')
episode: 493 training return: tensor(225.6332, device='cuda:0')
episode: 494 training return: tensor(232.1456, device='cuda:0')
episode: 495 training return: tensor(96.5045, device='cuda:0')
epoch: 124 test_true_pfm: 2161.0052179781 sim_pfm: 261.0219148134929
episode: 496 training return: tensor(172.9638, device='cuda:0')
episode: 497 training return: tensor(500.0032, device='cuda:0')
episode: 498 training return: tensor(284.7363, device='cuda:0')
episode: 499 training return: tensor(210.2490, device='cuda:0')
epoch: 125 test_true_pfm: 2918.613030178025 sim_pfm: 309.19280889248085
episode: 500 training return: tensor(10.8467, device='cuda:0')
episode: 501 training return: tensor(196.9266, device='cuda:0')
episode: 502 training return: tensor(31.2525, device='cuda:0')
episode: 503 training return: tensor(12.8844, device='cuda:0')
epoch: 126 test_true_pfm: 2647.969257809995 sim_pfm: 248.0150963322958
episode: 504 training return: tensor(183.6262, device='cuda:0')
episode: 505 training return: tensor(191.6132, device='cuda:0')
episode: 506 training return: tensor(252.0275, device='cuda:0')
episode: 507 training return: tensor(144.8676, device='cuda:0')
epoch: 127 test_true_pfm: 2606.471484668776 sim_pfm: 309.16545102434856
episode: 508 training return: tensor(92.3792, device='cuda:0')
episode: 509 training return: tensor(78.5581, device='cuda:0')
episode: 510 training return: tensor(88.3818, device='cuda:0')
episode: 511 training return: tensor(80.7659, device='cuda:0')
epoch: 128 test_true_pfm: 1942.4539231851768 sim_pfm: 345.09695344159263
episode: 512 training return: tensor(198.2977, device='cuda:0')
episode: 513 training return: tensor(205.1773, device='cuda:0')
episode: 514 training return: tensor(184.1566, device='cuda:0')
episode: 515 training return: tensor(331.9593, device='cuda:0')
epoch: 129 test_true_pfm: 2767.61625358229 sim_pfm: 333.19445465357666
episode: 516 training return: tensor(100.6731, device='cuda:0')
episode: 517 training return: tensor(377.0807, device='cuda:0')
episode: 518 training return: tensor(64.8793, device='cuda:0')
episode: 519 training return: tensor(160.2901, device='cuda:0')
epoch: 130 test_true_pfm: 2450.1334676982788 sim_pfm: 404.5530101011197
episode: 520 training return: tensor(376.9844, device='cuda:0')
episode: 521 training return: tensor(44.6137, device='cuda:0')
episode: 522 training return: tensor(63.1462, device='cuda:0')
episode: 523 training return: tensor(370.7416, device='cuda:0')
epoch: 131 test_true_pfm: 2114.5705880653513 sim_pfm: 343.3596853875012
episode: 524 training return: tensor(54.8712, device='cuda:0')
episode: 525 training return: tensor(251.2945, device='cuda:0')
episode: 526 training return: tensor(437.1740, device='cuda:0')
episode: 527 training return: tensor(91.6963, device='cuda:0')
epoch: 132 test_true_pfm: 2776.5309067569433 sim_pfm: 370.704009630145
episode: 528 training return: tensor(299.2887, device='cuda:0')
episode: 529 training return: tensor(285.5507, device='cuda:0')
episode: 530 training return: tensor(222.1634, device='cuda:0')
episode: 531 training return: tensor(15.1557, device='cuda:0')
epoch: 133 test_true_pfm: 2156.346816515778 sim_pfm: 203.12285863534393
episode: 532 training return: tensor(159.1412, device='cuda:0')
episode: 533 training return: tensor(207.1402, device='cuda:0')
episode: 534 training return: tensor(175.2795, device='cuda:0')
episode: 535 training return: tensor(171.0844, device='cuda:0')
epoch: 134 test_true_pfm: 2340.3503802750806 sim_pfm: 151.25897922760728
episode: 536 training return: tensor(27.1360, device='cuda:0')
episode: 537 training return: tensor(322.8023, device='cuda:0')
episode: 538 training return: tensor(80.2711, device='cuda:0')
episode: 539 training return: tensor(180.0123, device='cuda:0')
epoch: 135 test_true_pfm: 2381.845635633938 sim_pfm: 127.85232631588588
episode: 540 training return: tensor(74.3032, device='cuda:0')
episode: 541 training return: tensor(506.6481, device='cuda:0')
episode: 542 training return: tensor(475.5389, device='cuda:0')
episode: 543 training return: tensor(76.0606, device='cuda:0')
epoch: 136 test_true_pfm: 2463.314681460215 sim_pfm: 398.43315483167925
episode: 544 training return: tensor(29.0961, device='cuda:0')
episode: 545 training return: tensor(27.9537, device='cuda:0')
episode: 546 training return: tensor(377.7307, device='cuda:0')
episode: 547 training return: tensor(213.2132, device='cuda:0')
epoch: 137 test_true_pfm: 2592.614152505675 sim_pfm: 351.40225906847627
episode: 548 training return: tensor(217.7429, device='cuda:0')
episode: 549 training return: tensor(88.5927, device='cuda:0')
episode: 550 training return: tensor(504.5513, device='cuda:0')
episode: 551 training return: tensor(103.3895, device='cuda:0')
epoch: 138 test_true_pfm: 2286.89734806603 sim_pfm: 257.18803499501274
episode: 552 training return: tensor(69.3015, device='cuda:0')
episode: 553 training return: tensor(514.6526, device='cuda:0')
episode: 554 training return: tensor(183.2263, device='cuda:0')
episode: 555 training return: tensor(96.2975, device='cuda:0')
epoch: 139 test_true_pfm: 2572.867778817042 sim_pfm: 389.53807419879985
episode: 556 training return: tensor(207.7910, device='cuda:0')
episode: 557 training return: tensor(232.4919, device='cuda:0')
episode: 558 training return: tensor(128.5674, device='cuda:0')
episode: 559 training return: tensor(131.0826, device='cuda:0')
epoch: 140 test_true_pfm: 1882.8424052551964 sim_pfm: 282.5414729790452
episode: 560 training return: tensor(441.5527, device='cuda:0')
episode: 561 training return: tensor(38.3900, device='cuda:0')
episode: 562 training return: tensor(256.8878, device='cuda:0')
episode: 563 training return: tensor(251.0458, device='cuda:0')
epoch: 141 test_true_pfm: 2987.581296788983 sim_pfm: 296.6248964208644
episode: 564 training return: tensor(97.7091, device='cuda:0')
episode: 565 training return: tensor(126.4367, device='cuda:0')
episode: 566 training return: tensor(90.5158, device='cuda:0')
episode: 567 training return: tensor(204.8836, device='cuda:0')
epoch: 142 test_true_pfm: 2056.841949840788 sim_pfm: 281.45503824262414
episode: 568 training return: tensor(40.8224, device='cuda:0')
episode: 569 training return: tensor(41.4859, device='cuda:0')
episode: 570 training return: tensor(70.1649, device='cuda:0')
episode: 571 training return: tensor(385.6108, device='cuda:0')
epoch: 143 test_true_pfm: 2476.161254302198 sim_pfm: 373.0703850755235
episode: 572 training return: tensor(127.6145, device='cuda:0')
episode: 573 training return: tensor(201.4995, device='cuda:0')
episode: 574 training return: tensor(238.6433, device='cuda:0')
episode: 575 training return: tensor(51.0311, device='cuda:0')
epoch: 144 test_true_pfm: 2719.6254564935675 sim_pfm: 166.54307376383804
episode: 576 training return: tensor(139.3369, device='cuda:0')
episode: 577 training return: tensor(311.6541, device='cuda:0')
episode: 578 training return: tensor(29.2130, device='cuda:0')
episode: 579 training return: tensor(463.2365, device='cuda:0')
epoch: 145 test_true_pfm: 2352.122698899537 sim_pfm: 381.1237765658104
episode: 580 training return: tensor(80.9882, device='cuda:0')
episode: 581 training return: tensor(93.1615, device='cuda:0')
episode: 582 training return: tensor(-10.7343, device='cuda:0')
episode: 583 training return: tensor(108.5621, device='cuda:0')
epoch: 146 test_true_pfm: 2365.6378871286893 sim_pfm: 434.0859402891171
episode: 584 training return: tensor(121.0400, device='cuda:0')
episode: 585 training return: tensor(48.2328, device='cuda:0')
episode: 586 training return: tensor(111.6444, device='cuda:0')
episode: 587 training return: tensor(16.4254, device='cuda:0')
epoch: 147 test_true_pfm: 3159.405911847763 sim_pfm: 200.5600866874447
episode: 588 training return: tensor(32.5870, device='cuda:0')
episode: 589 training return: tensor(296.2929, device='cuda:0')
episode: 590 training return: tensor(182.6169, device='cuda:0')
episode: 591 training return: tensor(31.1648, device='cuda:0')
epoch: 148 test_true_pfm: 2205.391909704036 sim_pfm: 258.62981993407203
episode: 592 training return: tensor(82.3235, device='cuda:0')
episode: 593 training return: tensor(161.3810, device='cuda:0')
episode: 594 training return: tensor(26.2547, device='cuda:0')
episode: 595 training return: tensor(85.2436, device='cuda:0')
epoch: 149 test_true_pfm: 1907.7454061626675 sim_pfm: 150.17112792518068
episode: 596 training return: tensor(296.4013, device='cuda:0')
episode: 597 training return: tensor(190.8805, device='cuda:0')
episode: 598 training return: tensor(58.2741, device='cuda:0')
episode: 599 training return: tensor(304.9467, device='cuda:0')
epoch: 150 test_true_pfm: 2202.6628939805346 sim_pfm: 259.6487583243482
