['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'expert', '--seed', '4']
epoch: 0 training_loss 0.2877404760569334 test_loss: 0.22417290210723878
epoch: 1 training_loss 0.19101939648389815 test_loss: 0.17654826641082763
epoch: 2 training_loss 0.17684019304811954 test_loss: 0.17107057571411133
epoch: 3 training_loss 0.16311970300972461 test_loss: 0.16415891647338868
epoch: 4 training_loss 0.16968604106456042 test_loss: 0.1343177318572998
epoch: 5 training_loss 0.1451878234371543 test_loss: 0.18020896911621093
epoch: 6 training_loss 0.14406492307782173 test_loss: 0.1352154493331909
epoch: 7 training_loss 0.13692522294819354 test_loss: 0.14330796003341675
epoch: 8 training_loss 0.14445391092449428 test_loss: 0.1426633358001709
epoch: 9 training_loss 0.1363379652798176 test_loss: 0.15394768714904786
epoch: 10 training_loss 0.13690569691359997 test_loss: 0.15204498767852784
epoch: 11 training_loss 0.13545372009277343 test_loss: 0.16026700735092164
epoch: 12 training_loss 0.1326360873132944 test_loss: 0.13549537658691407
epoch: 13 training_loss 0.13416437350213528 test_loss: 0.1255782961845398
epoch: 14 training_loss 0.1243521336838603 test_loss: 0.11799095869064331
epoch: 15 training_loss 0.13156874846667052 test_loss: 0.12842998504638672
epoch: 16 training_loss 0.129096954241395 test_loss: 0.1253983497619629
epoch: 17 training_loss 0.1305158055201173 test_loss: 0.13653361797332764
epoch: 18 training_loss 0.14075185872614385 test_loss: 0.12269784212112426
epoch: 19 training_loss 0.1230145139619708 test_loss: 0.12900031805038453
epoch: 20 training_loss 0.13227153614163398 test_loss: 0.12333889007568359
epoch: 21 training_loss 0.12444040052592754 test_loss: 0.10866972208023071
epoch: 22 training_loss 0.12537834834307432 test_loss: 0.11204315423965454
epoch: 23 training_loss 0.13006207380443813 test_loss: 0.12568867206573486
epoch: 24 training_loss 0.12091216981410981 test_loss: 0.13152512311935424
epoch: 25 training_loss 0.1246710205450654 test_loss: 0.12154757976531982
epoch: 26 training_loss 0.12745497226715088 test_loss: 0.1146539568901062
epoch: 27 training_loss 0.1184696071781218 test_loss: 0.15414059162139893
epoch: 28 training_loss 0.12085402151569724 test_loss: 0.13224928379058837
epoch: 29 training_loss 0.12818931261077524 test_loss: 0.12373372316360473
epoch: 30 training_loss 0.1251918490231037 test_loss: 0.12302488088607788
epoch: 31 training_loss 0.12576326897367834 test_loss: 0.11196619272232056
epoch: 32 training_loss 0.12767876368016004 test_loss: 0.11966146230697632
epoch: 33 training_loss 0.12312634505331516 test_loss: 0.14277939796447753
epoch: 34 training_loss 0.12483899176120758 test_loss: 0.11512013673782348
epoch: 35 training_loss 0.12785866260528564 test_loss: 0.12543323040008544
epoch: 36 training_loss 0.11763364993035794 test_loss: 0.12811321020126343
epoch: 37 training_loss 0.1324704325944185 test_loss: 0.13846923112869264
epoch: 38 training_loss 0.11920699566602706 test_loss: 0.13190122842788696
epoch: 39 training_loss 0.12430389009416104 test_loss: 0.11130564212799073
epoch: 40 training_loss 0.11652348697185516 test_loss: 0.11925156116485595
epoch: 41 training_loss 0.12223827950656414 test_loss: 0.10350923538208008
epoch: 42 training_loss 0.12148110777139663 test_loss: 0.12649269104003907
epoch: 43 training_loss 0.11829119807109237 test_loss: 0.13933585882186889
epoch: 44 training_loss 0.1275533838570118 test_loss: 0.11932544708251953
epoch: 45 training_loss 0.11526964139193296 test_loss: 0.11219296455383301
epoch: 46 training_loss 0.12738216891884804 test_loss: 0.13402491807937622
epoch: 47 training_loss 0.12806465357542038 test_loss: 0.11943399906158447
epoch: 48 training_loss 0.12080121658742428 test_loss: 0.09201168417930602
epoch: 49 training_loss 0.11927982483059169 test_loss: 0.14427996873855592
epoch: 50 training_loss 0.11708447597920894 test_loss: 0.10273255109786987
epoch: 51 training_loss 0.11889964170753956 test_loss: 0.14362255334854127
epoch: 52 training_loss 0.12045294132083655 test_loss: 0.12718279361724855
epoch: 53 training_loss 0.11242788538336754 test_loss: 0.1215485692024231
epoch: 54 training_loss 0.1250508251786232 test_loss: 0.12209609746932984
epoch: 55 training_loss 0.12136648196727037 test_loss: 0.1389120936393738
epoch: 56 training_loss 0.11971440922468901 test_loss: 0.11867836713790894
epoch: 57 training_loss 0.12208090752363204 test_loss: 0.11754636764526367
epoch: 58 training_loss 0.12349907249212265 test_loss: 0.14457393884658815
epoch: 59 training_loss 0.1189152180030942 test_loss: 0.141211199760437
epoch: 60 training_loss 0.12416690766811371 test_loss: 0.13261194229125978
epoch: 61 training_loss 0.1215412623807788 test_loss: 0.15030672550201415
epoch: 62 training_loss 0.1172247214987874 test_loss: 0.11000767946243287
epoch: 63 training_loss 0.12549725169315934 test_loss: 0.1260396957397461
epoch: 64 training_loss 0.1161833356320858 test_loss: 0.1276869535446167
epoch: 65 training_loss 0.11880147188901902 test_loss: 0.13201292753219604
epoch: 66 training_loss 0.12321395646780729 test_loss: 0.10643037557601928
epoch: 67 training_loss 0.12068621516227722 test_loss: 0.09975202679634095
epoch: 68 training_loss 0.113617229051888 test_loss: 0.12480952739715576
epoch: 69 training_loss 0.12716105014085768 test_loss: 0.11048005819320679
epoch: 70 training_loss 0.1225061209499836 test_loss: 0.1326020836830139
epoch: 71 training_loss 0.1254616491869092 test_loss: 0.12038658857345581
epoch: 72 training_loss 0.12599998071789742 test_loss: 0.1106525182723999
epoch: 73 training_loss 0.11728192439302802 test_loss: 0.11664170026779175
epoch: 74 training_loss 0.12745576882734894 test_loss: 0.10703897476196289
epoch: 75 training_loss 0.12635184602811933 test_loss: 0.13336434364318847
epoch: 76 training_loss 0.1235629304498434 test_loss: 0.12169708013534546
epoch: 77 training_loss 0.12231699392199516 test_loss: 0.11230236291885376
epoch: 78 training_loss 0.12412385791540145 test_loss: 0.1287304162979126
epoch: 79 training_loss 0.1255323695205152 test_loss: 0.12408055067062378
epoch: 80 training_loss 0.11471402268856763 test_loss: 0.12379200458526611
epoch: 81 training_loss 0.11785520218312741 test_loss: 0.10877583026885987
epoch: 82 training_loss 0.11504198469221592 test_loss: 0.1165573239326477
epoch: 83 training_loss 0.12350119061768056 test_loss: 0.11865413188934326
epoch: 84 training_loss 0.11714416965842248 test_loss: 0.1055400013923645
epoch: 85 training_loss 0.12274534575641155 test_loss: 0.12018417119979859
epoch: 86 training_loss 0.11776630669832229 test_loss: 0.11687947511672973
epoch: 87 training_loss 0.12066042393445969 test_loss: 0.12644947767257692
epoch: 88 training_loss 0.11422691028565168 test_loss: 0.11109758615493774
epoch: 89 training_loss 0.11911327941343189 test_loss: 0.11760144233703614
epoch: 90 training_loss 0.1259593253955245 test_loss: 0.12821860313415528
epoch: 91 training_loss 0.11959553714841605 test_loss: 0.11896963119506836
epoch: 92 training_loss 0.12169952481985093 test_loss: 0.11144354343414306
epoch: 93 training_loss 0.11427945982664824 test_loss: 0.14022061824798585
epoch: 94 training_loss 0.11553019925951957 test_loss: 0.12656621932983397
epoch: 95 training_loss 0.12556547481566668 test_loss: 0.13017452955245973
epoch: 96 training_loss 0.11386358043178917 test_loss: 0.13524924516677855
epoch: 97 training_loss 0.11720837602391838 test_loss: 0.1296474575996399
epoch: 98 training_loss 0.11100042834877968 test_loss: 0.1358795166015625
epoch: 99 training_loss 0.12057119958102704 test_loss: 0.10646837949752808
epoch: 100 training_loss 0.11490391742438077 test_loss: 0.12489858865737916
epoch: 101 training_loss 0.11075263161212207 test_loss: 0.10961713790893554
epoch: 102 training_loss 0.11668979227542878 test_loss: 0.13918838500976563
epoch: 103 training_loss 0.11783422462642193 test_loss: 0.1317791223526001
epoch: 104 training_loss 0.11871942050755024 test_loss: 0.11052117347717286
epoch: 105 training_loss 0.10977292215451598 test_loss: 0.10970858335494996
epoch: 106 training_loss 0.12672167051583527 test_loss: 0.11642180681228638
epoch: 107 training_loss 0.1256314091384411 test_loss: 0.1098777174949646
epoch: 108 training_loss 0.1167348738387227 test_loss: 0.12159475088119506
epoch: 109 training_loss 0.11530803557485342 test_loss: 0.11508400440216064
epoch: 110 training_loss 0.11945460096001626 test_loss: 0.13278255462646485
epoch: 111 training_loss 0.11847869396209716 test_loss: 0.15015006065368652
epoch: 112 training_loss 0.11362335793673992 test_loss: 0.129525089263916
epoch: 113 training_loss 0.11448628528043628 test_loss: 0.11152392625808716
epoch: 114 training_loss 0.11034014973789454 test_loss: 0.1171269178390503
epoch: 115 training_loss 0.11889593373984099 test_loss: 0.1272013306617737
epoch: 116 training_loss 0.1203639025054872 test_loss: 0.1075249195098877
epoch: 117 training_loss 0.11283233594149351 test_loss: 0.12740057706832886
epoch: 118 training_loss 0.12297491051256657 test_loss: 0.10820425748825073
epoch: 119 training_loss 0.11489748872816563 test_loss: 0.13402748107910156
epoch: 120 training_loss 0.12431513521820307 test_loss: 0.11352657079696656
epoch: 121 training_loss 0.12204742230474949 test_loss: 0.11476682424545288
epoch: 122 training_loss 0.10806006409227847 test_loss: 0.11447200775146485
epoch: 123 training_loss 0.11951423574239016 test_loss: 0.12298038005828857
epoch: 124 training_loss 0.11799970924854279 test_loss: 0.11680113077163697
epoch: 125 training_loss 0.11385489102452993 test_loss: 0.10106414556503296
epoch: 126 training_loss 0.12155695494264364 test_loss: 0.12147891521453857
epoch: 127 training_loss 0.1229204524308443 test_loss: 0.09287073016166687
epoch: 128 training_loss 0.11534772351384164 test_loss: 0.11782925128936768
epoch: 129 training_loss 0.1179890026897192 test_loss: 0.11667917966842652
epoch: 130 training_loss 0.11025580324232578 test_loss: 0.12040796279907226
epoch: 131 training_loss 0.12120713979005814 test_loss: 0.11501842737197876
epoch: 132 training_loss 0.11837646197527647 test_loss: 0.11204369068145752
epoch: 133 training_loss 0.11396130815148353 test_loss: 0.12983620166778564
epoch: 134 training_loss 0.1220804225280881 test_loss: 0.11443184614181519
epoch: 135 training_loss 0.10944586887955665 test_loss: 0.12185837030410766
epoch: 136 training_loss 0.13456539826467634 test_loss: 0.11936507225036622
epoch: 137 training_loss 0.1212556792795658 test_loss: 0.11532440185546874
epoch: 138 training_loss 0.11755553163588046 test_loss: 0.11992708444595337
epoch: 139 training_loss 0.12154108546674251 test_loss: 0.11730929613113403
epoch: 140 training_loss 0.119144067466259 test_loss: 0.14081047773361205
epoch: 141 training_loss 0.11574746433645487 test_loss: 0.11621695756912231
epoch: 142 training_loss 0.11511646799743175 test_loss: 0.11818565130233764
epoch: 143 training_loss 0.10530655117705465 test_loss: 0.1288287401199341
epoch: 144 training_loss 0.11843624044209719 test_loss: 0.11464636325836182
epoch: 145 training_loss 0.1182890110835433 test_loss: 0.11640169620513915
epoch: 146 training_loss 0.11789184920489788 test_loss: 0.1188689112663269
epoch: 147 training_loss 0.11722978621721268 test_loss: 0.11261159181594849
epoch: 148 training_loss 0.11216291323304177 test_loss: 0.15156912803649902
epoch: 149 training_loss 0.1168184107542038 test_loss: 0.09768581986427308
epoch: 0 training_loss 28.43972231864929 test_loss: 8.901690673828124
epoch: 1 training_loss 7.342747392654419 test_loss: 6.106759643554687
epoch: 2 training_loss 5.404708609580994 test_loss: 4.982730865478516
epoch: 3 training_loss 4.602314805984497 test_loss: 4.596782302856445
epoch: 4 training_loss 3.9216343426704405 test_loss: 3.8647056579589845
epoch: 5 training_loss 3.4903083515167235 test_loss: 3.2269832611083986
epoch: 6 training_loss 3.149954950809479 test_loss: 2.9529890060424804
epoch: 7 training_loss 2.838508290052414 test_loss: 2.683573913574219
epoch: 8 training_loss 2.626903015375137 test_loss: 2.609937858581543
epoch: 9 training_loss 2.484412542581558 test_loss: 2.3612506866455076
epoch: 10 training_loss 2.250988738536835 test_loss: 2.3910621643066405
epoch: 11 training_loss 2.2040981709957124 test_loss: 2.1289554595947267
epoch: 12 training_loss 2.1935903108119965 test_loss: 2.1793432235717773
epoch: 13 training_loss 2.1221165347099302 test_loss: 2.0625385284423827
epoch: 14 training_loss 2.001584156751633 test_loss: 1.9398107528686523
epoch: 15 training_loss 1.9163379430770875 test_loss: 1.8317203521728516
epoch: 16 training_loss 1.9019541430473328 test_loss: 1.8780750274658202
epoch: 17 training_loss 1.809333735704422 test_loss: 1.9522829055786133
epoch: 18 training_loss 1.7921975421905518 test_loss: 1.84820556640625
epoch: 19 training_loss 1.7523027217388154 test_loss: 1.7951278686523438
epoch: 20 training_loss 1.7481361472606658 test_loss: 1.7922403335571289
epoch: 21 training_loss 1.6693105757236482 test_loss: 1.6112398147583007
epoch: 22 training_loss 1.6438647675514222 test_loss: 1.7147083282470703
epoch: 23 training_loss 1.6696467554569245 test_loss: 1.6081138610839845
epoch: 24 training_loss 1.5748145806789398 test_loss: 1.5550311088562012
epoch: 25 training_loss 1.6085785055160522 test_loss: 1.689341163635254
epoch: 26 training_loss 1.636440291404724 test_loss: 1.5505763053894044
epoch: 27 training_loss 1.567660949230194 test_loss: 1.608077621459961
epoch: 28 training_loss 1.554626439809799 test_loss: 1.529952049255371
epoch: 29 training_loss 1.5607555460929872 test_loss: 1.6103055953979493
epoch: 30 training_loss 1.4935920357704162 test_loss: 1.56005916595459
epoch: 31 training_loss 1.4934531664848327 test_loss: 1.4673954010009767
epoch: 32 training_loss 1.4428667879104615 test_loss: 1.4343718528747558
epoch: 33 training_loss 1.5025016868114471 test_loss: 1.5229159355163575
epoch: 34 training_loss 1.4683926129341125 test_loss: 1.5796460151672362
epoch: 35 training_loss 1.4517267990112304 test_loss: 1.479147720336914
epoch: 36 training_loss 1.449721680879593 test_loss: 1.3336846351623535
epoch: 37 training_loss 1.353559182882309 test_loss: 1.4141918182373048
epoch: 38 training_loss 1.4104508781433105 test_loss: 1.3793012619018554
epoch: 39 training_loss 1.4529611325263978 test_loss: 1.3802886009216309
epoch: 40 training_loss 1.374153378009796 test_loss: 1.4169501304626464
epoch: 41 training_loss 1.3955884027481078 test_loss: 1.39400634765625
epoch: 42 training_loss 1.370993410348892 test_loss: 1.363520908355713
epoch: 43 training_loss 1.3398004424571992 test_loss: 1.382296657562256
epoch: 44 training_loss 1.3216087412834168 test_loss: 1.3568146705627442
epoch: 45 training_loss 1.3005655473470688 test_loss: 1.3816693305969239
epoch: 46 training_loss 1.3505434203147888 test_loss: 1.2916186332702637
epoch: 47 training_loss 1.340086922645569 test_loss: 1.3508654594421388
epoch: 48 training_loss 1.3250649189949035 test_loss: 1.3288829803466797
epoch: 49 training_loss 1.3230625677108765 test_loss: 1.2949463844299316
epoch: 50 training_loss 1.2909407168626785 test_loss: 1.3080608367919921
epoch: 51 training_loss 1.2876958084106445 test_loss: 1.3090608596801758
epoch: 52 training_loss 1.300378230214119 test_loss: 1.2713150024414062
epoch: 53 training_loss 1.2491939210891723 test_loss: 1.2262332916259766
epoch: 54 training_loss 1.2792416965961457 test_loss: 1.2689066886901856
epoch: 55 training_loss 1.2407098859548569 test_loss: 1.3483824729919434
epoch: 56 training_loss 1.2308961302042007 test_loss: 1.2729063987731934
epoch: 57 training_loss 1.3007443845272064 test_loss: 1.2645439147949218
epoch: 58 training_loss 1.2239033091068268 test_loss: 1.2198376655578613
epoch: 59 training_loss 1.2362556481361389 test_loss: 1.2359821319580078
epoch: 60 training_loss 1.2009076887369157 test_loss: 1.2382603645324708
epoch: 61 training_loss 1.2675200176239014 test_loss: 1.2068947792053222
epoch: 62 training_loss 1.227774293422699 test_loss: 1.1797325134277343
epoch: 63 training_loss 1.1722097045183182 test_loss: 1.2024953842163086
epoch: 64 training_loss 1.2295072180032731 test_loss: 1.2270357131958007
epoch: 65 training_loss 1.187775555253029 test_loss: 1.2114056587219237
epoch: 66 training_loss 1.2073597186803817 test_loss: 1.1949503898620606
epoch: 67 training_loss 1.2124266666173935 test_loss: 1.1876173973083497
epoch: 68 training_loss 1.182954022884369 test_loss: 1.1591644287109375
epoch: 69 training_loss 1.1668633723258972 test_loss: 1.2327034950256348
epoch: 70 training_loss 1.1461264061927796 test_loss: 1.1828213691711427
epoch: 71 training_loss 1.1472034740447998 test_loss: 1.2200127601623536
epoch: 72 training_loss 1.1383871299028396 test_loss: 1.1799586296081543
epoch: 73 training_loss 1.1320695942640304 test_loss: 1.159162998199463
epoch: 74 training_loss 1.1582549571990968 test_loss: 1.110605239868164
epoch: 75 training_loss 1.1303432500362396 test_loss: 1.1649900436401368
epoch: 76 training_loss 1.1699390226602555 test_loss: 1.1869426727294923
epoch: 77 training_loss 1.1278641402721405 test_loss: 1.1780616760253906
epoch: 78 training_loss 1.1269848746061326 test_loss: 1.174205207824707
epoch: 79 training_loss 1.1528746432065964 test_loss: 1.1403593063354491
epoch: 80 training_loss 1.1357075488567352 test_loss: 1.1592260360717774
epoch: 81 training_loss 1.1351901215314866 test_loss: 1.1595921516418457
epoch: 82 training_loss 1.107942146062851 test_loss: 1.0884856224060058
epoch: 83 training_loss 1.1039967322349549 test_loss: 1.1152566909790038
epoch: 84 training_loss 1.095081314444542 test_loss: 1.142416000366211
epoch: 85 training_loss 1.1037663340568542 test_loss: 1.1212209701538085
epoch: 86 training_loss 1.1078571444749832 test_loss: 1.1273061752319335
epoch: 87 training_loss 1.097240788936615 test_loss: 1.1030752182006835
epoch: 88 training_loss 1.1179883521795273 test_loss: 1.1821415901184082
epoch: 89 training_loss 1.064969881772995 test_loss: 1.1595437049865722
epoch: 90 training_loss 1.0987315058708191 test_loss: 1.1140958786010742
epoch: 91 training_loss 1.0822634214162827 test_loss: 1.0947185516357423
epoch: 92 training_loss 1.0659025156497954 test_loss: 1.1149880409240722
epoch: 93 training_loss 1.126015186905861 test_loss: 1.103529167175293
epoch: 94 training_loss 1.0819795978069306 test_loss: 1.1578041076660157
epoch: 95 training_loss 1.0710396629571914 test_loss: 1.065151309967041
epoch: 96 training_loss 1.0740366220474242 test_loss: 1.0592803955078125
epoch: 97 training_loss 1.0892498701810838 test_loss: 1.0619616508483887
epoch: 98 training_loss 1.0616663163900375 test_loss: 1.0720037460327148
epoch: 99 training_loss 1.080887160897255 test_loss: 1.1284497261047364
epoch: 100 training_loss 1.0554457592964173 test_loss: 1.1118821144104003
epoch: 101 training_loss 1.0567174434661866 test_loss: 1.0869868278503418
epoch: 102 training_loss 1.054580785036087 test_loss: 1.0811093330383301
epoch: 103 training_loss 1.07788831949234 test_loss: 1.0813075065612794
epoch: 104 training_loss 1.0585137456655502 test_loss: 1.114420223236084
epoch: 105 training_loss 1.059737812280655 test_loss: 1.068214511871338
epoch: 106 training_loss 1.0443222308158875 test_loss: 1.0855101585388183
epoch: 107 training_loss 1.00818605363369 test_loss: 1.023809051513672
epoch: 108 training_loss 1.0788949286937715 test_loss: 1.0493136405944825
epoch: 109 training_loss 1.0108788347244262 test_loss: 0.9943449020385742
epoch: 110 training_loss 1.0226253098249436 test_loss: 1.0611217498779297
epoch: 111 training_loss 1.0293125486373902 test_loss: 0.9909096717834472
epoch: 112 training_loss 1.0195097875595094 test_loss: 1.1060699462890624
epoch: 113 training_loss 1.019765625 test_loss: 1.0267444610595704
epoch: 114 training_loss 1.0286000126600265 test_loss: 1.0995418548583984
epoch: 115 training_loss 0.9945715564489365 test_loss: 1.0531881332397461
epoch: 116 training_loss 1.0306903100013733 test_loss: 0.9636602401733398
epoch: 117 training_loss 1.0150607067346573 test_loss: 1.0415019035339355
epoch: 118 training_loss 1.0198942416906356 test_loss: 1.0872231483459474
epoch: 119 training_loss 1.0214612871408462 test_loss: 1.0620818138122559
epoch: 120 training_loss 1.0023972880840302 test_loss: 0.9839303970336915
epoch: 121 training_loss 1.0286801052093506 test_loss: 1.001469326019287
epoch: 122 training_loss 1.0028477078676223 test_loss: 1.0410057067871095
epoch: 123 training_loss 1.0253583317995072 test_loss: 1.0530805587768555
epoch: 124 training_loss 1.0479282999038697 test_loss: 0.968834114074707
epoch: 125 training_loss 1.0027152651548386 test_loss: 1.0275979042053223
epoch: 126 training_loss 0.9732620990276337 test_loss: 1.043959903717041
epoch: 127 training_loss 1.0041557151079177 test_loss: 0.9686734199523925
epoch: 128 training_loss 1.0243620306253434 test_loss: 1.0205865859985352
epoch: 129 training_loss 0.9796134501695632 test_loss: 0.99027099609375
epoch: 130 training_loss 1.0012480092048646 test_loss: 1.0264480590820313
epoch: 131 training_loss 0.9905709904432297 test_loss: 1.0470569610595704
epoch: 132 training_loss 0.9984066450595855 test_loss: 1.0067011833190918
epoch: 133 training_loss 0.9796258908510208 test_loss: 1.061838722229004
epoch: 134 training_loss 1.0083959197998047 test_loss: 0.9638494491577149
epoch: 135 training_loss 0.9679363906383515 test_loss: 1.0003878593444824
epoch: 136 training_loss 0.9974188756942749 test_loss: 0.9528902053833008
epoch: 137 training_loss 0.9965019577741623 test_loss: 1.0341156005859375
epoch: 138 training_loss 0.994125273823738 test_loss: 1.0160846710205078
epoch: 139 training_loss 0.9841313946247101 test_loss: 0.9574776649475097
epoch: 140 training_loss 0.9628670877218246 test_loss: 1.0259299278259277
epoch: 141 training_loss 1.0048292082548143 test_loss: 1.0122872352600099
epoch: 142 training_loss 0.9842882949113846 test_loss: 1.0116282463073731
epoch: 143 training_loss 0.9619382923841476 test_loss: 0.9974116325378418
epoch: 144 training_loss 0.9562674123048782 test_loss: 1.0318635940551757
epoch: 145 training_loss 0.9899602282047272 test_loss: 0.9799899101257324
epoch: 146 training_loss 0.9781907743215561 test_loss: 0.9774503707885742
epoch: 147 training_loss 0.9405038589239121 test_loss: 0.9968976020812989
epoch: 148 training_loss 0.9764245671033859 test_loss: 1.0085247993469237
epoch: 149 training_loss 0.9826142078638077 test_loss: 1.0059410095214845
3976.9560750047203
episode: 0 training return: tensor(109.8170, device='cuda:0')
episode: 1 training return: tensor(-911.6890, device='cuda:0')
episode: 2 training return: tensor(127.0746, device='cuda:0')
episode: 3 training return: tensor(76.2037, device='cuda:0')
epoch: 1 test_true_pfm: 4014.4653627873436 sim_pfm: 127.90064625136438
episode: 4 training return: tensor(198.1290, device='cuda:0')
episode: 5 training return: tensor(88.4935, device='cuda:0')
episode: 6 training return: tensor(84.8432, device='cuda:0')
episode: 7 training return: tensor(53.6414, device='cuda:0')
epoch: 2 test_true_pfm: 3982.3386010784343 sim_pfm: 151.32864229069673
episode: 8 training return: tensor(116.1189, device='cuda:0')
episode: 9 training return: tensor(64.1001, device='cuda:0')
episode: 10 training return: tensor(153.6392, device='cuda:0')
episode: 11 training return: tensor(152.7645, device='cuda:0')
epoch: 3 test_true_pfm: 3994.697099206905 sim_pfm: 125.72260286364083
episode: 12 training return: tensor(114.9738, device='cuda:0')
episode: 13 training return: tensor(59.7714, device='cuda:0')
episode: 14 training return: tensor(88.7731, device='cuda:0')
episode: 15 training return: tensor(28.9981, device='cuda:0')
epoch: 4 test_true_pfm: 4000.75692229981 sim_pfm: 90.72764345955996
episode: 16 training return: tensor(163.5398, device='cuda:0')
episode: 17 training return: tensor(90.4072, device='cuda:0')
episode: 18 training return: tensor(108.2121, device='cuda:0')
episode: 19 training return: tensor(147.7270, device='cuda:0')
epoch: 5 test_true_pfm: 3996.021666390323 sim_pfm: 45.80242621546495
episode: 20 training return: tensor(112.2807, device='cuda:0')
episode: 21 training return: tensor(78.4827, device='cuda:0')
episode: 22 training return: tensor(137.4102, device='cuda:0')
episode: 23 training return: tensor(127.9882, device='cuda:0')
epoch: 6 test_true_pfm: 4001.3343142738127 sim_pfm: 163.90658047312172
episode: 24 training return: tensor(105.7935, device='cuda:0')
episode: 25 training return: tensor(67.5694, device='cuda:0')
episode: 26 training return: tensor(91.3833, device='cuda:0')
episode: 27 training return: tensor(117.1963, device='cuda:0')
epoch: 7 test_true_pfm: 4048.9158111727825 sim_pfm: 154.80690807126425
episode: 28 training return: tensor(156.7539, device='cuda:0')
episode: 29 training return: tensor(109.8807, device='cuda:0')
episode: 30 training return: tensor(108.3337, device='cuda:0')
episode: 31 training return: tensor(158.1484, device='cuda:0')
epoch: 8 test_true_pfm: 4002.362452425816 sim_pfm: 145.72306998840455
episode: 32 training return: tensor(170.0743, device='cuda:0')
episode: 33 training return: tensor(92.6661, device='cuda:0')
episode: 34 training return: tensor(85.5004, device='cuda:0')
episode: 35 training return: tensor(152.7807, device='cuda:0')
epoch: 9 test_true_pfm: 3964.0743143748477 sim_pfm: 121.953585326594
episode: 36 training return: tensor(122.9865, device='cuda:0')
episode: 37 training return: tensor(152.1768, device='cuda:0')
episode: 38 training return: tensor(97.5042, device='cuda:0')
episode: 39 training return: tensor(71.8059, device='cuda:0')
epoch: 10 test_true_pfm: 3956.200868712403 sim_pfm: 108.06257378581601
episode: 40 training return: tensor(154.8573, device='cuda:0')
episode: 41 training return: tensor(98.6203, device='cuda:0')
episode: 42 training return: tensor(112.0513, device='cuda:0')
episode: 43 training return: tensor(144.9111, device='cuda:0')
epoch: 11 test_true_pfm: 3999.650520005207 sim_pfm: 114.43860117265528
episode: 44 training return: tensor(133.9218, device='cuda:0')
episode: 45 training return: tensor(97.9949, device='cuda:0')
episode: 46 training return: tensor(155.6126, device='cuda:0')
episode: 47 training return: tensor(145.0112, device='cuda:0')
epoch: 12 test_true_pfm: 3973.5109274752763 sim_pfm: 132.86329381041773
episode: 48 training return: tensor(93.7089, device='cuda:0')
episode: 49 training return: tensor(109.0044, device='cuda:0')
episode: 50 training return: tensor(73.5281, device='cuda:0')
episode: 51 training return: tensor(144.2198, device='cuda:0')
epoch: 13 test_true_pfm: 3986.9083954190005 sim_pfm: 117.77869975409703
episode: 52 training return: tensor(43.7483, device='cuda:0')
episode: 53 training return: tensor(171.6358, device='cuda:0')
episode: 54 training return: tensor(143.8785, device='cuda:0')
episode: 55 training return: tensor(149.2885, device='cuda:0')
epoch: 14 test_true_pfm: 4020.06409131815 sim_pfm: 129.12952074871282
episode: 56 training return: tensor(132.4434, device='cuda:0')
episode: 57 training return: tensor(78.6957, device='cuda:0')
episode: 58 training return: tensor(100.7999, device='cuda:0')
episode: 59 training return: tensor(138.4307, device='cuda:0')
epoch: 15 test_true_pfm: 4002.9531535863084 sim_pfm: 155.14760244193408
episode: 60 training return: tensor(124.7499, device='cuda:0')
episode: 61 training return: tensor(-769.0986, device='cuda:0')
episode: 62 training return: tensor(173.5362, device='cuda:0')
episode: 63 training return: tensor(145.9070, device='cuda:0')
epoch: 16 test_true_pfm: 3971.7572454699634 sim_pfm: 130.54285350754313
episode: 64 training return: tensor(100.6910, device='cuda:0')
episode: 65 training return: tensor(188.8742, device='cuda:0')
episode: 66 training return: tensor(109.3421, device='cuda:0')
episode: 67 training return: tensor(-718.2142, device='cuda:0')
epoch: 17 test_true_pfm: 4024.0606692587285 sim_pfm: 155.742124487384
episode: 68 training return: tensor(171.6995, device='cuda:0')
episode: 69 training return: tensor(134.9031, device='cuda:0')
episode: 70 training return: tensor(59.8349, device='cuda:0')
episode: 71 training return: tensor(89.8282, device='cuda:0')
epoch: 18 test_true_pfm: 3982.7714233754673 sim_pfm: 134.2434288894777
episode: 72 training return: tensor(103.3415, device='cuda:0')
episode: 73 training return: tensor(104.0536, device='cuda:0')
episode: 74 training return: tensor(158.2317, device='cuda:0')
episode: 75 training return: tensor(137.1382, device='cuda:0')
epoch: 19 test_true_pfm: 4015.9071646302596 sim_pfm: 148.37555938514802
episode: 76 training return: tensor(143.7442, device='cuda:0')
episode: 77 training return: tensor(146.4466, device='cuda:0')
episode: 78 training return: tensor(152.8677, device='cuda:0')
episode: 79 training return: tensor(83.8299, device='cuda:0')
epoch: 20 test_true_pfm: 4002.3967625856535 sim_pfm: 99.38814344334726
episode: 80 training return: tensor(159.0818, device='cuda:0')
episode: 81 training return: tensor(118.5256, device='cuda:0')
episode: 82 training return: tensor(121.5371, device='cuda:0')
episode: 83 training return: tensor(128.9609, device='cuda:0')
epoch: 21 test_true_pfm: 4004.5197531087165 sim_pfm: 114.75507665005473
episode: 84 training return: tensor(188.0623, device='cuda:0')
episode: 85 training return: tensor(112.3622, device='cuda:0')
episode: 86 training return: tensor(145.9514, device='cuda:0')
episode: 87 training return: tensor(124.5649, device='cuda:0')
epoch: 22 test_true_pfm: 4019.2124518669057 sim_pfm: 116.71555383661568
episode: 88 training return: tensor(102.8036, device='cuda:0')
episode: 89 training return: tensor(75.9883, device='cuda:0')
episode: 90 training return: tensor(140.1658, device='cuda:0')
episode: 91 training return: tensor(-793.6616, device='cuda:0')
epoch: 23 test_true_pfm: 3973.818022058868 sim_pfm: 166.71479092166797
episode: 92 training return: tensor(150.9644, device='cuda:0')
episode: 93 training return: tensor(148.1692, device='cuda:0')
episode: 94 training return: tensor(78.8972, device='cuda:0')
episode: 95 training return: tensor(166.8536, device='cuda:0')
epoch: 24 test_true_pfm: 3982.2871519659384 sim_pfm: 156.60670077669783
episode: 96 training return: tensor(118.1670, device='cuda:0')
episode: 97 training return: tensor(88.2252, device='cuda:0')
episode: 98 training return: tensor(140.2157, device='cuda:0')
episode: 99 training return: tensor(168.1847, device='cuda:0')
epoch: 25 test_true_pfm: 3996.7697595802165 sim_pfm: 143.42359267673842
episode: 100 training return: tensor(107.0382, device='cuda:0')
episode: 101 training return: tensor(113.7315, device='cuda:0')
episode: 102 training return: tensor(151.2511, device='cuda:0')
episode: 103 training return: tensor(90.0329, device='cuda:0')
epoch: 26 test_true_pfm: 3951.747652643158 sim_pfm: 145.12968513175534
episode: 104 training return: tensor(107.7853, device='cuda:0')
episode: 105 training return: tensor(94.1532, device='cuda:0')
episode: 106 training return: tensor(115.8464, device='cuda:0')
episode: 107 training return: tensor(158.7676, device='cuda:0')
epoch: 27 test_true_pfm: 3983.4931436855463 sim_pfm: 145.52388808424197
episode: 108 training return: tensor(140.5775, device='cuda:0')
episode: 109 training return: tensor(118.4449, device='cuda:0')
episode: 110 training return: tensor(85.7638, device='cuda:0')
episode: 111 training return: tensor(60.4125, device='cuda:0')
epoch: 28 test_true_pfm: 4026.497859603211 sim_pfm: 117.14475631703196
episode: 112 training return: tensor(76.3672, device='cuda:0')
episode: 113 training return: tensor(53.3575, device='cuda:0')
episode: 114 training return: tensor(84.5550, device='cuda:0')
episode: 115 training return: tensor(130.5625, device='cuda:0')
epoch: 29 test_true_pfm: 4010.0716879996494 sim_pfm: 128.7188479952747
episode: 116 training return: tensor(158.2438, device='cuda:0')
episode: 117 training return: tensor(167.3324, device='cuda:0')
episode: 118 training return: tensor(146.6515, device='cuda:0')
episode: 119 training return: tensor(170.6158, device='cuda:0')
epoch: 30 test_true_pfm: 3987.2758622584734 sim_pfm: 142.5835290097457
episode: 120 training return: tensor(112.6517, device='cuda:0')
episode: 121 training return: tensor(146.4348, device='cuda:0')
episode: 122 training return: tensor(137.2403, device='cuda:0')
episode: 123 training return: tensor(150.5327, device='cuda:0')
epoch: 31 test_true_pfm: 3983.136035964409 sim_pfm: 112.85182847019557
episode: 124 training return: tensor(147.6795, device='cuda:0')
episode: 125 training return: tensor(162.1650, device='cuda:0')
episode: 126 training return: tensor(138.6676, device='cuda:0')
episode: 127 training return: tensor(94.3663, device='cuda:0')
epoch: 32 test_true_pfm: 3891.939762214473 sim_pfm: 131.1401435917263
episode: 128 training return: tensor(150.8501, device='cuda:0')
episode: 129 training return: tensor(103.1352, device='cuda:0')
episode: 130 training return: tensor(169.4290, device='cuda:0')
episode: 131 training return: tensor(187.1170, device='cuda:0')
epoch: 33 test_true_pfm: 3991.8209936688 sim_pfm: 137.0931442679624
episode: 132 training return: tensor(149.2617, device='cuda:0')
episode: 133 training return: tensor(142.2837, device='cuda:0')
episode: 134 training return: tensor(181.5977, device='cuda:0')
episode: 135 training return: tensor(90.5181, device='cuda:0')
epoch: 34 test_true_pfm: 3998.9888678377015 sim_pfm: 114.20687221203116
episode: 136 training return: tensor(144.5159, device='cuda:0')
episode: 137 training return: tensor(74.2743, device='cuda:0')
episode: 138 training return: tensor(167.6819, device='cuda:0')
episode: 139 training return: tensor(64.7914, device='cuda:0')
epoch: 35 test_true_pfm: 3940.5045259829276 sim_pfm: 141.78111524747996
episode: 140 training return: tensor(131.8071, device='cuda:0')
episode: 141 training return: tensor(141.1699, device='cuda:0')
episode: 142 training return: tensor(162.0054, device='cuda:0')
episode: 143 training return: tensor(184.1226, device='cuda:0')
epoch: 36 test_true_pfm: 3989.059893443116 sim_pfm: 135.0397319769642
episode: 144 training return: tensor(86.0972, device='cuda:0')
episode: 145 training return: tensor(-919.9930, device='cuda:0')
episode: 146 training return: tensor(187.3248, device='cuda:0')
episode: 147 training return: tensor(107.3581, device='cuda:0')
epoch: 37 test_true_pfm: 3913.79379313748 sim_pfm: 120.07871433826706
episode: 148 training return: tensor(105.3794, device='cuda:0')
episode: 149 training return: tensor(55.4948, device='cuda:0')
episode: 150 training return: tensor(138.0415, device='cuda:0')
episode: 151 training return: tensor(186.9992, device='cuda:0')
epoch: 38 test_true_pfm: 3962.1011735794127 sim_pfm: 142.22469557719887
episode: 152 training return: tensor(129.7594, device='cuda:0')
episode: 153 training return: tensor(96.1845, device='cuda:0')
episode: 154 training return: tensor(95.0370, device='cuda:0')
episode: 155 training return: tensor(118.5550, device='cuda:0')
epoch: 39 test_true_pfm: 3960.242605503776 sim_pfm: 124.7555335193659
episode: 156 training return: tensor(110.7821, device='cuda:0')
episode: 157 training return: tensor(139.8191, device='cuda:0')
episode: 158 training return: tensor(101.2197, device='cuda:0')
episode: 159 training return: tensor(114.0887, device='cuda:0')
epoch: 40 test_true_pfm: 4013.6857643140097 sim_pfm: 154.30524210637668
episode: 160 training return: tensor(145.6534, device='cuda:0')
episode: 161 training return: tensor(179.8551, device='cuda:0')
episode: 162 training return: tensor(94.4276, device='cuda:0')
episode: 163 training return: tensor(155.6245, device='cuda:0')
epoch: 41 test_true_pfm: 3980.5735089964855 sim_pfm: 139.3166102595957
episode: 164 training return: tensor(178.6749, device='cuda:0')
episode: 165 training return: tensor(100.6356, device='cuda:0')
episode: 166 training return: tensor(173.2778, device='cuda:0')
episode: 167 training return: tensor(165.4921, device='cuda:0')
epoch: 42 test_true_pfm: 3997.271829886988 sim_pfm: 119.00805546438399
episode: 168 training return: tensor(131.8575, device='cuda:0')
episode: 169 training return: tensor(152.5014, device='cuda:0')
episode: 170 training return: tensor(165.0413, device='cuda:0')
episode: 171 training return: tensor(112.6599, device='cuda:0')
epoch: 43 test_true_pfm: 3942.1405877020684 sim_pfm: 157.7138656890214
episode: 172 training return: tensor(170.1977, device='cuda:0')
episode: 173 training return: tensor(180.6362, device='cuda:0')
episode: 174 training return: tensor(157.3960, device='cuda:0')
episode: 175 training return: tensor(99.9928, device='cuda:0')
epoch: 44 test_true_pfm: 4026.760440812921 sim_pfm: 118.96525040309643
episode: 176 training return: tensor(150.9751, device='cuda:0')
episode: 177 training return: tensor(121.2229, device='cuda:0')
episode: 178 training return: tensor(-32.9042, device='cuda:0')
episode: 179 training return: tensor(19.2524, device='cuda:0')
epoch: 45 test_true_pfm: 3987.5493396827937 sim_pfm: 153.6652827145202
episode: 180 training return: tensor(81.5968, device='cuda:0')
episode: 181 training return: tensor(179.5068, device='cuda:0')
episode: 182 training return: tensor(159.6552, device='cuda:0')
episode: 183 training return: tensor(106.9378, device='cuda:0')
epoch: 46 test_true_pfm: 3981.395974246427 sim_pfm: 98.72588368990303
episode: 184 training return: tensor(130.2359, device='cuda:0')
episode: 185 training return: tensor(157.0549, device='cuda:0')
episode: 186 training return: tensor(108.9388, device='cuda:0')
episode: 187 training return: tensor(160.6308, device='cuda:0')
epoch: 47 test_true_pfm: 4027.8034271938272 sim_pfm: 155.495079910208
episode: 188 training return: tensor(166.4818, device='cuda:0')
episode: 189 training return: tensor(197.1914, device='cuda:0')
episode: 190 training return: tensor(123.7265, device='cuda:0')
episode: 191 training return: tensor(113.3300, device='cuda:0')
epoch: 48 test_true_pfm: 4002.5735472595547 sim_pfm: 103.3610020877871
episode: 192 training return: tensor(157.2047, device='cuda:0')
episode: 193 training return: tensor(158.6037, device='cuda:0')
episode: 194 training return: tensor(151.4851, device='cuda:0')
episode: 195 training return: tensor(79.6624, device='cuda:0')
epoch: 49 test_true_pfm: 4031.624645548541 sim_pfm: 142.36418239277555
episode: 196 training return: tensor(125.6166, device='cuda:0')
episode: 197 training return: tensor(113.5817, device='cuda:0')
episode: 198 training return: tensor(148.3090, device='cuda:0')
episode: 199 training return: tensor(159.3909, device='cuda:0')
epoch: 50 test_true_pfm: 4014.5115529227824 sim_pfm: 113.7451609276565
episode: 200 training return: tensor(106.1371, device='cuda:0')
episode: 201 training return: tensor(84.7799, device='cuda:0')
episode: 202 training return: tensor(106.4917, device='cuda:0')
episode: 203 training return: tensor(45.9326, device='cuda:0')
epoch: 51 test_true_pfm: 3986.897692036459 sim_pfm: 96.79684787669491
episode: 204 training return: tensor(106.3256, device='cuda:0')
episode: 205 training return: tensor(100.2674, device='cuda:0')
episode: 206 training return: tensor(171.9369, device='cuda:0')
episode: 207 training return: tensor(156.5567, device='cuda:0')
epoch: 52 test_true_pfm: 3996.6517490956317 sim_pfm: 136.97784924588632
episode: 208 training return: tensor(136.7814, device='cuda:0')
episode: 209 training return: tensor(149.5230, device='cuda:0')
episode: 210 training return: tensor(82.9094, device='cuda:0')
episode: 211 training return: tensor(159.3192, device='cuda:0')
epoch: 53 test_true_pfm: 4021.4234735072096 sim_pfm: 143.5051472345464
episode: 212 training return: tensor(89.9726, device='cuda:0')
episode: 213 training return: tensor(130.2786, device='cuda:0')
episode: 214 training return: tensor(108.9016, device='cuda:0')
episode: 215 training return: tensor(153.0257, device='cuda:0')
epoch: 54 test_true_pfm: 4008.1515583546466 sim_pfm: 138.0186677935999
episode: 216 training return: tensor(112.0206, device='cuda:0')
episode: 217 training return: tensor(89.8810, device='cuda:0')
episode: 218 training return: tensor(134.5956, device='cuda:0')
episode: 219 training return: tensor(177.2276, device='cuda:0')
epoch: 55 test_true_pfm: 4017.8992709487757 sim_pfm: 152.47889700405844
episode: 220 training return: tensor(101.7173, device='cuda:0')
episode: 221 training return: tensor(76.9900, device='cuda:0')
episode: 222 training return: tensor(161.4178, device='cuda:0')
episode: 223 training return: tensor(154.6259, device='cuda:0')
epoch: 56 test_true_pfm: 3985.184390425629 sim_pfm: 149.66617792557614
episode: 224 training return: tensor(96.9640, device='cuda:0')
episode: 225 training return: tensor(177.7442, device='cuda:0')
episode: 226 training return: tensor(113.8926, device='cuda:0')
episode: 227 training return: tensor(51.4465, device='cuda:0')
epoch: 57 test_true_pfm: 3986.2385439195664 sim_pfm: 164.3712098725567
episode: 228 training return: tensor(163.0803, device='cuda:0')
episode: 229 training return: tensor(154.6093, device='cuda:0')
episode: 230 training return: tensor(169.5515, device='cuda:0')
episode: 231 training return: tensor(180.7173, device='cuda:0')
epoch: 58 test_true_pfm: 3928.398831109693 sim_pfm: 122.37206903185385
episode: 232 training return: tensor(95.4231, device='cuda:0')
episode: 233 training return: tensor(145.0199, device='cuda:0')
episode: 234 training return: tensor(141.7012, device='cuda:0')
episode: 235 training return: tensor(68.4714, device='cuda:0')
epoch: 59 test_true_pfm: 3972.977702266599 sim_pfm: 122.72885410254821
episode: 236 training return: tensor(130.0583, device='cuda:0')
episode: 237 training return: tensor(166.5379, device='cuda:0')
episode: 238 training return: tensor(92.5187, device='cuda:0')
episode: 239 training return: tensor(167.2861, device='cuda:0')
epoch: 60 test_true_pfm: 3993.104370627909 sim_pfm: 134.65700245426464
episode: 240 training return: tensor(124.7824, device='cuda:0')
episode: 241 training return: tensor(186.7885, device='cuda:0')
episode: 242 training return: tensor(165.8508, device='cuda:0')
episode: 243 training return: tensor(120.4845, device='cuda:0')
epoch: 61 test_true_pfm: 3972.303115710673 sim_pfm: 144.80650269781472
episode: 244 training return: tensor(172.7750, device='cuda:0')
episode: 245 training return: tensor(145.2768, device='cuda:0')
episode: 246 training return: tensor(111.7476, device='cuda:0')
episode: 247 training return: tensor(169.4633, device='cuda:0')
epoch: 62 test_true_pfm: 3996.4971872617703 sim_pfm: 137.27668371298932
episode: 248 training return: tensor(116.7368, device='cuda:0')
episode: 249 training return: tensor(116.2811, device='cuda:0')
episode: 250 training return: tensor(175.4139, device='cuda:0')
episode: 251 training return: tensor(178.8513, device='cuda:0')
epoch: 63 test_true_pfm: 3996.3987146584936 sim_pfm: 112.98334716599008
episode: 252 training return: tensor(106.6282, device='cuda:0')
episode: 253 training return: tensor(133.1224, device='cuda:0')
episode: 254 training return: tensor(193.4501, device='cuda:0')
episode: 255 training return: tensor(107.3000, device='cuda:0')
epoch: 64 test_true_pfm: 4041.5749699434873 sim_pfm: 47.07417276724785
episode: 256 training return: tensor(127.2201, device='cuda:0')
episode: 257 training return: tensor(169.9674, device='cuda:0')
episode: 258 training return: tensor(166.7423, device='cuda:0')
episode: 259 training return: tensor(146.4809, device='cuda:0')
epoch: 65 test_true_pfm: 3991.122906391173 sim_pfm: 130.33367002008404
episode: 260 training return: tensor(82.7896, device='cuda:0')
episode: 261 training return: tensor(91.0322, device='cuda:0')
episode: 262 training return: tensor(97.5674, device='cuda:0')
episode: 263 training return: tensor(125.5174, device='cuda:0')
epoch: 66 test_true_pfm: 3988.670505732059 sim_pfm: 151.8059755200617
episode: 264 training return: tensor(129.5826, device='cuda:0')
episode: 265 training return: tensor(180.5442, device='cuda:0')
episode: 266 training return: tensor(196.7245, device='cuda:0')
episode: 267 training return: tensor(123.7320, device='cuda:0')
epoch: 67 test_true_pfm: 3973.4855998105686 sim_pfm: 143.07173743711124
episode: 268 training return: tensor(182.4226, device='cuda:0')
episode: 269 training return: tensor(134.4870, device='cuda:0')
episode: 270 training return: tensor(170.5689, device='cuda:0')
episode: 271 training return: tensor(177.6267, device='cuda:0')
epoch: 68 test_true_pfm: 4007.2154888801156 sim_pfm: 157.02755155581204
episode: 272 training return: tensor(104.6003, device='cuda:0')
episode: 273 training return: tensor(105.0975, device='cuda:0')
episode: 274 training return: tensor(163.7522, device='cuda:0')
episode: 275 training return: tensor(185.7759, device='cuda:0')
epoch: 69 test_true_pfm: 4050.682923306122 sim_pfm: 148.17265974883534
episode: 276 training return: tensor(139.8880, device='cuda:0')
episode: 277 training return: tensor(120.6921, device='cuda:0')
episode: 278 training return: tensor(193.1591, device='cuda:0')
episode: 279 training return: tensor(191.3628, device='cuda:0')
epoch: 70 test_true_pfm: 3998.622424396759 sim_pfm: 148.55169707142826
episode: 280 training return: tensor(83.4179, device='cuda:0')
episode: 281 training return: tensor(129.6604, device='cuda:0')
episode: 282 training return: tensor(158.5419, device='cuda:0')
episode: 283 training return: tensor(137.4850, device='cuda:0')
epoch: 71 test_true_pfm: 3999.708162335461 sim_pfm: 122.83691328851273
episode: 284 training return: tensor(180.0845, device='cuda:0')
episode: 285 training return: tensor(184.3018, device='cuda:0')
episode: 286 training return: tensor(173.4660, device='cuda:0')
episode: 287 training return: tensor(106.7120, device='cuda:0')
epoch: 72 test_true_pfm: 3997.784529255941 sim_pfm: 137.33958400471602
episode: 288 training return: tensor(172.4521, device='cuda:0')
episode: 289 training return: tensor(179.9204, device='cuda:0')
episode: 290 training return: tensor(185.9855, device='cuda:0')
episode: 291 training return: tensor(71.0926, device='cuda:0')
epoch: 73 test_true_pfm: 3963.6795163337524 sim_pfm: 141.0643537561118
episode: 292 training return: tensor(65.6462, device='cuda:0')
episode: 293 training return: tensor(148.2023, device='cuda:0')
episode: 294 training return: tensor(143.7069, device='cuda:0')
episode: 295 training return: tensor(144.1160, device='cuda:0')
epoch: 74 test_true_pfm: 3989.602560631481 sim_pfm: 153.94499220419675
episode: 296 training return: tensor(178.2090, device='cuda:0')
episode: 297 training return: tensor(154.1991, device='cuda:0')
episode: 298 training return: tensor(132.3992, device='cuda:0')
episode: 299 training return: tensor(164.8019, device='cuda:0')
epoch: 75 test_true_pfm: 3957.7614764760365 sim_pfm: 130.63241812425744
episode: 300 training return: tensor(122.9193, device='cuda:0')
episode: 301 training return: tensor(164.5875, device='cuda:0')
episode: 302 training return: tensor(80.3250, device='cuda:0')
episode: 303 training return: tensor(213.9758, device='cuda:0')
epoch: 76 test_true_pfm: 3965.376847654865 sim_pfm: 148.04250989090846
episode: 304 training return: tensor(187.0627, device='cuda:0')
episode: 305 training return: tensor(160.2089, device='cuda:0')
episode: 306 training return: tensor(153.3862, device='cuda:0')
episode: 307 training return: tensor(104.4218, device='cuda:0')
epoch: 77 test_true_pfm: 4009.6148029311285 sim_pfm: 145.37439361491124
episode: 308 training return: tensor(45.2991, device='cuda:0')
episode: 309 training return: tensor(140.0431, device='cuda:0')
episode: 310 training return: tensor(140.3024, device='cuda:0')
episode: 311 training return: tensor(192.9677, device='cuda:0')
epoch: 78 test_true_pfm: 4002.4928497656806 sim_pfm: 124.22773850537487
episode: 312 training return: tensor(170.1151, device='cuda:0')
episode: 313 training return: tensor(192.8111, device='cuda:0')
episode: 314 training return: tensor(112.8925, device='cuda:0')
episode: 315 training return: tensor(107.4791, device='cuda:0')
epoch: 79 test_true_pfm: 3996.158861449707 sim_pfm: 108.6769638863625
episode: 316 training return: tensor(134.4959, device='cuda:0')
episode: 317 training return: tensor(183.0247, device='cuda:0')
episode: 318 training return: tensor(122.3594, device='cuda:0')
episode: 319 training return: tensor(168.9525, device='cuda:0')
epoch: 80 test_true_pfm: 3993.0902451135607 sim_pfm: 127.80331822338242
episode: 320 training return: tensor(163.7832, device='cuda:0')
episode: 321 training return: tensor(176.6088, device='cuda:0')
episode: 322 training return: tensor(111.5539, device='cuda:0')
episode: 323 training return: tensor(116.7475, device='cuda:0')
epoch: 81 test_true_pfm: 3993.669216443137 sim_pfm: 118.03916528900542
episode: 324 training return: tensor(157.1784, device='cuda:0')
episode: 325 training return: tensor(117.4012, device='cuda:0')
episode: 326 training return: tensor(103.5258, device='cuda:0')
episode: 327 training return: tensor(125.5863, device='cuda:0')
epoch: 82 test_true_pfm: 3990.9365018317562 sim_pfm: 126.62196918020102
episode: 328 training return: tensor(120.5883, device='cuda:0')
episode: 329 training return: tensor(146.3642, device='cuda:0')
episode: 330 training return: tensor(140.7755, device='cuda:0')
episode: 331 training return: tensor(169.0873, device='cuda:0')
epoch: 83 test_true_pfm: 4000.264558834264 sim_pfm: 146.1068931283274
episode: 332 training return: tensor(179.1577, device='cuda:0')
episode: 333 training return: tensor(125.1717, device='cuda:0')
episode: 334 training return: tensor(197.6294, device='cuda:0')
episode: 335 training return: tensor(169.5411, device='cuda:0')
epoch: 84 test_true_pfm: 3976.9743869127483 sim_pfm: 160.66234691557474
episode: 336 training return: tensor(91.0611, device='cuda:0')
episode: 337 training return: tensor(174.1996, device='cuda:0')
episode: 338 training return: tensor(151.5134, device='cuda:0')
episode: 339 training return: tensor(130.0010, device='cuda:0')
epoch: 85 test_true_pfm: 3994.7867742102735 sim_pfm: 138.41658020412433
episode: 340 training return: tensor(169.8331, device='cuda:0')
episode: 341 training return: tensor(144.3159, device='cuda:0')
episode: 342 training return: tensor(153.5854, device='cuda:0')
episode: 343 training return: tensor(131.3611, device='cuda:0')
epoch: 86 test_true_pfm: 4020.6823964736336 sim_pfm: 118.81858196455869
episode: 344 training return: tensor(142.7044, device='cuda:0')
episode: 345 training return: tensor(144.9880, device='cuda:0')
episode: 346 training return: tensor(94.8391, device='cuda:0')
episode: 347 training return: tensor(165.3374, device='cuda:0')
epoch: 87 test_true_pfm: 4013.9162372678 sim_pfm: 144.24178128732214
episode: 348 training return: tensor(120.2641, device='cuda:0')
episode: 349 training return: tensor(167.5767, device='cuda:0')
episode: 350 training return: tensor(149.1052, device='cuda:0')
episode: 351 training return: tensor(146.1586, device='cuda:0')
epoch: 88 test_true_pfm: 4002.762919953087 sim_pfm: 150.64143550323206
episode: 352 training return: tensor(112.8664, device='cuda:0')
episode: 353 training return: tensor(106.8154, device='cuda:0')
episode: 354 training return: tensor(64.0024, device='cuda:0')
episode: 355 training return: tensor(172.7254, device='cuda:0')
epoch: 89 test_true_pfm: 4002.8360451553744 sim_pfm: 141.8377131166247
episode: 356 training return: tensor(117.2418, device='cuda:0')
episode: 357 training return: tensor(165.9608, device='cuda:0')
episode: 358 training return: tensor(158.7567, device='cuda:0')
episode: 359 training return: tensor(131.5049, device='cuda:0')
epoch: 90 test_true_pfm: 4064.1843960793117 sim_pfm: 175.1752359063636
episode: 360 training return: tensor(134.6552, device='cuda:0')
episode: 361 training return: tensor(120.5331, device='cuda:0')
episode: 362 training return: tensor(163.4524, device='cuda:0')
episode: 363 training return: tensor(114.6749, device='cuda:0')
epoch: 91 test_true_pfm: 3812.621805405091 sim_pfm: 140.43591268287855
episode: 364 training return: tensor(190.7271, device='cuda:0')
episode: 365 training return: tensor(119.0418, device='cuda:0')
episode: 366 training return: tensor(102.6611, device='cuda:0')
episode: 367 training return: tensor(185.0659, device='cuda:0')
epoch: 92 test_true_pfm: 3977.2608623022084 sim_pfm: 109.70164934349789
episode: 368 training return: tensor(168.5648, device='cuda:0')
episode: 369 training return: tensor(160.4359, device='cuda:0')
episode: 370 training return: tensor(175.9641, device='cuda:0')
episode: 371 training return: tensor(186.3080, device='cuda:0')
epoch: 93 test_true_pfm: 4007.7542324267633 sim_pfm: 137.65065407083603
episode: 372 training return: tensor(181.1824, device='cuda:0')
episode: 373 training return: tensor(87.2560, device='cuda:0')
episode: 374 training return: tensor(131.6934, device='cuda:0')
episode: 375 training return: tensor(139.5578, device='cuda:0')
epoch: 94 test_true_pfm: 4009.8480602744326 sim_pfm: 140.14378158647256
episode: 376 training return: tensor(161.1486, device='cuda:0')
episode: 377 training return: tensor(58.1004, device='cuda:0')
episode: 378 training return: tensor(180.1981, device='cuda:0')
episode: 379 training return: tensor(160.4415, device='cuda:0')
epoch: 95 test_true_pfm: 3954.3671141642103 sim_pfm: 118.50334263790864
episode: 380 training return: tensor(156.3053, device='cuda:0')
episode: 381 training return: tensor(193.7265, device='cuda:0')
episode: 382 training return: tensor(191.2272, device='cuda:0')
episode: 383 training return: tensor(134.7005, device='cuda:0')
epoch: 96 test_true_pfm: 3965.7719640800165 sim_pfm: 102.06219619660017
episode: 384 training return: tensor(81.6602, device='cuda:0')
episode: 385 training return: tensor(152.4029, device='cuda:0')
episode: 386 training return: tensor(186.0639, device='cuda:0')
episode: 387 training return: tensor(128.3864, device='cuda:0')
epoch: 97 test_true_pfm: 3727.195311155782 sim_pfm: 123.35808650474064
episode: 388 training return: tensor(122.8012, device='cuda:0')
episode: 389 training return: tensor(160.7122, device='cuda:0')
episode: 390 training return: tensor(149.9429, device='cuda:0')
episode: 391 training return: tensor(55.7418, device='cuda:0')
epoch: 98 test_true_pfm: 3973.0591915047657 sim_pfm: 156.51645015006457
episode: 392 training return: tensor(67.8916, device='cuda:0')
episode: 393 training return: tensor(125.9920, device='cuda:0')
episode: 394 training return: tensor(194.8332, device='cuda:0')
episode: 395 training return: tensor(138.5180, device='cuda:0')
epoch: 99 test_true_pfm: 3971.457025578382 sim_pfm: 118.79259815610324
episode: 396 training return: tensor(156.0983, device='cuda:0')
episode: 397 training return: tensor(168.3909, device='cuda:0')
episode: 398 training return: tensor(119.7606, device='cuda:0')
episode: 399 training return: tensor(140.1899, device='cuda:0')
epoch: 100 test_true_pfm: 4025.983311969591 sim_pfm: 151.29495180922095
episode: 400 training return: tensor(182.2910, device='cuda:0')
episode: 401 training return: tensor(121.6823, device='cuda:0')
episode: 402 training return: tensor(109.1156, device='cuda:0')
episode: 403 training return: tensor(136.5977, device='cuda:0')
epoch: 101 test_true_pfm: 4042.7495636160165 sim_pfm: 141.38406525496006
episode: 404 training return: tensor(170.0356, device='cuda:0')
episode: 405 training return: tensor(183.8320, device='cuda:0')
episode: 406 training return: tensor(125.9019, device='cuda:0')
episode: 407 training return: tensor(138.8265, device='cuda:0')
epoch: 102 test_true_pfm: 4000.623741835941 sim_pfm: 154.58402982041784
episode: 408 training return: tensor(175.4114, device='cuda:0')
episode: 409 training return: tensor(181.4967, device='cuda:0')
episode: 410 training return: tensor(172.3127, device='cuda:0')
episode: 411 training return: tensor(168.6779, device='cuda:0')
epoch: 103 test_true_pfm: 3989.878300633935 sim_pfm: 144.02759366789056
episode: 412 training return: tensor(126.4612, device='cuda:0')
episode: 413 training return: tensor(132.9994, device='cuda:0')
episode: 414 training return: tensor(175.8334, device='cuda:0')
episode: 415 training return: tensor(159.8172, device='cuda:0')
epoch: 104 test_true_pfm: 4023.835798565537 sim_pfm: 139.12666790411458
episode: 416 training return: tensor(173.4128, device='cuda:0')
episode: 417 training return: tensor(167.0738, device='cuda:0')
episode: 418 training return: tensor(183.9604, device='cuda:0')
episode: 419 training return: tensor(69.1561, device='cuda:0')
epoch: 105 test_true_pfm: 4020.1459891775994 sim_pfm: 166.84820046580475
episode: 420 training return: tensor(129.7608, device='cuda:0')
episode: 421 training return: tensor(155.7041, device='cuda:0')
episode: 422 training return: tensor(149.3966, device='cuda:0')
episode: 423 training return: tensor(154.1007, device='cuda:0')
epoch: 106 test_true_pfm: 3974.449179366203 sim_pfm: 156.29046492753938
episode: 424 training return: tensor(128.9497, device='cuda:0')
episode: 425 training return: tensor(179.1366, device='cuda:0')
episode: 426 training return: tensor(187.0533, device='cuda:0')
episode: 427 training return: tensor(135.6329, device='cuda:0')
epoch: 107 test_true_pfm: 4009.0982862276283 sim_pfm: 142.30169157941904
episode: 428 training return: tensor(183.4443, device='cuda:0')
episode: 429 training return: tensor(185.1983, device='cuda:0')
episode: 430 training return: tensor(128.8470, device='cuda:0')
episode: 431 training return: tensor(142.1821, device='cuda:0')
epoch: 108 test_true_pfm: 4013.360030674435 sim_pfm: 169.94290772050348
episode: 432 training return: tensor(172.5797, device='cuda:0')
episode: 433 training return: tensor(185.7325, device='cuda:0')
episode: 434 training return: tensor(132.6961, device='cuda:0')
episode: 435 training return: tensor(156.9432, device='cuda:0')
epoch: 109 test_true_pfm: 4017.4918804788635 sim_pfm: 166.54646499212444
episode: 436 training return: tensor(144.4587, device='cuda:0')
episode: 437 training return: tensor(141.0986, device='cuda:0')
episode: 438 training return: tensor(80.0992, device='cuda:0')
episode: 439 training return: tensor(129.4686, device='cuda:0')
epoch: 110 test_true_pfm: 4002.4140451447133 sim_pfm: 137.43456357938703
episode: 440 training return: tensor(134.0992, device='cuda:0')
episode: 441 training return: tensor(168.8257, device='cuda:0')
episode: 442 training return: tensor(189.4545, device='cuda:0')
episode: 443 training return: tensor(90.4753, device='cuda:0')
epoch: 111 test_true_pfm: 4008.6513055977794 sim_pfm: 143.68796935184704
episode: 444 training return: tensor(162.1918, device='cuda:0')
episode: 445 training return: tensor(189.4465, device='cuda:0')
episode: 446 training return: tensor(158.2722, device='cuda:0')
episode: 447 training return: tensor(116.0829, device='cuda:0')
epoch: 112 test_true_pfm: 3981.1537487996516 sim_pfm: 135.94394823205462
episode: 448 training return: tensor(142.1177, device='cuda:0')
episode: 449 training return: tensor(103.7150, device='cuda:0')
episode: 450 training return: tensor(141.1681, device='cuda:0')
episode: 451 training return: tensor(149.3156, device='cuda:0')
epoch: 113 test_true_pfm: 3970.546766384341 sim_pfm: 110.32898873522451
episode: 452 training return: tensor(114.3611, device='cuda:0')
episode: 453 training return: tensor(135.3298, device='cuda:0')
episode: 454 training return: tensor(167.0551, device='cuda:0')
episode: 455 training return: tensor(96.3765, device='cuda:0')
epoch: 114 test_true_pfm: 3981.6380530526626 sim_pfm: 104.19664257157517
episode: 456 training return: tensor(107.9100, device='cuda:0')
episode: 457 training return: tensor(134.3820, device='cuda:0')
episode: 458 training return: tensor(107.6160, device='cuda:0')
episode: 459 training return: tensor(130.5517, device='cuda:0')
epoch: 115 test_true_pfm: 4018.9519656368066 sim_pfm: 157.2285333268616
episode: 460 training return: tensor(176.9658, device='cuda:0')
episode: 461 training return: tensor(191.9790, device='cuda:0')
episode: 462 training return: tensor(147.7865, device='cuda:0')
episode: 463 training return: tensor(181.2660, device='cuda:0')
epoch: 116 test_true_pfm: 4014.530132230487 sim_pfm: 181.99354485882213
episode: 464 training return: tensor(185.0075, device='cuda:0')
episode: 465 training return: tensor(148.8543, device='cuda:0')
episode: 466 training return: tensor(184.7964, device='cuda:0')
episode: 467 training return: tensor(188.8287, device='cuda:0')
epoch: 117 test_true_pfm: 4047.481560005824 sim_pfm: 187.5375261142908
episode: 468 training return: tensor(179.7120, device='cuda:0')
episode: 469 training return: tensor(192.3482, device='cuda:0')
episode: 470 training return: tensor(206.1359, device='cuda:0')
episode: 471 training return: tensor(192.4868, device='cuda:0')
epoch: 118 test_true_pfm: 4043.5955622869483 sim_pfm: 165.7178973297899
episode: 472 training return: tensor(194.7188, device='cuda:0')
episode: 473 training return: tensor(147.8453, device='cuda:0')
episode: 474 training return: tensor(174.5180, device='cuda:0')
episode: 475 training return: tensor(143.4619, device='cuda:0')
epoch: 119 test_true_pfm: 3992.23435613135 sim_pfm: 121.98320376416086
episode: 476 training return: tensor(189.0887, device='cuda:0')
episode: 477 training return: tensor(156.5694, device='cuda:0')
episode: 478 training return: tensor(172.0099, device='cuda:0')
episode: 479 training return: tensor(184.1339, device='cuda:0')
epoch: 120 test_true_pfm: 4026.970484741168 sim_pfm: 168.61391601607707
episode: 480 training return: tensor(193.5768, device='cuda:0')
episode: 481 training return: tensor(223.3527, device='cuda:0')
episode: 482 training return: tensor(131.9494, device='cuda:0')
episode: 483 training return: tensor(194.6277, device='cuda:0')
epoch: 121 test_true_pfm: 4019.5636210755824 sim_pfm: 168.69338398218193
episode: 484 training return: tensor(194.2726, device='cuda:0')
episode: 485 training return: tensor(180.1132, device='cuda:0')
episode: 486 training return: tensor(194.8694, device='cuda:0')
episode: 487 training return: tensor(190.8270, device='cuda:0')
epoch: 122 test_true_pfm: 4045.468659849657 sim_pfm: 167.49517535439614
episode: 488 training return: tensor(153.8750, device='cuda:0')
episode: 489 training return: tensor(186.9323, device='cuda:0')
episode: 490 training return: tensor(118.3408, device='cuda:0')
episode: 491 training return: tensor(169.2072, device='cuda:0')
epoch: 123 test_true_pfm: 4050.5585177870507 sim_pfm: 198.0704154202928
episode: 492 training return: tensor(155.7860, device='cuda:0')
episode: 493 training return: tensor(209.0995, device='cuda:0')
episode: 494 training return: tensor(114.5652, device='cuda:0')
episode: 495 training return: tensor(177.8804, device='cuda:0')
epoch: 124 test_true_pfm: 4019.3747752305644 sim_pfm: 135.75144162208503
episode: 496 training return: tensor(186.8894, device='cuda:0')
episode: 497 training return: tensor(-897.0128, device='cuda:0')
episode: 498 training return: tensor(196.8544, device='cuda:0')
episode: 499 training return: tensor(191.5896, device='cuda:0')
epoch: 125 test_true_pfm: 3997.1192765329665 sim_pfm: 149.3123921037671
episode: 500 training return: tensor(148.4660, device='cuda:0')
episode: 501 training return: tensor(197.3169, device='cuda:0')
episode: 502 training return: tensor(174.4684, device='cuda:0')
episode: 503 training return: tensor(205.5492, device='cuda:0')
epoch: 126 test_true_pfm: 4066.5312496298598 sim_pfm: 147.69330393604469
episode: 504 training return: tensor(124.9581, device='cuda:0')
episode: 505 training return: tensor(172.2045, device='cuda:0')
episode: 506 training return: tensor(143.1163, device='cuda:0')
episode: 507 training return: tensor(170.8005, device='cuda:0')
epoch: 127 test_true_pfm: 4042.200766636799 sim_pfm: 157.577996250989
episode: 508 training return: tensor(173.3059, device='cuda:0')
episode: 509 training return: tensor(148.5898, device='cuda:0')
episode: 510 training return: tensor(147.5538, device='cuda:0')
episode: 511 training return: tensor(119.7732, device='cuda:0')
epoch: 128 test_true_pfm: 4006.053116671274 sim_pfm: 156.66980880078822
episode: 512 training return: tensor(175.8056, device='cuda:0')
episode: 513 training return: tensor(149.4216, device='cuda:0')
episode: 514 training return: tensor(185.9072, device='cuda:0')
episode: 515 training return: tensor(183.5787, device='cuda:0')
epoch: 129 test_true_pfm: 4026.500167278425 sim_pfm: 168.5273738130248
episode: 516 training return: tensor(178.1355, device='cuda:0')
episode: 517 training return: tensor(147.3349, device='cuda:0')
episode: 518 training return: tensor(135.7964, device='cuda:0')
episode: 519 training return: tensor(156.6778, device='cuda:0')
epoch: 130 test_true_pfm: 4006.227416929334 sim_pfm: 168.71313991754627
episode: 520 training return: tensor(152.8216, device='cuda:0')
episode: 521 training return: tensor(188.0358, device='cuda:0')
episode: 522 training return: tensor(189.2209, device='cuda:0')
episode: 523 training return: tensor(160.1473, device='cuda:0')
epoch: 131 test_true_pfm: 4024.8907751542433 sim_pfm: 187.20142779692347
episode: 524 training return: tensor(196.2282, device='cuda:0')
episode: 525 training return: tensor(180.2162, device='cuda:0')
episode: 526 training return: tensor(180.5230, device='cuda:0')
episode: 527 training return: tensor(139.5845, device='cuda:0')
epoch: 132 test_true_pfm: 4044.8192339898596 sim_pfm: 171.73014400913962
episode: 528 training return: tensor(169.1013, device='cuda:0')
episode: 529 training return: tensor(122.9166, device='cuda:0')
episode: 530 training return: tensor(177.7715, device='cuda:0')
episode: 531 training return: tensor(175.4208, device='cuda:0')
epoch: 133 test_true_pfm: 4054.538120944913 sim_pfm: 189.9093559162478
episode: 532 training return: tensor(117.3665, device='cuda:0')
episode: 533 training return: tensor(154.3009, device='cuda:0')
episode: 534 training return: tensor(123.3187, device='cuda:0')
episode: 535 training return: tensor(155.8161, device='cuda:0')
epoch: 134 test_true_pfm: 4063.446349068315 sim_pfm: 186.43498764312244
episode: 536 training return: tensor(193.3234, device='cuda:0')
episode: 537 training return: tensor(-966.1657, device='cuda:0')
episode: 538 training return: tensor(156.5746, device='cuda:0')
episode: 539 training return: tensor(112.1098, device='cuda:0')
epoch: 135 test_true_pfm: 4018.8878217885103 sim_pfm: 172.07396046632007
episode: 540 training return: tensor(144.3395, device='cuda:0')
episode: 541 training return: tensor(184.5768, device='cuda:0')
episode: 542 training return: tensor(174.1736, device='cuda:0')
episode: 543 training return: tensor(169.9280, device='cuda:0')
epoch: 136 test_true_pfm: 4012.7666788982224 sim_pfm: 148.10943497816334
episode: 544 training return: tensor(161.1514, device='cuda:0')
episode: 545 training return: tensor(181.5910, device='cuda:0')
episode: 546 training return: tensor(142.4659, device='cuda:0')
episode: 547 training return: tensor(156.9220, device='cuda:0')
epoch: 137 test_true_pfm: 4028.896069626832 sim_pfm: 170.0338082591285
episode: 548 training return: tensor(179.2780, device='cuda:0')
episode: 549 training return: tensor(186.6083, device='cuda:0')
episode: 550 training return: tensor(179.1047, device='cuda:0')
episode: 551 training return: tensor(108.3735, device='cuda:0')
epoch: 138 test_true_pfm: 3999.3853103585348 sim_pfm: 167.98987840742726
episode: 552 training return: tensor(168.9938, device='cuda:0')
episode: 553 training return: tensor(172.0928, device='cuda:0')
episode: 554 training return: tensor(137.7624, device='cuda:0')
episode: 555 training return: tensor(161.8866, device='cuda:0')
epoch: 139 test_true_pfm: 3965.249805314738 sim_pfm: 139.9735765112952
episode: 556 training return: tensor(191.6291, device='cuda:0')
episode: 557 training return: tensor(131.1880, device='cuda:0')
episode: 558 training return: tensor(-788.5459, device='cuda:0')
episode: 559 training return: tensor(129.1886, device='cuda:0')
epoch: 140 test_true_pfm: 4041.4891660912203 sim_pfm: 190.87399768132795
episode: 560 training return: tensor(163.0885, device='cuda:0')
episode: 561 training return: tensor(157.8729, device='cuda:0')
episode: 562 training return: tensor(174.2512, device='cuda:0')
episode: 563 training return: tensor(191.9586, device='cuda:0')
epoch: 141 test_true_pfm: 4003.304302308143 sim_pfm: 128.19796305086734
episode: 564 training return: tensor(124.6240, device='cuda:0')
episode: 565 training return: tensor(178.6787, device='cuda:0')
episode: 566 training return: tensor(170.6000, device='cuda:0')
episode: 567 training return: tensor(172.8032, device='cuda:0')
epoch: 142 test_true_pfm: 4035.8358954901196 sim_pfm: 175.24503072246443
episode: 568 training return: tensor(164.1500, device='cuda:0')
episode: 569 training return: tensor(200.5699, device='cuda:0')
episode: 570 training return: tensor(168.3121, device='cuda:0')
episode: 571 training return: tensor(140.3913, device='cuda:0')
epoch: 143 test_true_pfm: 4064.8336532882354 sim_pfm: 188.67924074821835
episode: 572 training return: tensor(180.5585, device='cuda:0')
episode: 573 training return: tensor(169.5857, device='cuda:0')
episode: 574 training return: tensor(185.5289, device='cuda:0')
episode: 575 training return: tensor(134.8833, device='cuda:0')
epoch: 144 test_true_pfm: 4037.471948368253 sim_pfm: 163.10269623462227
episode: 576 training return: tensor(194.5081, device='cuda:0')
episode: 577 training return: tensor(175.6974, device='cuda:0')
episode: 578 training return: tensor(131.5586, device='cuda:0')
episode: 579 training return: tensor(184.0711, device='cuda:0')
epoch: 145 test_true_pfm: 4036.2006748785134 sim_pfm: 162.39881967773545
episode: 580 training return: tensor(105.2986, device='cuda:0')
episode: 581 training return: tensor(149.1300, device='cuda:0')
episode: 582 training return: tensor(143.7701, device='cuda:0')
episode: 583 training return: tensor(107.8280, device='cuda:0')
epoch: 146 test_true_pfm: 4059.0885683954425 sim_pfm: 202.30593418647186
episode: 584 training return: tensor(163.1316, device='cuda:0')
episode: 585 training return: tensor(104.8017, device='cuda:0')
episode: 586 training return: tensor(129.5832, device='cuda:0')
episode: 587 training return: tensor(196.6943, device='cuda:0')
epoch: 147 test_true_pfm: 4034.494820899939 sim_pfm: 177.11437384437886
episode: 588 training return: tensor(132.4889, device='cuda:0')
episode: 589 training return: tensor(138.7436, device='cuda:0')
episode: 590 training return: tensor(179.2152, device='cuda:0')
episode: 591 training return: tensor(52.7696, device='cuda:0')
epoch: 148 test_true_pfm: 3989.941782627839 sim_pfm: 188.68635024727942
episode: 592 training return: tensor(178.1131, device='cuda:0')
episode: 593 training return: tensor(125.4259, device='cuda:0')
episode: 594 training return: tensor(132.6462, device='cuda:0')
episode: 595 training return: tensor(150.0304, device='cuda:0')
epoch: 149 test_true_pfm: 4055.22413079226 sim_pfm: 202.5403160021912
episode: 596 training return: tensor(133.1996, device='cuda:0')
episode: 597 training return: tensor(186.8510, device='cuda:0')
episode: 598 training return: tensor(146.3517, device='cuda:0')
episode: 599 training return: tensor(188.1742, device='cuda:0')
epoch: 150 test_true_pfm: 4027.383000982456 sim_pfm: 151.6515002302282
