['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '3', '--data', '10000', '--sub']
epoch: 0 training_loss 0.2624410679936409 test_loss: 0.2145538330078125
epoch: 1 training_loss 0.16264388360083104 test_loss: 0.1660634160041809
epoch: 2 training_loss 0.1346790986135602 test_loss: 0.16103098392486573
epoch: 3 training_loss 0.1309243119135499 test_loss: 0.1310973048210144
epoch: 4 training_loss 0.11444568201899528 test_loss: 0.1384470582008362
epoch: 5 training_loss 0.10456584123894572 test_loss: 0.13742867708206177
epoch: 6 training_loss 0.1097464757785201 test_loss: 0.16663997173309325
epoch: 7 training_loss 0.10742188818752765 test_loss: 0.10936589241027832
epoch: 8 training_loss 0.11006702236831188 test_loss: 0.14062706232070923
epoch: 9 training_loss 0.10063063323497773 test_loss: 0.12390960454940796
epoch: 10 training_loss 0.10274974120780826 test_loss: 0.12053097486495971
epoch: 11 training_loss 0.09858946768566966 test_loss: 0.129816472530365
epoch: 12 training_loss 0.10358608886599541 test_loss: 0.12701863050460815
epoch: 13 training_loss 0.10512890625745058 test_loss: 0.13729369640350342
epoch: 14 training_loss 0.10160177888348698 test_loss: 0.13060426712036133
epoch: 15 training_loss 0.0909449484758079 test_loss: 0.10801941156387329
epoch: 16 training_loss 0.10521268554031848 test_loss: 0.12169731855392456
epoch: 17 training_loss 0.08979110648855566 test_loss: 0.12712086439132692
epoch: 18 training_loss 0.10092844728380441 test_loss: 0.1071439504623413
epoch: 19 training_loss 0.09199369700625538 test_loss: 0.12157986164093018
epoch: 20 training_loss 0.08891818560659885 test_loss: 0.13250144720077514
epoch: 21 training_loss 0.08876663753762841 test_loss: 0.13091901540756226
epoch: 22 training_loss 0.09068310540169477 test_loss: 0.12827279567718505
epoch: 23 training_loss 0.09130293671041727 test_loss: 0.12573095560073852
epoch: 24 training_loss 0.09827370826154948 test_loss: 0.1236350417137146
epoch: 25 training_loss 0.08662893276661635 test_loss: 0.11920329332351684
epoch: 26 training_loss 0.08984757110476493 test_loss: 0.11692047119140625
epoch: 27 training_loss 0.0874786744825542 test_loss: 0.11604497432708741
epoch: 28 training_loss 0.0818646839261055 test_loss: 0.1184512734413147
epoch: 29 training_loss 0.08228442904539407 test_loss: 0.1405930757522583
epoch: 30 training_loss 0.08945301957428456 test_loss: 0.11931542158126832
epoch: 31 training_loss 0.08474339976906776 test_loss: 0.11216197013854981
epoch: 32 training_loss 0.08615990370512008 test_loss: 0.11070847511291504
epoch: 33 training_loss 0.08242301253601908 test_loss: 0.12805756330490112
epoch: 34 training_loss 0.08752825943753123 test_loss: 0.1127439260482788
epoch: 35 training_loss 0.08958161478862166 test_loss: 0.10979431867599487
epoch: 36 training_loss 0.08100618815049529 test_loss: 0.10535762310028077
epoch: 37 training_loss 0.08056748872622847 test_loss: 0.10144004821777344
epoch: 38 training_loss 0.08211742281913757 test_loss: 0.13027501106262207
epoch: 39 training_loss 0.08350364120677113 test_loss: 0.1272937536239624
epoch: 40 training_loss 0.07705716339871288 test_loss: 0.12996106147766112
epoch: 41 training_loss 0.08217508357018233 test_loss: 0.128557026386261
epoch: 42 training_loss 0.08559387071058154 test_loss: 0.12906391620635987
epoch: 43 training_loss 0.07880858674645425 test_loss: 0.12904640436172485
epoch: 44 training_loss 0.08729387357831002 test_loss: 0.10947521924972534
epoch: 45 training_loss 0.08153940111398697 test_loss: 0.13746814727783202
epoch: 46 training_loss 0.08357983369380236 test_loss: 0.12587943077087402
epoch: 47 training_loss 0.08358377482742071 test_loss: 0.14894572496414185
epoch: 48 training_loss 0.08692660585045814 test_loss: 0.1307168960571289
epoch: 49 training_loss 0.08416667439043522 test_loss: 0.11915057897567749
epoch: 50 training_loss 0.07390389412641525 test_loss: 0.1264452815055847
epoch: 51 training_loss 0.0823038942925632 test_loss: 0.1581062912940979
epoch: 52 training_loss 0.07775073032826185 test_loss: 0.12560218572616577
epoch: 53 training_loss 0.07716541539877653 test_loss: 0.14233568906784058
epoch: 54 training_loss 0.08542289333418011 test_loss: 0.11845271587371826
epoch: 55 training_loss 0.07692561877891421 test_loss: 0.13742536306381226
epoch: 56 training_loss 0.07844586831517518 test_loss: 0.13297072649002076
epoch: 57 training_loss 0.07370515834540128 test_loss: 0.12870302200317382
epoch: 58 training_loss 0.07913721950724721 test_loss: 0.14119412899017333
epoch: 59 training_loss 0.06897212319076061 test_loss: 0.1378464698791504
epoch: 60 training_loss 0.07317648977041244 test_loss: 0.13275545835494995
epoch: 61 training_loss 0.07608468654565513 test_loss: 0.12990714311599733
epoch: 62 training_loss 0.07479300072416663 test_loss: 0.12064996957778931
epoch: 63 training_loss 0.07881479181349277 test_loss: 0.14693905115127565
epoch: 64 training_loss 0.07078023347072303 test_loss: 0.11703884601593018
epoch: 65 training_loss 0.0726406416669488 test_loss: 0.11534205675125123
epoch: 66 training_loss 0.07746287047863007 test_loss: 0.14012014865875244
epoch: 67 training_loss 0.07445902310311794 test_loss: 0.13978734016418456
epoch: 68 training_loss 0.07429667185992002 test_loss: 0.1455802083015442
epoch: 69 training_loss 0.07304491451010107 test_loss: 0.1300431489944458
epoch: 70 training_loss 0.06986353714950383 test_loss: 0.13698946237564086
epoch: 71 training_loss 0.07636578300967813 test_loss: 0.13502044677734376
epoch: 72 training_loss 0.0747798520885408 test_loss: 0.1504341721534729
epoch: 73 training_loss 0.07752164636738598 test_loss: 0.14607460498809816
epoch: 74 training_loss 0.07527287947013975 test_loss: 0.14984612464904784
epoch: 75 training_loss 0.07231320228427648 test_loss: 0.12884867191314697
epoch: 76 training_loss 0.07103080962784589 test_loss: 0.13039882183074952
epoch: 77 training_loss 0.07300861138850451 test_loss: 0.1193231225013733
epoch: 78 training_loss 0.07422034401446581 test_loss: 0.1532795786857605
epoch: 79 training_loss 0.06743054494261741 test_loss: 0.1343095064163208
epoch: 80 training_loss 0.06802185337990523 test_loss: 0.1410768747329712
epoch: 81 training_loss 0.07049712508916856 test_loss: 0.1447369337081909
epoch: 82 training_loss 0.07502457979135216 test_loss: 0.16114617586135865
epoch: 83 training_loss 0.06792328728362917 test_loss: 0.16534510850906373
epoch: 84 training_loss 0.06995720666833222 test_loss: 0.14327024221420287
epoch: 85 training_loss 0.07257987076416611 test_loss: 0.13809728622436523
epoch: 86 training_loss 0.0692026637122035 test_loss: 0.149086594581604
epoch: 87 training_loss 0.07026370864361525 test_loss: 0.14874660968780518
epoch: 88 training_loss 0.06707566706463694 test_loss: 0.14168592691421508
epoch: 89 training_loss 0.07050802998244762 test_loss: 0.14192367792129518
epoch: 90 training_loss 0.07437035908922553 test_loss: 0.16956558227539062
epoch: 91 training_loss 0.07072636004537344 test_loss: 0.17128616571426392
epoch: 92 training_loss 0.07480331920087338 test_loss: 0.15760467052459717
epoch: 93 training_loss 0.06939548476599157 test_loss: 0.14842756986618041
epoch: 94 training_loss 0.06858272492885589 test_loss: 0.13612797260284423
epoch: 95 training_loss 0.06759714935906232 test_loss: 0.149297833442688
epoch: 96 training_loss 0.0660826267581433 test_loss: 0.15244729518890382
epoch: 97 training_loss 0.0711332446988672 test_loss: 0.15517882108688355
epoch: 98 training_loss 0.06974982904270291 test_loss: 0.1764079213142395
epoch: 99 training_loss 0.06657417820766569 test_loss: 0.16192522048950195
epoch: 100 training_loss 0.06938630206510425 test_loss: 0.13081153631210327
epoch: 101 training_loss 0.06966234001331031 test_loss: 0.16732679605484008
epoch: 102 training_loss 0.06591943349689246 test_loss: 0.15217194557189942
epoch: 103 training_loss 0.06651211354881525 test_loss: 0.13498739004135132
epoch: 104 training_loss 0.06190335753373802 test_loss: 0.1540234088897705
epoch: 105 training_loss 0.07229885009117425 test_loss: 0.1338821291923523
epoch: 106 training_loss 0.06470856986008584 test_loss: 0.1527121663093567
epoch: 107 training_loss 0.06590301597490907 test_loss: 0.14968353509902954
epoch: 108 training_loss 0.06381368591450155 test_loss: 0.1469775676727295
epoch: 109 training_loss 0.06763354985043407 test_loss: 0.14905087947845458
epoch: 110 training_loss 0.06174000228755176 test_loss: 0.1527826428413391
epoch: 111 training_loss 0.06260349126532674 test_loss: 0.17020440101623535
epoch: 112 training_loss 0.06626716036349535 test_loss: 0.16947784423828124
epoch: 113 training_loss 0.06581839483231305 test_loss: 0.15942177772521973
epoch: 114 training_loss 0.06915926165878773 test_loss: 0.1642984390258789
epoch: 115 training_loss 0.06915353946387767 test_loss: 0.16095738410949706
epoch: 116 training_loss 0.06491005298681557 test_loss: 0.17387535572052001
epoch: 117 training_loss 0.06382734819315374 test_loss: 0.15729520320892335
epoch: 118 training_loss 0.0694929208047688 test_loss: 0.16591575145721435
epoch: 119 training_loss 0.0671781112253666 test_loss: 0.15178115367889405
epoch: 120 training_loss 0.06036514276638627 test_loss: 0.1624348759651184
epoch: 121 training_loss 0.06773288128897548 test_loss: 0.14709854125976562
epoch: 122 training_loss 0.06806816574186086 test_loss: 0.1625204563140869
epoch: 123 training_loss 0.07004445403814316 test_loss: 0.12970211505889892
epoch: 124 training_loss 0.06183247983455658 test_loss: 0.16716803312301637
epoch: 125 training_loss 0.06528644613921643 test_loss: 0.15726622343063354
epoch: 126 training_loss 0.05914155436679721 test_loss: 0.14676486253738402
epoch: 127 training_loss 0.06415396997705101 test_loss: 0.14663001298904418
epoch: 128 training_loss 0.05967959926463664 test_loss: 0.15619285106658937
epoch: 129 training_loss 0.06270769638940692 test_loss: 0.13187286853790284
epoch: 130 training_loss 0.057280751224607226 test_loss: 0.15567415952682495
epoch: 131 training_loss 0.06407693767920136 test_loss: 0.15981602668762207
epoch: 132 training_loss 0.06176616054028273 test_loss: 0.15805047750473022
epoch: 133 training_loss 0.06217984495218843 test_loss: 0.17655556201934813
epoch: 134 training_loss 0.06134923834353685 test_loss: 0.15639423131942748
epoch: 135 training_loss 0.06264837950468063 test_loss: 0.1754804015159607
epoch: 136 training_loss 0.06146533655934036 test_loss: 0.15143156051635742
epoch: 137 training_loss 0.06127595176454634 test_loss: 0.1478162407875061
epoch: 138 training_loss 0.05439418138936162 test_loss: 0.15739067792892455
epoch: 139 training_loss 0.063607944175601 test_loss: 0.1468811273574829
epoch: 140 training_loss 0.0621300966758281 test_loss: 0.15672471523284912
epoch: 141 training_loss 0.06457802651450038 test_loss: 0.15566728115081788
epoch: 142 training_loss 0.0721055324189365 test_loss: 0.16766661405563354
epoch: 143 training_loss 0.06262920137494803 test_loss: 0.1434660315513611
epoch: 144 training_loss 0.05871686238795519 test_loss: 0.16135101318359374
epoch: 145 training_loss 0.05915892578661442 test_loss: 0.17559696435928346
epoch: 146 training_loss 0.06031229012645781 test_loss: 0.1512562870979309
epoch: 147 training_loss 0.05923438148573041 test_loss: 0.17108378410339356
epoch: 148 training_loss 0.06121635487303138 test_loss: 0.1598926305770874
epoch: 149 training_loss 0.0591698978561908 test_loss: 0.1606935143470764
epoch: 0 training_loss 41.33240663528442 test_loss: 21.466966247558595
epoch: 1 training_loss 17.421072368621825 test_loss: 14.369725036621094
epoch: 2 training_loss 12.914285383224488 test_loss: 11.828777313232422
epoch: 3 training_loss 10.617569808959962 test_loss: 10.01717758178711
epoch: 4 training_loss 9.434872446060181 test_loss: 8.942881011962891
epoch: 5 training_loss 8.473219361305237 test_loss: 8.047000122070312
epoch: 6 training_loss 7.734230003356934 test_loss: 7.630229187011719
epoch: 7 training_loss 7.291870965957641 test_loss: 7.14703598022461
epoch: 8 training_loss 6.903673043251038 test_loss: 6.80186767578125
epoch: 9 training_loss 6.587724957466126 test_loss: 6.5805000305175785
epoch: 10 training_loss 6.235622854232788 test_loss: 6.30579948425293
epoch: 11 training_loss 5.938802690505981 test_loss: 5.944110870361328
epoch: 12 training_loss 5.702444467544556 test_loss: 5.912453842163086
epoch: 13 training_loss 5.4949282765388485 test_loss: 5.70103645324707
epoch: 14 training_loss 5.301812195777893 test_loss: 5.3144275665283205
epoch: 15 training_loss 5.1995621275901795 test_loss: 5.228945541381836
epoch: 16 training_loss 4.984895198345185 test_loss: 5.18774528503418
epoch: 17 training_loss 4.734355027675629 test_loss: 4.769735717773438
epoch: 18 training_loss 4.773218724727631 test_loss: 4.92913818359375
epoch: 19 training_loss 4.547979295253754 test_loss: 4.708377456665039
epoch: 20 training_loss 4.570739989280701 test_loss: 4.58349723815918
epoch: 21 training_loss 4.305957942008972 test_loss: 4.447653198242188
epoch: 22 training_loss 4.237263650894165 test_loss: 4.6034698486328125
epoch: 23 training_loss 4.143726303577423 test_loss: 4.36209716796875
epoch: 24 training_loss 4.050995779037476 test_loss: 4.195608901977539
epoch: 25 training_loss 3.995232946872711 test_loss: 4.200201797485351
epoch: 26 training_loss 3.946060664653778 test_loss: 4.070904922485352
epoch: 27 training_loss 3.8290799474716186 test_loss: 3.9378437042236327
epoch: 28 training_loss 3.8121878600120542 test_loss: 3.8779544830322266
epoch: 29 training_loss 3.7498335337638853 test_loss: 4.0167396545410154
epoch: 30 training_loss 3.738316123485565 test_loss: 3.8161548614501952
epoch: 31 training_loss 3.6787606620788575 test_loss: 3.745412826538086
epoch: 32 training_loss 3.6153796672821046 test_loss: 3.7598087310791017
epoch: 33 training_loss 3.6204349756240846 test_loss: 3.903247833251953
epoch: 34 training_loss 3.492816774845123 test_loss: 3.6762317657470702
epoch: 35 training_loss 3.519295485019684 test_loss: 3.676875686645508
epoch: 36 training_loss 3.4665996479988097 test_loss: 3.595084381103516
epoch: 37 training_loss 3.3864773082733155 test_loss: 3.517831802368164
epoch: 38 training_loss 3.387773585319519 test_loss: 3.541085433959961
epoch: 39 training_loss 3.320194334983826 test_loss: 3.550794219970703
epoch: 40 training_loss 3.363536529541016 test_loss: 3.477666473388672
epoch: 41 training_loss 3.3065458154678344 test_loss: 3.520079803466797
epoch: 42 training_loss 3.2544904470443727 test_loss: 3.378915023803711
epoch: 43 training_loss 3.162718834877014 test_loss: 3.3132667541503906
epoch: 44 training_loss 3.1818781065940858 test_loss: 3.3450332641601563
epoch: 45 training_loss 3.1394339489936827 test_loss: 3.3181503295898436
epoch: 46 training_loss 3.1755272793769835 test_loss: 3.4246288299560548
epoch: 47 training_loss 3.1682964730262757 test_loss: 3.2394664764404295
epoch: 48 training_loss 3.068248703479767 test_loss: 3.276570510864258
epoch: 49 training_loss 3.020836601257324 test_loss: 3.2380115509033205
epoch: 50 training_loss 3.069824695587158 test_loss: 3.099471092224121
epoch: 51 training_loss 3.0509526920318604 test_loss: 3.2101112365722657
epoch: 52 training_loss 2.9319848299026487 test_loss: 3.2076690673828123
epoch: 53 training_loss 2.9429200768470762 test_loss: 3.177418899536133
epoch: 54 training_loss 2.976490375995636 test_loss: 3.0370492935180664
epoch: 55 training_loss 2.93790016412735 test_loss: 3.218191909790039
epoch: 56 training_loss 2.918043396472931 test_loss: 3.10558967590332
epoch: 57 training_loss 2.9166006588935853 test_loss: 3.1262441635131837
epoch: 58 training_loss 2.897806601524353 test_loss: 3.1127975463867186
epoch: 59 training_loss 2.804659802913666 test_loss: 3.0785131454467773
epoch: 60 training_loss 2.888811664581299 test_loss: 3.0812000274658202
epoch: 61 training_loss 2.7811389660835264 test_loss: 2.981239891052246
epoch: 62 training_loss 2.8277753233909606 test_loss: 3.056361961364746
epoch: 63 training_loss 2.8327292728424074 test_loss: 3.0088090896606445
epoch: 64 training_loss 2.8129342460632323 test_loss: 2.857883262634277
epoch: 65 training_loss 2.8139025163650513 test_loss: 2.99330997467041
epoch: 66 training_loss 2.7309972989559173 test_loss: 2.885528564453125
epoch: 67 training_loss 2.800700581073761 test_loss: 2.937892723083496
epoch: 68 training_loss 2.6973711347579954 test_loss: 2.9075477600097654
epoch: 69 training_loss 2.7079776406288145 test_loss: 2.8108217239379885
epoch: 70 training_loss 2.698094975948334 test_loss: 2.888957977294922
epoch: 71 training_loss 2.6903652858734133 test_loss: 2.795718193054199
epoch: 72 training_loss 2.6847154796123505 test_loss: 2.816823196411133
epoch: 73 training_loss 2.6985101103782654 test_loss: 2.8867568969726562
epoch: 74 training_loss 2.6667085099220276 test_loss: 2.83902530670166
epoch: 75 training_loss 2.5925755286216736 test_loss: 2.8573640823364257
epoch: 76 training_loss 2.652638564109802 test_loss: 2.9141021728515626
epoch: 77 training_loss 2.6974208211898802 test_loss: 2.82900447845459
epoch: 78 training_loss 2.6213370752334595 test_loss: 2.873418617248535
epoch: 79 training_loss 2.5924230861663817 test_loss: 2.695640754699707
epoch: 80 training_loss 2.5804823541641237 test_loss: 2.7642799377441407
epoch: 81 training_loss 2.5982040143013 test_loss: 2.6864320755004885
epoch: 82 training_loss 2.5674797642230986 test_loss: 2.733898735046387
epoch: 83 training_loss 2.5656460547447204 test_loss: 2.8181434631347657
epoch: 84 training_loss 2.5826528000831606 test_loss: 2.8187721252441404
epoch: 85 training_loss 2.55558388710022 test_loss: 2.665700149536133
epoch: 86 training_loss 2.5708510863780973 test_loss: 2.7412261962890625
epoch: 87 training_loss 2.530203061103821 test_loss: 2.778126907348633
epoch: 88 training_loss 2.5425111722946165 test_loss: 2.7165245056152343
epoch: 89 training_loss 2.4553322911262514 test_loss: 2.6338396072387695
epoch: 90 training_loss 2.521972131729126 test_loss: 2.695703887939453
epoch: 91 training_loss 2.4435511553287506 test_loss: 2.669880485534668
epoch: 92 training_loss 2.519136357307434 test_loss: 2.6519372940063475
epoch: 93 training_loss 2.437235745191574 test_loss: 2.649117279052734
epoch: 94 training_loss 2.4571732664108277 test_loss: 2.643595314025879
epoch: 95 training_loss 2.4634427297115327 test_loss: 2.7519451141357423
epoch: 96 training_loss 2.4784009146690367 test_loss: 2.689274024963379
epoch: 97 training_loss 2.389730656147003 test_loss: 2.615117835998535
epoch: 98 training_loss 2.3727188885211943 test_loss: 2.624974822998047
epoch: 99 training_loss 2.4338417220115662 test_loss: 2.6012041091918947
epoch: 100 training_loss 2.4870565927028654 test_loss: 2.6012605667114257
epoch: 101 training_loss 2.4278420984745024 test_loss: 2.5601282119750977
epoch: 102 training_loss 2.4402425265312195 test_loss: 2.5351892471313477
epoch: 103 training_loss 2.461211416721344 test_loss: 2.587269401550293
epoch: 104 training_loss 2.3887646162509917 test_loss: 2.5555288314819338
epoch: 105 training_loss 2.3737283504009246 test_loss: 2.748332977294922
epoch: 106 training_loss 2.4141794872283935 test_loss: 2.4756378173828124
epoch: 107 training_loss 2.416865280866623 test_loss: 2.5703702926635743
epoch: 108 training_loss 2.4088762867450715 test_loss: 2.641362190246582
epoch: 109 training_loss 2.393090645074844 test_loss: 2.5266529083251954
epoch: 110 training_loss 2.3792945492267608 test_loss: 2.5713068008422852
epoch: 111 training_loss 2.3495365631580354 test_loss: 2.5354766845703125
epoch: 112 training_loss 2.3078450846672056 test_loss: 2.521528625488281
epoch: 113 training_loss 2.3590957736968994 test_loss: 2.5122629165649415
epoch: 114 training_loss 2.3505525159835816 test_loss: 2.4748573303222656
epoch: 115 training_loss 2.352917958498001 test_loss: 2.4944684982299803
epoch: 116 training_loss 2.3318888103961943 test_loss: 2.506495475769043
epoch: 117 training_loss 2.3309700977802277 test_loss: 2.5610002517700194
epoch: 118 training_loss 2.3050105249881745 test_loss: 2.4150016784667967
epoch: 119 training_loss 2.314861948490143 test_loss: 2.519840621948242
epoch: 120 training_loss 2.262774716615677 test_loss: 2.485910987854004
epoch: 121 training_loss 2.3083125436306 test_loss: 2.5010120391845705
epoch: 122 training_loss 2.2866511797904967 test_loss: 2.4156620025634767
epoch: 123 training_loss 2.286294243335724 test_loss: 2.5180667877197265
epoch: 124 training_loss 2.3276912534236907 test_loss: 2.415732002258301
epoch: 125 training_loss 2.26946910738945 test_loss: 2.4252649307250977
epoch: 126 training_loss 2.316874098777771 test_loss: 2.4524276733398436
epoch: 127 training_loss 2.3087065970897673 test_loss: 2.4031742095947264
epoch: 128 training_loss 2.32335631608963 test_loss: 2.439329719543457
epoch: 129 training_loss 2.2335350453853606 test_loss: 2.4810823440551757
epoch: 130 training_loss 2.2555585777759553 test_loss: 2.457542610168457
epoch: 131 training_loss 2.270780402421951 test_loss: 2.512245750427246
epoch: 132 training_loss 2.198481523990631 test_loss: 2.4298368453979493
epoch: 133 training_loss 2.2289734482765198 test_loss: 2.5265764236450194
epoch: 134 training_loss 2.283889479637146 test_loss: 2.4321739196777346
epoch: 135 training_loss 2.216728000640869 test_loss: 2.3785213470458983
epoch: 136 training_loss 2.216566035747528 test_loss: 2.3727706909179687
epoch: 137 training_loss 2.2503518223762513 test_loss: 2.4456214904785156
epoch: 138 training_loss 2.182292078733444 test_loss: 2.373526382446289
epoch: 139 training_loss 2.1922355246543885 test_loss: 2.3973175048828126
epoch: 140 training_loss 2.2894271099567414 test_loss: 2.4277000427246094
epoch: 141 training_loss 2.2006817400455474 test_loss: 2.3647319793701174
epoch: 142 training_loss 2.155691065788269 test_loss: 2.408085823059082
epoch: 143 training_loss 2.2083098208904266 test_loss: 2.4450408935546877
epoch: 144 training_loss 2.210791976451874 test_loss: 2.3147321701049806
epoch: 145 training_loss 2.1871869575977327 test_loss: 2.3716983795166016
epoch: 146 training_loss 2.2165407824516294 test_loss: 2.2887563705444336
epoch: 147 training_loss 2.1867680776119234 test_loss: 2.3680093765258787
epoch: 148 training_loss 2.1679383277893067 test_loss: 2.3425230026245116
epoch: 149 training_loss 2.155582958459854 test_loss: 2.3233497619628904
2688.835503237881
episode: 0 training return: tensor(219.7299, device='cuda:0')
episode: 1 training return: tensor(-340.2008, device='cuda:0')
episode: 2 training return: tensor(277.3526, device='cuda:0')
episode: 3 training return: tensor(356.0691, device='cuda:0')
epoch: 1 test_true_pfm: 2301.164037177935 sim_pfm: -227.00034329108894
episode: 4 training return: tensor(255.5067, device='cuda:0')
episode: 5 training return: tensor(-453.1490, device='cuda:0')
episode: 6 training return: tensor(311.1970, device='cuda:0')
episode: 7 training return: tensor(123.4536, device='cuda:0')
epoch: 2 test_true_pfm: 2715.2205524909646 sim_pfm: -82.06335944434977
episode: 8 training return: tensor(304.0146, device='cuda:0')
episode: 9 training return: tensor(306.2623, device='cuda:0')
episode: 10 training return: tensor(346.1883, device='cuda:0')
episode: 11 training return: tensor(-295.6376, device='cuda:0')
epoch: 3 test_true_pfm: 3502.525840018399 sim_pfm: 224.4259362067969
episode: 12 training return: tensor(-94.8553, device='cuda:0')
episode: 13 training return: tensor(-157.7922, device='cuda:0')
episode: 14 training return: tensor(224.4436, device='cuda:0')
episode: 15 training return: tensor(12.1431, device='cuda:0')
epoch: 4 test_true_pfm: 3504.161439115915 sim_pfm: 308.1236590242188
episode: 16 training return: tensor(414.3786, device='cuda:0')
episode: 17 training return: tensor(-250.5484, device='cuda:0')
episode: 18 training return: tensor(-312.1516, device='cuda:0')
episode: 19 training return: tensor(218.4601, device='cuda:0')
epoch: 5 test_true_pfm: 2856.410160150434 sim_pfm: 14.302873883124752
episode: 20 training return: tensor(298.5325, device='cuda:0')
episode: 21 training return: tensor(-39.0503, device='cuda:0')
episode: 22 training return: tensor(-280.4631, device='cuda:0')
episode: 23 training return: tensor(357.5394, device='cuda:0')
epoch: 6 test_true_pfm: 3529.62000459011 sim_pfm: 308.61901081421337
episode: 24 training return: tensor(404.2213, device='cuda:0')
episode: 25 training return: tensor(-222.4059, device='cuda:0')
episode: 26 training return: tensor(-295.5302, device='cuda:0')
episode: 27 training return: tensor(334.5649, device='cuda:0')
epoch: 7 test_true_pfm: 3402.5551479652554 sim_pfm: 80.91810417183054
episode: 28 training return: tensor(-36.9449, device='cuda:0')
episode: 29 training return: tensor(110.9628, device='cuda:0')
episode: 30 training return: tensor(277.6757, device='cuda:0')
episode: 31 training return: tensor(242.7830, device='cuda:0')
epoch: 8 test_true_pfm: 3275.5970079371964 sim_pfm: 404.75052687720745
episode: 32 training return: tensor(-350.7514, device='cuda:0')
episode: 33 training return: tensor(335.8950, device='cuda:0')
episode: 34 training return: tensor(410.4174, device='cuda:0')
episode: 35 training return: tensor(14.6315, device='cuda:0')
epoch: 9 test_true_pfm: 2961.0037783438092 sim_pfm: 25.49086027328546
episode: 36 training return: tensor(297.3868, device='cuda:0')
episode: 37 training return: tensor(-229.9542, device='cuda:0')
episode: 38 training return: tensor(-202.0461, device='cuda:0')
episode: 39 training return: tensor(256.5704, device='cuda:0')
epoch: 10 test_true_pfm: 2586.9833896899377 sim_pfm: 103.32042685806907
episode: 40 training return: tensor(-153.4411, device='cuda:0')
episode: 41 training return: tensor(338.1565, device='cuda:0')
episode: 42 training return: tensor(411.5515, device='cuda:0')
episode: 43 training return: tensor(126.9232, device='cuda:0')
epoch: 11 test_true_pfm: 3286.6149419931166 sim_pfm: -3.0626614340581
episode: 44 training return: tensor(235.1205, device='cuda:0')
episode: 45 training return: tensor(-196.9642, device='cuda:0')
episode: 46 training return: tensor(-290.7649, device='cuda:0')
episode: 47 training return: tensor(276.6092, device='cuda:0')
epoch: 12 test_true_pfm: 3296.106479889592 sim_pfm: 298.6413285615854
episode: 48 training return: tensor(-60.5150, device='cuda:0')
episode: 49 training return: tensor(-371.7622, device='cuda:0')
episode: 50 training return: tensor(199.8943, device='cuda:0')
episode: 51 training return: tensor(55.4474, device='cuda:0')
epoch: 13 test_true_pfm: 2160.7778178650933 sim_pfm: 65.31284030165989
episode: 52 training return: tensor(316.7693, device='cuda:0')
episode: 53 training return: tensor(-62.9906, device='cuda:0')
episode: 54 training return: tensor(-32.2954, device='cuda:0')
episode: 55 training return: tensor(96.7595, device='cuda:0')
epoch: 14 test_true_pfm: 3023.0324441466933 sim_pfm: 54.39785371449155
episode: 56 training return: tensor(294.8819, device='cuda:0')
episode: 57 training return: tensor(-131.8463, device='cuda:0')
episode: 58 training return: tensor(-303.7580, device='cuda:0')
episode: 59 training return: tensor(72.8491, device='cuda:0')
epoch: 15 test_true_pfm: 2348.9136970062737 sim_pfm: 213.59388143275282
episode: 60 training return: tensor(279.6802, device='cuda:0')
episode: 61 training return: tensor(-73.2187, device='cuda:0')
episode: 62 training return: tensor(170.3619, device='cuda:0')
episode: 63 training return: tensor(324.1171, device='cuda:0')
epoch: 16 test_true_pfm: 3166.0056501549493 sim_pfm: 126.66159638210472
episode: 64 training return: tensor(-376.5035, device='cuda:0')
episode: 65 training return: tensor(207.9868, device='cuda:0')
episode: 66 training return: tensor(283.0688, device='cuda:0')
episode: 67 training return: tensor(350.2333, device='cuda:0')
epoch: 17 test_true_pfm: 3106.1267524955547 sim_pfm: 78.1452945372245
episode: 68 training return: tensor(-182.9594, device='cuda:0')
episode: 69 training return: tensor(271.3573, device='cuda:0')
episode: 70 training return: tensor(-265.2515, device='cuda:0')
episode: 71 training return: tensor(23.7793, device='cuda:0')
epoch: 18 test_true_pfm: 2056.633233071833 sim_pfm: 224.6065934294602
episode: 72 training return: tensor(420.8362, device='cuda:0')
episode: 73 training return: tensor(238.6144, device='cuda:0')
episode: 74 training return: tensor(340.8922, device='cuda:0')
episode: 75 training return: tensor(295.0660, device='cuda:0')
epoch: 19 test_true_pfm: 2533.5713696252237 sim_pfm: 278.4351109934699
episode: 76 training return: tensor(342.7773, device='cuda:0')
episode: 77 training return: tensor(389.7410, device='cuda:0')
episode: 78 training return: tensor(331.7415, device='cuda:0')
episode: 79 training return: tensor(379.9208, device='cuda:0')
epoch: 20 test_true_pfm: 3509.239661449986 sim_pfm: 162.2685778942347
episode: 80 training return: tensor(303.5504, device='cuda:0')
episode: 81 training return: tensor(295.1624, device='cuda:0')
episode: 82 training return: tensor(-162.1357, device='cuda:0')
episode: 83 training return: tensor(292.3519, device='cuda:0')
epoch: 21 test_true_pfm: 2897.970643509149 sim_pfm: 98.66560710039145
episode: 84 training return: tensor(176.0476, device='cuda:0')
episode: 85 training return: tensor(189.9332, device='cuda:0')
episode: 86 training return: tensor(-237.9120, device='cuda:0')
episode: 87 training return: tensor(241.3575, device='cuda:0')
epoch: 22 test_true_pfm: 3120.817726230994 sim_pfm: 133.92274580488447
episode: 88 training return: tensor(-118.6680, device='cuda:0')
episode: 89 training return: tensor(-196.5652, device='cuda:0')
episode: 90 training return: tensor(350.8806, device='cuda:0')
episode: 91 training return: tensor(-291.4890, device='cuda:0')
epoch: 23 test_true_pfm: 2653.249374266186 sim_pfm: 384.0134887045715
episode: 92 training return: tensor(389.0931, device='cuda:0')
episode: 93 training return: tensor(344.8702, device='cuda:0')
episode: 94 training return: tensor(347.6551, device='cuda:0')
episode: 95 training return: tensor(49.1997, device='cuda:0')
epoch: 24 test_true_pfm: 2609.3217324323778 sim_pfm: -11.811079923257543
episode: 96 training return: tensor(164.4977, device='cuda:0')
episode: 97 training return: tensor(-196.6064, device='cuda:0')
episode: 98 training return: tensor(320.0550, device='cuda:0')
episode: 99 training return: tensor(300.0163, device='cuda:0')
epoch: 25 test_true_pfm: 2460.7457608772834 sim_pfm: 205.07346373209535
episode: 100 training return: tensor(380.5374, device='cuda:0')
episode: 101 training return: tensor(194.2083, device='cuda:0')
episode: 102 training return: tensor(339.5228, device='cuda:0')
episode: 103 training return: tensor(323.4249, device='cuda:0')
epoch: 26 test_true_pfm: 2916.1705430542356 sim_pfm: 377.495893770402
episode: 104 training return: tensor(221.0966, device='cuda:0')
episode: 105 training return: tensor(-118.3539, device='cuda:0')
episode: 106 training return: tensor(397.3316, device='cuda:0')
episode: 107 training return: tensor(267.2728, device='cuda:0')
epoch: 27 test_true_pfm: 3573.788266872604 sim_pfm: 332.66549022080534
episode: 108 training return: tensor(266.1707, device='cuda:0')
episode: 109 training return: tensor(-207.2428, device='cuda:0')
episode: 110 training return: tensor(-266.8954, device='cuda:0')
episode: 111 training return: tensor(-92.7927, device='cuda:0')
epoch: 28 test_true_pfm: 3231.468513041542 sim_pfm: 353.18197682749206
episode: 112 training return: tensor(-256.1856, device='cuda:0')
episode: 113 training return: tensor(178.9642, device='cuda:0')
episode: 114 training return: tensor(367.5506, device='cuda:0')
episode: 115 training return: tensor(295.8750, device='cuda:0')
epoch: 29 test_true_pfm: 3030.2415291149678 sim_pfm: -38.23752634784129
episode: 116 training return: tensor(242.8921, device='cuda:0')
episode: 117 training return: tensor(-209.7739, device='cuda:0')
episode: 118 training return: tensor(-21.3968, device='cuda:0')
episode: 119 training return: tensor(112.7218, device='cuda:0')
epoch: 30 test_true_pfm: 2546.868548450872 sim_pfm: 166.70120397713617
episode: 120 training return: tensor(337.4537, device='cuda:0')
episode: 121 training return: tensor(-131.6325, device='cuda:0')
episode: 122 training return: tensor(-233.1964, device='cuda:0')
episode: 123 training return: tensor(322.9502, device='cuda:0')
epoch: 31 test_true_pfm: 3520.0749738767286 sim_pfm: 344.76946279848926
episode: 124 training return: tensor(148.6987, device='cuda:0')
episode: 125 training return: tensor(348.4366, device='cuda:0')
episode: 126 training return: tensor(303.5057, device='cuda:0')
episode: 127 training return: tensor(353.8736, device='cuda:0')
epoch: 32 test_true_pfm: 3543.999495034413 sim_pfm: 26.98710532129432
episode: 128 training return: tensor(7.3307, device='cuda:0')
episode: 129 training return: tensor(388.6329, device='cuda:0')
episode: 130 training return: tensor(129.5953, device='cuda:0')
episode: 131 training return: tensor(432.7381, device='cuda:0')
epoch: 33 test_true_pfm: 3516.799162714828 sim_pfm: 353.53312721906696
episode: 132 training return: tensor(369.1347, device='cuda:0')
episode: 133 training return: tensor(385.9197, device='cuda:0')
episode: 134 training return: tensor(408.9867, device='cuda:0')
episode: 135 training return: tensor(380.0273, device='cuda:0')
epoch: 34 test_true_pfm: 2909.325389528127 sim_pfm: 207.26792248022198
episode: 136 training return: tensor(315.0453, device='cuda:0')
episode: 137 training return: tensor(412.7056, device='cuda:0')
episode: 138 training return: tensor(377.1644, device='cuda:0')
episode: 139 training return: tensor(420.5696, device='cuda:0')
epoch: 35 test_true_pfm: 3374.125734748064 sim_pfm: 336.7683340786025
episode: 140 training return: tensor(336.4002, device='cuda:0')
episode: 141 training return: tensor(307.7632, device='cuda:0')
episode: 142 training return: tensor(-219.9545, device='cuda:0')
episode: 143 training return: tensor(343.6298, device='cuda:0')
epoch: 36 test_true_pfm: 3376.4475974118673 sim_pfm: 132.03949158996693
episode: 144 training return: tensor(355.3189, device='cuda:0')
episode: 145 training return: tensor(390.7560, device='cuda:0')
episode: 146 training return: tensor(320.3801, device='cuda:0')
episode: 147 training return: tensor(268.5882, device='cuda:0')
epoch: 37 test_true_pfm: 3550.887738833342 sim_pfm: 374.63490752945654
episode: 148 training return: tensor(363.8209, device='cuda:0')
episode: 149 training return: tensor(431.9805, device='cuda:0')
episode: 150 training return: tensor(26.1087, device='cuda:0')
episode: 151 training return: tensor(306.8164, device='cuda:0')
epoch: 38 test_true_pfm: 3457.3743475413344 sim_pfm: 137.62421725290673
episode: 152 training return: tensor(346.7602, device='cuda:0')
episode: 153 training return: tensor(403.0027, device='cuda:0')
episode: 154 training return: tensor(234.6928, device='cuda:0')
episode: 155 training return: tensor(318.2645, device='cuda:0')
epoch: 39 test_true_pfm: 3535.7781779020625 sim_pfm: 151.1615146197728
episode: 156 training return: tensor(359.5862, device='cuda:0')
episode: 157 training return: tensor(131.5417, device='cuda:0')
episode: 158 training return: tensor(395.6703, device='cuda:0')
episode: 159 training return: tensor(269.6760, device='cuda:0')
epoch: 40 test_true_pfm: 3564.3281623697426 sim_pfm: 299.48927341770224
episode: 160 training return: tensor(339.5230, device='cuda:0')
episode: 161 training return: tensor(337.1811, device='cuda:0')
episode: 162 training return: tensor(373.5457, device='cuda:0')
episode: 163 training return: tensor(26.0264, device='cuda:0')
epoch: 41 test_true_pfm: 3511.113634631492 sim_pfm: 385.63296604841406
episode: 164 training return: tensor(365.4920, device='cuda:0')
episode: 165 training return: tensor(250.1949, device='cuda:0')
episode: 166 training return: tensor(209.1491, device='cuda:0')
episode: 167 training return: tensor(167.2381, device='cuda:0')
epoch: 42 test_true_pfm: 3434.4720132173316 sim_pfm: 193.81425236956179
episode: 168 training return: tensor(361.8068, device='cuda:0')
episode: 169 training return: tensor(395.4475, device='cuda:0')
episode: 170 training return: tensor(334.0422, device='cuda:0')
episode: 171 training return: tensor(299.2186, device='cuda:0')
epoch: 43 test_true_pfm: 3578.9003022656266 sim_pfm: 417.0205597725774
episode: 172 training return: tensor(294.1912, device='cuda:0')
episode: 173 training return: tensor(-331.9237, device='cuda:0')
episode: 174 training return: tensor(278.4806, device='cuda:0')
episode: 175 training return: tensor(366.5335, device='cuda:0')
epoch: 44 test_true_pfm: 3474.2041236974023 sim_pfm: 346.23605413928937
episode: 176 training return: tensor(-249.6942, device='cuda:0')
episode: 177 training return: tensor(256.4207, device='cuda:0')
episode: 178 training return: tensor(-318.3742, device='cuda:0')
episode: 179 training return: tensor(200.7971, device='cuda:0')
epoch: 45 test_true_pfm: 3033.2937886393997 sim_pfm: 350.80990079285885
episode: 180 training return: tensor(366.7991, device='cuda:0')
episode: 181 training return: tensor(-251.4977, device='cuda:0')
episode: 182 training return: tensor(-261.7296, device='cuda:0')
episode: 183 training return: tensor(-64.3850, device='cuda:0')
epoch: 46 test_true_pfm: 2741.255621799217 sim_pfm: 224.39432102135228
episode: 184 training return: tensor(57.0448, device='cuda:0')
episode: 185 training return: tensor(-190.0634, device='cuda:0')
episode: 186 training return: tensor(407.6748, device='cuda:0')
episode: 187 training return: tensor(-348.5004, device='cuda:0')
epoch: 47 test_true_pfm: 3411.677939396579 sim_pfm: 144.75936745224558
episode: 188 training return: tensor(391.1519, device='cuda:0')
episode: 189 training return: tensor(238.1959, device='cuda:0')
episode: 190 training return: tensor(-479.0510, device='cuda:0')
episode: 191 training return: tensor(336.6123, device='cuda:0')
epoch: 48 test_true_pfm: 3337.8364212058714 sim_pfm: 375.76023412434733
episode: 192 training return: tensor(316.1213, device='cuda:0')
episode: 193 training return: tensor(411.0663, device='cuda:0')
episode: 194 training return: tensor(289.9202, device='cuda:0')
episode: 195 training return: tensor(127.0326, device='cuda:0')
epoch: 49 test_true_pfm: 3442.699232519797 sim_pfm: 303.50271423246403
episode: 196 training return: tensor(249.5820, device='cuda:0')
episode: 197 training return: tensor(201.0713, device='cuda:0')
episode: 198 training return: tensor(281.4823, device='cuda:0')
episode: 199 training return: tensor(-321.3712, device='cuda:0')
epoch: 50 test_true_pfm: 3469.689110563202 sim_pfm: 331.7929261848719
episode: 200 training return: tensor(-294.5526, device='cuda:0')
episode: 201 training return: tensor(390.3724, device='cuda:0')
episode: 202 training return: tensor(-242.4763, device='cuda:0')
episode: 203 training return: tensor(-264.2861, device='cuda:0')
epoch: 51 test_true_pfm: 3493.657218578988 sim_pfm: 233.70763616395803
episode: 204 training return: tensor(351.8259, device='cuda:0')
episode: 205 training return: tensor(201.1290, device='cuda:0')
episode: 206 training return: tensor(336.2896, device='cuda:0')
episode: 207 training return: tensor(358.3600, device='cuda:0')
epoch: 52 test_true_pfm: 3225.881894351553 sim_pfm: 213.5475225559203
episode: 208 training return: tensor(-128.8195, device='cuda:0')
episode: 209 training return: tensor(246.0593, device='cuda:0')
episode: 210 training return: tensor(-211.0658, device='cuda:0')
episode: 211 training return: tensor(-181.4234, device='cuda:0')
epoch: 53 test_true_pfm: 3159.477291836527 sim_pfm: 386.9699788466872
episode: 212 training return: tensor(348.3990, device='cuda:0')
episode: 213 training return: tensor(368.9881, device='cuda:0')
episode: 214 training return: tensor(356.0952, device='cuda:0')
episode: 215 training return: tensor(302.0399, device='cuda:0')
epoch: 54 test_true_pfm: 3394.268072132951 sim_pfm: 252.848151783585
episode: 216 training return: tensor(317.1815, device='cuda:0')
episode: 217 training return: tensor(-239.7815, device='cuda:0')
episode: 218 training return: tensor(-153.4346, device='cuda:0')
episode: 219 training return: tensor(389.4382, device='cuda:0')
epoch: 55 test_true_pfm: 3569.699040637932 sim_pfm: 364.46704770819633
episode: 220 training return: tensor(413.7265, device='cuda:0')
episode: 221 training return: tensor(191.1222, device='cuda:0')
episode: 222 training return: tensor(383.8904, device='cuda:0')
episode: 223 training return: tensor(280.8695, device='cuda:0')
epoch: 56 test_true_pfm: 3551.9646761320705 sim_pfm: 364.7694202531032
episode: 224 training return: tensor(384.2195, device='cuda:0')
episode: 225 training return: tensor(294.7043, device='cuda:0')
episode: 226 training return: tensor(253.8876, device='cuda:0')
episode: 227 training return: tensor(-103.8084, device='cuda:0')
epoch: 57 test_true_pfm: 2712.105386861191 sim_pfm: 338.98983496332465
episode: 228 training return: tensor(341.5447, device='cuda:0')
episode: 229 training return: tensor(311.3571, device='cuda:0')
episode: 230 training return: tensor(356.1763, device='cuda:0')
episode: 231 training return: tensor(295.2590, device='cuda:0')
epoch: 58 test_true_pfm: 3181.4761989009367 sim_pfm: 153.64064103353303
episode: 232 training return: tensor(-193.6489, device='cuda:0')
episode: 233 training return: tensor(324.3000, device='cuda:0')
episode: 234 training return: tensor(124.9008, device='cuda:0')
episode: 235 training return: tensor(378.8681, device='cuda:0')
epoch: 59 test_true_pfm: 3558.866498594549 sim_pfm: 376.5289877388471
episode: 236 training return: tensor(-174.5815, device='cuda:0')
episode: 237 training return: tensor(314.9688, device='cuda:0')
episode: 238 training return: tensor(471.1930, device='cuda:0')
episode: 239 training return: tensor(331.9878, device='cuda:0')
epoch: 60 test_true_pfm: 3602.6746744360585 sim_pfm: 350.0187720778631
episode: 240 training return: tensor(230.7838, device='cuda:0')
episode: 241 training return: tensor(344.9202, device='cuda:0')
episode: 242 training return: tensor(207.2343, device='cuda:0')
episode: 243 training return: tensor(270.2209, device='cuda:0')
epoch: 61 test_true_pfm: 3348.1680347735382 sim_pfm: 406.229959378116
episode: 244 training return: tensor(253.7789, device='cuda:0')
episode: 245 training return: tensor(213.2343, device='cuda:0')
episode: 246 training return: tensor(342.5005, device='cuda:0')
episode: 247 training return: tensor(327.1187, device='cuda:0')
epoch: 62 test_true_pfm: 3480.6266102972363 sim_pfm: 234.87421418673088
episode: 248 training return: tensor(449.4217, device='cuda:0')
episode: 249 training return: tensor(-104.7666, device='cuda:0')
episode: 250 training return: tensor(319.6039, device='cuda:0')
episode: 251 training return: tensor(261.8228, device='cuda:0')
epoch: 63 test_true_pfm: 3205.57093515083 sim_pfm: 409.77082648931537
episode: 252 training return: tensor(293.4131, device='cuda:0')
episode: 253 training return: tensor(285.8844, device='cuda:0')
episode: 254 training return: tensor(327.0729, device='cuda:0')
episode: 255 training return: tensor(385.0998, device='cuda:0')
epoch: 64 test_true_pfm: 3446.6992861441317 sim_pfm: 334.24116298455436
episode: 256 training return: tensor(-235.0218, device='cuda:0')
episode: 257 training return: tensor(337.8763, device='cuda:0')
episode: 258 training return: tensor(268.2775, device='cuda:0')
episode: 259 training return: tensor(293.3574, device='cuda:0')
epoch: 65 test_true_pfm: 2943.815293832673 sim_pfm: 385.2720294961085
episode: 260 training return: tensor(341.4348, device='cuda:0')
episode: 261 training return: tensor(263.5042, device='cuda:0')
episode: 262 training return: tensor(324.9511, device='cuda:0')
episode: 263 training return: tensor(360.6904, device='cuda:0')
epoch: 66 test_true_pfm: 3096.8245179770197 sim_pfm: 367.9785054093615
episode: 264 training return: tensor(318.4842, device='cuda:0')
episode: 265 training return: tensor(336.0819, device='cuda:0')
episode: 266 training return: tensor(414.3285, device='cuda:0')
episode: 267 training return: tensor(371.8818, device='cuda:0')
epoch: 67 test_true_pfm: 3533.1457906567025 sim_pfm: 369.29622230182093
episode: 268 training return: tensor(-163.8504, device='cuda:0')
episode: 269 training return: tensor(-175.3240, device='cuda:0')
episode: 270 training return: tensor(347.4799, device='cuda:0')
episode: 271 training return: tensor(413.0587, device='cuda:0')
epoch: 68 test_true_pfm: 3262.3191975675086 sim_pfm: 335.4879316162551
episode: 272 training return: tensor(-61.8923, device='cuda:0')
episode: 273 training return: tensor(367.9920, device='cuda:0')
episode: 274 training return: tensor(355.3144, device='cuda:0')
episode: 275 training return: tensor(-221.2148, device='cuda:0')
epoch: 69 test_true_pfm: 3571.0527022862916 sim_pfm: 323.4548616047832
episode: 276 training return: tensor(335.9706, device='cuda:0')
episode: 277 training return: tensor(-275.6937, device='cuda:0')
episode: 278 training return: tensor(371.0334, device='cuda:0')
episode: 279 training return: tensor(-354.0431, device='cuda:0')
epoch: 70 test_true_pfm: 3166.995247829865 sim_pfm: 385.6805427106253
episode: 280 training return: tensor(-213.8373, device='cuda:0')
episode: 281 training return: tensor(310.2655, device='cuda:0')
episode: 282 training return: tensor(387.7392, device='cuda:0')
episode: 283 training return: tensor(348.6637, device='cuda:0')
epoch: 71 test_true_pfm: 3462.7164653396794 sim_pfm: 319.57528849131387
episode: 284 training return: tensor(-191.2458, device='cuda:0')
episode: 285 training return: tensor(244.9510, device='cuda:0')
episode: 286 training return: tensor(339.1763, device='cuda:0')
episode: 287 training return: tensor(329.0028, device='cuda:0')
epoch: 72 test_true_pfm: 3390.8102250600605 sim_pfm: 336.7999709686749
episode: 288 training return: tensor(361.2739, device='cuda:0')
episode: 289 training return: tensor(96.4027, device='cuda:0')
episode: 290 training return: tensor(260.0208, device='cuda:0')
episode: 291 training return: tensor(362.7274, device='cuda:0')
epoch: 73 test_true_pfm: 3605.6289020581085 sim_pfm: 399.1482526229326
episode: 292 training return: tensor(265.6209, device='cuda:0')
episode: 293 training return: tensor(312.3492, device='cuda:0')
episode: 294 training return: tensor(381.1225, device='cuda:0')
episode: 295 training return: tensor(357.8227, device='cuda:0')
epoch: 74 test_true_pfm: 3539.2428029834887 sim_pfm: 160.6040150379898
episode: 296 training return: tensor(345.7490, device='cuda:0')
episode: 297 training return: tensor(169.8845, device='cuda:0')
episode: 298 training return: tensor(-151.7309, device='cuda:0')
episode: 299 training return: tensor(-253.1539, device='cuda:0')
epoch: 75 test_true_pfm: 3543.232012989017 sim_pfm: 393.7885234951197
episode: 300 training return: tensor(471.4360, device='cuda:0')
episode: 301 training return: tensor(416.6281, device='cuda:0')
episode: 302 training return: tensor(371.3104, device='cuda:0')
episode: 303 training return: tensor(109.7729, device='cuda:0')
epoch: 76 test_true_pfm: 3621.21878309402 sim_pfm: 420.2967270738639
episode: 304 training return: tensor(318.3159, device='cuda:0')
episode: 305 training return: tensor(341.8327, device='cuda:0')
episode: 306 training return: tensor(-286.1195, device='cuda:0')
episode: 307 training return: tensor(398.4962, device='cuda:0')
epoch: 77 test_true_pfm: 3505.988541379651 sim_pfm: 310.0357728391925
episode: 308 training return: tensor(423.6443, device='cuda:0')
episode: 309 training return: tensor(297.7500, device='cuda:0')
episode: 310 training return: tensor(-170.9685, device='cuda:0')
episode: 311 training return: tensor(378.9456, device='cuda:0')
epoch: 78 test_true_pfm: 3585.3735201070535 sim_pfm: 389.2369655786509
episode: 312 training return: tensor(378.3742, device='cuda:0')
episode: 313 training return: tensor(352.1726, device='cuda:0')
episode: 314 training return: tensor(372.6967, device='cuda:0')
episode: 315 training return: tensor(389.8296, device='cuda:0')
epoch: 79 test_true_pfm: 3504.4476598496717 sim_pfm: 188.98240622734497
episode: 316 training return: tensor(418.7171, device='cuda:0')
episode: 317 training return: tensor(367.0987, device='cuda:0')
episode: 318 training return: tensor(340.6204, device='cuda:0')
episode: 319 training return: tensor(-258.6444, device='cuda:0')
epoch: 80 test_true_pfm: 3598.616279234128 sim_pfm: 325.7181535308094
episode: 320 training return: tensor(322.4799, device='cuda:0')
episode: 321 training return: tensor(347.1553, device='cuda:0')
episode: 322 training return: tensor(433.9872, device='cuda:0')
episode: 323 training return: tensor(316.3256, device='cuda:0')
epoch: 81 test_true_pfm: 3336.92608202996 sim_pfm: 403.4657496527846
episode: 324 training return: tensor(355.4201, device='cuda:0')
episode: 325 training return: tensor(-169.6717, device='cuda:0')
episode: 326 training return: tensor(231.8845, device='cuda:0')
episode: 327 training return: tensor(292.8951, device='cuda:0')
epoch: 82 test_true_pfm: 3543.490700775282 sim_pfm: 355.16070775710006
episode: 328 training return: tensor(422.6153, device='cuda:0')
episode: 329 training return: tensor(358.6978, device='cuda:0')
episode: 330 training return: tensor(355.6435, device='cuda:0')
episode: 331 training return: tensor(323.3588, device='cuda:0')
epoch: 83 test_true_pfm: 3550.695967875294 sim_pfm: 380.9646841775296
episode: 332 training return: tensor(335.5568, device='cuda:0')
episode: 333 training return: tensor(361.7128, device='cuda:0')
episode: 334 training return: tensor(369.5150, device='cuda:0')
episode: 335 training return: tensor(377.1298, device='cuda:0')
epoch: 84 test_true_pfm: 3535.7343377519624 sim_pfm: 353.0574571347097
episode: 336 training return: tensor(478.6202, device='cuda:0')
episode: 337 training return: tensor(341.6561, device='cuda:0')
episode: 338 training return: tensor(287.1331, device='cuda:0')
episode: 339 training return: tensor(287.4701, device='cuda:0')
epoch: 85 test_true_pfm: 3505.7499608714957 sim_pfm: 118.99659590525941
episode: 340 training return: tensor(266.1612, device='cuda:0')
episode: 341 training return: tensor(436.7646, device='cuda:0')
episode: 342 training return: tensor(102.9640, device='cuda:0')
episode: 343 training return: tensor(317.6560, device='cuda:0')
epoch: 86 test_true_pfm: 3522.80051911518 sim_pfm: 343.4547073774641
episode: 344 training return: tensor(354.9857, device='cuda:0')
episode: 345 training return: tensor(404.1896, device='cuda:0')
episode: 346 training return: tensor(387.9747, device='cuda:0')
episode: 347 training return: tensor(224.1784, device='cuda:0')
epoch: 87 test_true_pfm: 3169.0369403436102 sim_pfm: 381.7652026610352
episode: 348 training return: tensor(366.8500, device='cuda:0')
episode: 349 training return: tensor(418.5585, device='cuda:0')
episode: 350 training return: tensor(404.3098, device='cuda:0')
episode: 351 training return: tensor(382.3514, device='cuda:0')
epoch: 88 test_true_pfm: 3507.0002432063707 sim_pfm: 300.4141497996946
episode: 352 training return: tensor(414.4716, device='cuda:0')
episode: 353 training return: tensor(-168.1279, device='cuda:0')
episode: 354 training return: tensor(353.2025, device='cuda:0')
episode: 355 training return: tensor(348.4990, device='cuda:0')
epoch: 89 test_true_pfm: 3075.82073883918 sim_pfm: 194.90511290884265
episode: 356 training return: tensor(363.4425, device='cuda:0')
episode: 357 training return: tensor(-205.2122, device='cuda:0')
episode: 358 training return: tensor(322.9353, device='cuda:0')
episode: 359 training return: tensor(349.0650, device='cuda:0')
epoch: 90 test_true_pfm: 3529.8967860952102 sim_pfm: 371.9656024047775
episode: 360 training return: tensor(446.3510, device='cuda:0')
episode: 361 training return: tensor(364.1886, device='cuda:0')
episode: 362 training return: tensor(-85.3512, device='cuda:0')
episode: 363 training return: tensor(258.0951, device='cuda:0')
epoch: 91 test_true_pfm: 3499.4207988927865 sim_pfm: 416.17639692273224
episode: 364 training return: tensor(453.6088, device='cuda:0')
episode: 365 training return: tensor(257.5331, device='cuda:0')
episode: 366 training return: tensor(405.9576, device='cuda:0')
episode: 367 training return: tensor(3.9715, device='cuda:0')
epoch: 92 test_true_pfm: 3377.74279124375 sim_pfm: 401.7317241083559
episode: 368 training return: tensor(381.8748, device='cuda:0')
episode: 369 training return: tensor(367.7675, device='cuda:0')
episode: 370 training return: tensor(338.5701, device='cuda:0')
episode: 371 training return: tensor(278.8775, device='cuda:0')
epoch: 93 test_true_pfm: 3462.4331738614815 sim_pfm: 358.72117000215803
episode: 372 training return: tensor(381.5719, device='cuda:0')
episode: 373 training return: tensor(387.6800, device='cuda:0')
episode: 374 training return: tensor(376.0073, device='cuda:0')
episode: 375 training return: tensor(305.8435, device='cuda:0')
epoch: 94 test_true_pfm: 3591.954857146879 sim_pfm: 420.5460752272823
episode: 376 training return: tensor(316.8745, device='cuda:0')
episode: 377 training return: tensor(218.2050, device='cuda:0')
episode: 378 training return: tensor(403.6470, device='cuda:0')
episode: 379 training return: tensor(427.0264, device='cuda:0')
epoch: 95 test_true_pfm: 3579.795626018953 sim_pfm: 279.5593690141686
episode: 380 training return: tensor(-221.5289, device='cuda:0')
episode: 381 training return: tensor(-253.8123, device='cuda:0')
episode: 382 training return: tensor(351.6453, device='cuda:0')
episode: 383 training return: tensor(308.1927, device='cuda:0')
epoch: 96 test_true_pfm: 3570.813379679958 sim_pfm: 250.88217600685311
episode: 384 training return: tensor(367.5172, device='cuda:0')
episode: 385 training return: tensor(402.0549, device='cuda:0')
episode: 386 training return: tensor(314.7645, device='cuda:0')
episode: 387 training return: tensor(348.7191, device='cuda:0')
epoch: 97 test_true_pfm: 3521.691006068542 sim_pfm: 387.9844529539502
episode: 388 training return: tensor(372.0869, device='cuda:0')
episode: 389 training return: tensor(388.8795, device='cuda:0')
episode: 390 training return: tensor(-302.2122, device='cuda:0')
episode: 391 training return: tensor(299.1770, device='cuda:0')
epoch: 98 test_true_pfm: 3516.5046737174 sim_pfm: 361.7194061698586
episode: 392 training return: tensor(-242.7333, device='cuda:0')
episode: 393 training return: tensor(334.6495, device='cuda:0')
episode: 394 training return: tensor(338.7411, device='cuda:0')
episode: 395 training return: tensor(352.2198, device='cuda:0')
epoch: 99 test_true_pfm: 3548.1763910843288 sim_pfm: 389.32757654887
episode: 396 training return: tensor(319.3178, device='cuda:0')
episode: 397 training return: tensor(313.1952, device='cuda:0')
episode: 398 training return: tensor(362.0235, device='cuda:0')
episode: 399 training return: tensor(331.6317, device='cuda:0')
epoch: 100 test_true_pfm: 2770.507628736959 sim_pfm: 334.45516081344493
episode: 400 training return: tensor(374.0234, device='cuda:0')
episode: 401 training return: tensor(-244.9663, device='cuda:0')
episode: 402 training return: tensor(426.2999, device='cuda:0')
episode: 403 training return: tensor(297.4671, device='cuda:0')
epoch: 101 test_true_pfm: 3540.1208155297572 sim_pfm: 363.63410587310983
episode: 404 training return: tensor(350.6701, device='cuda:0')
episode: 405 training return: tensor(437.1654, device='cuda:0')
episode: 406 training return: tensor(360.0482, device='cuda:0')
episode: 407 training return: tensor(333.0273, device='cuda:0')
epoch: 102 test_true_pfm: 3553.4664146530426 sim_pfm: 357.55474178978085
episode: 408 training return: tensor(331.8782, device='cuda:0')
episode: 409 training return: tensor(347.0335, device='cuda:0')
episode: 410 training return: tensor(383.0812, device='cuda:0')
episode: 411 training return: tensor(16.6905, device='cuda:0')
epoch: 103 test_true_pfm: 3554.2075506968777 sim_pfm: 410.13971548408153
episode: 412 training return: tensor(374.0393, device='cuda:0')
episode: 413 training return: tensor(431.4620, device='cuda:0')
episode: 414 training return: tensor(389.5226, device='cuda:0')
episode: 415 training return: tensor(340.9848, device='cuda:0')
epoch: 104 test_true_pfm: 3626.5007491898905 sim_pfm: 226.14508011787743
episode: 416 training return: tensor(91.1316, device='cuda:0')
episode: 417 training return: tensor(383.5225, device='cuda:0')
episode: 418 training return: tensor(376.3047, device='cuda:0')
episode: 419 training return: tensor(285.5287, device='cuda:0')
epoch: 105 test_true_pfm: 3566.3488827888855 sim_pfm: 392.1157631956157
episode: 420 training return: tensor(367.9273, device='cuda:0')
episode: 421 training return: tensor(340.5406, device='cuda:0')
episode: 422 training return: tensor(378.7407, device='cuda:0')
episode: 423 training return: tensor(363.1486, device='cuda:0')
epoch: 106 test_true_pfm: 3558.2676504929077 sim_pfm: 393.95725539519725
episode: 424 training return: tensor(364.8957, device='cuda:0')
episode: 425 training return: tensor(333.0680, device='cuda:0')
episode: 426 training return: tensor(300.0219, device='cuda:0')
episode: 427 training return: tensor(389.1981, device='cuda:0')
epoch: 107 test_true_pfm: 3546.102214200775 sim_pfm: 423.4886474867041
episode: 428 training return: tensor(372.9798, device='cuda:0')
episode: 429 training return: tensor(-58.5420, device='cuda:0')
episode: 430 training return: tensor(359.3465, device='cuda:0')
episode: 431 training return: tensor(302.0429, device='cuda:0')
epoch: 108 test_true_pfm: 3561.0278090872075 sim_pfm: 325.34192036928533
episode: 432 training return: tensor(295.7796, device='cuda:0')
episode: 433 training return: tensor(341.9650, device='cuda:0')
episode: 434 training return: tensor(284.0907, device='cuda:0')
episode: 435 training return: tensor(365.2999, device='cuda:0')
epoch: 109 test_true_pfm: 3530.942350517067 sim_pfm: 355.9301353256645
episode: 436 training return: tensor(405.3163, device='cuda:0')
episode: 437 training return: tensor(381.8171, device='cuda:0')
episode: 438 training return: tensor(-20.0031, device='cuda:0')
episode: 439 training return: tensor(-324.6124, device='cuda:0')
epoch: 110 test_true_pfm: 3108.3356104599247 sim_pfm: 87.10394692942889
episode: 440 training return: tensor(353.5876, device='cuda:0')
episode: 441 training return: tensor(487.2401, device='cuda:0')
episode: 442 training return: tensor(342.9366, device='cuda:0')
episode: 443 training return: tensor(357.2050, device='cuda:0')
epoch: 111 test_true_pfm: 3060.5043224403084 sim_pfm: 362.82444738749956
episode: 444 training return: tensor(336.9937, device='cuda:0')
episode: 445 training return: tensor(374.0801, device='cuda:0')
episode: 446 training return: tensor(49.7477, device='cuda:0')
episode: 447 training return: tensor(345.4840, device='cuda:0')
epoch: 112 test_true_pfm: 3504.368557270797 sim_pfm: 371.3660658922939
episode: 448 training return: tensor(408.4532, device='cuda:0')
episode: 449 training return: tensor(388.8102, device='cuda:0')
episode: 450 training return: tensor(-200.7439, device='cuda:0')
episode: 451 training return: tensor(448.5692, device='cuda:0')
epoch: 113 test_true_pfm: 2732.7119861143333 sim_pfm: 374.1270386388448
episode: 452 training return: tensor(303.7274, device='cuda:0')
episode: 453 training return: tensor(79.0263, device='cuda:0')
episode: 454 training return: tensor(-192.2450, device='cuda:0')
episode: 455 training return: tensor(371.9499, device='cuda:0')
epoch: 114 test_true_pfm: 3263.6780034643652 sim_pfm: 375.6470611105033
episode: 456 training return: tensor(372.3662, device='cuda:0')
episode: 457 training return: tensor(375.0647, device='cuda:0')
episode: 458 training return: tensor(376.0673, device='cuda:0')
episode: 459 training return: tensor(385.6103, device='cuda:0')
epoch: 115 test_true_pfm: 3564.174912910918 sim_pfm: 254.50136673630914
episode: 460 training return: tensor(420.1222, device='cuda:0')
episode: 461 training return: tensor(331.8326, device='cuda:0')
episode: 462 training return: tensor(40.8763, device='cuda:0')
episode: 463 training return: tensor(351.2162, device='cuda:0')
epoch: 116 test_true_pfm: 3536.3866810969284 sim_pfm: 373.00451340302243
episode: 464 training return: tensor(288.0622, device='cuda:0')
episode: 465 training return: tensor(-22.7256, device='cuda:0')
episode: 466 training return: tensor(336.9384, device='cuda:0')
episode: 467 training return: tensor(232.8403, device='cuda:0')
epoch: 117 test_true_pfm: 3554.74289349595 sim_pfm: 356.0219103417864
episode: 468 training return: tensor(344.2305, device='cuda:0')
episode: 469 training return: tensor(333.4671, device='cuda:0')
episode: 470 training return: tensor(356.1989, device='cuda:0')
episode: 471 training return: tensor(303.8741, device='cuda:0')
epoch: 118 test_true_pfm: 3508.6979206736164 sim_pfm: 381.09422900263843
episode: 472 training return: tensor(219.0014, device='cuda:0')
episode: 473 training return: tensor(324.4039, device='cuda:0')
episode: 474 training return: tensor(259.6131, device='cuda:0')
episode: 475 training return: tensor(329.3776, device='cuda:0')
epoch: 119 test_true_pfm: 3549.3584479969686 sim_pfm: 359.70338217112777
episode: 476 training return: tensor(9.1500, device='cuda:0')
episode: 477 training return: tensor(302.5116, device='cuda:0')
episode: 478 training return: tensor(389.3230, device='cuda:0')
episode: 479 training return: tensor(372.8038, device='cuda:0')
epoch: 120 test_true_pfm: 3581.580952520231 sim_pfm: 227.87465877559347
episode: 480 training return: tensor(370.2850, device='cuda:0')
episode: 481 training return: tensor(381.5180, device='cuda:0')
episode: 482 training return: tensor(375.9257, device='cuda:0')
episode: 483 training return: tensor(391.8264, device='cuda:0')
epoch: 121 test_true_pfm: 3539.44260171646 sim_pfm: 362.27530201675836
episode: 484 training return: tensor(396.2996, device='cuda:0')
episode: 485 training return: tensor(384.9285, device='cuda:0')
episode: 486 training return: tensor(321.2703, device='cuda:0')
episode: 487 training return: tensor(313.6079, device='cuda:0')
epoch: 122 test_true_pfm: 3566.4672030180213 sim_pfm: 187.6505117637183
episode: 488 training return: tensor(-134.9986, device='cuda:0')
episode: 489 training return: tensor(391.1312, device='cuda:0')
episode: 490 training return: tensor(324.8024, device='cuda:0')
episode: 491 training return: tensor(183.2328, device='cuda:0')
epoch: 123 test_true_pfm: 3553.390817332365 sim_pfm: 360.056902245502
episode: 492 training return: tensor(376.8157, device='cuda:0')
episode: 493 training return: tensor(413.1168, device='cuda:0')
episode: 494 training return: tensor(276.0743, device='cuda:0')
episode: 495 training return: tensor(259.1303, device='cuda:0')
epoch: 124 test_true_pfm: 3542.0597724618747 sim_pfm: 383.8005854172904
episode: 496 training return: tensor(359.3877, device='cuda:0')
episode: 497 training return: tensor(426.0768, device='cuda:0')
episode: 498 training return: tensor(381.7697, device='cuda:0')
episode: 499 training return: tensor(-112.9772, device='cuda:0')
epoch: 125 test_true_pfm: 3560.796724141262 sim_pfm: 314.7547700045786
episode: 500 training return: tensor(205.5318, device='cuda:0')
episode: 501 training return: tensor(416.7496, device='cuda:0')
episode: 502 training return: tensor(282.1849, device='cuda:0')
episode: 503 training return: tensor(392.0912, device='cuda:0')
epoch: 126 test_true_pfm: 3516.9514070690825 sim_pfm: 374.8944980616604
episode: 504 training return: tensor(208.9690, device='cuda:0')
episode: 505 training return: tensor(360.4704, device='cuda:0')
episode: 506 training return: tensor(-129.8126, device='cuda:0')
episode: 507 training return: tensor(325.8439, device='cuda:0')
epoch: 127 test_true_pfm: 3530.1134542600316 sim_pfm: 353.00461774125387
episode: 508 training return: tensor(279.8611, device='cuda:0')
episode: 509 training return: tensor(-219.2635, device='cuda:0')
episode: 510 training return: tensor(376.6087, device='cuda:0')
episode: 511 training return: tensor(388.4316, device='cuda:0')
epoch: 128 test_true_pfm: 3565.0752695448446 sim_pfm: 403.70928428617
episode: 512 training return: tensor(318.7662, device='cuda:0')
episode: 513 training return: tensor(379.8067, device='cuda:0')
episode: 514 training return: tensor(356.9628, device='cuda:0')
episode: 515 training return: tensor(364.3157, device='cuda:0')
epoch: 129 test_true_pfm: 3582.552121272187 sim_pfm: 364.2681144951105
episode: 516 training return: tensor(336.6899, device='cuda:0')
episode: 517 training return: tensor(334.9670, device='cuda:0')
episode: 518 training return: tensor(457.7379, device='cuda:0')
episode: 519 training return: tensor(-127.3816, device='cuda:0')
epoch: 130 test_true_pfm: 3486.8158829449676 sim_pfm: 365.8316651430214
episode: 520 training return: tensor(395.6516, device='cuda:0')
episode: 521 training return: tensor(369.2645, device='cuda:0')
episode: 522 training return: tensor(349.1278, device='cuda:0')
episode: 523 training return: tensor(382.3095, device='cuda:0')
epoch: 131 test_true_pfm: 3531.1759506731964 sim_pfm: 367.53636509836844
episode: 524 training return: tensor(401.1706, device='cuda:0')
episode: 525 training return: tensor(-285.0908, device='cuda:0')
episode: 526 training return: tensor(397.0865, device='cuda:0')
episode: 527 training return: tensor(369.0252, device='cuda:0')
epoch: 132 test_true_pfm: 3521.767178751741 sim_pfm: 358.9482288752527
episode: 528 training return: tensor(78.9663, device='cuda:0')
episode: 529 training return: tensor(409.6245, device='cuda:0')
episode: 530 training return: tensor(390.7750, device='cuda:0')
episode: 531 training return: tensor(198.7680, device='cuda:0')
epoch: 133 test_true_pfm: 2785.283495742431 sim_pfm: 380.32166799908737
episode: 532 training return: tensor(353.2518, device='cuda:0')
episode: 533 training return: tensor(358.0846, device='cuda:0')
episode: 534 training return: tensor(367.4146, device='cuda:0')
episode: 535 training return: tensor(324.9153, device='cuda:0')
epoch: 134 test_true_pfm: 3162.2954920694224 sim_pfm: 380.62130840365234
episode: 536 training return: tensor(355.8137, device='cuda:0')
episode: 537 training return: tensor(347.2182, device='cuda:0')
episode: 538 training return: tensor(417.8372, device='cuda:0')
episode: 539 training return: tensor(343.0398, device='cuda:0')
epoch: 135 test_true_pfm: 3332.9291795195845 sim_pfm: 395.0216988155735
episode: 540 training return: tensor(308.7872, device='cuda:0')
episode: 541 training return: tensor(267.9976, device='cuda:0')
episode: 542 training return: tensor(355.4294, device='cuda:0')
episode: 543 training return: tensor(393.1484, device='cuda:0')
epoch: 136 test_true_pfm: 3589.961697252142 sim_pfm: 394.9758389068399
episode: 544 training return: tensor(340.9558, device='cuda:0')
episode: 545 training return: tensor(168.8652, device='cuda:0')
episode: 546 training return: tensor(344.7565, device='cuda:0')
episode: 547 training return: tensor(391.4677, device='cuda:0')
epoch: 137 test_true_pfm: 3565.4228973601153 sim_pfm: 378.1365652483267
episode: 548 training return: tensor(322.1891, device='cuda:0')
episode: 549 training return: tensor(316.0507, device='cuda:0')
episode: 550 training return: tensor(325.1685, device='cuda:0')
episode: 551 training return: tensor(328.3488, device='cuda:0')
epoch: 138 test_true_pfm: 3502.403608073882 sim_pfm: 370.8368509499899
episode: 552 training return: tensor(-273.5196, device='cuda:0')
episode: 553 training return: tensor(425.0510, device='cuda:0')
episode: 554 training return: tensor(324.0511, device='cuda:0')
episode: 555 training return: tensor(311.6113, device='cuda:0')
epoch: 139 test_true_pfm: 3582.5866615404243 sim_pfm: 397.26701785985887
episode: 556 training return: tensor(385.7887, device='cuda:0')
episode: 557 training return: tensor(382.2650, device='cuda:0')
episode: 558 training return: tensor(271.0903, device='cuda:0')
episode: 559 training return: tensor(365.1550, device='cuda:0')
epoch: 140 test_true_pfm: 3518.2628924980963 sim_pfm: 362.0047974413513
episode: 560 training return: tensor(301.6464, device='cuda:0')
episode: 561 training return: tensor(400.8280, device='cuda:0')
episode: 562 training return: tensor(310.4102, device='cuda:0')
episode: 563 training return: tensor(406.6059, device='cuda:0')
epoch: 141 test_true_pfm: 3312.8968716523705 sim_pfm: 394.010698097952
episode: 564 training return: tensor(446.0954, device='cuda:0')
episode: 565 training return: tensor(339.6992, device='cuda:0')
episode: 566 training return: tensor(359.2510, device='cuda:0')
episode: 567 training return: tensor(435.9026, device='cuda:0')
epoch: 142 test_true_pfm: 3555.3663367289682 sim_pfm: 446.778880782115
episode: 568 training return: tensor(382.2374, device='cuda:0')
episode: 569 training return: tensor(374.9759, device='cuda:0')
episode: 570 training return: tensor(358.2721, device='cuda:0')
episode: 571 training return: tensor(332.1476, device='cuda:0')
epoch: 143 test_true_pfm: 3328.3934794658476 sim_pfm: 361.0972664036187
episode: 572 training return: tensor(422.5207, device='cuda:0')
episode: 573 training return: tensor(78.9226, device='cuda:0')
episode: 574 training return: tensor(405.6204, device='cuda:0')
episode: 575 training return: tensor(359.4423, device='cuda:0')
epoch: 144 test_true_pfm: 3509.109229352636 sim_pfm: 389.2296713440446
episode: 576 training return: tensor(376.9547, device='cuda:0')
episode: 577 training return: tensor(355.9084, device='cuda:0')
episode: 578 training return: tensor(355.0954, device='cuda:0')
episode: 579 training return: tensor(338.3685, device='cuda:0')
epoch: 145 test_true_pfm: 3409.5214564116777 sim_pfm: 387.958088843928
episode: 580 training return: tensor(-132.4238, device='cuda:0')
episode: 581 training return: tensor(356.4582, device='cuda:0')
episode: 582 training return: tensor(314.2361, device='cuda:0')
episode: 583 training return: tensor(60.8935, device='cuda:0')
epoch: 146 test_true_pfm: 3539.068691775221 sim_pfm: 387.76529505922616
episode: 584 training return: tensor(341.4150, device='cuda:0')
episode: 585 training return: tensor(255.3139, device='cuda:0')
episode: 586 training return: tensor(109.7891, device='cuda:0')
episode: 587 training return: tensor(368.7860, device='cuda:0')
epoch: 147 test_true_pfm: 3530.0679704231975 sim_pfm: 364.39739784103585
episode: 588 training return: tensor(323.9610, device='cuda:0')
episode: 589 training return: tensor(323.8609, device='cuda:0')
episode: 590 training return: tensor(364.8732, device='cuda:0')
episode: 591 training return: tensor(395.6311, device='cuda:0')
epoch: 148 test_true_pfm: 3222.331738462686 sim_pfm: 307.96118359445245
episode: 592 training return: tensor(401.9081, device='cuda:0')
episode: 593 training return: tensor(347.2135, device='cuda:0')
episode: 594 training return: tensor(384.4299, device='cuda:0')
episode: 595 training return: tensor(399.2430, device='cuda:0')
epoch: 149 test_true_pfm: 3515.5450029404105 sim_pfm: 192.544583678653
episode: 596 training return: tensor(369.7282, device='cuda:0')
episode: 597 training return: tensor(404.5590, device='cuda:0')
episode: 598 training return: tensor(403.7401, device='cuda:0')
episode: 599 training return: tensor(447.1025, device='cuda:0')
epoch: 150 test_true_pfm: 3285.8954869224053 sim_pfm: 290.36237487454974
