['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '3', '--data', '10000']
epoch: 0 training_loss 0.2676480004191399 test_loss: 0.19415370225906373
epoch: 1 training_loss 0.2037919447571039 test_loss: 0.1936928629875183
epoch: 2 training_loss 0.1946431752294302 test_loss: 0.1820027470588684
epoch: 3 training_loss 0.1969987753778696 test_loss: 0.1963297963142395
epoch: 4 training_loss 0.19392939105629922 test_loss: 0.19152718782424927
epoch: 5 training_loss 0.1959567004442215 test_loss: 0.18349767923355104
epoch: 6 training_loss 0.18729417465627193 test_loss: 0.18722838163375854
epoch: 7 training_loss 0.19167360350489615 test_loss: 0.19439021348953248
epoch: 8 training_loss 0.18940519630908967 test_loss: 0.18368252515792846
epoch: 9 training_loss 0.18955830909311772 test_loss: 0.18083784580230713
epoch: 10 training_loss 0.18915893137454987 test_loss: 0.18066738843917846
epoch: 11 training_loss 0.18698939204216003 test_loss: 0.18812882900238037
epoch: 12 training_loss 0.1784781840443611 test_loss: 0.18820077180862427
epoch: 13 training_loss 0.182077134847641 test_loss: 0.18042271137237548
epoch: 14 training_loss 0.18672820687294006 test_loss: 0.16846462488174438
epoch: 15 training_loss 0.18267268300056458 test_loss: 0.17199010848999025
epoch: 16 training_loss 0.17744304217398166 test_loss: 0.17367243766784668
epoch: 17 training_loss 0.17235692761838436 test_loss: 0.17999930381774903
epoch: 18 training_loss 0.17445151723921298 test_loss: 0.18935452699661254
epoch: 19 training_loss 0.1841973078250885 test_loss: 0.1835724115371704
epoch: 20 training_loss 0.185942802131176 test_loss: 0.2008887529373169
epoch: 21 training_loss 0.18815213173627854 test_loss: 0.17953504323959352
epoch: 22 training_loss 0.182759243324399 test_loss: 0.17625811100006103
epoch: 23 training_loss 0.17443349808454514 test_loss: 0.17279375791549684
epoch: 24 training_loss 0.17766604326665403 test_loss: 0.176122784614563
epoch: 25 training_loss 0.1812957940995693 test_loss: 0.18325212001800537
epoch: 26 training_loss 0.1783462619781494 test_loss: 0.17644662857055665
epoch: 27 training_loss 0.17933718375861646 test_loss: 0.18793148994445802
epoch: 28 training_loss 0.17819866873323917 test_loss: 0.1776012063026428
epoch: 29 training_loss 0.1751285769790411 test_loss: 0.1815179705619812
epoch: 30 training_loss 0.1786878365278244 test_loss: 0.18830691576004027
epoch: 31 training_loss 0.18465786978602408 test_loss: 0.1843856692314148
epoch: 32 training_loss 0.1840647901594639 test_loss: 0.17327289581298827
epoch: 33 training_loss 0.17867122173309327 test_loss: 0.1746976375579834
epoch: 34 training_loss 0.1796702516078949 test_loss: 0.18081929683685302
epoch: 35 training_loss 0.1829554284363985 test_loss: 0.18298254013061524
epoch: 36 training_loss 0.17659667409956456 test_loss: 0.18571568727493287
epoch: 37 training_loss 0.17515301637351513 test_loss: 0.16889129877090453
epoch: 38 training_loss 0.17803479313850404 test_loss: 0.18445855379104614
epoch: 39 training_loss 0.172736791446805 test_loss: 0.17968279123306274
epoch: 40 training_loss 0.1698065747320652 test_loss: 0.1806308150291443
epoch: 41 training_loss 0.17600076675415038 test_loss: 0.1833565354347229
epoch: 42 training_loss 0.17357857525348663 test_loss: 0.17629413604736327
epoch: 43 training_loss 0.17465914361178875 test_loss: 0.1889575481414795
epoch: 44 training_loss 0.17184022158384324 test_loss: 0.17388041019439698
epoch: 45 training_loss 0.18094597481191158 test_loss: 0.186014986038208
epoch: 46 training_loss 0.1742453559488058 test_loss: 0.19222420454025269
epoch: 47 training_loss 0.1797471809387207 test_loss: 0.18166879415512086
epoch: 48 training_loss 0.1823577645421028 test_loss: 0.18133867979049684
epoch: 49 training_loss 0.1718788256496191 test_loss: 0.17203106880187988
epoch: 50 training_loss 0.17062181152403355 test_loss: 0.18356162309646606
epoch: 51 training_loss 0.17735769972205162 test_loss: 0.18104264736175538
epoch: 52 training_loss 0.1794873707741499 test_loss: 0.17595149278640748
epoch: 53 training_loss 0.17574538588523864 test_loss: 0.18093420267105104
epoch: 54 training_loss 0.17978168852627277 test_loss: 0.1591668963432312
epoch: 55 training_loss 0.17140707418322562 test_loss: 0.1828044056892395
epoch: 56 training_loss 0.17528449580073358 test_loss: 0.18469067811965942
epoch: 57 training_loss 0.17592282719910146 test_loss: 0.18631441593170167
epoch: 58 training_loss 0.17010394617915153 test_loss: 0.17674170732498168
epoch: 59 training_loss 0.17779142148792743 test_loss: 0.17940740585327147
epoch: 60 training_loss 0.17572454042732716 test_loss: 0.17509164810180664
epoch: 61 training_loss 0.18015969954431058 test_loss: 0.1852598786354065
epoch: 62 training_loss 0.16860024355351924 test_loss: 0.18759149312973022
epoch: 63 training_loss 0.18050242573022843 test_loss: 0.1810302495956421
epoch: 64 training_loss 0.1688610377907753 test_loss: 0.1700502395629883
epoch: 65 training_loss 0.17440347045660018 test_loss: 0.1801764726638794
epoch: 66 training_loss 0.17499105654656888 test_loss: 0.18450605869293213
epoch: 67 training_loss 0.17490318074822425 test_loss: 0.17966457605361938
epoch: 68 training_loss 0.17172485254704953 test_loss: 0.18417290449142457
epoch: 69 training_loss 0.1755807586759329 test_loss: 0.19003686904907227
epoch: 70 training_loss 0.17428003147244453 test_loss: 0.17607200145721436
epoch: 71 training_loss 0.17336023285984992 test_loss: 0.18226076364517213
epoch: 72 training_loss 0.17582795202732085 test_loss: 0.17730404138565065
epoch: 73 training_loss 0.18586385779082776 test_loss: 0.17717641592025757
epoch: 74 training_loss 0.16618583008646964 test_loss: 0.1814335584640503
epoch: 75 training_loss 0.16918723188340665 test_loss: 0.17290574312210083
epoch: 76 training_loss 0.1702466880530119 test_loss: 0.18325217962265014
epoch: 77 training_loss 0.17003300074487926 test_loss: 0.17962582111358644
epoch: 78 training_loss 0.16830704040825367 test_loss: 0.17500452995300292
epoch: 79 training_loss 0.1675728064030409 test_loss: 0.18935847282409668
epoch: 80 training_loss 0.17111175458878278 test_loss: 0.1630315065383911
epoch: 81 training_loss 0.1719153320044279 test_loss: 0.17439767122268676
epoch: 82 training_loss 0.1698906721919775 test_loss: 0.18611377477645874
epoch: 83 training_loss 0.16878570660948752 test_loss: 0.16864665746688842
epoch: 84 training_loss 0.1811810104548931 test_loss: 0.1776483654975891
epoch: 85 training_loss 0.17206230372190476 test_loss: 0.18937524557113647
epoch: 86 training_loss 0.1606867265701294 test_loss: 0.18168789148330688
epoch: 87 training_loss 0.17674801282584668 test_loss: 0.1709257483482361
epoch: 88 training_loss 0.1711701302975416 test_loss: 0.17293211221694946
epoch: 89 training_loss 0.17341486655175686 test_loss: 0.18271923065185547
epoch: 90 training_loss 0.16720958635210992 test_loss: 0.17290364503860473
epoch: 91 training_loss 0.17323926784098148 test_loss: 0.1799926280975342
epoch: 92 training_loss 0.16987920589745045 test_loss: 0.18422607183456421
epoch: 93 training_loss 0.17059763707220554 test_loss: 0.1861932635307312
epoch: 94 training_loss 0.17082845859229565 test_loss: 0.18337072134017945
epoch: 95 training_loss 0.16453693106770514 test_loss: 0.18173329830169677
epoch: 96 training_loss 0.16981618009507657 test_loss: 0.18469650745391847
epoch: 97 training_loss 0.17281660564243795 test_loss: 0.1719928503036499
epoch: 98 training_loss 0.174397416934371 test_loss: 0.18777551651000976
epoch: 99 training_loss 0.1743861000984907 test_loss: 0.18563499450683593
epoch: 100 training_loss 0.17198835745453833 test_loss: 0.1722169876098633
epoch: 101 training_loss 0.17006186015903949 test_loss: 0.19057095050811768
epoch: 102 training_loss 0.1700899998471141 test_loss: 0.1870640993118286
epoch: 103 training_loss 0.17415173273533582 test_loss: 0.18100547790527344
epoch: 104 training_loss 0.17250732250511647 test_loss: 0.17958043813705443
epoch: 105 training_loss 0.17833794169127942 test_loss: 0.1836408257484436
epoch: 106 training_loss 0.17511006120592357 test_loss: 0.17874367237091066
epoch: 107 training_loss 0.16885133430361748 test_loss: 0.17322741746902465
epoch: 108 training_loss 0.17429745465517044 test_loss: 0.17239043712615967
epoch: 109 training_loss 0.16983437426388265 test_loss: 0.19458383321762085
epoch: 110 training_loss 0.17081526316702367 test_loss: 0.17418233156204224
epoch: 111 training_loss 0.17445799261331557 test_loss: 0.19409099817276002
epoch: 112 training_loss 0.17293182104825974 test_loss: 0.1814626693725586
epoch: 113 training_loss 0.16835830681025982 test_loss: 0.18555519580841065
epoch: 114 training_loss 0.16668459974229335 test_loss: 0.1838636040687561
epoch: 115 training_loss 0.1754347199946642 test_loss: 0.18371162414550782
epoch: 116 training_loss 0.17441237330436707 test_loss: 0.17316006422042846
epoch: 117 training_loss 0.17664653733372687 test_loss: 0.16688119173049926
epoch: 118 training_loss 0.1787676666676998 test_loss: 0.17327258586883545
epoch: 119 training_loss 0.15893058471381663 test_loss: 0.1824716806411743
epoch: 120 training_loss 0.16225878566503524 test_loss: 0.179809045791626
epoch: 121 training_loss 0.1756825353950262 test_loss: 0.18162227869033815
epoch: 122 training_loss 0.16949652925133704 test_loss: 0.18370116949081422
epoch: 123 training_loss 0.16799342148005963 test_loss: 0.18575741052627565
epoch: 124 training_loss 0.16941557087004186 test_loss: 0.18095145225524903
epoch: 125 training_loss 0.17071461327373982 test_loss: 0.18004518747329712
epoch: 126 training_loss 0.16986598365008831 test_loss: 0.19097503423690795
epoch: 127 training_loss 0.16657081738114357 test_loss: 0.17218559980392456
epoch: 128 training_loss 0.17235884696245193 test_loss: 0.1893545150756836
epoch: 129 training_loss 0.1654789673537016 test_loss: 0.172760009765625
epoch: 130 training_loss 0.16464763484895228 test_loss: 0.19157530069351197
epoch: 131 training_loss 0.16147689130157233 test_loss: 0.18799402713775634
epoch: 132 training_loss 0.17217467591166496 test_loss: 0.18015239238739014
epoch: 133 training_loss 0.172389652505517 test_loss: 0.17891072034835814
epoch: 134 training_loss 0.1670167002081871 test_loss: 0.19097667932510376
epoch: 135 training_loss 0.16543325528502464 test_loss: 0.18770067691802977
epoch: 136 training_loss 0.16529481559991838 test_loss: 0.18745820522308348
epoch: 137 training_loss 0.184076354727149 test_loss: 0.18264034986495972
epoch: 138 training_loss 0.17025899946689604 test_loss: 0.1847011685371399
epoch: 139 training_loss 0.16815834656357764 test_loss: 0.1780668616294861
epoch: 140 training_loss 0.166317722722888 test_loss: 0.17305649518966676
epoch: 141 training_loss 0.16498822443187236 test_loss: 0.17773090600967406
epoch: 142 training_loss 0.1659949866682291 test_loss: 0.17691534757614136
epoch: 143 training_loss 0.17514845207333565 test_loss: 0.1834669828414917
epoch: 144 training_loss 0.16487771451473235 test_loss: 0.18586686849594117
epoch: 145 training_loss 0.17085391230881214 test_loss: 0.18185962438583375
epoch: 146 training_loss 0.16468013111501933 test_loss: 0.17906683683395386
epoch: 147 training_loss 0.16402865648269654 test_loss: 0.190341317653656
epoch: 148 training_loss 0.1621672348678112 test_loss: 0.18337665796279906
epoch: 149 training_loss 0.15989637292921544 test_loss: 0.1790862798690796
epoch: 0 training_loss 8.877576537132263 test_loss: 5.023382949829101
epoch: 1 training_loss 4.036587839126587 test_loss: 3.1489025115966798
epoch: 2 training_loss 2.8338759040832517 test_loss: 2.4464712142944336
epoch: 3 training_loss 2.2426664209365845 test_loss: 2.0285179138183596
epoch: 4 training_loss 1.8800470685958863 test_loss: 1.7627456665039063
epoch: 5 training_loss 1.7020163428783417 test_loss: 1.5964017868041993
epoch: 6 training_loss 1.5612535667419434 test_loss: 1.4928823471069337
epoch: 7 training_loss 1.4062971043586732 test_loss: 1.3971100807189942
epoch: 8 training_loss 1.3427347755432129 test_loss: 1.3133510589599608
epoch: 9 training_loss 1.2865592014789582 test_loss: 1.2488593101501464
epoch: 10 training_loss 1.2327659583091737 test_loss: 1.2070930480957032
epoch: 11 training_loss 1.182010691165924 test_loss: 1.160832977294922
epoch: 12 training_loss 1.0980291533470155 test_loss: 1.1315021514892578
epoch: 13 training_loss 1.0927720862627028 test_loss: 1.0940516471862793
epoch: 14 training_loss 1.0398885267972946 test_loss: 1.0142539024353028
epoch: 15 training_loss 1.0264275735616684 test_loss: 1.0343758583068847
epoch: 16 training_loss 0.9946070444583893 test_loss: 0.9744612693786621
epoch: 17 training_loss 0.9624598252773285 test_loss: 0.9523229598999023
epoch: 18 training_loss 0.9743983799219131 test_loss: 0.9203098297119141
epoch: 19 training_loss 0.9121634197235108 test_loss: 0.9207687377929688
epoch: 20 training_loss 0.9041132247447967 test_loss: 0.8957309722900391
epoch: 21 training_loss 0.8818559837341309 test_loss: 0.9110067367553711
epoch: 22 training_loss 0.8731609010696411 test_loss: 0.8825290679931641
epoch: 23 training_loss 0.8503165394067764 test_loss: 0.880300235748291
epoch: 24 training_loss 0.847361575961113 test_loss: 0.8431025505065918
epoch: 25 training_loss 0.8214547795057296 test_loss: 0.8732577323913574
epoch: 26 training_loss 0.8239828824996949 test_loss: 0.8235989570617676
epoch: 27 training_loss 0.7900608170032501 test_loss: 0.7931520462036132
epoch: 28 training_loss 0.7951317965984345 test_loss: 0.789723539352417
epoch: 29 training_loss 0.783289322257042 test_loss: 0.8175302505493164
epoch: 30 training_loss 0.7747578889131546 test_loss: 0.8017887115478516
epoch: 31 training_loss 0.7622409653663635 test_loss: 0.7463014125823975
epoch: 32 training_loss 0.7650454699993133 test_loss: 0.7513442516326905
epoch: 33 training_loss 0.7523105615377426 test_loss: 0.7631646156311035
epoch: 34 training_loss 0.7404506385326386 test_loss: 0.7364528179168701
epoch: 35 training_loss 0.7341616827249527 test_loss: 0.7194253921508789
epoch: 36 training_loss 0.7234510165452958 test_loss: 0.7347378730773926
epoch: 37 training_loss 0.7144856119155883 test_loss: 0.7147619724273682
epoch: 38 training_loss 0.7154721653461457 test_loss: 0.7205244064331054
epoch: 39 training_loss 0.702707821726799 test_loss: 0.7124087333679199
epoch: 40 training_loss 0.7103880047798157 test_loss: 0.703633975982666
epoch: 41 training_loss 0.6923527771234512 test_loss: 0.7025836944580078
epoch: 42 training_loss 0.6941786980628968 test_loss: 0.700576639175415
epoch: 43 training_loss 0.6775301945209503 test_loss: 0.7135976791381836
epoch: 44 training_loss 0.6754117047786713 test_loss: 0.6749963283538818
epoch: 45 training_loss 0.6789836561679841 test_loss: 0.6794385433197021
epoch: 46 training_loss 0.6662442541122436 test_loss: 0.6732227325439453
epoch: 47 training_loss 0.6647741204500198 test_loss: 0.6738613605499267
epoch: 48 training_loss 0.6568530821800231 test_loss: 0.669509744644165
epoch: 49 training_loss 0.6612769216299057 test_loss: 0.6673614978790283
epoch: 50 training_loss 0.6494936394691467 test_loss: 0.6529368877410888
epoch: 51 training_loss 0.6533398932218552 test_loss: 0.6573257923126221
epoch: 52 training_loss 0.6634246706962585 test_loss: 0.6363286018371582
epoch: 53 training_loss 0.631519981622696 test_loss: 0.6376986980438233
epoch: 54 training_loss 0.637024563550949 test_loss: 0.6380742073059082
epoch: 55 training_loss 0.639409264922142 test_loss: 0.6444169044494629
epoch: 56 training_loss 0.6292458140850067 test_loss: 0.6416717052459717
epoch: 57 training_loss 0.6265562516450882 test_loss: 0.6236955165863037
epoch: 58 training_loss 0.6281585544347763 test_loss: 0.630057430267334
epoch: 59 training_loss 0.6329781740903855 test_loss: 0.6308290958404541
epoch: 60 training_loss 0.6212616640329361 test_loss: 0.6302625179290772
epoch: 61 training_loss 0.6140615713596344 test_loss: 0.6452316761016845
epoch: 62 training_loss 0.6169774347543716 test_loss: 0.613174295425415
epoch: 63 training_loss 0.6135851520299912 test_loss: 0.5982117176055908
epoch: 64 training_loss 0.6101530337333679 test_loss: 0.6014986991882324
epoch: 65 training_loss 0.6079328179359436 test_loss: 0.6190208911895752
epoch: 66 training_loss 0.6091103446483612 test_loss: 0.6111184120178222
epoch: 67 training_loss 0.6014129954576493 test_loss: 0.619219446182251
epoch: 68 training_loss 0.6003642368316651 test_loss: 0.6076535701751709
epoch: 69 training_loss 0.5998821049928665 test_loss: 0.6126962184906006
epoch: 70 training_loss 0.5954769724607467 test_loss: 0.6166982650756836
epoch: 71 training_loss 0.5957159477472306 test_loss: 0.6295256614685059
epoch: 72 training_loss 0.5885916745662689 test_loss: 0.580319881439209
epoch: 73 training_loss 0.5925637340545654 test_loss: 0.6280776977539062
epoch: 74 training_loss 0.5936865210533142 test_loss: 0.5986032962799073
epoch: 75 training_loss 0.5804143017530441 test_loss: 0.5767715454101563
epoch: 76 training_loss 0.5730388391017914 test_loss: 0.5798840522766113
epoch: 77 training_loss 0.5769436553120613 test_loss: 0.5778310298919678
epoch: 78 training_loss 0.5733441060781479 test_loss: 0.5947467803955078
epoch: 79 training_loss 0.5843418800830841 test_loss: 0.5688246726989746
epoch: 80 training_loss 0.5704326272010803 test_loss: 0.5860833644866943
epoch: 81 training_loss 0.5702313679456711 test_loss: 0.56154465675354
epoch: 82 training_loss 0.571308564543724 test_loss: 0.5709599494934082
epoch: 83 training_loss 0.5686609506607055 test_loss: 0.563743782043457
epoch: 84 training_loss 0.567898799777031 test_loss: 0.5792186260223389
epoch: 85 training_loss 0.5676972103118897 test_loss: 0.5564375400543213
epoch: 86 training_loss 0.5541930660605431 test_loss: 0.5975771903991699
epoch: 87 training_loss 0.5712060153484344 test_loss: 0.583729362487793
epoch: 88 training_loss 0.5567927762866021 test_loss: 0.5669415950775146
epoch: 89 training_loss 0.5522767236828804 test_loss: 0.5597951889038086
epoch: 90 training_loss 0.5509222292900086 test_loss: 0.5628325462341308
epoch: 91 training_loss 0.5614932519197464 test_loss: 0.5928226470947265
epoch: 92 training_loss 0.5466237515211105 test_loss: 0.564218282699585
epoch: 93 training_loss 0.5571147072315216 test_loss: 0.5841947555541992
epoch: 94 training_loss 0.5572849228978157 test_loss: 0.5615357398986817
epoch: 95 training_loss 0.5528870046138763 test_loss: 0.5550240516662598
epoch: 96 training_loss 0.5567564067244529 test_loss: 0.5708203792572022
epoch: 97 training_loss 0.5503878247737884 test_loss: 0.582207441329956
epoch: 98 training_loss 0.5487230575084686 test_loss: 0.5477616786956787
epoch: 99 training_loss 0.5388754427433013 test_loss: 0.5388792037963868
epoch: 100 training_loss 0.5520397844910622 test_loss: 0.5743367195129394
epoch: 101 training_loss 0.5395212796330452 test_loss: 0.5499235153198242
epoch: 102 training_loss 0.5344740775227547 test_loss: 0.5464080810546875
epoch: 103 training_loss 0.5418544533848763 test_loss: 0.5411522388458252
epoch: 104 training_loss 0.5340305888652801 test_loss: 0.557719087600708
epoch: 105 training_loss 0.5397938242554665 test_loss: 0.5340762138366699
epoch: 106 training_loss 0.5380424034595489 test_loss: 0.5494286060333252
epoch: 107 training_loss 0.5426002851128578 test_loss: 0.5410060882568359
epoch: 108 training_loss 0.5232344281673431 test_loss: 0.5351299285888672
epoch: 109 training_loss 0.5481326189637185 test_loss: 0.5420598030090332
epoch: 110 training_loss 0.5302751851081848 test_loss: 0.529059886932373
epoch: 111 training_loss 0.5304423278570175 test_loss: 0.5365626335144043
epoch: 112 training_loss 0.5242610228061676 test_loss: 0.5356060028076172
epoch: 113 training_loss 0.5210286766290665 test_loss: 0.5170005321502685
epoch: 114 training_loss 0.5232272166013717 test_loss: 0.5213173866271973
epoch: 115 training_loss 0.5225589290261269 test_loss: 0.5398271083831787
epoch: 116 training_loss 0.5308644878864288 test_loss: 0.5311183452606201
epoch: 117 training_loss 0.522183493077755 test_loss: 0.5273737907409668
epoch: 118 training_loss 0.52226557046175 test_loss: 0.5416291236877442
epoch: 119 training_loss 0.5232787600159645 test_loss: 0.5192341327667236
epoch: 120 training_loss 0.5228099575638772 test_loss: 0.5180673122406005
epoch: 121 training_loss 0.5182720357179642 test_loss: 0.5334138393402099
epoch: 122 training_loss 0.5185983529686928 test_loss: 0.5339786529541015
epoch: 123 training_loss 0.5216118040680885 test_loss: 0.5263484477996826
epoch: 124 training_loss 0.5161697253584862 test_loss: 0.534179401397705
epoch: 125 training_loss 0.5107593443989754 test_loss: 0.5264765739440918
epoch: 126 training_loss 0.5131720307469368 test_loss: 0.5209577083587646
epoch: 127 training_loss 0.512938626408577 test_loss: 0.539989423751831
epoch: 128 training_loss 0.5154166111350059 test_loss: 0.5198448181152344
epoch: 129 training_loss 0.5072933700680733 test_loss: 0.5076560974121094
epoch: 130 training_loss 0.512437348663807 test_loss: 0.52873854637146
epoch: 131 training_loss 0.5129303398728371 test_loss: 0.5663884162902832
epoch: 132 training_loss 0.5117095965147018 test_loss: 0.5157631397247314
epoch: 133 training_loss 0.5069470509886742 test_loss: 0.5190320014953613
epoch: 134 training_loss 0.5031541475653648 test_loss: 0.49959325790405273
epoch: 135 training_loss 0.5080050152540206 test_loss: 0.5187302589416504
epoch: 136 training_loss 0.49575193107128146 test_loss: 0.5008338451385498
epoch: 137 training_loss 0.503941185772419 test_loss: 0.4970242500305176
epoch: 138 training_loss 0.4957764491438866 test_loss: 0.5089779853820801
epoch: 139 training_loss 0.5042735716700554 test_loss: 0.49425926208496096
epoch: 140 training_loss 0.5135411229729653 test_loss: 0.5114593029022216
epoch: 141 training_loss 0.4949884557723999 test_loss: 0.5016453266143799
epoch: 142 training_loss 0.49450981080532075 test_loss: 0.49605512619018555
epoch: 143 training_loss 0.49980307221412656 test_loss: 0.5209622383117676
epoch: 144 training_loss 0.5052025651931763 test_loss: 0.5073004245758057
epoch: 145 training_loss 0.4997444450855255 test_loss: 0.4884603500366211
epoch: 146 training_loss 0.4931486707925796 test_loss: 0.5012125015258789
epoch: 147 training_loss 0.49744216471910474 test_loss: 0.5119458198547363
epoch: 148 training_loss 0.4973228511214256 test_loss: 0.5042483806610107
epoch: 149 training_loss 0.49402762413024903 test_loss: 0.49046101570129397
2084.553367555847
episode: 0 training return: tensor(-162.5843, device='cuda:0')
episode: 1 training return: tensor(-208.5108, device='cuda:0')
episode: 2 training return: tensor(-314.3957, device='cuda:0')
episode: 3 training return: tensor(27.1831, device='cuda:0')
epoch: 1 test_true_pfm: 2274.890191675979 sim_pfm: -209.95719114743406
episode: 4 training return: tensor(213.5551, device='cuda:0')
episode: 5 training return: tensor(-231.8419, device='cuda:0')
episode: 6 training return: tensor(2.5050, device='cuda:0')
episode: 7 training return: tensor(-326.5471, device='cuda:0')
epoch: 2 test_true_pfm: 1795.1883600820802 sim_pfm: -285.885893623461
episode: 8 training return: tensor(-156.5782, device='cuda:0')
episode: 9 training return: tensor(-152.4449, device='cuda:0')
episode: 10 training return: tensor(-330.9234, device='cuda:0')
episode: 11 training return: tensor(-61.1370, device='cuda:0')
epoch: 3 test_true_pfm: 2063.5796550677487 sim_pfm: -71.34756453771843
episode: 12 training return: tensor(-125.6885, device='cuda:0')
episode: 13 training return: tensor(12.5463, device='cuda:0')
episode: 14 training return: tensor(-225.1445, device='cuda:0')
episode: 15 training return: tensor(-223.8447, device='cuda:0')
epoch: 4 test_true_pfm: 1725.216050992567 sim_pfm: -312.2754073568406
episode: 16 training return: tensor(-236.9068, device='cuda:0')
episode: 17 training return: tensor(-119.6261, device='cuda:0')
episode: 18 training return: tensor(-357.1047, device='cuda:0')
episode: 19 training return: tensor(-335.1972, device='cuda:0')
epoch: 5 test_true_pfm: 1488.4079927979274 sim_pfm: -173.2286972804577
episode: 20 training return: tensor(-371.5465, device='cuda:0')
episode: 21 training return: tensor(-226.2032, device='cuda:0')
episode: 22 training return: tensor(-127.0262, device='cuda:0')
episode: 23 training return: tensor(-97.8880, device='cuda:0')
epoch: 6 test_true_pfm: 1555.3644840221896 sim_pfm: -135.8599288566038
episode: 24 training return: tensor(-393.2088, device='cuda:0')
episode: 25 training return: tensor(-362.2462, device='cuda:0')
episode: 26 training return: tensor(-386.2344, device='cuda:0')
episode: 27 training return: tensor(-332.3781, device='cuda:0')
epoch: 7 test_true_pfm: 1545.948788596673 sim_pfm: -168.59467912724358
episode: 28 training return: tensor(-55.7528, device='cuda:0')
episode: 29 training return: tensor(-392.4893, device='cuda:0')
episode: 30 training return: tensor(-349.4592, device='cuda:0')
episode: 31 training return: tensor(-398.9789, device='cuda:0')
epoch: 8 test_true_pfm: 1495.3785083724522 sim_pfm: -321.1480116606787
episode: 32 training return: tensor(-18.9644, device='cuda:0')
episode: 33 training return: tensor(-296.5442, device='cuda:0')
episode: 34 training return: tensor(-377.4427, device='cuda:0')
episode: 35 training return: tensor(-111.6981, device='cuda:0')
epoch: 9 test_true_pfm: 1510.4765733311294 sim_pfm: -269.60006913890055
episode: 36 training return: tensor(-323.5013, device='cuda:0')
episode: 37 training return: tensor(-212.2604, device='cuda:0')
episode: 38 training return: tensor(-362.6806, device='cuda:0')
episode: 39 training return: tensor(-260.3501, device='cuda:0')
epoch: 10 test_true_pfm: 2221.451742434055 sim_pfm: -336.48114518945414
episode: 40 training return: tensor(-102.0013, device='cuda:0')
episode: 41 training return: tensor(-178.1672, device='cuda:0')
episode: 42 training return: tensor(-394.7681, device='cuda:0')
episode: 43 training return: tensor(-317.4850, device='cuda:0')
epoch: 11 test_true_pfm: 1924.2830916428618 sim_pfm: -154.59762392682023
episode: 44 training return: tensor(-357.5061, device='cuda:0')
episode: 45 training return: tensor(-239.9613, device='cuda:0')
episode: 46 training return: tensor(1.0643, device='cuda:0')
episode: 47 training return: tensor(-75.6426, device='cuda:0')
epoch: 12 test_true_pfm: 2034.5297902566526 sim_pfm: -151.94690822507255
episode: 48 training return: tensor(-346.9828, device='cuda:0')
episode: 49 training return: tensor(-260.7356, device='cuda:0')
episode: 50 training return: tensor(-307.0386, device='cuda:0')
episode: 51 training return: tensor(-376.4937, device='cuda:0')
epoch: 13 test_true_pfm: 1868.8275962459168 sim_pfm: -130.76508982495093
episode: 52 training return: tensor(219.4568, device='cuda:0')
episode: 53 training return: tensor(-32.6730, device='cuda:0')
episode: 54 training return: tensor(-69.3321, device='cuda:0')
episode: 55 training return: tensor(-395.0943, device='cuda:0')
epoch: 14 test_true_pfm: 1724.407151516679 sim_pfm: 76.47728364231686
episode: 56 training return: tensor(-4.6997, device='cuda:0')
episode: 57 training return: tensor(-119.6730, device='cuda:0')
episode: 58 training return: tensor(-126.2113, device='cuda:0')
episode: 59 training return: tensor(-342.3002, device='cuda:0')
epoch: 15 test_true_pfm: 2239.916831380688 sim_pfm: -158.20137002630509
episode: 60 training return: tensor(-386.7210, device='cuda:0')
episode: 61 training return: tensor(-221.2190, device='cuda:0')
episode: 62 training return: tensor(-336.2250, device='cuda:0')
episode: 63 training return: tensor(-99.6477, device='cuda:0')
epoch: 16 test_true_pfm: 1820.9736862910124 sim_pfm: 140.88424841441642
episode: 64 training return: tensor(-388.2347, device='cuda:0')
episode: 65 training return: tensor(-388.5866, device='cuda:0')
episode: 66 training return: tensor(-326.4601, device='cuda:0')
episode: 67 training return: tensor(-333.2981, device='cuda:0')
epoch: 17 test_true_pfm: 2176.661936457954 sim_pfm: -130.23331850472218
episode: 68 training return: tensor(-291.6106, device='cuda:0')
episode: 69 training return: tensor(-217.4688, device='cuda:0')
episode: 70 training return: tensor(-18.5638, device='cuda:0')
episode: 71 training return: tensor(16.8777, device='cuda:0')
epoch: 18 test_true_pfm: 1682.426420291086 sim_pfm: -28.257717630282666
episode: 72 training return: tensor(-74.2643, device='cuda:0')
episode: 73 training return: tensor(-52.5037, device='cuda:0')
episode: 74 training return: tensor(-220.2786, device='cuda:0')
episode: 75 training return: tensor(-216.3616, device='cuda:0')
epoch: 19 test_true_pfm: 2885.731018213875 sim_pfm: -63.43788860544252
episode: 76 training return: tensor(-327.6248, device='cuda:0')
episode: 77 training return: tensor(-273.8997, device='cuda:0')
episode: 78 training return: tensor(64.4968, device='cuda:0')
episode: 79 training return: tensor(-301.4393, device='cuda:0')
epoch: 20 test_true_pfm: 1595.1869889543796 sim_pfm: -149.3852438388664
episode: 80 training return: tensor(-23.6545, device='cuda:0')
episode: 81 training return: tensor(-33.9826, device='cuda:0')
episode: 82 training return: tensor(-234.1024, device='cuda:0')
episode: 83 training return: tensor(-11.0782, device='cuda:0')
epoch: 21 test_true_pfm: 1550.0134572361267 sim_pfm: -205.52019269989492
episode: 84 training return: tensor(-335.6463, device='cuda:0')
episode: 85 training return: tensor(-129.3057, device='cuda:0')
episode: 86 training return: tensor(-204.6802, device='cuda:0')
episode: 87 training return: tensor(-380.3513, device='cuda:0')
epoch: 22 test_true_pfm: 2524.85966637125 sim_pfm: -247.67878156275643
episode: 88 training return: tensor(-81.4478, device='cuda:0')
episode: 89 training return: tensor(-132.0001, device='cuda:0')
episode: 90 training return: tensor(-244.2142, device='cuda:0')
episode: 91 training return: tensor(-286.9070, device='cuda:0')
epoch: 23 test_true_pfm: 1691.0645440857888 sim_pfm: -332.35488519479986
episode: 92 training return: tensor(-328.7469, device='cuda:0')
episode: 93 training return: tensor(-288.1294, device='cuda:0')
episode: 94 training return: tensor(-57.8471, device='cuda:0')
episode: 95 training return: tensor(-219.2840, device='cuda:0')
epoch: 24 test_true_pfm: 1746.6895685564723 sim_pfm: 41.97168680006871
episode: 96 training return: tensor(-241.8952, device='cuda:0')
episode: 97 training return: tensor(-222.5685, device='cuda:0')
episode: 98 training return: tensor(-13.4245, device='cuda:0')
episode: 99 training return: tensor(-371.2214, device='cuda:0')
epoch: 25 test_true_pfm: 2002.0508766467756 sim_pfm: -147.45396416028962
episode: 100 training return: tensor(142.8126, device='cuda:0')
episode: 101 training return: tensor(-130.4637, device='cuda:0')
episode: 102 training return: tensor(-47.6516, device='cuda:0')
episode: 103 training return: tensor(-138.2106, device='cuda:0')
epoch: 26 test_true_pfm: 2057.3222719975834 sim_pfm: -128.18047887727153
episode: 104 training return: tensor(58.0251, device='cuda:0')
episode: 105 training return: tensor(-330.1514, device='cuda:0')
episode: 106 training return: tensor(-385.8822, device='cuda:0')
episode: 107 training return: tensor(-267.2581, device='cuda:0')
epoch: 27 test_true_pfm: 1746.2126670493626 sim_pfm: -102.9660694283084
episode: 108 training return: tensor(-388.1043, device='cuda:0')
episode: 109 training return: tensor(-141.9341, device='cuda:0')
episode: 110 training return: tensor(-336.3661, device='cuda:0')
episode: 111 training return: tensor(-295.3377, device='cuda:0')
epoch: 28 test_true_pfm: 2232.160524188131 sim_pfm: 29.21748594082116
episode: 112 training return: tensor(-253.4902, device='cuda:0')
episode: 113 training return: tensor(244.7316, device='cuda:0')
episode: 114 training return: tensor(-122.4939, device='cuda:0')
episode: 115 training return: tensor(-357.1124, device='cuda:0')
epoch: 29 test_true_pfm: 2038.4864062194663 sim_pfm: -108.54177479974653
episode: 116 training return: tensor(-336.3890, device='cuda:0')
episode: 117 training return: tensor(-103.4379, device='cuda:0')
episode: 118 training return: tensor(-265.0693, device='cuda:0')
episode: 119 training return: tensor(191.0139, device='cuda:0')
epoch: 30 test_true_pfm: 2024.3490513427962 sim_pfm: -140.00277325115167
episode: 120 training return: tensor(-383.7447, device='cuda:0')
episode: 121 training return: tensor(-213.2073, device='cuda:0')
episode: 122 training return: tensor(-9.4909, device='cuda:0')
episode: 123 training return: tensor(-270.4047, device='cuda:0')
epoch: 31 test_true_pfm: 2612.520724487788 sim_pfm: -71.49819208448753
episode: 124 training return: tensor(-378.3652, device='cuda:0')
episode: 125 training return: tensor(-250.1121, device='cuda:0')
episode: 126 training return: tensor(121.9206, device='cuda:0')
episode: 127 training return: tensor(-231.4842, device='cuda:0')
epoch: 32 test_true_pfm: 2656.5343348268275 sim_pfm: 2.4361058689731485
episode: 128 training return: tensor(-386.7215, device='cuda:0')
episode: 129 training return: tensor(-76.2771, device='cuda:0')
episode: 130 training return: tensor(159.2943, device='cuda:0')
episode: 131 training return: tensor(60.0347, device='cuda:0')
epoch: 33 test_true_pfm: 2230.8465690485996 sim_pfm: -290.9837159471742
episode: 132 training return: tensor(-387.5008, device='cuda:0')
episode: 133 training return: tensor(53.6443, device='cuda:0')
episode: 134 training return: tensor(-145.2950, device='cuda:0')
episode: 135 training return: tensor(-382.0089, device='cuda:0')
epoch: 34 test_true_pfm: 2116.0810725787796 sim_pfm: -57.01248134362201
episode: 136 training return: tensor(-248.7494, device='cuda:0')
episode: 137 training return: tensor(-282.4412, device='cuda:0')
episode: 138 training return: tensor(-158.9324, device='cuda:0')
episode: 139 training return: tensor(164.8344, device='cuda:0')
epoch: 35 test_true_pfm: 2385.4468655988385 sim_pfm: -71.70225058410627
episode: 140 training return: tensor(-237.5722, device='cuda:0')
episode: 141 training return: tensor(-380.7603, device='cuda:0')
episode: 142 training return: tensor(-386.7389, device='cuda:0')
episode: 143 training return: tensor(-295.3940, device='cuda:0')
epoch: 36 test_true_pfm: 2492.245128283861 sim_pfm: -192.4116118667492
episode: 144 training return: tensor(46.1060, device='cuda:0')
episode: 145 training return: tensor(-403.9174, device='cuda:0')
episode: 146 training return: tensor(-294.6235, device='cuda:0')
episode: 147 training return: tensor(141.7409, device='cuda:0')
epoch: 37 test_true_pfm: 2245.1214257711854 sim_pfm: -122.25760476778184
episode: 148 training return: tensor(-183.1518, device='cuda:0')
episode: 149 training return: tensor(-184.0427, device='cuda:0')
episode: 150 training return: tensor(-315.0529, device='cuda:0')
episode: 151 training return: tensor(-383.8355, device='cuda:0')
epoch: 38 test_true_pfm: 2090.305294210853 sim_pfm: -223.4173314435951
episode: 152 training return: tensor(-386.9374, device='cuda:0')
episode: 153 training return: tensor(-154.6972, device='cuda:0')
episode: 154 training return: tensor(-240.7162, device='cuda:0')
episode: 155 training return: tensor(-231.5637, device='cuda:0')
epoch: 39 test_true_pfm: 2010.9113955278851 sim_pfm: -265.3236681049457
episode: 156 training return: tensor(-367.5357, device='cuda:0')
episode: 157 training return: tensor(-330.5779, device='cuda:0')
episode: 158 training return: tensor(112.8461, device='cuda:0')
episode: 159 training return: tensor(-97.6607, device='cuda:0')
epoch: 40 test_true_pfm: 2152.233778233695 sim_pfm: -172.97559679823462
episode: 160 training return: tensor(263.0356, device='cuda:0')
episode: 161 training return: tensor(-138.8781, device='cuda:0')
episode: 162 training return: tensor(-206.1559, device='cuda:0')
episode: 163 training return: tensor(-243.9435, device='cuda:0')
epoch: 41 test_true_pfm: 1540.2249086237018 sim_pfm: -211.74696560652228
episode: 164 training return: tensor(-232.4445, device='cuda:0')
episode: 165 training return: tensor(-241.2383, device='cuda:0')
episode: 166 training return: tensor(-280.4487, device='cuda:0')
episode: 167 training return: tensor(-49.6077, device='cuda:0')
epoch: 42 test_true_pfm: 1675.2931044657123 sim_pfm: -18.141266311809886
episode: 168 training return: tensor(-293.5080, device='cuda:0')
episode: 169 training return: tensor(-290.4887, device='cuda:0')
episode: 170 training return: tensor(-1.5294, device='cuda:0')
episode: 171 training return: tensor(-214.0451, device='cuda:0')
epoch: 43 test_true_pfm: 1689.7184465681905 sim_pfm: -75.93977276988637
episode: 172 training return: tensor(124.2785, device='cuda:0')
episode: 173 training return: tensor(-276.5553, device='cuda:0')
episode: 174 training return: tensor(-284.2343, device='cuda:0')
episode: 175 training return: tensor(-360.9741, device='cuda:0')
epoch: 44 test_true_pfm: 2104.515193763547 sim_pfm: 17.79522233530103
episode: 176 training return: tensor(-280.2759, device='cuda:0')
episode: 177 training return: tensor(-242.1036, device='cuda:0')
episode: 178 training return: tensor(-368.4322, device='cuda:0')
episode: 179 training return: tensor(79.5960, device='cuda:0')
epoch: 45 test_true_pfm: 1690.9663831289806 sim_pfm: -48.19112864895336
episode: 180 training return: tensor(47.8126, device='cuda:0')
episode: 181 training return: tensor(-139.3944, device='cuda:0')
episode: 182 training return: tensor(-245.0917, device='cuda:0')
episode: 183 training return: tensor(9.1022, device='cuda:0')
epoch: 46 test_true_pfm: 2231.9796143683993 sim_pfm: -170.74950852194647
episode: 184 training return: tensor(-246.2473, device='cuda:0')
episode: 185 training return: tensor(-168.5656, device='cuda:0')
episode: 186 training return: tensor(-288.4641, device='cuda:0')
episode: 187 training return: tensor(-392.4174, device='cuda:0')
epoch: 47 test_true_pfm: 1620.0258153845195 sim_pfm: 3.8690376559194797
episode: 188 training return: tensor(-187.2926, device='cuda:0')
episode: 189 training return: tensor(-287.3960, device='cuda:0')
episode: 190 training return: tensor(-301.1262, device='cuda:0')
episode: 191 training return: tensor(-380.8747, device='cuda:0')
epoch: 48 test_true_pfm: 1588.2053323790908 sim_pfm: -110.1072278911015
episode: 192 training return: tensor(-366.5237, device='cuda:0')
episode: 193 training return: tensor(-305.8332, device='cuda:0')
episode: 194 training return: tensor(23.5556, device='cuda:0')
episode: 195 training return: tensor(-295.7508, device='cuda:0')
epoch: 49 test_true_pfm: 2624.0933564398733 sim_pfm: -54.88345561925477
episode: 196 training return: tensor(-301.2404, device='cuda:0')
episode: 197 training return: tensor(257.0532, device='cuda:0')
episode: 198 training return: tensor(-329.3168, device='cuda:0')
episode: 199 training return: tensor(-209.4894, device='cuda:0')
epoch: 50 test_true_pfm: 1920.287053896369 sim_pfm: 42.73470636938388
episode: 200 training return: tensor(-159.7394, device='cuda:0')
episode: 201 training return: tensor(-119.5422, device='cuda:0')
episode: 202 training return: tensor(46.2192, device='cuda:0')
episode: 203 training return: tensor(-89.3904, device='cuda:0')
epoch: 51 test_true_pfm: 1404.9332882070582 sim_pfm: -262.197489955967
episode: 204 training return: tensor(-279.8892, device='cuda:0')
episode: 205 training return: tensor(-14.3720, device='cuda:0')
episode: 206 training return: tensor(-362.8277, device='cuda:0')
episode: 207 training return: tensor(-228.3052, device='cuda:0')
epoch: 52 test_true_pfm: 2280.049436181844 sim_pfm: -208.8749162865182
episode: 208 training return: tensor(199.1516, device='cuda:0')
episode: 209 training return: tensor(-124.7900, device='cuda:0')
episode: 210 training return: tensor(268.2047, device='cuda:0')
episode: 211 training return: tensor(28.1627, device='cuda:0')
epoch: 53 test_true_pfm: 1689.5443513318198 sim_pfm: -79.04531976655319
episode: 212 training return: tensor(-333.3253, device='cuda:0')
episode: 213 training return: tensor(-341.3812, device='cuda:0')
episode: 214 training return: tensor(-260.8314, device='cuda:0')
episode: 215 training return: tensor(-33.3695, device='cuda:0')
epoch: 54 test_true_pfm: 1575.2411535585325 sim_pfm: -247.79072512250664
episode: 216 training return: tensor(-175.7525, device='cuda:0')
episode: 217 training return: tensor(13.1923, device='cuda:0')
episode: 218 training return: tensor(-335.5844, device='cuda:0')
episode: 219 training return: tensor(-249.7169, device='cuda:0')
epoch: 55 test_true_pfm: 1923.1623038776852 sim_pfm: -109.01015635028791
episode: 220 training return: tensor(-391.4251, device='cuda:0')
episode: 221 training return: tensor(51.7244, device='cuda:0')
episode: 222 training return: tensor(98.8738, device='cuda:0')
episode: 223 training return: tensor(19.9683, device='cuda:0')
epoch: 56 test_true_pfm: 2202.2109250342323 sim_pfm: -182.1445284608344
episode: 224 training return: tensor(-368.8452, device='cuda:0')
episode: 225 training return: tensor(-378.8518, device='cuda:0')
episode: 226 training return: tensor(-101.5019, device='cuda:0')
episode: 227 training return: tensor(-342.6208, device='cuda:0')
epoch: 57 test_true_pfm: 2137.120286489388 sim_pfm: -56.96090312348679
episode: 228 training return: tensor(301.4993, device='cuda:0')
episode: 229 training return: tensor(-374.2191, device='cuda:0')
episode: 230 training return: tensor(-227.7786, device='cuda:0')
episode: 231 training return: tensor(-359.0101, device='cuda:0')
epoch: 58 test_true_pfm: 1816.9880398257644 sim_pfm: -31.71164216872421
episode: 232 training return: tensor(-163.8181, device='cuda:0')
episode: 233 training return: tensor(-339.4752, device='cuda:0')
episode: 234 training return: tensor(-193.8400, device='cuda:0')
episode: 235 training return: tensor(-292.0869, device='cuda:0')
epoch: 59 test_true_pfm: 1849.9479041586203 sim_pfm: -209.66353025163212
episode: 236 training return: tensor(-162.6587, device='cuda:0')
episode: 237 training return: tensor(-64.8573, device='cuda:0')
episode: 238 training return: tensor(-122.2860, device='cuda:0')
episode: 239 training return: tensor(157.4800, device='cuda:0')
epoch: 60 test_true_pfm: 2448.7092798947283 sim_pfm: -90.54066335573832
episode: 240 training return: tensor(-248.9696, device='cuda:0')
episode: 241 training return: tensor(-269.4375, device='cuda:0')
episode: 242 training return: tensor(-164.3536, device='cuda:0')
episode: 243 training return: tensor(-253.1771, device='cuda:0')
epoch: 61 test_true_pfm: 1961.2630346937465 sim_pfm: 39.33548762760862
episode: 244 training return: tensor(44.7107, device='cuda:0')
episode: 245 training return: tensor(-273.8139, device='cuda:0')
episode: 246 training return: tensor(-86.5912, device='cuda:0')
episode: 247 training return: tensor(-63.9614, device='cuda:0')
epoch: 62 test_true_pfm: 2189.6097729944363 sim_pfm: -100.55755976864991
episode: 248 training return: tensor(-324.1791, device='cuda:0')
episode: 249 training return: tensor(305.4200, device='cuda:0')
episode: 250 training return: tensor(89.1196, device='cuda:0')
episode: 251 training return: tensor(-136.7711, device='cuda:0')
epoch: 63 test_true_pfm: 2395.896507016861 sim_pfm: -48.53474600908036
episode: 252 training return: tensor(273.5936, device='cuda:0')
episode: 253 training return: tensor(-51.6079, device='cuda:0')
episode: 254 training return: tensor(-284.9689, device='cuda:0')
episode: 255 training return: tensor(-152.2318, device='cuda:0')
epoch: 64 test_true_pfm: 1919.5404787732298 sim_pfm: -220.04254405999868
episode: 256 training return: tensor(-62.0453, device='cuda:0')
episode: 257 training return: tensor(-120.5694, device='cuda:0')
episode: 258 training return: tensor(-317.6799, device='cuda:0')
episode: 259 training return: tensor(-194.6929, device='cuda:0')
epoch: 65 test_true_pfm: 2192.215293057112 sim_pfm: -172.93315799705064
episode: 260 training return: tensor(-386.5819, device='cuda:0')
episode: 261 training return: tensor(-239.8824, device='cuda:0')
episode: 262 training return: tensor(-330.8632, device='cuda:0')
episode: 263 training return: tensor(-155.3335, device='cuda:0')
epoch: 66 test_true_pfm: 1655.1128794870358 sim_pfm: 41.8478662079627
episode: 264 training return: tensor(-134.7679, device='cuda:0')
episode: 265 training return: tensor(236.7296, device='cuda:0')
episode: 266 training return: tensor(138.6995, device='cuda:0')
episode: 267 training return: tensor(11.3519, device='cuda:0')
epoch: 67 test_true_pfm: 1769.56325872608 sim_pfm: -36.76205027541922
episode: 268 training return: tensor(-337.6842, device='cuda:0')
episode: 269 training return: tensor(-107.7314, device='cuda:0')
episode: 270 training return: tensor(11.8866, device='cuda:0')
episode: 271 training return: tensor(-84.8594, device='cuda:0')
epoch: 68 test_true_pfm: 2023.558081067654 sim_pfm: -162.76633451575375
episode: 272 training return: tensor(-264.9126, device='cuda:0')
episode: 273 training return: tensor(-288.7088, device='cuda:0')
episode: 274 training return: tensor(-262.4036, device='cuda:0')
episode: 275 training return: tensor(-135.0234, device='cuda:0')
epoch: 69 test_true_pfm: 2273.1430742553666 sim_pfm: 113.20253595431375
episode: 276 training return: tensor(17.4583, device='cuda:0')
episode: 277 training return: tensor(-217.4617, device='cuda:0')
episode: 278 training return: tensor(-241.6673, device='cuda:0')
episode: 279 training return: tensor(-136.8093, device='cuda:0')
epoch: 70 test_true_pfm: 2505.6246154521555 sim_pfm: -261.55836537556024
episode: 280 training return: tensor(77.2844, device='cuda:0')
episode: 281 training return: tensor(-304.4619, device='cuda:0')
episode: 282 training return: tensor(-390.2245, device='cuda:0')
episode: 283 training return: tensor(-161.5428, device='cuda:0')
epoch: 71 test_true_pfm: 1574.28131234653 sim_pfm: 20.903793783683795
episode: 284 training return: tensor(-254.0358, device='cuda:0')
episode: 285 training return: tensor(-130.5216, device='cuda:0')
episode: 286 training return: tensor(-176.9877, device='cuda:0')
episode: 287 training return: tensor(-106.0135, device='cuda:0')
epoch: 72 test_true_pfm: 1921.1518241910296 sim_pfm: 72.74752665759297
episode: 288 training return: tensor(243.8899, device='cuda:0')
episode: 289 training return: tensor(153.9844, device='cuda:0')
episode: 290 training return: tensor(-274.8275, device='cuda:0')
episode: 291 training return: tensor(-335.3563, device='cuda:0')
epoch: 73 test_true_pfm: 1713.1104929979472 sim_pfm: 3.3285595879036314
episode: 292 training return: tensor(90.2942, device='cuda:0')
episode: 293 training return: tensor(-70.3878, device='cuda:0')
episode: 294 training return: tensor(-373.8511, device='cuda:0')
episode: 295 training return: tensor(10.6261, device='cuda:0')
epoch: 74 test_true_pfm: 2225.492234419937 sim_pfm: -0.5056893077368537
episode: 296 training return: tensor(37.5073, device='cuda:0')
episode: 297 training return: tensor(-190.3133, device='cuda:0')
episode: 298 training return: tensor(65.2058, device='cuda:0')
episode: 299 training return: tensor(272.2585, device='cuda:0')
epoch: 75 test_true_pfm: 1506.5261702478317 sim_pfm: -108.57578729479185
episode: 300 training return: tensor(-329.7349, device='cuda:0')
episode: 301 training return: tensor(245.9156, device='cuda:0')
episode: 302 training return: tensor(-77.7563, device='cuda:0')
episode: 303 training return: tensor(-310.4672, device='cuda:0')
epoch: 76 test_true_pfm: 2322.3426331710552 sim_pfm: -13.977358725183876
episode: 304 training return: tensor(-354.2710, device='cuda:0')
episode: 305 training return: tensor(120.0956, device='cuda:0')
episode: 306 training return: tensor(27.0281, device='cuda:0')
episode: 307 training return: tensor(-265.7881, device='cuda:0')
epoch: 77 test_true_pfm: 2484.3787779357667 sim_pfm: -46.342939721401
episode: 308 training return: tensor(238.3609, device='cuda:0')
episode: 309 training return: tensor(-298.0691, device='cuda:0')
episode: 310 training return: tensor(43.9969, device='cuda:0')
episode: 311 training return: tensor(-73.0723, device='cuda:0')
epoch: 78 test_true_pfm: 2198.4443145607497 sim_pfm: -172.3245904434783
episode: 312 training return: tensor(87.7187, device='cuda:0')
episode: 313 training return: tensor(-148.1431, device='cuda:0')
episode: 314 training return: tensor(285.2691, device='cuda:0')
episode: 315 training return: tensor(-187.4931, device='cuda:0')
epoch: 79 test_true_pfm: 2615.768799709613 sim_pfm: -14.216058153251652
episode: 316 training return: tensor(42.7928, device='cuda:0')
episode: 317 training return: tensor(-374.5863, device='cuda:0')
episode: 318 training return: tensor(25.0554, device='cuda:0')
episode: 319 training return: tensor(-315.9965, device='cuda:0')
epoch: 80 test_true_pfm: 1777.197723578396 sim_pfm: -41.41110381095981
episode: 320 training return: tensor(99.5358, device='cuda:0')
episode: 321 training return: tensor(-105.1558, device='cuda:0')
episode: 322 training return: tensor(13.0900, device='cuda:0')
episode: 323 training return: tensor(227.8311, device='cuda:0')
epoch: 81 test_true_pfm: 2083.972003318348 sim_pfm: -233.14016099671912
episode: 324 training return: tensor(294.2555, device='cuda:0')
episode: 325 training return: tensor(-114.7646, device='cuda:0')
episode: 326 training return: tensor(130.7727, device='cuda:0')
episode: 327 training return: tensor(-265.8120, device='cuda:0')
epoch: 82 test_true_pfm: 2206.0458931410626 sim_pfm: -161.28929459885694
episode: 328 training return: tensor(-225.2469, device='cuda:0')
episode: 329 training return: tensor(300.4854, device='cuda:0')
episode: 330 training return: tensor(-265.8854, device='cuda:0')
episode: 331 training return: tensor(227.5227, device='cuda:0')
epoch: 83 test_true_pfm: 2457.873404966463 sim_pfm: -218.20721572086526
episode: 332 training return: tensor(-217.7095, device='cuda:0')
episode: 333 training return: tensor(-53.6458, device='cuda:0')
episode: 334 training return: tensor(-170.5486, device='cuda:0')
episode: 335 training return: tensor(-190.7555, device='cuda:0')
epoch: 84 test_true_pfm: 2033.9968399966572 sim_pfm: -192.87407301725275
episode: 336 training return: tensor(-302.6530, device='cuda:0')
episode: 337 training return: tensor(55.2930, device='cuda:0')
episode: 338 training return: tensor(-243.9389, device='cuda:0')
episode: 339 training return: tensor(-246.3366, device='cuda:0')
epoch: 85 test_true_pfm: 2411.117991918864 sim_pfm: -17.390879245940596
episode: 340 training return: tensor(-358.1171, device='cuda:0')
episode: 341 training return: tensor(-152.8088, device='cuda:0')
episode: 342 training return: tensor(95.7006, device='cuda:0')
episode: 343 training return: tensor(-314.9458, device='cuda:0')
epoch: 86 test_true_pfm: 1650.5370297152867 sim_pfm: -217.21063112005746
episode: 344 training return: tensor(-247.7379, device='cuda:0')
episode: 345 training return: tensor(199.6594, device='cuda:0')
episode: 346 training return: tensor(-330.1379, device='cuda:0')
episode: 347 training return: tensor(262.9444, device='cuda:0')
epoch: 87 test_true_pfm: 1845.5771894060324 sim_pfm: -297.5818066383169
episode: 348 training return: tensor(26.3952, device='cuda:0')
episode: 349 training return: tensor(-329.0629, device='cuda:0')
episode: 350 training return: tensor(-337.5732, device='cuda:0')
episode: 351 training return: tensor(-146.6575, device='cuda:0')
epoch: 88 test_true_pfm: 2838.3249345801573 sim_pfm: -236.20239294374673
episode: 352 training return: tensor(273.9587, device='cuda:0')
episode: 353 training return: tensor(7.5208, device='cuda:0')
episode: 354 training return: tensor(254.7955, device='cuda:0')
episode: 355 training return: tensor(-14.2386, device='cuda:0')
epoch: 89 test_true_pfm: 2618.0478728934577 sim_pfm: -178.38922970815716
episode: 356 training return: tensor(-148.0785, device='cuda:0')
episode: 357 training return: tensor(-165.1178, device='cuda:0')
episode: 358 training return: tensor(-8.7184, device='cuda:0')
episode: 359 training return: tensor(-246.6105, device='cuda:0')
epoch: 90 test_true_pfm: 2177.4759022588482 sim_pfm: -126.30604464580149
episode: 360 training return: tensor(-52.2475, device='cuda:0')
episode: 361 training return: tensor(-347.6471, device='cuda:0')
episode: 362 training return: tensor(-87.6143, device='cuda:0')
episode: 363 training return: tensor(-171.9801, device='cuda:0')
epoch: 91 test_true_pfm: 1879.9887949421766 sim_pfm: -75.64301332752802
episode: 364 training return: tensor(-300.4171, device='cuda:0')
episode: 365 training return: tensor(166.6952, device='cuda:0')
episode: 366 training return: tensor(220.4631, device='cuda:0')
episode: 367 training return: tensor(254.6463, device='cuda:0')
epoch: 92 test_true_pfm: 2676.8605335668058 sim_pfm: -52.12168252763028
episode: 368 training return: tensor(-160.4655, device='cuda:0')
episode: 369 training return: tensor(-242.5663, device='cuda:0')
episode: 370 training return: tensor(-187.7445, device='cuda:0')
episode: 371 training return: tensor(-378.9781, device='cuda:0')
epoch: 93 test_true_pfm: 2320.42626985499 sim_pfm: -10.239685260312399
episode: 372 training return: tensor(-125.8165, device='cuda:0')
episode: 373 training return: tensor(-218.5852, device='cuda:0')
episode: 374 training return: tensor(-386.0761, device='cuda:0')
episode: 375 training return: tensor(-377.4112, device='cuda:0')
epoch: 94 test_true_pfm: 1626.1686320515416 sim_pfm: 96.66817552142311
episode: 376 training return: tensor(262.1632, device='cuda:0')
episode: 377 training return: tensor(240.0767, device='cuda:0')
episode: 378 training return: tensor(-235.8037, device='cuda:0')
episode: 379 training return: tensor(-335.1943, device='cuda:0')
epoch: 95 test_true_pfm: 1519.50325756285 sim_pfm: -48.59776418485368
episode: 380 training return: tensor(-216.0440, device='cuda:0')
episode: 381 training return: tensor(-157.9753, device='cuda:0')
episode: 382 training return: tensor(0.1693, device='cuda:0')
episode: 383 training return: tensor(-387.5242, device='cuda:0')
epoch: 96 test_true_pfm: 2407.3844724067244 sim_pfm: -289.0914082006784
episode: 384 training return: tensor(-330.8777, device='cuda:0')
episode: 385 training return: tensor(-264.2900, device='cuda:0')
episode: 386 training return: tensor(-48.2774, device='cuda:0')
episode: 387 training return: tensor(-382.9202, device='cuda:0')
epoch: 97 test_true_pfm: 2501.884134577733 sim_pfm: 10.87883770361077
episode: 388 training return: tensor(-317.6913, device='cuda:0')
episode: 389 training return: tensor(-261.4774, device='cuda:0')
episode: 390 training return: tensor(-286.0016, device='cuda:0')
episode: 391 training return: tensor(-90.2907, device='cuda:0')
epoch: 98 test_true_pfm: 2883.744171182696 sim_pfm: -89.06262608633067
episode: 392 training return: tensor(128.6842, device='cuda:0')
episode: 393 training return: tensor(-335.2114, device='cuda:0')
episode: 394 training return: tensor(-374.2361, device='cuda:0')
episode: 395 training return: tensor(-323.5058, device='cuda:0')
epoch: 99 test_true_pfm: 2089.442817520105 sim_pfm: 62.40501315065194
episode: 396 training return: tensor(-209.1600, device='cuda:0')
episode: 397 training return: tensor(45.8686, device='cuda:0')
episode: 398 training return: tensor(-253.0801, device='cuda:0')
episode: 399 training return: tensor(-61.2424, device='cuda:0')
epoch: 100 test_true_pfm: 2621.740060541142 sim_pfm: -31.495545299112564
episode: 400 training return: tensor(-4.2683, device='cuda:0')
episode: 401 training return: tensor(-297.2763, device='cuda:0')
episode: 402 training return: tensor(237.1803, device='cuda:0')
episode: 403 training return: tensor(20.5586, device='cuda:0')
epoch: 101 test_true_pfm: 1880.960019504919 sim_pfm: -218.06916852210028
episode: 404 training return: tensor(-120.0956, device='cuda:0')
episode: 405 training return: tensor(57.5816, device='cuda:0')
episode: 406 training return: tensor(-285.0650, device='cuda:0')
episode: 407 training return: tensor(-329.6432, device='cuda:0')
epoch: 102 test_true_pfm: 1647.3371125242695 sim_pfm: 154.210681168964
episode: 408 training return: tensor(-39.8035, device='cuda:0')
episode: 409 training return: tensor(-278.5832, device='cuda:0')
episode: 410 training return: tensor(-265.6336, device='cuda:0')
episode: 411 training return: tensor(-27.8673, device='cuda:0')
epoch: 103 test_true_pfm: 2461.001763197078 sim_pfm: 35.88462472490695
episode: 412 training return: tensor(-268.6035, device='cuda:0')
episode: 413 training return: tensor(-215.8017, device='cuda:0')
episode: 414 training return: tensor(-96.2267, device='cuda:0')
episode: 415 training return: tensor(117.9225, device='cuda:0')
epoch: 104 test_true_pfm: 2718.643258190196 sim_pfm: -1.7289132984005846
episode: 416 training return: tensor(-288.3491, device='cuda:0')
episode: 417 training return: tensor(-309.6961, device='cuda:0')
episode: 418 training return: tensor(-320.6940, device='cuda:0')
episode: 419 training return: tensor(-160.8539, device='cuda:0')
epoch: 105 test_true_pfm: 2577.3449118010963 sim_pfm: -191.23263363687633
episode: 420 training return: tensor(-60.0344, device='cuda:0')
episode: 421 training return: tensor(-166.2199, device='cuda:0')
episode: 422 training return: tensor(-381.8153, device='cuda:0')
episode: 423 training return: tensor(-283.5249, device='cuda:0')
epoch: 106 test_true_pfm: 2896.6890750181205 sim_pfm: -172.68660049086125
episode: 424 training return: tensor(-287.6180, device='cuda:0')
episode: 425 training return: tensor(-243.1895, device='cuda:0')
episode: 426 training return: tensor(-259.1919, device='cuda:0')
episode: 427 training return: tensor(-233.4508, device='cuda:0')
epoch: 107 test_true_pfm: 2208.901412127687 sim_pfm: 3.9563962652658424
episode: 428 training return: tensor(-261.8422, device='cuda:0')
episode: 429 training return: tensor(17.1889, device='cuda:0')
episode: 430 training return: tensor(2.3563, device='cuda:0')
episode: 431 training return: tensor(120.5606, device='cuda:0')
epoch: 108 test_true_pfm: 2252.858116303201 sim_pfm: 69.98799477837747
episode: 432 training return: tensor(23.0027, device='cuda:0')
episode: 433 training return: tensor(57.7489, device='cuda:0')
episode: 434 training return: tensor(-372.1797, device='cuda:0')
episode: 435 training return: tensor(-288.2203, device='cuda:0')
epoch: 109 test_true_pfm: 2513.258651677233 sim_pfm: -39.03141153785206
episode: 436 training return: tensor(-321.1293, device='cuda:0')
episode: 437 training return: tensor(-151.5890, device='cuda:0')
episode: 438 training return: tensor(229.5551, device='cuda:0')
episode: 439 training return: tensor(22.6490, device='cuda:0')
epoch: 110 test_true_pfm: 2043.557822366195 sim_pfm: -137.53302551649782
episode: 440 training return: tensor(10.1972, device='cuda:0')
episode: 441 training return: tensor(-361.0336, device='cuda:0')
episode: 442 training return: tensor(16.6841, device='cuda:0')
episode: 443 training return: tensor(-284.5068, device='cuda:0')
epoch: 111 test_true_pfm: 2304.0252263634343 sim_pfm: -71.8822271884225
episode: 444 training return: tensor(215.0197, device='cuda:0')
episode: 445 training return: tensor(-77.2070, device='cuda:0')
episode: 446 training return: tensor(-196.3037, device='cuda:0')
episode: 447 training return: tensor(-222.3162, device='cuda:0')
epoch: 112 test_true_pfm: 1984.5500286938698 sim_pfm: -125.48294102230768
episode: 448 training return: tensor(-226.9185, device='cuda:0')
episode: 449 training return: tensor(-82.2224, device='cuda:0')
episode: 450 training return: tensor(-331.6894, device='cuda:0')
episode: 451 training return: tensor(-289.6462, device='cuda:0')
epoch: 113 test_true_pfm: 2093.7186119288235 sim_pfm: 92.00001986103598
episode: 452 training return: tensor(-340.9060, device='cuda:0')
episode: 453 training return: tensor(-297.6966, device='cuda:0')
episode: 454 training return: tensor(-198.7731, device='cuda:0')
episode: 455 training return: tensor(-122.6000, device='cuda:0')
epoch: 114 test_true_pfm: 2144.9435969367546 sim_pfm: -31.165793928822193
episode: 456 training return: tensor(-385.8060, device='cuda:0')
episode: 457 training return: tensor(236.9139, device='cuda:0')
episode: 458 training return: tensor(-378.6184, device='cuda:0')
episode: 459 training return: tensor(-44.2941, device='cuda:0')
epoch: 115 test_true_pfm: 2646.860999691855 sim_pfm: 42.932100387833394
episode: 460 training return: tensor(-74.0221, device='cuda:0')
episode: 461 training return: tensor(-157.7861, device='cuda:0')
episode: 462 training return: tensor(-334.9492, device='cuda:0')
episode: 463 training return: tensor(-343.0944, device='cuda:0')
epoch: 116 test_true_pfm: 1526.1180345983087 sim_pfm: 29.144774664154586
episode: 464 training return: tensor(-91.7083, device='cuda:0')
episode: 465 training return: tensor(-312.2066, device='cuda:0')
episode: 466 training return: tensor(263.1074, device='cuda:0')
episode: 467 training return: tensor(-338.7263, device='cuda:0')
epoch: 117 test_true_pfm: 2184.6687970591615 sim_pfm: -115.87845980278992
episode: 468 training return: tensor(253.3793, device='cuda:0')
episode: 469 training return: tensor(-216.8339, device='cuda:0')
episode: 470 training return: tensor(-380.1880, device='cuda:0')
episode: 471 training return: tensor(-81.5905, device='cuda:0')
epoch: 118 test_true_pfm: 2343.2036646815295 sim_pfm: -195.93139464243237
episode: 472 training return: tensor(-208.9691, device='cuda:0')
episode: 473 training return: tensor(-258.2445, device='cuda:0')
episode: 474 training return: tensor(-385.1082, device='cuda:0')
episode: 475 training return: tensor(269.0411, device='cuda:0')
epoch: 119 test_true_pfm: 2250.1913059624717 sim_pfm: -124.02630365200457
episode: 476 training return: tensor(-341.2293, device='cuda:0')
episode: 477 training return: tensor(35.5719, device='cuda:0')
episode: 478 training return: tensor(-342.7938, device='cuda:0')
episode: 479 training return: tensor(-70.1516, device='cuda:0')
epoch: 120 test_true_pfm: 2138.624468459893 sim_pfm: -171.49072245925586
episode: 480 training return: tensor(-261.8487, device='cuda:0')
episode: 481 training return: tensor(-155.3199, device='cuda:0')
episode: 482 training return: tensor(273.7702, device='cuda:0')
episode: 483 training return: tensor(-282.2772, device='cuda:0')
epoch: 121 test_true_pfm: 2451.0319434135777 sim_pfm: -25.52116355538601
episode: 484 training return: tensor(-338.6279, device='cuda:0')
episode: 485 training return: tensor(-247.5384, device='cuda:0')
episode: 486 training return: tensor(-145.0014, device='cuda:0')
episode: 487 training return: tensor(-342.1581, device='cuda:0')
epoch: 122 test_true_pfm: 2364.831130655095 sim_pfm: -173.94311108548814
episode: 488 training return: tensor(-70.6292, device='cuda:0')
episode: 489 training return: tensor(39.9188, device='cuda:0')
episode: 490 training return: tensor(121.1076, device='cuda:0')
episode: 491 training return: tensor(-289.0196, device='cuda:0')
epoch: 123 test_true_pfm: 2024.5352279413376 sim_pfm: -238.8084742219265
episode: 492 training return: tensor(-342.7353, device='cuda:0')
episode: 493 training return: tensor(-342.3359, device='cuda:0')
episode: 494 training return: tensor(268.2283, device='cuda:0')
episode: 495 training return: tensor(-110.4447, device='cuda:0')
epoch: 124 test_true_pfm: 2483.2811073552484 sim_pfm: 62.77915510480913
episode: 496 training return: tensor(279.9778, device='cuda:0')
episode: 497 training return: tensor(-170.6169, device='cuda:0')
episode: 498 training return: tensor(-142.1439, device='cuda:0')
episode: 499 training return: tensor(-340.4596, device='cuda:0')
epoch: 125 test_true_pfm: 2290.910052257361 sim_pfm: 10.021670368073197
episode: 500 training return: tensor(-50.0467, device='cuda:0')
episode: 501 training return: tensor(-79.3333, device='cuda:0')
episode: 502 training return: tensor(-294.9403, device='cuda:0')
episode: 503 training return: tensor(-240.1545, device='cuda:0')
epoch: 126 test_true_pfm: 1889.8015200486382 sim_pfm: -31.248034943632472
episode: 504 training return: tensor(-359.7575, device='cuda:0')
episode: 505 training return: tensor(-114.7206, device='cuda:0')
episode: 506 training return: tensor(-326.8151, device='cuda:0')
episode: 507 training return: tensor(-376.5869, device='cuda:0')
epoch: 127 test_true_pfm: 2119.735541977888 sim_pfm: -271.9630722864919
episode: 508 training return: tensor(-199.7537, device='cuda:0')
episode: 509 training return: tensor(-353.9088, device='cuda:0')
episode: 510 training return: tensor(-179.2073, device='cuda:0')
episode: 511 training return: tensor(-248.4175, device='cuda:0')
epoch: 128 test_true_pfm: 3210.4783791118643 sim_pfm: -44.77615159501632
episode: 512 training return: tensor(-253.1842, device='cuda:0')
episode: 513 training return: tensor(-383.4658, device='cuda:0')
episode: 514 training return: tensor(-196.6020, device='cuda:0')
episode: 515 training return: tensor(-194.6098, device='cuda:0')
epoch: 129 test_true_pfm: 1634.0853911270397 sim_pfm: -178.83112171591105
episode: 516 training return: tensor(-285.3154, device='cuda:0')
episode: 517 training return: tensor(-212.0614, device='cuda:0')
episode: 518 training return: tensor(76.4311, device='cuda:0')
episode: 519 training return: tensor(-0.0682, device='cuda:0')
epoch: 130 test_true_pfm: 2424.0265820781237 sim_pfm: -185.44166277240342
episode: 520 training return: tensor(-339.0296, device='cuda:0')
episode: 521 training return: tensor(-243.8167, device='cuda:0')
episode: 522 training return: tensor(-284.5780, device='cuda:0')
episode: 523 training return: tensor(-182.0411, device='cuda:0')
epoch: 131 test_true_pfm: 2824.8859405091466 sim_pfm: -44.0672822065535
episode: 524 training return: tensor(-326.0565, device='cuda:0')
episode: 525 training return: tensor(-65.7040, device='cuda:0')
episode: 526 training return: tensor(-136.4014, device='cuda:0')
episode: 527 training return: tensor(290.4612, device='cuda:0')
epoch: 132 test_true_pfm: 1727.150761941276 sim_pfm: -251.94590724620502
episode: 528 training return: tensor(11.6334, device='cuda:0')
episode: 529 training return: tensor(-113.9175, device='cuda:0')
episode: 530 training return: tensor(-389.1614, device='cuda:0')
episode: 531 training return: tensor(-299.5577, device='cuda:0')
epoch: 133 test_true_pfm: 1752.2422854313184 sim_pfm: -281.87217718577205
episode: 532 training return: tensor(-377.8438, device='cuda:0')
episode: 533 training return: tensor(-291.9031, device='cuda:0')
episode: 534 training return: tensor(-355.1182, device='cuda:0')
episode: 535 training return: tensor(-65.5895, device='cuda:0')
epoch: 134 test_true_pfm: 1855.4297152719616 sim_pfm: 119.37010692117231
episode: 536 training return: tensor(-311.9655, device='cuda:0')
episode: 537 training return: tensor(-245.3029, device='cuda:0')
episode: 538 training return: tensor(-348.9836, device='cuda:0')
episode: 539 training return: tensor(-340.4719, device='cuda:0')
epoch: 135 test_true_pfm: 2396.2069319879593 sim_pfm: -13.877138363934742
episode: 540 training return: tensor(91.0462, device='cuda:0')
episode: 541 training return: tensor(-382.7387, device='cuda:0')
episode: 542 training return: tensor(-244.2503, device='cuda:0')
episode: 543 training return: tensor(-164.0571, device='cuda:0')
epoch: 136 test_true_pfm: 1925.870259266183 sim_pfm: -1.5418384872997801
episode: 544 training return: tensor(-357.0087, device='cuda:0')
episode: 545 training return: tensor(-61.8845, device='cuda:0')
episode: 546 training return: tensor(-307.0793, device='cuda:0')
episode: 547 training return: tensor(-91.6435, device='cuda:0')
epoch: 137 test_true_pfm: 2762.5559182006887 sim_pfm: -97.9308839776398
episode: 548 training return: tensor(321.4810, device='cuda:0')
episode: 549 training return: tensor(-23.2735, device='cuda:0')
episode: 550 training return: tensor(-335.7855, device='cuda:0')
episode: 551 training return: tensor(13.2821, device='cuda:0')
epoch: 138 test_true_pfm: 2208.758401891119 sim_pfm: -77.32633583360196
episode: 552 training return: tensor(-148.7910, device='cuda:0')
episode: 553 training return: tensor(-137.0216, device='cuda:0')
episode: 554 training return: tensor(-340.5097, device='cuda:0')
episode: 555 training return: tensor(-255.4218, device='cuda:0')
epoch: 139 test_true_pfm: 1994.6734272474907 sim_pfm: 70.00486444745911
episode: 556 training return: tensor(-127.1554, device='cuda:0')
episode: 557 training return: tensor(-241.0554, device='cuda:0')
episode: 558 training return: tensor(-298.7600, device='cuda:0')
episode: 559 training return: tensor(-339.1700, device='cuda:0')
epoch: 140 test_true_pfm: 2054.8194849118686 sim_pfm: -137.961417154506
episode: 560 training return: tensor(5.8674, device='cuda:0')
episode: 561 training return: tensor(-198.1981, device='cuda:0')
episode: 562 training return: tensor(-221.1305, device='cuda:0')
episode: 563 training return: tensor(255.7121, device='cuda:0')
epoch: 141 test_true_pfm: 1863.2746298985076 sim_pfm: -181.24589512944416
episode: 564 training return: tensor(-380.4552, device='cuda:0')
episode: 565 training return: tensor(190.4855, device='cuda:0')
episode: 566 training return: tensor(151.2322, device='cuda:0')
episode: 567 training return: tensor(-183.1356, device='cuda:0')
epoch: 142 test_true_pfm: 2174.4035106420215 sim_pfm: -228.77019869411984
episode: 568 training return: tensor(-214.9462, device='cuda:0')
episode: 569 training return: tensor(-388.2503, device='cuda:0')
episode: 570 training return: tensor(-335.2444, device='cuda:0')
episode: 571 training return: tensor(96.1641, device='cuda:0')
epoch: 143 test_true_pfm: 2504.452304710525 sim_pfm: -252.23239172905838
episode: 572 training return: tensor(-50.7255, device='cuda:0')
episode: 573 training return: tensor(-138.9630, device='cuda:0')
episode: 574 training return: tensor(260.8219, device='cuda:0')
episode: 575 training return: tensor(-198.5045, device='cuda:0')
epoch: 144 test_true_pfm: 2126.5621206474384 sim_pfm: 181.38953469281356
episode: 576 training return: tensor(-378.2824, device='cuda:0')
episode: 577 training return: tensor(-95.7667, device='cuda:0')
episode: 578 training return: tensor(-80.1454, device='cuda:0')
episode: 579 training return: tensor(-38.7575, device='cuda:0')
epoch: 145 test_true_pfm: 2204.156411858192 sim_pfm: -191.82266143711362
episode: 580 training return: tensor(-310.9622, device='cuda:0')
episode: 581 training return: tensor(-296.3340, device='cuda:0')
episode: 582 training return: tensor(-42.7635, device='cuda:0')
episode: 583 training return: tensor(-375.6462, device='cuda:0')
epoch: 146 test_true_pfm: 2041.2212584551082 sim_pfm: -12.019443981475584
episode: 584 training return: tensor(50.4247, device='cuda:0')
episode: 585 training return: tensor(-140.8838, device='cuda:0')
episode: 586 training return: tensor(-363.8208, device='cuda:0')
episode: 587 training return: tensor(-240.8174, device='cuda:0')
epoch: 147 test_true_pfm: 2121.6478025463566 sim_pfm: -113.97639840686072
episode: 588 training return: tensor(-374.9995, device='cuda:0')
episode: 589 training return: tensor(-307.0307, device='cuda:0')
episode: 590 training return: tensor(-376.8998, device='cuda:0')
episode: 591 training return: tensor(-381.4634, device='cuda:0')
epoch: 148 test_true_pfm: 1527.843772285809 sim_pfm: 50.83798540501933
episode: 592 training return: tensor(-336.3748, device='cuda:0')
episode: 593 training return: tensor(-235.6334, device='cuda:0')
episode: 594 training return: tensor(-336.4569, device='cuda:0')
episode: 595 training return: tensor(-98.1765, device='cuda:0')
epoch: 149 test_true_pfm: 2654.02486753919 sim_pfm: -21.49857248585128
episode: 596 training return: tensor(-128.4134, device='cuda:0')
episode: 597 training return: tensor(-264.7989, device='cuda:0')
episode: 598 training return: tensor(-335.1867, device='cuda:0')
episode: 599 training return: tensor(-257.7772, device='cuda:0')
epoch: 150 test_true_pfm: 2950.5116259254883 sim_pfm: -164.96722042632368
