['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'expert', '--seed', '1']
epoch: 0 training_loss 0.273228220641613 test_loss: 0.19389832019805908
epoch: 1 training_loss 0.18181811422109603 test_loss: 0.16878148317337036
epoch: 2 training_loss 0.16352890856564045 test_loss: 0.17754652500152587
epoch: 3 training_loss 0.1560304518789053 test_loss: 0.163572359085083
epoch: 4 training_loss 0.14673651475459337 test_loss: 0.1724625587463379
epoch: 5 training_loss 0.13058075446635484 test_loss: 0.1440453886985779
epoch: 6 training_loss 0.15504298098385333 test_loss: 0.15715304613113404
epoch: 7 training_loss 0.14683605428785085 test_loss: 0.11904188394546508
epoch: 8 training_loss 0.143333979845047 test_loss: 0.14051370620727538
epoch: 9 training_loss 0.12863469909876585 test_loss: 0.144940984249115
epoch: 10 training_loss 0.1380873490124941 test_loss: 0.13440256118774413
epoch: 11 training_loss 0.13186907291412353 test_loss: 0.1517771601676941
epoch: 12 training_loss 0.1357612758129835 test_loss: 0.12894816398620607
epoch: 13 training_loss 0.1331330294907093 test_loss: 0.14252514839172364
epoch: 14 training_loss 0.13094267684966326 test_loss: 0.11256438493728638
epoch: 15 training_loss 0.12155068626627326 test_loss: 0.12060990333557128
epoch: 16 training_loss 0.1377173274755478 test_loss: 0.140846586227417
epoch: 17 training_loss 0.12333965856581926 test_loss: 0.11981611251831055
epoch: 18 training_loss 0.12953260205686093 test_loss: 0.12079432010650634
epoch: 19 training_loss 0.12562762964516877 test_loss: 0.1415593981742859
epoch: 20 training_loss 0.12765316423028708 test_loss: 0.17771872282028198
epoch: 21 training_loss 0.12531903095543384 test_loss: 0.13082435131072997
epoch: 22 training_loss 0.12570591624826194 test_loss: 0.12467668056488038
epoch: 23 training_loss 0.12117212656885386 test_loss: 0.1344770908355713
epoch: 24 training_loss 0.11774562943726778 test_loss: 0.15731899738311766
epoch: 25 training_loss 0.12815494801849125 test_loss: 0.14225103855133056
epoch: 26 training_loss 0.1271590132638812 test_loss: 0.1350848436355591
epoch: 27 training_loss 0.12772272661328316 test_loss: 0.12776833772659302
epoch: 28 training_loss 0.13342032995074987 test_loss: 0.1318964958190918
epoch: 29 training_loss 0.12183944810181856 test_loss: 0.13833308219909668
epoch: 30 training_loss 0.12499665632843972 test_loss: 0.11724095344543457
epoch: 31 training_loss 0.11994604663923383 test_loss: 0.12225220203399659
epoch: 32 training_loss 0.12258931659162045 test_loss: 0.10952953100204468
epoch: 33 training_loss 0.12105431471019984 test_loss: 0.11945155858993531
epoch: 34 training_loss 0.12234483901411294 test_loss: 0.1296382188796997
epoch: 35 training_loss 0.12781383220106363 test_loss: 0.1106307029724121
epoch: 36 training_loss 0.11404134517535568 test_loss: 0.15549783706665038
epoch: 37 training_loss 0.1283316731452942 test_loss: 0.12780396938323973
epoch: 38 training_loss 0.11551208388060331 test_loss: 0.12758998870849608
epoch: 39 training_loss 0.12666992515325545 test_loss: 0.11787983179092407
epoch: 40 training_loss 0.11766096070408821 test_loss: 0.11546745300292968
epoch: 41 training_loss 0.11301839236170054 test_loss: 0.14744311571121216
epoch: 42 training_loss 0.1230751956999302 test_loss: 0.1360116720199585
epoch: 43 training_loss 0.1202680809982121 test_loss: 0.11347614526748658
epoch: 44 training_loss 0.11839888516813517 test_loss: 0.1202049732208252
epoch: 45 training_loss 0.1211511243507266 test_loss: 0.11356959342956544
epoch: 46 training_loss 0.1290793249383569 test_loss: 0.11484007835388184
epoch: 47 training_loss 0.1209995947405696 test_loss: 0.13109776973724366
epoch: 48 training_loss 0.11336385488510131 test_loss: 0.13075571060180663
epoch: 49 training_loss 0.1251834163069725 test_loss: 0.10287460088729858
epoch: 50 training_loss 0.11486737176775932 test_loss: 0.12809786796569825
epoch: 51 training_loss 0.11661264684051276 test_loss: 0.13321954011917114
epoch: 52 training_loss 0.12328040407970547 test_loss: 0.12373604774475097
epoch: 53 training_loss 0.12138676300644874 test_loss: 0.1261919617652893
epoch: 54 training_loss 0.12206448843702673 test_loss: 0.12899949550628662
epoch: 55 training_loss 0.1274501496180892 test_loss: 0.10187135934829712
epoch: 56 training_loss 0.12862566761672498 test_loss: 0.13048266172409057
epoch: 57 training_loss 0.11964436147361994 test_loss: 0.13264037370681764
epoch: 58 training_loss 0.11855636777356267 test_loss: 0.09733458161354065
epoch: 59 training_loss 0.12091158177703619 test_loss: 0.11391181945800781
epoch: 60 training_loss 0.11696072828024626 test_loss: 0.10870734453201295
epoch: 61 training_loss 0.12237550849094987 test_loss: 0.1302468180656433
epoch: 62 training_loss 0.1268994779512286 test_loss: 0.1108212947845459
epoch: 63 training_loss 0.1162419917806983 test_loss: 0.1287965416908264
epoch: 64 training_loss 0.11008794674649834 test_loss: 0.11030524969100952
epoch: 65 training_loss 0.1134070604480803 test_loss: 0.12499349117279053
epoch: 66 training_loss 0.11577784664928913 test_loss: 0.13477814197540283
epoch: 67 training_loss 0.11097529400140047 test_loss: 0.1167741060256958
epoch: 68 training_loss 0.12220151994377375 test_loss: 0.1342749834060669
epoch: 69 training_loss 0.11990574695169925 test_loss: 0.12803328037261963
epoch: 70 training_loss 0.1200069311633706 test_loss: 0.12229633331298828
epoch: 71 training_loss 0.10724528461694717 test_loss: 0.14363831281661987
epoch: 72 training_loss 0.11824208335950971 test_loss: 0.12731192111968995
epoch: 73 training_loss 0.1201831634901464 test_loss: 0.11526051759719849
epoch: 74 training_loss 0.12166626255959273 test_loss: 0.11897964477539062
epoch: 75 training_loss 0.11511952295899391 test_loss: 0.11611120700836182
epoch: 76 training_loss 0.11734913125634193 test_loss: 0.12833837270736695
epoch: 77 training_loss 0.1165178881213069 test_loss: 0.10509873628616333
epoch: 78 training_loss 0.10866891074925661 test_loss: 0.12663888931274414
epoch: 79 training_loss 0.12135272808372974 test_loss: 0.10508350133895875
epoch: 80 training_loss 0.11198894899338484 test_loss: 0.1216171145439148
epoch: 81 training_loss 0.11452019479125738 test_loss: 0.10211522579193115
epoch: 82 training_loss 0.11175512677058577 test_loss: 0.10939505100250244
epoch: 83 training_loss 0.11859079994261265 test_loss: 0.13703075647354127
epoch: 84 training_loss 0.12185439335182309 test_loss: 0.1322893500328064
epoch: 85 training_loss 0.11665380178019405 test_loss: 0.12917011976242065
epoch: 86 training_loss 0.11700376119464635 test_loss: 0.11289465427398682
epoch: 87 training_loss 0.1195798709243536 test_loss: 0.12400745153427124
epoch: 88 training_loss 0.12119408909231424 test_loss: 0.11745153665542603
epoch: 89 training_loss 0.1294398283958435 test_loss: 0.11210360527038574
epoch: 90 training_loss 0.12187570858746767 test_loss: 0.1263004422187805
epoch: 91 training_loss 0.11899799216538667 test_loss: 0.11319055557250976
epoch: 92 training_loss 0.12598931189626456 test_loss: 0.11497689485549926
epoch: 93 training_loss 0.11497731857001782 test_loss: 0.12319042682647705
epoch: 94 training_loss 0.1177672816440463 test_loss: 0.12619892358779908
epoch: 95 training_loss 0.1146681533381343 test_loss: 0.10569782257080078
epoch: 96 training_loss 0.12039829462766648 test_loss: 0.12196825742721558
epoch: 97 training_loss 0.12217930223792792 test_loss: 0.11933881044387817
epoch: 98 training_loss 0.11213407818228006 test_loss: 0.10663455724716187
epoch: 99 training_loss 0.12417578659951686 test_loss: 0.11831220388412475
epoch: 100 training_loss 0.11258072588592767 test_loss: 0.11989526748657227
epoch: 101 training_loss 0.12000075563788414 test_loss: 0.12122334241867065
epoch: 102 training_loss 0.11752570983022452 test_loss: 0.11748603582382203
epoch: 103 training_loss 0.1168779994547367 test_loss: 0.10052944421768188
epoch: 104 training_loss 0.11215823736041784 test_loss: 0.12362436056137086
epoch: 105 training_loss 0.12586246933788062 test_loss: 0.1352459669113159
epoch: 106 training_loss 0.12144622977823019 test_loss: 0.11419095993041992
epoch: 107 training_loss 0.11148828899487853 test_loss: 0.13630682229995728
epoch: 108 training_loss 0.11262295238673686 test_loss: 0.1283270001411438
epoch: 109 training_loss 0.11132151683792472 test_loss: 0.12990565299987794
epoch: 110 training_loss 0.12834410529583692 test_loss: 0.11447299718856811
epoch: 111 training_loss 0.10781031740829349 test_loss: 0.12387768030166627
epoch: 112 training_loss 0.1303316891938448 test_loss: 0.1360075831413269
epoch: 113 training_loss 0.11062647366896272 test_loss: 0.13585307598114013
epoch: 114 training_loss 0.11092223595827817 test_loss: 0.13313188552856445
epoch: 115 training_loss 0.11768704961985349 test_loss: 0.1255337119102478
epoch: 116 training_loss 0.11740753546357155 test_loss: 0.1207116723060608
epoch: 117 training_loss 0.12159734660759568 test_loss: 0.13522409200668334
epoch: 118 training_loss 0.1169604110904038 test_loss: 0.12259855270385742
epoch: 119 training_loss 0.1209922556579113 test_loss: 0.1090572714805603
epoch: 120 training_loss 0.12402916448190808 test_loss: 0.11632462739944457
epoch: 121 training_loss 0.11774593684822321 test_loss: 0.11462525129318238
epoch: 122 training_loss 0.12321016404777765 test_loss: 0.1281610369682312
epoch: 123 training_loss 0.11529545288532972 test_loss: 0.11973776817321777
epoch: 124 training_loss 0.11321599252521991 test_loss: 0.12039637565612793
epoch: 125 training_loss 0.11801477283239364 test_loss: 0.11518985033035278
epoch: 126 training_loss 0.11180059462785721 test_loss: 0.11796984672546387
epoch: 127 training_loss 0.11701395798474551 test_loss: 0.12872569561004638
epoch: 128 training_loss 0.12353938844054937 test_loss: 0.10869213342666625
epoch: 129 training_loss 0.11263148285448552 test_loss: 0.1086034893989563
epoch: 130 training_loss 0.11728384027257562 test_loss: 0.11538655757904052
epoch: 131 training_loss 0.11279159646481275 test_loss: 0.12330143451690674
epoch: 132 training_loss 0.1123749303072691 test_loss: 0.137343430519104
epoch: 133 training_loss 0.120678507424891 test_loss: 0.1447285532951355
epoch: 134 training_loss 0.11636254973709584 test_loss: 0.1311575412750244
epoch: 135 training_loss 0.11297157477587462 test_loss: 0.10608161687850952
epoch: 136 training_loss 0.11970855459570885 test_loss: 0.10673341751098633
epoch: 137 training_loss 0.11068837858736515 test_loss: 0.13420356512069703
epoch: 138 training_loss 0.12084476329386235 test_loss: 0.10846195220947266
epoch: 139 training_loss 0.12198457092046738 test_loss: 0.12000330686569213
epoch: 140 training_loss 0.11601090621203185 test_loss: 0.11802525520324707
epoch: 141 training_loss 0.11475537233054638 test_loss: 0.11412023305892945
epoch: 142 training_loss 0.11626094788312911 test_loss: 0.11807851791381836
epoch: 143 training_loss 0.11903036054223776 test_loss: 0.12425493001937866
epoch: 144 training_loss 0.11350121533498168 test_loss: 0.11104856729507447
epoch: 145 training_loss 0.11324564211070537 test_loss: 0.10162615776062012
epoch: 146 training_loss 0.11196358192712069 test_loss: 0.12159568071365356
epoch: 147 training_loss 0.11322406377643347 test_loss: 0.10338859558105469
epoch: 148 training_loss 0.12334323272109032 test_loss: 0.11491086483001708
epoch: 149 training_loss 0.11292014271020889 test_loss: 0.124940025806427
epoch: 0 training_loss 30.93771234512329 test_loss: 10.009595489501953
epoch: 1 training_loss 7.67116268157959 test_loss: 6.16296272277832
epoch: 2 training_loss 5.656331639289856 test_loss: 5.4972991943359375
epoch: 3 training_loss 4.57046266078949 test_loss: 4.162617492675781
epoch: 4 training_loss 4.077805500030518 test_loss: 3.845662307739258
epoch: 5 training_loss 3.598292191028595 test_loss: 3.3455196380615235
epoch: 6 training_loss 3.239767813682556 test_loss: 3.2130149841308593
epoch: 7 training_loss 3.1671909379959104 test_loss: 2.9353437423706055
epoch: 8 training_loss 2.836204504966736 test_loss: 2.908126640319824
epoch: 9 training_loss 2.7235296356678007 test_loss: 2.7009557723999023
epoch: 10 training_loss 2.563625203371048 test_loss: 2.5672340393066406
epoch: 11 training_loss 2.4038303315639498 test_loss: 2.350986289978027
epoch: 12 training_loss 2.3005944979190827 test_loss: 2.323855972290039
epoch: 13 training_loss 2.20379851937294 test_loss: 2.2421770095825195
epoch: 14 training_loss 2.154713032245636 test_loss: 2.168676567077637
epoch: 15 training_loss 2.0943889927864077 test_loss: 2.0989465713500977
epoch: 16 training_loss 2.1234107613563538 test_loss: 2.0170032501220705
epoch: 17 training_loss 1.9705403625965119 test_loss: 1.8146425247192384
epoch: 18 training_loss 1.9836468374729157 test_loss: 1.9944612503051757
epoch: 19 training_loss 1.9167936980724334 test_loss: 1.9347963333129883
epoch: 20 training_loss 1.8679052114486694 test_loss: 1.7816827774047852
epoch: 21 training_loss 1.8543270695209504 test_loss: 1.929638671875
epoch: 22 training_loss 1.8300052654743195 test_loss: 1.8229244232177735
epoch: 23 training_loss 1.8387734639644622 test_loss: 1.683335304260254
epoch: 24 training_loss 1.7654852223396302 test_loss: 1.9145780563354493
epoch: 25 training_loss 1.7137143099308014 test_loss: 1.725276565551758
epoch: 26 training_loss 1.674173219203949 test_loss: 1.6491218566894532
epoch: 27 training_loss 1.7646807384490968 test_loss: 1.6249839782714843
epoch: 28 training_loss 1.6693082499504088 test_loss: 1.6529069900512696
epoch: 29 training_loss 1.645926650762558 test_loss: 1.7070568084716797
epoch: 30 training_loss 1.6595950317382813 test_loss: 1.7537628173828126
epoch: 31 training_loss 1.608833785057068 test_loss: 1.740659523010254
epoch: 32 training_loss 1.634998971223831 test_loss: 1.6510019302368164
epoch: 33 training_loss 1.6433906388282775 test_loss: 1.6480182647705077
epoch: 34 training_loss 1.517898097038269 test_loss: 1.555833339691162
epoch: 35 training_loss 1.5298233067989349 test_loss: 1.5207490921020508
epoch: 36 training_loss 1.498548663854599 test_loss: 1.4403655052185058
epoch: 37 training_loss 1.5264865279197692 test_loss: 1.4799175262451172
epoch: 38 training_loss 1.5683787739276887 test_loss: 1.4252243995666505
epoch: 39 training_loss 1.4941532349586486 test_loss: 1.4606899261474608
epoch: 40 training_loss 1.518045951128006 test_loss: 1.471750545501709
epoch: 41 training_loss 1.4674121272563934 test_loss: 1.4105886459350585
epoch: 42 training_loss 1.5317839395999908 test_loss: 1.539116382598877
epoch: 43 training_loss 1.4485968041419983 test_loss: 1.5043237686157227
epoch: 44 training_loss 1.4410608577728272 test_loss: 1.3944058418273926
epoch: 45 training_loss 1.43652712225914 test_loss: 1.5368031501770019
epoch: 46 training_loss 1.4250549829006196 test_loss: 1.365122413635254
epoch: 47 training_loss 1.433245725631714 test_loss: 1.336710548400879
epoch: 48 training_loss 1.4273041439056398 test_loss: 1.5364479064941405
epoch: 49 training_loss 1.4165410161018372 test_loss: 1.5087525367736816
epoch: 50 training_loss 1.3937133324146271 test_loss: 1.3340785026550293
epoch: 51 training_loss 1.4008244800567626 test_loss: 1.423046875
epoch: 52 training_loss 1.3497528040409088 test_loss: 1.3914201736450196
epoch: 53 training_loss 1.3899165296554565 test_loss: 1.3699102401733398
epoch: 54 training_loss 1.3754757952690124 test_loss: 1.3189186096191405
epoch: 55 training_loss 1.3370890092849732 test_loss: 1.4153306007385253
epoch: 56 training_loss 1.3370831406116486 test_loss: 1.3221714973449707
epoch: 57 training_loss 1.3240562498569488 test_loss: 1.3967507362365723
epoch: 58 training_loss 1.336508365869522 test_loss: 1.2565045356750488
epoch: 59 training_loss 1.2969594812393188 test_loss: 1.3455389976501464
epoch: 60 training_loss 1.3468543410301208 test_loss: 1.3186009407043457
epoch: 61 training_loss 1.3141581594944 test_loss: 1.2349287033081056
epoch: 62 training_loss 1.3169892871379851 test_loss: 1.3484017372131347
epoch: 63 training_loss 1.2842683517932891 test_loss: 1.2930118560791015
epoch: 64 training_loss 1.2869678235054016 test_loss: 1.2900155067443848
epoch: 65 training_loss 1.292403005361557 test_loss: 1.3161883354187012
epoch: 66 training_loss 1.3162419855594636 test_loss: 1.1997969627380372
epoch: 67 training_loss 1.2911219972372054 test_loss: 1.2823086738586427
epoch: 68 training_loss 1.2580250650644302 test_loss: 1.3583070755004882
epoch: 69 training_loss 1.3039263272285462 test_loss: 1.2561545372009277
epoch: 70 training_loss 1.2758433371782303 test_loss: 1.3464084625244142
epoch: 71 training_loss 1.2641398710012437 test_loss: 1.3272218704223633
epoch: 72 training_loss 1.233156879544258 test_loss: 1.2583683013916016
epoch: 73 training_loss 1.2367652666568756 test_loss: 1.2213425636291504
epoch: 74 training_loss 1.2134157502651215 test_loss: 1.2601128578186036
epoch: 75 training_loss 1.2315286993980408 test_loss: 1.1899620056152345
epoch: 76 training_loss 1.2480973887443543 test_loss: 1.203416919708252
epoch: 77 training_loss 1.2299158108234405 test_loss: 1.237543487548828
epoch: 78 training_loss 1.1900724476575852 test_loss: 1.2464968681335449
epoch: 79 training_loss 1.2385908967256547 test_loss: 1.1761783599853515
epoch: 80 training_loss 1.2146147948503494 test_loss: 1.2064446449279784
epoch: 81 training_loss 1.1913028752803803 test_loss: 1.215183162689209
epoch: 82 training_loss 1.2333688217401504 test_loss: 1.1762356758117676
epoch: 83 training_loss 1.230866438150406 test_loss: 1.1786751747131348
epoch: 84 training_loss 1.2297357630729675 test_loss: 1.119125747680664
epoch: 85 training_loss 1.2029346072673797 test_loss: 1.20127592086792
epoch: 86 training_loss 1.194108853340149 test_loss: 1.1595439910888672
epoch: 87 training_loss 1.1575853365659714 test_loss: 1.1464245796203614
epoch: 88 training_loss 1.193147016763687 test_loss: 1.1930790901184083
epoch: 89 training_loss 1.1694529116153718 test_loss: 1.2136302947998048
epoch: 90 training_loss 1.2023132520914077 test_loss: 1.2223286628723145
epoch: 91 training_loss 1.1877167856693267 test_loss: 1.1734636306762696
epoch: 92 training_loss 1.1421761065721512 test_loss: 1.2104702949523927
epoch: 93 training_loss 1.162663113474846 test_loss: 1.15440092086792
epoch: 94 training_loss 1.1597250819206237 test_loss: 1.115386962890625
epoch: 95 training_loss 1.137613462805748 test_loss: 1.0524612426757813
epoch: 96 training_loss 1.1451747459173203 test_loss: 1.1884403228759766
epoch: 97 training_loss 1.1313637602329254 test_loss: 1.1553797721862793
epoch: 98 training_loss 1.1665317434072495 test_loss: 1.1108908653259277
epoch: 99 training_loss 1.138610103726387 test_loss: 1.0771368980407714
epoch: 100 training_loss 1.1451223409175872 test_loss: 1.1217001914978026
epoch: 101 training_loss 1.1136904019117355 test_loss: 1.1289121627807617
epoch: 102 training_loss 1.1317916870117188 test_loss: 1.1279023170471192
epoch: 103 training_loss 1.1274187332391739 test_loss: 1.0673513412475586
epoch: 104 training_loss 1.1170002681016922 test_loss: 1.051936721801758
epoch: 105 training_loss 1.1500519919395447 test_loss: 1.1660552024841309
epoch: 106 training_loss 1.1160088986158372 test_loss: 1.1241835594177245
epoch: 107 training_loss 1.1118812191486358 test_loss: 1.1275546073913574
epoch: 108 training_loss 1.094144319295883 test_loss: 1.1036364555358886
epoch: 109 training_loss 1.0977875512838364 test_loss: 1.1143880844116212
epoch: 110 training_loss 1.078767482638359 test_loss: 1.0727395057678222
epoch: 111 training_loss 1.103655468225479 test_loss: 1.1927985191345214
epoch: 112 training_loss 1.0888468092679977 test_loss: 1.1141051292419433
epoch: 113 training_loss 1.1044326013326644 test_loss: 1.0832982063293457
epoch: 114 training_loss 1.1013202148675918 test_loss: 1.0631832122802733
epoch: 115 training_loss 1.109949756860733 test_loss: 1.039193344116211
epoch: 116 training_loss 1.106762580871582 test_loss: 1.0847434997558594
epoch: 117 training_loss 1.0829623198509217 test_loss: 1.117096519470215
epoch: 118 training_loss 1.1400584173202515 test_loss: 1.0782931327819825
epoch: 119 training_loss 1.0806930381059647 test_loss: 1.0448646545410156
epoch: 120 training_loss 1.0990476113557817 test_loss: 1.062450122833252
epoch: 121 training_loss 1.0593961226940154 test_loss: 1.0497726440429687
epoch: 122 training_loss 1.0826546502113343 test_loss: 1.0298233032226562
epoch: 123 training_loss 1.0698679089546204 test_loss: 1.0663426399230957
epoch: 124 training_loss 1.098349182009697 test_loss: 1.14322509765625
epoch: 125 training_loss 1.055749869942665 test_loss: 1.0385100364685058
epoch: 126 training_loss 1.0639095866680146 test_loss: 1.0307697296142577
epoch: 127 training_loss 1.0790725147724152 test_loss: 1.0785438537597656
epoch: 128 training_loss 1.0780563056468964 test_loss: 1.0355902671813966
epoch: 129 training_loss 1.0359920728206635 test_loss: 1.0526556015014648
epoch: 130 training_loss 1.0825819146633149 test_loss: 1.047799301147461
epoch: 131 training_loss 1.067632269859314 test_loss: 1.0247379302978517
epoch: 132 training_loss 1.046904885172844 test_loss: 1.023698616027832
epoch: 133 training_loss 1.0398836612701416 test_loss: 1.0649125099182128
epoch: 134 training_loss 1.0511487889289857 test_loss: 1.1730680465698242
epoch: 135 training_loss 1.0645166581869125 test_loss: 1.0767393112182617
epoch: 136 training_loss 1.0457526302337647 test_loss: 1.0104155540466309
epoch: 137 training_loss 1.0522910052537917 test_loss: 1.0027358055114746
epoch: 138 training_loss 1.0549032694101335 test_loss: 1.0484992027282716
epoch: 139 training_loss 1.0614558893442154 test_loss: 1.0660377502441407
epoch: 140 training_loss 1.03542041182518 test_loss: 1.0402966499328614
epoch: 141 training_loss 1.0478953075408937 test_loss: 1.04515380859375
epoch: 142 training_loss 1.0130766230821608 test_loss: 1.0390327453613282
epoch: 143 training_loss 1.0309852182865142 test_loss: 1.0513988494873048
epoch: 144 training_loss 1.029223200082779 test_loss: 1.0291854858398437
epoch: 145 training_loss 1.0432418632507323 test_loss: 1.0629240036010743
epoch: 146 training_loss 1.0176983445882797 test_loss: 0.9850109100341797
epoch: 147 training_loss 1.0228554213047027 test_loss: 0.9977903366088867
epoch: 148 training_loss 1.0051502180099487 test_loss: 1.0168349266052246
epoch: 149 training_loss 1.018560482263565 test_loss: 1.0197868347167969
3994.122593259855
episode: 0 training return: tensor(-93.7335, device='cuda:0')
episode: 1 training return: tensor(-47.7045, device='cuda:0')
episode: 2 training return: tensor(-897.5744, device='cuda:0')
episode: 3 training return: tensor(-128.1827, device='cuda:0')
epoch: 1 test_true_pfm: 4003.3608055317036 sim_pfm: -93.28256388709026
episode: 4 training return: tensor(-75.8702, device='cuda:0')
episode: 5 training return: tensor(-100.6724, device='cuda:0')
episode: 6 training return: tensor(-139.2423, device='cuda:0')
episode: 7 training return: tensor(-77.1138, device='cuda:0')
epoch: 2 test_true_pfm: 4017.7555349357426 sim_pfm: -301.3366578880814
episode: 8 training return: tensor(-64.2527, device='cuda:0')
episode: 9 training return: tensor(-112.6127, device='cuda:0')
episode: 10 training return: tensor(-38.9710, device='cuda:0')
episode: 11 training return: tensor(-815.7888, device='cuda:0')
epoch: 3 test_true_pfm: 2857.469887719905 sim_pfm: -519.8144949662965
episode: 12 training return: tensor(-92.7293, device='cuda:0')
episode: 13 training return: tensor(-29.7978, device='cuda:0')
episode: 14 training return: tensor(-69.9957, device='cuda:0')
episode: 15 training return: tensor(-644.8657, device='cuda:0')
epoch: 4 test_true_pfm: 1964.4712005850135 sim_pfm: -929.0392016526117
episode: 16 training return: tensor(-916.8274, device='cuda:0')
episode: 17 training return: tensor(-87.0876, device='cuda:0')
episode: 18 training return: tensor(-86.4111, device='cuda:0')
episode: 19 training return: tensor(-87.2750, device='cuda:0')
epoch: 5 test_true_pfm: 3985.5452482002233 sim_pfm: -608.3238516749698
episode: 20 training return: tensor(-43.7207, device='cuda:0')
episode: 21 training return: tensor(-759.7921, device='cuda:0')
episode: 22 training return: tensor(-74.6096, device='cuda:0')
episode: 23 training return: tensor(-884.6037, device='cuda:0')
epoch: 6 test_true_pfm: 3945.73113292511 sim_pfm: -93.47803793686519
episode: 24 training return: tensor(-786.2338, device='cuda:0')
episode: 25 training return: tensor(-876.1903, device='cuda:0')
episode: 26 training return: tensor(-109.8894, device='cuda:0')
episode: 27 training return: tensor(-73.1474, device='cuda:0')
epoch: 7 test_true_pfm: 4029.6371618513404 sim_pfm: -112.21970920854558
episode: 28 training return: tensor(-97.1316, device='cuda:0')
episode: 29 training return: tensor(-28.8316, device='cuda:0')
episode: 30 training return: tensor(-65.2567, device='cuda:0')
episode: 31 training return: tensor(-49.4419, device='cuda:0')
epoch: 8 test_true_pfm: 4005.970098645999 sim_pfm: -82.77740624576109
episode: 32 training return: tensor(-123.8083, device='cuda:0')
episode: 33 training return: tensor(-85.0503, device='cuda:0')
episode: 34 training return: tensor(-161.9962, device='cuda:0')
episode: 35 training return: tensor(-95.8162, device='cuda:0')
epoch: 9 test_true_pfm: 1873.5272559395742 sim_pfm: -601.0095724035054
episode: 36 training return: tensor(-110.1551, device='cuda:0')
episode: 37 training return: tensor(-182.6684, device='cuda:0')
episode: 38 training return: tensor(-939.2393, device='cuda:0')
episode: 39 training return: tensor(-42.3654, device='cuda:0')
epoch: 10 test_true_pfm: 3959.967189101395 sim_pfm: -360.2931530890055
episode: 40 training return: tensor(-155.6758, device='cuda:0')
episode: 41 training return: tensor(-89.8829, device='cuda:0')
episode: 42 training return: tensor(-120.4265, device='cuda:0')
episode: 43 training return: tensor(-147.1485, device='cuda:0')
epoch: 11 test_true_pfm: 4010.408689162248 sim_pfm: -91.99588904935324
episode: 44 training return: tensor(-956.6235, device='cuda:0')
episode: 45 training return: tensor(-19.7765, device='cuda:0')
episode: 46 training return: tensor(-59.8625, device='cuda:0')
episode: 47 training return: tensor(-36.7627, device='cuda:0')
epoch: 12 test_true_pfm: 3977.212591514246 sim_pfm: -537.728552785314
episode: 48 training return: tensor(-91.7265, device='cuda:0')
episode: 49 training return: tensor(-120.0034, device='cuda:0')
episode: 50 training return: tensor(-81.6191, device='cuda:0')
episode: 51 training return: tensor(-80.8870, device='cuda:0')
epoch: 13 test_true_pfm: 3945.6403169451573 sim_pfm: -116.33802683647566
episode: 52 training return: tensor(-69.6657, device='cuda:0')
episode: 53 training return: tensor(-100.0678, device='cuda:0')
episode: 54 training return: tensor(-99.9290, device='cuda:0')
episode: 55 training return: tensor(-136.2802, device='cuda:0')
epoch: 14 test_true_pfm: 4000.1100844105727 sim_pfm: -88.84734823684751
episode: 56 training return: tensor(-69.9748, device='cuda:0')
episode: 57 training return: tensor(-54.7791, device='cuda:0')
episode: 58 training return: tensor(-160.1371, device='cuda:0')
episode: 59 training return: tensor(-27.1052, device='cuda:0')
epoch: 15 test_true_pfm: 3975.5779047919054 sim_pfm: -88.85060511057964
episode: 60 training return: tensor(-112.4983, device='cuda:0')
episode: 61 training return: tensor(-143.3968, device='cuda:0')
episode: 62 training return: tensor(-99.3845, device='cuda:0')
episode: 63 training return: tensor(-151.8676, device='cuda:0')
epoch: 16 test_true_pfm: 3960.630367435237 sim_pfm: -64.62985465597983
episode: 64 training return: tensor(-61.2912, device='cuda:0')
episode: 65 training return: tensor(-72.3146, device='cuda:0')
episode: 66 training return: tensor(-72.2732, device='cuda:0')
episode: 67 training return: tensor(-18.9871, device='cuda:0')
epoch: 17 test_true_pfm: 4019.4338519303274 sim_pfm: -70.80083242473968
episode: 68 training return: tensor(-66.2760, device='cuda:0')
episode: 69 training return: tensor(-104.0209, device='cuda:0')
episode: 70 training return: tensor(-103.3514, device='cuda:0')
episode: 71 training return: tensor(-80.8104, device='cuda:0')
epoch: 18 test_true_pfm: 3938.365205704724 sim_pfm: -77.33440976772302
episode: 72 training return: tensor(-45.4544, device='cuda:0')
episode: 73 training return: tensor(-72.8811, device='cuda:0')
episode: 74 training return: tensor(-41.1225, device='cuda:0')
episode: 75 training return: tensor(-49.3918, device='cuda:0')
epoch: 19 test_true_pfm: 3972.7248384221402 sim_pfm: -91.59999338299774
episode: 76 training return: tensor(-147.8716, device='cuda:0')
episode: 77 training return: tensor(-931.7696, device='cuda:0')
episode: 78 training return: tensor(-116.1528, device='cuda:0')
episode: 79 training return: tensor(-123.1732, device='cuda:0')
epoch: 20 test_true_pfm: 4010.186978292435 sim_pfm: -27.937527760475252
episode: 80 training return: tensor(-157.1307, device='cuda:0')
episode: 81 training return: tensor(-46.7530, device='cuda:0')
episode: 82 training return: tensor(-60.3355, device='cuda:0')
episode: 83 training return: tensor(-118.8819, device='cuda:0')
epoch: 21 test_true_pfm: 3932.7187773872306 sim_pfm: -67.68712466330423
episode: 84 training return: tensor(-2.1736, device='cuda:0')
episode: 85 training return: tensor(-71.3751, device='cuda:0')
episode: 86 training return: tensor(-43.0947, device='cuda:0')
episode: 87 training return: tensor(-40.7678, device='cuda:0')
epoch: 22 test_true_pfm: 4003.93919073609 sim_pfm: -94.26372092978757
episode: 88 training return: tensor(-103.3360, device='cuda:0')
episode: 89 training return: tensor(-112.2075, device='cuda:0')
episode: 90 training return: tensor(-833.8321, device='cuda:0')
episode: 91 training return: tensor(-106.5372, device='cuda:0')
epoch: 23 test_true_pfm: 4010.3508407168647 sim_pfm: -35.05966894693362
episode: 92 training return: tensor(-28.9575, device='cuda:0')
episode: 93 training return: tensor(-939.3370, device='cuda:0')
episode: 94 training return: tensor(-748.3772, device='cuda:0')
episode: 95 training return: tensor(-25.1109, device='cuda:0')
epoch: 24 test_true_pfm: 4032.674666368468 sim_pfm: -41.64116392581491
episode: 96 training return: tensor(-83.2861, device='cuda:0')
episode: 97 training return: tensor(-807.4088, device='cuda:0')
episode: 98 training return: tensor(-906.7064, device='cuda:0')
episode: 99 training return: tensor(-122.3457, device='cuda:0')
epoch: 25 test_true_pfm: 4019.06289040306 sim_pfm: -82.80204224597158
episode: 100 training return: tensor(17.9671, device='cuda:0')
episode: 101 training return: tensor(-12.1737, device='cuda:0')
episode: 102 training return: tensor(-890.2822, device='cuda:0')
episode: 103 training return: tensor(-105.8424, device='cuda:0')
epoch: 26 test_true_pfm: 3948.678851546691 sim_pfm: -111.82864046294708
episode: 104 training return: tensor(-92.5554, device='cuda:0')
episode: 105 training return: tensor(-31.5516, device='cuda:0')
episode: 106 training return: tensor(-66.9674, device='cuda:0')
episode: 107 training return: tensor(-43.5932, device='cuda:0')
epoch: 27 test_true_pfm: 2907.9440657095074 sim_pfm: -53.713066095525086
episode: 108 training return: tensor(-48.5934, device='cuda:0')
episode: 109 training return: tensor(-101.1472, device='cuda:0')
episode: 110 training return: tensor(-92.6405, device='cuda:0')
episode: 111 training return: tensor(-95.3854, device='cuda:0')
epoch: 28 test_true_pfm: 4015.498430534344 sim_pfm: -63.11669577163411
episode: 112 training return: tensor(-131.1504, device='cuda:0')
episode: 113 training return: tensor(-108.9678, device='cuda:0')
episode: 114 training return: tensor(-85.8375, device='cuda:0')
episode: 115 training return: tensor(-144.6521, device='cuda:0')
epoch: 29 test_true_pfm: 4013.377029573932 sim_pfm: -46.26886648633323
episode: 116 training return: tensor(-61.8736, device='cuda:0')
episode: 117 training return: tensor(-38.3403, device='cuda:0')
episode: 118 training return: tensor(-79.9634, device='cuda:0')
episode: 119 training return: tensor(-883.6293, device='cuda:0')
epoch: 30 test_true_pfm: 4011.4011626704378 sim_pfm: -69.89062487021631
episode: 120 training return: tensor(-53.9628, device='cuda:0')
episode: 121 training return: tensor(-88.5845, device='cuda:0')
episode: 122 training return: tensor(-51.0998, device='cuda:0')
episode: 123 training return: tensor(-62.5924, device='cuda:0')
epoch: 31 test_true_pfm: 3994.2200646042525 sim_pfm: -87.14696147467475
episode: 124 training return: tensor(-44.6351, device='cuda:0')
episode: 125 training return: tensor(-75.6910, device='cuda:0')
episode: 126 training return: tensor(-54.2056, device='cuda:0')
episode: 127 training return: tensor(-156.5504, device='cuda:0')
epoch: 32 test_true_pfm: 3996.4854157703 sim_pfm: -59.62508119446769
episode: 128 training return: tensor(-102.4260, device='cuda:0')
episode: 129 training return: tensor(-115.0973, device='cuda:0')
episode: 130 training return: tensor(-900.6265, device='cuda:0')
episode: 131 training return: tensor(-141.6300, device='cuda:0')
epoch: 33 test_true_pfm: 3980.189506636603 sim_pfm: -21.183882889628876
episode: 132 training return: tensor(-27.2443, device='cuda:0')
episode: 133 training return: tensor(-78.8952, device='cuda:0')
episode: 134 training return: tensor(-42.5601, device='cuda:0')
episode: 135 training return: tensor(-48.5405, device='cuda:0')
epoch: 34 test_true_pfm: 4025.3221292043986 sim_pfm: -93.63256006697581
episode: 136 training return: tensor(-107.1956, device='cuda:0')
episode: 137 training return: tensor(-44.5655, device='cuda:0')
episode: 138 training return: tensor(-959.1248, device='cuda:0')
episode: 139 training return: tensor(-112.0129, device='cuda:0')
epoch: 35 test_true_pfm: 3971.8301008910553 sim_pfm: -42.05227350757923
episode: 140 training return: tensor(-90.4905, device='cuda:0')
episode: 141 training return: tensor(-39.4426, device='cuda:0')
episode: 142 training return: tensor(-40.0097, device='cuda:0')
episode: 143 training return: tensor(-100.4589, device='cuda:0')
epoch: 36 test_true_pfm: 4009.0217136476954 sim_pfm: -45.28909047060491
episode: 144 training return: tensor(-20.4664, device='cuda:0')
episode: 145 training return: tensor(-67.1155, device='cuda:0')
episode: 146 training return: tensor(-84.9140, device='cuda:0')
episode: 147 training return: tensor(-43.7387, device='cuda:0')
epoch: 37 test_true_pfm: 4011.8609843345425 sim_pfm: -32.83625802836226
episode: 148 training return: tensor(-80.0353, device='cuda:0')
episode: 149 training return: tensor(-34.1067, device='cuda:0')
episode: 150 training return: tensor(-2.2912, device='cuda:0')
episode: 151 training return: tensor(-40.4561, device='cuda:0')
epoch: 38 test_true_pfm: 3997.544437555385 sim_pfm: -34.4073060270263
episode: 152 training return: tensor(-877.6213, device='cuda:0')
episode: 153 training return: tensor(-109.9344, device='cuda:0')
episode: 154 training return: tensor(-33.6880, device='cuda:0')
episode: 155 training return: tensor(-46.2333, device='cuda:0')
epoch: 39 test_true_pfm: 4008.7891919428257 sim_pfm: -43.393694667630676
episode: 156 training return: tensor(-63.0154, device='cuda:0')
episode: 157 training return: tensor(-922.6274, device='cuda:0')
episode: 158 training return: tensor(-40.1100, device='cuda:0')
episode: 159 training return: tensor(-69.8085, device='cuda:0')
epoch: 40 test_true_pfm: 4009.8477546081017 sim_pfm: -42.20177385639787
episode: 160 training return: tensor(-21.6761, device='cuda:0')
episode: 161 training return: tensor(-106.2719, device='cuda:0')
episode: 162 training return: tensor(-53.0293, device='cuda:0')
episode: 163 training return: tensor(-121.0524, device='cuda:0')
epoch: 41 test_true_pfm: 3986.9280874095907 sim_pfm: -40.530884105624864
episode: 164 training return: tensor(-55.1606, device='cuda:0')
episode: 165 training return: tensor(-95.9011, device='cuda:0')
episode: 166 training return: tensor(-39.2069, device='cuda:0')
episode: 167 training return: tensor(-74.8254, device='cuda:0')
epoch: 42 test_true_pfm: 4012.4032048882887 sim_pfm: -65.19719139242079
episode: 168 training return: tensor(-178.2355, device='cuda:0')
episode: 169 training return: tensor(-75.8736, device='cuda:0')
episode: 170 training return: tensor(-27.4458, device='cuda:0')
episode: 171 training return: tensor(-99.7833, device='cuda:0')
epoch: 43 test_true_pfm: 3976.2092926173796 sim_pfm: -74.29789807814329
episode: 172 training return: tensor(-29.2022, device='cuda:0')
episode: 173 training return: tensor(-100.0815, device='cuda:0')
episode: 174 training return: tensor(-56.5894, device='cuda:0')
episode: 175 training return: tensor(-66.7504, device='cuda:0')
epoch: 44 test_true_pfm: 4007.434574966201 sim_pfm: -82.45187119398422
episode: 176 training return: tensor(-106.8879, device='cuda:0')
episode: 177 training return: tensor(-76.0125, device='cuda:0')
episode: 178 training return: tensor(-36.8621, device='cuda:0')
episode: 179 training return: tensor(-97.2276, device='cuda:0')
epoch: 45 test_true_pfm: 3954.440670102274 sim_pfm: -104.75489246744353
episode: 180 training return: tensor(-26.3823, device='cuda:0')
episode: 181 training return: tensor(-26.9573, device='cuda:0')
episode: 182 training return: tensor(-58.3392, device='cuda:0')
episode: 183 training return: tensor(-889.6490, device='cuda:0')
epoch: 46 test_true_pfm: 4003.0017413396763 sim_pfm: -90.91104088514112
episode: 184 training return: tensor(-81.6234, device='cuda:0')
episode: 185 training return: tensor(-90.3872, device='cuda:0')
episode: 186 training return: tensor(-31.8186, device='cuda:0')
episode: 187 training return: tensor(-11.7441, device='cuda:0')
epoch: 47 test_true_pfm: 4028.2299861505403 sim_pfm: -59.17489119429956
episode: 188 training return: tensor(-106.6452, device='cuda:0')
episode: 189 training return: tensor(-21.5784, device='cuda:0')
episode: 190 training return: tensor(-3.9095, device='cuda:0')
episode: 191 training return: tensor(-38.6574, device='cuda:0')
epoch: 48 test_true_pfm: 3970.248247028931 sim_pfm: -62.730926132935565
episode: 192 training return: tensor(-61.0442, device='cuda:0')
episode: 193 training return: tensor(-921.5502, device='cuda:0')
episode: 194 training return: tensor(-91.5878, device='cuda:0')
episode: 195 training return: tensor(-747.9080, device='cuda:0')
epoch: 49 test_true_pfm: 3964.856325607357 sim_pfm: -57.568176326914305
episode: 196 training return: tensor(-70.2845, device='cuda:0')
episode: 197 training return: tensor(-136.3741, device='cuda:0')
episode: 198 training return: tensor(-15.8920, device='cuda:0')
episode: 199 training return: tensor(-97.1039, device='cuda:0')
epoch: 50 test_true_pfm: 3997.7317344410662 sim_pfm: -44.32636128879191
episode: 200 training return: tensor(-27.0993, device='cuda:0')
episode: 201 training return: tensor(-130.7017, device='cuda:0')
episode: 202 training return: tensor(-58.7593, device='cuda:0')
episode: 203 training return: tensor(-881.2931, device='cuda:0')
epoch: 51 test_true_pfm: 4004.045671315687 sim_pfm: -44.15036481111505
episode: 204 training return: tensor(-123.0059, device='cuda:0')
episode: 205 training return: tensor(-133.1781, device='cuda:0')
episode: 206 training return: tensor(-37.3128, device='cuda:0')
episode: 207 training return: tensor(-39.5069, device='cuda:0')
epoch: 52 test_true_pfm: 4009.173005436422 sim_pfm: -58.710880097312234
episode: 208 training return: tensor(-52.2785, device='cuda:0')
episode: 209 training return: tensor(-84.8497, device='cuda:0')
episode: 210 training return: tensor(-38.8436, device='cuda:0')
episode: 211 training return: tensor(-126.8357, device='cuda:0')
epoch: 53 test_true_pfm: 3968.0962818033618 sim_pfm: -55.97375773763633
episode: 212 training return: tensor(-27.7022, device='cuda:0')
episode: 213 training return: tensor(-78.1199, device='cuda:0')
episode: 214 training return: tensor(-1.4779, device='cuda:0')
episode: 215 training return: tensor(-75.6884, device='cuda:0')
epoch: 54 test_true_pfm: 4003.7168970424304 sim_pfm: -37.31012770404535
episode: 216 training return: tensor(-87.3015, device='cuda:0')
episode: 217 training return: tensor(-813.1594, device='cuda:0')
episode: 218 training return: tensor(-861.3677, device='cuda:0')
episode: 219 training return: tensor(-821.5010, device='cuda:0')
epoch: 55 test_true_pfm: 3989.0801987693226 sim_pfm: -40.16194756953822
episode: 220 training return: tensor(-47.6956, device='cuda:0')
episode: 221 training return: tensor(-893.5165, device='cuda:0')
episode: 222 training return: tensor(-98.5625, device='cuda:0')
episode: 223 training return: tensor(-70.7238, device='cuda:0')
epoch: 56 test_true_pfm: 4004.889044348994 sim_pfm: -32.558432875957806
episode: 224 training return: tensor(-54.8438, device='cuda:0')
episode: 225 training return: tensor(-113.6912, device='cuda:0')
episode: 226 training return: tensor(-73.9446, device='cuda:0')
episode: 227 training return: tensor(-120.5460, device='cuda:0')
epoch: 57 test_true_pfm: 4048.633846365181 sim_pfm: -92.14523205897422
episode: 228 training return: tensor(-57.6311, device='cuda:0')
episode: 229 training return: tensor(-37.0457, device='cuda:0')
episode: 230 training return: tensor(-40.1938, device='cuda:0')
episode: 231 training return: tensor(-68.2656, device='cuda:0')
epoch: 58 test_true_pfm: 4010.795259313596 sim_pfm: -73.49577349648462
episode: 232 training return: tensor(-27.0971, device='cuda:0')
episode: 233 training return: tensor(-71.3214, device='cuda:0')
episode: 234 training return: tensor(-88.8793, device='cuda:0')
episode: 235 training return: tensor(-21.4902, device='cuda:0')
epoch: 59 test_true_pfm: 4031.920966103185 sim_pfm: -35.811546618147986
episode: 236 training return: tensor(-53.2134, device='cuda:0')
episode: 237 training return: tensor(-92.0778, device='cuda:0')
episode: 238 training return: tensor(-77.6796, device='cuda:0')
episode: 239 training return: tensor(-83.5576, device='cuda:0')
epoch: 60 test_true_pfm: 3989.982155411272 sim_pfm: -42.61232636359637
episode: 240 training return: tensor(-11.3168, device='cuda:0')
episode: 241 training return: tensor(8.1734, device='cuda:0')
episode: 242 training return: tensor(-44.8870, device='cuda:0')
episode: 243 training return: tensor(-73.2330, device='cuda:0')
epoch: 61 test_true_pfm: 4003.332402524918 sim_pfm: -34.164412090642145
episode: 244 training return: tensor(-105.9296, device='cuda:0')
episode: 245 training return: tensor(-106.2141, device='cuda:0')
episode: 246 training return: tensor(-52.3517, device='cuda:0')
episode: 247 training return: tensor(-48.7715, device='cuda:0')
epoch: 62 test_true_pfm: 4007.404858887398 sim_pfm: -54.637301830051
episode: 248 training return: tensor(-141.9524, device='cuda:0')
episode: 249 training return: tensor(-102.6182, device='cuda:0')
episode: 250 training return: tensor(-59.5317, device='cuda:0')
episode: 251 training return: tensor(-72.8858, device='cuda:0')
epoch: 63 test_true_pfm: 4031.4960356137667 sim_pfm: -35.654038474545814
episode: 252 training return: tensor(-33.8708, device='cuda:0')
episode: 253 training return: tensor(-85.6772, device='cuda:0')
episode: 254 training return: tensor(-121.1497, device='cuda:0')
episode: 255 training return: tensor(-45.0191, device='cuda:0')
epoch: 64 test_true_pfm: 4034.6004479872704 sim_pfm: -74.96451160984968
episode: 256 training return: tensor(-949.1276, device='cuda:0')
episode: 257 training return: tensor(-142.0003, device='cuda:0')
episode: 258 training return: tensor(-69.7708, device='cuda:0')
episode: 259 training return: tensor(-43.2703, device='cuda:0')
epoch: 65 test_true_pfm: 4013.864998457178 sim_pfm: -43.7811323920129
episode: 260 training return: tensor(-3.1252, device='cuda:0')
episode: 261 training return: tensor(-90.9885, device='cuda:0')
episode: 262 training return: tensor(-98.9141, device='cuda:0')
episode: 263 training return: tensor(-53.4356, device='cuda:0')
epoch: 66 test_true_pfm: 4029.90622418165 sim_pfm: -49.47231343020879
episode: 264 training return: tensor(-33.3548, device='cuda:0')
episode: 265 training return: tensor(-56.3088, device='cuda:0')
episode: 266 training return: tensor(-42.4960, device='cuda:0')
episode: 267 training return: tensor(-92.4540, device='cuda:0')
epoch: 67 test_true_pfm: 3988.0451362413564 sim_pfm: -45.36959577529342
episode: 268 training return: tensor(-922.9877, device='cuda:0')
episode: 269 training return: tensor(-17.2224, device='cuda:0')
episode: 270 training return: tensor(-22.4180, device='cuda:0')
episode: 271 training return: tensor(-83.7917, device='cuda:0')
epoch: 68 test_true_pfm: 3961.26932282119 sim_pfm: -55.266082347360985
episode: 272 training return: tensor(-72.3198, device='cuda:0')
episode: 273 training return: tensor(-73.8265, device='cuda:0')
episode: 274 training return: tensor(-98.0306, device='cuda:0')
episode: 275 training return: tensor(-82.8920, device='cuda:0')
epoch: 69 test_true_pfm: 3990.152390823077 sim_pfm: -36.53610756532483
episode: 276 training return: tensor(-31.6240, device='cuda:0')
episode: 277 training return: tensor(-72.5836, device='cuda:0')
episode: 278 training return: tensor(-12.2992, device='cuda:0')
episode: 279 training return: tensor(-80.1977, device='cuda:0')
epoch: 70 test_true_pfm: 4030.0818434771354 sim_pfm: -63.05703093888587
episode: 280 training return: tensor(-48.7059, device='cuda:0')
episode: 281 training return: tensor(-99.4590, device='cuda:0')
episode: 282 training return: tensor(-33.3140, device='cuda:0')
episode: 283 training return: tensor(-75.4052, device='cuda:0')
epoch: 71 test_true_pfm: 4045.4440431226453 sim_pfm: -15.788237954926444
episode: 284 training return: tensor(-52.0414, device='cuda:0')
episode: 285 training return: tensor(-41.1265, device='cuda:0')
episode: 286 training return: tensor(-49.5261, device='cuda:0')
episode: 287 training return: tensor(-96.5671, device='cuda:0')
epoch: 72 test_true_pfm: 3991.862726181023 sim_pfm: -18.617286047529586
episode: 288 training return: tensor(-70.9802, device='cuda:0')
episode: 289 training return: tensor(-50.2815, device='cuda:0')
episode: 290 training return: tensor(-40.8298, device='cuda:0')
episode: 291 training return: tensor(-37.6934, device='cuda:0')
epoch: 73 test_true_pfm: 4007.878037467834 sim_pfm: -84.64771635263848
episode: 292 training return: tensor(-36.7690, device='cuda:0')
episode: 293 training return: tensor(-71.7582, device='cuda:0')
episode: 294 training return: tensor(-59.0754, device='cuda:0')
episode: 295 training return: tensor(-34.7259, device='cuda:0')
epoch: 74 test_true_pfm: 3968.505543257248 sim_pfm: -70.23330330050278
episode: 296 training return: tensor(-75.5196, device='cuda:0')
episode: 297 training return: tensor(-65.8393, device='cuda:0')
episode: 298 training return: tensor(-67.2197, device='cuda:0')
episode: 299 training return: tensor(-81.8511, device='cuda:0')
epoch: 75 test_true_pfm: 4041.7071441536755 sim_pfm: -68.87512478854235
episode: 300 training return: tensor(-98.3530, device='cuda:0')
episode: 301 training return: tensor(-84.4020, device='cuda:0')
episode: 302 training return: tensor(-781.9175, device='cuda:0')
episode: 303 training return: tensor(-19.9177, device='cuda:0')
epoch: 76 test_true_pfm: 4027.782633209688 sim_pfm: -38.56476997964395
episode: 304 training return: tensor(-70.6534, device='cuda:0')
episode: 305 training return: tensor(-91.0274, device='cuda:0')
episode: 306 training return: tensor(-59.3524, device='cuda:0')
episode: 307 training return: tensor(-69.3808, device='cuda:0')
epoch: 77 test_true_pfm: 3981.524853501965 sim_pfm: -53.05990624798384
episode: 308 training return: tensor(-822.8479, device='cuda:0')
episode: 309 training return: tensor(-27.5346, device='cuda:0')
episode: 310 training return: tensor(-73.3688, device='cuda:0')
episode: 311 training return: tensor(-65.2724, device='cuda:0')
epoch: 78 test_true_pfm: 4017.7646207663456 sim_pfm: -56.09019800163029
episode: 312 training return: tensor(-74.1381, device='cuda:0')
episode: 313 training return: tensor(-50.0501, device='cuda:0')
episode: 314 training return: tensor(-155.6772, device='cuda:0')
episode: 315 training return: tensor(-8.3916, device='cuda:0')
epoch: 79 test_true_pfm: 4004.45153540068 sim_pfm: -75.75387109089449
episode: 316 training return: tensor(-16.1631, device='cuda:0')
episode: 317 training return: tensor(-113.8038, device='cuda:0')
episode: 318 training return: tensor(-74.6375, device='cuda:0')
episode: 319 training return: tensor(-21.2902, device='cuda:0')
epoch: 80 test_true_pfm: 4029.5987600079297 sim_pfm: -33.518355932222526
episode: 320 training return: tensor(-75.3436, device='cuda:0')
episode: 321 training return: tensor(-150.1268, device='cuda:0')
episode: 322 training return: tensor(-58.2920, device='cuda:0')
episode: 323 training return: tensor(-104.2798, device='cuda:0')
epoch: 81 test_true_pfm: 4031.0989216102475 sim_pfm: -38.44278046059966
episode: 324 training return: tensor(-75.5398, device='cuda:0')
episode: 325 training return: tensor(-129.9629, device='cuda:0')
episode: 326 training return: tensor(-32.9387, device='cuda:0')
episode: 327 training return: tensor(-83.2128, device='cuda:0')
epoch: 82 test_true_pfm: 4014.6512849168153 sim_pfm: -39.62244614790931
episode: 328 training return: tensor(-29.9621, device='cuda:0')
episode: 329 training return: tensor(-81.9581, device='cuda:0')
episode: 330 training return: tensor(-134.5552, device='cuda:0')
episode: 331 training return: tensor(-30.5366, device='cuda:0')
epoch: 83 test_true_pfm: 4020.9283205702973 sim_pfm: -53.9543324058856
episode: 332 training return: tensor(9.8611, device='cuda:0')
episode: 333 training return: tensor(-92.0391, device='cuda:0')
episode: 334 training return: tensor(-109.2996, device='cuda:0')
episode: 335 training return: tensor(-63.5368, device='cuda:0')
epoch: 84 test_true_pfm: 4048.9712503396017 sim_pfm: -34.578578133698706
episode: 336 training return: tensor(-29.1145, device='cuda:0')
episode: 337 training return: tensor(-18.4522, device='cuda:0')
episode: 338 training return: tensor(-32.1202, device='cuda:0')
episode: 339 training return: tensor(-4.4444, device='cuda:0')
epoch: 85 test_true_pfm: 4035.0973327850916 sim_pfm: -21.12161224128795
episode: 340 training return: tensor(-797.6017, device='cuda:0')
episode: 341 training return: tensor(-123.0307, device='cuda:0')
episode: 342 training return: tensor(-14.6864, device='cuda:0')
episode: 343 training return: tensor(-34.9326, device='cuda:0')
epoch: 86 test_true_pfm: 4049.492665390256 sim_pfm: -35.69066856889791
episode: 344 training return: tensor(-21.0817, device='cuda:0')
episode: 345 training return: tensor(-30.4578, device='cuda:0')
episode: 346 training return: tensor(-20.0963, device='cuda:0')
episode: 347 training return: tensor(-27.2009, device='cuda:0')
epoch: 87 test_true_pfm: 4029.7952371310953 sim_pfm: -23.753357987016596
episode: 348 training return: tensor(-36.8485, device='cuda:0')
episode: 349 training return: tensor(-66.2510, device='cuda:0')
episode: 350 training return: tensor(-50.8835, device='cuda:0')
episode: 351 training return: tensor(-95.5047, device='cuda:0')
epoch: 88 test_true_pfm: 4019.0653015033276 sim_pfm: -35.2939269116711
episode: 352 training return: tensor(-65.6870, device='cuda:0')
episode: 353 training return: tensor(-731.1780, device='cuda:0')
episode: 354 training return: tensor(-49.8866, device='cuda:0')
episode: 355 training return: tensor(-50.7236, device='cuda:0')
epoch: 89 test_true_pfm: 4022.6167409810782 sim_pfm: -23.697716971859336
episode: 356 training return: tensor(-946.5534, device='cuda:0')
episode: 357 training return: tensor(-136.6118, device='cuda:0')
episode: 358 training return: tensor(-22.5909, device='cuda:0')
episode: 359 training return: tensor(-32.6609, device='cuda:0')
epoch: 90 test_true_pfm: 4018.9282262324646 sim_pfm: -47.17927491373848
episode: 360 training return: tensor(-30.8909, device='cuda:0')
episode: 361 training return: tensor(-105.3211, device='cuda:0')
episode: 362 training return: tensor(-52.2512, device='cuda:0')
episode: 363 training return: tensor(-35.2593, device='cuda:0')
epoch: 91 test_true_pfm: 4009.3454653317863 sim_pfm: -47.99931803006135
episode: 364 training return: tensor(-27.0265, device='cuda:0')
episode: 365 training return: tensor(-47.4172, device='cuda:0')
episode: 366 training return: tensor(-34.8396, device='cuda:0')
episode: 367 training return: tensor(-56.8716, device='cuda:0')
epoch: 92 test_true_pfm: 4014.354393419931 sim_pfm: -46.701935272081755
episode: 368 training return: tensor(-26.0582, device='cuda:0')
episode: 369 training return: tensor(-53.6834, device='cuda:0')
episode: 370 training return: tensor(-71.2189, device='cuda:0')
episode: 371 training return: tensor(-28.4229, device='cuda:0')
epoch: 93 test_true_pfm: 4000.759085347834 sim_pfm: -23.85585331522937
episode: 372 training return: tensor(-39.0668, device='cuda:0')
episode: 373 training return: tensor(-84.5527, device='cuda:0')
episode: 374 training return: tensor(-68.8656, device='cuda:0')
episode: 375 training return: tensor(-77.8657, device='cuda:0')
epoch: 94 test_true_pfm: 4040.0190178142984 sim_pfm: -41.50975773683361
episode: 376 training return: tensor(-13.4894, device='cuda:0')
episode: 377 training return: tensor(-57.5064, device='cuda:0')
episode: 378 training return: tensor(-95.3469, device='cuda:0')
episode: 379 training return: tensor(-24.9263, device='cuda:0')
epoch: 95 test_true_pfm: 4026.902361285093 sim_pfm: -37.46440026144652
episode: 380 training return: tensor(-61.6336, device='cuda:0')
episode: 381 training return: tensor(-58.2407, device='cuda:0')
episode: 382 training return: tensor(-59.8675, device='cuda:0')
episode: 383 training return: tensor(-32.1127, device='cuda:0')
epoch: 96 test_true_pfm: 4015.7189969697624 sim_pfm: -46.381728283168435
episode: 384 training return: tensor(-71.4675, device='cuda:0')
episode: 385 training return: tensor(-902.2219, device='cuda:0')
episode: 386 training return: tensor(-85.2278, device='cuda:0')
episode: 387 training return: tensor(-76.6647, device='cuda:0')
epoch: 97 test_true_pfm: 4056.933652969176 sim_pfm: -55.44358141126577
episode: 388 training return: tensor(-49.9384, device='cuda:0')
episode: 389 training return: tensor(-117.3701, device='cuda:0')
episode: 390 training return: tensor(-70.1315, device='cuda:0')
episode: 391 training return: tensor(-20.9157, device='cuda:0')
epoch: 98 test_true_pfm: 3954.4856106790926 sim_pfm: -72.35606396425283
episode: 392 training return: tensor(-23.3724, device='cuda:0')
episode: 393 training return: tensor(-96.6710, device='cuda:0')
episode: 394 training return: tensor(-60.0026, device='cuda:0')
episode: 395 training return: tensor(-101.9091, device='cuda:0')
epoch: 99 test_true_pfm: 4026.597641351627 sim_pfm: 0.415657568499834
episode: 396 training return: tensor(-33.8248, device='cuda:0')
episode: 397 training return: tensor(-94.6054, device='cuda:0')
episode: 398 training return: tensor(-36.7726, device='cuda:0')
episode: 399 training return: tensor(-44.5754, device='cuda:0')
epoch: 100 test_true_pfm: 3991.9882751356563 sim_pfm: -73.8063503166098
episode: 400 training return: tensor(-59.9287, device='cuda:0')
episode: 401 training return: tensor(-69.5743, device='cuda:0')
episode: 402 training return: tensor(-11.0152, device='cuda:0')
episode: 403 training return: tensor(-49.5786, device='cuda:0')
epoch: 101 test_true_pfm: 4018.287615894982 sim_pfm: -66.45399512347649
episode: 404 training return: tensor(-64.4273, device='cuda:0')
episode: 405 training return: tensor(-31.0168, device='cuda:0')
episode: 406 training return: tensor(-21.5329, device='cuda:0')
episode: 407 training return: tensor(-89.0963, device='cuda:0')
epoch: 102 test_true_pfm: 4022.1338513092833 sim_pfm: -47.25233111526662
episode: 408 training return: tensor(-16.3139, device='cuda:0')
episode: 409 training return: tensor(-22.7229, device='cuda:0')
episode: 410 training return: tensor(-63.7870, device='cuda:0')
episode: 411 training return: tensor(-32.9315, device='cuda:0')
epoch: 103 test_true_pfm: 4002.249272969388 sim_pfm: -30.874292535270797
episode: 412 training return: tensor(-38.1593, device='cuda:0')
episode: 413 training return: tensor(-36.7113, device='cuda:0')
episode: 414 training return: tensor(-102.5781, device='cuda:0')
episode: 415 training return: tensor(-124.9570, device='cuda:0')
epoch: 104 test_true_pfm: 4031.317658493872 sim_pfm: -53.45522864639255
episode: 416 training return: tensor(-60.8051, device='cuda:0')
episode: 417 training return: tensor(-45.2065, device='cuda:0')
episode: 418 training return: tensor(-59.9762, device='cuda:0')
episode: 419 training return: tensor(-12.5106, device='cuda:0')
epoch: 105 test_true_pfm: 4010.3825839067335 sim_pfm: -48.085415441300334
episode: 420 training return: tensor(-86.8606, device='cuda:0')
episode: 421 training return: tensor(-46.1212, device='cuda:0')
episode: 422 training return: tensor(-18.8978, device='cuda:0')
episode: 423 training return: tensor(-59.2312, device='cuda:0')
epoch: 106 test_true_pfm: 3989.570443115634 sim_pfm: -51.50375237124778
episode: 424 training return: tensor(-83.2034, device='cuda:0')
episode: 425 training return: tensor(-27.6781, device='cuda:0')
episode: 426 training return: tensor(-16.6560, device='cuda:0')
episode: 427 training return: tensor(-31.9750, device='cuda:0')
epoch: 107 test_true_pfm: 4050.593154810776 sim_pfm: -29.053669859936537
episode: 428 training return: tensor(-14.8521, device='cuda:0')
episode: 429 training return: tensor(-32.4334, device='cuda:0')
episode: 430 training return: tensor(-21.8997, device='cuda:0')
episode: 431 training return: tensor(-805.8818, device='cuda:0')
epoch: 108 test_true_pfm: 4013.354199917916 sim_pfm: -54.991897260702295
episode: 432 training return: tensor(-64.1326, device='cuda:0')
episode: 433 training return: tensor(-37.9009, device='cuda:0')
episode: 434 training return: tensor(-57.8036, device='cuda:0')
episode: 435 training return: tensor(-35.8208, device='cuda:0')
epoch: 109 test_true_pfm: 4018.786611559208 sim_pfm: -30.87405325701305
episode: 436 training return: tensor(-759.3625, device='cuda:0')
episode: 437 training return: tensor(-44.7082, device='cuda:0')
episode: 438 training return: tensor(-9.1914, device='cuda:0')
episode: 439 training return: tensor(-58.5863, device='cuda:0')
epoch: 110 test_true_pfm: 3990.166731515447 sim_pfm: -65.3719160661567
episode: 440 training return: tensor(-11.7157, device='cuda:0')
episode: 441 training return: tensor(-82.8294, device='cuda:0')
episode: 442 training return: tensor(-47.8258, device='cuda:0')
episode: 443 training return: tensor(-67.2537, device='cuda:0')
epoch: 111 test_true_pfm: 4014.382230781785 sim_pfm: -39.48133384261746
episode: 444 training return: tensor(-9.5354, device='cuda:0')
episode: 445 training return: tensor(-51.0939, device='cuda:0')
episode: 446 training return: tensor(-38.8797, device='cuda:0')
episode: 447 training return: tensor(-38.8567, device='cuda:0')
epoch: 112 test_true_pfm: 3996.3800487956337 sim_pfm: -47.76446280104574
episode: 448 training return: tensor(-36.3148, device='cuda:0')
episode: 449 training return: tensor(-50.0409, device='cuda:0')
episode: 450 training return: tensor(-31.9679, device='cuda:0')
episode: 451 training return: tensor(-29.9887, device='cuda:0')
epoch: 113 test_true_pfm: 4020.7402817892253 sim_pfm: -12.580600184291447
episode: 452 training return: tensor(0.1051, device='cuda:0')
episode: 453 training return: tensor(-77.4033, device='cuda:0')
episode: 454 training return: tensor(-27.4062, device='cuda:0')
episode: 455 training return: tensor(-17.3655, device='cuda:0')
epoch: 114 test_true_pfm: 4018.7738554034568 sim_pfm: -53.7492444118058
episode: 456 training return: tensor(-32.9301, device='cuda:0')
episode: 457 training return: tensor(-26.4019, device='cuda:0')
episode: 458 training return: tensor(-24.5243, device='cuda:0')
episode: 459 training return: tensor(-13.0599, device='cuda:0')
epoch: 115 test_true_pfm: 4036.6648688352793 sim_pfm: -49.25205532686474
episode: 460 training return: tensor(-29.7805, device='cuda:0')
episode: 461 training return: tensor(11.3269, device='cuda:0')
episode: 462 training return: tensor(-32.6245, device='cuda:0')
episode: 463 training return: tensor(-122.7295, device='cuda:0')
epoch: 116 test_true_pfm: 4013.921950333009 sim_pfm: -37.82911555600973
episode: 464 training return: tensor(-127.6898, device='cuda:0')
episode: 465 training return: tensor(-41.1549, device='cuda:0')
episode: 466 training return: tensor(-28.3487, device='cuda:0')
episode: 467 training return: tensor(-29.9299, device='cuda:0')
epoch: 117 test_true_pfm: 4052.080688431084 sim_pfm: -37.1485881289797
episode: 468 training return: tensor(-14.8143, device='cuda:0')
episode: 469 training return: tensor(-717.4190, device='cuda:0')
episode: 470 training return: tensor(-35.4135, device='cuda:0')
episode: 471 training return: tensor(-42.4252, device='cuda:0')
epoch: 118 test_true_pfm: 4003.583705600999 sim_pfm: -59.48224779728722
episode: 472 training return: tensor(-18.1514, device='cuda:0')
episode: 473 training return: tensor(-123.2834, device='cuda:0')
episode: 474 training return: tensor(-21.3615, device='cuda:0')
episode: 475 training return: tensor(-68.0414, device='cuda:0')
epoch: 119 test_true_pfm: 4015.8090095785633 sim_pfm: -22.848846676934045
episode: 476 training return: tensor(-31.3194, device='cuda:0')
episode: 477 training return: tensor(-98.9741, device='cuda:0')
episode: 478 training return: tensor(-20.0998, device='cuda:0')
episode: 479 training return: tensor(-29.3916, device='cuda:0')
epoch: 120 test_true_pfm: 4031.2200134927084 sim_pfm: -26.742743952219218
episode: 480 training return: tensor(-841.8237, device='cuda:0')
episode: 481 training return: tensor(-31.1137, device='cuda:0')
episode: 482 training return: tensor(-81.0355, device='cuda:0')
episode: 483 training return: tensor(-37.0942, device='cuda:0')
epoch: 121 test_true_pfm: 4028.264270212528 sim_pfm: -37.95774987070278
episode: 484 training return: tensor(-65.3730, device='cuda:0')
episode: 485 training return: tensor(-77.9775, device='cuda:0')
episode: 486 training return: tensor(-91.7660, device='cuda:0')
episode: 487 training return: tensor(-93.1001, device='cuda:0')
epoch: 122 test_true_pfm: 4036.9580304165825 sim_pfm: -19.50819765518342
episode: 488 training return: tensor(-61.5459, device='cuda:0')
episode: 489 training return: tensor(-38.0970, device='cuda:0')
episode: 490 training return: tensor(-30.8926, device='cuda:0')
episode: 491 training return: tensor(-91.3662, device='cuda:0')
epoch: 123 test_true_pfm: 4022.5130466385494 sim_pfm: -44.73165005569657
episode: 492 training return: tensor(-42.9512, device='cuda:0')
episode: 493 training return: tensor(35.0825, device='cuda:0')
episode: 494 training return: tensor(-33.6648, device='cuda:0')
episode: 495 training return: tensor(-77.3506, device='cuda:0')
epoch: 124 test_true_pfm: 4016.3249915852552 sim_pfm: -57.64270623853857
episode: 496 training return: tensor(-19.7083, device='cuda:0')
episode: 497 training return: tensor(-41.8060, device='cuda:0')
episode: 498 training return: tensor(-18.3272, device='cuda:0')
episode: 499 training return: tensor(-47.0149, device='cuda:0')
epoch: 125 test_true_pfm: 4017.9586037715794 sim_pfm: -43.62241700585582
episode: 500 training return: tensor(-24.9508, device='cuda:0')
episode: 501 training return: tensor(-42.4390, device='cuda:0')
episode: 502 training return: tensor(-67.9387, device='cuda:0')
episode: 503 training return: tensor(-44.2538, device='cuda:0')
epoch: 126 test_true_pfm: 3997.0815280253096 sim_pfm: -15.045310757714711
episode: 504 training return: tensor(-73.0899, device='cuda:0')
episode: 505 training return: tensor(-87.1400, device='cuda:0')
episode: 506 training return: tensor(-803.1394, device='cuda:0')
episode: 507 training return: tensor(-63.6756, device='cuda:0')
epoch: 127 test_true_pfm: 4020.1070758701026 sim_pfm: -51.97793095312469
episode: 508 training return: tensor(-66.5233, device='cuda:0')
episode: 509 training return: tensor(-6.7644, device='cuda:0')
episode: 510 training return: tensor(-79.9756, device='cuda:0')
episode: 511 training return: tensor(-38.7511, device='cuda:0')
epoch: 128 test_true_pfm: 4031.7888861094107 sim_pfm: -47.51805282954592
episode: 512 training return: tensor(-37.6256, device='cuda:0')
episode: 513 training return: tensor(-35.2956, device='cuda:0')
episode: 514 training return: tensor(-114.8581, device='cuda:0')
episode: 515 training return: tensor(-70.5709, device='cuda:0')
epoch: 129 test_true_pfm: 4025.315191227873 sim_pfm: -35.605414546327665
episode: 516 training return: tensor(-30.3022, device='cuda:0')
episode: 517 training return: tensor(-26.3368, device='cuda:0')
episode: 518 training return: tensor(-83.7162, device='cuda:0')
episode: 519 training return: tensor(-30.7807, device='cuda:0')
epoch: 130 test_true_pfm: 3994.045387588784 sim_pfm: -21.35381891472692
episode: 520 training return: tensor(-43.9487, device='cuda:0')
episode: 521 training return: tensor(-75.5447, device='cuda:0')
episode: 522 training return: tensor(-77.2672, device='cuda:0')
episode: 523 training return: tensor(-83.8014, device='cuda:0')
epoch: 131 test_true_pfm: 4016.961546671904 sim_pfm: -19.450041855239153
episode: 524 training return: tensor(-35.2704, device='cuda:0')
episode: 525 training return: tensor(-10.1485, device='cuda:0')
episode: 526 training return: tensor(-58.9764, device='cuda:0')
episode: 527 training return: tensor(-25.5161, device='cuda:0')
epoch: 132 test_true_pfm: 4001.4725227844433 sim_pfm: -41.44448492328714
episode: 528 training return: tensor(-23.8467, device='cuda:0')
episode: 529 training return: tensor(-34.0116, device='cuda:0')
episode: 530 training return: tensor(-22.9728, device='cuda:0')
episode: 531 training return: tensor(-63.6629, device='cuda:0')
epoch: 133 test_true_pfm: 4021.491042741027 sim_pfm: -31.97957066631837
episode: 532 training return: tensor(-19.8871, device='cuda:0')
episode: 533 training return: tensor(-34.9930, device='cuda:0')
episode: 534 training return: tensor(-40.5453, device='cuda:0')
episode: 535 training return: tensor(-19.5060, device='cuda:0')
epoch: 134 test_true_pfm: 4056.3622651831042 sim_pfm: -16.667721638048533
episode: 536 training return: tensor(-116.7122, device='cuda:0')
episode: 537 training return: tensor(-60.5230, device='cuda:0')
episode: 538 training return: tensor(-39.8358, device='cuda:0')
episode: 539 training return: tensor(-24.3148, device='cuda:0')
epoch: 135 test_true_pfm: 4056.783622659774 sim_pfm: -17.92960169836685
episode: 540 training return: tensor(2.9576, device='cuda:0')
episode: 541 training return: tensor(-19.5073, device='cuda:0')
episode: 542 training return: tensor(-35.0816, device='cuda:0')
episode: 543 training return: tensor(-60.9139, device='cuda:0')
epoch: 136 test_true_pfm: 4052.4336685416692 sim_pfm: -24.334311433534214
episode: 544 training return: tensor(-92.3344, device='cuda:0')
episode: 545 training return: tensor(-27.0223, device='cuda:0')
episode: 546 training return: tensor(-17.2594, device='cuda:0')
episode: 547 training return: tensor(-39.9972, device='cuda:0')
epoch: 137 test_true_pfm: 4041.279487560167 sim_pfm: -37.8293681073701
episode: 548 training return: tensor(-30.0895, device='cuda:0')
episode: 549 training return: tensor(-111.3942, device='cuda:0')
episode: 550 training return: tensor(-9.2954, device='cuda:0')
episode: 551 training return: tensor(-51.4512, device='cuda:0')
epoch: 138 test_true_pfm: 4047.42039347526 sim_pfm: -34.81052610482826
episode: 552 training return: tensor(-19.0262, device='cuda:0')
episode: 553 training return: tensor(-28.9779, device='cuda:0')
episode: 554 training return: tensor(-8.5786, device='cuda:0')
episode: 555 training return: tensor(-58.5455, device='cuda:0')
epoch: 139 test_true_pfm: 4017.0996354498316 sim_pfm: -28.959571664085768
episode: 556 training return: tensor(3.1191, device='cuda:0')
episode: 557 training return: tensor(-52.5447, device='cuda:0')
episode: 558 training return: tensor(-69.5370, device='cuda:0')
episode: 559 training return: tensor(-5.3405, device='cuda:0')
epoch: 140 test_true_pfm: 4003.3833560639255 sim_pfm: -45.37794643283511
episode: 560 training return: tensor(-74.0900, device='cuda:0')
episode: 561 training return: tensor(-21.9324, device='cuda:0')
episode: 562 training return: tensor(-17.1197, device='cuda:0')
episode: 563 training return: tensor(-66.8805, device='cuda:0')
epoch: 141 test_true_pfm: 4044.4341473229547 sim_pfm: -41.69414488526915
episode: 564 training return: tensor(-22.6135, device='cuda:0')
episode: 565 training return: tensor(-86.7107, device='cuda:0')
episode: 566 training return: tensor(-56.6349, device='cuda:0')
episode: 567 training return: tensor(-41.1101, device='cuda:0')
epoch: 142 test_true_pfm: 3997.1341992617035 sim_pfm: -17.01855119781491
episode: 568 training return: tensor(-37.0840, device='cuda:0')
episode: 569 training return: tensor(-43.8335, device='cuda:0')
episode: 570 training return: tensor(-52.5008, device='cuda:0')
episode: 571 training return: tensor(-65.6770, device='cuda:0')
epoch: 143 test_true_pfm: 4004.130124200167 sim_pfm: -13.725650281994604
episode: 572 training return: tensor(-19.0359, device='cuda:0')
episode: 573 training return: tensor(-44.5968, device='cuda:0')
episode: 574 training return: tensor(-140.3055, device='cuda:0')
episode: 575 training return: tensor(-96.7056, device='cuda:0')
epoch: 144 test_true_pfm: 4017.185578418363 sim_pfm: -40.05961843612992
episode: 576 training return: tensor(-6.4012, device='cuda:0')
episode: 577 training return: tensor(-51.6743, device='cuda:0')
episode: 578 training return: tensor(-919.2725, device='cuda:0')
episode: 579 training return: tensor(-9.3612, device='cuda:0')
epoch: 145 test_true_pfm: 4018.835617754816 sim_pfm: -33.488949554991756
episode: 580 training return: tensor(-39.9651, device='cuda:0')
episode: 581 training return: tensor(-93.3712, device='cuda:0')
episode: 582 training return: tensor(2.5777, device='cuda:0')
episode: 583 training return: tensor(-70.8700, device='cuda:0')
epoch: 146 test_true_pfm: 4003.519428409291 sim_pfm: -16.28296345054211
episode: 584 training return: tensor(-55.1217, device='cuda:0')
episode: 585 training return: tensor(-705.5623, device='cuda:0')
episode: 586 training return: tensor(-45.2758, device='cuda:0')
episode: 587 training return: tensor(-66.4932, device='cuda:0')
epoch: 147 test_true_pfm: 4023.7990061932687 sim_pfm: -22.405360357040383
episode: 588 training return: tensor(-36.4890, device='cuda:0')
episode: 589 training return: tensor(-42.7700, device='cuda:0')
episode: 590 training return: tensor(-13.4277, device='cuda:0')
episode: 591 training return: tensor(-26.8580, device='cuda:0')
epoch: 148 test_true_pfm: 4044.1983152876946 sim_pfm: -28.61547174033088
episode: 592 training return: tensor(-71.6803, device='cuda:0')
episode: 593 training return: tensor(-82.9079, device='cuda:0')
episode: 594 training return: tensor(-56.1696, device='cuda:0')
episode: 595 training return: tensor(-3.4258, device='cuda:0')
epoch: 149 test_true_pfm: 4048.261901925638 sim_pfm: -19.6537977453069
episode: 596 training return: tensor(-13.0019, device='cuda:0')
episode: 597 training return: tensor(0.7335, device='cuda:0')
episode: 598 training return: tensor(-44.5498, device='cuda:0')
episode: 599 training return: tensor(-27.5716, device='cuda:0')
epoch: 150 test_true_pfm: 4045.611923341454 sim_pfm: -29.9496334407789
