['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '1', '--data', '10000']
epoch: 0 training_loss 0.2891653813421726 test_loss: 0.18352617025375367
epoch: 1 training_loss 0.15616913959383966 test_loss: 0.15622231960296631
epoch: 2 training_loss 0.13803641244769096 test_loss: 0.14505381584167482
epoch: 3 training_loss 0.12497441459447145 test_loss: 0.13847339153289795
epoch: 4 training_loss 0.11655376441776752 test_loss: 0.12318533658981323
epoch: 5 training_loss 0.1137661837413907 test_loss: 0.11752862930297851
epoch: 6 training_loss 0.1121694004163146 test_loss: 0.1237371325492859
epoch: 7 training_loss 0.10017561230808497 test_loss: 0.10679419040679931
epoch: 8 training_loss 0.09761766018345952 test_loss: 0.12012674808502197
epoch: 9 training_loss 0.09566784873604775 test_loss: 0.1080209493637085
epoch: 10 training_loss 0.09665018949657679 test_loss: 0.11914225816726684
epoch: 11 training_loss 0.0992630840651691 test_loss: 0.12122527360916138
epoch: 12 training_loss 0.09954880127683281 test_loss: 0.11762014627456666
epoch: 13 training_loss 0.09880733106285333 test_loss: 0.11036330461502075
epoch: 14 training_loss 0.09803292494267225 test_loss: 0.10736632347106934
epoch: 15 training_loss 0.09716250617057084 test_loss: 0.11074972152709961
epoch: 16 training_loss 0.0903793904185295 test_loss: 0.11260406970977783
epoch: 17 training_loss 0.08408048050478101 test_loss: 0.10180553197860717
epoch: 18 training_loss 0.09402312513440847 test_loss: 0.12859013080596923
epoch: 19 training_loss 0.09259047277271748 test_loss: 0.11847577095031739
epoch: 20 training_loss 0.08989471171051264 test_loss: 0.11443110704421997
epoch: 21 training_loss 0.08686820151284337 test_loss: 0.10335074663162232
epoch: 22 training_loss 0.08350374994799495 test_loss: 0.10415735244750976
epoch: 23 training_loss 0.08802835550159216 test_loss: 0.11099674701690673
epoch: 24 training_loss 0.0846387318149209 test_loss: 0.12571861743927001
epoch: 25 training_loss 0.08887873703613877 test_loss: 0.10341000556945801
epoch: 26 training_loss 0.08355700742453337 test_loss: 0.10049854516983033
epoch: 27 training_loss 0.08110807312652468 test_loss: 0.10754374265670777
epoch: 28 training_loss 0.08222277913242579 test_loss: 0.09591042399406433
epoch: 29 training_loss 0.0855091966688633 test_loss: 0.11267235279083251
epoch: 30 training_loss 0.08541887637227774 test_loss: 0.11646113395690919
epoch: 31 training_loss 0.09136093134060502 test_loss: 0.1115912675857544
epoch: 32 training_loss 0.082685212995857 test_loss: 0.10929899215698242
epoch: 33 training_loss 0.08227234723046423 test_loss: 0.10759602785110474
epoch: 34 training_loss 0.08096058213151991 test_loss: 0.11778364181518555
epoch: 35 training_loss 0.08936914604157209 test_loss: 0.10397812128067016
epoch: 36 training_loss 0.08467309581115842 test_loss: 0.1243207335472107
epoch: 37 training_loss 0.08483042933046818 test_loss: 0.11302121877670288
epoch: 38 training_loss 0.07419280745089055 test_loss: 0.09648056030273437
epoch: 39 training_loss 0.08074450856074691 test_loss: 0.12190688848495483
epoch: 40 training_loss 0.08143587909638882 test_loss: 0.10971112251281738
epoch: 41 training_loss 0.07872292831540108 test_loss: 0.11703257560729981
epoch: 42 training_loss 0.0806039746850729 test_loss: 0.11198375225067139
epoch: 43 training_loss 0.08324782075360418 test_loss: 0.1409103512763977
epoch: 44 training_loss 0.0763559652864933 test_loss: 0.11141728162765503
epoch: 45 training_loss 0.07872112307697535 test_loss: 0.10920940637588501
epoch: 46 training_loss 0.07969712471589446 test_loss: 0.11808258295059204
epoch: 47 training_loss 0.07222093594260513 test_loss: 0.13540738821029663
epoch: 48 training_loss 0.07926735358312725 test_loss: 0.1145760178565979
epoch: 49 training_loss 0.07226231012493373 test_loss: 0.12619235515594482
epoch: 50 training_loss 0.07504785984754563 test_loss: 0.12771129608154297
epoch: 51 training_loss 0.07651948969811201 test_loss: 0.14506505727767943
epoch: 52 training_loss 0.07943148925900459 test_loss: 0.11588053703308106
epoch: 53 training_loss 0.06938757320865989 test_loss: 0.10793932676315307
epoch: 54 training_loss 0.07088990605436266 test_loss: 0.13249982595443727
epoch: 55 training_loss 0.07466164615005255 test_loss: 0.11341981887817383
epoch: 56 training_loss 0.07043234031647444 test_loss: 0.13694236278533936
epoch: 57 training_loss 0.06935459185391664 test_loss: 0.12206019163131714
epoch: 58 training_loss 0.07023276379331947 test_loss: 0.13908990621566772
epoch: 59 training_loss 0.06995704468339682 test_loss: 0.11649125814437866
epoch: 60 training_loss 0.06875666167587041 test_loss: 0.11268832683563232
epoch: 61 training_loss 0.06665877943858504 test_loss: 0.12872976064682007
epoch: 62 training_loss 0.07286014327779412 test_loss: 0.11203044652938843
epoch: 63 training_loss 0.06814562832936645 test_loss: 0.11388354301452637
epoch: 64 training_loss 0.06708359140902757 test_loss: 0.121189284324646
epoch: 65 training_loss 0.06465037308633327 test_loss: 0.13082640171051024
epoch: 66 training_loss 0.06370592351071537 test_loss: 0.1445605516433716
epoch: 67 training_loss 0.06630221867933869 test_loss: 0.11981154680252075
epoch: 68 training_loss 0.059666859451681376 test_loss: 0.11618757247924805
epoch: 69 training_loss 0.06545256609097123 test_loss: 0.13180227279663087
epoch: 70 training_loss 0.062469114065170285 test_loss: 0.13009675741195678
epoch: 71 training_loss 0.06368315725587309 test_loss: 0.136772620677948
epoch: 72 training_loss 0.06657199751585723 test_loss: 0.12467280626296998
epoch: 73 training_loss 0.060780816674232484 test_loss: 0.13325482606887817
epoch: 74 training_loss 0.059635203871876 test_loss: 0.13166964054107666
epoch: 75 training_loss 0.057872608909383415 test_loss: 0.1273580551147461
epoch: 76 training_loss 0.06019123319536448 test_loss: 0.12823606729507447
epoch: 77 training_loss 0.06325189609080553 test_loss: 0.1271488308906555
epoch: 78 training_loss 0.05686973382718861 test_loss: 0.14897000789642334
epoch: 79 training_loss 0.06052959933876991 test_loss: 0.12908042669296266
epoch: 80 training_loss 0.05940763793885708 test_loss: 0.14217007160186768
epoch: 81 training_loss 0.06096468553878367 test_loss: 0.12842705249786376
epoch: 82 training_loss 0.05827645478770137 test_loss: 0.12205840349197387
epoch: 83 training_loss 0.0558386982884258 test_loss: 0.13359148502349855
epoch: 84 training_loss 0.058847363712266086 test_loss: 0.1226200819015503
epoch: 85 training_loss 0.05739338917657733 test_loss: 0.14116475582122803
epoch: 86 training_loss 0.056272131502628324 test_loss: 0.13701604604721068
epoch: 87 training_loss 0.052261451240628955 test_loss: 0.14593392610549927
epoch: 88 training_loss 0.05714856313541532 test_loss: 0.14569867849349977
epoch: 89 training_loss 0.05193073854781687 test_loss: 0.133298659324646
epoch: 90 training_loss 0.04905705003067851 test_loss: 0.13750332593917847
epoch: 91 training_loss 0.05166988628916443 test_loss: 0.14144357442855834
epoch: 92 training_loss 0.05071568694896996 test_loss: 0.14181479215621948
epoch: 93 training_loss 0.04898184909485281 test_loss: 0.1395142197608948
epoch: 94 training_loss 0.05765331722795963 test_loss: 0.14128895998001098
epoch: 95 training_loss 0.04676586015149951 test_loss: 0.1289760708808899
epoch: 96 training_loss 0.04641333560459316 test_loss: 0.15579233169555665
epoch: 97 training_loss 0.053801397383213045 test_loss: 0.13212249279022217
epoch: 98 training_loss 0.054681804515421394 test_loss: 0.14175760746002197
epoch: 99 training_loss 0.049904611660167575 test_loss: 0.1619460701942444
epoch: 100 training_loss 0.04721220847219229 test_loss: 0.12322136163711547
epoch: 101 training_loss 0.050555425733327865 test_loss: 0.14042108058929442
epoch: 102 training_loss 0.0481060297973454 test_loss: 0.14060628414154053
epoch: 103 training_loss 0.044534737104550005 test_loss: 0.14076464176177977
epoch: 104 training_loss 0.04143287627026439 test_loss: 0.1448211193084717
epoch: 105 training_loss 0.044264584351330995 test_loss: 0.14862349033355712
epoch: 106 training_loss 0.04361321703530848 test_loss: 0.1553433895111084
epoch: 107 training_loss 0.038117770506069064 test_loss: 0.15152652263641359
epoch: 108 training_loss 0.04784837598912418 test_loss: 0.15131720304489135
epoch: 109 training_loss 0.04830992358736694 test_loss: 0.14366657733917237
epoch: 110 training_loss 0.04317608995363116 test_loss: 0.1600991368293762
epoch: 111 training_loss 0.042372729126363994 test_loss: 0.15282546281814574
epoch: 112 training_loss 0.045689958115108315 test_loss: 0.13615918159484863
epoch: 113 training_loss 0.04552589725703001 test_loss: 0.1470617890357971
epoch: 114 training_loss 0.037855860451236366 test_loss: 0.14718291759490967
epoch: 115 training_loss 0.03950247671455145 test_loss: 0.15275477170944213
epoch: 116 training_loss 0.0424441765062511 test_loss: 0.15329625606536865
epoch: 117 training_loss 0.038568062307313085 test_loss: 0.14794000387191772
epoch: 118 training_loss 0.037703140489757064 test_loss: 0.14407895803451537
epoch: 119 training_loss 0.03970358855091036 test_loss: 0.16264380216598512
epoch: 120 training_loss 0.04192294417880475 test_loss: 0.16166534423828124
epoch: 121 training_loss 0.03856432840228081 test_loss: 0.14623137712478637
epoch: 122 training_loss 0.0391756185516715 test_loss: 0.1693948030471802
epoch: 123 training_loss 0.04266966372728348 test_loss: 0.16479010581970216
epoch: 124 training_loss 0.04122204635757953 test_loss: 0.16119223833084106
epoch: 125 training_loss 0.035928491838276386 test_loss: 0.1570933699607849
epoch: 126 training_loss 0.032390899248421194 test_loss: 0.14705787897109984
epoch: 127 training_loss 0.038480077907443046 test_loss: 0.18573182821273804
epoch: 128 training_loss 0.0316028596367687 test_loss: 0.16157699823379518
epoch: 129 training_loss 0.03505346277728677 test_loss: 0.17089638710021973
epoch: 130 training_loss 0.03471493133343756 test_loss: 0.1567789673805237
epoch: 131 training_loss 0.03869943582452834 test_loss: 0.13814014196395874
epoch: 132 training_loss 0.034227522872388366 test_loss: 0.16295270919799804
epoch: 133 training_loss 0.030812516724690795 test_loss: 0.17321648597717285
epoch: 134 training_loss 0.03446334951557219 test_loss: 0.16514476537704467
epoch: 135 training_loss 0.03134252166375518 test_loss: 0.1528657555580139
epoch: 136 training_loss 0.03198750262148678 test_loss: 0.15669511556625365
epoch: 137 training_loss 0.0316603140719235 test_loss: 0.1691317081451416
epoch: 138 training_loss 0.034061107868328694 test_loss: 0.16580438613891602
epoch: 139 training_loss 0.034687545830383894 test_loss: 0.17120248079299927
epoch: 140 training_loss 0.031172470063902436 test_loss: 0.15872950553894044
epoch: 141 training_loss 0.032326938956975936 test_loss: 0.1395532011985779
epoch: 142 training_loss 0.029089758237823844 test_loss: 0.18206001520156861
epoch: 143 training_loss 0.029994279369711874 test_loss: 0.16776310205459594
epoch: 144 training_loss 0.030516609875485302 test_loss: 0.15894523859024048
epoch: 145 training_loss 0.02935608981177211 test_loss: 0.1642041325569153
epoch: 146 training_loss 0.027469794829376042 test_loss: 0.16204832792282103
epoch: 147 training_loss 0.027000059811398386 test_loss: 0.19104048013687133
epoch: 148 training_loss 0.02781033467967063 test_loss: 0.17792201042175293
epoch: 149 training_loss 0.02770609514322132 test_loss: 0.17688958644866942
epoch: 0 training_loss 51.805949993133545 test_loss: 24.28944091796875
epoch: 1 training_loss 18.367640571594237 test_loss: 14.915179443359374
epoch: 2 training_loss 12.915926008224487 test_loss: 11.434700775146485
epoch: 3 training_loss 9.88095989227295 test_loss: 9.198602294921875
epoch: 4 training_loss 8.486914939880371 test_loss: 8.01064453125
epoch: 5 training_loss 7.391681137084961 test_loss: 7.096976470947266
epoch: 6 training_loss 6.7450298500061034 test_loss: 6.268220138549805
epoch: 7 training_loss 6.119137477874756 test_loss: 5.934714889526367
epoch: 8 training_loss 5.446512904167175 test_loss: 5.491821670532227
epoch: 9 training_loss 5.0789700126647945 test_loss: 5.078735733032227
epoch: 10 training_loss 4.674575533866882 test_loss: 4.570607376098633
epoch: 11 training_loss 4.302895736694336 test_loss: 4.391547775268554
epoch: 12 training_loss 4.057752408981323 test_loss: 4.261098861694336
epoch: 13 training_loss 3.9863046622276306 test_loss: 4.104733657836914
epoch: 14 training_loss 3.7218671226501465 test_loss: 3.8190322875976563
epoch: 15 training_loss 3.5326523089408877 test_loss: 3.7200416564941405
epoch: 16 training_loss 3.3752972412109377 test_loss: 3.662615966796875
epoch: 17 training_loss 3.310880675315857 test_loss: 3.4316116333007813
epoch: 18 training_loss 3.1598540902137757 test_loss: 3.3606552124023437
epoch: 19 training_loss 3.061788549423218 test_loss: 3.092051887512207
epoch: 20 training_loss 2.9940025711059572 test_loss: 3.1766292572021486
epoch: 21 training_loss 2.9250825548172 test_loss: 3.0506433486938476
epoch: 22 training_loss 2.859966721534729 test_loss: 2.8988422393798827
epoch: 23 training_loss 2.8011688804626464 test_loss: 2.87685489654541
epoch: 24 training_loss 2.6881666588783264 test_loss: 2.804676818847656
epoch: 25 training_loss 2.6513647556304933 test_loss: 2.6742359161376954
epoch: 26 training_loss 2.650322163105011 test_loss: 2.654258155822754
epoch: 27 training_loss 2.631777386665344 test_loss: 2.6648582458496093
epoch: 28 training_loss 2.4959456276893617 test_loss: 2.6128824234008787
epoch: 29 training_loss 2.4573500657081606 test_loss: 2.4979951858520506
epoch: 30 training_loss 2.464226703643799 test_loss: 2.631138229370117
epoch: 31 training_loss 2.4013831460475923 test_loss: 2.61151065826416
epoch: 32 training_loss 2.3467462348937986 test_loss: 2.4964530944824217
epoch: 33 training_loss 2.302268843650818 test_loss: 2.3838937759399412
epoch: 34 training_loss 2.3105589032173155 test_loss: 2.4283966064453124
epoch: 35 training_loss 2.248020337820053 test_loss: 2.4273744583129884
epoch: 36 training_loss 2.2163681519031524 test_loss: 2.464703178405762
epoch: 37 training_loss 2.192404878139496 test_loss: 2.364506149291992
epoch: 38 training_loss 2.179978904724121 test_loss: 2.2950925827026367
epoch: 39 training_loss 2.1700605821609495 test_loss: 2.330924415588379
epoch: 40 training_loss 2.1163138270378115 test_loss: 2.4195249557495115
epoch: 41 training_loss 2.1157570910453796 test_loss: 2.263108825683594
epoch: 42 training_loss 2.094616931676865 test_loss: 2.2720621109008787
epoch: 43 training_loss 2.0820466339588166 test_loss: 2.2725797653198243
epoch: 44 training_loss 2.0614085042476655 test_loss: 2.1960886001586912
epoch: 45 training_loss 2.064725686311722 test_loss: 2.1432994842529296
epoch: 46 training_loss 2.02771205663681 test_loss: 2.1678083419799803
epoch: 47 training_loss 2.0337794971466066 test_loss: 2.175554084777832
epoch: 48 training_loss 2.038530571460724 test_loss: 2.0591053009033202
epoch: 49 training_loss 1.9930888068675996 test_loss: 2.126361846923828
epoch: 50 training_loss 1.9900094842910767 test_loss: 2.1016605377197264
epoch: 51 training_loss 1.999177770614624 test_loss: 2.0476747512817384
epoch: 52 training_loss 1.9551369559764862 test_loss: 2.1688976287841797
epoch: 53 training_loss 1.9697599232196807 test_loss: 2.094456672668457
epoch: 54 training_loss 1.9043953502178192 test_loss: 2.074824333190918
epoch: 55 training_loss 1.9062442922592162 test_loss: 2.1061609268188475
epoch: 56 training_loss 1.93106942653656 test_loss: 2.02028751373291
epoch: 57 training_loss 1.8958147394657134 test_loss: 1.9781187057495118
epoch: 58 training_loss 1.9109553217887878 test_loss: 2.006534194946289
epoch: 59 training_loss 1.845105893611908 test_loss: 2.0623249053955077
epoch: 60 training_loss 1.8304266905784607 test_loss: 1.949655532836914
epoch: 61 training_loss 1.8378296399116516 test_loss: 1.838435173034668
epoch: 62 training_loss 1.8450023889541627 test_loss: 2.0077163696289064
epoch: 63 training_loss 1.8573846757411956 test_loss: 1.8554561614990235
epoch: 64 training_loss 1.784654815196991 test_loss: 1.9214460372924804
epoch: 65 training_loss 1.8076151847839355 test_loss: 1.9582687377929688
epoch: 66 training_loss 1.7853082096576691 test_loss: 1.860990333557129
epoch: 67 training_loss 1.8024175488948821 test_loss: 1.9595844268798828
epoch: 68 training_loss 1.7880102574825287 test_loss: 1.9317224502563477
epoch: 69 training_loss 1.7465559875965118 test_loss: 1.9141345977783204
epoch: 70 training_loss 1.7263657653331757 test_loss: 1.8287437438964844
epoch: 71 training_loss 1.7544604885578154 test_loss: 1.8219045639038085
epoch: 72 training_loss 1.745330365896225 test_loss: 1.89661865234375
epoch: 73 training_loss 1.7574158775806428 test_loss: 1.8584671020507812
epoch: 74 training_loss 1.7509045267105103 test_loss: 1.748992919921875
epoch: 75 training_loss 1.7771586751937867 test_loss: 1.8401235580444335
epoch: 76 training_loss 1.748642120361328 test_loss: 1.880643081665039
epoch: 77 training_loss 1.6976888811588287 test_loss: 1.8650793075561523
epoch: 78 training_loss 1.6897724616527556 test_loss: 1.7190250396728515
epoch: 79 training_loss 1.7758732342720032 test_loss: 1.864979362487793
epoch: 80 training_loss 1.746474256515503 test_loss: 1.7816131591796875
epoch: 81 training_loss 1.6951599168777465 test_loss: 1.8470840454101562
epoch: 82 training_loss 1.6734131479263306 test_loss: 1.8245916366577148
epoch: 83 training_loss 1.6858041560649872 test_loss: 1.8452520370483398
epoch: 84 training_loss 1.6523026371002196 test_loss: 1.6969371795654298
epoch: 85 training_loss 1.6694634974002838 test_loss: 1.8223278045654296
epoch: 86 training_loss 1.6724506187438966 test_loss: 1.757957649230957
epoch: 87 training_loss 1.66298264503479 test_loss: 1.7846065521240235
epoch: 88 training_loss 1.632916637659073 test_loss: 1.8188491821289063
epoch: 89 training_loss 1.6518407917022706 test_loss: 1.7013334274291991
epoch: 90 training_loss 1.6339521789550782 test_loss: 1.7731935501098632
epoch: 91 training_loss 1.6320052742958069 test_loss: 1.7375524520874024
epoch: 92 training_loss 1.617047153711319 test_loss: 1.7784290313720703
epoch: 93 training_loss 1.5999670922756195 test_loss: 1.7288217544555664
epoch: 94 training_loss 1.5969081687927247 test_loss: 1.74132080078125
epoch: 95 training_loss 1.6045615828037263 test_loss: 1.7401542663574219
epoch: 96 training_loss 1.6081825399398804 test_loss: 1.7713829040527345
epoch: 97 training_loss 1.5895949506759643 test_loss: 1.7327999114990233
epoch: 98 training_loss 1.5888342368602753 test_loss: 1.6740972518920898
epoch: 99 training_loss 1.6167030489444734 test_loss: 1.6960538864135741
epoch: 100 training_loss 1.5820486009120942 test_loss: 1.6052410125732421
epoch: 101 training_loss 1.5719523310661316 test_loss: 1.7532335281372071
epoch: 102 training_loss 1.5653081238269806 test_loss: 1.682855224609375
epoch: 103 training_loss 1.5487597143650056 test_loss: 1.712610626220703
epoch: 104 training_loss 1.5583343780040741 test_loss: 1.6153587341308593
epoch: 105 training_loss 1.5270812976360322 test_loss: 1.6923871994018556
epoch: 106 training_loss 1.538837720155716 test_loss: 1.6022455215454101
epoch: 107 training_loss 1.5677245950698853 test_loss: 1.7406276702880858
epoch: 108 training_loss 1.5533678662776946 test_loss: 1.6828092575073241
epoch: 109 training_loss 1.5463108110427857 test_loss: 1.6470361709594727
epoch: 110 training_loss 1.5705873441696168 test_loss: 1.692068862915039
epoch: 111 training_loss 1.529868298768997 test_loss: 1.6223833084106445
epoch: 112 training_loss 1.561724464893341 test_loss: 1.6211576461791992
epoch: 113 training_loss 1.5218181276321412 test_loss: 1.650341796875
epoch: 114 training_loss 1.5332100415229797 test_loss: 1.6107706069946288
epoch: 115 training_loss 1.5327964532375336 test_loss: 1.641267967224121
epoch: 116 training_loss 1.5192715811729431 test_loss: 1.668184471130371
epoch: 117 training_loss 1.5200542414188385 test_loss: 1.647418212890625
epoch: 118 training_loss 1.5343144989013673 test_loss: 1.5854369163513184
epoch: 119 training_loss 1.5175801467895509 test_loss: 1.6567472457885741
epoch: 120 training_loss 1.5044631326198579 test_loss: 1.6261274337768554
epoch: 121 training_loss 1.5206763088703155 test_loss: 1.6520328521728516
epoch: 122 training_loss 1.5182786774635315 test_loss: 1.6026994705200195
epoch: 123 training_loss 1.4883473753929137 test_loss: 1.67342529296875
epoch: 124 training_loss 1.5093029057979583 test_loss: 1.5954461097717285
epoch: 125 training_loss 1.5220368695259094 test_loss: 1.5663724899291993
epoch: 126 training_loss 1.4760950422286987 test_loss: 1.6292173385620117
epoch: 127 training_loss 1.4872172355651856 test_loss: 1.6623964309692383
epoch: 128 training_loss 1.4882690918445587 test_loss: 1.6241069793701173
epoch: 129 training_loss 1.4965239489078521 test_loss: 1.5661500930786132
epoch: 130 training_loss 1.4828323101997376 test_loss: 1.6003290176391602
epoch: 131 training_loss 1.4571419036388398 test_loss: 1.5830202102661133
epoch: 132 training_loss 1.4518830811977386 test_loss: 1.5544327735900878
epoch: 133 training_loss 1.4997600769996644 test_loss: 1.635568618774414
epoch: 134 training_loss 1.5027717387676238 test_loss: 1.5873693466186523
epoch: 135 training_loss 1.4508739149570464 test_loss: 1.5769131660461426
epoch: 136 training_loss 1.4537740790843963 test_loss: 1.6110822677612304
epoch: 137 training_loss 1.4680396151542663 test_loss: 1.5686261177062988
epoch: 138 training_loss 1.467799174785614 test_loss: 1.5841865539550781
epoch: 139 training_loss 1.4578007376194 test_loss: 1.6100492477416992
epoch: 140 training_loss 1.4526423072814942 test_loss: 1.563399314880371
epoch: 141 training_loss 1.453852620124817 test_loss: 1.5482831001281738
epoch: 142 training_loss 1.4760878264904023 test_loss: 1.5806238174438476
epoch: 143 training_loss 1.4574869406223296 test_loss: 1.5171324729919433
epoch: 144 training_loss 1.4338966953754424 test_loss: 1.5988747596740722
epoch: 145 training_loss 1.4501161170005799 test_loss: 1.5952439308166504
epoch: 146 training_loss 1.4289842224121094 test_loss: 1.5780119895935059
epoch: 147 training_loss 1.432018632888794 test_loss: 1.5385523796081544
epoch: 148 training_loss 1.4571624350547792 test_loss: 1.5230376243591308
epoch: 149 training_loss 1.4326750719547272 test_loss: 1.5290648460388183
5128.396873020661
episode: 0 training return: tensor(-14.5647, device='cuda:0')
episode: 1 training return: tensor(27.2725, device='cuda:0')
episode: 2 training return: tensor(-76.1084, device='cuda:0')
episode: 3 training return: tensor(19.0418, device='cuda:0')
epoch: 1 test_true_pfm: 5038.544641261323 sim_pfm: 147.72036010632291
episode: 4 training return: tensor(-37.1386, device='cuda:0')
episode: 5 training return: tensor(51.1772, device='cuda:0')
episode: 6 training return: tensor(-271.1729, device='cuda:0')
episode: 7 training return: tensor(13.6544, device='cuda:0')
epoch: 2 test_true_pfm: 4992.446082377707 sim_pfm: 12.22448102174288
episode: 8 training return: tensor(65.0499, device='cuda:0')
episode: 9 training return: tensor(-11.5698, device='cuda:0')
episode: 10 training return: tensor(-68.7735, device='cuda:0')
episode: 11 training return: tensor(14.1661, device='cuda:0')
epoch: 3 test_true_pfm: 5049.652578371459 sim_pfm: -49.97473074418182
episode: 12 training return: tensor(171.8911, device='cuda:0')
episode: 13 training return: tensor(40.8410, device='cuda:0')
episode: 14 training return: tensor(63.3318, device='cuda:0')
episode: 15 training return: tensor(55.6059, device='cuda:0')
epoch: 4 test_true_pfm: 5002.945671860187 sim_pfm: 79.40458282878778
episode: 16 training return: tensor(46.2918, device='cuda:0')
episode: 17 training return: tensor(109.7500, device='cuda:0')
episode: 18 training return: tensor(-56.7960, device='cuda:0')
episode: 19 training return: tensor(82.4304, device='cuda:0')
epoch: 5 test_true_pfm: 5120.321609759746 sim_pfm: 98.60660799189161
episode: 20 training return: tensor(74.8298, device='cuda:0')
episode: 21 training return: tensor(45.3027, device='cuda:0')
episode: 22 training return: tensor(121.7612, device='cuda:0')
episode: 23 training return: tensor(225.6718, device='cuda:0')
epoch: 6 test_true_pfm: 5038.704620398251 sim_pfm: 110.23086476966273
episode: 24 training return: tensor(209.0681, device='cuda:0')
episode: 25 training return: tensor(102.5171, device='cuda:0')
episode: 26 training return: tensor(70.4485, device='cuda:0')
episode: 27 training return: tensor(22.7583, device='cuda:0')
epoch: 7 test_true_pfm: 5191.897747777592 sim_pfm: 183.52830895696147
episode: 28 training return: tensor(0.2389, device='cuda:0')
episode: 29 training return: tensor(163.6572, device='cuda:0')
episode: 30 training return: tensor(161.9075, device='cuda:0')
episode: 31 training return: tensor(40.9203, device='cuda:0')
epoch: 8 test_true_pfm: 5237.552346018043 sim_pfm: 117.77671846390392
episode: 32 training return: tensor(133.2305, device='cuda:0')
episode: 33 training return: tensor(92.2251, device='cuda:0')
episode: 34 training return: tensor(27.1430, device='cuda:0')
episode: 35 training return: tensor(103.4975, device='cuda:0')
epoch: 9 test_true_pfm: 5116.496977679621 sim_pfm: 166.26586116997836
episode: 36 training return: tensor(247.9729, device='cuda:0')
episode: 37 training return: tensor(78.7196, device='cuda:0')
episode: 38 training return: tensor(99.8061, device='cuda:0')
episode: 39 training return: tensor(187.8990, device='cuda:0')
epoch: 10 test_true_pfm: 5197.359041905605 sim_pfm: 174.1811049550888
episode: 40 training return: tensor(93.8388, device='cuda:0')
episode: 41 training return: tensor(-35.6086, device='cuda:0')
episode: 42 training return: tensor(120.3790, device='cuda:0')
episode: 43 training return: tensor(142.1380, device='cuda:0')
epoch: 11 test_true_pfm: 5037.182852601076 sim_pfm: 243.70312514317143
episode: 44 training return: tensor(309.2559, device='cuda:0')
episode: 45 training return: tensor(214.0932, device='cuda:0')
episode: 46 training return: tensor(-45.6902, device='cuda:0')
episode: 47 training return: tensor(45.4286, device='cuda:0')
epoch: 12 test_true_pfm: 5118.72329186411 sim_pfm: 191.28842314650925
episode: 48 training return: tensor(227.1018, device='cuda:0')
episode: 49 training return: tensor(119.8634, device='cuda:0')
episode: 50 training return: tensor(6.9445, device='cuda:0')
episode: 51 training return: tensor(148.0568, device='cuda:0')
epoch: 13 test_true_pfm: 5270.449904665514 sim_pfm: 207.0661097197832
episode: 52 training return: tensor(-12.4133, device='cuda:0')
episode: 53 training return: tensor(171.4493, device='cuda:0')
episode: 54 training return: tensor(146.7598, device='cuda:0')
episode: 55 training return: tensor(255.1315, device='cuda:0')
epoch: 14 test_true_pfm: 5320.079228841331 sim_pfm: 335.0602319832736
episode: 56 training return: tensor(195.8525, device='cuda:0')
episode: 57 training return: tensor(342.0881, device='cuda:0')
episode: 58 training return: tensor(153.1026, device='cuda:0')
episode: 59 training return: tensor(249.1681, device='cuda:0')
epoch: 15 test_true_pfm: 5295.080026898214 sim_pfm: 300.61829811515054
episode: 60 training return: tensor(173.5963, device='cuda:0')
episode: 61 training return: tensor(77.3026, device='cuda:0')
episode: 62 training return: tensor(103.2759, device='cuda:0')
episode: 63 training return: tensor(210.0851, device='cuda:0')
epoch: 16 test_true_pfm: 5297.571483545385 sim_pfm: 263.0037990677326
episode: 64 training return: tensor(162.6735, device='cuda:0')
episode: 65 training return: tensor(12.2708, device='cuda:0')
episode: 66 training return: tensor(71.2785, device='cuda:0')
episode: 67 training return: tensor(196.7815, device='cuda:0')
epoch: 17 test_true_pfm: 5310.361849055586 sim_pfm: 313.6270109426502
episode: 68 training return: tensor(190.1563, device='cuda:0')
episode: 69 training return: tensor(27.0195, device='cuda:0')
episode: 70 training return: tensor(213.5484, device='cuda:0')
episode: 71 training return: tensor(162.1273, device='cuda:0')
epoch: 18 test_true_pfm: 5253.527872151766 sim_pfm: 207.85283518624297
episode: 72 training return: tensor(14.1509, device='cuda:0')
episode: 73 training return: tensor(117.8938, device='cuda:0')
episode: 74 training return: tensor(186.0123, device='cuda:0')
episode: 75 training return: tensor(185.5898, device='cuda:0')
epoch: 19 test_true_pfm: 5322.624819379141 sim_pfm: 286.21598233119585
episode: 76 training return: tensor(264.4861, device='cuda:0')
episode: 77 training return: tensor(53.6345, device='cuda:0')
episode: 78 training return: tensor(124.7676, device='cuda:0')
episode: 79 training return: tensor(68.9339, device='cuda:0')
epoch: 20 test_true_pfm: 5309.351347298375 sim_pfm: 169.61547369718514
episode: 80 training return: tensor(248.4292, device='cuda:0')
episode: 81 training return: tensor(38.5852, device='cuda:0')
episode: 82 training return: tensor(242.8284, device='cuda:0')
episode: 83 training return: tensor(265.1245, device='cuda:0')
epoch: 21 test_true_pfm: 5400.364054505136 sim_pfm: 316.37941998120124
episode: 84 training return: tensor(313.6121, device='cuda:0')
episode: 85 training return: tensor(256.2547, device='cuda:0')
episode: 86 training return: tensor(165.7020, device='cuda:0')
episode: 87 training return: tensor(201.6602, device='cuda:0')
epoch: 22 test_true_pfm: 5288.0320903616985 sim_pfm: 234.08732339553535
episode: 88 training return: tensor(332.5965, device='cuda:0')
episode: 89 training return: tensor(-494.1767, device='cuda:0')
episode: 90 training return: tensor(190.9182, device='cuda:0')
episode: 91 training return: tensor(330.0309, device='cuda:0')
epoch: 23 test_true_pfm: 5370.636167342737 sim_pfm: 280.4186980808736
episode: 92 training return: tensor(191.1675, device='cuda:0')
episode: 93 training return: tensor(119.8591, device='cuda:0')
episode: 94 training return: tensor(216.1585, device='cuda:0')
episode: 95 training return: tensor(178.5839, device='cuda:0')
epoch: 24 test_true_pfm: 5308.922143894412 sim_pfm: 288.51240064087324
episode: 96 training return: tensor(158.4602, device='cuda:0')
episode: 97 training return: tensor(199.3001, device='cuda:0')
episode: 98 training return: tensor(147.4501, device='cuda:0')
episode: 99 training return: tensor(236.2806, device='cuda:0')
epoch: 25 test_true_pfm: 5443.279047437307 sim_pfm: 364.86900561167084
episode: 100 training return: tensor(200.4464, device='cuda:0')
episode: 101 training return: tensor(189.4292, device='cuda:0')
episode: 102 training return: tensor(220.7594, device='cuda:0')
episode: 103 training return: tensor(235.8868, device='cuda:0')
epoch: 26 test_true_pfm: 5436.865548911228 sim_pfm: 423.43698294850765
episode: 104 training return: tensor(237.5168, device='cuda:0')
episode: 105 training return: tensor(147.1831, device='cuda:0')
episode: 106 training return: tensor(241.2735, device='cuda:0')
episode: 107 training return: tensor(154.9052, device='cuda:0')
epoch: 27 test_true_pfm: 5392.268741713736 sim_pfm: 387.5882352278568
episode: 108 training return: tensor(282.9950, device='cuda:0')
episode: 109 training return: tensor(35.0867, device='cuda:0')
episode: 110 training return: tensor(181.4765, device='cuda:0')
episode: 111 training return: tensor(209.1550, device='cuda:0')
epoch: 28 test_true_pfm: 5350.585172500434 sim_pfm: 374.32241942718
episode: 112 training return: tensor(217.6070, device='cuda:0')
episode: 113 training return: tensor(179.6767, device='cuda:0')
episode: 114 training return: tensor(468.8397, device='cuda:0')
episode: 115 training return: tensor(323.3932, device='cuda:0')
epoch: 29 test_true_pfm: 5331.79414033772 sim_pfm: 361.13970586971845
episode: 116 training return: tensor(208.4769, device='cuda:0')
episode: 117 training return: tensor(98.7008, device='cuda:0')
episode: 118 training return: tensor(-20.6988, device='cuda:0')
episode: 119 training return: tensor(270.2171, device='cuda:0')
epoch: 30 test_true_pfm: 5360.656053902049 sim_pfm: 226.44448391969004
episode: 120 training return: tensor(40.6897, device='cuda:0')
episode: 121 training return: tensor(107.5687, device='cuda:0')
episode: 122 training return: tensor(179.6770, device='cuda:0')
episode: 123 training return: tensor(323.5894, device='cuda:0')
epoch: 31 test_true_pfm: 5189.8098025415065 sim_pfm: 324.7994475291246
episode: 124 training return: tensor(400.6082, device='cuda:0')
episode: 125 training return: tensor(307.8270, device='cuda:0')
episode: 126 training return: tensor(200.4160, device='cuda:0')
episode: 127 training return: tensor(359.8827, device='cuda:0')
epoch: 32 test_true_pfm: 5486.785274174678 sim_pfm: 361.29524485805695
episode: 128 training return: tensor(311.7570, device='cuda:0')
episode: 129 training return: tensor(195.6101, device='cuda:0')
episode: 130 training return: tensor(264.7746, device='cuda:0')
episode: 131 training return: tensor(195.3027, device='cuda:0')
epoch: 33 test_true_pfm: 5361.432588960674 sim_pfm: 412.9738304387526
episode: 132 training return: tensor(204.9535, device='cuda:0')
episode: 133 training return: tensor(311.8242, device='cuda:0')
episode: 134 training return: tensor(242.5316, device='cuda:0')
episode: 135 training return: tensor(286.3658, device='cuda:0')
epoch: 34 test_true_pfm: 5373.5507947146625 sim_pfm: 400.6188086505281
episode: 136 training return: tensor(357.6796, device='cuda:0')
episode: 137 training return: tensor(259.4106, device='cuda:0')
episode: 138 training return: tensor(240.4069, device='cuda:0')
episode: 139 training return: tensor(227.3372, device='cuda:0')
epoch: 35 test_true_pfm: 5366.3503846467975 sim_pfm: 446.13858092347317
episode: 140 training return: tensor(251.1265, device='cuda:0')
episode: 141 training return: tensor(161.2200, device='cuda:0')
episode: 142 training return: tensor(185.5524, device='cuda:0')
episode: 143 training return: tensor(230.1683, device='cuda:0')
epoch: 36 test_true_pfm: 5375.970093263215 sim_pfm: 370.60544190674165
episode: 144 training return: tensor(217.4115, device='cuda:0')
episode: 145 training return: tensor(208.8661, device='cuda:0')
episode: 146 training return: tensor(228.1594, device='cuda:0')
episode: 147 training return: tensor(237.1201, device='cuda:0')
epoch: 37 test_true_pfm: 5523.712127415471 sim_pfm: 345.50282975508406
episode: 148 training return: tensor(408.0938, device='cuda:0')
episode: 149 training return: tensor(152.4840, device='cuda:0')
episode: 150 training return: tensor(134.9736, device='cuda:0')
episode: 151 training return: tensor(294.6640, device='cuda:0')
epoch: 38 test_true_pfm: 5463.214584800885 sim_pfm: 426.2435275754833
episode: 152 training return: tensor(287.9963, device='cuda:0')
episode: 153 training return: tensor(397.7529, device='cuda:0')
episode: 154 training return: tensor(341.0089, device='cuda:0')
episode: 155 training return: tensor(283.5830, device='cuda:0')
epoch: 39 test_true_pfm: 5409.925243229696 sim_pfm: 344.9422871294664
episode: 156 training return: tensor(111.3180, device='cuda:0')
episode: 157 training return: tensor(330.7169, device='cuda:0')
episode: 158 training return: tensor(307.8946, device='cuda:0')
episode: 159 training return: tensor(345.0109, device='cuda:0')
epoch: 40 test_true_pfm: 5485.469687861773 sim_pfm: 406.59652467390214
episode: 160 training return: tensor(271.8703, device='cuda:0')
episode: 161 training return: tensor(350.5259, device='cuda:0')
episode: 162 training return: tensor(336.2545, device='cuda:0')
episode: 163 training return: tensor(197.4579, device='cuda:0')
epoch: 41 test_true_pfm: 5481.321927102693 sim_pfm: 428.7013513454003
episode: 164 training return: tensor(318.4133, device='cuda:0')
episode: 165 training return: tensor(262.2608, device='cuda:0')
episode: 166 training return: tensor(297.6658, device='cuda:0')
episode: 167 training return: tensor(167.1154, device='cuda:0')
epoch: 42 test_true_pfm: 5503.03111564722 sim_pfm: 406.86189342990593
episode: 168 training return: tensor(237.2541, device='cuda:0')
episode: 169 training return: tensor(327.9207, device='cuda:0')
episode: 170 training return: tensor(406.9947, device='cuda:0')
episode: 171 training return: tensor(260.8858, device='cuda:0')
epoch: 43 test_true_pfm: 5465.428066806395 sim_pfm: 421.5065593606366
episode: 172 training return: tensor(253.1649, device='cuda:0')
episode: 173 training return: tensor(243.9002, device='cuda:0')
episode: 174 training return: tensor(441.9202, device='cuda:0')
episode: 175 training return: tensor(161.9600, device='cuda:0')
epoch: 44 test_true_pfm: 5349.635907601969 sim_pfm: 441.7158353730726
episode: 176 training return: tensor(293.6927, device='cuda:0')
episode: 177 training return: tensor(324.5949, device='cuda:0')
episode: 178 training return: tensor(306.8349, device='cuda:0')
episode: 179 training return: tensor(391.4315, device='cuda:0')
epoch: 45 test_true_pfm: 5395.12817025491 sim_pfm: 462.4257857069218
episode: 180 training return: tensor(276.0546, device='cuda:0')
episode: 181 training return: tensor(125.5746, device='cuda:0')
episode: 182 training return: tensor(367.5866, device='cuda:0')
episode: 183 training return: tensor(357.8409, device='cuda:0')
epoch: 46 test_true_pfm: 5493.474312479174 sim_pfm: 354.9879497133273
episode: 184 training return: tensor(346.3435, device='cuda:0')
episode: 185 training return: tensor(284.8730, device='cuda:0')
episode: 186 training return: tensor(254.2292, device='cuda:0')
episode: 187 training return: tensor(263.3563, device='cuda:0')
epoch: 47 test_true_pfm: 5495.474963113687 sim_pfm: 408.5727020278573
episode: 188 training return: tensor(308.0945, device='cuda:0')
episode: 189 training return: tensor(331.0009, device='cuda:0')
episode: 190 training return: tensor(275.5483, device='cuda:0')
episode: 191 training return: tensor(225.8154, device='cuda:0')
epoch: 48 test_true_pfm: 5488.177271709638 sim_pfm: 509.82434939682327
episode: 192 training return: tensor(207.3622, device='cuda:0')
episode: 193 training return: tensor(268.4954, device='cuda:0')
episode: 194 training return: tensor(322.5190, device='cuda:0')
episode: 195 training return: tensor(267.5128, device='cuda:0')
epoch: 49 test_true_pfm: 5503.256377459846 sim_pfm: 444.5992734079191
episode: 196 training return: tensor(294.4886, device='cuda:0')
episode: 197 training return: tensor(220.0354, device='cuda:0')
episode: 198 training return: tensor(234.2904, device='cuda:0')
episode: 199 training return: tensor(388.2182, device='cuda:0')
epoch: 50 test_true_pfm: 5290.9246361686555 sim_pfm: 405.64692683487857
episode: 200 training return: tensor(379.8385, device='cuda:0')
episode: 201 training return: tensor(247.3680, device='cuda:0')
episode: 202 training return: tensor(291.4199, device='cuda:0')
episode: 203 training return: tensor(171.6126, device='cuda:0')
epoch: 51 test_true_pfm: 5498.979813437499 sim_pfm: 399.820199686219
episode: 204 training return: tensor(320.0899, device='cuda:0')
episode: 205 training return: tensor(409.7548, device='cuda:0')
episode: 206 training return: tensor(329.5241, device='cuda:0')
episode: 207 training return: tensor(378.5604, device='cuda:0')
epoch: 52 test_true_pfm: 5511.497595120646 sim_pfm: 439.09373849638115
episode: 208 training return: tensor(412.4901, device='cuda:0')
episode: 209 training return: tensor(374.9812, device='cuda:0')
episode: 210 training return: tensor(310.7430, device='cuda:0')
episode: 211 training return: tensor(369.1371, device='cuda:0')
epoch: 53 test_true_pfm: 5607.672989561601 sim_pfm: 513.0104828040252
episode: 212 training return: tensor(340.8766, device='cuda:0')
episode: 213 training return: tensor(207.6678, device='cuda:0')
episode: 214 training return: tensor(259.9655, device='cuda:0')
episode: 215 training return: tensor(334.8543, device='cuda:0')
epoch: 54 test_true_pfm: 5555.819850836845 sim_pfm: 399.00361376216944
episode: 216 training return: tensor(394.4918, device='cuda:0')
episode: 217 training return: tensor(267.0397, device='cuda:0')
episode: 218 training return: tensor(164.9801, device='cuda:0')
episode: 219 training return: tensor(343.1936, device='cuda:0')
epoch: 55 test_true_pfm: 5495.901833717159 sim_pfm: 395.3766747290695
episode: 220 training return: tensor(311.3641, device='cuda:0')
episode: 221 training return: tensor(215.7714, device='cuda:0')
episode: 222 training return: tensor(289.6750, device='cuda:0')
episode: 223 training return: tensor(328.3088, device='cuda:0')
epoch: 56 test_true_pfm: 5441.613097105772 sim_pfm: 422.3383516026079
episode: 224 training return: tensor(170.7248, device='cuda:0')
episode: 225 training return: tensor(233.8227, device='cuda:0')
episode: 226 training return: tensor(445.2267, device='cuda:0')
episode: 227 training return: tensor(243.0523, device='cuda:0')
epoch: 57 test_true_pfm: 5524.877530649036 sim_pfm: 441.36784050516627
episode: 228 training return: tensor(434.3301, device='cuda:0')
episode: 229 training return: tensor(304.4440, device='cuda:0')
episode: 230 training return: tensor(330.5376, device='cuda:0')
episode: 231 training return: tensor(481.9279, device='cuda:0')
epoch: 58 test_true_pfm: 5464.219940470742 sim_pfm: 433.4931176655421
episode: 232 training return: tensor(214.3645, device='cuda:0')
episode: 233 training return: tensor(379.9663, device='cuda:0')
episode: 234 training return: tensor(307.5473, device='cuda:0')
episode: 235 training return: tensor(358.4480, device='cuda:0')
epoch: 59 test_true_pfm: 5519.787894340159 sim_pfm: 491.41199154919013
episode: 236 training return: tensor(315.2596, device='cuda:0')
episode: 237 training return: tensor(416.1334, device='cuda:0')
episode: 238 training return: tensor(433.4629, device='cuda:0')
episode: 239 training return: tensor(393.9745, device='cuda:0')
epoch: 60 test_true_pfm: 5466.29498174468 sim_pfm: 344.3132868704852
episode: 240 training return: tensor(214.3284, device='cuda:0')
episode: 241 training return: tensor(281.5992, device='cuda:0')
episode: 242 training return: tensor(350.4746, device='cuda:0')
episode: 243 training return: tensor(367.2505, device='cuda:0')
epoch: 61 test_true_pfm: 5506.943352655716 sim_pfm: 519.0221925723212
episode: 244 training return: tensor(303.6509, device='cuda:0')
episode: 245 training return: tensor(337.3821, device='cuda:0')
episode: 246 training return: tensor(298.5920, device='cuda:0')
episode: 247 training return: tensor(387.8539, device='cuda:0')
epoch: 62 test_true_pfm: 5533.022960471154 sim_pfm: 521.1584599483758
episode: 248 training return: tensor(339.0058, device='cuda:0')
episode: 249 training return: tensor(287.8272, device='cuda:0')
episode: 250 training return: tensor(369.7518, device='cuda:0')
episode: 251 training return: tensor(269.3997, device='cuda:0')
epoch: 63 test_true_pfm: 5570.298639306256 sim_pfm: 525.3848182193469
episode: 252 training return: tensor(245.4559, device='cuda:0')
episode: 253 training return: tensor(385.4785, device='cuda:0')
episode: 254 training return: tensor(354.8881, device='cuda:0')
episode: 255 training return: tensor(252.2969, device='cuda:0')
epoch: 64 test_true_pfm: 5577.912364191646 sim_pfm: 535.2506567157883
episode: 256 training return: tensor(413.1522, device='cuda:0')
episode: 257 training return: tensor(163.8811, device='cuda:0')
episode: 258 training return: tensor(388.5081, device='cuda:0')
episode: 259 training return: tensor(401.5261, device='cuda:0')
epoch: 65 test_true_pfm: 5512.2819052126315 sim_pfm: 459.0335270407765
episode: 260 training return: tensor(351.7191, device='cuda:0')
episode: 261 training return: tensor(300.0410, device='cuda:0')
episode: 262 training return: tensor(481.6145, device='cuda:0')
episode: 263 training return: tensor(345.0931, device='cuda:0')
epoch: 66 test_true_pfm: 5412.147234980876 sim_pfm: 473.76151001267135
episode: 264 training return: tensor(290.4853, device='cuda:0')
episode: 265 training return: tensor(450.2566, device='cuda:0')
episode: 266 training return: tensor(201.8904, device='cuda:0')
episode: 267 training return: tensor(236.0517, device='cuda:0')
epoch: 67 test_true_pfm: 5587.772420510245 sim_pfm: 464.0399384656921
episode: 268 training return: tensor(540.9996, device='cuda:0')
episode: 269 training return: tensor(389.0251, device='cuda:0')
episode: 270 training return: tensor(336.5345, device='cuda:0')
episode: 271 training return: tensor(448.5241, device='cuda:0')
epoch: 68 test_true_pfm: 5541.3407021715975 sim_pfm: 493.14817822530557
episode: 272 training return: tensor(458.0935, device='cuda:0')
episode: 273 training return: tensor(371.1937, device='cuda:0')
episode: 274 training return: tensor(250.9754, device='cuda:0')
episode: 275 training return: tensor(479.2503, device='cuda:0')
epoch: 69 test_true_pfm: 5465.076275288968 sim_pfm: 432.68110070045805
episode: 276 training return: tensor(332.3581, device='cuda:0')
episode: 277 training return: tensor(376.1003, device='cuda:0')
episode: 278 training return: tensor(339.0025, device='cuda:0')
episode: 279 training return: tensor(410.1574, device='cuda:0')
epoch: 70 test_true_pfm: 5563.332625024042 sim_pfm: 440.9581318177904
episode: 280 training return: tensor(377.0930, device='cuda:0')
episode: 281 training return: tensor(239.2536, device='cuda:0')
episode: 282 training return: tensor(486.9227, device='cuda:0')
episode: 283 training return: tensor(343.6577, device='cuda:0')
epoch: 71 test_true_pfm: 5653.769604560662 sim_pfm: 501.1658782887583
episode: 284 training return: tensor(480.8686, device='cuda:0')
episode: 285 training return: tensor(302.8845, device='cuda:0')
episode: 286 training return: tensor(430.6109, device='cuda:0')
episode: 287 training return: tensor(522.8915, device='cuda:0')
epoch: 72 test_true_pfm: 5612.320040253752 sim_pfm: 487.17868554756086
episode: 288 training return: tensor(277.4509, device='cuda:0')
episode: 289 training return: tensor(292.4394, device='cuda:0')
episode: 290 training return: tensor(294.6938, device='cuda:0')
episode: 291 training return: tensor(481.6990, device='cuda:0')
epoch: 73 test_true_pfm: 5578.52342968526 sim_pfm: 542.2695275886994
episode: 292 training return: tensor(413.4529, device='cuda:0')
episode: 293 training return: tensor(402.4931, device='cuda:0')
episode: 294 training return: tensor(274.3151, device='cuda:0')
episode: 295 training return: tensor(563.5127, device='cuda:0')
epoch: 74 test_true_pfm: 5564.969285667845 sim_pfm: 517.6836323289511
episode: 296 training return: tensor(343.1831, device='cuda:0')
episode: 297 training return: tensor(423.5048, device='cuda:0')
episode: 298 training return: tensor(358.8987, device='cuda:0')
episode: 299 training return: tensor(444.2924, device='cuda:0')
epoch: 75 test_true_pfm: 5537.781868662049 sim_pfm: 474.42190207526437
episode: 300 training return: tensor(478.4722, device='cuda:0')
episode: 301 training return: tensor(397.0883, device='cuda:0')
episode: 302 training return: tensor(345.0402, device='cuda:0')
episode: 303 training return: tensor(268.2059, device='cuda:0')
epoch: 76 test_true_pfm: 5611.327803170141 sim_pfm: 497.7512192776582
episode: 304 training return: tensor(355.1700, device='cuda:0')
episode: 305 training return: tensor(496.7667, device='cuda:0')
episode: 306 training return: tensor(226.0102, device='cuda:0')
episode: 307 training return: tensor(329.1172, device='cuda:0')
epoch: 77 test_true_pfm: 5568.338015666287 sim_pfm: 445.0601275280933
episode: 308 training return: tensor(385.6121, device='cuda:0')
episode: 309 training return: tensor(423.2845, device='cuda:0')
episode: 310 training return: tensor(475.1180, device='cuda:0')
episode: 311 training return: tensor(275.5589, device='cuda:0')
epoch: 78 test_true_pfm: 5590.398618343501 sim_pfm: 479.3995053163574
episode: 312 training return: tensor(432.5292, device='cuda:0')
episode: 313 training return: tensor(486.8853, device='cuda:0')
episode: 314 training return: tensor(504.7425, device='cuda:0')
episode: 315 training return: tensor(449.6707, device='cuda:0')
epoch: 79 test_true_pfm: 5548.810929995121 sim_pfm: 551.9865220567057
episode: 316 training return: tensor(327.9361, device='cuda:0')
episode: 317 training return: tensor(441.4645, device='cuda:0')
episode: 318 training return: tensor(389.8888, device='cuda:0')
episode: 319 training return: tensor(268.5219, device='cuda:0')
epoch: 80 test_true_pfm: 5499.957127554408 sim_pfm: 456.30234220894636
episode: 320 training return: tensor(481.0116, device='cuda:0')
episode: 321 training return: tensor(392.3399, device='cuda:0')
episode: 322 training return: tensor(357.1944, device='cuda:0')
episode: 323 training return: tensor(482.3043, device='cuda:0')
epoch: 81 test_true_pfm: 5545.995914505918 sim_pfm: 531.7614510272591
episode: 324 training return: tensor(462.8938, device='cuda:0')
episode: 325 training return: tensor(462.7538, device='cuda:0')
episode: 326 training return: tensor(358.1664, device='cuda:0')
episode: 327 training return: tensor(430.2350, device='cuda:0')
epoch: 82 test_true_pfm: 5551.400273581937 sim_pfm: 561.6854122870621
episode: 328 training return: tensor(460.2317, device='cuda:0')
episode: 329 training return: tensor(307.8478, device='cuda:0')
episode: 330 training return: tensor(370.2746, device='cuda:0')
episode: 331 training return: tensor(501.3450, device='cuda:0')
epoch: 83 test_true_pfm: 5536.757826644866 sim_pfm: 491.3744311093663
episode: 332 training return: tensor(423.3680, device='cuda:0')
episode: 333 training return: tensor(471.2203, device='cuda:0')
episode: 334 training return: tensor(309.4275, device='cuda:0')
episode: 335 training return: tensor(243.9392, device='cuda:0')
epoch: 84 test_true_pfm: 5649.570357015077 sim_pfm: 521.58519757248
episode: 336 training return: tensor(490.8698, device='cuda:0')
episode: 337 training return: tensor(486.0701, device='cuda:0')
episode: 338 training return: tensor(476.4181, device='cuda:0')
episode: 339 training return: tensor(453.4002, device='cuda:0')
epoch: 85 test_true_pfm: 5648.203467366213 sim_pfm: 527.1702506383299
episode: 340 training return: tensor(388.3883, device='cuda:0')
episode: 341 training return: tensor(378.2243, device='cuda:0')
episode: 342 training return: tensor(431.7034, device='cuda:0')
episode: 343 training return: tensor(487.4736, device='cuda:0')
epoch: 86 test_true_pfm: 5646.104706440624 sim_pfm: 511.0888073007421
episode: 344 training return: tensor(193.2999, device='cuda:0')
episode: 345 training return: tensor(578.9753, device='cuda:0')
episode: 346 training return: tensor(389.6964, device='cuda:0')
episode: 347 training return: tensor(513.2540, device='cuda:0')
epoch: 87 test_true_pfm: 5650.217915776445 sim_pfm: 525.1456285451228
episode: 348 training return: tensor(265.8081, device='cuda:0')
episode: 349 training return: tensor(445.5159, device='cuda:0')
episode: 350 training return: tensor(368.3896, device='cuda:0')
episode: 351 training return: tensor(401.0914, device='cuda:0')
epoch: 88 test_true_pfm: 5666.323525206103 sim_pfm: 514.1665802637677
episode: 352 training return: tensor(502.9074, device='cuda:0')
episode: 353 training return: tensor(632.7692, device='cuda:0')
episode: 354 training return: tensor(536.9270, device='cuda:0')
episode: 355 training return: tensor(370.3924, device='cuda:0')
epoch: 89 test_true_pfm: 5697.54163417999 sim_pfm: 551.8501312659743
episode: 356 training return: tensor(383.1335, device='cuda:0')
episode: 357 training return: tensor(413.6247, device='cuda:0')
episode: 358 training return: tensor(372.1611, device='cuda:0')
episode: 359 training return: tensor(388.9190, device='cuda:0')
epoch: 90 test_true_pfm: 5624.584427605823 sim_pfm: 518.1285979571791
episode: 360 training return: tensor(370.1397, device='cuda:0')
episode: 361 training return: tensor(437.4303, device='cuda:0')
episode: 362 training return: tensor(490.3316, device='cuda:0')
episode: 363 training return: tensor(352.9795, device='cuda:0')
epoch: 91 test_true_pfm: 5670.81257790532 sim_pfm: 506.1616545965274
episode: 364 training return: tensor(403.6086, device='cuda:0')
episode: 365 training return: tensor(434.5648, device='cuda:0')
episode: 366 training return: tensor(352.5968, device='cuda:0')
episode: 367 training return: tensor(438.0143, device='cuda:0')
epoch: 92 test_true_pfm: 5598.716889896104 sim_pfm: 548.8783150747477
episode: 368 training return: tensor(450.7026, device='cuda:0')
episode: 369 training return: tensor(263.9430, device='cuda:0')
episode: 370 training return: tensor(419.5493, device='cuda:0')
episode: 371 training return: tensor(478.2865, device='cuda:0')
epoch: 93 test_true_pfm: 5618.586692082797 sim_pfm: 526.1747158545768
episode: 372 training return: tensor(426.2845, device='cuda:0')
episode: 373 training return: tensor(408.9546, device='cuda:0')
episode: 374 training return: tensor(464.8423, device='cuda:0')
episode: 375 training return: tensor(439.2564, device='cuda:0')
epoch: 94 test_true_pfm: 5609.406330108664 sim_pfm: 548.2163342546361
episode: 376 training return: tensor(470.8329, device='cuda:0')
episode: 377 training return: tensor(334.5091, device='cuda:0')
episode: 378 training return: tensor(447.7364, device='cuda:0')
episode: 379 training return: tensor(482.3729, device='cuda:0')
epoch: 95 test_true_pfm: 5596.378554257168 sim_pfm: 620.5397112942592
episode: 380 training return: tensor(297.8770, device='cuda:0')
episode: 381 training return: tensor(560.4183, device='cuda:0')
episode: 382 training return: tensor(507.9646, device='cuda:0')
episode: 383 training return: tensor(476.7109, device='cuda:0')
epoch: 96 test_true_pfm: 5692.574045698614 sim_pfm: 561.5890044690265
episode: 384 training return: tensor(504.4201, device='cuda:0')
episode: 385 training return: tensor(398.9571, device='cuda:0')
episode: 386 training return: tensor(367.6165, device='cuda:0')
episode: 387 training return: tensor(399.5420, device='cuda:0')
epoch: 97 test_true_pfm: 5606.869439025179 sim_pfm: 478.9242584859118
episode: 388 training return: tensor(542.3384, device='cuda:0')
episode: 389 training return: tensor(504.4842, device='cuda:0')
episode: 390 training return: tensor(398.6534, device='cuda:0')
episode: 391 training return: tensor(567.3000, device='cuda:0')
epoch: 98 test_true_pfm: 5584.003082729948 sim_pfm: 492.5770943961106
episode: 392 training return: tensor(527.4835, device='cuda:0')
episode: 393 training return: tensor(460.4942, device='cuda:0')
episode: 394 training return: tensor(448.5101, device='cuda:0')
episode: 395 training return: tensor(401.2032, device='cuda:0')
epoch: 99 test_true_pfm: 5640.193654695481 sim_pfm: 512.1358420445371
episode: 396 training return: tensor(512.5026, device='cuda:0')
episode: 397 training return: tensor(344.3764, device='cuda:0')
episode: 398 training return: tensor(410.1024, device='cuda:0')
episode: 399 training return: tensor(445.3275, device='cuda:0')
epoch: 100 test_true_pfm: 5612.535759886455 sim_pfm: 563.6811979858903
episode: 400 training return: tensor(385.8402, device='cuda:0')
episode: 401 training return: tensor(357.0415, device='cuda:0')
episode: 402 training return: tensor(449.1144, device='cuda:0')
episode: 403 training return: tensor(513.3439, device='cuda:0')
epoch: 101 test_true_pfm: 5665.0255596367615 sim_pfm: 538.6398550940406
episode: 404 training return: tensor(474.7245, device='cuda:0')
episode: 405 training return: tensor(415.5798, device='cuda:0')
episode: 406 training return: tensor(458.5231, device='cuda:0')
episode: 407 training return: tensor(424.3821, device='cuda:0')
epoch: 102 test_true_pfm: 5625.805986244213 sim_pfm: 526.324001794875
episode: 408 training return: tensor(361.9803, device='cuda:0')
episode: 409 training return: tensor(346.2405, device='cuda:0')
episode: 410 training return: tensor(524.0165, device='cuda:0')
episode: 411 training return: tensor(438.2049, device='cuda:0')
epoch: 103 test_true_pfm: 5609.99816221454 sim_pfm: 568.2866859595136
episode: 412 training return: tensor(472.6494, device='cuda:0')
episode: 413 training return: tensor(309.5322, device='cuda:0')
episode: 414 training return: tensor(536.5865, device='cuda:0')
episode: 415 training return: tensor(514.1479, device='cuda:0')
epoch: 104 test_true_pfm: 5743.143136960473 sim_pfm: 561.6976915578125
episode: 416 training return: tensor(516.8407, device='cuda:0')
episode: 417 training return: tensor(522.9022, device='cuda:0')
episode: 418 training return: tensor(556.5574, device='cuda:0')
episode: 419 training return: tensor(401.2971, device='cuda:0')
epoch: 105 test_true_pfm: 5673.597662118539 sim_pfm: 613.7134282860594
episode: 420 training return: tensor(504.0149, device='cuda:0')
episode: 421 training return: tensor(378.3010, device='cuda:0')
episode: 422 training return: tensor(523.3333, device='cuda:0')
episode: 423 training return: tensor(384.0181, device='cuda:0')
epoch: 106 test_true_pfm: 5675.333759768325 sim_pfm: 549.5576018438442
episode: 424 training return: tensor(459.6196, device='cuda:0')
episode: 425 training return: tensor(306.0240, device='cuda:0')
episode: 426 training return: tensor(440.7867, device='cuda:0')
episode: 427 training return: tensor(480.2682, device='cuda:0')
epoch: 107 test_true_pfm: 5636.007421406744 sim_pfm: 552.1899887172234
episode: 428 training return: tensor(369.1471, device='cuda:0')
episode: 429 training return: tensor(456.1719, device='cuda:0')
episode: 430 training return: tensor(415.5323, device='cuda:0')
episode: 431 training return: tensor(198.6657, device='cuda:0')
epoch: 108 test_true_pfm: 5667.778601672446 sim_pfm: 584.6596135861959
episode: 432 training return: tensor(425.0990, device='cuda:0')
episode: 433 training return: tensor(328.2627, device='cuda:0')
episode: 434 training return: tensor(416.8773, device='cuda:0')
episode: 435 training return: tensor(404.3144, device='cuda:0')
epoch: 109 test_true_pfm: 5586.876360066589 sim_pfm: 528.6730969793667
episode: 436 training return: tensor(390.9683, device='cuda:0')
episode: 437 training return: tensor(295.6691, device='cuda:0')
episode: 438 training return: tensor(424.7581, device='cuda:0')
episode: 439 training return: tensor(410.1099, device='cuda:0')
epoch: 110 test_true_pfm: 5678.086665766314 sim_pfm: 498.0549595877528
episode: 440 training return: tensor(451.1775, device='cuda:0')
episode: 441 training return: tensor(386.2423, device='cuda:0')
episode: 442 training return: tensor(421.8788, device='cuda:0')
episode: 443 training return: tensor(490.7185, device='cuda:0')
epoch: 111 test_true_pfm: 5582.357469860673 sim_pfm: 583.1359133734368
episode: 444 training return: tensor(503.1596, device='cuda:0')
episode: 445 training return: tensor(289.6779, device='cuda:0')
episode: 446 training return: tensor(378.4400, device='cuda:0')
episode: 447 training return: tensor(440.0066, device='cuda:0')
epoch: 112 test_true_pfm: 5570.29776612017 sim_pfm: 537.7725749195864
episode: 448 training return: tensor(297.6880, device='cuda:0')
episode: 449 training return: tensor(477.9436, device='cuda:0')
episode: 450 training return: tensor(433.3818, device='cuda:0')
episode: 451 training return: tensor(407.7914, device='cuda:0')
epoch: 113 test_true_pfm: 5654.536739807162 sim_pfm: 503.14514215340995
episode: 452 training return: tensor(485.4003, device='cuda:0')
episode: 453 training return: tensor(383.2468, device='cuda:0')
episode: 454 training return: tensor(460.0400, device='cuda:0')
episode: 455 training return: tensor(465.6917, device='cuda:0')
epoch: 114 test_true_pfm: 5644.109131013657 sim_pfm: 564.3720762569961
episode: 456 training return: tensor(461.2312, device='cuda:0')
episode: 457 training return: tensor(440.9945, device='cuda:0')
episode: 458 training return: tensor(516.1397, device='cuda:0')
episode: 459 training return: tensor(546.8392, device='cuda:0')
epoch: 115 test_true_pfm: 5638.9579718920695 sim_pfm: 539.7842562480364
episode: 460 training return: tensor(506.0328, device='cuda:0')
episode: 461 training return: tensor(283.7431, device='cuda:0')
episode: 462 training return: tensor(390.1492, device='cuda:0')
episode: 463 training return: tensor(374.8528, device='cuda:0')
epoch: 116 test_true_pfm: 5600.216248199385 sim_pfm: 562.7975001305167
episode: 464 training return: tensor(421.5400, device='cuda:0')
episode: 465 training return: tensor(339.3619, device='cuda:0')
episode: 466 training return: tensor(266.9315, device='cuda:0')
episode: 467 training return: tensor(598.9594, device='cuda:0')
epoch: 117 test_true_pfm: 5640.792656084312 sim_pfm: 600.5993058915677
episode: 468 training return: tensor(368.5883, device='cuda:0')
episode: 469 training return: tensor(439.2605, device='cuda:0')
episode: 470 training return: tensor(489.3160, device='cuda:0')
episode: 471 training return: tensor(484.4377, device='cuda:0')
epoch: 118 test_true_pfm: 5618.831308768946 sim_pfm: 519.329055062728
episode: 472 training return: tensor(545.2479, device='cuda:0')
episode: 473 training return: tensor(458.8404, device='cuda:0')
episode: 474 training return: tensor(471.7522, device='cuda:0')
episode: 475 training return: tensor(528.0950, device='cuda:0')
epoch: 119 test_true_pfm: 5661.652919937068 sim_pfm: 523.8350778077729
episode: 476 training return: tensor(480.8603, device='cuda:0')
episode: 477 training return: tensor(475.4511, device='cuda:0')
episode: 478 training return: tensor(317.2631, device='cuda:0')
episode: 479 training return: tensor(383.3897, device='cuda:0')
epoch: 120 test_true_pfm: 5631.9701370106295 sim_pfm: 596.4530693041743
episode: 480 training return: tensor(464.9750, device='cuda:0')
episode: 481 training return: tensor(417.7322, device='cuda:0')
episode: 482 training return: tensor(511.9553, device='cuda:0')
episode: 483 training return: tensor(464.8778, device='cuda:0')
epoch: 121 test_true_pfm: 5654.569119927325 sim_pfm: 587.8728991640382
episode: 484 training return: tensor(489.8468, device='cuda:0')
episode: 485 training return: tensor(429.1135, device='cuda:0')
episode: 486 training return: tensor(417.1212, device='cuda:0')
episode: 487 training return: tensor(399.8589, device='cuda:0')
epoch: 122 test_true_pfm: 5591.740232457595 sim_pfm: 624.423004567856
episode: 488 training return: tensor(561.4472, device='cuda:0')
episode: 489 training return: tensor(376.5170, device='cuda:0')
episode: 490 training return: tensor(533.4166, device='cuda:0')
episode: 491 training return: tensor(341.9097, device='cuda:0')
epoch: 123 test_true_pfm: 5602.854080441984 sim_pfm: 562.2922968901112
episode: 492 training return: tensor(468.3184, device='cuda:0')
episode: 493 training return: tensor(450.3182, device='cuda:0')
episode: 494 training return: tensor(461.7898, device='cuda:0')
episode: 495 training return: tensor(365.6419, device='cuda:0')
epoch: 124 test_true_pfm: 5649.189966469283 sim_pfm: 601.8378850513836
episode: 496 training return: tensor(567.7010, device='cuda:0')
episode: 497 training return: tensor(573.7212, device='cuda:0')
episode: 498 training return: tensor(334.1428, device='cuda:0')
episode: 499 training return: tensor(526.5867, device='cuda:0')
epoch: 125 test_true_pfm: 5552.097965931615 sim_pfm: 586.1720543419555
episode: 500 training return: tensor(603.7527, device='cuda:0')
episode: 501 training return: tensor(459.8557, device='cuda:0')
episode: 502 training return: tensor(567.9751, device='cuda:0')
episode: 503 training return: tensor(476.0415, device='cuda:0')
epoch: 126 test_true_pfm: 5720.959575604672 sim_pfm: 578.297412387367
episode: 504 training return: tensor(430.7468, device='cuda:0')
episode: 505 training return: tensor(550.0611, device='cuda:0')
episode: 506 training return: tensor(321.8792, device='cuda:0')
episode: 507 training return: tensor(392.9900, device='cuda:0')
epoch: 127 test_true_pfm: 5704.966414512863 sim_pfm: 492.98585729665746
episode: 508 training return: tensor(438.2673, device='cuda:0')
episode: 509 training return: tensor(439.4172, device='cuda:0')
episode: 510 training return: tensor(384.7612, device='cuda:0')
episode: 511 training return: tensor(399.0935, device='cuda:0')
epoch: 128 test_true_pfm: 5696.712498199538 sim_pfm: 576.9870544672012
episode: 512 training return: tensor(496.4876, device='cuda:0')
episode: 513 training return: tensor(413.3860, device='cuda:0')
episode: 514 training return: tensor(427.0152, device='cuda:0')
episode: 515 training return: tensor(451.9263, device='cuda:0')
epoch: 129 test_true_pfm: 5670.446446887711 sim_pfm: 563.9350062138401
episode: 516 training return: tensor(407.0639, device='cuda:0')
episode: 517 training return: tensor(486.0802, device='cuda:0')
episode: 518 training return: tensor(349.5465, device='cuda:0')
episode: 519 training return: tensor(292.6601, device='cuda:0')
epoch: 130 test_true_pfm: 5668.029849049206 sim_pfm: 528.6648293938488
episode: 520 training return: tensor(369.6751, device='cuda:0')
episode: 521 training return: tensor(464.1342, device='cuda:0')
episode: 522 training return: tensor(519.5494, device='cuda:0')
episode: 523 training return: tensor(567.6201, device='cuda:0')
epoch: 131 test_true_pfm: 5636.486583455199 sim_pfm: 605.9860154157117
episode: 524 training return: tensor(525.2657, device='cuda:0')
episode: 525 training return: tensor(420.2746, device='cuda:0')
episode: 526 training return: tensor(437.6025, device='cuda:0')
episode: 527 training return: tensor(427.1872, device='cuda:0')
epoch: 132 test_true_pfm: 5657.393501092468 sim_pfm: 599.6283031786637
episode: 528 training return: tensor(510.3746, device='cuda:0')
episode: 529 training return: tensor(470.6938, device='cuda:0')
episode: 530 training return: tensor(441.4075, device='cuda:0')
episode: 531 training return: tensor(497.1935, device='cuda:0')
epoch: 133 test_true_pfm: 5657.873433302327 sim_pfm: 532.7353364371714
episode: 532 training return: tensor(438.8641, device='cuda:0')
episode: 533 training return: tensor(385.0263, device='cuda:0')
episode: 534 training return: tensor(345.5812, device='cuda:0')
episode: 535 training return: tensor(470.1483, device='cuda:0')
epoch: 134 test_true_pfm: 5681.842708530022 sim_pfm: 604.3266658766273
episode: 536 training return: tensor(365.8223, device='cuda:0')
episode: 537 training return: tensor(379.6370, device='cuda:0')
episode: 538 training return: tensor(413.6415, device='cuda:0')
episode: 539 training return: tensor(621.9057, device='cuda:0')
epoch: 135 test_true_pfm: 5639.473843297507 sim_pfm: 579.569939125989
episode: 540 training return: tensor(469.3657, device='cuda:0')
episode: 541 training return: tensor(372.9907, device='cuda:0')
episode: 542 training return: tensor(584.5920, device='cuda:0')
episode: 543 training return: tensor(504.7433, device='cuda:0')
epoch: 136 test_true_pfm: 5712.737438574889 sim_pfm: 619.5387555400084
episode: 544 training return: tensor(478.5223, device='cuda:0')
episode: 545 training return: tensor(479.3738, device='cuda:0')
episode: 546 training return: tensor(486.9335, device='cuda:0')
episode: 547 training return: tensor(460.6775, device='cuda:0')
epoch: 137 test_true_pfm: 5735.0221670900255 sim_pfm: 570.4868072049381
episode: 548 training return: tensor(419.1855, device='cuda:0')
episode: 549 training return: tensor(538.0872, device='cuda:0')
episode: 550 training return: tensor(514.9030, device='cuda:0')
episode: 551 training return: tensor(422.9553, device='cuda:0')
epoch: 138 test_true_pfm: 5659.850255850983 sim_pfm: 560.1156163069342
episode: 552 training return: tensor(477.7622, device='cuda:0')
episode: 553 training return: tensor(545.5518, device='cuda:0')
episode: 554 training return: tensor(569.6736, device='cuda:0')
episode: 555 training return: tensor(495.3570, device='cuda:0')
epoch: 139 test_true_pfm: 5686.790320321317 sim_pfm: 543.8921007166695
episode: 556 training return: tensor(633.7087, device='cuda:0')
episode: 557 training return: tensor(229.7260, device='cuda:0')
episode: 558 training return: tensor(510.5130, device='cuda:0')
episode: 559 training return: tensor(409.8949, device='cuda:0')
epoch: 140 test_true_pfm: 5695.539500035783 sim_pfm: 604.7922003264539
episode: 560 training return: tensor(380.8750, device='cuda:0')
episode: 561 training return: tensor(381.7675, device='cuda:0')
episode: 562 training return: tensor(482.1346, device='cuda:0')
episode: 563 training return: tensor(527.0128, device='cuda:0')
epoch: 141 test_true_pfm: 5702.5777947934475 sim_pfm: 557.6544456970975
episode: 564 training return: tensor(388.8934, device='cuda:0')
episode: 565 training return: tensor(385.5832, device='cuda:0')
episode: 566 training return: tensor(525.4173, device='cuda:0')
episode: 567 training return: tensor(477.6606, device='cuda:0')
epoch: 142 test_true_pfm: 5686.539999745776 sim_pfm: 654.4096367627693
episode: 568 training return: tensor(453.6992, device='cuda:0')
episode: 569 training return: tensor(491.7655, device='cuda:0')
episode: 570 training return: tensor(448.7319, device='cuda:0')
episode: 571 training return: tensor(457.6604, device='cuda:0')
epoch: 143 test_true_pfm: 5724.481137227666 sim_pfm: 572.2101017110011
episode: 572 training return: tensor(486.2529, device='cuda:0')
episode: 573 training return: tensor(435.8910, device='cuda:0')
episode: 574 training return: tensor(510.7022, device='cuda:0')
episode: 575 training return: tensor(513.3229, device='cuda:0')
epoch: 144 test_true_pfm: 5706.06960045745 sim_pfm: 574.128861593393
episode: 576 training return: tensor(332.6344, device='cuda:0')
episode: 577 training return: tensor(423.1502, device='cuda:0')
episode: 578 training return: tensor(380.7628, device='cuda:0')
episode: 579 training return: tensor(539.7305, device='cuda:0')
epoch: 145 test_true_pfm: 5606.295541889292 sim_pfm: 592.9583277853671
episode: 580 training return: tensor(391.3074, device='cuda:0')
episode: 581 training return: tensor(443.2985, device='cuda:0')
episode: 582 training return: tensor(399.6348, device='cuda:0')
episode: 583 training return: tensor(441.6088, device='cuda:0')
epoch: 146 test_true_pfm: 5707.747966946052 sim_pfm: 589.8123602628087
episode: 584 training return: tensor(535.9005, device='cuda:0')
episode: 585 training return: tensor(522.9978, device='cuda:0')
episode: 586 training return: tensor(398.9415, device='cuda:0')
episode: 587 training return: tensor(443.9853, device='cuda:0')
epoch: 147 test_true_pfm: 5691.689032757473 sim_pfm: 648.5792196178809
episode: 588 training return: tensor(554.7515, device='cuda:0')
episode: 589 training return: tensor(458.2280, device='cuda:0')
episode: 590 training return: tensor(533.9453, device='cuda:0')
episode: 591 training return: tensor(448.0719, device='cuda:0')
epoch: 148 test_true_pfm: 5663.281080597056 sim_pfm: 550.1859244197452
episode: 592 training return: tensor(563.6476, device='cuda:0')
episode: 593 training return: tensor(501.2450, device='cuda:0')
episode: 594 training return: tensor(450.9398, device='cuda:0')
episode: 595 training return: tensor(428.3742, device='cuda:0')
epoch: 149 test_true_pfm: 5695.050913160977 sim_pfm: 587.7314651698883
episode: 596 training return: tensor(242.5711, device='cuda:0')
episode: 597 training return: tensor(468.4199, device='cuda:0')
episode: 598 training return: tensor(478.2712, device='cuda:0')
episode: 599 training return: tensor(518.7420, device='cuda:0')
epoch: 150 test_true_pfm: 5697.165650091826 sim_pfm: 588.4180047772048
