['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '2']
epoch: 0 training_loss 0.31313814893364905 test_loss: 0.21062881946563722
epoch: 1 training_loss 0.1754891887307167 test_loss: 0.1687619686126709
epoch: 2 training_loss 0.14335804395377635 test_loss: 0.14317038059234619
epoch: 3 training_loss 0.14466627649962902 test_loss: 0.13121262788772584
epoch: 4 training_loss 0.13406578343361616 test_loss: 0.13007855415344238
epoch: 5 training_loss 0.1305827858671546 test_loss: 0.1309574604034424
epoch: 6 training_loss 0.1287149791046977 test_loss: 0.12964818477630616
epoch: 7 training_loss 0.12944894708693028 test_loss: 0.11029583215713501
epoch: 8 training_loss 0.11969716347754002 test_loss: 0.11749701499938965
epoch: 9 training_loss 0.11634776130318641 test_loss: 0.1142755389213562
epoch: 10 training_loss 0.11652258601039649 test_loss: 0.12545511722564698
epoch: 11 training_loss 0.12037736639380454 test_loss: 0.10597602128982545
epoch: 12 training_loss 0.11617345035076142 test_loss: 0.1212122082710266
epoch: 13 training_loss 0.1147055491618812 test_loss: 0.1277611017227173
epoch: 14 training_loss 0.1245870053768158 test_loss: 0.13027987480163575
epoch: 15 training_loss 0.12554244447499513 test_loss: 0.10103538036346435
epoch: 16 training_loss 0.11568458467721938 test_loss: 0.11201459169387817
epoch: 17 training_loss 0.11697556510567665 test_loss: 0.10841969251632691
epoch: 18 training_loss 0.11689637398347258 test_loss: 0.13093767166137696
epoch: 19 training_loss 0.11172119911760092 test_loss: 0.10840421915054321
epoch: 20 training_loss 0.1166281382739544 test_loss: 0.10953724384307861
epoch: 21 training_loss 0.10658139202743769 test_loss: 0.117313551902771
epoch: 22 training_loss 0.12242131419479847 test_loss: 0.13870917558670043
epoch: 23 training_loss 0.11137061629444361 test_loss: 0.11610383987426758
epoch: 24 training_loss 0.10829302299767733 test_loss: 0.11353614330291747
epoch: 25 training_loss 0.10403598515316843 test_loss: 0.10877465009689331
epoch: 26 training_loss 0.11223493540659547 test_loss: 0.12490352392196655
epoch: 27 training_loss 0.11281551171094178 test_loss: 0.100868558883667
epoch: 28 training_loss 0.10887243466451764 test_loss: 0.11934893131256104
epoch: 29 training_loss 0.10424956232309342 test_loss: 0.09980949163436889
epoch: 30 training_loss 0.11174434181302786 test_loss: 0.1056322693824768
epoch: 31 training_loss 0.1104362553730607 test_loss: 0.0972661554813385
epoch: 32 training_loss 0.10561820331960917 test_loss: 0.09733461141586304
epoch: 33 training_loss 0.11243355199694634 test_loss: 0.11616286039352416
epoch: 34 training_loss 0.10562155408784747 test_loss: 0.10420563220977783
epoch: 35 training_loss 0.10526608984917402 test_loss: 0.1077256441116333
epoch: 36 training_loss 0.10469751473516226 test_loss: 0.1281091809272766
epoch: 37 training_loss 0.11379425350576639 test_loss: 0.11811578273773193
epoch: 38 training_loss 0.10522417236119509 test_loss: 0.10875459909439086
epoch: 39 training_loss 0.11082702852785588 test_loss: 0.11620639562606812
epoch: 40 training_loss 0.10688549038022757 test_loss: 0.12117277383804322
epoch: 41 training_loss 0.10474827609956265 test_loss: 0.11876456737518311
epoch: 42 training_loss 0.115302904099226 test_loss: 0.1018143653869629
epoch: 43 training_loss 0.10265741601586342 test_loss: 0.11616688966751099
epoch: 44 training_loss 0.10406911529600621 test_loss: 0.13743600845336915
epoch: 45 training_loss 0.10135984158143402 test_loss: 0.12301820516586304
epoch: 46 training_loss 0.10554946593940258 test_loss: 0.1103819727897644
epoch: 47 training_loss 0.1049859606847167 test_loss: 0.11764236688613891
epoch: 48 training_loss 0.10559148520231247 test_loss: 0.11967238187789916
epoch: 49 training_loss 0.10778034629300237 test_loss: 0.11817117929458618
epoch: 50 training_loss 0.10272773545235396 test_loss: 0.1250470519065857
epoch: 51 training_loss 0.10810823634266853 test_loss: 0.1217265248298645
epoch: 52 training_loss 0.10074904525652528 test_loss: 0.10595008134841918
epoch: 53 training_loss 0.11584701407700777 test_loss: 0.12645491361618041
epoch: 54 training_loss 0.10798945741727949 test_loss: 0.11558225154876708
epoch: 55 training_loss 0.0997167494520545 test_loss: 0.11387925148010254
epoch: 56 training_loss 0.10684398610144853 test_loss: 0.11870547533035278
epoch: 57 training_loss 0.10470690319314599 test_loss: 0.095802903175354
epoch: 58 training_loss 0.10362581435590983 test_loss: 0.11440703868865967
epoch: 59 training_loss 0.1080670403316617 test_loss: 0.125330650806427
epoch: 60 training_loss 0.10298695836216211 test_loss: 0.11166607141494751
epoch: 61 training_loss 0.10849718984216451 test_loss: 0.11015163660049439
epoch: 62 training_loss 0.09835068361833692 test_loss: 0.10081794261932372
epoch: 63 training_loss 0.10234461925923824 test_loss: 0.09998428225517272
epoch: 64 training_loss 0.10568606052547694 test_loss: 0.10630942583084106
epoch: 65 training_loss 0.1005532518401742 test_loss: 0.09830219745635986
epoch: 66 training_loss 0.09722251284867525 test_loss: 0.12164305448532105
epoch: 67 training_loss 0.10325194589793682 test_loss: 0.11503775119781494
epoch: 68 training_loss 0.09728469960391521 test_loss: 0.11314665079116822
epoch: 69 training_loss 0.10235827742144465 test_loss: 0.0981067180633545
epoch: 70 training_loss 0.10988299630582332 test_loss: 0.12262247800827027
epoch: 71 training_loss 0.10356320567429066 test_loss: 0.11989612579345703
epoch: 72 training_loss 0.10551073133945466 test_loss: 0.11729108095169068
epoch: 73 training_loss 0.10502374105155468 test_loss: 0.09898342490196228
epoch: 74 training_loss 0.1087154621630907 test_loss: 0.11384702920913696
epoch: 75 training_loss 0.1078896988555789 test_loss: 0.10571097135543824
epoch: 76 training_loss 0.10619626358151436 test_loss: 0.13789308071136475
epoch: 77 training_loss 0.11776995413005352 test_loss: 0.10886726379394532
epoch: 78 training_loss 0.09717149149626493 test_loss: 0.10727540254592896
epoch: 79 training_loss 0.10146806906908751 test_loss: 0.1148985743522644
epoch: 80 training_loss 0.10710681334137917 test_loss: 0.09994893670082092
epoch: 81 training_loss 0.10010401099920273 test_loss: 0.10455328226089478
epoch: 82 training_loss 0.10196352586150169 test_loss: 0.09973299503326416
epoch: 83 training_loss 0.10144901402294636 test_loss: 0.10188919305801392
epoch: 84 training_loss 0.10574743149802089 test_loss: 0.10989292860031127
epoch: 85 training_loss 0.10478525202721357 test_loss: 0.10646989345550537
epoch: 86 training_loss 0.10773389309644699 test_loss: 0.1170077919960022
epoch: 87 training_loss 0.09853863904252648 test_loss: 0.12912794351577758
epoch: 88 training_loss 0.10040272848680615 test_loss: 0.0950294554233551
epoch: 89 training_loss 0.10869489051401615 test_loss: 0.10653223991394042
epoch: 90 training_loss 0.111627626568079 test_loss: 0.11574089527130127
epoch: 91 training_loss 0.1029340823739767 test_loss: 0.10891811847686768
epoch: 92 training_loss 0.0987285845912993 test_loss: 0.10113447904586792
epoch: 93 training_loss 0.10653912544250488 test_loss: 0.10319979190826416
epoch: 94 training_loss 0.105207405295223 test_loss: 0.0926309883594513
epoch: 95 training_loss 0.10203703325241804 test_loss: 0.09440757036209106
epoch: 96 training_loss 0.10293273881077766 test_loss: 0.11274724006652832
epoch: 97 training_loss 0.10465215113013983 test_loss: 0.10248534679412842
epoch: 98 training_loss 0.10107789175584912 test_loss: 0.10941801071166993
epoch: 99 training_loss 0.09522139955312013 test_loss: 0.10812571048736572
epoch: 100 training_loss 0.11104123350232839 test_loss: 0.10713725090026856
epoch: 101 training_loss 0.10787672411650419 test_loss: 0.11705371141433715
epoch: 102 training_loss 0.11011005725711584 test_loss: 0.09594631791114808
epoch: 103 training_loss 0.09955428637564183 test_loss: 0.10823004245758057
epoch: 104 training_loss 0.1065200862661004 test_loss: 0.10342241525650024
epoch: 105 training_loss 0.10188478786498308 test_loss: 0.11709381341934204
epoch: 106 training_loss 0.10779053837060928 test_loss: 0.10326673984527587
epoch: 107 training_loss 0.10275648813694715 test_loss: 0.10594776868820191
epoch: 108 training_loss 0.10308758884668351 test_loss: 0.08304787874221801
epoch: 109 training_loss 0.11528893470764161 test_loss: 0.10034332275390626
epoch: 110 training_loss 0.10013180198147893 test_loss: 0.10539143085479737
epoch: 111 training_loss 0.10944457691162825 test_loss: 0.11628708839416504
epoch: 112 training_loss 0.1089083787612617 test_loss: 0.11823133230209351
epoch: 113 training_loss 0.10053406108170748 test_loss: 0.09952685832977295
epoch: 114 training_loss 0.11249551717191934 test_loss: 0.10331496000289916
epoch: 115 training_loss 0.09882953651249408 test_loss: 0.10381550788879394
epoch: 116 training_loss 0.10644673060625792 test_loss: 0.11863272190093994
epoch: 117 training_loss 0.10497820293530821 test_loss: 0.1138993263244629
epoch: 118 training_loss 0.10609830182045699 test_loss: 0.1095537543296814
epoch: 119 training_loss 0.10444470219314099 test_loss: 0.11303219795227051
epoch: 120 training_loss 0.10245100809261203 test_loss: 0.11530414819717408
epoch: 121 training_loss 0.10235077273100615 test_loss: 0.10420424938201904
epoch: 122 training_loss 0.10636147003620863 test_loss: 0.09847555160522461
epoch: 123 training_loss 0.11076364398002625 test_loss: 0.11700160503387451
epoch: 124 training_loss 0.10458814285695553 test_loss: 0.10506272315979004
epoch: 125 training_loss 0.10015245649963617 test_loss: 0.10458812713623047
epoch: 126 training_loss 0.1070798484608531 test_loss: 0.11550004482269287
epoch: 127 training_loss 0.09939323999918998 test_loss: 0.1211887001991272
epoch: 128 training_loss 0.09854724142700434 test_loss: 0.1025127649307251
epoch: 129 training_loss 0.10322248354554177 test_loss: 0.12772659063339234
epoch: 130 training_loss 0.09994759768247605 test_loss: 0.10749868154525757
epoch: 131 training_loss 0.10524582566693425 test_loss: 0.10836182832717896
epoch: 132 training_loss 0.10523802250623703 test_loss: 0.11048989295959473
epoch: 133 training_loss 0.1023980943672359 test_loss: 0.10322419404983521
epoch: 134 training_loss 0.10178293012082577 test_loss: 0.108603835105896
epoch: 135 training_loss 0.10194016579538584 test_loss: 0.10496346950531006
epoch: 136 training_loss 0.10564248815178871 test_loss: 0.11348555088043213
epoch: 137 training_loss 0.101774822473526 test_loss: 0.096981680393219
epoch: 138 training_loss 0.10569799713790416 test_loss: 0.11174869537353516
epoch: 139 training_loss 0.10287548486143351 test_loss: 0.11464661359786987
epoch: 140 training_loss 0.09819906275719405 test_loss: 0.09100533723831176
epoch: 141 training_loss 0.11006487464532257 test_loss: 0.10956376791000366
epoch: 142 training_loss 0.10621576692909002 test_loss: 0.11043123006820679
epoch: 143 training_loss 0.09384479533880949 test_loss: 0.09464993476867675
epoch: 144 training_loss 0.10324150355532766 test_loss: 0.1067314863204956
epoch: 145 training_loss 0.11278324995189905 test_loss: 0.10711722373962403
epoch: 146 training_loss 0.1051182254217565 test_loss: 0.1038041353225708
epoch: 147 training_loss 0.1023533321171999 test_loss: 0.1047895908355713
epoch: 148 training_loss 0.10984826251864434 test_loss: 0.08927737474441529
epoch: 149 training_loss 0.10018019676208496 test_loss: 0.09907152056694031
epoch: 0 training_loss 47.61966135025025 test_loss: 22.635630798339843
epoch: 1 training_loss 17.804806814193725 test_loss: 14.570333862304688
epoch: 2 training_loss 12.764905233383178 test_loss: 11.749591064453124
epoch: 3 training_loss 10.461564474105835 test_loss: 9.68204574584961
epoch: 4 training_loss 9.07358512878418 test_loss: 8.276612854003906
epoch: 5 training_loss 7.844223985671997 test_loss: 7.385682678222656
epoch: 6 training_loss 6.856859316825867 test_loss: 6.37493782043457
epoch: 7 training_loss 6.234172606468201 test_loss: 5.700310897827149
epoch: 8 training_loss 5.62848653793335 test_loss: 5.253929138183594
epoch: 9 training_loss 5.284223432540894 test_loss: 5.027640151977539
epoch: 10 training_loss 4.7912208843231205 test_loss: 4.522524642944336
epoch: 11 training_loss 4.550112471580506 test_loss: 4.288018798828125
epoch: 12 training_loss 4.224099473953247 test_loss: 4.117169189453125
epoch: 13 training_loss 3.958912272453308 test_loss: 4.021826171875
epoch: 14 training_loss 3.8562802124023436 test_loss: 3.8334300994873045
epoch: 15 training_loss 3.668463315963745 test_loss: 3.4923625946044923
epoch: 16 training_loss 3.5543127036094666 test_loss: 3.295092010498047
epoch: 17 training_loss 3.501648397445679 test_loss: 3.3664173126220702
epoch: 18 training_loss 3.3265135884284973 test_loss: 3.241565704345703
epoch: 19 training_loss 3.2426634311676024 test_loss: 3.183578681945801
epoch: 20 training_loss 3.151287932395935 test_loss: 3.1611860275268553
epoch: 21 training_loss 3.0923555994033816 test_loss: 3.0914255142211915
epoch: 22 training_loss 2.9409130430221557 test_loss: 2.9020196914672853
epoch: 23 training_loss 2.86608806848526 test_loss: 2.8786678314208984
epoch: 24 training_loss 2.81817435503006 test_loss: 2.827886390686035
epoch: 25 training_loss 2.87386244058609 test_loss: 2.756027030944824
epoch: 26 training_loss 2.7838377809524535 test_loss: 2.638810729980469
epoch: 27 training_loss 2.663422839641571 test_loss: 2.6514875411987306
epoch: 28 training_loss 2.6277604508399963 test_loss: 2.5233413696289064
epoch: 29 training_loss 2.5116787433624266 test_loss: 2.5286386489868162
epoch: 30 training_loss 2.611182072162628 test_loss: 2.651251029968262
epoch: 31 training_loss 2.5170516395568847 test_loss: 2.5209609985351564
epoch: 32 training_loss 2.4930334520339965 test_loss: 2.4521858215332033
epoch: 33 training_loss 2.3871542263031005 test_loss: 2.3012765884399413
epoch: 34 training_loss 2.473202495574951 test_loss: 2.4197643280029295
epoch: 35 training_loss 2.3528305673599244 test_loss: 2.4784975051879883
epoch: 36 training_loss 2.3278326058387755 test_loss: 2.395088768005371
epoch: 37 training_loss 2.286858239173889 test_loss: 2.2590106964111327
epoch: 38 training_loss 2.2946525764465333 test_loss: 2.2234594345092775
epoch: 39 training_loss 2.2018740260601044 test_loss: 2.2218936920166015
epoch: 40 training_loss 2.2153495383262634 test_loss: 2.264460563659668
epoch: 41 training_loss 2.24569548368454 test_loss: 2.177235412597656
epoch: 42 training_loss 2.221921486854553 test_loss: 2.1166606903076173
epoch: 43 training_loss 2.1919533586502076 test_loss: 2.1777618408203123
epoch: 44 training_loss 2.1691305661201477 test_loss: 2.180979347229004
epoch: 45 training_loss 2.2096248483657837 test_loss: 2.1193376541137696
epoch: 46 training_loss 2.1198438370227812 test_loss: 2.095601272583008
epoch: 47 training_loss 2.0757133758068083 test_loss: 2.137075996398926
epoch: 48 training_loss 2.0730692315101624 test_loss: 2.0185358047485353
epoch: 49 training_loss 2.0323750507831573 test_loss: 2.1046274185180662
epoch: 50 training_loss 2.058276113271713 test_loss: 2.067793273925781
epoch: 51 training_loss 2.0077522587776184 test_loss: 2.032196807861328
epoch: 52 training_loss 2.042752023935318 test_loss: 1.9976425170898438
epoch: 53 training_loss 2.017106046676636 test_loss: 1.9448551177978515
epoch: 54 training_loss 1.971720231771469 test_loss: 2.0384477615356444
epoch: 55 training_loss 1.956706191301346 test_loss: 1.977396011352539
epoch: 56 training_loss 1.939391061067581 test_loss: 1.9051759719848633
epoch: 57 training_loss 1.9511656618118287 test_loss: 1.9553466796875
epoch: 58 training_loss 1.9052486836910247 test_loss: 1.9419450759887695
epoch: 59 training_loss 1.9140754294395448 test_loss: 1.874382781982422
epoch: 60 training_loss 1.9199790823459626 test_loss: 1.8645204544067382
epoch: 61 training_loss 1.8987476539611816 test_loss: 1.909684181213379
epoch: 62 training_loss 1.9394999611377717 test_loss: 1.9428831100463868
epoch: 63 training_loss 1.8886095666885376 test_loss: 1.8444538116455078
epoch: 64 training_loss 1.8604720914363861 test_loss: 1.8724330902099608
epoch: 65 training_loss 1.8598485374450684 test_loss: 1.8014245986938477
epoch: 66 training_loss 1.8860204708576203 test_loss: 1.8952537536621095
epoch: 67 training_loss 1.8431930601596833 test_loss: 1.7986257553100586
epoch: 68 training_loss 1.8389492523670197 test_loss: 1.8228536605834962
epoch: 69 training_loss 1.7967421078681947 test_loss: 1.7896961212158202
epoch: 70 training_loss 1.7996784830093384 test_loss: 1.8343936920166015
epoch: 71 training_loss 1.8175075244903565 test_loss: 1.7261545181274414
epoch: 72 training_loss 1.8038187396526337 test_loss: 1.7691408157348634
epoch: 73 training_loss 1.7902903413772584 test_loss: 1.7860851287841797
epoch: 74 training_loss 1.7994713282585144 test_loss: 1.7340738296508789
epoch: 75 training_loss 1.7652115523815155 test_loss: 1.7760421752929687
epoch: 76 training_loss 1.798657205104828 test_loss: 1.8698192596435548
epoch: 77 training_loss 1.7539238667488097 test_loss: 1.7385242462158204
epoch: 78 training_loss 1.7599664545059204 test_loss: 1.839318084716797
epoch: 79 training_loss 1.7233653306961059 test_loss: 1.759086799621582
epoch: 80 training_loss 1.7472956049442292 test_loss: 1.7672168731689453
epoch: 81 training_loss 1.7558084988594056 test_loss: 1.7799230575561524
epoch: 82 training_loss 1.736679081916809 test_loss: 1.7357263565063477
epoch: 83 training_loss 1.731844551563263 test_loss: 1.7104879379272462
epoch: 84 training_loss 1.6822941088676453 test_loss: 1.8060857772827148
epoch: 85 training_loss 1.6887505221366883 test_loss: 1.692374038696289
epoch: 86 training_loss 1.7217756819725036 test_loss: 1.6401353836059571
epoch: 87 training_loss 1.7324239540100097 test_loss: 1.7022079467773437
epoch: 88 training_loss 1.6847387039661408 test_loss: 1.7036476135253906
epoch: 89 training_loss 1.6740255010128022 test_loss: 1.6581781387329102
epoch: 90 training_loss 1.6971253228187562 test_loss: 1.660527229309082
epoch: 91 training_loss 1.6614798426628112 test_loss: 1.6192665100097656
epoch: 92 training_loss 1.6911905562877656 test_loss: 1.7161867141723632
epoch: 93 training_loss 1.6791319978237151 test_loss: 1.6721881866455077
epoch: 94 training_loss 1.6852538454532624 test_loss: 1.6652915954589844
epoch: 95 training_loss 1.6689349555969237 test_loss: 1.6244771957397461
epoch: 96 training_loss 1.6821692395210266 test_loss: 1.6598443984985352
epoch: 97 training_loss 1.6610988426208495 test_loss: 1.6288299560546875
epoch: 98 training_loss 1.6740484261512756 test_loss: 1.7125293731689453
epoch: 99 training_loss 1.6615356075763703 test_loss: 1.6202663421630858
epoch: 100 training_loss 1.6323298561573027 test_loss: 1.6851819992065429
epoch: 101 training_loss 1.6518827390670776 test_loss: 1.6498628616333009
epoch: 102 training_loss 1.6291953945159912 test_loss: 1.6876527786254882
epoch: 103 training_loss 1.613174420595169 test_loss: 1.6638622283935547
epoch: 104 training_loss 1.6191863715648651 test_loss: 1.5984450340270997
epoch: 105 training_loss 1.6383681964874268 test_loss: 1.6378990173339845
epoch: 106 training_loss 1.5938084387779237 test_loss: 1.5751091957092285
epoch: 107 training_loss 1.6066390728950501 test_loss: 1.6198740005493164
epoch: 108 training_loss 1.6346837162971497 test_loss: 1.5952337265014649
epoch: 109 training_loss 1.5933424365520477 test_loss: 1.5930158615112304
epoch: 110 training_loss 1.5858629274368286 test_loss: 1.5642440795898438
epoch: 111 training_loss 1.5923998308181764 test_loss: 1.5853095054626465
epoch: 112 training_loss 1.6021052849292756 test_loss: 1.5943424224853515
epoch: 113 training_loss 1.5944384336471558 test_loss: 1.6059127807617188
epoch: 114 training_loss 1.5844866287708284 test_loss: 1.5660103797912597
epoch: 115 training_loss 1.5964695882797242 test_loss: 1.6044897079467773
epoch: 116 training_loss 1.5717076361179352 test_loss: 1.589355754852295
epoch: 117 training_loss 1.5960038495063782 test_loss: 1.5455171585083007
epoch: 118 training_loss 1.5926157462596893 test_loss: 1.5573180198669434
epoch: 119 training_loss 1.5734349763393403 test_loss: 1.559078311920166
epoch: 120 training_loss 1.5629720771312714 test_loss: 1.5999669075012206
epoch: 121 training_loss 1.5457524073123932 test_loss: 1.5891740798950196
epoch: 122 training_loss 1.593656244277954 test_loss: 1.6059837341308594
epoch: 123 training_loss 1.5817535853385924 test_loss: 1.591872787475586
epoch: 124 training_loss 1.5540203213691712 test_loss: 1.5874950408935546
epoch: 125 training_loss 1.5551275742053985 test_loss: 1.5337428092956542
epoch: 126 training_loss 1.5615854394435882 test_loss: 1.5199159622192382
epoch: 127 training_loss 1.531774582862854 test_loss: 1.604502296447754
epoch: 128 training_loss 1.5499886357784272 test_loss: 1.5072519302368164
epoch: 129 training_loss 1.5262825298309326 test_loss: 1.5079959869384765
epoch: 130 training_loss 1.5248076915740967 test_loss: 1.5470389366149901
epoch: 131 training_loss 1.543587396144867 test_loss: 1.5630866050720216
epoch: 132 training_loss 1.5560466265678405 test_loss: 1.5167951583862305
epoch: 133 training_loss 1.5128412580490112 test_loss: 1.5664707183837892
epoch: 134 training_loss 1.5321038389205932 test_loss: 1.493761157989502
epoch: 135 training_loss 1.550979814529419 test_loss: 1.4943065643310547
epoch: 136 training_loss 1.5197533869743347 test_loss: 1.5227461814880372
epoch: 137 training_loss 1.534565247297287 test_loss: 1.5326653480529786
epoch: 138 training_loss 1.5102738451957702 test_loss: 1.5813639640808106
epoch: 139 training_loss 1.5317822372913361 test_loss: 1.5218422889709473
epoch: 140 training_loss 1.4956802761554717 test_loss: 1.511258602142334
epoch: 141 training_loss 1.492776563167572 test_loss: 1.5472834587097168
epoch: 142 training_loss 1.5134725320339202 test_loss: 1.4861180305480957
epoch: 143 training_loss 1.51868661403656 test_loss: 1.5642560005187989
epoch: 144 training_loss 1.5274547052383423 test_loss: 1.4830951690673828
epoch: 145 training_loss 1.4801504063606261 test_loss: 1.4921195030212402
epoch: 146 training_loss 1.4865617418289185 test_loss: 1.5120759963989259
epoch: 147 training_loss 1.5033552861213684 test_loss: 1.5066882133483888
epoch: 148 training_loss 1.5180459892749787 test_loss: 1.5218921661376954
epoch: 149 training_loss 1.4929345738887787 test_loss: 1.5034483909606933
5073.898728190399
episode: 0 training return: tensor(-118.9478, device='cuda:0')
episode: 1 training return: tensor(-62.1381, device='cuda:0')
episode: 2 training return: tensor(-62.4621, device='cuda:0')
episode: 3 training return: tensor(-176.5933, device='cuda:0')
epoch: 1 test_true_pfm: 5025.964252915281 sim_pfm: -125.80140377796488
episode: 4 training return: tensor(-145.5278, device='cuda:0')
episode: 5 training return: tensor(-190.1429, device='cuda:0')
episode: 6 training return: tensor(-149.5348, device='cuda:0')
episode: 7 training return: tensor(-164.6676, device='cuda:0')
epoch: 2 test_true_pfm: 5049.020058287395 sim_pfm: -61.569982214229334
episode: 8 training return: tensor(55.4416, device='cuda:0')
episode: 9 training return: tensor(-28.5812, device='cuda:0')
episode: 10 training return: tensor(-230.0575, device='cuda:0')
episode: 11 training return: tensor(-106.6272, device='cuda:0')
epoch: 3 test_true_pfm: 5064.288330583791 sim_pfm: -78.2586455202351
episode: 12 training return: tensor(-60.0119, device='cuda:0')
episode: 13 training return: tensor(-233.5522, device='cuda:0')
episode: 14 training return: tensor(-148.8055, device='cuda:0')
episode: 15 training return: tensor(-46.5431, device='cuda:0')
epoch: 4 test_true_pfm: 5192.461976064652 sim_pfm: -47.14717577424987
episode: 16 training return: tensor(64.4217, device='cuda:0')
episode: 17 training return: tensor(-28.2794, device='cuda:0')
episode: 18 training return: tensor(-95.0023, device='cuda:0')
episode: 19 training return: tensor(-114.3144, device='cuda:0')
epoch: 5 test_true_pfm: 5110.861025140668 sim_pfm: 23.897758782608435
episode: 20 training return: tensor(-30.3310, device='cuda:0')
episode: 21 training return: tensor(-130.1603, device='cuda:0')
episode: 22 training return: tensor(-38.3428, device='cuda:0')
episode: 23 training return: tensor(-109.9209, device='cuda:0')
epoch: 6 test_true_pfm: 4201.678486126343 sim_pfm: 11.138737151942527
episode: 24 training return: tensor(54.1190, device='cuda:0')
episode: 25 training return: tensor(3.7060, device='cuda:0')
episode: 26 training return: tensor(10.2261, device='cuda:0')
episode: 27 training return: tensor(-51.6982, device='cuda:0')
epoch: 7 test_true_pfm: 5148.794801999207 sim_pfm: -10.781944262387697
episode: 28 training return: tensor(-101.3345, device='cuda:0')
episode: 29 training return: tensor(-92.0901, device='cuda:0')
episode: 30 training return: tensor(-86.7153, device='cuda:0')
episode: 31 training return: tensor(-91.7673, device='cuda:0')
epoch: 8 test_true_pfm: 5229.72660018091 sim_pfm: -48.21105280254657
episode: 32 training return: tensor(-43.2595, device='cuda:0')
episode: 33 training return: tensor(-771.0155, device='cuda:0')
episode: 34 training return: tensor(-117.6320, device='cuda:0')
episode: 35 training return: tensor(-114.7230, device='cuda:0')
epoch: 9 test_true_pfm: 5224.97346962304 sim_pfm: 44.81377918155825
episode: 36 training return: tensor(22.6317, device='cuda:0')
episode: 37 training return: tensor(-61.3354, device='cuda:0')
episode: 38 training return: tensor(-57.5159, device='cuda:0')
episode: 39 training return: tensor(-22.1663, device='cuda:0')
epoch: 10 test_true_pfm: 5208.055380372488 sim_pfm: -227.0182282354023
episode: 40 training return: tensor(-10.4332, device='cuda:0')
episode: 41 training return: tensor(61.5736, device='cuda:0')
episode: 42 training return: tensor(161.4889, device='cuda:0')
episode: 43 training return: tensor(-16.2564, device='cuda:0')
epoch: 11 test_true_pfm: 5246.176574905558 sim_pfm: 6.890463859880886
episode: 44 training return: tensor(-14.0342, device='cuda:0')
episode: 45 training return: tensor(-123.4198, device='cuda:0')
episode: 46 training return: tensor(-20.8616, device='cuda:0')
episode: 47 training return: tensor(69.0427, device='cuda:0')
epoch: 12 test_true_pfm: 5127.859094945091 sim_pfm: 36.79429283788583
episode: 48 training return: tensor(65.7966, device='cuda:0')
episode: 49 training return: tensor(34.9460, device='cuda:0')
episode: 50 training return: tensor(-22.2443, device='cuda:0')
episode: 51 training return: tensor(-149.9153, device='cuda:0')
epoch: 13 test_true_pfm: 5202.305422991914 sim_pfm: 58.60463164238414
episode: 52 training return: tensor(38.6915, device='cuda:0')
episode: 53 training return: tensor(-11.9852, device='cuda:0')
episode: 54 training return: tensor(-36.0860, device='cuda:0')
episode: 55 training return: tensor(77.0196, device='cuda:0')
epoch: 14 test_true_pfm: 5343.416684927375 sim_pfm: 23.68217691369743
episode: 56 training return: tensor(-58.3094, device='cuda:0')
episode: 57 training return: tensor(-168.8712, device='cuda:0')
episode: 58 training return: tensor(79.8640, device='cuda:0')
episode: 59 training return: tensor(-202.9927, device='cuda:0')
epoch: 15 test_true_pfm: 5294.540817098375 sim_pfm: 58.29797428605767
episode: 60 training return: tensor(12.0580, device='cuda:0')
episode: 61 training return: tensor(26.5347, device='cuda:0')
episode: 62 training return: tensor(73.9071, device='cuda:0')
episode: 63 training return: tensor(27.5404, device='cuda:0')
epoch: 16 test_true_pfm: 5019.035962682728 sim_pfm: 56.33945059031248
episode: 64 training return: tensor(-9.2493, device='cuda:0')
episode: 65 training return: tensor(10.1148, device='cuda:0')
episode: 66 training return: tensor(-43.2360, device='cuda:0')
episode: 67 training return: tensor(190.6101, device='cuda:0')
epoch: 17 test_true_pfm: 5191.715851648731 sim_pfm: 115.25946131082794
episode: 68 training return: tensor(-35.7258, device='cuda:0')
episode: 69 training return: tensor(-39.8856, device='cuda:0')
episode: 70 training return: tensor(-35.6313, device='cuda:0')
episode: 71 training return: tensor(-331.1076, device='cuda:0')
epoch: 18 test_true_pfm: 5236.8855943596545 sim_pfm: 8.133989598146096
episode: 72 training return: tensor(69.7538, device='cuda:0')
episode: 73 training return: tensor(65.8223, device='cuda:0')
episode: 74 training return: tensor(-82.8807, device='cuda:0')
episode: 75 training return: tensor(145.2657, device='cuda:0')
epoch: 19 test_true_pfm: 5278.930038907695 sim_pfm: 121.08200838967848
episode: 76 training return: tensor(60.1998, device='cuda:0')
episode: 77 training return: tensor(-130.1017, device='cuda:0')
episode: 78 training return: tensor(43.5612, device='cuda:0')
episode: 79 training return: tensor(-32.0151, device='cuda:0')
epoch: 20 test_true_pfm: 5365.433954305403 sim_pfm: 126.13632573738384
episode: 80 training return: tensor(-164.7421, device='cuda:0')
episode: 81 training return: tensor(83.3020, device='cuda:0')
episode: 82 training return: tensor(13.4280, device='cuda:0')
episode: 83 training return: tensor(27.2275, device='cuda:0')
epoch: 21 test_true_pfm: 5297.485921776721 sim_pfm: 88.21636947465595
episode: 84 training return: tensor(129.2370, device='cuda:0')
episode: 85 training return: tensor(-15.2501, device='cuda:0')
episode: 86 training return: tensor(112.9470, device='cuda:0')
episode: 87 training return: tensor(4.8598, device='cuda:0')
epoch: 22 test_true_pfm: 5355.8131559885305 sim_pfm: 94.24503131523186
episode: 88 training return: tensor(71.0917, device='cuda:0')
episode: 89 training return: tensor(48.8451, device='cuda:0')
episode: 90 training return: tensor(153.0731, device='cuda:0')
episode: 91 training return: tensor(58.0995, device='cuda:0')
epoch: 23 test_true_pfm: 5277.749827261768 sim_pfm: 106.08904664486181
episode: 92 training return: tensor(80.9164, device='cuda:0')
episode: 93 training return: tensor(83.2363, device='cuda:0')
episode: 94 training return: tensor(-118.5846, device='cuda:0')
episode: 95 training return: tensor(96.5628, device='cuda:0')
epoch: 24 test_true_pfm: 5346.205500720499 sim_pfm: 40.8641333366589
episode: 96 training return: tensor(133.6933, device='cuda:0')
episode: 97 training return: tensor(80.4757, device='cuda:0')
episode: 98 training return: tensor(85.6748, device='cuda:0')
episode: 99 training return: tensor(54.4606, device='cuda:0')
epoch: 25 test_true_pfm: 5410.385324509316 sim_pfm: 162.18485416506883
episode: 100 training return: tensor(-43.8275, device='cuda:0')
episode: 101 training return: tensor(-231.1243, device='cuda:0')
episode: 102 training return: tensor(16.4697, device='cuda:0')
episode: 103 training return: tensor(88.1756, device='cuda:0')
epoch: 26 test_true_pfm: 5320.999379061419 sim_pfm: 141.55346260532193
episode: 104 training return: tensor(55.8510, device='cuda:0')
episode: 105 training return: tensor(123.9177, device='cuda:0')
episode: 106 training return: tensor(100.1418, device='cuda:0')
episode: 107 training return: tensor(203.0804, device='cuda:0')
epoch: 27 test_true_pfm: 5345.204332020163 sim_pfm: 180.1360985666785
episode: 108 training return: tensor(14.3816, device='cuda:0')
episode: 109 training return: tensor(78.8928, device='cuda:0')
episode: 110 training return: tensor(146.2574, device='cuda:0')
episode: 111 training return: tensor(51.4747, device='cuda:0')
epoch: 28 test_true_pfm: 5309.875532046802 sim_pfm: 215.25250026238305
episode: 112 training return: tensor(200.2838, device='cuda:0')
episode: 113 training return: tensor(33.0827, device='cuda:0')
episode: 114 training return: tensor(-30.3337, device='cuda:0')
episode: 115 training return: tensor(125.4401, device='cuda:0')
epoch: 29 test_true_pfm: 5339.545762689781 sim_pfm: 246.8353291884802
episode: 116 training return: tensor(-52.2315, device='cuda:0')
episode: 117 training return: tensor(30.6125, device='cuda:0')
episode: 118 training return: tensor(128.8401, device='cuda:0')
episode: 119 training return: tensor(103.2831, device='cuda:0')
epoch: 30 test_true_pfm: 5408.634084480086 sim_pfm: 142.6902993027276
episode: 120 training return: tensor(99.6367, device='cuda:0')
episode: 121 training return: tensor(88.5039, device='cuda:0')
episode: 122 training return: tensor(-57.4906, device='cuda:0')
episode: 123 training return: tensor(-51.0307, device='cuda:0')
epoch: 31 test_true_pfm: 5358.8869727302 sim_pfm: 216.7438402073749
episode: 124 training return: tensor(61.8853, device='cuda:0')
episode: 125 training return: tensor(74.6693, device='cuda:0')
episode: 126 training return: tensor(-46.6475, device='cuda:0')
episode: 127 training return: tensor(110.6171, device='cuda:0')
epoch: 32 test_true_pfm: 5378.3751537007 sim_pfm: 172.2550913154361
episode: 128 training return: tensor(74.7094, device='cuda:0')
episode: 129 training return: tensor(114.1695, device='cuda:0')
episode: 130 training return: tensor(69.4505, device='cuda:0')
episode: 131 training return: tensor(6.0947, device='cuda:0')
epoch: 33 test_true_pfm: 5395.197771199527 sim_pfm: 161.49553542470676
episode: 132 training return: tensor(202.4461, device='cuda:0')
episode: 133 training return: tensor(78.5128, device='cuda:0')
episode: 134 training return: tensor(37.6202, device='cuda:0')
episode: 135 training return: tensor(107.3867, device='cuda:0')
epoch: 34 test_true_pfm: 5425.693485563847 sim_pfm: 158.50461934285704
episode: 136 training return: tensor(-13.1739, device='cuda:0')
episode: 137 training return: tensor(104.4863, device='cuda:0')
episode: 138 training return: tensor(-113.1694, device='cuda:0')
episode: 139 training return: tensor(210.0089, device='cuda:0')
epoch: 35 test_true_pfm: 5460.754303417002 sim_pfm: 257.1542478638585
episode: 140 training return: tensor(63.8769, device='cuda:0')
episode: 141 training return: tensor(154.6376, device='cuda:0')
episode: 142 training return: tensor(-186.5230, device='cuda:0')
episode: 143 training return: tensor(41.7964, device='cuda:0')
epoch: 36 test_true_pfm: 5320.640677691161 sim_pfm: 221.51995300619942
episode: 144 training return: tensor(109.3809, device='cuda:0')
episode: 145 training return: tensor(132.4953, device='cuda:0')
episode: 146 training return: tensor(-7.1563, device='cuda:0')
episode: 147 training return: tensor(127.9499, device='cuda:0')
epoch: 37 test_true_pfm: 5394.343940880747 sim_pfm: 246.55734724355474
episode: 148 training return: tensor(75.5396, device='cuda:0')
episode: 149 training return: tensor(60.9454, device='cuda:0')
episode: 150 training return: tensor(124.5759, device='cuda:0')
episode: 151 training return: tensor(116.3156, device='cuda:0')
epoch: 38 test_true_pfm: 5345.639435076076 sim_pfm: 228.06742701503876
episode: 152 training return: tensor(212.4260, device='cuda:0')
episode: 153 training return: tensor(159.7384, device='cuda:0')
episode: 154 training return: tensor(209.7249, device='cuda:0')
episode: 155 training return: tensor(-8.2873, device='cuda:0')
epoch: 39 test_true_pfm: 5262.867306401909 sim_pfm: 241.91385893305414
episode: 156 training return: tensor(46.2103, device='cuda:0')
episode: 157 training return: tensor(89.8527, device='cuda:0')
episode: 158 training return: tensor(217.2076, device='cuda:0')
episode: 159 training return: tensor(67.8500, device='cuda:0')
epoch: 40 test_true_pfm: 5388.520303828521 sim_pfm: 174.95631612076735
episode: 160 training return: tensor(51.1225, device='cuda:0')
episode: 161 training return: tensor(166.6261, device='cuda:0')
episode: 162 training return: tensor(123.3349, device='cuda:0')
episode: 163 training return: tensor(56.6931, device='cuda:0')
epoch: 41 test_true_pfm: 5401.6393841570825 sim_pfm: 239.0516388429678
episode: 164 training return: tensor(-69.6559, device='cuda:0')
episode: 165 training return: tensor(109.1226, device='cuda:0')
episode: 166 training return: tensor(127.0016, device='cuda:0')
episode: 167 training return: tensor(154.8991, device='cuda:0')
epoch: 42 test_true_pfm: 5443.761135119533 sim_pfm: 234.26276991799628
episode: 168 training return: tensor(69.7991, device='cuda:0')
episode: 169 training return: tensor(27.3616, device='cuda:0')
episode: 170 training return: tensor(101.3365, device='cuda:0')
episode: 171 training return: tensor(-1.3950, device='cuda:0')
epoch: 43 test_true_pfm: 5358.355489185648 sim_pfm: 271.369383631992
episode: 172 training return: tensor(103.5270, device='cuda:0')
episode: 173 training return: tensor(-14.2499, device='cuda:0')
episode: 174 training return: tensor(134.0504, device='cuda:0')
episode: 175 training return: tensor(139.0284, device='cuda:0')
epoch: 44 test_true_pfm: 5453.149037514316 sim_pfm: 265.6179200499707
episode: 176 training return: tensor(222.8253, device='cuda:0')
episode: 177 training return: tensor(143.7613, device='cuda:0')
episode: 178 training return: tensor(66.0967, device='cuda:0')
episode: 179 training return: tensor(212.3233, device='cuda:0')
epoch: 45 test_true_pfm: 5418.647301883018 sim_pfm: 190.77642021287465
episode: 180 training return: tensor(146.0996, device='cuda:0')
episode: 181 training return: tensor(162.2167, device='cuda:0')
episode: 182 training return: tensor(18.1813, device='cuda:0')
episode: 183 training return: tensor(169.1851, device='cuda:0')
epoch: 46 test_true_pfm: 5468.280896463685 sim_pfm: 273.2628349345371
episode: 184 training return: tensor(-777.2606, device='cuda:0')
episode: 185 training return: tensor(28.0165, device='cuda:0')
episode: 186 training return: tensor(149.2015, device='cuda:0')
episode: 187 training return: tensor(189.7675, device='cuda:0')
epoch: 47 test_true_pfm: 5407.238504265802 sim_pfm: 220.64877562440233
episode: 188 training return: tensor(87.4641, device='cuda:0')
episode: 189 training return: tensor(-6.9993, device='cuda:0')
episode: 190 training return: tensor(89.6570, device='cuda:0')
episode: 191 training return: tensor(137.8543, device='cuda:0')
epoch: 48 test_true_pfm: 5474.130969925031 sim_pfm: 274.4716649844777
episode: 192 training return: tensor(129.6889, device='cuda:0')
episode: 193 training return: tensor(105.8271, device='cuda:0')
episode: 194 training return: tensor(110.5310, device='cuda:0')
episode: 195 training return: tensor(129.2886, device='cuda:0')
epoch: 49 test_true_pfm: 5392.057743898443 sim_pfm: 220.32565630336953
episode: 196 training return: tensor(148.3322, device='cuda:0')
episode: 197 training return: tensor(136.6733, device='cuda:0')
episode: 198 training return: tensor(126.2818, device='cuda:0')
episode: 199 training return: tensor(218.3949, device='cuda:0')
epoch: 50 test_true_pfm: 5352.906909211627 sim_pfm: 206.65076632598843
episode: 200 training return: tensor(136.3719, device='cuda:0')
episode: 201 training return: tensor(205.3954, device='cuda:0')
episode: 202 training return: tensor(73.1272, device='cuda:0')
episode: 203 training return: tensor(228.4114, device='cuda:0')
epoch: 51 test_true_pfm: 5394.542403242165 sim_pfm: 212.88939540383095
episode: 204 training return: tensor(189.7244, device='cuda:0')
episode: 205 training return: tensor(80.6638, device='cuda:0')
episode: 206 training return: tensor(138.6919, device='cuda:0')
episode: 207 training return: tensor(337.6568, device='cuda:0')
epoch: 52 test_true_pfm: 5431.784157266947 sim_pfm: 222.69120338869593
episode: 208 training return: tensor(134.4442, device='cuda:0')
episode: 209 training return: tensor(75.8045, device='cuda:0')
episode: 210 training return: tensor(179.6966, device='cuda:0')
episode: 211 training return: tensor(148.0515, device='cuda:0')
epoch: 53 test_true_pfm: 5428.154043337777 sim_pfm: 246.27147692496268
episode: 212 training return: tensor(100.5968, device='cuda:0')
episode: 213 training return: tensor(100.1931, device='cuda:0')
episode: 214 training return: tensor(98.1763, device='cuda:0')
episode: 215 training return: tensor(166.0287, device='cuda:0')
epoch: 54 test_true_pfm: 5399.9948576810575 sim_pfm: 227.00079723195327
episode: 216 training return: tensor(188.0634, device='cuda:0')
episode: 217 training return: tensor(156.7116, device='cuda:0')
episode: 218 training return: tensor(141.8749, device='cuda:0')
episode: 219 training return: tensor(77.2693, device='cuda:0')
epoch: 55 test_true_pfm: 5340.333722076694 sim_pfm: 303.83530747075565
episode: 220 training return: tensor(254.7024, device='cuda:0')
episode: 221 training return: tensor(78.5076, device='cuda:0')
episode: 222 training return: tensor(179.4194, device='cuda:0')
episode: 223 training return: tensor(96.6963, device='cuda:0')
epoch: 56 test_true_pfm: 5397.2446770582765 sim_pfm: 195.43496449949453
episode: 224 training return: tensor(270.0965, device='cuda:0')
episode: 225 training return: tensor(132.8139, device='cuda:0')
episode: 226 training return: tensor(127.8317, device='cuda:0')
episode: 227 training return: tensor(189.5458, device='cuda:0')
epoch: 57 test_true_pfm: 5423.247232397763 sim_pfm: 260.9075408243807
episode: 228 training return: tensor(100.0995, device='cuda:0')
episode: 229 training return: tensor(128.8386, device='cuda:0')
episode: 230 training return: tensor(121.8206, device='cuda:0')
episode: 231 training return: tensor(35.5044, device='cuda:0')
epoch: 58 test_true_pfm: 5419.351839051688 sim_pfm: 260.57559309812495
episode: 232 training return: tensor(112.2416, device='cuda:0')
episode: 233 training return: tensor(107.2319, device='cuda:0')
episode: 234 training return: tensor(191.3609, device='cuda:0')
episode: 235 training return: tensor(127.9918, device='cuda:0')
epoch: 59 test_true_pfm: 5528.778741886464 sim_pfm: 198.55603720311774
episode: 236 training return: tensor(294.5219, device='cuda:0')
episode: 237 training return: tensor(121.6169, device='cuda:0')
episode: 238 training return: tensor(28.2411, device='cuda:0')
episode: 239 training return: tensor(142.5769, device='cuda:0')
epoch: 60 test_true_pfm: 5545.152201843733 sim_pfm: 260.26123424214893
episode: 240 training return: tensor(102.8243, device='cuda:0')
episode: 241 training return: tensor(186.1655, device='cuda:0')
episode: 242 training return: tensor(181.3457, device='cuda:0')
episode: 243 training return: tensor(134.0528, device='cuda:0')
epoch: 61 test_true_pfm: 5522.934731217604 sim_pfm: 273.8591350464267
episode: 244 training return: tensor(169.3080, device='cuda:0')
episode: 245 training return: tensor(137.0084, device='cuda:0')
episode: 246 training return: tensor(66.7163, device='cuda:0')
episode: 247 training return: tensor(135.6050, device='cuda:0')
epoch: 62 test_true_pfm: 5434.056795488711 sim_pfm: 337.50856192652526
episode: 248 training return: tensor(29.3262, device='cuda:0')
episode: 249 training return: tensor(-677.9289, device='cuda:0')
episode: 250 training return: tensor(233.5361, device='cuda:0')
episode: 251 training return: tensor(115.7071, device='cuda:0')
epoch: 63 test_true_pfm: 5448.656546993857 sim_pfm: 258.4511948978082
episode: 252 training return: tensor(304.6536, device='cuda:0')
episode: 253 training return: tensor(140.8267, device='cuda:0')
episode: 254 training return: tensor(218.6454, device='cuda:0')
episode: 255 training return: tensor(251.4344, device='cuda:0')
epoch: 64 test_true_pfm: 5490.903305088909 sim_pfm: 301.02499363194994
episode: 256 training return: tensor(144.7433, device='cuda:0')
episode: 257 training return: tensor(84.7840, device='cuda:0')
episode: 258 training return: tensor(126.8389, device='cuda:0')
episode: 259 training return: tensor(149.0714, device='cuda:0')
epoch: 65 test_true_pfm: 5542.586564871621 sim_pfm: 232.35372514048746
episode: 260 training return: tensor(212.5721, device='cuda:0')
episode: 261 training return: tensor(177.9116, device='cuda:0')
episode: 262 training return: tensor(136.3452, device='cuda:0')
episode: 263 training return: tensor(106.6458, device='cuda:0')
epoch: 66 test_true_pfm: 5489.501674833624 sim_pfm: 213.0041086336132
episode: 264 training return: tensor(137.0226, device='cuda:0')
episode: 265 training return: tensor(95.7040, device='cuda:0')
episode: 266 training return: tensor(196.7534, device='cuda:0')
episode: 267 training return: tensor(109.5632, device='cuda:0')
epoch: 67 test_true_pfm: 5579.547731662847 sim_pfm: 241.97059979343126
episode: 268 training return: tensor(105.4985, device='cuda:0')
episode: 269 training return: tensor(229.5535, device='cuda:0')
episode: 270 training return: tensor(189.2739, device='cuda:0')
episode: 271 training return: tensor(231.1686, device='cuda:0')
epoch: 68 test_true_pfm: 5445.079499311379 sim_pfm: 288.5275266506942
episode: 272 training return: tensor(74.3136, device='cuda:0')
episode: 273 training return: tensor(104.6378, device='cuda:0')
episode: 274 training return: tensor(263.3612, device='cuda:0')
episode: 275 training return: tensor(145.8067, device='cuda:0')
epoch: 69 test_true_pfm: 5509.382714725779 sim_pfm: 295.48819906489615
episode: 276 training return: tensor(216.5804, device='cuda:0')
episode: 277 training return: tensor(55.4995, device='cuda:0')
episode: 278 training return: tensor(191.0869, device='cuda:0')
episode: 279 training return: tensor(233.4742, device='cuda:0')
epoch: 70 test_true_pfm: 5520.459578286001 sim_pfm: 303.2632886819968
episode: 280 training return: tensor(96.6278, device='cuda:0')
episode: 281 training return: tensor(103.7933, device='cuda:0')
episode: 282 training return: tensor(217.2833, device='cuda:0')
episode: 283 training return: tensor(271.6748, device='cuda:0')
epoch: 71 test_true_pfm: 5554.447020888055 sim_pfm: 272.90959837514674
episode: 284 training return: tensor(181.8605, device='cuda:0')
episode: 285 training return: tensor(180.5094, device='cuda:0')
episode: 286 training return: tensor(220.0807, device='cuda:0')
episode: 287 training return: tensor(203.1801, device='cuda:0')
epoch: 72 test_true_pfm: 5418.55145533795 sim_pfm: 278.9825223634446
episode: 288 training return: tensor(34.3441, device='cuda:0')
episode: 289 training return: tensor(269.5144, device='cuda:0')
episode: 290 training return: tensor(179.7392, device='cuda:0')
episode: 291 training return: tensor(168.7115, device='cuda:0')
epoch: 73 test_true_pfm: 5568.160931114525 sim_pfm: 294.04242507872794
episode: 292 training return: tensor(135.8554, device='cuda:0')
episode: 293 training return: tensor(89.2684, device='cuda:0')
episode: 294 training return: tensor(213.9678, device='cuda:0')
episode: 295 training return: tensor(226.6881, device='cuda:0')
epoch: 74 test_true_pfm: 5498.416648275869 sim_pfm: 287.921151823306
episode: 296 training return: tensor(171.9112, device='cuda:0')
episode: 297 training return: tensor(182.3312, device='cuda:0')
episode: 298 training return: tensor(200.9208, device='cuda:0')
episode: 299 training return: tensor(267.9163, device='cuda:0')
epoch: 75 test_true_pfm: 5494.694580834515 sim_pfm: 252.25684263107055
episode: 300 training return: tensor(149.1132, device='cuda:0')
episode: 301 training return: tensor(123.6463, device='cuda:0')
episode: 302 training return: tensor(238.7390, device='cuda:0')
episode: 303 training return: tensor(122.1620, device='cuda:0')
epoch: 76 test_true_pfm: 5585.9413968829585 sim_pfm: 272.6759609465371
episode: 304 training return: tensor(227.0550, device='cuda:0')
episode: 305 training return: tensor(4.2329, device='cuda:0')
episode: 306 training return: tensor(181.1859, device='cuda:0')
episode: 307 training return: tensor(194.7828, device='cuda:0')
epoch: 77 test_true_pfm: 5525.465152229037 sim_pfm: 267.16068521966616
episode: 308 training return: tensor(176.6618, device='cuda:0')
episode: 309 training return: tensor(-485.3735, device='cuda:0')
episode: 310 training return: tensor(198.7523, device='cuda:0')
episode: 311 training return: tensor(257.0154, device='cuda:0')
epoch: 78 test_true_pfm: 5480.955328844531 sim_pfm: 313.5415157376944
episode: 312 training return: tensor(215.6297, device='cuda:0')
episode: 313 training return: tensor(326.2424, device='cuda:0')
episode: 314 training return: tensor(288.9035, device='cuda:0')
episode: 315 training return: tensor(165.0505, device='cuda:0')
epoch: 79 test_true_pfm: 5536.674255991521 sim_pfm: 281.3814783657629
episode: 316 training return: tensor(285.8412, device='cuda:0')
episode: 317 training return: tensor(206.9993, device='cuda:0')
episode: 318 training return: tensor(232.3037, device='cuda:0')
episode: 319 training return: tensor(57.4300, device='cuda:0')
epoch: 80 test_true_pfm: 5564.832216065879 sim_pfm: 308.1675181061534
episode: 320 training return: tensor(177.6832, device='cuda:0')
episode: 321 training return: tensor(205.7464, device='cuda:0')
episode: 322 training return: tensor(77.0721, device='cuda:0')
episode: 323 training return: tensor(96.0253, device='cuda:0')
epoch: 81 test_true_pfm: 5497.1718852400345 sim_pfm: 307.81019325738697
episode: 324 training return: tensor(147.4978, device='cuda:0')
episode: 325 training return: tensor(117.7231, device='cuda:0')
episode: 326 training return: tensor(170.3592, device='cuda:0')
episode: 327 training return: tensor(180.5924, device='cuda:0')
epoch: 82 test_true_pfm: 5529.881470100251 sim_pfm: 332.43496400325483
episode: 328 training return: tensor(256.7480, device='cuda:0')
episode: 329 training return: tensor(194.9423, device='cuda:0')
episode: 330 training return: tensor(246.3045, device='cuda:0')
episode: 331 training return: tensor(259.1211, device='cuda:0')
epoch: 83 test_true_pfm: 5501.168726656172 sim_pfm: 260.89746090205153
episode: 332 training return: tensor(246.6689, device='cuda:0')
episode: 333 training return: tensor(228.9970, device='cuda:0')
episode: 334 training return: tensor(170.1320, device='cuda:0')
episode: 335 training return: tensor(209.8253, device='cuda:0')
epoch: 84 test_true_pfm: 5534.335683497414 sim_pfm: 298.8954401470061
episode: 336 training return: tensor(193.6937, device='cuda:0')
episode: 337 training return: tensor(151.7240, device='cuda:0')
episode: 338 training return: tensor(123.3262, device='cuda:0')
episode: 339 training return: tensor(135.9274, device='cuda:0')
epoch: 85 test_true_pfm: 5473.636864189273 sim_pfm: 374.3203464212517
episode: 340 training return: tensor(190.9842, device='cuda:0')
episode: 341 training return: tensor(135.3413, device='cuda:0')
episode: 342 training return: tensor(211.7173, device='cuda:0')
episode: 343 training return: tensor(197.4253, device='cuda:0')
epoch: 86 test_true_pfm: 5530.843728334509 sim_pfm: 318.7733514123247
episode: 344 training return: tensor(205.5807, device='cuda:0')
episode: 345 training return: tensor(121.2223, device='cuda:0')
episode: 346 training return: tensor(260.3568, device='cuda:0')
episode: 347 training return: tensor(245.8717, device='cuda:0')
epoch: 87 test_true_pfm: 5571.818841949011 sim_pfm: 277.3608761707049
episode: 348 training return: tensor(218.2747, device='cuda:0')
episode: 349 training return: tensor(109.1362, device='cuda:0')
episode: 350 training return: tensor(195.5760, device='cuda:0')
episode: 351 training return: tensor(237.6425, device='cuda:0')
epoch: 88 test_true_pfm: 5530.035638920465 sim_pfm: 323.59905766374624
episode: 352 training return: tensor(201.4111, device='cuda:0')
episode: 353 training return: tensor(226.7993, device='cuda:0')
episode: 354 training return: tensor(240.6404, device='cuda:0')
episode: 355 training return: tensor(255.0868, device='cuda:0')
epoch: 89 test_true_pfm: 5613.507440396085 sim_pfm: 326.4735847823322
episode: 356 training return: tensor(161.8628, device='cuda:0')
episode: 357 training return: tensor(116.6661, device='cuda:0')
episode: 358 training return: tensor(244.0466, device='cuda:0')
episode: 359 training return: tensor(240.8046, device='cuda:0')
epoch: 90 test_true_pfm: 5538.203961951615 sim_pfm: 274.42983611372375
episode: 360 training return: tensor(190.7451, device='cuda:0')
episode: 361 training return: tensor(59.3707, device='cuda:0')
episode: 362 training return: tensor(243.9884, device='cuda:0')
episode: 363 training return: tensor(260.8065, device='cuda:0')
epoch: 91 test_true_pfm: 5522.3251658273 sim_pfm: 267.6521707676584
episode: 364 training return: tensor(42.5100, device='cuda:0')
episode: 365 training return: tensor(179.0358, device='cuda:0')
episode: 366 training return: tensor(61.2863, device='cuda:0')
episode: 367 training return: tensor(207.3850, device='cuda:0')
epoch: 92 test_true_pfm: 5578.827794404781 sim_pfm: 272.9731280797375
episode: 368 training return: tensor(300.7536, device='cuda:0')
episode: 369 training return: tensor(353.5665, device='cuda:0')
episode: 370 training return: tensor(229.4707, device='cuda:0')
episode: 371 training return: tensor(294.0819, device='cuda:0')
epoch: 93 test_true_pfm: 5514.380695325756 sim_pfm: 334.75166550492094
episode: 372 training return: tensor(229.8562, device='cuda:0')
episode: 373 training return: tensor(322.6091, device='cuda:0')
episode: 374 training return: tensor(208.8048, device='cuda:0')
episode: 375 training return: tensor(253.4729, device='cuda:0')
epoch: 94 test_true_pfm: 5577.070780398141 sim_pfm: 317.9152935031646
episode: 376 training return: tensor(299.3443, device='cuda:0')
episode: 377 training return: tensor(221.1107, device='cuda:0')
episode: 378 training return: tensor(224.0047, device='cuda:0')
episode: 379 training return: tensor(310.7298, device='cuda:0')
epoch: 95 test_true_pfm: 5530.494103786102 sim_pfm: 344.7426319765703
episode: 380 training return: tensor(169.5524, device='cuda:0')
episode: 381 training return: tensor(179.8846, device='cuda:0')
episode: 382 training return: tensor(138.5407, device='cuda:0')
episode: 383 training return: tensor(142.7191, device='cuda:0')
epoch: 96 test_true_pfm: 5519.984054222634 sim_pfm: 353.6020785823542
episode: 384 training return: tensor(112.6629, device='cuda:0')
episode: 385 training return: tensor(246.2306, device='cuda:0')
episode: 386 training return: tensor(252.3676, device='cuda:0')
episode: 387 training return: tensor(217.0238, device='cuda:0')
epoch: 97 test_true_pfm: 5464.0060289693065 sim_pfm: 298.40930928704137
episode: 388 training return: tensor(128.3103, device='cuda:0')
episode: 389 training return: tensor(286.9049, device='cuda:0')
episode: 390 training return: tensor(261.0058, device='cuda:0')
episode: 391 training return: tensor(153.8091, device='cuda:0')
epoch: 98 test_true_pfm: 5586.747200538078 sim_pfm: 355.74858530903776
episode: 392 training return: tensor(232.0107, device='cuda:0')
episode: 393 training return: tensor(173.8420, device='cuda:0')
episode: 394 training return: tensor(142.5991, device='cuda:0')
episode: 395 training return: tensor(234.0806, device='cuda:0')
epoch: 99 test_true_pfm: 5478.074713281035 sim_pfm: 367.9280451073234
episode: 396 training return: tensor(243.6018, device='cuda:0')
episode: 397 training return: tensor(146.0499, device='cuda:0')
episode: 398 training return: tensor(203.7233, device='cuda:0')
episode: 399 training return: tensor(216.7871, device='cuda:0')
epoch: 100 test_true_pfm: 5514.256877308012 sim_pfm: 313.92238039602915
episode: 400 training return: tensor(229.3321, device='cuda:0')
episode: 401 training return: tensor(140.7668, device='cuda:0')
episode: 402 training return: tensor(243.9323, device='cuda:0')
episode: 403 training return: tensor(204.1958, device='cuda:0')
epoch: 101 test_true_pfm: 5534.528870252012 sim_pfm: 325.8148708936739
episode: 404 training return: tensor(123.9607, device='cuda:0')
episode: 405 training return: tensor(229.7782, device='cuda:0')
episode: 406 training return: tensor(285.7263, device='cuda:0')
episode: 407 training return: tensor(241.1284, device='cuda:0')
epoch: 102 test_true_pfm: 5647.292683365017 sim_pfm: 251.0759558515662
episode: 408 training return: tensor(93.0754, device='cuda:0')
episode: 409 training return: tensor(232.6404, device='cuda:0')
episode: 410 training return: tensor(297.4543, device='cuda:0')
episode: 411 training return: tensor(325.2519, device='cuda:0')
epoch: 103 test_true_pfm: 5593.027419902827 sim_pfm: 397.01532353811007
episode: 412 training return: tensor(198.4927, device='cuda:0')
episode: 413 training return: tensor(209.5748, device='cuda:0')
episode: 414 training return: tensor(245.0462, device='cuda:0')
episode: 415 training return: tensor(217.1345, device='cuda:0')
epoch: 104 test_true_pfm: 5580.819904403638 sim_pfm: 312.68520009791246
episode: 416 training return: tensor(287.2560, device='cuda:0')
episode: 417 training return: tensor(342.6991, device='cuda:0')
episode: 418 training return: tensor(241.7546, device='cuda:0')
episode: 419 training return: tensor(143.0773, device='cuda:0')
epoch: 105 test_true_pfm: 5540.143255180451 sim_pfm: 348.3779293817158
episode: 420 training return: tensor(209.9229, device='cuda:0')
episode: 421 training return: tensor(66.8583, device='cuda:0')
episode: 422 training return: tensor(169.3820, device='cuda:0')
episode: 423 training return: tensor(176.6325, device='cuda:0')
epoch: 106 test_true_pfm: 5652.33595602828 sim_pfm: 343.3761036406892
episode: 424 training return: tensor(213.9805, device='cuda:0')
episode: 425 training return: tensor(286.6316, device='cuda:0')
episode: 426 training return: tensor(293.2332, device='cuda:0')
episode: 427 training return: tensor(172.1759, device='cuda:0')
epoch: 107 test_true_pfm: 5501.567624350241 sim_pfm: 344.16777207628667
episode: 428 training return: tensor(270.3047, device='cuda:0')
episode: 429 training return: tensor(156.0182, device='cuda:0')
episode: 430 training return: tensor(217.5560, device='cuda:0')
episode: 431 training return: tensor(23.3069, device='cuda:0')
epoch: 108 test_true_pfm: 5621.588887963171 sim_pfm: 316.8911469767724
episode: 432 training return: tensor(242.1662, device='cuda:0')
episode: 433 training return: tensor(225.5325, device='cuda:0')
episode: 434 training return: tensor(178.0382, device='cuda:0')
episode: 435 training return: tensor(200.0731, device='cuda:0')
epoch: 109 test_true_pfm: 5559.513209844834 sim_pfm: 289.2307260248538
episode: 436 training return: tensor(275.0708, device='cuda:0')
episode: 437 training return: tensor(218.3250, device='cuda:0')
episode: 438 training return: tensor(289.7257, device='cuda:0')
episode: 439 training return: tensor(232.8156, device='cuda:0')
epoch: 110 test_true_pfm: 5523.270421837288 sim_pfm: 293.9438114409568
episode: 440 training return: tensor(241.0861, device='cuda:0')
episode: 441 training return: tensor(159.9940, device='cuda:0')
episode: 442 training return: tensor(271.5776, device='cuda:0')
episode: 443 training return: tensor(171.6193, device='cuda:0')
epoch: 111 test_true_pfm: 5509.237219916006 sim_pfm: 332.1573218484118
episode: 444 training return: tensor(324.1657, device='cuda:0')
episode: 445 training return: tensor(218.0820, device='cuda:0')
episode: 446 training return: tensor(-884.9137, device='cuda:0')
episode: 447 training return: tensor(236.2164, device='cuda:0')
epoch: 112 test_true_pfm: 5560.45619035864 sim_pfm: 276.7581466634486
episode: 448 training return: tensor(183.5364, device='cuda:0')
episode: 449 training return: tensor(263.4795, device='cuda:0')
episode: 450 training return: tensor(208.0518, device='cuda:0')
episode: 451 training return: tensor(190.0365, device='cuda:0')
epoch: 113 test_true_pfm: 5544.7344406017355 sim_pfm: 386.15081103871734
episode: 452 training return: tensor(294.5683, device='cuda:0')
episode: 453 training return: tensor(210.9270, device='cuda:0')
episode: 454 training return: tensor(147.7451, device='cuda:0')
episode: 455 training return: tensor(151.1318, device='cuda:0')
epoch: 114 test_true_pfm: 5515.257793635985 sim_pfm: 356.28826451536344
episode: 456 training return: tensor(214.3955, device='cuda:0')
episode: 457 training return: tensor(248.5814, device='cuda:0')
episode: 458 training return: tensor(278.2052, device='cuda:0')
episode: 459 training return: tensor(257.6343, device='cuda:0')
epoch: 115 test_true_pfm: 5627.217831155729 sim_pfm: 316.8006936265544
episode: 460 training return: tensor(209.4935, device='cuda:0')
episode: 461 training return: tensor(160.8623, device='cuda:0')
episode: 462 training return: tensor(190.0525, device='cuda:0')
episode: 463 training return: tensor(54.7692, device='cuda:0')
epoch: 116 test_true_pfm: 5577.0511874287695 sim_pfm: 317.5545415851132
episode: 464 training return: tensor(182.1415, device='cuda:0')
episode: 465 training return: tensor(195.0905, device='cuda:0')
episode: 466 training return: tensor(167.6186, device='cuda:0')
episode: 467 training return: tensor(236.0823, device='cuda:0')
epoch: 117 test_true_pfm: 5497.746878801988 sim_pfm: 348.3955925851672
episode: 468 training return: tensor(139.2840, device='cuda:0')
episode: 469 training return: tensor(-820.1891, device='cuda:0')
episode: 470 training return: tensor(103.7615, device='cuda:0')
episode: 471 training return: tensor(214.7533, device='cuda:0')
epoch: 118 test_true_pfm: 5615.394653423831 sim_pfm: 297.02667722628877
episode: 472 training return: tensor(75.7135, device='cuda:0')
episode: 473 training return: tensor(223.1641, device='cuda:0')
episode: 474 training return: tensor(318.3895, device='cuda:0')
episode: 475 training return: tensor(206.2775, device='cuda:0')
epoch: 119 test_true_pfm: 5622.804688636547 sim_pfm: 377.66445829502
episode: 476 training return: tensor(211.2838, device='cuda:0')
episode: 477 training return: tensor(302.9657, device='cuda:0')
episode: 478 training return: tensor(253.7773, device='cuda:0')
episode: 479 training return: tensor(160.0517, device='cuda:0')
epoch: 120 test_true_pfm: 5568.321971651539 sim_pfm: 357.8480913947278
episode: 480 training return: tensor(203.1470, device='cuda:0')
episode: 481 training return: tensor(304.2366, device='cuda:0')
episode: 482 training return: tensor(220.4692, device='cuda:0')
episode: 483 training return: tensor(182.6959, device='cuda:0')
epoch: 121 test_true_pfm: 5632.757052268226 sim_pfm: 326.3749217537309
episode: 484 training return: tensor(147.7783, device='cuda:0')
episode: 485 training return: tensor(55.9789, device='cuda:0')
episode: 486 training return: tensor(370.9954, device='cuda:0')
episode: 487 training return: tensor(303.7305, device='cuda:0')
epoch: 122 test_true_pfm: 5572.496899748144 sim_pfm: 256.5016799216003
episode: 488 training return: tensor(261.1354, device='cuda:0')
episode: 489 training return: tensor(260.1903, device='cuda:0')
episode: 490 training return: tensor(283.6839, device='cuda:0')
episode: 491 training return: tensor(250.6097, device='cuda:0')
epoch: 123 test_true_pfm: 5638.509226414267 sim_pfm: 357.09834818225744
episode: 492 training return: tensor(280.2197, device='cuda:0')
episode: 493 training return: tensor(238.7303, device='cuda:0')
episode: 494 training return: tensor(210.1815, device='cuda:0')
episode: 495 training return: tensor(179.8151, device='cuda:0')
epoch: 124 test_true_pfm: 5584.616328711359 sim_pfm: 370.7644126750529
episode: 496 training return: tensor(226.3100, device='cuda:0')
episode: 497 training return: tensor(261.1619, device='cuda:0')
episode: 498 training return: tensor(351.2139, device='cuda:0')
episode: 499 training return: tensor(133.5245, device='cuda:0')
epoch: 125 test_true_pfm: 5636.686056176109 sim_pfm: 329.9087735484063
episode: 500 training return: tensor(144.1070, device='cuda:0')
episode: 501 training return: tensor(154.1141, device='cuda:0')
episode: 502 training return: tensor(321.2685, device='cuda:0')
episode: 503 training return: tensor(218.2485, device='cuda:0')
epoch: 126 test_true_pfm: 5667.490746729739 sim_pfm: 357.3991539121295
episode: 504 training return: tensor(377.6850, device='cuda:0')
episode: 505 training return: tensor(288.1178, device='cuda:0')
episode: 506 training return: tensor(222.6350, device='cuda:0')
episode: 507 training return: tensor(148.2991, device='cuda:0')
epoch: 127 test_true_pfm: 5604.19567726964 sim_pfm: 320.22012056893436
episode: 508 training return: tensor(204.3023, device='cuda:0')
episode: 509 training return: tensor(334.9605, device='cuda:0')
episode: 510 training return: tensor(171.9461, device='cuda:0')
episode: 511 training return: tensor(357.6393, device='cuda:0')
epoch: 128 test_true_pfm: 5566.28372085174 sim_pfm: 331.7635363647714
episode: 512 training return: tensor(319.8264, device='cuda:0')
episode: 513 training return: tensor(257.7617, device='cuda:0')
episode: 514 training return: tensor(193.5982, device='cuda:0')
episode: 515 training return: tensor(334.4037, device='cuda:0')
epoch: 129 test_true_pfm: 5527.636063303893 sim_pfm: 262.63692112762755
episode: 516 training return: tensor(276.0700, device='cuda:0')
episode: 517 training return: tensor(306.2653, device='cuda:0')
episode: 518 training return: tensor(221.1749, device='cuda:0')
episode: 519 training return: tensor(356.3003, device='cuda:0')
epoch: 130 test_true_pfm: 5590.480935449122 sim_pfm: 375.8431878531119
episode: 520 training return: tensor(145.9734, device='cuda:0')
episode: 521 training return: tensor(203.1733, device='cuda:0')
episode: 522 training return: tensor(237.5930, device='cuda:0')
episode: 523 training return: tensor(287.3958, device='cuda:0')
epoch: 131 test_true_pfm: 5593.842653308704 sim_pfm: 369.59566991675337
episode: 524 training return: tensor(207.9259, device='cuda:0')
episode: 525 training return: tensor(236.8464, device='cuda:0')
episode: 526 training return: tensor(102.1563, device='cuda:0')
episode: 527 training return: tensor(233.8162, device='cuda:0')
epoch: 132 test_true_pfm: 5597.103361406697 sim_pfm: 432.1878221344862
episode: 528 training return: tensor(254.3794, device='cuda:0')
episode: 529 training return: tensor(185.2614, device='cuda:0')
episode: 530 training return: tensor(295.9208, device='cuda:0')
episode: 531 training return: tensor(175.0387, device='cuda:0')
epoch: 133 test_true_pfm: 5597.036298325081 sim_pfm: 327.1399241130275
episode: 532 training return: tensor(278.4834, device='cuda:0')
episode: 533 training return: tensor(231.3507, device='cuda:0')
episode: 534 training return: tensor(315.4261, device='cuda:0')
episode: 535 training return: tensor(228.1413, device='cuda:0')
epoch: 134 test_true_pfm: 5605.781751684529 sim_pfm: 364.2326191584386
episode: 536 training return: tensor(255.0385, device='cuda:0')
episode: 537 training return: tensor(299.8402, device='cuda:0')
episode: 538 training return: tensor(188.8559, device='cuda:0')
episode: 539 training return: tensor(140.6486, device='cuda:0')
epoch: 135 test_true_pfm: 5688.935371798691 sim_pfm: 333.65937045938335
episode: 540 training return: tensor(130.0289, device='cuda:0')
episode: 541 training return: tensor(330.9642, device='cuda:0')
episode: 542 training return: tensor(234.7229, device='cuda:0')
episode: 543 training return: tensor(50.9245, device='cuda:0')
epoch: 136 test_true_pfm: 5644.8081836425745 sim_pfm: 297.52277720626444
episode: 544 training return: tensor(209.1734, device='cuda:0')
episode: 545 training return: tensor(179.1420, device='cuda:0')
episode: 546 training return: tensor(362.2646, device='cuda:0')
episode: 547 training return: tensor(239.5746, device='cuda:0')
epoch: 137 test_true_pfm: 5625.913736837567 sim_pfm: 397.6267471060467
episode: 548 training return: tensor(301.2250, device='cuda:0')
episode: 549 training return: tensor(269.3835, device='cuda:0')
episode: 550 training return: tensor(292.6235, device='cuda:0')
episode: 551 training return: tensor(109.0850, device='cuda:0')
epoch: 138 test_true_pfm: 5621.507200026369 sim_pfm: 401.19623051791376
episode: 552 training return: tensor(145.8872, device='cuda:0')
episode: 553 training return: tensor(142.8733, device='cuda:0')
episode: 554 training return: tensor(253.9382, device='cuda:0')
episode: 555 training return: tensor(172.7827, device='cuda:0')
epoch: 139 test_true_pfm: 5685.548622640536 sim_pfm: 339.4745804118381
episode: 556 training return: tensor(194.2643, device='cuda:0')
episode: 557 training return: tensor(361.1006, device='cuda:0')
episode: 558 training return: tensor(283.3770, device='cuda:0')
episode: 559 training return: tensor(266.7137, device='cuda:0')
epoch: 140 test_true_pfm: 5595.516202860959 sim_pfm: 415.18047165784327
episode: 560 training return: tensor(314.7313, device='cuda:0')
episode: 561 training return: tensor(221.0645, device='cuda:0')
episode: 562 training return: tensor(285.7431, device='cuda:0')
episode: 563 training return: tensor(229.0694, device='cuda:0')
epoch: 141 test_true_pfm: 5586.332499646614 sim_pfm: 359.20544009504374
episode: 564 training return: tensor(334.6845, device='cuda:0')
episode: 565 training return: tensor(257.0628, device='cuda:0')
episode: 566 training return: tensor(253.9833, device='cuda:0')
episode: 567 training return: tensor(179.4094, device='cuda:0')
epoch: 142 test_true_pfm: 5664.208864614261 sim_pfm: 363.97018758576206
episode: 568 training return: tensor(325.9246, device='cuda:0')
episode: 569 training return: tensor(275.7326, device='cuda:0')
episode: 570 training return: tensor(233.5693, device='cuda:0')
episode: 571 training return: tensor(247.4561, device='cuda:0')
epoch: 143 test_true_pfm: 5624.56400641918 sim_pfm: 383.478645126801
episode: 572 training return: tensor(212.5096, device='cuda:0')
episode: 573 training return: tensor(235.0470, device='cuda:0')
episode: 574 training return: tensor(267.8175, device='cuda:0')
episode: 575 training return: tensor(169.3825, device='cuda:0')
epoch: 144 test_true_pfm: 5592.788512344737 sim_pfm: 351.9095550568697
episode: 576 training return: tensor(172.5239, device='cuda:0')
episode: 577 training return: tensor(395.7859, device='cuda:0')
episode: 578 training return: tensor(313.8923, device='cuda:0')
episode: 579 training return: tensor(238.0660, device='cuda:0')
epoch: 145 test_true_pfm: 5682.785872270073 sim_pfm: 343.7429280538151
episode: 580 training return: tensor(198.7855, device='cuda:0')
episode: 581 training return: tensor(260.8102, device='cuda:0')
episode: 582 training return: tensor(182.7038, device='cuda:0')
episode: 583 training return: tensor(273.4451, device='cuda:0')
epoch: 146 test_true_pfm: 5617.325672365293 sim_pfm: 368.59505008832394
episode: 584 training return: tensor(315.3645, device='cuda:0')
episode: 585 training return: tensor(319.9146, device='cuda:0')
episode: 586 training return: tensor(224.8666, device='cuda:0')
episode: 587 training return: tensor(164.4817, device='cuda:0')
epoch: 147 test_true_pfm: 5646.0573203542945 sim_pfm: 436.9255101127589
episode: 588 training return: tensor(335.0261, device='cuda:0')
episode: 589 training return: tensor(317.4525, device='cuda:0')
episode: 590 training return: tensor(319.9003, device='cuda:0')
episode: 591 training return: tensor(243.8043, device='cuda:0')
epoch: 148 test_true_pfm: 5648.980769105132 sim_pfm: 329.2319128937476
episode: 592 training return: tensor(224.6552, device='cuda:0')
episode: 593 training return: tensor(396.5560, device='cuda:0')
episode: 594 training return: tensor(187.2707, device='cuda:0')
episode: 595 training return: tensor(131.2503, device='cuda:0')
epoch: 149 test_true_pfm: 5586.419463972728 sim_pfm: 325.51923877835117
episode: 596 training return: tensor(215.5956, device='cuda:0')
episode: 597 training return: tensor(192.8196, device='cuda:0')
episode: 598 training return: tensor(292.4364, device='cuda:0')
episode: 599 training return: tensor(249.4682, device='cuda:0')
epoch: 150 test_true_pfm: 5631.941750353394 sim_pfm: 362.53702706664143
