['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '0', '--data', '30000']
epoch: 0 training_loss 0.2440868942439556 test_loss: 0.23219583034515381
epoch: 1 training_loss 0.20184130400419234 test_loss: 0.20738790035247803
epoch: 2 training_loss 0.19405750378966333 test_loss: 0.20662972927093506
epoch: 3 training_loss 0.20182140678167343 test_loss: 0.17701191902160646
epoch: 4 training_loss 0.19431011594831943 test_loss: 0.21206462383270264
epoch: 5 training_loss 0.19518783956766128 test_loss: 0.15326119661331178
epoch: 6 training_loss 0.1852319598197937 test_loss: 0.1955939769744873
epoch: 7 training_loss 0.19054497160017492 test_loss: 0.17774327993392944
epoch: 8 training_loss 0.18888331472873687 test_loss: 0.16323179006576538
epoch: 9 training_loss 0.18469884790480137 test_loss: 0.18172242641448974
epoch: 10 training_loss 0.1873857744783163 test_loss: 0.200050687789917
epoch: 11 training_loss 0.19157656125724315 test_loss: 0.17349276542663575
epoch: 12 training_loss 0.18632653191685677 test_loss: 0.17506588697433473
epoch: 13 training_loss 0.18447440311312677 test_loss: 0.17050201892852784
epoch: 14 training_loss 0.1854841747134924 test_loss: 0.17600983381271362
epoch: 15 training_loss 0.17813585624098777 test_loss: 0.17896662950515746
epoch: 16 training_loss 0.179461370408535 test_loss: 0.18466010093688964
epoch: 17 training_loss 0.182177577316761 test_loss: 0.1833440899848938
epoch: 18 training_loss 0.18695651814341546 test_loss: 0.18566881418228148
epoch: 19 training_loss 0.18823526680469513 test_loss: 0.18259954452514648
epoch: 20 training_loss 0.18803136557340622 test_loss: 0.1791725516319275
epoch: 21 training_loss 0.1828020590543747 test_loss: 0.1864599347114563
epoch: 22 training_loss 0.1822226685285568 test_loss: 0.19420061111450196
epoch: 23 training_loss 0.18931273840367793 test_loss: 0.16905319690704346
epoch: 24 training_loss 0.18109066039323807 test_loss: 0.18083486557006836
epoch: 25 training_loss 0.1771504854410887 test_loss: 0.17384294271469117
epoch: 26 training_loss 0.1795423310250044 test_loss: 0.17978354692459106
epoch: 27 training_loss 0.17834823727607726 test_loss: 0.1818707227706909
epoch: 28 training_loss 0.17808544225990772 test_loss: 0.1921013593673706
epoch: 29 training_loss 0.1789247652888298 test_loss: 0.18637720346450806
epoch: 30 training_loss 0.18273304164409637 test_loss: 0.1815129280090332
epoch: 31 training_loss 0.1758154233917594 test_loss: 0.18089301586151124
epoch: 32 training_loss 0.19020988442003728 test_loss: 0.1881190776824951
epoch: 33 training_loss 0.1866798371076584 test_loss: 0.16971938610076903
epoch: 34 training_loss 0.1768955198675394 test_loss: 0.17332086563110352
epoch: 35 training_loss 0.18582988001406192 test_loss: 0.1709771990776062
epoch: 36 training_loss 0.1836523811519146 test_loss: 0.1912998914718628
epoch: 37 training_loss 0.17899160899221897 test_loss: 0.1898460268974304
epoch: 38 training_loss 0.18130803227424622 test_loss: 0.1678157329559326
epoch: 39 training_loss 0.18004951126873492 test_loss: 0.17771608829498292
epoch: 40 training_loss 0.17669919334352016 test_loss: 0.15852904319763184
epoch: 41 training_loss 0.17952306181192398 test_loss: 0.19171369075775146
epoch: 42 training_loss 0.1780012957751751 test_loss: 0.1791927456855774
epoch: 43 training_loss 0.18191278845071793 test_loss: 0.17316581010818483
epoch: 44 training_loss 0.18797626346349716 test_loss: 0.1782710313796997
epoch: 45 training_loss 0.18242591962218285 test_loss: 0.184653377532959
epoch: 46 training_loss 0.1828003665059805 test_loss: 0.180168616771698
epoch: 47 training_loss 0.18416750848293303 test_loss: 0.1757008194923401
epoch: 48 training_loss 0.17627746514976025 test_loss: 0.17947874069213868
epoch: 49 training_loss 0.17484609484672547 test_loss: 0.18578206300735473
epoch: 50 training_loss 0.18198602497577668 test_loss: 0.20201382637023926
epoch: 51 training_loss 0.173587546646595 test_loss: 0.2070826530456543
epoch: 52 training_loss 0.17751630671322347 test_loss: 0.16049312353134154
epoch: 53 training_loss 0.1800040002912283 test_loss: 0.18563438653945924
epoch: 54 training_loss 0.17767134867608547 test_loss: 0.17152189016342162
epoch: 55 training_loss 0.17121666714549064 test_loss: 0.18501975536346435
epoch: 56 training_loss 0.18837934888899327 test_loss: 0.17558788061141967
epoch: 57 training_loss 0.17139305174350739 test_loss: 0.175201153755188
epoch: 58 training_loss 0.17439867436885834 test_loss: 0.1810097098350525
epoch: 59 training_loss 0.17849619574844838 test_loss: 0.17922881841659546
epoch: 60 training_loss 0.18417837351560593 test_loss: 0.15473226308822632
epoch: 61 training_loss 0.18015179358422756 test_loss: 0.19069812297821045
epoch: 62 training_loss 0.17810244083404542 test_loss: 0.16747615337371827
epoch: 63 training_loss 0.17089818350970745 test_loss: 0.1850326418876648
epoch: 64 training_loss 0.18095242746174336 test_loss: 0.17194826602935792
epoch: 65 training_loss 0.17734099105000495 test_loss: 0.1746610403060913
epoch: 66 training_loss 0.18006865963339805 test_loss: 0.1714981436729431
epoch: 67 training_loss 0.18126467131078244 test_loss: 0.19056062698364257
epoch: 68 training_loss 0.17155924636870623 test_loss: 0.18406888246536254
epoch: 69 training_loss 0.17946928583085536 test_loss: 0.1704334497451782
epoch: 70 training_loss 0.17829421035945414 test_loss: 0.19061259031295777
epoch: 71 training_loss 0.16493893843144178 test_loss: 0.18169909715652466
epoch: 72 training_loss 0.18238801255822182 test_loss: 0.18357731103897096
epoch: 73 training_loss 0.1776959414035082 test_loss: 0.19021294116973878
epoch: 74 training_loss 0.17888323165476322 test_loss: 0.18292678594589235
epoch: 75 training_loss 0.1790605664998293 test_loss: 0.15608348846435546
epoch: 76 training_loss 0.18156066671013832 test_loss: 0.18035157918930053
epoch: 77 training_loss 0.16846784755587577 test_loss: 0.18742309808731078
epoch: 78 training_loss 0.18341273956000806 test_loss: 0.16513122320175172
epoch: 79 training_loss 0.17491541177034378 test_loss: 0.18200668096542358
epoch: 80 training_loss 0.17375866662710904 test_loss: 0.16852346658706666
epoch: 81 training_loss 0.17771912612020968 test_loss: 0.17001148462295532
epoch: 82 training_loss 0.1718673374503851 test_loss: 0.17721092700958252
epoch: 83 training_loss 0.17324040308594704 test_loss: 0.1552114963531494
epoch: 84 training_loss 0.16726076528429984 test_loss: 0.17326894998550416
epoch: 85 training_loss 0.1869545926898718 test_loss: 0.1802416205406189
epoch: 86 training_loss 0.17452935248613358 test_loss: 0.17648212909698485
epoch: 87 training_loss 0.16881741791963578 test_loss: 0.1687609314918518
epoch: 88 training_loss 0.17795162193477154 test_loss: 0.17532473802566528
epoch: 89 training_loss 0.1771317058056593 test_loss: 0.19190987348556518
epoch: 90 training_loss 0.17373554490506649 test_loss: 0.1797760009765625
epoch: 91 training_loss 0.1759064245596528 test_loss: 0.1707458019256592
epoch: 92 training_loss 0.17228018820285798 test_loss: 0.1834697961807251
epoch: 93 training_loss 0.17743828408420087 test_loss: 0.16784063577651978
epoch: 94 training_loss 0.17893660731613636 test_loss: 0.16861783266067504
epoch: 95 training_loss 0.1717255724221468 test_loss: 0.1767970323562622
epoch: 96 training_loss 0.17726194411516188 test_loss: 0.17449508905410765
epoch: 97 training_loss 0.17397547036409378 test_loss: 0.18686380386352539
epoch: 98 training_loss 0.1787677190452814 test_loss: 0.19319018125534057
epoch: 99 training_loss 0.17874116595834494 test_loss: 0.19978026151657105
epoch: 100 training_loss 0.1724968359619379 test_loss: 0.1823730945587158
epoch: 101 training_loss 0.17923002198338508 test_loss: 0.19334603548049928
epoch: 102 training_loss 0.18679257109761238 test_loss: 0.1734616994857788
epoch: 103 training_loss 0.17589467763900757 test_loss: 0.16149978637695311
epoch: 104 training_loss 0.1775563208758831 test_loss: 0.169721519947052
epoch: 105 training_loss 0.18361108377575874 test_loss: 0.17929712533950806
epoch: 106 training_loss 0.18315659798681735 test_loss: 0.18229047060012818
epoch: 107 training_loss 0.1774453626573086 test_loss: 0.1699591636657715
epoch: 108 training_loss 0.1748560257256031 test_loss: 0.182425594329834
epoch: 109 training_loss 0.1788530531525612 test_loss: 0.15300447940826417
epoch: 110 training_loss 0.17878484774380923 test_loss: 0.18883423805236815
epoch: 111 training_loss 0.16840317010879516 test_loss: 0.16321512460708618
epoch: 112 training_loss 0.1721329267323017 test_loss: 0.19466508626937867
epoch: 113 training_loss 0.17182179480791093 test_loss: 0.1799646258354187
epoch: 114 training_loss 0.17550812296569349 test_loss: 0.15738329887390137
epoch: 115 training_loss 0.17606979459524155 test_loss: 0.1818306565284729
epoch: 116 training_loss 0.17938934586942196 test_loss: 0.18477193117141724
epoch: 117 training_loss 0.17827343478798865 test_loss: 0.17679868936538695
epoch: 118 training_loss 0.17504825934767723 test_loss: 0.17452880144119262
epoch: 119 training_loss 0.16742584794759752 test_loss: 0.17510251998901366
epoch: 120 training_loss 0.1795815146714449 test_loss: 0.1884554147720337
epoch: 121 training_loss 0.17632517375051976 test_loss: 0.17866640090942382
epoch: 122 training_loss 0.17837840922176837 test_loss: 0.17170218229293824
epoch: 123 training_loss 0.1732878129184246 test_loss: 0.18301359415054322
epoch: 124 training_loss 0.17259660564363002 test_loss: 0.17179137468338013
epoch: 125 training_loss 0.1771099829673767 test_loss: 0.16501911878585815
epoch: 126 training_loss 0.18228268399834632 test_loss: 0.18547606468200684
epoch: 127 training_loss 0.1779626102745533 test_loss: 0.17569812536239623
epoch: 128 training_loss 0.18101745299994945 test_loss: 0.17567590475082398
epoch: 129 training_loss 0.17515008457005024 test_loss: 0.17736575603485108
epoch: 130 training_loss 0.16921773977577687 test_loss: 0.19303375482559204
epoch: 131 training_loss 0.17868703573942185 test_loss: 0.18282727003097535
epoch: 132 training_loss 0.17865983344614506 test_loss: 0.16879266500473022
epoch: 133 training_loss 0.16911838009953498 test_loss: 0.18813544511795044
epoch: 134 training_loss 0.17776131719350816 test_loss: 0.1893460750579834
epoch: 135 training_loss 0.17540461402386426 test_loss: 0.16747299432754517
epoch: 136 training_loss 0.17503205075860023 test_loss: 0.1777050733566284
epoch: 137 training_loss 0.18190353699028491 test_loss: 0.1603922963142395
epoch: 138 training_loss 0.18259706147015095 test_loss: 0.1611497163772583
epoch: 139 training_loss 0.17807343736290931 test_loss: 0.18246079683303834
epoch: 140 training_loss 0.1748514736443758 test_loss: 0.17414398193359376
epoch: 141 training_loss 0.1827261061221361 test_loss: 0.16484804153442384
epoch: 142 training_loss 0.17765339873731137 test_loss: 0.19250136613845825
epoch: 143 training_loss 0.16658898644149303 test_loss: 0.1824400544166565
epoch: 144 training_loss 0.1811251627653837 test_loss: 0.18208565711975097
epoch: 145 training_loss 0.16761221811175347 test_loss: 0.17405625581741332
epoch: 146 training_loss 0.1747888918220997 test_loss: 0.1613175630569458
epoch: 147 training_loss 0.16943492233753205 test_loss: 0.1730724096298218
epoch: 148 training_loss 0.17179103009402752 test_loss: 0.17868201732635497
epoch: 149 training_loss 0.18115167394280435 test_loss: 0.18812475204467774
epoch: 0 training_loss 8.025836482048035 test_loss: 4.775569915771484
epoch: 1 training_loss 3.727004597187042 test_loss: 2.987033462524414
epoch: 2 training_loss 2.5602151465415957 test_loss: 2.1776851654052733
epoch: 3 training_loss 2.0333659791946412 test_loss: 1.7634567260742187
epoch: 4 training_loss 1.7126436471939086 test_loss: 1.580941104888916
epoch: 5 training_loss 1.551435739994049 test_loss: 1.4552515983581542
epoch: 6 training_loss 1.4234929943084718 test_loss: 1.3782259941101074
epoch: 7 training_loss 1.315164874792099 test_loss: 1.2229756355285644
epoch: 8 training_loss 1.2427798187732697 test_loss: 1.2535739898681642
epoch: 9 training_loss 1.191003168821335 test_loss: 1.1946664810180665
epoch: 10 training_loss 1.1478164398670196 test_loss: 1.1252843856811523
epoch: 11 training_loss 1.1092911899089812 test_loss: 1.0620737075805664
epoch: 12 training_loss 1.0606612372398376 test_loss: 1.0416958808898926
epoch: 13 training_loss 1.0380359452962875 test_loss: 1.006591510772705
epoch: 14 training_loss 1.0037757682800292 test_loss: 0.9979616165161133
epoch: 15 training_loss 0.979564660191536 test_loss: 1.004442024230957
epoch: 16 training_loss 0.9537833094596863 test_loss: 0.9275261878967285
epoch: 17 training_loss 0.9567018282413483 test_loss: 0.9189692497253418
epoch: 18 training_loss 0.9048821306228638 test_loss: 0.8840940475463868
epoch: 19 training_loss 0.8772850531339645 test_loss: 0.8667780876159668
epoch: 20 training_loss 0.8886956429481506 test_loss: 0.8598909378051758
epoch: 21 training_loss 0.8605400288105011 test_loss: 0.8811178207397461
epoch: 22 training_loss 0.8412330728769303 test_loss: 0.8223662376403809
epoch: 23 training_loss 0.8234808987379074 test_loss: 0.7997592926025391
epoch: 24 training_loss 0.825182923078537 test_loss: 0.8244207382202149
epoch: 25 training_loss 0.8142486053705216 test_loss: 0.818016242980957
epoch: 26 training_loss 0.7939389431476593 test_loss: 0.7891564846038819
epoch: 27 training_loss 0.7845958811044693 test_loss: 0.807087516784668
epoch: 28 training_loss 0.7812656909227371 test_loss: 0.8061310768127441
epoch: 29 training_loss 0.7721768057346344 test_loss: 0.7729988098144531
epoch: 30 training_loss 0.7697925931215286 test_loss: 0.7445532321929932
epoch: 31 training_loss 0.7392332011461258 test_loss: 0.7558809757232666
epoch: 32 training_loss 0.7384913128614425 test_loss: 0.7381772994995117
epoch: 33 training_loss 0.7374254077672958 test_loss: 0.7532622814178467
epoch: 34 training_loss 0.7234999364614487 test_loss: 0.7314100742340088
epoch: 35 training_loss 0.7209324383735657 test_loss: 0.7060425281524658
epoch: 36 training_loss 0.7216957283020019 test_loss: 0.7059678554534912
epoch: 37 training_loss 0.7147415578365326 test_loss: 0.6682108402252197
epoch: 38 training_loss 0.7115190178155899 test_loss: 0.6994801998138428
epoch: 39 training_loss 0.7000085818767547 test_loss: 0.6975124835968017
epoch: 40 training_loss 0.6838026398420334 test_loss: 0.6874758720397949
epoch: 41 training_loss 0.6724607998132706 test_loss: 0.7146152019500732
epoch: 42 training_loss 0.6840341794490814 test_loss: 0.7054064750671387
epoch: 43 training_loss 0.6635227662324905 test_loss: 0.6776169776916504
epoch: 44 training_loss 0.6665042173862458 test_loss: 0.6770190238952637
epoch: 45 training_loss 0.6641664028167724 test_loss: 0.6628712177276611
epoch: 46 training_loss 0.6674601125717163 test_loss: 0.6504081726074219
epoch: 47 training_loss 0.6561919045448303 test_loss: 0.6473012924194336
epoch: 48 training_loss 0.6559777730703353 test_loss: 0.6678132534027099
epoch: 49 training_loss 0.6485475653409958 test_loss: 0.63539137840271
epoch: 50 training_loss 0.6468817991018295 test_loss: 0.6197769641876221
epoch: 51 training_loss 0.6350552505254745 test_loss: 0.6477433681488037
epoch: 52 training_loss 0.6235071611404419 test_loss: 0.6284264087677002
epoch: 53 training_loss 0.6362174779176712 test_loss: 0.6184954643249512
epoch: 54 training_loss 0.625116680264473 test_loss: 0.6316762447357178
epoch: 55 training_loss 0.639905007481575 test_loss: 0.6083813190460206
epoch: 56 training_loss 0.627982502579689 test_loss: 0.6514240741729737
epoch: 57 training_loss 0.6262341868877411 test_loss: 0.6205219745635986
epoch: 58 training_loss 0.617036954164505 test_loss: 0.6047236442565918
epoch: 59 training_loss 0.6243596154451371 test_loss: 0.6280229568481446
epoch: 60 training_loss 0.612929396033287 test_loss: 0.6038081645965576
epoch: 61 training_loss 0.612913955450058 test_loss: 0.6290620803833008
epoch: 62 training_loss 0.6063218748569489 test_loss: 0.597852087020874
epoch: 63 training_loss 0.6053510493040085 test_loss: 0.610772180557251
epoch: 64 training_loss 0.6200293815135955 test_loss: 0.6021546840667724
epoch: 65 training_loss 0.6067013722658158 test_loss: 0.5994525909423828
epoch: 66 training_loss 0.601794205904007 test_loss: 0.627159309387207
epoch: 67 training_loss 0.6015374040603638 test_loss: 0.577006196975708
epoch: 68 training_loss 0.5901637035608291 test_loss: 0.5870668411254882
epoch: 69 training_loss 0.5991069322824478 test_loss: 0.5980573177337647
epoch: 70 training_loss 0.5926359713077545 test_loss: 0.6008912563323975
epoch: 71 training_loss 0.5838298392295838 test_loss: 0.6074368000030518
epoch: 72 training_loss 0.5884342724084854 test_loss: 0.581125020980835
epoch: 73 training_loss 0.5831681990623474 test_loss: 0.5868309020996094
epoch: 74 training_loss 0.5761771029233933 test_loss: 0.5924979209899902
epoch: 75 training_loss 0.5856198447942734 test_loss: 0.5791967391967774
epoch: 76 training_loss 0.5807238763570786 test_loss: 0.5680166721343994
epoch: 77 training_loss 0.5847448366880417 test_loss: 0.5858415603637696
epoch: 78 training_loss 0.5673126026988029 test_loss: 0.578076982498169
epoch: 79 training_loss 0.585328386425972 test_loss: 0.5764464855194091
epoch: 80 training_loss 0.5722492924332618 test_loss: 0.5778950691223145
epoch: 81 training_loss 0.565259057879448 test_loss: 0.5702401161193847
epoch: 82 training_loss 0.5663634550571441 test_loss: 0.5633563041687012
epoch: 83 training_loss 0.5608108425140381 test_loss: 0.5817688941955567
epoch: 84 training_loss 0.5734142625331878 test_loss: 0.577843427658081
epoch: 85 training_loss 0.5727802288532257 test_loss: 0.5680584907531738
epoch: 86 training_loss 0.5724666142463684 test_loss: 0.5610477924346924
epoch: 87 training_loss 0.5611196929216384 test_loss: 0.566882848739624
epoch: 88 training_loss 0.5588023295998573 test_loss: 0.5502645969390869
epoch: 89 training_loss 0.5521769765019416 test_loss: 0.5578879833221435
epoch: 90 training_loss 0.5699083563685418 test_loss: 0.5495126724243165
epoch: 91 training_loss 0.5664768955111503 test_loss: 0.5780051231384278
epoch: 92 training_loss 0.5514889013767242 test_loss: 0.5564255714416504
epoch: 93 training_loss 0.5550219857692719 test_loss: 0.537315034866333
epoch: 94 training_loss 0.5438125076889991 test_loss: 0.5458158493041992
epoch: 95 training_loss 0.5543725997209549 test_loss: 0.543324613571167
epoch: 96 training_loss 0.5485299101471901 test_loss: 0.5422708511352539
epoch: 97 training_loss 0.5498080933094025 test_loss: 0.521978759765625
epoch: 98 training_loss 0.5515330839157104 test_loss: 0.5692643642425537
epoch: 99 training_loss 0.5465436923503876 test_loss: 0.5320513725280762
epoch: 100 training_loss 0.5319463136792183 test_loss: 0.5242869853973389
epoch: 101 training_loss 0.5391289469599724 test_loss: 0.5475162982940673
epoch: 102 training_loss 0.5439981344342232 test_loss: 0.5730969905853271
epoch: 103 training_loss 0.5514342349767685 test_loss: 0.540266752243042
epoch: 104 training_loss 0.5406840348243713 test_loss: 0.5326590538024902
epoch: 105 training_loss 0.5392793965339661 test_loss: 0.5400240898132325
epoch: 106 training_loss 0.5321164360642433 test_loss: 0.5333867549896241
epoch: 107 training_loss 0.5341724896430969 test_loss: 0.5562264442443847
epoch: 108 training_loss 0.5323577502369881 test_loss: 0.537896728515625
epoch: 109 training_loss 0.5339137989282609 test_loss: 0.5304884433746337
epoch: 110 training_loss 0.5418629139661789 test_loss: 0.5221379280090332
epoch: 111 training_loss 0.5201380655169487 test_loss: 0.5298543930053711
epoch: 112 training_loss 0.5338579928874969 test_loss: 0.5366787433624267
epoch: 113 training_loss 0.5271867120265961 test_loss: 0.5324302196502686
epoch: 114 training_loss 0.526246484220028 test_loss: 0.5168341159820556
epoch: 115 training_loss 0.5254993399977684 test_loss: 0.5263371467590332
epoch: 116 training_loss 0.5289936435222625 test_loss: 0.5404001712799072
epoch: 117 training_loss 0.5297232249379158 test_loss: 0.5102007865905762
epoch: 118 training_loss 0.5322095373272896 test_loss: 0.5183159828186035
epoch: 119 training_loss 0.5203068986535072 test_loss: 0.5273204326629639
epoch: 120 training_loss 0.5254162839055061 test_loss: 0.5278409004211426
epoch: 121 training_loss 0.5220333468914032 test_loss: 0.5074166774749755
epoch: 122 training_loss 0.5228964784741401 test_loss: 0.5358502864837646
epoch: 123 training_loss 0.5131862166523934 test_loss: 0.53809494972229
epoch: 124 training_loss 0.5164032223820686 test_loss: 0.5071220874786377
epoch: 125 training_loss 0.526218184530735 test_loss: 0.508546781539917
epoch: 126 training_loss 0.5093065318465233 test_loss: 0.5487199306488038
epoch: 127 training_loss 0.5169182333350182 test_loss: 0.5152887344360352
epoch: 128 training_loss 0.5147003063559532 test_loss: 0.5164011478424072
epoch: 129 training_loss 0.5091930437088013 test_loss: 0.5083301544189454
epoch: 130 training_loss 0.5195451122522354 test_loss: 0.5174250602722168
epoch: 131 training_loss 0.5233795666694641 test_loss: 0.5044894695281983
epoch: 132 training_loss 0.5091806048154831 test_loss: 0.5218972206115723
epoch: 133 training_loss 0.51819360435009 test_loss: 0.5430410861968994
epoch: 134 training_loss 0.5045012366771698 test_loss: 0.5045055389404297
epoch: 135 training_loss 0.5080056887865066 test_loss: 0.50276780128479
epoch: 136 training_loss 0.5093769258260727 test_loss: 0.5104705810546875
epoch: 137 training_loss 0.5020917317271233 test_loss: 0.49809651374816893
epoch: 138 training_loss 0.5057450121641159 test_loss: 0.49641265869140627
epoch: 139 training_loss 0.5092073509097099 test_loss: 0.4994650363922119
epoch: 140 training_loss 0.5057667878270149 test_loss: 0.5086058139801025
epoch: 141 training_loss 0.5139444506168366 test_loss: 0.5018980979919434
epoch: 142 training_loss 0.4954513183236122 test_loss: 0.5087531089782715
epoch: 143 training_loss 0.5126557168364525 test_loss: 0.5301510334014893
epoch: 144 training_loss 0.5054626947641373 test_loss: 0.49521098136901853
epoch: 145 training_loss 0.4994794422388077 test_loss: 0.5057706356048584
epoch: 146 training_loss 0.503351874947548 test_loss: 0.5145207405090332
epoch: 147 training_loss 0.5015961879491806 test_loss: 0.49025874137878417
epoch: 148 training_loss 0.5077466729283333 test_loss: 0.4935513973236084
epoch: 149 training_loss 0.5010112842917442 test_loss: 0.5006755352020263
2782.242652403861
episode: 0 training return: tensor(-247.4629, device='cuda:0')
episode: 1 training return: tensor(223.7632, device='cuda:0')
episode: 2 training return: tensor(-170.8388, device='cuda:0')
episode: 3 training return: tensor(-371.2448, device='cuda:0')
epoch: 1 test_true_pfm: 2886.136215460832 sim_pfm: 194.06486331989677
episode: 4 training return: tensor(-234.6265, device='cuda:0')
episode: 5 training return: tensor(-307.9678, device='cuda:0')
episode: 6 training return: tensor(80.6870, device='cuda:0')
episode: 7 training return: tensor(-393.8133, device='cuda:0')
epoch: 2 test_true_pfm: 2049.3935699475624 sim_pfm: -67.53109676685806
episode: 8 training return: tensor(-374.6367, device='cuda:0')
episode: 9 training return: tensor(255.4954, device='cuda:0')
episode: 10 training return: tensor(-321.7661, device='cuda:0')
episode: 11 training return: tensor(118.0948, device='cuda:0')
epoch: 3 test_true_pfm: 2616.486606450467 sim_pfm: 64.28608405066188
episode: 12 training return: tensor(-273.0986, device='cuda:0')
episode: 13 training return: tensor(34.4269, device='cuda:0')
episode: 14 training return: tensor(299.1880, device='cuda:0')
episode: 15 training return: tensor(226.4787, device='cuda:0')
epoch: 4 test_true_pfm: 1726.9581570879589 sim_pfm: -99.00616471094934
episode: 16 training return: tensor(298.0283, device='cuda:0')
episode: 17 training return: tensor(-385.6386, device='cuda:0')
episode: 18 training return: tensor(-392.1720, device='cuda:0')
episode: 19 training return: tensor(-16.2436, device='cuda:0')
epoch: 5 test_true_pfm: 2431.27542136388 sim_pfm: 154.5210039417337
episode: 20 training return: tensor(-324.9852, device='cuda:0')
episode: 21 training return: tensor(142.9328, device='cuda:0')
episode: 22 training return: tensor(-90.1966, device='cuda:0')
episode: 23 training return: tensor(-379.0445, device='cuda:0')
epoch: 6 test_true_pfm: 2637.0322465234008 sim_pfm: -21.07443942849447
episode: 24 training return: tensor(282.4145, device='cuda:0')
episode: 25 training return: tensor(-71.4557, device='cuda:0')
episode: 26 training return: tensor(205.9194, device='cuda:0')
episode: 27 training return: tensor(80.1596, device='cuda:0')
epoch: 7 test_true_pfm: 2474.9772815979286 sim_pfm: 141.52073812985327
episode: 28 training return: tensor(36.2876, device='cuda:0')
episode: 29 training return: tensor(124.2959, device='cuda:0')
episode: 30 training return: tensor(36.2640, device='cuda:0')
episode: 31 training return: tensor(-328.9514, device='cuda:0')
epoch: 8 test_true_pfm: 2624.0628699926547 sim_pfm: 201.03235299828034
episode: 32 training return: tensor(-295.6536, device='cuda:0')
episode: 33 training return: tensor(-237.2603, device='cuda:0')
episode: 34 training return: tensor(-177.0317, device='cuda:0')
episode: 35 training return: tensor(107.1779, device='cuda:0')
epoch: 9 test_true_pfm: 2821.3657457422873 sim_pfm: 20.489040920472082
episode: 36 training return: tensor(193.7739, device='cuda:0')
episode: 37 training return: tensor(-284.8576, device='cuda:0')
episode: 38 training return: tensor(-223.0084, device='cuda:0')
episode: 39 training return: tensor(11.0293, device='cuda:0')
epoch: 10 test_true_pfm: 2788.3606477789253 sim_pfm: 238.7651400351509
episode: 40 training return: tensor(-247.9886, device='cuda:0')
episode: 41 training return: tensor(-380.9867, device='cuda:0')
episode: 42 training return: tensor(-353.9326, device='cuda:0')
episode: 43 training return: tensor(-229.1169, device='cuda:0')
epoch: 11 test_true_pfm: 3060.0504567995795 sim_pfm: 85.67374085328386
episode: 44 training return: tensor(-349.7854, device='cuda:0')
episode: 45 training return: tensor(252.4121, device='cuda:0')
episode: 46 training return: tensor(-247.7125, device='cuda:0')
episode: 47 training return: tensor(-94.0892, device='cuda:0')
epoch: 12 test_true_pfm: 2937.6807271618363 sim_pfm: 49.71170104000097
episode: 48 training return: tensor(209.7853, device='cuda:0')
episode: 49 training return: tensor(-30.4251, device='cuda:0')
episode: 50 training return: tensor(-211.8957, device='cuda:0')
episode: 51 training return: tensor(-383.6595, device='cuda:0')
epoch: 13 test_true_pfm: 2144.5582760376365 sim_pfm: 142.32495712953582
episode: 52 training return: tensor(41.1331, device='cuda:0')
episode: 53 training return: tensor(-384.1334, device='cuda:0')
episode: 54 training return: tensor(23.4994, device='cuda:0')
episode: 55 training return: tensor(-256.9509, device='cuda:0')
epoch: 14 test_true_pfm: 2973.7149040835407 sim_pfm: -60.74531649631293
episode: 56 training return: tensor(121.5444, device='cuda:0')
episode: 57 training return: tensor(-42.8390, device='cuda:0')
episode: 58 training return: tensor(232.9095, device='cuda:0')
episode: 59 training return: tensor(-218.7733, device='cuda:0')
epoch: 15 test_true_pfm: 2983.6429294573245 sim_pfm: 59.13011582908803
episode: 60 training return: tensor(122.2867, device='cuda:0')
episode: 61 training return: tensor(-24.7135, device='cuda:0')
episode: 62 training return: tensor(259.7578, device='cuda:0')
episode: 63 training return: tensor(-226.6131, device='cuda:0')
epoch: 16 test_true_pfm: 2237.2152475630555 sim_pfm: -11.718013918725774
episode: 64 training return: tensor(193.7833, device='cuda:0')
episode: 65 training return: tensor(243.1551, device='cuda:0')
episode: 66 training return: tensor(294.2736, device='cuda:0')
episode: 67 training return: tensor(-288.9817, device='cuda:0')
epoch: 17 test_true_pfm: 2621.033415291828 sim_pfm: 113.48693231320551
episode: 68 training return: tensor(-70.2847, device='cuda:0')
episode: 69 training return: tensor(-274.4589, device='cuda:0')
episode: 70 training return: tensor(-263.9782, device='cuda:0')
episode: 71 training return: tensor(-320.6653, device='cuda:0')
epoch: 18 test_true_pfm: 2888.1059043681325 sim_pfm: 61.93260876554996
episode: 72 training return: tensor(25.2488, device='cuda:0')
episode: 73 training return: tensor(291.4103, device='cuda:0')
episode: 74 training return: tensor(-47.9775, device='cuda:0')
episode: 75 training return: tensor(257.7420, device='cuda:0')
epoch: 19 test_true_pfm: 2486.4349626368517 sim_pfm: 152.47094774610983
episode: 76 training return: tensor(-336.6080, device='cuda:0')
episode: 77 training return: tensor(241.1989, device='cuda:0')
episode: 78 training return: tensor(122.0196, device='cuda:0')
episode: 79 training return: tensor(-379.3330, device='cuda:0')
epoch: 20 test_true_pfm: 2583.4010201971196 sim_pfm: 70.44193078142901
episode: 80 training return: tensor(-232.8900, device='cuda:0')
episode: 81 training return: tensor(8.8885, device='cuda:0')
episode: 82 training return: tensor(-132.7860, device='cuda:0')
episode: 83 training return: tensor(-343.3944, device='cuda:0')
epoch: 21 test_true_pfm: 2847.736430536444 sim_pfm: -8.137561369124645
episode: 84 training return: tensor(231.2612, device='cuda:0')
episode: 85 training return: tensor(-294.5333, device='cuda:0')
episode: 86 training return: tensor(-29.7752, device='cuda:0')
episode: 87 training return: tensor(101.3648, device='cuda:0')
epoch: 22 test_true_pfm: 3069.6948298019142 sim_pfm: 145.45051690509231
episode: 88 training return: tensor(-204.1304, device='cuda:0')
episode: 89 training return: tensor(-252.0227, device='cuda:0')
episode: 90 training return: tensor(116.3121, device='cuda:0')
episode: 91 training return: tensor(300.2937, device='cuda:0')
epoch: 23 test_true_pfm: 2602.8420179941554 sim_pfm: -2.909773825900629
episode: 92 training return: tensor(-101.9687, device='cuda:0')
episode: 93 training return: tensor(107.8075, device='cuda:0')
episode: 94 training return: tensor(-57.8434, device='cuda:0')
episode: 95 training return: tensor(-257.1367, device='cuda:0')
epoch: 24 test_true_pfm: 2903.8732757409193 sim_pfm: 138.97045718958057
episode: 96 training return: tensor(265.5869, device='cuda:0')
episode: 97 training return: tensor(-336.7474, device='cuda:0')
episode: 98 training return: tensor(-375.0003, device='cuda:0')
episode: 99 training return: tensor(251.6397, device='cuda:0')
epoch: 25 test_true_pfm: 2860.8939814905425 sim_pfm: 161.578131553911
episode: 100 training return: tensor(-225.7412, device='cuda:0')
episode: 101 training return: tensor(-313.4558, device='cuda:0')
episode: 102 training return: tensor(19.1783, device='cuda:0')
episode: 103 training return: tensor(108.5925, device='cuda:0')
epoch: 26 test_true_pfm: 3184.6677444562897 sim_pfm: 180.14210135303438
episode: 104 training return: tensor(-244.1935, device='cuda:0')
episode: 105 training return: tensor(-328.8148, device='cuda:0')
episode: 106 training return: tensor(-330.5756, device='cuda:0')
episode: 107 training return: tensor(-335.1083, device='cuda:0')
epoch: 27 test_true_pfm: 2012.5770857382147 sim_pfm: -33.97725407104008
episode: 108 training return: tensor(-10.6907, device='cuda:0')
episode: 109 training return: tensor(-311.3489, device='cuda:0')
episode: 110 training return: tensor(-283.1417, device='cuda:0')
episode: 111 training return: tensor(261.0989, device='cuda:0')
epoch: 28 test_true_pfm: 2591.320721604781 sim_pfm: 96.62824241017613
episode: 112 training return: tensor(263.7370, device='cuda:0')
episode: 113 training return: tensor(-387.4201, device='cuda:0')
episode: 114 training return: tensor(265.6045, device='cuda:0')
episode: 115 training return: tensor(-237.2180, device='cuda:0')
epoch: 29 test_true_pfm: 2136.9867687015653 sim_pfm: 102.98500589624746
episode: 116 training return: tensor(-242.4244, device='cuda:0')
episode: 117 training return: tensor(-279.5270, device='cuda:0')
episode: 118 training return: tensor(63.5607, device='cuda:0')
episode: 119 training return: tensor(273.8410, device='cuda:0')
epoch: 30 test_true_pfm: 2562.563459429064 sim_pfm: 6.814496951876208
episode: 120 training return: tensor(-46.2043, device='cuda:0')
episode: 121 training return: tensor(-291.0824, device='cuda:0')
episode: 122 training return: tensor(-373.7401, device='cuda:0')
episode: 123 training return: tensor(249.2000, device='cuda:0')
epoch: 31 test_true_pfm: 2128.782574981066 sim_pfm: 263.449004922586
episode: 124 training return: tensor(-23.8499, device='cuda:0')
episode: 125 training return: tensor(-95.1737, device='cuda:0')
episode: 126 training return: tensor(254.9964, device='cuda:0')
episode: 127 training return: tensor(-353.9103, device='cuda:0')
epoch: 32 test_true_pfm: 2508.614923754322 sim_pfm: 61.488896278499546
episode: 128 training return: tensor(301.5217, device='cuda:0')
episode: 129 training return: tensor(121.7893, device='cuda:0')
episode: 130 training return: tensor(-346.5173, device='cuda:0')
episode: 131 training return: tensor(-308.5872, device='cuda:0')
epoch: 33 test_true_pfm: 2691.7508089050275 sim_pfm: 193.95745878186426
episode: 132 training return: tensor(-292.5387, device='cuda:0')
episode: 133 training return: tensor(-80.0061, device='cuda:0')
episode: 134 training return: tensor(69.8950, device='cuda:0')
episode: 135 training return: tensor(237.8062, device='cuda:0')
epoch: 34 test_true_pfm: 2052.9159055521054 sim_pfm: -11.614473102449361
episode: 136 training return: tensor(260.0321, device='cuda:0')
episode: 137 training return: tensor(-214.2138, device='cuda:0')
episode: 138 training return: tensor(-355.0117, device='cuda:0')
episode: 139 training return: tensor(236.9282, device='cuda:0')
epoch: 35 test_true_pfm: 2766.1741800659415 sim_pfm: -111.90819784269358
episode: 140 training return: tensor(229.1914, device='cuda:0')
episode: 141 training return: tensor(219.5250, device='cuda:0')
episode: 142 training return: tensor(225.1911, device='cuda:0')
episode: 143 training return: tensor(-43.7807, device='cuda:0')
epoch: 36 test_true_pfm: 2703.0743952432003 sim_pfm: 48.4654169617473
episode: 144 training return: tensor(240.6714, device='cuda:0')
episode: 145 training return: tensor(-326.7039, device='cuda:0')
episode: 146 training return: tensor(236.9670, device='cuda:0')
episode: 147 training return: tensor(251.6607, device='cuda:0')
epoch: 37 test_true_pfm: 2596.001037212329 sim_pfm: 46.83360793022439
episode: 148 training return: tensor(-359.3587, device='cuda:0')
episode: 149 training return: tensor(-317.9722, device='cuda:0')
episode: 150 training return: tensor(56.1655, device='cuda:0')
episode: 151 training return: tensor(-331.3407, device='cuda:0')
epoch: 38 test_true_pfm: 2653.386386775902 sim_pfm: 107.46193754424651
episode: 152 training return: tensor(-9.6532, device='cuda:0')
episode: 153 training return: tensor(-309.1898, device='cuda:0')
episode: 154 training return: tensor(225.4615, device='cuda:0')
episode: 155 training return: tensor(275.1946, device='cuda:0')
epoch: 39 test_true_pfm: 2213.287170544231 sim_pfm: -44.008816412630644
episode: 156 training return: tensor(-236.6383, device='cuda:0')
episode: 157 training return: tensor(-291.7472, device='cuda:0')
episode: 158 training return: tensor(-96.2949, device='cuda:0')
episode: 159 training return: tensor(3.3826, device='cuda:0')
epoch: 40 test_true_pfm: 2901.989974360065 sim_pfm: 73.00848423235584
episode: 160 training return: tensor(239.0927, device='cuda:0')
episode: 161 training return: tensor(-290.8864, device='cuda:0')
episode: 162 training return: tensor(-308.8859, device='cuda:0')
episode: 163 training return: tensor(135.6817, device='cuda:0')
epoch: 41 test_true_pfm: 2712.3461813181048 sim_pfm: 41.08326911999999
episode: 164 training return: tensor(-368.0621, device='cuda:0')
episode: 165 training return: tensor(-360.4303, device='cuda:0')
episode: 166 training return: tensor(-62.9253, device='cuda:0')
episode: 167 training return: tensor(-328.7735, device='cuda:0')
epoch: 42 test_true_pfm: 3053.9496163254976 sim_pfm: 52.89000991297265
episode: 168 training return: tensor(22.7489, device='cuda:0')
episode: 169 training return: tensor(-42.5895, device='cuda:0')
episode: 170 training return: tensor(9.0193, device='cuda:0')
episode: 171 training return: tensor(290.3655, device='cuda:0')
epoch: 43 test_true_pfm: 2686.9204450702387 sim_pfm: -57.365498665953055
episode: 172 training return: tensor(-375.8890, device='cuda:0')
episode: 173 training return: tensor(252.5455, device='cuda:0')
episode: 174 training return: tensor(-221.2508, device='cuda:0')
episode: 175 training return: tensor(37.4783, device='cuda:0')
epoch: 44 test_true_pfm: 2928.9455156384734 sim_pfm: -116.19312639945808
episode: 176 training return: tensor(236.4006, device='cuda:0')
episode: 177 training return: tensor(74.8285, device='cuda:0')
episode: 178 training return: tensor(-265.3097, device='cuda:0')
episode: 179 training return: tensor(-32.0107, device='cuda:0')
epoch: 45 test_true_pfm: 2918.1848007072936 sim_pfm: -127.91164726438001
episode: 180 training return: tensor(250.1007, device='cuda:0')
episode: 181 training return: tensor(-383.1258, device='cuda:0')
episode: 182 training return: tensor(-380.1165, device='cuda:0')
episode: 183 training return: tensor(-90.5788, device='cuda:0')
epoch: 46 test_true_pfm: 2473.032882978151 sim_pfm: 32.66786800895352
episode: 184 training return: tensor(-355.1270, device='cuda:0')
episode: 185 training return: tensor(-95.0770, device='cuda:0')
episode: 186 training return: tensor(-316.7681, device='cuda:0')
episode: 187 training return: tensor(-296.9224, device='cuda:0')
epoch: 47 test_true_pfm: 2686.0441215150295 sim_pfm: -13.71641500806436
episode: 188 training return: tensor(259.9490, device='cuda:0')
episode: 189 training return: tensor(-220.9244, device='cuda:0')
episode: 190 training return: tensor(-173.4167, device='cuda:0')
episode: 191 training return: tensor(235.9092, device='cuda:0')
epoch: 48 test_true_pfm: 2619.6201240163423 sim_pfm: -14.381457928568125
episode: 192 training return: tensor(250.0359, device='cuda:0')
episode: 193 training return: tensor(51.3300, device='cuda:0')
episode: 194 training return: tensor(-353.7824, device='cuda:0')
episode: 195 training return: tensor(-304.0914, device='cuda:0')
epoch: 49 test_true_pfm: 2411.8602186035287 sim_pfm: -191.5906462019775
episode: 196 training return: tensor(-296.2713, device='cuda:0')
episode: 197 training return: tensor(-292.6414, device='cuda:0')
episode: 198 training return: tensor(-138.7451, device='cuda:0')
episode: 199 training return: tensor(-370.1396, device='cuda:0')
epoch: 50 test_true_pfm: 2428.9156019317943 sim_pfm: -158.92765507808267
episode: 200 training return: tensor(239.6731, device='cuda:0')
episode: 201 training return: tensor(109.2337, device='cuda:0')
episode: 202 training return: tensor(-257.2404, device='cuda:0')
episode: 203 training return: tensor(-295.3642, device='cuda:0')
epoch: 51 test_true_pfm: 3071.757707804603 sim_pfm: 28.256031450417748
episode: 204 training return: tensor(283.2164, device='cuda:0')
episode: 205 training return: tensor(-217.9928, device='cuda:0')
episode: 206 training return: tensor(268.7108, device='cuda:0')
episode: 207 training return: tensor(-202.8036, device='cuda:0')
epoch: 52 test_true_pfm: 2655.2064050995054 sim_pfm: 63.73208893206902
episode: 208 training return: tensor(-78.4293, device='cuda:0')
episode: 209 training return: tensor(-335.0563, device='cuda:0')
episode: 210 training return: tensor(125.0945, device='cuda:0')
episode: 211 training return: tensor(-216.8350, device='cuda:0')
epoch: 53 test_true_pfm: 3100.826470211358 sim_pfm: -88.73572898221512
episode: 212 training return: tensor(-287.7584, device='cuda:0')
episode: 213 training return: tensor(-219.8318, device='cuda:0')
episode: 214 training return: tensor(-214.3306, device='cuda:0')
episode: 215 training return: tensor(-384.2140, device='cuda:0')
epoch: 54 test_true_pfm: 2698.411634662821 sim_pfm: -33.92521487083286
episode: 216 training return: tensor(58.2222, device='cuda:0')
episode: 217 training return: tensor(-282.8104, device='cuda:0')
episode: 218 training return: tensor(-339.0278, device='cuda:0')
episode: 219 training return: tensor(-337.6984, device='cuda:0')
epoch: 55 test_true_pfm: 1974.12287357923 sim_pfm: -34.92408354345631
episode: 220 training return: tensor(-298.6196, device='cuda:0')
episode: 221 training return: tensor(208.2343, device='cuda:0')
episode: 222 training return: tensor(5.6680, device='cuda:0')
episode: 223 training return: tensor(-186.6837, device='cuda:0')
epoch: 56 test_true_pfm: 1874.2379544923197 sim_pfm: 62.359359967095465
episode: 224 training return: tensor(-80.8138, device='cuda:0')
episode: 225 training return: tensor(133.0790, device='cuda:0')
episode: 226 training return: tensor(47.3605, device='cuda:0')
episode: 227 training return: tensor(263.8288, device='cuda:0')
epoch: 57 test_true_pfm: 1908.4236305031727 sim_pfm: 84.21085455648911
episode: 228 training return: tensor(254.8378, device='cuda:0')
episode: 229 training return: tensor(212.5615, device='cuda:0')
episode: 230 training return: tensor(202.7399, device='cuda:0')
episode: 231 training return: tensor(-330.1108, device='cuda:0')
epoch: 58 test_true_pfm: 2742.6895076387646 sim_pfm: -29.894691822429497
episode: 232 training return: tensor(241.9923, device='cuda:0')
episode: 233 training return: tensor(-92.1558, device='cuda:0')
episode: 234 training return: tensor(-310.7080, device='cuda:0')
episode: 235 training return: tensor(-284.5502, device='cuda:0')
epoch: 59 test_true_pfm: 2572.3022388080753 sim_pfm: 72.2954919730546
episode: 236 training return: tensor(-282.2803, device='cuda:0')
episode: 237 training return: tensor(-284.5947, device='cuda:0')
episode: 238 training return: tensor(-310.4906, device='cuda:0')
episode: 239 training return: tensor(30.5053, device='cuda:0')
epoch: 60 test_true_pfm: 2636.945022633369 sim_pfm: -2.9035825359945497
episode: 240 training return: tensor(95.8865, device='cuda:0')
episode: 241 training return: tensor(-224.4571, device='cuda:0')
episode: 242 training return: tensor(259.5015, device='cuda:0')
episode: 243 training return: tensor(89.2441, device='cuda:0')
epoch: 61 test_true_pfm: 2397.169476158882 sim_pfm: 278.19977313753526
episode: 244 training return: tensor(-280.9442, device='cuda:0')
episode: 245 training return: tensor(291.7110, device='cuda:0')
episode: 246 training return: tensor(-119.9832, device='cuda:0')
episode: 247 training return: tensor(-24.2598, device='cuda:0')
epoch: 62 test_true_pfm: 2543.6549087084345 sim_pfm: 65.91895684227347
episode: 248 training return: tensor(-203.9336, device='cuda:0')
episode: 249 training return: tensor(217.1406, device='cuda:0')
episode: 250 training return: tensor(-337.5750, device='cuda:0')
episode: 251 training return: tensor(236.9324, device='cuda:0')
epoch: 63 test_true_pfm: 2397.141177128539 sim_pfm: 13.467133229676014
episode: 252 training return: tensor(-380.6939, device='cuda:0')
episode: 253 training return: tensor(138.0434, device='cuda:0')
episode: 254 training return: tensor(-237.5641, device='cuda:0')
episode: 255 training return: tensor(-17.2827, device='cuda:0')
epoch: 64 test_true_pfm: 2611.843038749889 sim_pfm: 86.9720193183748
episode: 256 training return: tensor(203.8652, device='cuda:0')
episode: 257 training return: tensor(260.0377, device='cuda:0')
episode: 258 training return: tensor(-238.5956, device='cuda:0')
episode: 259 training return: tensor(-93.6840, device='cuda:0')
epoch: 65 test_true_pfm: 2373.434135387019 sim_pfm: -53.066069269611035
episode: 260 training return: tensor(-47.0132, device='cuda:0')
episode: 261 training return: tensor(256.7792, device='cuda:0')
episode: 262 training return: tensor(249.0815, device='cuda:0')
episode: 263 training return: tensor(43.0416, device='cuda:0')
epoch: 66 test_true_pfm: 2206.5350749438935 sim_pfm: 147.40239574752437
episode: 264 training return: tensor(305.5625, device='cuda:0')
episode: 265 training return: tensor(-268.2692, device='cuda:0')
episode: 266 training return: tensor(279.2858, device='cuda:0')
episode: 267 training return: tensor(-286.9549, device='cuda:0')
epoch: 67 test_true_pfm: 2431.584056814277 sim_pfm: -64.02890792333831
episode: 268 training return: tensor(242.7643, device='cuda:0')
episode: 269 training return: tensor(248.8478, device='cuda:0')
episode: 270 training return: tensor(-5.6816, device='cuda:0')
episode: 271 training return: tensor(-220.4357, device='cuda:0')
epoch: 68 test_true_pfm: 2928.8228502822735 sim_pfm: 175.57462952584805
episode: 272 training return: tensor(137.2392, device='cuda:0')
episode: 273 training return: tensor(-336.3527, device='cuda:0')
episode: 274 training return: tensor(-65.1868, device='cuda:0')
episode: 275 training return: tensor(190.2578, device='cuda:0')
epoch: 69 test_true_pfm: 2460.1811174967115 sim_pfm: 31.396916567968827
episode: 276 training return: tensor(197.7744, device='cuda:0')
episode: 277 training return: tensor(-21.6636, device='cuda:0')
episode: 278 training return: tensor(-372.1499, device='cuda:0')
episode: 279 training return: tensor(-246.3774, device='cuda:0')
epoch: 70 test_true_pfm: 2973.1231136336937 sim_pfm: 122.40347985442106
episode: 280 training return: tensor(-289.7935, device='cuda:0')
episode: 281 training return: tensor(207.7057, device='cuda:0')
episode: 282 training return: tensor(-326.4866, device='cuda:0')
episode: 283 training return: tensor(242.2448, device='cuda:0')
epoch: 71 test_true_pfm: 1895.13032794469 sim_pfm: -41.404784925786466
episode: 284 training return: tensor(249.0954, device='cuda:0')
episode: 285 training return: tensor(39.0580, device='cuda:0')
episode: 286 training return: tensor(232.8113, device='cuda:0')
episode: 287 training return: tensor(-294.8370, device='cuda:0')
epoch: 72 test_true_pfm: 1932.209843822501 sim_pfm: 55.44020719348919
episode: 288 training return: tensor(-240.8885, device='cuda:0')
episode: 289 training return: tensor(307.9474, device='cuda:0')
episode: 290 training return: tensor(-214.0697, device='cuda:0')
episode: 291 training return: tensor(208.2160, device='cuda:0')
epoch: 73 test_true_pfm: 2402.321474084454 sim_pfm: 43.30589973643267
episode: 292 training return: tensor(-316.6645, device='cuda:0')
episode: 293 training return: tensor(-316.0077, device='cuda:0')
episode: 294 training return: tensor(-261.7426, device='cuda:0')
episode: 295 training return: tensor(-93.6955, device='cuda:0')
epoch: 74 test_true_pfm: 2731.7229872191624 sim_pfm: 39.04846486276559
episode: 296 training return: tensor(-380.5678, device='cuda:0')
episode: 297 training return: tensor(-364.3149, device='cuda:0')
episode: 298 training return: tensor(-355.2493, device='cuda:0')
episode: 299 training return: tensor(-358.1581, device='cuda:0')
epoch: 75 test_true_pfm: 2661.82582474065 sim_pfm: 3.752498329927524
episode: 300 training return: tensor(-213.6511, device='cuda:0')
episode: 301 training return: tensor(56.4937, device='cuda:0')
episode: 302 training return: tensor(254.8023, device='cuda:0')
episode: 303 training return: tensor(187.9637, device='cuda:0')
epoch: 76 test_true_pfm: 3024.861198233877 sim_pfm: 114.47742200751479
episode: 304 training return: tensor(133.7467, device='cuda:0')
episode: 305 training return: tensor(-281.3067, device='cuda:0')
episode: 306 training return: tensor(55.3717, device='cuda:0')
episode: 307 training return: tensor(-290.6629, device='cuda:0')
epoch: 77 test_true_pfm: 2910.3983970111435 sim_pfm: -120.13127949816408
episode: 308 training return: tensor(129.6210, device='cuda:0')
episode: 309 training return: tensor(-26.5941, device='cuda:0')
episode: 310 training return: tensor(-58.5527, device='cuda:0')
episode: 311 training return: tensor(180.0400, device='cuda:0')
epoch: 78 test_true_pfm: 3034.6076144063395 sim_pfm: -49.345293951996915
episode: 312 training return: tensor(-67.0534, device='cuda:0')
episode: 313 training return: tensor(-289.0552, device='cuda:0')
episode: 314 training return: tensor(-132.3599, device='cuda:0')
episode: 315 training return: tensor(-207.3752, device='cuda:0')
epoch: 79 test_true_pfm: 2165.4015943658537 sim_pfm: 101.27281786928263
episode: 316 training return: tensor(-129.5439, device='cuda:0')
episode: 317 training return: tensor(-246.1029, device='cuda:0')
episode: 318 training return: tensor(-381.2516, device='cuda:0')
episode: 319 training return: tensor(-139.6672, device='cuda:0')
epoch: 80 test_true_pfm: 2432.908509459757 sim_pfm: 54.762450721580535
episode: 320 training return: tensor(-277.9365, device='cuda:0')
episode: 321 training return: tensor(-294.4737, device='cuda:0')
episode: 322 training return: tensor(-88.2555, device='cuda:0')
episode: 323 training return: tensor(238.9973, device='cuda:0')
epoch: 81 test_true_pfm: 2972.1408563460986 sim_pfm: 143.6564433707584
episode: 324 training return: tensor(-336.6253, device='cuda:0')
episode: 325 training return: tensor(-321.7545, device='cuda:0')
episode: 326 training return: tensor(-147.2587, device='cuda:0')
episode: 327 training return: tensor(-278.0896, device='cuda:0')
epoch: 82 test_true_pfm: 3331.222055514509 sim_pfm: 102.8634909298756
episode: 328 training return: tensor(-33.5937, device='cuda:0')
episode: 329 training return: tensor(-314.5459, device='cuda:0')
episode: 330 training return: tensor(51.7735, device='cuda:0')
episode: 331 training return: tensor(61.3191, device='cuda:0')
epoch: 83 test_true_pfm: 2991.1927956619024 sim_pfm: 150.27566486371992
episode: 332 training return: tensor(249.3757, device='cuda:0')
episode: 333 training return: tensor(-331.0703, device='cuda:0')
episode: 334 training return: tensor(130.8249, device='cuda:0')
episode: 335 training return: tensor(-376.9818, device='cuda:0')
epoch: 84 test_true_pfm: 1934.4128564155546 sim_pfm: 215.94880715745967
episode: 336 training return: tensor(232.9314, device='cuda:0')
episode: 337 training return: tensor(24.2546, device='cuda:0')
episode: 338 training return: tensor(266.7883, device='cuda:0')
episode: 339 training return: tensor(-245.5405, device='cuda:0')
epoch: 85 test_true_pfm: 2210.257414542054 sim_pfm: 54.21100543427747
episode: 340 training return: tensor(105.8092, device='cuda:0')
episode: 341 training return: tensor(3.6774, device='cuda:0')
episode: 342 training return: tensor(-286.3233, device='cuda:0')
episode: 343 training return: tensor(81.8184, device='cuda:0')
epoch: 86 test_true_pfm: 2370.9467458315066 sim_pfm: -3.386443562922068
episode: 344 training return: tensor(301.2559, device='cuda:0')
episode: 345 training return: tensor(-124.3031, device='cuda:0')
episode: 346 training return: tensor(-274.4402, device='cuda:0')
episode: 347 training return: tensor(-328.8098, device='cuda:0')
epoch: 87 test_true_pfm: 2939.1573806680576 sim_pfm: 16.11825825817262
episode: 348 training return: tensor(-95.7304, device='cuda:0')
episode: 349 training return: tensor(-286.3107, device='cuda:0')
episode: 350 training return: tensor(224.5109, device='cuda:0')
episode: 351 training return: tensor(-318.5538, device='cuda:0')
epoch: 88 test_true_pfm: 2604.5705254145664 sim_pfm: -16.660055053109925
episode: 352 training return: tensor(-328.2827, device='cuda:0')
episode: 353 training return: tensor(-348.8309, device='cuda:0')
episode: 354 training return: tensor(-257.6570, device='cuda:0')
episode: 355 training return: tensor(-374.4218, device='cuda:0')
epoch: 89 test_true_pfm: 1952.4662383376701 sim_pfm: 54.22514445094081
episode: 356 training return: tensor(-57.4689, device='cuda:0')
episode: 357 training return: tensor(-271.8615, device='cuda:0')
episode: 358 training return: tensor(-220.8915, device='cuda:0')
episode: 359 training return: tensor(127.9673, device='cuda:0')
epoch: 90 test_true_pfm: 2475.0910998871655 sim_pfm: 38.94211287693664
episode: 360 training return: tensor(16.8158, device='cuda:0')
episode: 361 training return: tensor(233.4834, device='cuda:0')
episode: 362 training return: tensor(43.5260, device='cuda:0')
episode: 363 training return: tensor(228.2779, device='cuda:0')
epoch: 91 test_true_pfm: 2403.886193726602 sim_pfm: -2.3060655503844223
episode: 364 training return: tensor(-297.2388, device='cuda:0')
episode: 365 training return: tensor(-87.6017, device='cuda:0')
episode: 366 training return: tensor(26.8018, device='cuda:0')
episode: 367 training return: tensor(42.9791, device='cuda:0')
epoch: 92 test_true_pfm: 2215.0502245318644 sim_pfm: -125.71775785647333
episode: 368 training return: tensor(-334.2969, device='cuda:0')
episode: 369 training return: tensor(-291.6103, device='cuda:0')
episode: 370 training return: tensor(-285.1243, device='cuda:0')
episode: 371 training return: tensor(-315.1567, device='cuda:0')
epoch: 93 test_true_pfm: 1830.142034791422 sim_pfm: -158.08450373537684
episode: 372 training return: tensor(-387.3270, device='cuda:0')
episode: 373 training return: tensor(-40.6301, device='cuda:0')
episode: 374 training return: tensor(242.9013, device='cuda:0')
episode: 375 training return: tensor(-388.4688, device='cuda:0')
epoch: 94 test_true_pfm: 1907.0769068117152 sim_pfm: 158.2541622062757
episode: 376 training return: tensor(303.3675, device='cuda:0')
episode: 377 training return: tensor(45.0734, device='cuda:0')
episode: 378 training return: tensor(258.1426, device='cuda:0')
episode: 379 training return: tensor(53.3664, device='cuda:0')
epoch: 95 test_true_pfm: 2911.886088717068 sim_pfm: 21.408061070590822
episode: 380 training return: tensor(-285.2843, device='cuda:0')
episode: 381 training return: tensor(-133.0133, device='cuda:0')
episode: 382 training return: tensor(177.3217, device='cuda:0')
episode: 383 training return: tensor(-68.6108, device='cuda:0')
epoch: 96 test_true_pfm: 2574.4979344370254 sim_pfm: -42.18768605139727
episode: 384 training return: tensor(53.2688, device='cuda:0')
episode: 385 training return: tensor(130.2426, device='cuda:0')
episode: 386 training return: tensor(-325.5532, device='cuda:0')
episode: 387 training return: tensor(49.8518, device='cuda:0')
epoch: 97 test_true_pfm: 2721.9501035804556 sim_pfm: -30.04605108251174
episode: 388 training return: tensor(-68.9921, device='cuda:0')
episode: 389 training return: tensor(-379.9897, device='cuda:0')
episode: 390 training return: tensor(-40.9527, device='cuda:0')
episode: 391 training return: tensor(-273.5024, device='cuda:0')
epoch: 98 test_true_pfm: 2315.3529454791137 sim_pfm: -81.4157248288393
episode: 392 training return: tensor(-48.3720, device='cuda:0')
episode: 393 training return: tensor(-30.3892, device='cuda:0')
episode: 394 training return: tensor(283.1339, device='cuda:0')
episode: 395 training return: tensor(-286.8069, device='cuda:0')
epoch: 99 test_true_pfm: 2177.2541100949225 sim_pfm: 178.09524512118273
episode: 396 training return: tensor(301.0695, device='cuda:0')
episode: 397 training return: tensor(-206.6840, device='cuda:0')
episode: 398 training return: tensor(-250.9312, device='cuda:0')
episode: 399 training return: tensor(-336.3260, device='cuda:0')
epoch: 100 test_true_pfm: 2025.9538422547391 sim_pfm: 128.64038947967734
episode: 400 training return: tensor(245.4075, device='cuda:0')
episode: 401 training return: tensor(129.5416, device='cuda:0')
episode: 402 training return: tensor(-227.3844, device='cuda:0')
episode: 403 training return: tensor(-13.3869, device='cuda:0')
epoch: 101 test_true_pfm: 2288.2324725866006 sim_pfm: -52.76500078639947
episode: 404 training return: tensor(-279.5120, device='cuda:0')
episode: 405 training return: tensor(-336.1909, device='cuda:0')
episode: 406 training return: tensor(234.0044, device='cuda:0')
episode: 407 training return: tensor(254.2571, device='cuda:0')
epoch: 102 test_true_pfm: 2919.4414400675264 sim_pfm: 59.205589383025654
episode: 408 training return: tensor(-315.6721, device='cuda:0')
episode: 409 training return: tensor(-266.0836, device='cuda:0')
episode: 410 training return: tensor(-281.1959, device='cuda:0')
episode: 411 training return: tensor(-91.0031, device='cuda:0')
epoch: 103 test_true_pfm: 2436.037983403458 sim_pfm: 102.87969210794351
episode: 412 training return: tensor(87.6531, device='cuda:0')
episode: 413 training return: tensor(-261.5133, device='cuda:0')
episode: 414 training return: tensor(-229.1927, device='cuda:0')
episode: 415 training return: tensor(-0.9893, device='cuda:0')
epoch: 104 test_true_pfm: 2364.412035708309 sim_pfm: -97.38741490609634
episode: 416 training return: tensor(272.1207, device='cuda:0')
episode: 417 training return: tensor(235.4992, device='cuda:0')
episode: 418 training return: tensor(-244.4409, device='cuda:0')
episode: 419 training return: tensor(-303.4945, device='cuda:0')
epoch: 105 test_true_pfm: 2580.970531173492 sim_pfm: -55.3891554128204
episode: 420 training return: tensor(202.2594, device='cuda:0')
episode: 421 training return: tensor(-310.5434, device='cuda:0')
episode: 422 training return: tensor(-300.4259, device='cuda:0')
episode: 423 training return: tensor(282.0712, device='cuda:0')
epoch: 106 test_true_pfm: 2766.652063792863 sim_pfm: -139.5280379137645
episode: 424 training return: tensor(-382.0516, device='cuda:0')
episode: 425 training return: tensor(-360.5865, device='cuda:0')
episode: 426 training return: tensor(39.9576, device='cuda:0')
episode: 427 training return: tensor(-209.5497, device='cuda:0')
epoch: 107 test_true_pfm: 2124.3057791029437 sim_pfm: -43.74926606131097
episode: 428 training return: tensor(274.1953, device='cuda:0')
episode: 429 training return: tensor(281.5240, device='cuda:0')
episode: 430 training return: tensor(11.2847, device='cuda:0')
episode: 431 training return: tensor(-308.1400, device='cuda:0')
epoch: 108 test_true_pfm: 2639.5888715512738 sim_pfm: 82.31350842764368
episode: 432 training return: tensor(231.3473, device='cuda:0')
episode: 433 training return: tensor(-232.7908, device='cuda:0')
episode: 434 training return: tensor(186.1694, device='cuda:0')
episode: 435 training return: tensor(-14.1752, device='cuda:0')
epoch: 109 test_true_pfm: 3075.568005761242 sim_pfm: -5.308400597966586
episode: 436 training return: tensor(-114.8179, device='cuda:0')
episode: 437 training return: tensor(-309.7455, device='cuda:0')
episode: 438 training return: tensor(-25.5620, device='cuda:0')
episode: 439 training return: tensor(-267.7493, device='cuda:0')
epoch: 110 test_true_pfm: 2896.2138118031376 sim_pfm: 48.027262015035376
episode: 440 training return: tensor(-250.6075, device='cuda:0')
episode: 441 training return: tensor(3.4804, device='cuda:0')
episode: 442 training return: tensor(251.5120, device='cuda:0')
episode: 443 training return: tensor(-221.8028, device='cuda:0')
epoch: 111 test_true_pfm: 2549.7195932958334 sim_pfm: 253.14700131091135
episode: 444 training return: tensor(235.7083, device='cuda:0')
episode: 445 training return: tensor(-294.3918, device='cuda:0')
episode: 446 training return: tensor(-311.6714, device='cuda:0')
episode: 447 training return: tensor(-52.9381, device='cuda:0')
epoch: 112 test_true_pfm: 2667.7508638748977 sim_pfm: 1.0279526102822274
episode: 448 training return: tensor(40.6513, device='cuda:0')
episode: 449 training return: tensor(26.2937, device='cuda:0')
episode: 450 training return: tensor(-79.7264, device='cuda:0')
episode: 451 training return: tensor(130.1549, device='cuda:0')
epoch: 113 test_true_pfm: 2764.3212955400086 sim_pfm: -16.30390518833883
episode: 452 training return: tensor(-21.1670, device='cuda:0')
episode: 453 training return: tensor(-289.3332, device='cuda:0')
episode: 454 training return: tensor(-283.2324, device='cuda:0')
episode: 455 training return: tensor(-351.4210, device='cuda:0')
epoch: 114 test_true_pfm: 2672.1653838169354 sim_pfm: -1.9637834249491182
episode: 456 training return: tensor(154.9103, device='cuda:0')
episode: 457 training return: tensor(-115.3665, device='cuda:0')
episode: 458 training return: tensor(256.0734, device='cuda:0')
episode: 459 training return: tensor(-345.6239, device='cuda:0')
epoch: 115 test_true_pfm: 1991.7753980552677 sim_pfm: 170.6470461288312
episode: 460 training return: tensor(-197.7016, device='cuda:0')
episode: 461 training return: tensor(-232.4225, device='cuda:0')
episode: 462 training return: tensor(94.3961, device='cuda:0')
episode: 463 training return: tensor(234.7272, device='cuda:0')
epoch: 116 test_true_pfm: 2343.178698140228 sim_pfm: -6.345918554036568
episode: 464 training return: tensor(-378.5196, device='cuda:0')
episode: 465 training return: tensor(-253.7453, device='cuda:0')
episode: 466 training return: tensor(-42.1422, device='cuda:0')
episode: 467 training return: tensor(-266.9554, device='cuda:0')
epoch: 117 test_true_pfm: 2585.601358971447 sim_pfm: 18.909776595556952
episode: 468 training return: tensor(-311.3981, device='cuda:0')
episode: 469 training return: tensor(-371.5156, device='cuda:0')
episode: 470 training return: tensor(-381.3788, device='cuda:0')
episode: 471 training return: tensor(262.2240, device='cuda:0')
epoch: 118 test_true_pfm: 2734.55923550344 sim_pfm: -3.1737890650013774
episode: 472 training return: tensor(233.7845, device='cuda:0')
episode: 473 training return: tensor(-208.7672, device='cuda:0')
episode: 474 training return: tensor(245.2220, device='cuda:0')
episode: 475 training return: tensor(58.8886, device='cuda:0')
epoch: 119 test_true_pfm: 2760.5294424092403 sim_pfm: 34.7600014708781
episode: 476 training return: tensor(-357.3954, device='cuda:0')
episode: 477 training return: tensor(-64.5271, device='cuda:0')
episode: 478 training return: tensor(-352.9973, device='cuda:0')
episode: 479 training return: tensor(-212.3994, device='cuda:0')
epoch: 120 test_true_pfm: 2935.541250207551 sim_pfm: -22.47762399430697
episode: 480 training return: tensor(152.4619, device='cuda:0')
episode: 481 training return: tensor(-278.0355, device='cuda:0')
episode: 482 training return: tensor(-360.2979, device='cuda:0')
episode: 483 training return: tensor(-76.5969, device='cuda:0')
epoch: 121 test_true_pfm: 2177.156932913494 sim_pfm: 47.49397995804126
episode: 484 training return: tensor(30.8291, device='cuda:0')
episode: 485 training return: tensor(-283.7362, device='cuda:0')
episode: 486 training return: tensor(108.9112, device='cuda:0')
episode: 487 training return: tensor(247.4374, device='cuda:0')
epoch: 122 test_true_pfm: 2547.6228949099786 sim_pfm: 58.29895230402084
episode: 488 training return: tensor(-384.7412, device='cuda:0')
episode: 489 training return: tensor(37.3716, device='cuda:0')
episode: 490 training return: tensor(19.8916, device='cuda:0')
episode: 491 training return: tensor(-35.0638, device='cuda:0')
epoch: 123 test_true_pfm: 2695.4962327075887 sim_pfm: 64.35694771104802
episode: 492 training return: tensor(293.6648, device='cuda:0')
episode: 493 training return: tensor(-353.5416, device='cuda:0')
episode: 494 training return: tensor(133.1331, device='cuda:0')
episode: 495 training return: tensor(-379.5803, device='cuda:0')
epoch: 124 test_true_pfm: 2947.7464426416777 sim_pfm: 145.57288671242227
episode: 496 training return: tensor(281.0338, device='cuda:0')
episode: 497 training return: tensor(-348.7052, device='cuda:0')
episode: 498 training return: tensor(-137.7654, device='cuda:0')
episode: 499 training return: tensor(-294.2161, device='cuda:0')
epoch: 125 test_true_pfm: 2877.72898486271 sim_pfm: 97.84734982217196
episode: 500 training return: tensor(-340.4612, device='cuda:0')
episode: 501 training return: tensor(-282.7760, device='cuda:0')
episode: 502 training return: tensor(-328.5426, device='cuda:0')
episode: 503 training return: tensor(-253.3587, device='cuda:0')
epoch: 126 test_true_pfm: 2884.5286063453273 sim_pfm: -55.334142737478636
episode: 504 training return: tensor(-354.2931, device='cuda:0')
episode: 505 training return: tensor(291.9063, device='cuda:0')
episode: 506 training return: tensor(-110.6963, device='cuda:0')
episode: 507 training return: tensor(-319.5210, device='cuda:0')
epoch: 127 test_true_pfm: 2528.7992994829838 sim_pfm: -52.307768238630764
episode: 508 training return: tensor(246.0947, device='cuda:0')
episode: 509 training return: tensor(10.6779, device='cuda:0')
episode: 510 training return: tensor(-386.2302, device='cuda:0')
episode: 511 training return: tensor(-10.2357, device='cuda:0')
epoch: 128 test_true_pfm: 2792.782943159298 sim_pfm: -45.49225612095324
episode: 512 training return: tensor(299.1059, device='cuda:0')
episode: 513 training return: tensor(243.1399, device='cuda:0')
episode: 514 training return: tensor(59.7251, device='cuda:0')
episode: 515 training return: tensor(129.9938, device='cuda:0')
epoch: 129 test_true_pfm: 2064.3243657283656 sim_pfm: -0.8689379798791682
episode: 516 training return: tensor(274.0186, device='cuda:0')
episode: 517 training return: tensor(-207.3000, device='cuda:0')
episode: 518 training return: tensor(67.5390, device='cuda:0')
episode: 519 training return: tensor(-25.5938, device='cuda:0')
epoch: 130 test_true_pfm: 2481.0418404407815 sim_pfm: -71.91696500145675
episode: 520 training return: tensor(209.3329, device='cuda:0')
episode: 521 training return: tensor(-177.6282, device='cuda:0')
episode: 522 training return: tensor(-210.1229, device='cuda:0')
episode: 523 training return: tensor(178.1211, device='cuda:0')
epoch: 131 test_true_pfm: 2094.6421179190747 sim_pfm: 19.138794116365414
episode: 524 training return: tensor(-290.8862, device='cuda:0')
episode: 525 training return: tensor(-316.8662, device='cuda:0')
episode: 526 training return: tensor(-161.9638, device='cuda:0')
episode: 527 training return: tensor(-330.5807, device='cuda:0')
epoch: 132 test_true_pfm: 2760.9649106889824 sim_pfm: 151.63693289046446
episode: 528 training return: tensor(-387.5665, device='cuda:0')
episode: 529 training return: tensor(-25.6826, device='cuda:0')
episode: 530 training return: tensor(-211.6293, device='cuda:0')
episode: 531 training return: tensor(259.7419, device='cuda:0')
epoch: 133 test_true_pfm: 2118.932423119732 sim_pfm: 143.9000920835339
episode: 532 training return: tensor(-349.7958, device='cuda:0')
episode: 533 training return: tensor(258.7509, device='cuda:0')
episode: 534 training return: tensor(-72.0999, device='cuda:0')
episode: 535 training return: tensor(115.7857, device='cuda:0')
epoch: 134 test_true_pfm: 2367.034838511379 sim_pfm: 11.771005679504015
episode: 536 training return: tensor(-108.1984, device='cuda:0')
episode: 537 training return: tensor(-272.5606, device='cuda:0')
episode: 538 training return: tensor(-359.3197, device='cuda:0')
episode: 539 training return: tensor(124.3247, device='cuda:0')
epoch: 135 test_true_pfm: 2788.4131148809333 sim_pfm: 63.054698166204616
episode: 540 training return: tensor(-366.0185, device='cuda:0')
episode: 541 training return: tensor(279.8530, device='cuda:0')
episode: 542 training return: tensor(22.6170, device='cuda:0')
episode: 543 training return: tensor(242.2430, device='cuda:0')
epoch: 136 test_true_pfm: 2798.8537853466883 sim_pfm: 147.1544741106045
episode: 544 training return: tensor(-120.6824, device='cuda:0')
episode: 545 training return: tensor(-231.4593, device='cuda:0')
episode: 546 training return: tensor(-290.8080, device='cuda:0')
episode: 547 training return: tensor(-118.1510, device='cuda:0')
epoch: 137 test_true_pfm: 2384.115715056869 sim_pfm: 170.18136425099024
episode: 548 training return: tensor(-298.4038, device='cuda:0')
episode: 549 training return: tensor(-214.9137, device='cuda:0')
episode: 550 training return: tensor(279.9879, device='cuda:0')
episode: 551 training return: tensor(210.5497, device='cuda:0')
epoch: 138 test_true_pfm: 2774.4799108127404 sim_pfm: 187.71557704593093
episode: 552 training return: tensor(-79.8173, device='cuda:0')
episode: 553 training return: tensor(-240.2188, device='cuda:0')
episode: 554 training return: tensor(-132.4780, device='cuda:0')
episode: 555 training return: tensor(263.7836, device='cuda:0')
epoch: 139 test_true_pfm: 2133.148220215031 sim_pfm: 88.25346080819145
episode: 556 training return: tensor(-301.5046, device='cuda:0')
episode: 557 training return: tensor(122.7581, device='cuda:0')
episode: 558 training return: tensor(-211.0805, device='cuda:0')
episode: 559 training return: tensor(-58.7999, device='cuda:0')
epoch: 140 test_true_pfm: 2475.1959316747816 sim_pfm: -0.8190855962069085
episode: 560 training return: tensor(21.2119, device='cuda:0')
episode: 561 training return: tensor(-349.8352, device='cuda:0')
episode: 562 training return: tensor(184.8405, device='cuda:0')
episode: 563 training return: tensor(-386.8804, device='cuda:0')
epoch: 141 test_true_pfm: 2838.7722974096273 sim_pfm: 65.367378302617
episode: 564 training return: tensor(-335.6774, device='cuda:0')
episode: 565 training return: tensor(246.1445, device='cuda:0')
episode: 566 training return: tensor(37.6504, device='cuda:0')
episode: 567 training return: tensor(-288.9935, device='cuda:0')
epoch: 142 test_true_pfm: 1953.9734089122976 sim_pfm: 163.9609177748401
episode: 568 training return: tensor(-375.8023, device='cuda:0')
episode: 569 training return: tensor(-291.4011, device='cuda:0')
episode: 570 training return: tensor(234.2080, device='cuda:0')
episode: 571 training return: tensor(-219.4601, device='cuda:0')
epoch: 143 test_true_pfm: 2764.4681592255124 sim_pfm: -190.36504745700708
episode: 572 training return: tensor(-331.1064, device='cuda:0')
episode: 573 training return: tensor(4.7241, device='cuda:0')
episode: 574 training return: tensor(-62.4998, device='cuda:0')
episode: 575 training return: tensor(-209.7097, device='cuda:0')
epoch: 144 test_true_pfm: 2327.865459407972 sim_pfm: 156.04238135175547
episode: 576 training return: tensor(-69.5648, device='cuda:0')
episode: 577 training return: tensor(-351.9618, device='cuda:0')
episode: 578 training return: tensor(-213.0938, device='cuda:0')
episode: 579 training return: tensor(107.3802, device='cuda:0')
epoch: 145 test_true_pfm: 2847.7517410888236 sim_pfm: -170.29899129263745
episode: 580 training return: tensor(-291.2058, device='cuda:0')
episode: 581 training return: tensor(38.6209, device='cuda:0')
episode: 582 training return: tensor(-382.6157, device='cuda:0')
episode: 583 training return: tensor(5.4544, device='cuda:0')
epoch: 146 test_true_pfm: 2856.5720502225854 sim_pfm: -27.744336804685492
episode: 584 training return: tensor(-328.1582, device='cuda:0')
episode: 585 training return: tensor(-227.1699, device='cuda:0')
episode: 586 training return: tensor(-339.5764, device='cuda:0')
episode: 587 training return: tensor(-259.8763, device='cuda:0')
epoch: 147 test_true_pfm: 2354.523521098223 sim_pfm: -174.05232054305574
episode: 588 training return: tensor(-241.7656, device='cuda:0')
episode: 589 training return: tensor(309.0352, device='cuda:0')
episode: 590 training return: tensor(-333.2104, device='cuda:0')
episode: 591 training return: tensor(120.9867, device='cuda:0')
epoch: 148 test_true_pfm: 2192.8866748986998 sim_pfm: -88.6746908481776
episode: 592 training return: tensor(-93.4734, device='cuda:0')
episode: 593 training return: tensor(-28.0617, device='cuda:0')
episode: 594 training return: tensor(-285.6470, device='cuda:0')
episode: 595 training return: tensor(-63.9776, device='cuda:0')
epoch: 149 test_true_pfm: 2688.1760568569393 sim_pfm: 224.28762572004538
episode: 596 training return: tensor(-284.2034, device='cuda:0')
episode: 597 training return: tensor(-387.9142, device='cuda:0')
episode: 598 training return: tensor(0.2158, device='cuda:0')
episode: 599 training return: tensor(238.9963, device='cuda:0')
epoch: 150 test_true_pfm: 2130.7578359232552 sim_pfm: 161.1571802601296
