['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'expert', '--seed', '4']
epoch: 0 training_loss 0.34719728752970697 test_loss: 0.24580118656158448
epoch: 1 training_loss 0.2309780487418175 test_loss: 0.2105637550354004
epoch: 2 training_loss 0.19928919337689877 test_loss: 0.18950414657592773
epoch: 3 training_loss 0.1720309231430292 test_loss: 0.15141667127609254
epoch: 4 training_loss 0.16494615942239763 test_loss: 0.13814737796783447
epoch: 5 training_loss 0.14711635030806064 test_loss: 0.1507741928100586
epoch: 6 training_loss 0.1508598191291094 test_loss: 0.13917709589004518
epoch: 7 training_loss 0.13524985428899527 test_loss: 0.12325136661529541
epoch: 8 training_loss 0.13442797411233187 test_loss: 0.1432321548461914
epoch: 9 training_loss 0.13574810437858104 test_loss: 0.16839059591293334
epoch: 10 training_loss 0.13961142357438802 test_loss: 0.12825108766555787
epoch: 11 training_loss 0.13248124331235886 test_loss: 0.14748744964599608
epoch: 12 training_loss 0.13719076216220855 test_loss: 0.1359032154083252
epoch: 13 training_loss 0.12923763606697322 test_loss: 0.10572865009307861
epoch: 14 training_loss 0.13716285344213247 test_loss: 0.13456782102584838
epoch: 15 training_loss 0.12634258814156055 test_loss: 0.12033269405364991
epoch: 16 training_loss 0.135941921249032 test_loss: 0.1020419716835022
epoch: 17 training_loss 0.11857692286372185 test_loss: 0.11623793840408325
epoch: 18 training_loss 0.12438712142407894 test_loss: 0.10956920385360717
epoch: 19 training_loss 0.11814777910709381 test_loss: 0.10627549886703491
epoch: 20 training_loss 0.11833329293876886 test_loss: 0.11654473543167114
epoch: 21 training_loss 0.1244845275208354 test_loss: 0.14007983207702637
epoch: 22 training_loss 0.12126956559717655 test_loss: 0.10758836269378662
epoch: 23 training_loss 0.12271655153483152 test_loss: 0.11847554445266724
epoch: 24 training_loss 0.11594101447612047 test_loss: 0.10985455513000489
epoch: 25 training_loss 0.11849337972700597 test_loss: 0.12311645746231079
epoch: 26 training_loss 0.11271016139537096 test_loss: 0.116901695728302
epoch: 27 training_loss 0.1211029039695859 test_loss: 0.12162964344024658
epoch: 28 training_loss 0.11202082816511393 test_loss: 0.11986654996871948
epoch: 29 training_loss 0.11723391208797693 test_loss: 0.1199882984161377
epoch: 30 training_loss 0.11317648019641638 test_loss: 0.11676712036132812
epoch: 31 training_loss 0.12287639807909727 test_loss: 0.1189928412437439
epoch: 32 training_loss 0.11467411797493696 test_loss: 0.09959843158721923
epoch: 33 training_loss 0.12325076442211866 test_loss: 0.11145564317703247
epoch: 34 training_loss 0.11931262247264385 test_loss: 0.12957324981689453
epoch: 35 training_loss 0.11733893938362598 test_loss: 0.11739321947097778
epoch: 36 training_loss 0.11747934132814407 test_loss: 0.1181438684463501
epoch: 37 training_loss 0.1171372701600194 test_loss: 0.11199003458023071
epoch: 38 training_loss 0.11184445019811391 test_loss: 0.13208974599838258
epoch: 39 training_loss 0.1134232760593295 test_loss: 0.1029518723487854
epoch: 40 training_loss 0.11722456190735102 test_loss: 0.10740141868591309
epoch: 41 training_loss 0.10995676420629025 test_loss: 0.11466100215911865
epoch: 42 training_loss 0.11435770716518164 test_loss: 0.12304017543792725
epoch: 43 training_loss 0.11352045573294163 test_loss: 0.11366806030273438
epoch: 44 training_loss 0.11647159397602082 test_loss: 0.11921979188919067
epoch: 45 training_loss 0.1127104588970542 test_loss: 0.09096367359161377
epoch: 46 training_loss 0.12181511633098126 test_loss: 0.12201778888702393
epoch: 47 training_loss 0.11581062205135823 test_loss: 0.12354689836502075
epoch: 48 training_loss 0.1176023530960083 test_loss: 0.11624429225921631
epoch: 49 training_loss 0.11586015678942203 test_loss: 0.11474453210830689
epoch: 50 training_loss 0.11371449247002602 test_loss: 0.09751002192497253
epoch: 51 training_loss 0.11364075060933829 test_loss: 0.11613221168518066
epoch: 52 training_loss 0.11571098662912846 test_loss: 0.0944309651851654
epoch: 53 training_loss 0.11499194830656051 test_loss: 0.10251824855804444
epoch: 54 training_loss 0.11574568539857864 test_loss: 0.10061948299407959
epoch: 55 training_loss 0.1111926488392055 test_loss: 0.09900195598602295
epoch: 56 training_loss 0.11153216283768415 test_loss: 0.10989423990249633
epoch: 57 training_loss 0.10524201802909375 test_loss: 0.10441381931304931
epoch: 58 training_loss 0.10947322223335504 test_loss: 0.115432870388031
epoch: 59 training_loss 0.11679184779524804 test_loss: 0.11227148771286011
epoch: 60 training_loss 0.10485141213983297 test_loss: 0.09405670762062072
epoch: 61 training_loss 0.11550361413508653 test_loss: 0.11130422353744507
epoch: 62 training_loss 0.10617172878235578 test_loss: 0.10657291412353516
epoch: 63 training_loss 0.11924384675920009 test_loss: 0.1183731198310852
epoch: 64 training_loss 0.11157018037512899 test_loss: 0.16127443313598633
epoch: 65 training_loss 0.12164274349808693 test_loss: 0.10206054449081421
epoch: 66 training_loss 0.10728248860687017 test_loss: 0.11731282472610474
epoch: 67 training_loss 0.10709219858050346 test_loss: 0.10082018375396729
epoch: 68 training_loss 0.10708649080246686 test_loss: 0.10859514474868774
epoch: 69 training_loss 0.10788758885115385 test_loss: 0.1208351731300354
epoch: 70 training_loss 0.10670278491452337 test_loss: 0.11043747663497924
epoch: 71 training_loss 0.11014530539512635 test_loss: 0.10816940069198608
epoch: 72 training_loss 0.11453351270407439 test_loss: 0.10908236503601074
epoch: 73 training_loss 0.11219822656363249 test_loss: 0.09313269853591918
epoch: 74 training_loss 0.10909488992765545 test_loss: 0.09909850955009461
epoch: 75 training_loss 0.11276848159730435 test_loss: 0.10308529138565063
epoch: 76 training_loss 0.11001174893230199 test_loss: 0.11250394582748413
epoch: 77 training_loss 0.11331611521542072 test_loss: 0.12794415950775145
epoch: 78 training_loss 0.11174495972692966 test_loss: 0.10383161306381225
epoch: 79 training_loss 0.11460042705759406 test_loss: 0.10284082889556885
epoch: 80 training_loss 0.10404820166528225 test_loss: 0.11807512044906616
epoch: 81 training_loss 0.11152147378772498 test_loss: 0.10911766290664673
epoch: 82 training_loss 0.11032460764050483 test_loss: 0.12092926502227783
epoch: 83 training_loss 0.11161141693592072 test_loss: 0.10342707633972167
epoch: 84 training_loss 0.10748609486967325 test_loss: 0.11300231218338012
epoch: 85 training_loss 0.11180474009364844 test_loss: 0.10326807498931885
epoch: 86 training_loss 0.11545671373605729 test_loss: 0.11028248071670532
epoch: 87 training_loss 0.10572325076907874 test_loss: 0.11107785701751709
epoch: 88 training_loss 0.1094967233017087 test_loss: 0.09728851914405823
epoch: 89 training_loss 0.11154064778238534 test_loss: 0.10843656063079835
epoch: 90 training_loss 0.10856874581426382 test_loss: 0.1055572509765625
epoch: 91 training_loss 0.11242073580622673 test_loss: 0.10055273771286011
epoch: 92 training_loss 0.11100588668137788 test_loss: 0.12145818471908569
epoch: 93 training_loss 0.10732167957350612 test_loss: 0.10850090980529785
epoch: 94 training_loss 0.10629163719713688 test_loss: 0.09369423985481262
epoch: 95 training_loss 0.10880836252123118 test_loss: 0.11230393648147582
epoch: 96 training_loss 0.11041036698967219 test_loss: 0.10663893222808837
epoch: 97 training_loss 0.11101752731949091 test_loss: 0.12077430486679078
epoch: 98 training_loss 0.11029791241511702 test_loss: 0.08348002433776855
epoch: 99 training_loss 0.10706310220062733 test_loss: 0.11211267709732056
epoch: 100 training_loss 0.10473469708114863 test_loss: 0.11479288339614868
epoch: 101 training_loss 0.10910619497299194 test_loss: 0.10775173902511596
epoch: 102 training_loss 0.10260116279125214 test_loss: 0.1239740490913391
epoch: 103 training_loss 0.1112064903602004 test_loss: 0.10980224609375
epoch: 104 training_loss 0.10297347966581583 test_loss: 0.11673346757888795
epoch: 105 training_loss 0.10739352339878679 test_loss: 0.11683083772659301
epoch: 106 training_loss 0.11064991839230061 test_loss: 0.11360094547271729
epoch: 107 training_loss 0.11462258376181125 test_loss: 0.09949584007263183
epoch: 108 training_loss 0.11216234490275383 test_loss: 0.11123557090759277
epoch: 109 training_loss 0.10660338848829269 test_loss: 0.12415633201599122
epoch: 110 training_loss 0.10323731729760766 test_loss: 0.1227792739868164
epoch: 111 training_loss 0.10652983894571662 test_loss: 0.1127516746520996
epoch: 112 training_loss 0.11269886039197445 test_loss: 0.11565823554992676
epoch: 113 training_loss 0.10888816904276609 test_loss: 0.10735214948654175
epoch: 114 training_loss 0.11182554595172406 test_loss: 0.1052959680557251
epoch: 115 training_loss 0.11594497416168452 test_loss: 0.10251750946044921
epoch: 116 training_loss 0.10724167753010988 test_loss: 0.10864659547805786
epoch: 117 training_loss 0.10852932054549455 test_loss: 0.13003120422363282
epoch: 118 training_loss 0.10715115491300821 test_loss: 0.1236760139465332
epoch: 119 training_loss 0.10187460985034705 test_loss: 0.11336352825164794
epoch: 120 training_loss 0.10503632413223386 test_loss: 0.1151117205619812
epoch: 121 training_loss 0.10860204398632049 test_loss: 0.11452320814132691
epoch: 122 training_loss 0.1077215980552137 test_loss: 0.11767133474349975
epoch: 123 training_loss 0.1112987582758069 test_loss: 0.09895403981208802
epoch: 124 training_loss 0.10910251088440419 test_loss: 0.1162139892578125
epoch: 125 training_loss 0.10997297512367368 test_loss: 0.12134971618652343
epoch: 126 training_loss 0.10923455873504281 test_loss: 0.12090506553649902
epoch: 127 training_loss 0.10194057010114194 test_loss: 0.10280990600585938
epoch: 128 training_loss 0.11060995610430836 test_loss: 0.10672013759613037
epoch: 129 training_loss 0.10892454154789448 test_loss: 0.10654665231704712
epoch: 130 training_loss 0.11193886186927557 test_loss: 0.09817518591880799
epoch: 131 training_loss 0.11064280673861504 test_loss: 0.11105514764785766
epoch: 132 training_loss 0.10927551791071892 test_loss: 0.12736852169036866
epoch: 133 training_loss 0.10904640004038811 test_loss: 0.10119407176971436
epoch: 134 training_loss 0.11313027996569872 test_loss: 0.11206917762756348
epoch: 135 training_loss 0.10974719589576125 test_loss: 0.11321885585784912
epoch: 136 training_loss 0.1089396420121193 test_loss: 0.10400651693344116
epoch: 137 training_loss 0.10994395658373833 test_loss: 0.10185203552246094
epoch: 138 training_loss 0.11542782528325915 test_loss: 0.11539112329483033
epoch: 139 training_loss 0.11457684919238091 test_loss: 0.09887276887893677
epoch: 140 training_loss 0.11137166801840066 test_loss: 0.09131943583488464
epoch: 141 training_loss 0.10217080809175969 test_loss: 0.08426712155342102
epoch: 142 training_loss 0.11106736529618502 test_loss: 0.11133064031600952
epoch: 143 training_loss 0.10964695148169995 test_loss: 0.10845407247543334
epoch: 144 training_loss 0.11243201646953821 test_loss: 0.09977555871009827
epoch: 145 training_loss 0.10606243638321758 test_loss: 0.10941894054412842
epoch: 146 training_loss 0.10982315007597208 test_loss: 0.10608699321746826
epoch: 147 training_loss 0.10532654646784068 test_loss: 0.11321322917938233
epoch: 148 training_loss 0.10720991499722005 test_loss: 0.11558358669281006
epoch: 149 training_loss 0.10757639490067959 test_loss: 0.11308419704437256
epoch: 0 training_loss 45.93860864639282 test_loss: 26.302999877929686
epoch: 1 training_loss 21.52275890350342 test_loss: 17.896870422363282
epoch: 2 training_loss 16.197237491607666 test_loss: 14.787033081054688
epoch: 3 training_loss 13.243546895980835 test_loss: 12.080321502685546
epoch: 4 training_loss 11.683308944702148 test_loss: 11.140248107910157
epoch: 5 training_loss 10.137084951400757 test_loss: 9.521161651611328
epoch: 6 training_loss 9.322132406234742 test_loss: 9.043760681152344
epoch: 7 training_loss 8.308351144790649 test_loss: 8.142256164550782
epoch: 8 training_loss 7.7047900342941285 test_loss: 7.2933601379394535
epoch: 9 training_loss 7.206051316261291 test_loss: 6.986448669433594
epoch: 10 training_loss 6.682659120559692 test_loss: 6.980897521972656
epoch: 11 training_loss 6.51633514881134 test_loss: 6.326261138916015
epoch: 12 training_loss 6.094858412742615 test_loss: 6.200164794921875
epoch: 13 training_loss 5.745251250267029 test_loss: 5.79547348022461
epoch: 14 training_loss 5.6390922737121585 test_loss: 5.2955585479736325
epoch: 15 training_loss 5.376959772109985 test_loss: 5.055165100097656
epoch: 16 training_loss 5.087762937545777 test_loss: 4.982644271850586
epoch: 17 training_loss 4.966700088977814 test_loss: 4.966922760009766
epoch: 18 training_loss 4.743346800804138 test_loss: 4.942729568481445
epoch: 19 training_loss 4.732925369739532 test_loss: 4.653057861328125
epoch: 20 training_loss 4.563350560665131 test_loss: 4.590645980834961
epoch: 21 training_loss 4.433594686985016 test_loss: 4.388426208496094
epoch: 22 training_loss 4.31551635503769 test_loss: 4.292136764526367
epoch: 23 training_loss 4.277469632625579 test_loss: 4.2700653076171875
epoch: 24 training_loss 4.136462602615357 test_loss: 4.06041374206543
epoch: 25 training_loss 4.03656837940216 test_loss: 4.277312088012695
epoch: 26 training_loss 4.0707446122169495 test_loss: 4.1550651550292965
epoch: 27 training_loss 3.9267172479629515 test_loss: 4.075461959838867
epoch: 28 training_loss 3.812139964103699 test_loss: 4.076667404174804
epoch: 29 training_loss 3.7374464774131777 test_loss: 3.738841247558594
epoch: 30 training_loss 3.7330267214775086 test_loss: 3.7135299682617187
epoch: 31 training_loss 3.6086490106582643 test_loss: 3.823107147216797
epoch: 32 training_loss 3.568824994564056 test_loss: 3.7766277313232424
epoch: 33 training_loss 3.495626890659332 test_loss: 3.668511962890625
epoch: 34 training_loss 3.552609715461731 test_loss: 3.4891067504882813
epoch: 35 training_loss 3.4263683366775513 test_loss: 3.4079540252685545
epoch: 36 training_loss 3.4567474937438964 test_loss: 3.540767288208008
epoch: 37 training_loss 3.402216031551361 test_loss: 3.3126911163330077
epoch: 38 training_loss 3.3028293156623842 test_loss: 3.3803741455078127
epoch: 39 training_loss 3.3288798332214355 test_loss: 3.250946044921875
epoch: 40 training_loss 3.2480973267555235 test_loss: 3.3242618560791017
epoch: 41 training_loss 3.218732831478119 test_loss: 3.1890451431274416
epoch: 42 training_loss 3.0900485920906067 test_loss: 3.218484878540039
epoch: 43 training_loss 3.089620778560638 test_loss: 3.3021175384521486
epoch: 44 training_loss 3.120499336719513 test_loss: 3.117201232910156
epoch: 45 training_loss 3.005976552963257 test_loss: 3.166355323791504
epoch: 46 training_loss 2.9690526223182676 test_loss: 3.069132995605469
epoch: 47 training_loss 2.9748651027679442 test_loss: 3.0823413848876955
epoch: 48 training_loss 2.9433892488479616 test_loss: 3.0754121780395507
epoch: 49 training_loss 2.9360362982749937 test_loss: 2.9997814178466795
epoch: 50 training_loss 2.8964323806762695 test_loss: 2.867824745178223
epoch: 51 training_loss 2.926262357234955 test_loss: 2.9356250762939453
epoch: 52 training_loss 2.8246773743629454 test_loss: 2.894334411621094
epoch: 53 training_loss 2.79307341337204 test_loss: 2.794845199584961
epoch: 54 training_loss 2.7811533546447755 test_loss: 2.710348701477051
epoch: 55 training_loss 2.8074206733703613 test_loss: 2.8152809143066406
epoch: 56 training_loss 2.7767610692977907 test_loss: 2.746218681335449
epoch: 57 training_loss 2.786075973510742 test_loss: 2.7876213073730467
epoch: 58 training_loss 2.6650689697265624 test_loss: 2.7453596115112306
epoch: 59 training_loss 2.69250425696373 test_loss: 2.796028900146484
epoch: 60 training_loss 2.6561831784248353 test_loss: 2.6660831451416014
epoch: 61 training_loss 2.665021848678589 test_loss: 2.7104944229125976
epoch: 62 training_loss 2.6764594197273253 test_loss: 2.511311340332031
epoch: 63 training_loss 2.5983913958072664 test_loss: 2.6966854095458985
epoch: 64 training_loss 2.602390775680542 test_loss: 2.7213584899902346
epoch: 65 training_loss 2.619918327331543 test_loss: 2.6792421340942383
epoch: 66 training_loss 2.5984436774253847 test_loss: 2.489296531677246
epoch: 67 training_loss 2.513993822336197 test_loss: 2.6977108001708983
epoch: 68 training_loss 2.5926536393165587 test_loss: 2.7453237533569337
epoch: 69 training_loss 2.5662509667873383 test_loss: 2.5724603652954103
epoch: 70 training_loss 2.5568982684612274 test_loss: 2.676279067993164
epoch: 71 training_loss 2.488480315208435 test_loss: 2.483375358581543
epoch: 72 training_loss 2.578436574935913 test_loss: 2.510262298583984
epoch: 73 training_loss 2.539739773273468 test_loss: 2.726070594787598
epoch: 74 training_loss 2.4168590307235718 test_loss: 2.633502388000488
epoch: 75 training_loss 2.465945551395416 test_loss: 2.364843559265137
epoch: 76 training_loss 2.4721157014369965 test_loss: 2.638128471374512
epoch: 77 training_loss 2.3980131685733794 test_loss: 2.39678955078125
epoch: 78 training_loss 2.4607179164886475 test_loss: 2.4414365768432615
epoch: 79 training_loss 2.3635709035396575 test_loss: 2.4435092926025392
epoch: 80 training_loss 2.393011301755905 test_loss: 2.4640708923339845
epoch: 81 training_loss 2.425157072544098 test_loss: 2.4735469818115234
epoch: 82 training_loss 2.3172207903862 test_loss: 2.3346673965454103
epoch: 83 training_loss 2.324421557188034 test_loss: 2.4094362258911133
epoch: 84 training_loss 2.3421602988243104 test_loss: 2.3971321105957033
epoch: 85 training_loss 2.310191068649292 test_loss: 2.3332109451293945
epoch: 86 training_loss 2.290321156978607 test_loss: 2.221652603149414
epoch: 87 training_loss 2.2545568346977234 test_loss: 2.330170440673828
epoch: 88 training_loss 2.331397434473038 test_loss: 2.3168182373046875
epoch: 89 training_loss 2.3022289085388183 test_loss: 2.3851823806762695
epoch: 90 training_loss 2.241002150774002 test_loss: 2.513813591003418
epoch: 91 training_loss 2.2414233505725862 test_loss: 2.275342559814453
epoch: 92 training_loss 2.2894694483280182 test_loss: 2.2816675186157225
epoch: 93 training_loss 2.261143708229065 test_loss: 2.1166181564331055
epoch: 94 training_loss 2.1649352848529815 test_loss: 2.28307991027832
epoch: 95 training_loss 2.273196966648102 test_loss: 2.0922115325927733
epoch: 96 training_loss 2.167828747034073 test_loss: 2.228338432312012
epoch: 97 training_loss 2.200840709209442 test_loss: 2.181343650817871
epoch: 98 training_loss 2.215737564563751 test_loss: 2.3269588470458986
epoch: 99 training_loss 2.15751633644104 test_loss: 2.185738945007324
epoch: 100 training_loss 2.254199514389038 test_loss: 2.121717643737793
epoch: 101 training_loss 2.2046365475654603 test_loss: 2.181230926513672
epoch: 102 training_loss 2.1531167888641356 test_loss: 2.1488637924194336
epoch: 103 training_loss 2.129205527305603 test_loss: 2.217950630187988
epoch: 104 training_loss 2.20383531332016 test_loss: 2.1426549911499024
epoch: 105 training_loss 2.093119753599167 test_loss: 2.2659221649169923
epoch: 106 training_loss 2.1765306389331815 test_loss: 2.2489946365356444
epoch: 107 training_loss 2.1544592547416688 test_loss: 2.182550811767578
epoch: 108 training_loss 2.1340419256687166 test_loss: 2.070260429382324
epoch: 109 training_loss 2.136665405035019 test_loss: 2.2077251434326173
epoch: 110 training_loss 2.1906663942337037 test_loss: 2.0761411666870115
epoch: 111 training_loss 2.142753357887268 test_loss: 2.178293228149414
epoch: 112 training_loss 2.087395910024643 test_loss: 2.262294387817383
epoch: 113 training_loss 2.1131756961345673 test_loss: 2.075328254699707
epoch: 114 training_loss 2.085950639247894 test_loss: 2.149765968322754
epoch: 115 training_loss 2.067219091653824 test_loss: 2.2210975646972657
epoch: 116 training_loss 2.0711074125766755 test_loss: 2.0280323028564453
epoch: 117 training_loss 2.0288035547733307 test_loss: 2.234965133666992
epoch: 118 training_loss 2.030120639801025 test_loss: 2.0291810989379884
epoch: 119 training_loss 2.0939181101322175 test_loss: 2.0891698837280273
epoch: 120 training_loss 2.060611598491669 test_loss: 1.9863637924194335
epoch: 121 training_loss 2.0489779210090635 test_loss: 1.982762908935547
epoch: 122 training_loss 1.99344655752182 test_loss: 2.116176223754883
epoch: 123 training_loss 2.0041110825538637 test_loss: 1.950326156616211
epoch: 124 training_loss 1.995188376903534 test_loss: 1.9744945526123048
epoch: 125 training_loss 2.0409699523448945 test_loss: 2.081452560424805
epoch: 126 training_loss 2.004634166955948 test_loss: 1.9606904983520508
epoch: 127 training_loss 2.0289316499233245 test_loss: 1.985546875
epoch: 128 training_loss 1.9599663043022155 test_loss: 1.9807897567749024
epoch: 129 training_loss 1.9995983815193177 test_loss: 2.0903804779052733
epoch: 130 training_loss 1.9881344532966614 test_loss: 2.0913358688354493
epoch: 131 training_loss 1.9503455221652986 test_loss: 2.058490180969238
epoch: 132 training_loss 2.014685130119324 test_loss: 2.015161895751953
epoch: 133 training_loss 1.945627967119217 test_loss: 2.0632583618164064
epoch: 134 training_loss 1.9296565961837768 test_loss: 1.9341760635375977
epoch: 135 training_loss 1.9092516565322877 test_loss: 2.0195337295532227
epoch: 136 training_loss 1.8750872540473937 test_loss: 1.9714166641235351
epoch: 137 training_loss 1.9310918295383452 test_loss: 1.9921861648559571
epoch: 138 training_loss 1.9711448287963866 test_loss: 2.0283924102783204
epoch: 139 training_loss 1.9463610994815825 test_loss: 1.9568494796752929
epoch: 140 training_loss 1.9642569530010223 test_loss: 1.9869964599609375
epoch: 141 training_loss 1.9158690798282623 test_loss: 1.9184486389160156
epoch: 142 training_loss 1.8912737154960633 test_loss: 1.9320457458496094
epoch: 143 training_loss 1.9397636938095093 test_loss: 2.036235046386719
epoch: 144 training_loss 1.9059908390045166 test_loss: 1.9893739700317383
epoch: 145 training_loss 1.903906352519989 test_loss: 1.985262680053711
epoch: 146 training_loss 1.8667319297790528 test_loss: 1.9073348999023438
epoch: 147 training_loss 1.9326388502120972 test_loss: 2.0271379470825197
epoch: 148 training_loss 1.916679310798645 test_loss: 1.909933090209961
epoch: 149 training_loss 1.8714551091194154 test_loss: 1.8324880599975586
7581.011283135789
episode: 0 training return: tensor(-368.1752, device='cuda:0')
episode: 1 training return: tensor(-996.7032, device='cuda:0')
episode: 2 training return: tensor(-944.3409, device='cuda:0')
episode: 3 training return: tensor(-418.6289, device='cuda:0')
epoch: 1 test_true_pfm: 3481.2696342742784 sim_pfm: -379.0499820889672
episode: 4 training return: tensor(-624.3298, device='cuda:0')
episode: 5 training return: tensor(-877.4797, device='cuda:0')
episode: 6 training return: tensor(-711.8906, device='cuda:0')
episode: 7 training return: tensor(-962.2830, device='cuda:0')
epoch: 2 test_true_pfm: 9529.217994758661 sim_pfm: -491.3105732323602
episode: 8 training return: tensor(-730.6483, device='cuda:0')
episode: 9 training return: tensor(-460.7969, device='cuda:0')
episode: 10 training return: tensor(-718.6738, device='cuda:0')
episode: 11 training return: tensor(-519.0884, device='cuda:0')
epoch: 3 test_true_pfm: 7534.085317370736 sim_pfm: -281.9002002700775
episode: 12 training return: tensor(-243.3180, device='cuda:0')
episode: 13 training return: tensor(-117.5146, device='cuda:0')
episode: 14 training return: tensor(-727.5552, device='cuda:0')
episode: 15 training return: tensor(-589.3730, device='cuda:0')
epoch: 4 test_true_pfm: 6989.287594103863 sim_pfm: -676.1161840184747
episode: 16 training return: tensor(-421.3252, device='cuda:0')
episode: 17 training return: tensor(-574.8224, device='cuda:0')
episode: 18 training return: tensor(-320.6380, device='cuda:0')
episode: 19 training return: tensor(-223.8351, device='cuda:0')
epoch: 5 test_true_pfm: 4101.848429938621 sim_pfm: -444.28350574199186
episode: 20 training return: tensor(-999.1303, device='cuda:0')
episode: 21 training return: tensor(-708.0037, device='cuda:0')
episode: 22 training return: tensor(-905.2533, device='cuda:0')
episode: 23 training return: tensor(-819.3216, device='cuda:0')
epoch: 6 test_true_pfm: 8192.587638237852 sim_pfm: -333.1497366803621
episode: 24 training return: tensor(-657.0603, device='cuda:0')
episode: 25 training return: tensor(-429.5692, device='cuda:0')
episode: 26 training return: tensor(-286.0747, device='cuda:0')
episode: 27 training return: tensor(-402.0150, device='cuda:0')
epoch: 7 test_true_pfm: 7769.338369689919 sim_pfm: -255.55174485093448
episode: 28 training return: tensor(-174.2935, device='cuda:0')
episode: 29 training return: tensor(-321.2530, device='cuda:0')
episode: 30 training return: tensor(-236.3660, device='cuda:0')
episode: 31 training return: tensor(-271.7416, device='cuda:0')
epoch: 8 test_true_pfm: 9814.489104160977 sim_pfm: -237.87303955911193
episode: 32 training return: tensor(-998.8444, device='cuda:0')
episode: 33 training return: tensor(-433.2200, device='cuda:0')
episode: 34 training return: tensor(-285.4538, device='cuda:0')
episode: 35 training return: tensor(-901.7627, device='cuda:0')
epoch: 9 test_true_pfm: 6365.889049168363 sim_pfm: -118.07483151827667
episode: 36 training return: tensor(-788.1867, device='cuda:0')
episode: 37 training return: tensor(-667.2842, device='cuda:0')
episode: 38 training return: tensor(-155.2193, device='cuda:0')
episode: 39 training return: tensor(-944.0732, device='cuda:0')
epoch: 10 test_true_pfm: 6154.4104840308355 sim_pfm: -450.5920447376945
episode: 40 training return: tensor(-341.7175, device='cuda:0')
episode: 41 training return: tensor(-145.2956, device='cuda:0')
episode: 42 training return: tensor(-999.9277, device='cuda:0')
episode: 43 training return: tensor(-157.2387, device='cuda:0')
epoch: 11 test_true_pfm: 5025.453022287548 sim_pfm: 36.515862686287925
episode: 44 training return: tensor(-977.3642, device='cuda:0')
episode: 45 training return: tensor(-756.2281, device='cuda:0')
episode: 46 training return: tensor(-18.7361, device='cuda:0')
episode: 47 training return: tensor(-577.3081, device='cuda:0')
epoch: 12 test_true_pfm: 6912.829570021771 sim_pfm: -467.3951349238244
episode: 48 training return: tensor(-136.6588, device='cuda:0')
episode: 49 training return: tensor(-890.0602, device='cuda:0')
episode: 50 training return: tensor(-568.0062, device='cuda:0')
episode: 51 training return: tensor(-314.4705, device='cuda:0')
epoch: 13 test_true_pfm: 7044.060581277736 sim_pfm: -98.0045280844206
episode: 52 training return: tensor(-68.6162, device='cuda:0')
episode: 53 training return: tensor(-484.1445, device='cuda:0')
episode: 54 training return: tensor(-332.7813, device='cuda:0')
episode: 55 training return: tensor(-309.1391, device='cuda:0')
epoch: 14 test_true_pfm: 5600.387847292527 sim_pfm: -10.266140640131198
episode: 56 training return: tensor(-418.2350, device='cuda:0')
episode: 57 training return: tensor(-520.7220, device='cuda:0')
episode: 58 training return: tensor(-89.4494, device='cuda:0')
episode: 59 training return: tensor(-144.5248, device='cuda:0')
epoch: 15 test_true_pfm: 7249.017191362994 sim_pfm: 58.22091264196206
episode: 60 training return: tensor(-333.4327, device='cuda:0')
episode: 61 training return: tensor(-140.8975, device='cuda:0')
episode: 62 training return: tensor(-430.7306, device='cuda:0')
episode: 63 training return: tensor(-646.2113, device='cuda:0')
epoch: 16 test_true_pfm: 8608.498763841444 sim_pfm: -369.0754223417316
episode: 64 training return: tensor(-981.5345, device='cuda:0')
episode: 65 training return: tensor(-207.7738, device='cuda:0')
episode: 66 training return: tensor(-77.9766, device='cuda:0')
episode: 67 training return: tensor(-213.7196, device='cuda:0')
epoch: 17 test_true_pfm: 6868.409502549618 sim_pfm: 78.45913498881661
episode: 68 training return: tensor(-786.9357, device='cuda:0')
episode: 69 training return: tensor(-946.8688, device='cuda:0')
episode: 70 training return: tensor(-952.5934, device='cuda:0')
episode: 71 training return: tensor(-209.9625, device='cuda:0')
epoch: 18 test_true_pfm: 7316.301962893175 sim_pfm: -389.36565632423543
episode: 72 training return: tensor(-201.8931, device='cuda:0')
episode: 73 training return: tensor(-565.0222, device='cuda:0')
episode: 74 training return: tensor(-532.5911, device='cuda:0')
episode: 75 training return: tensor(-871.4055, device='cuda:0')
epoch: 19 test_true_pfm: 10189.69042213308 sim_pfm: -19.581770773550186
episode: 76 training return: tensor(-565.3146, device='cuda:0')
episode: 77 training return: tensor(-794.4795, device='cuda:0')
episode: 78 training return: tensor(-437.9029, device='cuda:0')
episode: 79 training return: tensor(-498.8999, device='cuda:0')
epoch: 20 test_true_pfm: 5940.8680920012785 sim_pfm: -213.30471264384687
episode: 80 training return: tensor(-407.5441, device='cuda:0')
episode: 81 training return: tensor(-760.0114, device='cuda:0')
episode: 82 training return: tensor(-251.5713, device='cuda:0')
episode: 83 training return: tensor(-109.7182, device='cuda:0')
epoch: 21 test_true_pfm: 2527.334543942408 sim_pfm: -403.83856770632946
episode: 84 training return: tensor(-371.8456, device='cuda:0')
episode: 85 training return: tensor(-502.7155, device='cuda:0')
episode: 86 training return: tensor(-81.7297, device='cuda:0')
episode: 87 training return: tensor(-227.7226, device='cuda:0')
epoch: 22 test_true_pfm: 8742.330696462355 sim_pfm: -18.469986576936208
episode: 88 training return: tensor(-85.3837, device='cuda:0')
episode: 89 training return: tensor(-485.8900, device='cuda:0')
episode: 90 training return: tensor(-344.9160, device='cuda:0')
episode: 91 training return: tensor(-180.3336, device='cuda:0')
epoch: 23 test_true_pfm: 9366.086904408941 sim_pfm: -752.3233259935708
episode: 92 training return: tensor(-239.2556, device='cuda:0')
episode: 93 training return: tensor(19.8862, device='cuda:0')
episode: 94 training return: tensor(-363.9338, device='cuda:0')
episode: 95 training return: tensor(12.4312, device='cuda:0')
epoch: 24 test_true_pfm: 10468.18846079599 sim_pfm: -168.35860058599306
episode: 96 training return: tensor(8.6411, device='cuda:0')
episode: 97 training return: tensor(-400.5273, device='cuda:0')
episode: 98 training return: tensor(-406.6755, device='cuda:0')
episode: 99 training return: tensor(-190.4949, device='cuda:0')
epoch: 25 test_true_pfm: 7204.891324571491 sim_pfm: -263.57409673164756
episode: 100 training return: tensor(-696.8502, device='cuda:0')
episode: 101 training return: tensor(-284.9172, device='cuda:0')
episode: 102 training return: tensor(-126.0058, device='cuda:0')
episode: 103 training return: tensor(-726.7735, device='cuda:0')
epoch: 26 test_true_pfm: 7075.356068201946 sim_pfm: -66.52252658229554
episode: 104 training return: tensor(-999.5058, device='cuda:0')
episode: 105 training return: tensor(-570.4342, device='cuda:0')
episode: 106 training return: tensor(-123.6706, device='cuda:0')
episode: 107 training return: tensor(-27.0293, device='cuda:0')
epoch: 27 test_true_pfm: 7432.85193706775 sim_pfm: -211.78565140421657
episode: 108 training return: tensor(-609.4023, device='cuda:0')
episode: 109 training return: tensor(-445.9129, device='cuda:0')
episode: 110 training return: tensor(-38.0931, device='cuda:0')
episode: 111 training return: tensor(67.1260, device='cuda:0')
epoch: 28 test_true_pfm: 8544.157535423783 sim_pfm: 18.891710712263983
episode: 112 training return: tensor(-41.5910, device='cuda:0')
episode: 113 training return: tensor(89.4248, device='cuda:0')
episode: 114 training return: tensor(-307.6487, device='cuda:0')
episode: 115 training return: tensor(-78.0392, device='cuda:0')
epoch: 29 test_true_pfm: 9931.358859640777 sim_pfm: 42.95176954328781
episode: 116 training return: tensor(-284.6001, device='cuda:0')
episode: 117 training return: tensor(4.6724, device='cuda:0')
episode: 118 training return: tensor(45.5031, device='cuda:0')
episode: 119 training return: tensor(-888.9335, device='cuda:0')
epoch: 30 test_true_pfm: 6329.087662806421 sim_pfm: -433.2847337955997
episode: 120 training return: tensor(-97.3045, device='cuda:0')
episode: 121 training return: tensor(-75.4322, device='cuda:0')
episode: 122 training return: tensor(-211.6742, device='cuda:0')
episode: 123 training return: tensor(-997.5735, device='cuda:0')
epoch: 31 test_true_pfm: 7554.697186573841 sim_pfm: 209.08182905518333
episode: 124 training return: tensor(-398.0194, device='cuda:0')
episode: 125 training return: tensor(-370.1509, device='cuda:0')
episode: 126 training return: tensor(-68.0025, device='cuda:0')
episode: 127 training return: tensor(9.8689, device='cuda:0')
epoch: 32 test_true_pfm: 7954.630541486109 sim_pfm: -32.92089249496348
episode: 128 training return: tensor(-20.9548, device='cuda:0')
episode: 129 training return: tensor(-28.6469, device='cuda:0')
episode: 130 training return: tensor(46.0766, device='cuda:0')
episode: 131 training return: tensor(38.3148, device='cuda:0')
epoch: 33 test_true_pfm: 4696.692729082292 sim_pfm: 189.51147465675604
episode: 132 training return: tensor(-571.4296, device='cuda:0')
episode: 133 training return: tensor(-6.7131, device='cuda:0')
episode: 134 training return: tensor(-564.3220, device='cuda:0')
episode: 135 training return: tensor(122.5592, device='cuda:0')
epoch: 34 test_true_pfm: 3912.4080346193773 sim_pfm: -10.698089050701432
episode: 136 training return: tensor(-771.9359, device='cuda:0')
episode: 137 training return: tensor(-191.8471, device='cuda:0')
episode: 138 training return: tensor(-530.7480, device='cuda:0')
episode: 139 training return: tensor(-698.6548, device='cuda:0')
epoch: 35 test_true_pfm: 8243.362781255833 sim_pfm: 135.1855393506121
episode: 140 training return: tensor(-76.8119, device='cuda:0')
episode: 141 training return: tensor(-457.0876, device='cuda:0')
episode: 142 training return: tensor(-154.3163, device='cuda:0')
episode: 143 training return: tensor(-30.1644, device='cuda:0')
epoch: 36 test_true_pfm: 7940.430956625787 sim_pfm: 123.12654027616372
episode: 144 training return: tensor(48.1243, device='cuda:0')
episode: 145 training return: tensor(-117.4399, device='cuda:0')
episode: 146 training return: tensor(-332.4373, device='cuda:0')
episode: 147 training return: tensor(-310.3642, device='cuda:0')
epoch: 37 test_true_pfm: 10283.96900822549 sim_pfm: 108.06552190775983
episode: 148 training return: tensor(-178.5335, device='cuda:0')
episode: 149 training return: tensor(-45.4244, device='cuda:0')
episode: 150 training return: tensor(-61.8786, device='cuda:0')
episode: 151 training return: tensor(29.6875, device='cuda:0')
epoch: 38 test_true_pfm: 8923.190967807212 sim_pfm: -355.50018641042215
episode: 152 training return: tensor(133.7728, device='cuda:0')
episode: 153 training return: tensor(95.0091, device='cuda:0')
episode: 154 training return: tensor(-999.4908, device='cuda:0')
episode: 155 training return: tensor(-54.0327, device='cuda:0')
epoch: 39 test_true_pfm: 6364.7322688698405 sim_pfm: 61.97634323465172
episode: 156 training return: tensor(-937.6573, device='cuda:0')
episode: 157 training return: tensor(-575.4224, device='cuda:0')
episode: 158 training return: tensor(-908.4089, device='cuda:0')
episode: 159 training return: tensor(-306.5227, device='cuda:0')
epoch: 40 test_true_pfm: 6850.114049452499 sim_pfm: 10.501260706514586
episode: 160 training return: tensor(-133.5381, device='cuda:0')
episode: 161 training return: tensor(-349.3273, device='cuda:0')
episode: 162 training return: tensor(-86.3930, device='cuda:0')
episode: 163 training return: tensor(-98.6983, device='cuda:0')
epoch: 41 test_true_pfm: 6618.034046566908 sim_pfm: -54.79435175116911
episode: 164 training return: tensor(-12.5982, device='cuda:0')
episode: 165 training return: tensor(-214.9496, device='cuda:0')
episode: 166 training return: tensor(-430.1854, device='cuda:0')
episode: 167 training return: tensor(43.5046, device='cuda:0')
epoch: 42 test_true_pfm: 7190.280619798548 sim_pfm: -227.98672035126947
episode: 168 training return: tensor(-179.8195, device='cuda:0')
episode: 169 training return: tensor(128.2384, device='cuda:0')
episode: 170 training return: tensor(-549.5721, device='cuda:0')
episode: 171 training return: tensor(95.4881, device='cuda:0')
epoch: 43 test_true_pfm: 7026.039208922058 sim_pfm: -105.20085015278892
episode: 172 training return: tensor(-138.0433, device='cuda:0')
episode: 173 training return: tensor(167.9008, device='cuda:0')
episode: 174 training return: tensor(-402.3535, device='cuda:0')
episode: 175 training return: tensor(-381.7434, device='cuda:0')
epoch: 44 test_true_pfm: 9518.66095865932 sim_pfm: 155.90108811114138
episode: 176 training return: tensor(-672.4876, device='cuda:0')
episode: 177 training return: tensor(-782.9272, device='cuda:0')
episode: 178 training return: tensor(-861.1100, device='cuda:0')
episode: 179 training return: tensor(19.3946, device='cuda:0')
epoch: 45 test_true_pfm: 10038.319563249168 sim_pfm: -107.56610758028303
episode: 180 training return: tensor(-998.8292, device='cuda:0')
episode: 181 training return: tensor(180.3751, device='cuda:0')
episode: 182 training return: tensor(-0.7389, device='cuda:0')
episode: 183 training return: tensor(-159.9390, device='cuda:0')
epoch: 46 test_true_pfm: 7524.257078039883 sim_pfm: 66.58673007776572
episode: 184 training return: tensor(45.5202, device='cuda:0')
episode: 185 training return: tensor(-302.1207, device='cuda:0')
episode: 186 training return: tensor(2.8041, device='cuda:0')
episode: 187 training return: tensor(-85.6495, device='cuda:0')
epoch: 47 test_true_pfm: 9717.423082534753 sim_pfm: -73.99686451341647
episode: 188 training return: tensor(-468.5572, device='cuda:0')
episode: 189 training return: tensor(29.8881, device='cuda:0')
episode: 190 training return: tensor(239.2630, device='cuda:0')
episode: 191 training return: tensor(217.7269, device='cuda:0')
epoch: 48 test_true_pfm: 9695.419457532398 sim_pfm: -111.11216642847285
episode: 192 training return: tensor(-493.5224, device='cuda:0')
episode: 193 training return: tensor(-241.1704, device='cuda:0')
episode: 194 training return: tensor(140.0850, device='cuda:0')
episode: 195 training return: tensor(66.5539, device='cuda:0')
epoch: 49 test_true_pfm: 9634.161646667957 sim_pfm: 248.1459063378085
episode: 196 training return: tensor(-408.0343, device='cuda:0')
episode: 197 training return: tensor(14.5773, device='cuda:0')
episode: 198 training return: tensor(-196.9911, device='cuda:0')
episode: 199 training return: tensor(78.4911, device='cuda:0')
epoch: 50 test_true_pfm: 6166.750278470515 sim_pfm: -138.57344604000295
episode: 200 training return: tensor(203.8705, device='cuda:0')
episode: 201 training return: tensor(105.7298, device='cuda:0')
episode: 202 training return: tensor(-753.3388, device='cuda:0')
episode: 203 training return: tensor(238.7114, device='cuda:0')
epoch: 51 test_true_pfm: 6066.756443045929 sim_pfm: 114.30097204512761
episode: 204 training return: tensor(32.5025, device='cuda:0')
episode: 205 training return: tensor(-107.4148, device='cuda:0')
episode: 206 training return: tensor(-72.4433, device='cuda:0')
episode: 207 training return: tensor(20.4429, device='cuda:0')
epoch: 52 test_true_pfm: 9824.73323274013 sim_pfm: 184.1315668405732
episode: 208 training return: tensor(123.7417, device='cuda:0')
episode: 209 training return: tensor(190.3213, device='cuda:0')
episode: 210 training return: tensor(45.3986, device='cuda:0')
episode: 211 training return: tensor(-74.7586, device='cuda:0')
epoch: 53 test_true_pfm: 9652.797045011348 sim_pfm: 250.82721233798657
episode: 212 training return: tensor(-507.3895, device='cuda:0')
episode: 213 training return: tensor(12.8721, device='cuda:0')
episode: 214 training return: tensor(-223.3225, device='cuda:0')
episode: 215 training return: tensor(-34.5833, device='cuda:0')
epoch: 54 test_true_pfm: 10060.20547521196 sim_pfm: 214.82012972899247
episode: 216 training return: tensor(210.2754, device='cuda:0')
episode: 217 training return: tensor(-267.6857, device='cuda:0')
episode: 218 training return: tensor(-271.9589, device='cuda:0')
episode: 219 training return: tensor(-187.2396, device='cuda:0')
epoch: 55 test_true_pfm: 10607.429114442622 sim_pfm: 310.475764137198
episode: 220 training return: tensor(-777.4500, device='cuda:0')
episode: 221 training return: tensor(-561.3659, device='cuda:0')
episode: 222 training return: tensor(-129.7162, device='cuda:0')
episode: 223 training return: tensor(-81.1128, device='cuda:0')
epoch: 56 test_true_pfm: 9155.059363714814 sim_pfm: -107.80561333436829
episode: 224 training return: tensor(-358.0488, device='cuda:0')
episode: 225 training return: tensor(2.2469, device='cuda:0')
episode: 226 training return: tensor(40.6608, device='cuda:0')
episode: 227 training return: tensor(-96.9093, device='cuda:0')
epoch: 57 test_true_pfm: 7641.284069690501 sim_pfm: 223.6027912395463
episode: 228 training return: tensor(-703.2067, device='cuda:0')
episode: 229 training return: tensor(172.5932, device='cuda:0')
episode: 230 training return: tensor(85.0660, device='cuda:0')
episode: 231 training return: tensor(-114.8964, device='cuda:0')
epoch: 58 test_true_pfm: 9319.404774406927 sim_pfm: 135.3321206861486
episode: 232 training return: tensor(-2.2316, device='cuda:0')
episode: 233 training return: tensor(-846.9513, device='cuda:0')
episode: 234 training return: tensor(-85.6143, device='cuda:0')
episode: 235 training return: tensor(-292.8526, device='cuda:0')
epoch: 59 test_true_pfm: 9573.915379317516 sim_pfm: -221.13406177097932
episode: 236 training return: tensor(-999.7063, device='cuda:0')
episode: 237 training return: tensor(143.0040, device='cuda:0')
episode: 238 training return: tensor(-273.0264, device='cuda:0')
episode: 239 training return: tensor(80.5417, device='cuda:0')
epoch: 60 test_true_pfm: 6924.615927239189 sim_pfm: -336.33870289903524
episode: 240 training return: tensor(-338.2672, device='cuda:0')
episode: 241 training return: tensor(-177.8081, device='cuda:0')
episode: 242 training return: tensor(-144.7369, device='cuda:0')
episode: 243 training return: tensor(-815.6835, device='cuda:0')
epoch: 61 test_true_pfm: 10245.066263338593 sim_pfm: 237.53016948723234
episode: 244 training return: tensor(4.7324, device='cuda:0')
episode: 245 training return: tensor(-183.8615, device='cuda:0')
episode: 246 training return: tensor(-47.8725, device='cuda:0')
episode: 247 training return: tensor(1.8684, device='cuda:0')
epoch: 62 test_true_pfm: 10452.569127889867 sim_pfm: 185.93683601001976
episode: 248 training return: tensor(-131.3960, device='cuda:0')
episode: 249 training return: tensor(-595.6732, device='cuda:0')
episode: 250 training return: tensor(-107.1566, device='cuda:0')
episode: 251 training return: tensor(145.9416, device='cuda:0')
epoch: 63 test_true_pfm: 10536.97110908586 sim_pfm: -122.26061833789572
episode: 252 training return: tensor(8.9013, device='cuda:0')
episode: 253 training return: tensor(-11.8964, device='cuda:0')
episode: 254 training return: tensor(-859.8000, device='cuda:0')
episode: 255 training return: tensor(-253.1330, device='cuda:0')
epoch: 64 test_true_pfm: 10693.812879825153 sim_pfm: 179.8719183510208
episode: 256 training return: tensor(169.5630, device='cuda:0')
episode: 257 training return: tensor(69.7698, device='cuda:0')
episode: 258 training return: tensor(108.2378, device='cuda:0')
episode: 259 training return: tensor(-794.1340, device='cuda:0')
epoch: 65 test_true_pfm: 5100.027910513941 sim_pfm: -23.61124830483459
episode: 260 training return: tensor(-851.9167, device='cuda:0')
episode: 261 training return: tensor(-556.5228, device='cuda:0')
episode: 262 training return: tensor(49.6618, device='cuda:0')
episode: 263 training return: tensor(143.5900, device='cuda:0')
epoch: 66 test_true_pfm: 8247.5581739984 sim_pfm: 150.46084067803653
episode: 264 training return: tensor(-151.2002, device='cuda:0')
episode: 265 training return: tensor(-269.5717, device='cuda:0')
episode: 266 training return: tensor(-419.3914, device='cuda:0')
episode: 267 training return: tensor(-224.3601, device='cuda:0')
epoch: 67 test_true_pfm: 7006.341013211032 sim_pfm: 199.15880745480536
episode: 268 training return: tensor(26.3377, device='cuda:0')
episode: 269 training return: tensor(-991.9192, device='cuda:0')
episode: 270 training return: tensor(233.7482, device='cuda:0')
episode: 271 training return: tensor(-632.8854, device='cuda:0')
epoch: 68 test_true_pfm: 10435.101606123673 sim_pfm: 21.88510757921419
episode: 272 training return: tensor(97.3163, device='cuda:0')
episode: 273 training return: tensor(-713.4036, device='cuda:0')
episode: 274 training return: tensor(280.6956, device='cuda:0')
episode: 275 training return: tensor(-206.4513, device='cuda:0')
epoch: 69 test_true_pfm: 6417.819079624659 sim_pfm: 158.76134636901165
episode: 276 training return: tensor(-49.8246, device='cuda:0')
episode: 277 training return: tensor(2.5216, device='cuda:0')
episode: 278 training return: tensor(-42.5727, device='cuda:0')
episode: 279 training return: tensor(-170.8441, device='cuda:0')
epoch: 70 test_true_pfm: 10341.11367882428 sim_pfm: 194.79009377682814
episode: 280 training return: tensor(-839.3627, device='cuda:0')
episode: 281 training return: tensor(-940.0345, device='cuda:0')
episode: 282 training return: tensor(118.3091, device='cuda:0')
episode: 283 training return: tensor(200.3468, device='cuda:0')
epoch: 71 test_true_pfm: 10069.64222201752 sim_pfm: 294.15351768994395
episode: 284 training return: tensor(-28.7111, device='cuda:0')
episode: 285 training return: tensor(116.0548, device='cuda:0')
episode: 286 training return: tensor(85.9673, device='cuda:0')
episode: 287 training return: tensor(120.5003, device='cuda:0')
epoch: 72 test_true_pfm: 10616.502475380743 sim_pfm: -98.5215178563473
episode: 288 training return: tensor(48.1796, device='cuda:0')
episode: 289 training return: tensor(-443.3705, device='cuda:0')
episode: 290 training return: tensor(167.3650, device='cuda:0')
episode: 291 training return: tensor(-54.9252, device='cuda:0')
epoch: 73 test_true_pfm: 7533.322783362072 sim_pfm: -83.00284605527607
episode: 292 training return: tensor(139.1080, device='cuda:0')
episode: 293 training return: tensor(-202.0370, device='cuda:0')
episode: 294 training return: tensor(-14.4632, device='cuda:0')
episode: 295 training return: tensor(-2.2440, device='cuda:0')
epoch: 74 test_true_pfm: 9226.166978144043 sim_pfm: -2.6040791789419018
episode: 296 training return: tensor(-911.7781, device='cuda:0')
episode: 297 training return: tensor(-219.8757, device='cuda:0')
episode: 298 training return: tensor(-445.1106, device='cuda:0')
episode: 299 training return: tensor(-30.8652, device='cuda:0')
epoch: 75 test_true_pfm: 5904.191327188296 sim_pfm: 157.44244023800516
episode: 300 training return: tensor(-999.0211, device='cuda:0')
episode: 301 training return: tensor(-45.8002, device='cuda:0')
episode: 302 training return: tensor(19.8994, device='cuda:0')
episode: 303 training return: tensor(-495.9332, device='cuda:0')
epoch: 76 test_true_pfm: 7986.256790003338 sim_pfm: -127.59927372308448
episode: 304 training return: tensor(213.3404, device='cuda:0')
episode: 305 training return: tensor(-849.8933, device='cuda:0')
episode: 306 training return: tensor(160.4807, device='cuda:0')
episode: 307 training return: tensor(142.6436, device='cuda:0')
epoch: 77 test_true_pfm: 6694.862999463631 sim_pfm: 328.02277358598076
episode: 308 training return: tensor(-7.1570, device='cuda:0')
episode: 309 training return: tensor(229.7164, device='cuda:0')
episode: 310 training return: tensor(-93.3130, device='cuda:0')
episode: 311 training return: tensor(-122.7834, device='cuda:0')
epoch: 78 test_true_pfm: 9652.419842709898 sim_pfm: 159.5126336996715
episode: 312 training return: tensor(51.3889, device='cuda:0')
episode: 313 training return: tensor(-315.0375, device='cuda:0')
episode: 314 training return: tensor(306.8946, device='cuda:0')
episode: 315 training return: tensor(51.1978, device='cuda:0')
epoch: 79 test_true_pfm: 10337.233732906074 sim_pfm: 268.62543338665273
episode: 316 training return: tensor(14.9676, device='cuda:0')
episode: 317 training return: tensor(-70.2756, device='cuda:0')
episode: 318 training return: tensor(143.3709, device='cuda:0')
episode: 319 training return: tensor(-41.5811, device='cuda:0')
epoch: 80 test_true_pfm: 7207.681708623092 sim_pfm: 22.865650563966483
episode: 320 training return: tensor(74.2893, device='cuda:0')
episode: 321 training return: tensor(49.0900, device='cuda:0')
episode: 322 training return: tensor(-69.3798, device='cuda:0')
episode: 323 training return: tensor(136.8442, device='cuda:0')
epoch: 81 test_true_pfm: 10232.51505447534 sim_pfm: -127.66951844849002
episode: 324 training return: tensor(-680.4708, device='cuda:0')
episode: 325 training return: tensor(173.2746, device='cuda:0')
episode: 326 training return: tensor(-171.1968, device='cuda:0')
episode: 327 training return: tensor(-157.6684, device='cuda:0')
epoch: 82 test_true_pfm: 10439.425421017047 sim_pfm: -52.32779447194965
episode: 328 training return: tensor(-664.5942, device='cuda:0')
episode: 329 training return: tensor(93.0350, device='cuda:0')
episode: 330 training return: tensor(-117.5097, device='cuda:0')
episode: 331 training return: tensor(13.5586, device='cuda:0')
epoch: 83 test_true_pfm: 9644.618655920453 sim_pfm: 181.25539177710502
episode: 332 training return: tensor(-863.8136, device='cuda:0')
episode: 333 training return: tensor(12.7227, device='cuda:0')
episode: 334 training return: tensor(-337.0682, device='cuda:0')
episode: 335 training return: tensor(176.8103, device='cuda:0')
epoch: 84 test_true_pfm: 10170.417616001121 sim_pfm: -269.7344165474642
episode: 336 training return: tensor(-31.2250, device='cuda:0')
episode: 337 training return: tensor(99.4384, device='cuda:0')
episode: 338 training return: tensor(-16.3146, device='cuda:0')
episode: 339 training return: tensor(207.2395, device='cuda:0')
epoch: 85 test_true_pfm: 10380.900684755616 sim_pfm: 224.28551782729724
episode: 340 training return: tensor(74.5834, device='cuda:0')
episode: 341 training return: tensor(-764.7951, device='cuda:0')
episode: 342 training return: tensor(-499.7124, device='cuda:0')
episode: 343 training return: tensor(170.2370, device='cuda:0')
epoch: 86 test_true_pfm: 10004.321302780214 sim_pfm: -711.3448441441093
episode: 344 training return: tensor(24.6357, device='cuda:0')
episode: 345 training return: tensor(153.4731, device='cuda:0')
episode: 346 training return: tensor(261.6787, device='cuda:0')
episode: 347 training return: tensor(-442.2922, device='cuda:0')
epoch: 87 test_true_pfm: 10313.004938756754 sim_pfm: 230.058149815343
episode: 348 training return: tensor(-19.9909, device='cuda:0')
episode: 349 training return: tensor(1.7175, device='cuda:0')
episode: 350 training return: tensor(82.4390, device='cuda:0')
episode: 351 training return: tensor(102.3745, device='cuda:0')
epoch: 88 test_true_pfm: 7084.544404192762 sim_pfm: 204.2560139715206
episode: 352 training return: tensor(7.7238, device='cuda:0')
episode: 353 training return: tensor(157.9597, device='cuda:0')
episode: 354 training return: tensor(195.5658, device='cuda:0')
episode: 355 training return: tensor(247.6397, device='cuda:0')
epoch: 89 test_true_pfm: 9401.762895733636 sim_pfm: -39.51442290130459
episode: 356 training return: tensor(-29.4732, device='cuda:0')
episode: 357 training return: tensor(135.3692, device='cuda:0')
episode: 358 training return: tensor(-28.6441, device='cuda:0')
episode: 359 training return: tensor(-223.5943, device='cuda:0')
epoch: 90 test_true_pfm: 6421.245661527918 sim_pfm: -184.07845173203773
episode: 360 training return: tensor(-288.0858, device='cuda:0')
episode: 361 training return: tensor(-111.8558, device='cuda:0')
episode: 362 training return: tensor(58.7222, device='cuda:0')
episode: 363 training return: tensor(112.9486, device='cuda:0')
epoch: 91 test_true_pfm: 10323.698612718028 sim_pfm: 55.81495631214542
episode: 364 training return: tensor(-156.0660, device='cuda:0')
episode: 365 training return: tensor(232.5066, device='cuda:0')
episode: 366 training return: tensor(108.8042, device='cuda:0')
episode: 367 training return: tensor(250.1168, device='cuda:0')
epoch: 92 test_true_pfm: 10386.839803032257 sim_pfm: -125.38378086078835
episode: 368 training return: tensor(131.0165, device='cuda:0')
episode: 369 training return: tensor(-984.7356, device='cuda:0')
episode: 370 training return: tensor(42.4140, device='cuda:0')
episode: 371 training return: tensor(-83.1195, device='cuda:0')
epoch: 93 test_true_pfm: 10363.506580781259 sim_pfm: -337.96004696842283
episode: 372 training return: tensor(-26.5566, device='cuda:0')
episode: 373 training return: tensor(-71.9501, device='cuda:0')
episode: 374 training return: tensor(-67.2298, device='cuda:0')
episode: 375 training return: tensor(51.9323, device='cuda:0')
epoch: 94 test_true_pfm: 10541.49132653426 sim_pfm: 197.4762833607771
episode: 376 training return: tensor(89.8344, device='cuda:0')
episode: 377 training return: tensor(-999.9157, device='cuda:0')
episode: 378 training return: tensor(-666.4634, device='cuda:0')
episode: 379 training return: tensor(-184.3653, device='cuda:0')
epoch: 95 test_true_pfm: 6987.736430415095 sim_pfm: 154.72620365603748
episode: 380 training return: tensor(-999.5010, device='cuda:0')
episode: 381 training return: tensor(-93.8958, device='cuda:0')
episode: 382 training return: tensor(-825.7153, device='cuda:0')
episode: 383 training return: tensor(-399.9023, device='cuda:0')
epoch: 96 test_true_pfm: 7775.806222011663 sim_pfm: 166.11870543432693
episode: 384 training return: tensor(146.7556, device='cuda:0')
episode: 385 training return: tensor(-20.8887, device='cuda:0')
episode: 386 training return: tensor(341.7350, device='cuda:0')
episode: 387 training return: tensor(106.3391, device='cuda:0')
epoch: 97 test_true_pfm: 10213.71360601962 sim_pfm: 301.2315358949224
episode: 388 training return: tensor(276.6783, device='cuda:0')
episode: 389 training return: tensor(164.5562, device='cuda:0')
episode: 390 training return: tensor(36.9067, device='cuda:0')
episode: 391 training return: tensor(172.9834, device='cuda:0')
epoch: 98 test_true_pfm: 10599.858081953009 sim_pfm: 193.0780430995704
episode: 392 training return: tensor(232.3347, device='cuda:0')
episode: 393 training return: tensor(159.5772, device='cuda:0')
episode: 394 training return: tensor(159.0311, device='cuda:0')
episode: 395 training return: tensor(-169.3334, device='cuda:0')
epoch: 99 test_true_pfm: 8773.535884549876 sim_pfm: 191.55314543051645
episode: 396 training return: tensor(-52.9425, device='cuda:0')
episode: 397 training return: tensor(117.7667, device='cuda:0')
episode: 398 training return: tensor(-7.2632, device='cuda:0')
episode: 399 training return: tensor(120.1496, device='cuda:0')
epoch: 100 test_true_pfm: 10401.673034226422 sim_pfm: 227.68442840552112
episode: 400 training return: tensor(126.3704, device='cuda:0')
episode: 401 training return: tensor(119.4374, device='cuda:0')
episode: 402 training return: tensor(209.4229, device='cuda:0')
episode: 403 training return: tensor(59.5968, device='cuda:0')
epoch: 101 test_true_pfm: 9401.588777811261 sim_pfm: 242.2844020309179
episode: 404 training return: tensor(-33.8291, device='cuda:0')
episode: 405 training return: tensor(255.6306, device='cuda:0')
episode: 406 training return: tensor(166.7189, device='cuda:0')
episode: 407 training return: tensor(-26.6053, device='cuda:0')
epoch: 102 test_true_pfm: 7500.1361937182955 sim_pfm: 269.86264198917587
episode: 408 training return: tensor(-595.0157, device='cuda:0')
episode: 409 training return: tensor(32.1395, device='cuda:0')
episode: 410 training return: tensor(28.2704, device='cuda:0')
episode: 411 training return: tensor(52.0750, device='cuda:0')
epoch: 103 test_true_pfm: 10361.18406527446 sim_pfm: 220.25693845481146
episode: 412 training return: tensor(59.8148, device='cuda:0')
episode: 413 training return: tensor(106.1401, device='cuda:0')
episode: 414 training return: tensor(-181.9878, device='cuda:0')
episode: 415 training return: tensor(155.2406, device='cuda:0')
epoch: 104 test_true_pfm: 9620.832711465782 sim_pfm: 212.74465711635034
episode: 416 training return: tensor(-794.2423, device='cuda:0')
episode: 417 training return: tensor(49.6942, device='cuda:0')
episode: 418 training return: tensor(-54.2642, device='cuda:0')
episode: 419 training return: tensor(-299.5165, device='cuda:0')
epoch: 105 test_true_pfm: 10538.158425717658 sim_pfm: 67.58289690526242
episode: 420 training return: tensor(108.7187, device='cuda:0')
episode: 421 training return: tensor(-82.9691, device='cuda:0')
episode: 422 training return: tensor(-23.1678, device='cuda:0')
episode: 423 training return: tensor(264.9334, device='cuda:0')
epoch: 106 test_true_pfm: 10410.537302046525 sim_pfm: 227.3241537505916
episode: 424 training return: tensor(15.7488, device='cuda:0')
episode: 425 training return: tensor(-820.0459, device='cuda:0')
episode: 426 training return: tensor(183.7808, device='cuda:0')
episode: 427 training return: tensor(243.0119, device='cuda:0')
epoch: 107 test_true_pfm: 10253.598579585003 sim_pfm: -212.44678481555698
episode: 428 training return: tensor(-903.4782, device='cuda:0')
episode: 429 training return: tensor(-4.2665, device='cuda:0')
episode: 430 training return: tensor(97.5605, device='cuda:0')
episode: 431 training return: tensor(-63.6892, device='cuda:0')
epoch: 108 test_true_pfm: 10014.708702331922 sim_pfm: 215.27868533800938
episode: 432 training return: tensor(-541.2900, device='cuda:0')
episode: 433 training return: tensor(-999.4648, device='cuda:0')
episode: 434 training return: tensor(-275.6413, device='cuda:0')
episode: 435 training return: tensor(98.7524, device='cuda:0')
epoch: 109 test_true_pfm: 9439.21854687253 sim_pfm: 162.08180898866462
episode: 436 training return: tensor(81.5941, device='cuda:0')
episode: 437 training return: tensor(-231.4317, device='cuda:0')
episode: 438 training return: tensor(-195.5477, device='cuda:0')
episode: 439 training return: tensor(124.2671, device='cuda:0')
epoch: 110 test_true_pfm: 10559.934029067832 sim_pfm: 193.84865566603062
episode: 440 training return: tensor(-111.2218, device='cuda:0')
episode: 441 training return: tensor(88.8931, device='cuda:0')
episode: 442 training return: tensor(-274.6703, device='cuda:0')
episode: 443 training return: tensor(214.4046, device='cuda:0')
epoch: 111 test_true_pfm: 10377.955136072278 sim_pfm: 67.79296685680553
episode: 444 training return: tensor(-735.6298, device='cuda:0')
episode: 445 training return: tensor(215.8957, device='cuda:0')
episode: 446 training return: tensor(-366.2363, device='cuda:0')
episode: 447 training return: tensor(-452.4436, device='cuda:0')
epoch: 112 test_true_pfm: 9373.869182034086 sim_pfm: 277.40701254770585
episode: 448 training return: tensor(-334.2612, device='cuda:0')
episode: 449 training return: tensor(290.7177, device='cuda:0')
episode: 450 training return: tensor(233.0313, device='cuda:0')
episode: 451 training return: tensor(150.7765, device='cuda:0')
epoch: 113 test_true_pfm: 8530.82752115662 sim_pfm: 275.50902747322107
episode: 452 training return: tensor(8.7227, device='cuda:0')
episode: 453 training return: tensor(329.1183, device='cuda:0')
episode: 454 training return: tensor(-27.9649, device='cuda:0')
episode: 455 training return: tensor(-22.3052, device='cuda:0')
epoch: 114 test_true_pfm: 9702.75887529945 sim_pfm: -170.4240937619858
episode: 456 training return: tensor(274.7313, device='cuda:0')
episode: 457 training return: tensor(71.3238, device='cuda:0')
episode: 458 training return: tensor(119.3783, device='cuda:0')
episode: 459 training return: tensor(14.9829, device='cuda:0')
epoch: 115 test_true_pfm: 10670.061581755625 sim_pfm: 105.16490762969867
episode: 460 training return: tensor(-40.0437, device='cuda:0')
episode: 461 training return: tensor(73.9426, device='cuda:0')
episode: 462 training return: tensor(-371.2012, device='cuda:0')
episode: 463 training return: tensor(-374.3891, device='cuda:0')
epoch: 116 test_true_pfm: 10496.70141401473 sim_pfm: -343.42815470237593
episode: 464 training return: tensor(177.1639, device='cuda:0')
episode: 465 training return: tensor(101.7744, device='cuda:0')
episode: 466 training return: tensor(130.9100, device='cuda:0')
episode: 467 training return: tensor(-44.1700, device='cuda:0')
epoch: 117 test_true_pfm: 10612.439219134729 sim_pfm: 229.31882311762698
episode: 468 training return: tensor(204.3017, device='cuda:0')
episode: 469 training return: tensor(-343.7333, device='cuda:0')
episode: 470 training return: tensor(151.8926, device='cuda:0')
episode: 471 training return: tensor(-294.2353, device='cuda:0')
epoch: 118 test_true_pfm: 10579.076791616197 sim_pfm: 26.203364771344543
episode: 472 training return: tensor(261.7131, device='cuda:0')
episode: 473 training return: tensor(181.2793, device='cuda:0')
episode: 474 training return: tensor(241.2826, device='cuda:0')
episode: 475 training return: tensor(-235.4344, device='cuda:0')
epoch: 119 test_true_pfm: 10628.62618419311 sim_pfm: -226.13693197482885
episode: 476 training return: tensor(-65.4841, device='cuda:0')
episode: 477 training return: tensor(-79.2419, device='cuda:0')
episode: 478 training return: tensor(38.8047, device='cuda:0')
episode: 479 training return: tensor(306.7773, device='cuda:0')
epoch: 120 test_true_pfm: 10533.058852291637 sim_pfm: 274.0737994145214
episode: 480 training return: tensor(149.7566, device='cuda:0')
episode: 481 training return: tensor(114.0948, device='cuda:0')
episode: 482 training return: tensor(61.7356, device='cuda:0')
episode: 483 training return: tensor(119.6541, device='cuda:0')
epoch: 121 test_true_pfm: 9552.902082779825 sim_pfm: 175.99806236229293
episode: 484 training return: tensor(98.3220, device='cuda:0')
episode: 485 training return: tensor(-143.0998, device='cuda:0')
episode: 486 training return: tensor(83.3146, device='cuda:0')
episode: 487 training return: tensor(107.6240, device='cuda:0')
epoch: 122 test_true_pfm: 8985.923754809019 sim_pfm: 167.13953523671566
episode: 488 training return: tensor(233.5061, device='cuda:0')
episode: 489 training return: tensor(92.9717, device='cuda:0')
episode: 490 training return: tensor(-612.9496, device='cuda:0')
episode: 491 training return: tensor(132.2242, device='cuda:0')
epoch: 123 test_true_pfm: 10670.776370073603 sim_pfm: -69.53359963038626
episode: 492 training return: tensor(-113.0497, device='cuda:0')
episode: 493 training return: tensor(63.8975, device='cuda:0')
episode: 494 training return: tensor(67.4600, device='cuda:0')
episode: 495 training return: tensor(198.4678, device='cuda:0')
epoch: 124 test_true_pfm: 10582.491565012315 sim_pfm: 163.26711299542026
episode: 496 training return: tensor(-423.5120, device='cuda:0')
episode: 497 training return: tensor(158.7551, device='cuda:0')
episode: 498 training return: tensor(208.3386, device='cuda:0')
episode: 499 training return: tensor(93.5748, device='cuda:0')
epoch: 125 test_true_pfm: 10372.192769453906 sim_pfm: 139.3500996394238
episode: 500 training return: tensor(138.8830, device='cuda:0')
episode: 501 training return: tensor(-29.5782, device='cuda:0')
episode: 502 training return: tensor(68.1092, device='cuda:0')
episode: 503 training return: tensor(-22.4739, device='cuda:0')
epoch: 126 test_true_pfm: 9499.617391893533 sim_pfm: 207.46760050435356
episode: 504 training return: tensor(-139.2962, device='cuda:0')
episode: 505 training return: tensor(-55.7204, device='cuda:0')
episode: 506 training return: tensor(96.8705, device='cuda:0')
episode: 507 training return: tensor(-160.6131, device='cuda:0')
epoch: 127 test_true_pfm: 6846.028597724394 sim_pfm: -74.06965698541414
episode: 508 training return: tensor(-498.6444, device='cuda:0')
episode: 509 training return: tensor(-581.9942, device='cuda:0')
episode: 510 training return: tensor(-31.8451, device='cuda:0')
episode: 511 training return: tensor(-91.9705, device='cuda:0')
epoch: 128 test_true_pfm: 8899.57576410299 sim_pfm: 221.65006212048078
episode: 512 training return: tensor(53.1994, device='cuda:0')
episode: 513 training return: tensor(89.3424, device='cuda:0')
episode: 514 training return: tensor(75.8607, device='cuda:0')
episode: 515 training return: tensor(105.9722, device='cuda:0')
epoch: 129 test_true_pfm: 9750.125971075411 sim_pfm: 202.0509860047508
episode: 516 training return: tensor(32.6903, device='cuda:0')
episode: 517 training return: tensor(92.3259, device='cuda:0')
episode: 518 training return: tensor(180.8005, device='cuda:0')
episode: 519 training return: tensor(-155.3070, device='cuda:0')
epoch: 130 test_true_pfm: 10205.447207998204 sim_pfm: 80.69303303881316
episode: 520 training return: tensor(119.5815, device='cuda:0')
episode: 521 training return: tensor(-999.7371, device='cuda:0')
episode: 522 training return: tensor(-114.5792, device='cuda:0')
episode: 523 training return: tensor(76.5962, device='cuda:0')
epoch: 131 test_true_pfm: 10610.591990003495 sim_pfm: -445.2447809870743
episode: 524 training return: tensor(-256.2093, device='cuda:0')
episode: 525 training return: tensor(61.6467, device='cuda:0')
episode: 526 training return: tensor(171.2418, device='cuda:0')
episode: 527 training return: tensor(198.2285, device='cuda:0')
epoch: 132 test_true_pfm: 10126.38428101571 sim_pfm: -2.572258374624653
episode: 528 training return: tensor(63.2315, device='cuda:0')
episode: 529 training return: tensor(94.6430, device='cuda:0')
episode: 530 training return: tensor(-274.0957, device='cuda:0')
episode: 531 training return: tensor(77.3736, device='cuda:0')
epoch: 133 test_true_pfm: 10526.638039593829 sim_pfm: 297.98913531711634
episode: 532 training return: tensor(69.8620, device='cuda:0')
episode: 533 training return: tensor(159.3460, device='cuda:0')
episode: 534 training return: tensor(37.3978, device='cuda:0')
episode: 535 training return: tensor(-177.7078, device='cuda:0')
epoch: 134 test_true_pfm: 10305.803352983832 sim_pfm: 306.5910735578121
episode: 536 training return: tensor(172.3963, device='cuda:0')
episode: 537 training return: tensor(-112.2389, device='cuda:0')
episode: 538 training return: tensor(255.5159, device='cuda:0')
episode: 539 training return: tensor(271.7256, device='cuda:0')
epoch: 135 test_true_pfm: 10118.920822056023 sim_pfm: 241.47737184352204
episode: 540 training return: tensor(-80.2575, device='cuda:0')
episode: 541 training return: tensor(59.9492, device='cuda:0')
episode: 542 training return: tensor(81.6707, device='cuda:0')
episode: 543 training return: tensor(260.6024, device='cuda:0')
epoch: 136 test_true_pfm: 8523.668354470045 sim_pfm: 209.05142081623003
episode: 544 training return: tensor(134.4904, device='cuda:0')
episode: 545 training return: tensor(-75.5099, device='cuda:0')
episode: 546 training return: tensor(162.8978, device='cuda:0')
episode: 547 training return: tensor(-318.2744, device='cuda:0')
epoch: 137 test_true_pfm: 9230.82053069627 sim_pfm: -31.524292183419067
episode: 548 training return: tensor(126.2422, device='cuda:0')
episode: 549 training return: tensor(86.3817, device='cuda:0')
episode: 550 training return: tensor(192.2474, device='cuda:0')
episode: 551 training return: tensor(106.0083, device='cuda:0')
epoch: 138 test_true_pfm: 9571.587051802087 sim_pfm: -32.199866672085285
episode: 552 training return: tensor(-369.6934, device='cuda:0')
episode: 553 training return: tensor(-505.5204, device='cuda:0')
episode: 554 training return: tensor(56.4691, device='cuda:0')
episode: 555 training return: tensor(185.9757, device='cuda:0')
epoch: 139 test_true_pfm: 10661.74049278833 sim_pfm: 243.0090663008159
episode: 556 training return: tensor(220.0272, device='cuda:0')
episode: 557 training return: tensor(-292.1444, device='cuda:0')
episode: 558 training return: tensor(76.4606, device='cuda:0')
episode: 559 training return: tensor(-57.8705, device='cuda:0')
epoch: 140 test_true_pfm: 10335.52678698986 sim_pfm: 254.79252732040672
episode: 560 training return: tensor(195.5839, device='cuda:0')
episode: 561 training return: tensor(195.9333, device='cuda:0')
episode: 562 training return: tensor(200.1512, device='cuda:0')
episode: 563 training return: tensor(0.7937, device='cuda:0')
epoch: 141 test_true_pfm: 6060.876372274756 sim_pfm: 352.6864710212685
episode: 564 training return: tensor(-328.5711, device='cuda:0')
episode: 565 training return: tensor(279.4848, device='cuda:0')
episode: 566 training return: tensor(155.7832, device='cuda:0')
episode: 567 training return: tensor(104.0326, device='cuda:0')
epoch: 142 test_true_pfm: 10628.445495667283 sim_pfm: 173.07236256580413
episode: 568 training return: tensor(121.0041, device='cuda:0')
episode: 569 training return: tensor(102.3155, device='cuda:0')
episode: 570 training return: tensor(-637.5164, device='cuda:0')
episode: 571 training return: tensor(177.8251, device='cuda:0')
epoch: 143 test_true_pfm: 10353.919512741972 sim_pfm: 228.1372415131967
episode: 572 training return: tensor(-323.0223, device='cuda:0')
episode: 573 training return: tensor(-703.9230, device='cuda:0')
episode: 574 training return: tensor(237.5302, device='cuda:0')
episode: 575 training return: tensor(-633.1295, device='cuda:0')
epoch: 144 test_true_pfm: 8410.528385953434 sim_pfm: 120.9898387081242
episode: 576 training return: tensor(-91.9908, device='cuda:0')
episode: 577 training return: tensor(-101.7999, device='cuda:0')
episode: 578 training return: tensor(289.6881, device='cuda:0')
episode: 579 training return: tensor(146.1709, device='cuda:0')
epoch: 145 test_true_pfm: 8416.284870684853 sim_pfm: 224.9691218805674
episode: 580 training return: tensor(278.3095, device='cuda:0')
episode: 581 training return: tensor(164.5753, device='cuda:0')
episode: 582 training return: tensor(45.9562, device='cuda:0')
episode: 583 training return: tensor(-165.3394, device='cuda:0')
epoch: 146 test_true_pfm: 10515.288207890218 sim_pfm: 154.4636930373575
episode: 584 training return: tensor(233.1337, device='cuda:0')
episode: 585 training return: tensor(111.5554, device='cuda:0')
episode: 586 training return: tensor(99.5887, device='cuda:0')
episode: 587 training return: tensor(-994.0927, device='cuda:0')
epoch: 147 test_true_pfm: 10629.94201793245 sim_pfm: -167.49447615149742
episode: 588 training return: tensor(5.0339, device='cuda:0')
episode: 589 training return: tensor(-532.8809, device='cuda:0')
episode: 590 training return: tensor(210.0569, device='cuda:0')
episode: 591 training return: tensor(328.7750, device='cuda:0')
epoch: 148 test_true_pfm: 9264.49510381522 sim_pfm: 346.45884940847947
episode: 592 training return: tensor(-117.3373, device='cuda:0')
episode: 593 training return: tensor(-989.0793, device='cuda:0')
episode: 594 training return: tensor(354.2710, device='cuda:0')
episode: 595 training return: tensor(148.3157, device='cuda:0')
epoch: 149 test_true_pfm: 10697.534742304553 sim_pfm: -121.03585254839466
episode: 596 training return: tensor(209.7974, device='cuda:0')
episode: 597 training return: tensor(109.9351, device='cuda:0')
episode: 598 training return: tensor(327.1901, device='cuda:0')
episode: 599 training return: tensor(-939.6343, device='cuda:0')
epoch: 150 test_true_pfm: 10732.949548706543 sim_pfm: 27.71483788288121
