['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'brac', '--traj', 'medium', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 4.19069855928421 test_loss: 2.8070047378540037
epoch: 1 training_loss 2.236055861711502 test_loss: 1.7161216735839844
epoch: 2 training_loss 1.2988369405269622 test_loss: 1.0215134620666504
epoch: 3 training_loss 0.6662914751470089 test_loss: 0.2870272159576416
epoch: 4 training_loss 0.09547537486068905 test_loss: -0.039872846007347106
epoch: 5 training_loss -0.18441357585135848 test_loss: -0.38485710620880126
epoch: 6 training_loss -0.47301550466101616 test_loss: -0.6485433578491211
epoch: 7 training_loss -0.8019620381295681 test_loss: -0.9224982261657715
epoch: 8 training_loss -1.1013671672344207 test_loss: -1.0985020637512206
epoch: 9 training_loss -1.329180555343628 test_loss: -1.4881827354431152
epoch: 10 training_loss -1.600202043056488 test_loss: -1.6025407791137696
epoch: 11 training_loss -1.7791398870944977 test_loss: -1.8171239852905274
epoch: 12 training_loss -1.9046356385946275 test_loss: -1.9595466613769532
epoch: 13 training_loss -2.004275242090225 test_loss: -1.9329910278320312
epoch: 14 training_loss -2.1006267619132997 test_loss: -2.231868553161621
epoch: 15 training_loss -2.203382863998413 test_loss: -2.3104736328125
epoch: 16 training_loss -2.3563504707813263 test_loss: -2.337891960144043
epoch: 17 training_loss -2.4144181418418884 test_loss: -2.4943607330322264
epoch: 18 training_loss -2.4324017643928526 test_loss: -2.3333396911621094
epoch: 19 training_loss -2.566150121688843 test_loss: -2.586258888244629
epoch: 20 training_loss -2.515187397003174 test_loss: -2.4042129516601562
epoch: 21 training_loss -2.659533075094223 test_loss: -2.754702568054199
epoch: 22 training_loss -2.6900556826591493 test_loss: -2.7449247360229494
epoch: 23 training_loss -2.833994290828705 test_loss: -2.768177795410156
epoch: 24 training_loss -2.847446475028992 test_loss: -2.8540252685546874
epoch: 25 training_loss -2.831851897239685 test_loss: -2.9882326126098633
epoch: 26 training_loss -2.9600631535053252 test_loss: -2.9811561584472654
epoch: 27 training_loss -2.976733915805817 test_loss: -2.8776123046875
epoch: 28 training_loss -2.9298030710220337 test_loss: -2.993892478942871
epoch: 29 training_loss -3.0146769285202026 test_loss: -3.1096229553222656
epoch: 30 training_loss -3.092794048786163 test_loss: -3.0921436309814454
epoch: 31 training_loss -3.1453672361373903 test_loss: -3.149057388305664
epoch: 32 training_loss -3.1155123853683473 test_loss: -3.3094497680664063
epoch: 33 training_loss -3.1766856575012206 test_loss: -3.142812728881836
epoch: 34 training_loss -3.261509568691254 test_loss: -3.1015718460083006
epoch: 35 training_loss -3.292486083507538 test_loss: -3.2651405334472656
epoch: 36 training_loss -3.333934907913208 test_loss: -3.336669158935547
epoch: 37 training_loss -3.2990670943260194 test_loss: -3.473582458496094
epoch: 38 training_loss -3.398941478729248 test_loss: -3.306829833984375
epoch: 39 training_loss -3.3995175743103028 test_loss: -3.5003036499023437
epoch: 40 training_loss -3.435850839614868 test_loss: -3.42879638671875
epoch: 41 training_loss -3.4322554731369017 test_loss: -3.403981018066406
epoch: 42 training_loss -3.498433520793915 test_loss: -3.4491992950439454
epoch: 43 training_loss -3.4963884115219117 test_loss: -3.3768146514892576
epoch: 44 training_loss -3.5281189036369325 test_loss: -3.586581802368164
epoch: 45 training_loss -3.62283575296402 test_loss: -3.681239700317383
epoch: 46 training_loss -3.5879135298728944 test_loss: -3.525434112548828
epoch: 47 training_loss -3.671890206336975 test_loss: -3.763720703125
epoch: 48 training_loss -3.685276200771332 test_loss: -3.7072879791259767
epoch: 49 training_loss -3.6792536640167235 test_loss: -3.7201255798339843
epoch: 50 training_loss -3.6810207033157347 test_loss: -3.7350311279296875
epoch: 51 training_loss -3.7740912556648256 test_loss: -3.794646072387695
epoch: 52 training_loss -3.7746694183349607 test_loss: -4.021644973754883
epoch: 53 training_loss -3.844621732234955 test_loss: -3.815176010131836
epoch: 54 training_loss -3.8434314942359924 test_loss: -3.9023159027099608
epoch: 55 training_loss -3.92811425447464 test_loss: -3.774897003173828
epoch: 56 training_loss -3.8512878465652465 test_loss: -3.64996452331543
epoch: 57 training_loss -3.889231467247009 test_loss: -3.9355937957763674
epoch: 58 training_loss -3.967794179916382 test_loss: -3.9637279510498047
epoch: 59 training_loss -3.9951835918426513 test_loss: -4.031611633300781
epoch: 60 training_loss -3.9866944193840026 test_loss: -4.001424407958984
epoch: 61 training_loss -4.027071259021759 test_loss: -3.9800933837890624
epoch: 62 training_loss -4.0942151379585265 test_loss: -3.9999515533447267
epoch: 63 training_loss -4.080349502563476 test_loss: -4.088197708129883
epoch: 64 training_loss -4.004376389980316 test_loss: -4.099152374267578
epoch: 65 training_loss -4.170191712379456 test_loss: -4.090315628051758
epoch: 66 training_loss -4.088659446239472 test_loss: -4.0778053283691404
epoch: 67 training_loss -4.201933326721192 test_loss: -4.249369430541992
epoch: 68 training_loss -4.216176514625549 test_loss: -4.210482025146485
epoch: 69 training_loss -4.210491201877594 test_loss: -4.299711227416992
epoch: 70 training_loss -4.053484132289887 test_loss: -3.9820369720458983
epoch: 71 training_loss -4.242265045642853 test_loss: -4.4133655548095705
epoch: 72 training_loss -4.273031957149506 test_loss: -4.288678741455078
epoch: 73 training_loss -4.321036219596863 test_loss: -4.317940521240234
epoch: 74 training_loss -4.32341623544693 test_loss: -4.3455547332763675
epoch: 75 training_loss -4.286136298179627 test_loss: -4.336816024780274
epoch: 76 training_loss -4.351013240814209 test_loss: -4.361273193359375
epoch: 77 training_loss -4.357604737281799 test_loss: -4.257346343994141
epoch: 78 training_loss -4.399590141773224 test_loss: -4.303540420532227
epoch: 79 training_loss -4.35915180683136 test_loss: -4.428124618530274
epoch: 80 training_loss -4.447441136837005 test_loss: -4.5523933410644535
epoch: 81 training_loss -4.4412588787078855 test_loss: -4.330400848388672
epoch: 82 training_loss -4.463939247131347 test_loss: -4.46507682800293
epoch: 83 training_loss -4.480843157768249 test_loss: -4.460893630981445
epoch: 84 training_loss -4.469081616401672 test_loss: -4.487557983398437
epoch: 85 training_loss -4.4474622631073 test_loss: -4.371143341064453
epoch: 86 training_loss -4.506156058311462 test_loss: -4.557273483276367
epoch: 87 training_loss -4.493003401756287 test_loss: -4.456692123413086
epoch: 88 training_loss -4.55460834980011 test_loss: -4.44451675415039
epoch: 89 training_loss -4.540868508815765 test_loss: -4.501882171630859
epoch: 90 training_loss -4.60675651550293 test_loss: -4.580774307250977
epoch: 91 training_loss -4.419672160148621 test_loss: -4.579391860961914
epoch: 92 training_loss -4.609533343315125 test_loss: -4.529925918579101
epoch: 93 training_loss -4.581927311420441 test_loss: -4.628787612915039
epoch: 94 training_loss -4.606032967567444 test_loss: -4.604539489746093
epoch: 95 training_loss -4.632185912132263 test_loss: -4.753241348266601
epoch: 96 training_loss -4.708263721466064 test_loss: -4.6380149841308596
epoch: 97 training_loss -4.633445491790772 test_loss: -4.669423675537109
epoch: 98 training_loss -4.725228567123413 test_loss: -4.676120758056641
epoch: 99 training_loss -4.731031956672669 test_loss: -4.766645431518555
epoch: 100 training_loss -4.743736734390259 test_loss: -4.761701965332032
epoch: 101 training_loss -4.745786046981811 test_loss: -4.147402572631836
epoch: 102 training_loss -4.655970501899719 test_loss: -4.767252349853516
epoch: 103 training_loss -4.758367028236389 test_loss: -4.831505966186524
epoch: 104 training_loss -4.773815259933472 test_loss: -4.574529647827148
epoch: 105 training_loss -4.732159547805786 test_loss: -4.689927291870117
epoch: 106 training_loss -4.802666282653808 test_loss: -4.858100128173828
epoch: 107 training_loss -4.815462565422058 test_loss: -4.8065948486328125
epoch: 108 training_loss -4.852607579231262 test_loss: -4.931844711303711
epoch: 109 training_loss -4.802612099647522 test_loss: -4.97154655456543
epoch: 110 training_loss -4.789621577262879 test_loss: -4.746860504150391
epoch: 111 training_loss -4.875014109611511 test_loss: -4.918730163574219
epoch: 112 training_loss -4.93361795425415 test_loss: -4.947772216796875
epoch: 113 training_loss -4.893979120254516 test_loss: -4.932403182983398
epoch: 114 training_loss -4.853714437484741 test_loss: -4.706461715698242
epoch: 115 training_loss -4.894105086326599 test_loss: -4.801370620727539
epoch: 116 training_loss -4.91245623588562 test_loss: -5.009341812133789
epoch: 117 training_loss -4.942137742042542 test_loss: -4.861735153198242
epoch: 118 training_loss -4.958540601730347 test_loss: -5.035360336303711
epoch: 119 training_loss -4.962838711738587 test_loss: -4.893664932250976
epoch: 120 training_loss -4.983994817733764 test_loss: -4.935946655273438
epoch: 121 training_loss -4.944374997615814 test_loss: -5.030833435058594
epoch: 122 training_loss -5.007310347557068 test_loss: -4.8342430114746096
epoch: 123 training_loss -5.009878549575806 test_loss: -4.971551895141602
epoch: 124 training_loss -4.997401633262634 test_loss: -5.0110313415527346
epoch: 125 training_loss -5.02587016582489 test_loss: -5.03820915222168
epoch: 126 training_loss -5.100864505767822 test_loss: -4.98404541015625
epoch: 127 training_loss -4.925802116394043 test_loss: -5.040541458129883
epoch: 128 training_loss -5.053040323257446 test_loss: -5.120097732543945
epoch: 129 training_loss -5.072932081222534 test_loss: -5.105266952514649
epoch: 130 training_loss -5.0964700841903685 test_loss: -5.12327880859375
epoch: 131 training_loss -5.044694328308106 test_loss: -5.1047718048095705
epoch: 132 training_loss -5.086531090736389 test_loss: -5.087329864501953
epoch: 133 training_loss -5.130647583007812 test_loss: -5.149892044067383
epoch: 134 training_loss -5.138748269081116 test_loss: -5.077523422241211
epoch: 135 training_loss -5.104590754508973 test_loss: -5.139306640625
epoch: 136 training_loss -5.153319191932678 test_loss: -5.149026107788086
epoch: 137 training_loss -5.12242995262146 test_loss: -5.0497486114501955
epoch: 138 training_loss -5.199341597557068 test_loss: -5.323274612426758
epoch: 139 training_loss -5.215933365821838 test_loss: -5.237900161743164
epoch: 140 training_loss -5.154535570144653 test_loss: -5.197478866577148
epoch: 141 training_loss -5.232489242553711 test_loss: -5.0861774444580075
epoch: 142 training_loss -5.215114936828614 test_loss: -5.134477996826172
epoch: 143 training_loss -5.237528119087219 test_loss: -5.2506050109863285
epoch: 144 training_loss -5.228769383430481 test_loss: -5.238586807250977
epoch: 145 training_loss -5.210498032569885 test_loss: -5.148956680297852
epoch: 146 training_loss -5.230446214675903 test_loss: -5.234407043457031
epoch: 147 training_loss -5.085344071388245 test_loss: -5.365414428710937
epoch: 148 training_loss -5.209697723388672 test_loss: -5.183180618286133
epoch: 149 training_loss -5.312211151123047 test_loss: -5.314360809326172
33.051612025009845
episode: 0 training return: tensor(-7148764., device='cuda:0')
episode: 1 training return: tensor(-102218.1875, device='cuda:0')
episode: 2 training return: tensor(-30099156., device='cuda:0')
episode: 3 training return: tensor(-4150756.2500, device='cuda:0')
epoch: 1 test_true_pfm: -19.65550034837541
episode: 4 training return: tensor(-24015022., device='cuda:0')
episode: 5 training return: tensor(-956734.9375, device='cuda:0')
episode: 6 training return: tensor(-6431765., device='cuda:0')
episode: 7 training return: tensor(-4860068.5000, device='cuda:0')
epoch: 2 test_true_pfm: 0.5127670306832466
episode: 8 training return: tensor(-408571.1562, device='cuda:0')
episode: 9 training return: tensor(-9084542., device='cuda:0')
episode: 10 training return: tensor(-46234.9258, device='cuda:0')
episode: 11 training return: tensor(-592.5161, device='cuda:0')
epoch: 3 test_true_pfm: 28.190364297417005
episode: 12 training return: tensor(-536.2023, device='cuda:0')
episode: 13 training return: tensor(-3442.9976, device='cuda:0')
episode: 14 training return: tensor(-536.7923, device='cuda:0')
episode: 15 training return: tensor(-600.6964, device='cuda:0')
epoch: 4 test_true_pfm: 29.011884549824963
episode: 16 training return: tensor(-599.6852, device='cuda:0')
episode: 17 training return: tensor(-621.4610, device='cuda:0')
episode: 18 training return: tensor(-566.8831, device='cuda:0')
episode: 19 training return: tensor(-630.2598, device='cuda:0')
epoch: 5 test_true_pfm: 14.566574321728822
episode: 20 training return: tensor(-651.4871, device='cuda:0')
episode: 21 training return: tensor(-663.0788, device='cuda:0')
episode: 22 training return: tensor(-656.3458, device='cuda:0')
episode: 23 training return: tensor(-529.2368, device='cuda:0')
epoch: 6 test_true_pfm: 29.493707624311064
episode: 24 training return: tensor(-545.9509, device='cuda:0')
episode: 25 training return: tensor(-539.3354, device='cuda:0')
episode: 26 training return: tensor(-6602.0195, device='cuda:0')
episode: 27 training return: tensor(-25561.5527, device='cuda:0')
epoch: 7 test_true_pfm: 30.522606718336164
episode: 28 training return: tensor(-424317.4375, device='cuda:0')
episode: 29 training return: tensor(-402681.7812, device='cuda:0')
episode: 30 training return: tensor(-374373.1250, device='cuda:0')
episode: 31 training return: tensor(-104492.3281, device='cuda:0')
epoch: 8 test_true_pfm: 34.179171162023565
episode: 32 training return: tensor(-133862.7656, device='cuda:0')
episode: 33 training return: tensor(-219326.3125, device='cuda:0')
episode: 34 training return: tensor(-8843097., device='cuda:0')
episode: 35 training return: tensor(-2116364., device='cuda:0')
epoch: 9 test_true_pfm: 38.23364696092246
episode: 36 training return: tensor(-167814.0312, device='cuda:0')
episode: 37 training return: tensor(-66646.8438, device='cuda:0')
episode: 38 training return: tensor(-54527.3164, device='cuda:0')
episode: 39 training return: tensor(-2374747.2500, device='cuda:0')
epoch: 10 test_true_pfm: 40.46871695267576
episode: 40 training return: tensor(-124982.2656, device='cuda:0')
episode: 41 training return: tensor(-82572.3281, device='cuda:0')
episode: 42 training return: tensor(-126243.0781, device='cuda:0')
episode: 43 training return: tensor(-119267.9297, device='cuda:0')
epoch: 11 test_true_pfm: 40.11350780041186
episode: 44 training return: tensor(-59979.7852, device='cuda:0')
episode: 45 training return: tensor(-143349.3906, device='cuda:0')
episode: 46 training return: tensor(-102562.8828, device='cuda:0')
episode: 47 training return: tensor(-32247.9941, device='cuda:0')
epoch: 12 test_true_pfm: 35.41186538230957
episode: 48 training return: tensor(-265347.8438, device='cuda:0')
episode: 49 training return: tensor(-1048586.5000, device='cuda:0')
episode: 50 training return: tensor(-1046455.3125, device='cuda:0')
episode: 51 training return: tensor(-8.8662e+09, device='cuda:0')
epoch: 13 test_true_pfm: 44.80103749187504
episode: 52 training return: tensor(-281357.9375, device='cuda:0')
episode: 53 training return: tensor(-201327.2656, device='cuda:0')
episode: 54 training return: tensor(-688.0607, device='cuda:0')
episode: 55 training return: tensor(-753.1414, device='cuda:0')
epoch: 14 test_true_pfm: 16.118540679046493
episode: 56 training return: tensor(-667.9537, device='cuda:0')
episode: 57 training return: tensor(-714.3343, device='cuda:0')
episode: 58 training return: tensor(-662.8495, device='cuda:0')
episode: 59 training return: tensor(-590.4368, device='cuda:0')
epoch: 15 test_true_pfm: 19.014692412286813
episode: 60 training return: tensor(-619.7886, device='cuda:0')
episode: 61 training return: tensor(-713.0921, device='cuda:0')
episode: 62 training return: tensor(-684.7758, device='cuda:0')
episode: 63 training return: tensor(-655.0512, device='cuda:0')
epoch: 16 test_true_pfm: 12.599162935185515
episode: 64 training return: tensor(-2350.7966, device='cuda:0')
episode: 65 training return: tensor(-744.7627, device='cuda:0')
episode: 66 training return: tensor(-826.9736, device='cuda:0')
episode: 67 training return: tensor(-1151.5336, device='cuda:0')
epoch: 17 test_true_pfm: 15.818411518589594
episode: 68 training return: tensor(-660.8772, device='cuda:0')
episode: 69 training return: tensor(-714.4811, device='cuda:0')
episode: 70 training return: tensor(-592.5994, device='cuda:0')
episode: 71 training return: tensor(-647.9373, device='cuda:0')
epoch: 18 test_true_pfm: 19.0755147656659
episode: 72 training return: tensor(-8296799.5000, device='cuda:0')
episode: 73 training return: tensor(-39707464., device='cuda:0')
episode: 74 training return: tensor(-272356.1250, device='cuda:0')
episode: 75 training return: tensor(-153751.8594, device='cuda:0')
epoch: 19 test_true_pfm: -11.237985631956372
episode: 76 training return: tensor(-930251.3125, device='cuda:0')
episode: 77 training return: tensor(-27321710., device='cuda:0')
episode: 78 training return: tensor(-29634164., device='cuda:0')
episode: 79 training return: tensor(-38950904., device='cuda:0')
epoch: 20 test_true_pfm: -18.860439401958875
episode: 80 training return: tensor(-28834934., device='cuda:0')
episode: 81 training return: tensor(-30265452., device='cuda:0')
episode: 82 training return: tensor(-37315136., device='cuda:0')
episode: 83 training return: tensor(-28299056., device='cuda:0')
epoch: 21 test_true_pfm: -17.428610717423318
episode: 84 training return: tensor(-27335180., device='cuda:0')
episode: 85 training return: tensor(-46707652., device='cuda:0')
episode: 86 training return: tensor(-41873708., device='cuda:0')
episode: 87 training return: tensor(-1.5572e+18, device='cuda:0')
epoch: 22 test_true_pfm: 17.556717378322578
episode: 88 training return: tensor(-2.2552e+10, device='cuda:0')
episode: 89 training return: tensor(-1257954.1250, device='cuda:0')
episode: 90 training return: tensor(-1006276.3750, device='cuda:0')
episode: 91 training return: tensor(-1.6887e+08, device='cuda:0')
epoch: 23 test_true_pfm: 11.581420060617738
episode: 92 training return: tensor(-818655.8125, device='cuda:0')
episode: 93 training return: tensor(-1285939.5000, device='cuda:0')
episode: 94 training return: tensor(-939046., device='cuda:0')
episode: 95 training return: tensor(-631977.8750, device='cuda:0')
epoch: 24 test_true_pfm: 2.590113116864793
episode: 96 training return: tensor(-970824.5000, device='cuda:0')
episode: 97 training return: tensor(-509782.3750, device='cuda:0')
episode: 98 training return: tensor(-66380.6562, device='cuda:0')
episode: 99 training return: tensor(-43532.2188, device='cuda:0')
epoch: 25 test_true_pfm: 13.776381339531863
episode: 100 training return: tensor(-530955.4375, device='cuda:0')
episode: 101 training return: tensor(-102616.7188, device='cuda:0')
episode: 102 training return: tensor(-99342.0703, device='cuda:0')
episode: 103 training return: tensor(-703.5380, device='cuda:0')
epoch: 26 test_true_pfm: 13.653771460694808
episode: 104 training return: tensor(-192969.5156, device='cuda:0')
episode: 105 training return: tensor(-53344.7109, device='cuda:0')
episode: 106 training return: tensor(-115624.1562, device='cuda:0')
episode: 107 training return: tensor(-3936.1643, device='cuda:0')
epoch: 27 test_true_pfm: 28.020417761032245
episode: 108 training return: tensor(-560.9964, device='cuda:0')
episode: 109 training return: tensor(-2471.3152, device='cuda:0')
episode: 110 training return: tensor(-649.6602, device='cuda:0')
episode: 111 training return: tensor(-819.3573, device='cuda:0')
epoch: 28 test_true_pfm: 16.091073956731183
episode: 112 training return: tensor(-53431.9141, device='cuda:0')
episode: 113 training return: tensor(-723.7119, device='cuda:0')
episode: 114 training return: tensor(-671.8054, device='cuda:0')
episode: 115 training return: tensor(-28675.6719, device='cuda:0')
epoch: 29 test_true_pfm: 8.188598812784388
episode: 116 training return: tensor(-640.7071, device='cuda:0')
episode: 117 training return: tensor(-743.3857, device='cuda:0')
episode: 118 training return: tensor(-653.6461, device='cuda:0')
episode: 119 training return: tensor(-6346.8149, device='cuda:0')
epoch: 30 test_true_pfm: 15.921534106525266
episode: 120 training return: tensor(-655.8954, device='cuda:0')
episode: 121 training return: tensor(-547.5670, device='cuda:0')
episode: 122 training return: tensor(-550.3434, device='cuda:0')
episode: 123 training return: tensor(-54373.7656, device='cuda:0')
epoch: 31 test_true_pfm: 12.360329331213157
episode: 124 training return: tensor(-59354.6016, device='cuda:0')
episode: 125 training return: tensor(-39196.8516, device='cuda:0')
episode: 126 training return: tensor(-128892.0625, device='cuda:0')
episode: 127 training return: tensor(-34587.9023, device='cuda:0')
epoch: 32 test_true_pfm: 15.109640100958956
episode: 128 training return: tensor(-65297.2969, device='cuda:0')
episode: 129 training return: tensor(-45644.8203, device='cuda:0')
episode: 130 training return: tensor(-60994.5195, device='cuda:0')
episode: 131 training return: tensor(-680.7647, device='cuda:0')
epoch: 33 test_true_pfm: 23.747787079238744
episode: 132 training return: tensor(-36111.5742, device='cuda:0')
episode: 133 training return: tensor(-31669.6152, device='cuda:0')
episode: 134 training return: tensor(-1661.4043, device='cuda:0')
episode: 135 training return: tensor(-5685.5249, device='cuda:0')
epoch: 34 test_true_pfm: 26.131126695426058
episode: 136 training return: tensor(-1536.7169, device='cuda:0')
episode: 137 training return: tensor(-1604.1194, device='cuda:0')
episode: 138 training return: tensor(-1023.1172, device='cuda:0')
episode: 139 training return: tensor(-1007.6037, device='cuda:0')
epoch: 35 test_true_pfm: 29.52151637741821
episode: 140 training return: tensor(-1303.1882, device='cuda:0')
episode: 141 training return: tensor(-1580.8114, device='cuda:0')
episode: 142 training return: tensor(-1797.4563, device='cuda:0')
episode: 143 training return: tensor(-1516.3334, device='cuda:0')
epoch: 36 test_true_pfm: 26.870559424071963
episode: 144 training return: tensor(-1315.6375, device='cuda:0')
episode: 145 training return: tensor(-1622.2467, device='cuda:0')
episode: 146 training return: tensor(-779.2083, device='cuda:0')
episode: 147 training return: tensor(-2006.8737, device='cuda:0')
epoch: 37 test_true_pfm: 27.128321926849857
episode: 148 training return: tensor(-1149.1206, device='cuda:0')
episode: 149 training return: tensor(-2963.5503, device='cuda:0')
episode: 150 training return: tensor(-1666.8282, device='cuda:0')
episode: 151 training return: tensor(-1789.3906, device='cuda:0')
epoch: 38 test_true_pfm: 27.109395235959244
episode: 152 training return: tensor(-3548.8313, device='cuda:0')
episode: 153 training return: tensor(-1214.0194, device='cuda:0')
episode: 154 training return: tensor(-1432.1655, device='cuda:0')
episode: 155 training return: tensor(-1531.6243, device='cuda:0')
epoch: 39 test_true_pfm: 27.54354992925233
episode: 156 training return: tensor(-672.2866, device='cuda:0')
episode: 157 training return: tensor(-2055.2681, device='cuda:0')
episode: 158 training return: tensor(-3061.1487, device='cuda:0')
episode: 159 training return: tensor(-1967.7288, device='cuda:0')
epoch: 40 test_true_pfm: 25.794527776146346
episode: 160 training return: tensor(-5354.7036, device='cuda:0')
episode: 161 training return: tensor(-4045.8494, device='cuda:0')
episode: 162 training return: tensor(-541.2404, device='cuda:0')
episode: 163 training return: tensor(-553.3574, device='cuda:0')
epoch: 41 test_true_pfm: 27.15829047735382
episode: 164 training return: tensor(-2862.6377, device='cuda:0')
episode: 165 training return: tensor(-564.8352, device='cuda:0')
episode: 166 training return: tensor(-607.3809, device='cuda:0')
episode: 167 training return: tensor(-551.6304, device='cuda:0')
epoch: 42 test_true_pfm: 20.621640932092863
episode: 168 training return: tensor(-1566.6089, device='cuda:0')
episode: 169 training return: tensor(-604.7714, device='cuda:0')
episode: 170 training return: tensor(-550.1344, device='cuda:0')
episode: 171 training return: tensor(-567.4397, device='cuda:0')
epoch: 43 test_true_pfm: 26.71541236187168
episode: 172 training return: tensor(-2182.1184, device='cuda:0')
episode: 173 training return: tensor(-1648.0380, device='cuda:0')
episode: 174 training return: tensor(-605.7144, device='cuda:0')
episode: 175 training return: tensor(-559.1295, device='cuda:0')
epoch: 44 test_true_pfm: 27.8955328630203
episode: 176 training return: tensor(-58300.8047, device='cuda:0')
episode: 177 training return: tensor(-819.0034, device='cuda:0')
episode: 178 training return: tensor(-616.0391, device='cuda:0')
episode: 179 training return: tensor(-4179.9429, device='cuda:0')
epoch: 45 test_true_pfm: 26.03721053873044
episode: 180 training return: tensor(-592.3578, device='cuda:0')
episode: 181 training return: tensor(-563.1146, device='cuda:0')
episode: 182 training return: tensor(-619.4667, device='cuda:0')
episode: 183 training return: tensor(-620.4799, device='cuda:0')
epoch: 46 test_true_pfm: 27.18605617482759
episode: 184 training return: tensor(-548.7531, device='cuda:0')
episode: 185 training return: tensor(-576.1254, device='cuda:0')
episode: 186 training return: tensor(-4033.6250, device='cuda:0')
episode: 187 training return: tensor(-607.5320, device='cuda:0')
epoch: 47 test_true_pfm: 27.138512932535644
episode: 188 training return: tensor(-851.1473, device='cuda:0')
episode: 189 training return: tensor(-648.5377, device='cuda:0')
episode: 190 training return: tensor(-758.0425, device='cuda:0')
episode: 191 training return: tensor(-1053.3619, device='cuda:0')
epoch: 48 test_true_pfm: 12.303604358693644
episode: 192 training return: tensor(-651.0081, device='cuda:0')
episode: 193 training return: tensor(-3375.8943, device='cuda:0')
episode: 194 training return: tensor(-950.7187, device='cuda:0')
episode: 195 training return: tensor(-39372.6055, device='cuda:0')
epoch: 49 test_true_pfm: 16.364461257264367
episode: 196 training return: tensor(-704.2261, device='cuda:0')
episode: 197 training return: tensor(-645.3119, device='cuda:0')
episode: 198 training return: tensor(-640.2045, device='cuda:0')
episode: 199 training return: tensor(-647.3168, device='cuda:0')
epoch: 50 test_true_pfm: 12.910285241754377
episode: 200 training return: tensor(-707.5040, device='cuda:0')
episode: 201 training return: tensor(-689.8585, device='cuda:0')
episode: 202 training return: tensor(-719.4001, device='cuda:0')
episode: 203 training return: tensor(-675.4579, device='cuda:0')
epoch: 51 test_true_pfm: 15.937779888515852
episode: 204 training return: tensor(-646.8036, device='cuda:0')
episode: 205 training return: tensor(-644.6724, device='cuda:0')
episode: 206 training return: tensor(-748.2740, device='cuda:0')
episode: 207 training return: tensor(-901.0065, device='cuda:0')
epoch: 52 test_true_pfm: 6.927392129649019
episode: 208 training return: tensor(-651.6003, device='cuda:0')
episode: 209 training return: tensor(-639.1358, device='cuda:0')
episode: 210 training return: tensor(-690.2426, device='cuda:0')
episode: 211 training return: tensor(-647.8260, device='cuda:0')
epoch: 53 test_true_pfm: 14.328968117063292
episode: 212 training return: tensor(-145215.7031, device='cuda:0')
episode: 213 training return: tensor(-686.9571, device='cuda:0')
episode: 214 training return: tensor(-648.5019, device='cuda:0')
episode: 215 training return: tensor(-660.7940, device='cuda:0')
epoch: 54 test_true_pfm: 15.131085094213296
episode: 216 training return: tensor(-33311.7266, device='cuda:0')
episode: 217 training return: tensor(-648.6239, device='cuda:0')
episode: 218 training return: tensor(-582.0137, device='cuda:0')
episode: 219 training return: tensor(-711.9115, device='cuda:0')
epoch: 55 test_true_pfm: 11.893070715724601
episode: 220 training return: tensor(-749.9416, device='cuda:0')
episode: 221 training return: tensor(-621.5095, device='cuda:0')
episode: 222 training return: tensor(-650.4495, device='cuda:0')
episode: 223 training return: tensor(-692.4502, device='cuda:0')
epoch: 56 test_true_pfm: 13.495859473498703
episode: 224 training return: tensor(-686.5679, device='cuda:0')
episode: 225 training return: tensor(-656.8669, device='cuda:0')
episode: 226 training return: tensor(-186461.2812, device='cuda:0')
episode: 227 training return: tensor(-762.1444, device='cuda:0')
epoch: 57 test_true_pfm: 16.391008120540043
episode: 228 training return: tensor(-671.4270, device='cuda:0')
episode: 229 training return: tensor(-51357.4492, device='cuda:0')
episode: 230 training return: tensor(-63702.5781, device='cuda:0')
episode: 231 training return: tensor(-660.2195, device='cuda:0')
epoch: 58 test_true_pfm: 13.448331095185612
episode: 232 training return: tensor(-713.8098, device='cuda:0')
episode: 233 training return: tensor(-673.1717, device='cuda:0')
episode: 234 training return: tensor(-662.5466, device='cuda:0')
episode: 235 training return: tensor(-710.5872, device='cuda:0')
epoch: 59 test_true_pfm: 14.013787884716479
episode: 236 training return: tensor(-665.2852, device='cuda:0')
episode: 237 training return: tensor(-704.1298, device='cuda:0')
episode: 238 training return: tensor(-606.7972, device='cuda:0')
episode: 239 training return: tensor(-633.8705, device='cuda:0')
epoch: 60 test_true_pfm: 15.129760676782876
episode: 240 training return: tensor(-650.5463, device='cuda:0')
episode: 241 training return: tensor(-650.8525, device='cuda:0')
episode: 242 training return: tensor(-633.4312, device='cuda:0')
episode: 243 training return: tensor(-614.0485, device='cuda:0')
epoch: 61 test_true_pfm: 10.654701864646844
episode: 244 training return: tensor(-37701.8125, device='cuda:0')
episode: 245 training return: tensor(-662.3135, device='cuda:0')
episode: 246 training return: tensor(-117553.9609, device='cuda:0')
episode: 247 training return: tensor(-682.5225, device='cuda:0')
epoch: 62 test_true_pfm: 19.41908669201281
episode: 248 training return: tensor(-637.2491, device='cuda:0')
episode: 249 training return: tensor(-3659.9900, device='cuda:0')
episode: 250 training return: tensor(-651.2085, device='cuda:0')
episode: 251 training return: tensor(-5707.0562, device='cuda:0')
epoch: 63 test_true_pfm: 5.77284876418115
episode: 252 training return: tensor(-653.9734, device='cuda:0')
episode: 253 training return: tensor(-889.0049, device='cuda:0')
episode: 254 training return: tensor(-720.9605, device='cuda:0')
episode: 255 training return: tensor(-642.7737, device='cuda:0')
epoch: 64 test_true_pfm: 16.21676539560222
episode: 256 training return: tensor(-754.1031, device='cuda:0')
episode: 257 training return: tensor(-647.6332, device='cuda:0')
episode: 258 training return: tensor(-672.2785, device='cuda:0')
episode: 259 training return: tensor(-578.7842, device='cuda:0')
epoch: 65 test_true_pfm: 10.860416824983908
episode: 260 training return: tensor(-764.8432, device='cuda:0')
episode: 261 training return: tensor(-655.9969, device='cuda:0')
episode: 262 training return: tensor(-675.5597, device='cuda:0')
episode: 263 training return: tensor(-7715.2163, device='cuda:0')
epoch: 66 test_true_pfm: 14.641976781129305
episode: 264 training return: tensor(-705.9876, device='cuda:0')
episode: 265 training return: tensor(-656.0206, device='cuda:0')
episode: 266 training return: tensor(-50699.8906, device='cuda:0')
episode: 267 training return: tensor(-638.7397, device='cuda:0')
epoch: 67 test_true_pfm: 16.71350285760181
episode: 268 training return: tensor(-796.6832, device='cuda:0')
episode: 269 training return: tensor(-639.3053, device='cuda:0')
episode: 270 training return: tensor(-653.6915, device='cuda:0')
episode: 271 training return: tensor(-669.1854, device='cuda:0')
epoch: 68 test_true_pfm: 15.420586310846176
episode: 272 training return: tensor(-25065.6699, device='cuda:0')
episode: 273 training return: tensor(-593.1140, device='cuda:0')
episode: 274 training return: tensor(-645.2413, device='cuda:0')
episode: 275 training return: tensor(-91600.5078, device='cuda:0')
epoch: 69 test_true_pfm: 16.820089483166488
episode: 276 training return: tensor(-8860.5322, device='cuda:0')
episode: 277 training return: tensor(-706.3875, device='cuda:0')
episode: 278 training return: tensor(-662.9234, device='cuda:0')
episode: 279 training return: tensor(-737.0178, device='cuda:0')
epoch: 70 test_true_pfm: 9.885799731883004
episode: 280 training return: tensor(-5284.0938, device='cuda:0')
episode: 281 training return: tensor(-186550.5938, device='cuda:0')
episode: 282 training return: tensor(-648.9261, device='cuda:0')
episode: 283 training return: tensor(-720.2854, device='cuda:0')
epoch: 71 test_true_pfm: 16.706283726553995
episode: 284 training return: tensor(-670.2230, device='cuda:0')
episode: 285 training return: tensor(-696.3541, device='cuda:0')
episode: 286 training return: tensor(-1160.5564, device='cuda:0')
episode: 287 training return: tensor(-678.2252, device='cuda:0')
epoch: 72 test_true_pfm: 10.732611524073628
episode: 288 training return: tensor(-691.3375, device='cuda:0')
episode: 289 training return: tensor(-794.8991, device='cuda:0')
episode: 290 training return: tensor(-645.2200, device='cuda:0')
episode: 291 training return: tensor(-81777.6797, device='cuda:0')
epoch: 73 test_true_pfm: 11.266173264416938
episode: 292 training return: tensor(-698.7806, device='cuda:0')
episode: 293 training return: tensor(-2843.2710, device='cuda:0')
episode: 294 training return: tensor(-723.0549, device='cuda:0')
episode: 295 training return: tensor(-3488.8252, device='cuda:0')
epoch: 74 test_true_pfm: 12.709197493341563
episode: 296 training return: tensor(-647.0873, device='cuda:0')
episode: 297 training return: tensor(-659.9335, device='cuda:0')
episode: 298 training return: tensor(-748.7840, device='cuda:0')
episode: 299 training return: tensor(-87405.0859, device='cuda:0')
epoch: 75 test_true_pfm: 10.721916961443188
episode: 300 training return: tensor(-653.4336, device='cuda:0')
episode: 301 training return: tensor(-1085.6134, device='cuda:0')
episode: 302 training return: tensor(-799.5319, device='cuda:0')
episode: 303 training return: tensor(-550.4970, device='cuda:0')
epoch: 76 test_true_pfm: 15.90893658550992
episode: 304 training return: tensor(-2642.6514, device='cuda:0')
episode: 305 training return: tensor(-625.2734, device='cuda:0')
episode: 306 training return: tensor(-75539.3984, device='cuda:0')
episode: 307 training return: tensor(-649.4178, device='cuda:0')
epoch: 77 test_true_pfm: 17.117958212270516
episode: 308 training return: tensor(-96279.4375, device='cuda:0')
episode: 309 training return: tensor(-704.9546, device='cuda:0')
episode: 310 training return: tensor(-779.5033, device='cuda:0')
episode: 311 training return: tensor(-802.4454, device='cuda:0')
epoch: 78 test_true_pfm: 13.556836655391056
episode: 312 training return: tensor(-28009.7441, device='cuda:0')
episode: 313 training return: tensor(-674.2571, device='cuda:0')
episode: 314 training return: tensor(-645.2477, device='cuda:0')
episode: 315 training return: tensor(-687.3420, device='cuda:0')
epoch: 79 test_true_pfm: 8.94239396970626
episode: 316 training return: tensor(-241134.7812, device='cuda:0')
episode: 317 training return: tensor(-1174.9625, device='cuda:0')
episode: 318 training return: tensor(-22181.9746, device='cuda:0')
episode: 319 training return: tensor(-641.1456, device='cuda:0')
epoch: 80 test_true_pfm: 17.238670569533287
episode: 320 training return: tensor(-648.3126, device='cuda:0')
episode: 321 training return: tensor(-2362.8242, device='cuda:0')
episode: 322 training return: tensor(-664.4167, device='cuda:0')
episode: 323 training return: tensor(-645.0046, device='cuda:0')
epoch: 81 test_true_pfm: 10.855049196454214
episode: 324 training return: tensor(-654.7062, device='cuda:0')
episode: 325 training return: tensor(-705.5879, device='cuda:0')
episode: 326 training return: tensor(-686.6431, device='cuda:0')
episode: 327 training return: tensor(-702.6083, device='cuda:0')
epoch: 82 test_true_pfm: 13.168535122482789
episode: 328 training return: tensor(-4568.5337, device='cuda:0')
episode: 329 training return: tensor(-4995.8472, device='cuda:0')
episode: 330 training return: tensor(-670.1396, device='cuda:0')
episode: 331 training return: tensor(-639.5452, device='cuda:0')
epoch: 83 test_true_pfm: 13.19056583912851
episode: 332 training return: tensor(-675.5638, device='cuda:0')
episode: 333 training return: tensor(-639.9350, device='cuda:0')
episode: 334 training return: tensor(-661.1025, device='cuda:0')
episode: 335 training return: tensor(-6925.2505, device='cuda:0')
epoch: 84 test_true_pfm: 17.62743105435319
episode: 336 training return: tensor(-685.7574, device='cuda:0')
episode: 337 training return: tensor(-691.5707, device='cuda:0')
episode: 338 training return: tensor(-652.4888, device='cuda:0')
episode: 339 training return: tensor(-648.4355, device='cuda:0')
epoch: 85 test_true_pfm: 15.582941670905956
episode: 340 training return: tensor(-618.0612, device='cuda:0')
episode: 341 training return: tensor(-39016.6445, device='cuda:0')
episode: 342 training return: tensor(-641.6281, device='cuda:0')
episode: 343 training return: tensor(-673.9672, device='cuda:0')
epoch: 86 test_true_pfm: 16.09841323958718
episode: 344 training return: tensor(-727.0727, device='cuda:0')
episode: 345 training return: tensor(-609.3738, device='cuda:0')
episode: 346 training return: tensor(-3161.1340, device='cuda:0')
episode: 347 training return: tensor(-535.5881, device='cuda:0')
epoch: 87 test_true_pfm: 13.981826273964606
episode: 348 training return: tensor(-657.3369, device='cuda:0')
episode: 349 training return: tensor(-649.0012, device='cuda:0')
episode: 350 training return: tensor(-648.6625, device='cuda:0')
episode: 351 training return: tensor(-18847.3203, device='cuda:0')
epoch: 88 test_true_pfm: 14.343525037530437
episode: 352 training return: tensor(-17810.2988, device='cuda:0')
episode: 353 training return: tensor(-654.9055, device='cuda:0')
episode: 354 training return: tensor(-669.2787, device='cuda:0')
episode: 355 training return: tensor(-644.5654, device='cuda:0')
epoch: 89 test_true_pfm: 12.9376966100556
episode: 356 training return: tensor(-668.7102, device='cuda:0')
episode: 357 training return: tensor(-651.3456, device='cuda:0')
episode: 358 training return: tensor(-27746.4570, device='cuda:0')
episode: 359 training return: tensor(-2746.6294, device='cuda:0')
epoch: 90 test_true_pfm: 16.4104066336947
episode: 360 training return: tensor(-38755.8477, device='cuda:0')
episode: 361 training return: tensor(-688.5194, device='cuda:0')
episode: 362 training return: tensor(-661.9377, device='cuda:0')
episode: 363 training return: tensor(-11787.8867, device='cuda:0')
epoch: 91 test_true_pfm: 20.59822717656926
episode: 364 training return: tensor(-675.4576, device='cuda:0')
episode: 365 training return: tensor(-695.8230, device='cuda:0')
episode: 366 training return: tensor(-638.4352, device='cuda:0')
episode: 367 training return: tensor(-634.9615, device='cuda:0')
epoch: 92 test_true_pfm: 24.372174325165577
episode: 368 training return: tensor(-723.9456, device='cuda:0')
episode: 369 training return: tensor(-4327.1318, device='cuda:0')
episode: 370 training return: tensor(-39004.3438, device='cuda:0')
episode: 371 training return: tensor(-648.8149, device='cuda:0')
epoch: 93 test_true_pfm: 15.144217023684666
episode: 372 training return: tensor(-591.2353, device='cuda:0')
episode: 373 training return: tensor(-765.0536, device='cuda:0')
episode: 374 training return: tensor(-657.5540, device='cuda:0')
episode: 375 training return: tensor(-673.2151, device='cuda:0')
epoch: 94 test_true_pfm: 15.752935647003406
episode: 376 training return: tensor(-728.2480, device='cuda:0')
episode: 377 training return: tensor(-160319.3594, device='cuda:0')
episode: 378 training return: tensor(-722.2315, device='cuda:0')
episode: 379 training return: tensor(-548.4226, device='cuda:0')
epoch: 95 test_true_pfm: 13.522838033602113
episode: 380 training return: tensor(-724.1973, device='cuda:0')
episode: 381 training return: tensor(-8293.1025, device='cuda:0')
episode: 382 training return: tensor(-655.7894, device='cuda:0')
episode: 383 training return: tensor(-700.9681, device='cuda:0')
epoch: 96 test_true_pfm: 11.717093772632136
episode: 384 training return: tensor(-5119.8228, device='cuda:0')
episode: 385 training return: tensor(-612.5685, device='cuda:0')
episode: 386 training return: tensor(-668.9769, device='cuda:0')
episode: 387 training return: tensor(-904.7231, device='cuda:0')
epoch: 97 test_true_pfm: 14.430979754977585
episode: 388 training return: tensor(-719.8277, device='cuda:0')
episode: 389 training return: tensor(-689.8631, device='cuda:0')
episode: 390 training return: tensor(-653.6805, device='cuda:0')
episode: 391 training return: tensor(-711.6143, device='cuda:0')
epoch: 98 test_true_pfm: 14.940885855692182
episode: 392 training return: tensor(-26648.9043, device='cuda:0')
episode: 393 training return: tensor(-750.3872, device='cuda:0')
episode: 394 training return: tensor(-2482.5999, device='cuda:0')
episode: 395 training return: tensor(-681.2953, device='cuda:0')
epoch: 99 test_true_pfm: 9.326598205583057
episode: 396 training return: tensor(-597.4272, device='cuda:0')
episode: 397 training return: tensor(-616.4457, device='cuda:0')
episode: 398 training return: tensor(-685.3284, device='cuda:0')
episode: 399 training return: tensor(-653.0336, device='cuda:0')
epoch: 100 test_true_pfm: 20.92797104188105
episode: 400 training return: tensor(-2852.3188, device='cuda:0')
episode: 401 training return: tensor(-3484.2310, device='cuda:0')
episode: 402 training return: tensor(-666.3430, device='cuda:0')
episode: 403 training return: tensor(-676.9736, device='cuda:0')
epoch: 101 test_true_pfm: 9.010337604475817
episode: 404 training return: tensor(-3339.2444, device='cuda:0')
episode: 405 training return: tensor(-710.0813, device='cuda:0')
episode: 406 training return: tensor(-658.3600, device='cuda:0')
episode: 407 training return: tensor(-666.0271, device='cuda:0')
epoch: 102 test_true_pfm: 15.840863191295607
episode: 408 training return: tensor(-873.5509, device='cuda:0')
episode: 409 training return: tensor(-704.4478, device='cuda:0')
episode: 410 training return: tensor(-2496.2661, device='cuda:0')
episode: 411 training return: tensor(-74701.9922, device='cuda:0')
epoch: 103 test_true_pfm: 10.420825755135716
episode: 412 training return: tensor(-640.7823, device='cuda:0')
episode: 413 training return: tensor(-7051.7246, device='cuda:0')
episode: 414 training return: tensor(-6359.7505, device='cuda:0')
episode: 415 training return: tensor(-31516.6582, device='cuda:0')
epoch: 104 test_true_pfm: 17.533496041787
episode: 416 training return: tensor(-1194.4812, device='cuda:0')
episode: 417 training return: tensor(-676.0074, device='cuda:0')
episode: 418 training return: tensor(-652.6790, device='cuda:0')
episode: 419 training return: tensor(-546.8389, device='cuda:0')
epoch: 105 test_true_pfm: 12.802001539471451
episode: 420 training return: tensor(-641.6888, device='cuda:0')
episode: 421 training return: tensor(-728.4780, device='cuda:0')
episode: 422 training return: tensor(-652.8412, device='cuda:0')
episode: 423 training return: tensor(-677.4973, device='cuda:0')
epoch: 106 test_true_pfm: 4.575928476887315
episode: 424 training return: tensor(-691.4343, device='cuda:0')
episode: 425 training return: tensor(-7483.0200, device='cuda:0')
episode: 426 training return: tensor(-650.0684, device='cuda:0')
episode: 427 training return: tensor(-22416.7598, device='cuda:0')
epoch: 107 test_true_pfm: 9.204981778419727
episode: 428 training return: tensor(-663.4252, device='cuda:0')
episode: 429 training return: tensor(-44566.4375, device='cuda:0')
episode: 430 training return: tensor(-652.8477, device='cuda:0')
episode: 431 training return: tensor(-735.2685, device='cuda:0')
epoch: 108 test_true_pfm: 8.064067416101206
episode: 432 training return: tensor(-637.0273, device='cuda:0')
episode: 433 training return: tensor(-43649.7344, device='cuda:0')
episode: 434 training return: tensor(-706.6549, device='cuda:0')
episode: 435 training return: tensor(-644.6631, device='cuda:0')
epoch: 109 test_true_pfm: 18.96201104854247
episode: 436 training return: tensor(-669.0839, device='cuda:0')
episode: 437 training return: tensor(-673.4070, device='cuda:0')
episode: 438 training return: tensor(-11709.3887, device='cuda:0')
episode: 439 training return: tensor(-615.3710, device='cuda:0')
epoch: 110 test_true_pfm: 9.03974270766787
episode: 440 training return: tensor(-649.8848, device='cuda:0')
episode: 441 training return: tensor(-545.3170, device='cuda:0')
episode: 442 training return: tensor(-8485.0488, device='cuda:0')
episode: 443 training return: tensor(-109912.2031, device='cuda:0')
epoch: 111 test_true_pfm: 15.069792424866012
episode: 444 training return: tensor(-612.6516, device='cuda:0')
episode: 445 training return: tensor(-15564.9219, device='cuda:0')
episode: 446 training return: tensor(-606.6504, device='cuda:0')
episode: 447 training return: tensor(-682.9593, device='cuda:0')
epoch: 112 test_true_pfm: 16.279830267032445
episode: 448 training return: tensor(-692.1938, device='cuda:0')
episode: 449 training return: tensor(-577.0844, device='cuda:0')
episode: 450 training return: tensor(-750.0959, device='cuda:0')
episode: 451 training return: tensor(-737.0067, device='cuda:0')
epoch: 113 test_true_pfm: 12.831634643542708
episode: 452 training return: tensor(-586.8715, device='cuda:0')
episode: 453 training return: tensor(-599.1314, device='cuda:0')
episode: 454 training return: tensor(-649.1613, device='cuda:0')
episode: 455 training return: tensor(-656.2693, device='cuda:0')
epoch: 114 test_true_pfm: 20.486340993277658
episode: 456 training return: tensor(-776.0559, device='cuda:0')
episode: 457 training return: tensor(-661.9961, device='cuda:0')
episode: 458 training return: tensor(-648.0511, device='cuda:0')
episode: 459 training return: tensor(-714.6561, device='cuda:0')
epoch: 115 test_true_pfm: 12.729297830424702
episode: 460 training return: tensor(-597.3786, device='cuda:0')
episode: 461 training return: tensor(-578.4504, device='cuda:0')
episode: 462 training return: tensor(-649.1114, device='cuda:0')
episode: 463 training return: tensor(-804.4969, device='cuda:0')
epoch: 116 test_true_pfm: 9.373090835187693
episode: 464 training return: tensor(-1046.6492, device='cuda:0')
episode: 465 training return: tensor(-656.2827, device='cuda:0')
episode: 466 training return: tensor(-651.0810, device='cuda:0')
episode: 467 training return: tensor(-139585.0312, device='cuda:0')
epoch: 117 test_true_pfm: 17.178222625440547
episode: 468 training return: tensor(-1812.4358, device='cuda:0')
episode: 469 training return: tensor(-57939.3945, device='cuda:0')
episode: 470 training return: tensor(-678.7200, device='cuda:0')
episode: 471 training return: tensor(-1027.5267, device='cuda:0')
epoch: 118 test_true_pfm: 15.165533104319243
episode: 472 training return: tensor(-11578.5264, device='cuda:0')
episode: 473 training return: tensor(-74845.5469, device='cuda:0')
episode: 474 training return: tensor(-670.3470, device='cuda:0')
episode: 475 training return: tensor(-2241.3208, device='cuda:0')
epoch: 119 test_true_pfm: 15.392338999285451
episode: 476 training return: tensor(-609.9896, device='cuda:0')
episode: 477 training return: tensor(-733.1458, device='cuda:0')
episode: 478 training return: tensor(-15023.0459, device='cuda:0')
episode: 479 training return: tensor(-756.0423, device='cuda:0')
epoch: 120 test_true_pfm: 16.68176300669374
episode: 480 training return: tensor(-809.3968, device='cuda:0')
episode: 481 training return: tensor(-761.9649, device='cuda:0')
episode: 482 training return: tensor(-588.4515, device='cuda:0')
episode: 483 training return: tensor(-693.3240, device='cuda:0')
epoch: 121 test_true_pfm: 17.280373574529598
episode: 484 training return: tensor(-652.9761, device='cuda:0')
episode: 485 training return: tensor(-32184.3418, device='cuda:0')
episode: 486 training return: tensor(-662.4734, device='cuda:0')
episode: 487 training return: tensor(-4254.1133, device='cuda:0')
epoch: 122 test_true_pfm: 13.814730935494467
episode: 488 training return: tensor(-655.5440, device='cuda:0')
episode: 489 training return: tensor(-669.7172, device='cuda:0')
episode: 490 training return: tensor(-649.5532, device='cuda:0')
episode: 491 training return: tensor(-2474.4041, device='cuda:0')
epoch: 123 test_true_pfm: 16.069661383992674
episode: 492 training return: tensor(-607.9978, device='cuda:0')
episode: 493 training return: tensor(-117811.5234, device='cuda:0')
episode: 494 training return: tensor(-641.5757, device='cuda:0')
episode: 495 training return: tensor(-629.3998, device='cuda:0')
epoch: 124 test_true_pfm: 13.19586416633031
episode: 496 training return: tensor(-698.0574, device='cuda:0')
episode: 497 training return: tensor(-563.6503, device='cuda:0')
episode: 498 training return: tensor(-29451.2578, device='cuda:0')
episode: 499 training return: tensor(-639.7633, device='cuda:0')
epoch: 125 test_true_pfm: 13.478668933476403
episode: 500 training return: tensor(-647.1802, device='cuda:0')
episode: 501 training return: tensor(-724.0646, device='cuda:0')
episode: 502 training return: tensor(-1733.2869, device='cuda:0')
episode: 503 training return: tensor(-661.3209, device='cuda:0')
epoch: 126 test_true_pfm: 18.221727159366758
episode: 504 training return: tensor(-647.0537, device='cuda:0')
episode: 505 training return: tensor(-655.8376, device='cuda:0')
episode: 506 training return: tensor(-121245.6641, device='cuda:0')
episode: 507 training return: tensor(-866.4397, device='cuda:0')
epoch: 127 test_true_pfm: 8.148133186514567
episode: 508 training return: tensor(-637.2672, device='cuda:0')
episode: 509 training return: tensor(-33212.1055, device='cuda:0')
episode: 510 training return: tensor(-618.6408, device='cuda:0')
episode: 511 training return: tensor(-637.0847, device='cuda:0')
epoch: 128 test_true_pfm: 19.61992696652456
episode: 512 training return: tensor(-8233.3271, device='cuda:0')
episode: 513 training return: tensor(-16146.4229, device='cuda:0')
episode: 514 training return: tensor(-5484.5586, device='cuda:0')
episode: 515 training return: tensor(-729.7891, device='cuda:0')
epoch: 129 test_true_pfm: 12.842061393966304
episode: 516 training return: tensor(-604.7274, device='cuda:0')
episode: 517 training return: tensor(-647.1943, device='cuda:0')
episode: 518 training return: tensor(-646.4338, device='cuda:0')
episode: 519 training return: tensor(-680.0927, device='cuda:0')
epoch: 130 test_true_pfm: 15.034454194690607
episode: 520 training return: tensor(-645.3844, device='cuda:0')
episode: 521 training return: tensor(-647.4323, device='cuda:0')
episode: 522 training return: tensor(-579.5391, device='cuda:0')
episode: 523 training return: tensor(-719.6763, device='cuda:0')
epoch: 131 test_true_pfm: 15.6673101731494
episode: 524 training return: tensor(-648.3227, device='cuda:0')
episode: 525 training return: tensor(-56642.5508, device='cuda:0')
episode: 526 training return: tensor(-654.7646, device='cuda:0')
episode: 527 training return: tensor(-749.4126, device='cuda:0')
epoch: 132 test_true_pfm: 13.456671078107368
episode: 528 training return: tensor(-721.0005, device='cuda:0')
episode: 529 training return: tensor(-533.8077, device='cuda:0')
episode: 530 training return: tensor(-12070.3516, device='cuda:0')
episode: 531 training return: tensor(-690.6527, device='cuda:0')
epoch: 133 test_true_pfm: 7.987212198061667
episode: 532 training return: tensor(-109168.6016, device='cuda:0')
episode: 533 training return: tensor(-626.8256, device='cuda:0')
episode: 534 training return: tensor(-929.9105, device='cuda:0')
episode: 535 training return: tensor(-695.0743, device='cuda:0')
epoch: 134 test_true_pfm: 15.607757521781803
episode: 536 training return: tensor(-674.0859, device='cuda:0')
episode: 537 training return: tensor(-644.6583, device='cuda:0')
episode: 538 training return: tensor(-3553.6196, device='cuda:0')
episode: 539 training return: tensor(-655.7446, device='cuda:0')
epoch: 135 test_true_pfm: 16.76236244133663
episode: 540 training return: tensor(-751.8286, device='cuda:0')
episode: 541 training return: tensor(-642.4946, device='cuda:0')
episode: 542 training return: tensor(-629.9839, device='cuda:0')
episode: 543 training return: tensor(-682.0934, device='cuda:0')
epoch: 136 test_true_pfm: 3.9809592694779212
episode: 544 training return: tensor(-665.5460, device='cuda:0')
episode: 545 training return: tensor(-644.3776, device='cuda:0')
episode: 546 training return: tensor(-4633.2974, device='cuda:0')
episode: 547 training return: tensor(-641.9366, device='cuda:0')
epoch: 137 test_true_pfm: 10.85574680329891
episode: 548 training return: tensor(-546.5667, device='cuda:0')
episode: 549 training return: tensor(-653.3866, device='cuda:0')
episode: 550 training return: tensor(-908.9158, device='cuda:0')
episode: 551 training return: tensor(-660.1154, device='cuda:0')
epoch: 138 test_true_pfm: 12.107263398424104
episode: 552 training return: tensor(-40560.3477, device='cuda:0')
episode: 553 training return: tensor(-689.8851, device='cuda:0')
episode: 554 training return: tensor(-4616.5688, device='cuda:0')
episode: 555 training return: tensor(-5429.4692, device='cuda:0')
epoch: 139 test_true_pfm: 19.312936468606217
episode: 556 training return: tensor(-755.6541, device='cuda:0')
episode: 557 training return: tensor(-28196.8262, device='cuda:0')
episode: 558 training return: tensor(-693.5109, device='cuda:0')
episode: 559 training return: tensor(-682.8276, device='cuda:0')
epoch: 140 test_true_pfm: 15.909349648090204
episode: 560 training return: tensor(-670.7239, device='cuda:0')
episode: 561 training return: tensor(-785.3961, device='cuda:0')
episode: 562 training return: tensor(-696.1980, device='cuda:0')
episode: 563 training return: tensor(-644.9402, device='cuda:0')
epoch: 141 test_true_pfm: 11.67143977924591
episode: 564 training return: tensor(-893.7512, device='cuda:0')
episode: 565 training return: tensor(-642.9639, device='cuda:0')
episode: 566 training return: tensor(-86945.2578, device='cuda:0')
episode: 567 training return: tensor(-17496.7324, device='cuda:0')
epoch: 142 test_true_pfm: 16.0133227964004
episode: 568 training return: tensor(-642.8010, device='cuda:0')
episode: 569 training return: tensor(-658.4163, device='cuda:0')
episode: 570 training return: tensor(-660.3513, device='cuda:0')
episode: 571 training return: tensor(-636.7607, device='cuda:0')
epoch: 143 test_true_pfm: 15.693974041677055
episode: 572 training return: tensor(-662.6177, device='cuda:0')
episode: 573 training return: tensor(-50177.1641, device='cuda:0')
episode: 574 training return: tensor(-781.7957, device='cuda:0')
episode: 575 training return: tensor(-784.3996, device='cuda:0')
epoch: 144 test_true_pfm: 15.107621812496697
episode: 576 training return: tensor(-81210.5859, device='cuda:0')
episode: 577 training return: tensor(-1571.0460, device='cuda:0')
episode: 578 training return: tensor(-649.3907, device='cuda:0')
episode: 579 training return: tensor(-651.0781, device='cuda:0')
epoch: 145 test_true_pfm: 12.221032275232123
episode: 580 training return: tensor(-656.6210, device='cuda:0')
episode: 581 training return: tensor(-658.5529, device='cuda:0')
episode: 582 training return: tensor(-647.5219, device='cuda:0')
episode: 583 training return: tensor(-702.3096, device='cuda:0')
epoch: 146 test_true_pfm: 8.102239443591774
episode: 584 training return: tensor(-628.7635, device='cuda:0')
episode: 585 training return: tensor(-718.1715, device='cuda:0')
episode: 586 training return: tensor(-668.9470, device='cuda:0')
episode: 587 training return: tensor(-626.1304, device='cuda:0')
epoch: 147 test_true_pfm: 12.07596477372483
episode: 588 training return: tensor(-632.3777, device='cuda:0')
episode: 589 training return: tensor(-40668.5625, device='cuda:0')
episode: 590 training return: tensor(-650.6762, device='cuda:0')
episode: 591 training return: tensor(-606.2424, device='cuda:0')
epoch: 148 test_true_pfm: 15.538401548378705
episode: 592 training return: tensor(-647.8575, device='cuda:0')
episode: 593 training return: tensor(-11490.1758, device='cuda:0')
episode: 594 training return: tensor(-672.9080, device='cuda:0')
episode: 595 training return: tensor(-662.8610, device='cuda:0')
epoch: 149 test_true_pfm: 12.918195252574284
episode: 596 training return: tensor(-654.5808, device='cuda:0')
episode: 597 training return: tensor(-675.1301, device='cuda:0')
episode: 598 training return: tensor(-645.1022, device='cuda:0')
episode: 599 training return: tensor(-648.6004, device='cuda:0')
epoch: 150 test_true_pfm: 15.313287965134569
