['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'brac', '--traj', 'expert', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 1.4698077620193362 test_loss: -5.005829238891602
epoch: 1 training_loss -6.813740799427032 test_loss: -8.316191864013671
epoch: 2 training_loss -9.214861760139465 test_loss: -9.634939575195313
epoch: 3 training_loss -10.432779989242555 test_loss: -10.815686798095703
epoch: 4 training_loss -11.296684942245484 test_loss: -11.612569427490234
epoch: 5 training_loss -11.839017648696899 test_loss: -11.979873657226562
epoch: 6 training_loss -12.319931392669679 test_loss: -12.343263244628906
epoch: 7 training_loss -12.624241418838501 test_loss: -12.873178100585937
epoch: 8 training_loss -12.88593264579773 test_loss: -13.162289428710938
epoch: 9 training_loss -13.125949287414551 test_loss: -13.521456909179687
epoch: 10 training_loss -13.366851739883423 test_loss: -13.462278747558594
epoch: 11 training_loss -13.497120752334595 test_loss: -13.642880249023438
epoch: 12 training_loss -13.560629119873047 test_loss: -13.673965454101562
epoch: 13 training_loss -13.7594291305542 test_loss: -13.89825897216797
epoch: 14 training_loss -13.891756277084351 test_loss: -13.845896911621093
epoch: 15 training_loss -14.04915020942688 test_loss: -14.162008666992188
epoch: 16 training_loss -14.221560192108154 test_loss: -14.313972473144531
epoch: 17 training_loss -14.298814096450805 test_loss: -14.455540466308594
epoch: 18 training_loss -14.34752965927124 test_loss: -14.437019348144531
epoch: 19 training_loss -14.51716347694397 test_loss: -14.568826293945312
epoch: 20 training_loss -14.479510192871095 test_loss: -14.590167236328124
epoch: 21 training_loss -14.645174255371094 test_loss: -14.667626953125
epoch: 22 training_loss -14.665859355926514 test_loss: -14.829742431640625
epoch: 23 training_loss -14.698688259124756 test_loss: -14.910943603515625
epoch: 24 training_loss -14.787995824813843 test_loss: -14.99816436767578
epoch: 25 training_loss -14.934816722869874 test_loss: -15.00274658203125
epoch: 26 training_loss -14.80782325744629 test_loss: -14.980543518066407
epoch: 27 training_loss -14.980299196243287 test_loss: -15.00952606201172
epoch: 28 training_loss -15.035035066604614 test_loss: -14.821397399902343
epoch: 29 training_loss -15.056485805511475 test_loss: -14.992269897460938
epoch: 30 training_loss -15.106029434204101 test_loss: -15.0818603515625
epoch: 31 training_loss -15.123199605941773 test_loss: -15.240597534179688
epoch: 32 training_loss -15.262623805999755 test_loss: -15.1658203125
epoch: 33 training_loss -15.355241727828979 test_loss: -15.272010803222656
epoch: 34 training_loss -15.335179681777953 test_loss: -15.121824645996094
epoch: 35 training_loss -15.31232292175293 test_loss: -15.294180297851563
epoch: 36 training_loss -15.44308391571045 test_loss: -15.291838073730469
epoch: 37 training_loss -15.362314529418946 test_loss: -15.3195068359375
epoch: 38 training_loss -15.412010231018066 test_loss: -15.444721984863282
epoch: 39 training_loss -15.469388990402221 test_loss: -15.510595703125
epoch: 40 training_loss -15.485686054229737 test_loss: -15.528303527832032
epoch: 41 training_loss -15.618510875701904 test_loss: -15.62615966796875
epoch: 42 training_loss -15.591899423599243 test_loss: -15.450399780273438
epoch: 43 training_loss -15.63587080001831 test_loss: -15.611125183105468
epoch: 44 training_loss -15.609466791152954 test_loss: -15.638082885742188
epoch: 45 training_loss -15.6822661113739 test_loss: -15.655110168457032
epoch: 46 training_loss -15.689399042129516 test_loss: -15.539437866210937
epoch: 47 training_loss -15.735199193954468 test_loss: -15.541670227050782
epoch: 48 training_loss -15.7456112575531 test_loss: -15.755938720703124
epoch: 49 training_loss -15.717745866775513 test_loss: -15.725733947753906
epoch: 50 training_loss -15.77034541130066 test_loss: -15.80223388671875
epoch: 51 training_loss -15.811374502182007 test_loss: -15.59570770263672
epoch: 52 training_loss -15.73725417137146 test_loss: -15.755329895019532
epoch: 53 training_loss -15.877099380493163 test_loss: -15.838420104980468
epoch: 54 training_loss -15.818713903427124 test_loss: -15.96069793701172
epoch: 55 training_loss -15.824590730667115 test_loss: -15.8248046875
epoch: 56 training_loss -15.945068941116332 test_loss: -15.887530517578124
epoch: 57 training_loss -15.912325439453125 test_loss: -15.784152221679687
epoch: 58 training_loss -15.938186540603638 test_loss: -15.92423095703125
epoch: 59 training_loss -15.953652734756469 test_loss: -16.05208740234375
epoch: 60 training_loss -15.981265897750854 test_loss: -15.85792236328125
epoch: 61 training_loss -16.0395703125 test_loss: -15.899826049804688
epoch: 62 training_loss -15.999569358825683 test_loss: -15.988185119628906
epoch: 63 training_loss -15.952260208129882 test_loss: -16.00013122558594
epoch: 64 training_loss -16.118897399902345 test_loss: -16.20265350341797
epoch: 65 training_loss -16.0486217212677 test_loss: -16.10376739501953
epoch: 66 training_loss -16.015101280212402 test_loss: -16.122547912597657
epoch: 67 training_loss -16.118770837783813 test_loss: -16.068226623535157
epoch: 68 training_loss -16.077405252456664 test_loss: -16.236849975585937
epoch: 69 training_loss -16.147178764343263 test_loss: -16.167556762695312
epoch: 70 training_loss -16.09726058959961 test_loss: -16.215682983398438
epoch: 71 training_loss -16.126543855667116 test_loss: -16.279302978515624
epoch: 72 training_loss -16.21172978401184 test_loss: -16.182719421386718
epoch: 73 training_loss -16.16893214225769 test_loss: -16.116880798339842
epoch: 74 training_loss -16.202506198883057 test_loss: -16.033793640136718
epoch: 75 training_loss -16.20254369735718 test_loss: -16.256809997558594
epoch: 76 training_loss -16.189888429641723 test_loss: -16.22167510986328
epoch: 77 training_loss -16.178858318328857 test_loss: -16.250308227539062
epoch: 78 training_loss -16.25331894874573 test_loss: -16.25615234375
epoch: 79 training_loss -16.172483339309693 test_loss: -16.436517333984376
epoch: 80 training_loss -16.287211809158325 test_loss: -16.307774353027344
epoch: 81 training_loss -16.24941005706787 test_loss: -16.40352783203125
epoch: 82 training_loss -16.350862503051758 test_loss: -16.27770080566406
epoch: 83 training_loss -16.285486965179444 test_loss: -16.29966583251953
epoch: 84 training_loss -16.33781311035156 test_loss: -16.256036376953126
epoch: 85 training_loss -16.266984519958495 test_loss: -16.3431884765625
epoch: 86 training_loss -16.30107192993164 test_loss: -16.246055603027344
epoch: 87 training_loss -16.32770055770874 test_loss: -16.4793212890625
epoch: 88 training_loss -16.35773880004883 test_loss: -16.253675842285155
epoch: 89 training_loss -16.391901893615724 test_loss: -16.282443237304687
epoch: 90 training_loss -16.448050756454467 test_loss: -16.393739318847658
epoch: 91 training_loss -16.36415081977844 test_loss: -16.4265869140625
epoch: 92 training_loss -16.370066699981688 test_loss: -16.24475860595703
epoch: 93 training_loss -16.421439933776856 test_loss: -16.3434326171875
epoch: 94 training_loss -16.44809013366699 test_loss: -16.443597412109376
epoch: 95 training_loss -16.42073823928833 test_loss: -16.273501586914062
epoch: 96 training_loss -16.49928731918335 test_loss: -16.606532287597656
epoch: 97 training_loss -16.439708204269408 test_loss: -16.423268127441407
epoch: 98 training_loss -16.37967088699341 test_loss: -16.38096923828125
epoch: 99 training_loss -16.531748390197755 test_loss: -16.62243194580078
epoch: 100 training_loss -16.48789779663086 test_loss: -16.562716674804687
epoch: 101 training_loss -16.50575957298279 test_loss: -16.48456573486328
epoch: 102 training_loss -16.459252223968505 test_loss: -16.591241455078126
epoch: 103 training_loss -16.452253074645995 test_loss: -16.6198974609375
epoch: 104 training_loss -16.541197996139527 test_loss: -16.323350524902345
epoch: 105 training_loss -16.499517183303833 test_loss: -16.493885803222657
epoch: 106 training_loss -16.51409546852112 test_loss: -16.60106658935547
epoch: 107 training_loss -16.544551973342895 test_loss: -16.425994873046875
epoch: 108 training_loss -16.558290510177613 test_loss: -16.523365783691407
epoch: 109 training_loss -16.633192157745363 test_loss: -16.489501953125
epoch: 110 training_loss -16.61430414199829 test_loss: -16.605990600585937
epoch: 111 training_loss -16.621730432510375 test_loss: -16.679345703125
epoch: 112 training_loss -16.58557520866394 test_loss: -16.586219787597656
epoch: 113 training_loss -16.63762201309204 test_loss: -16.492611694335938
epoch: 114 training_loss -16.60778844833374 test_loss: -16.56846923828125
epoch: 115 training_loss -16.561954803466797 test_loss: -16.51939697265625
epoch: 116 training_loss -16.5768159866333 test_loss: -16.453916931152342
epoch: 117 training_loss -16.6235836315155 test_loss: -16.653021240234374
epoch: 118 training_loss -16.72156768798828 test_loss: -16.55079803466797
epoch: 119 training_loss -16.648942222595213 test_loss: -16.7172607421875
epoch: 120 training_loss -16.65725600242615 test_loss: -16.648095703125
epoch: 121 training_loss -16.6820516204834 test_loss: -16.77169189453125
epoch: 122 training_loss -16.640617656707764 test_loss: -16.727804565429686
epoch: 123 training_loss -16.64877712249756 test_loss: -16.701097106933595
epoch: 124 training_loss -16.701110467910766 test_loss: -16.677981567382812
epoch: 125 training_loss -16.581557207107544 test_loss: -16.65113220214844
epoch: 126 training_loss -16.72294140815735 test_loss: -16.57872772216797
epoch: 127 training_loss -16.686254768371583 test_loss: -16.66011962890625
epoch: 128 training_loss -16.710465927124023 test_loss: -16.815403747558594
epoch: 129 training_loss -16.6729500579834 test_loss: -16.635205078125
epoch: 130 training_loss -16.714377365112306 test_loss: -16.834507751464844
epoch: 131 training_loss -16.72046682357788 test_loss: -16.75496826171875
epoch: 132 training_loss -16.644744262695312 test_loss: -16.61419677734375
epoch: 133 training_loss -16.703973417282103 test_loss: -16.819334411621092
epoch: 134 training_loss -16.735058603286742 test_loss: -16.8832763671875
epoch: 135 training_loss -16.70537908554077 test_loss: -16.749974060058594
epoch: 136 training_loss -16.759692544937135 test_loss: -16.563140869140625
epoch: 137 training_loss -16.72437976837158 test_loss: -16.776119995117188
epoch: 138 training_loss -16.741812143325806 test_loss: -16.85649871826172
epoch: 139 training_loss -16.763736238479613 test_loss: -16.839564514160156
epoch: 140 training_loss -16.8121981048584 test_loss: -16.80760955810547
epoch: 141 training_loss -16.83816568374634 test_loss: -16.756942749023438
epoch: 142 training_loss -16.753461408615113 test_loss: -16.809103393554686
epoch: 143 training_loss -16.791943855285645 test_loss: -16.826683044433594
epoch: 144 training_loss -16.862110137939453 test_loss: -16.653279113769532
epoch: 145 training_loss -16.810125045776367 test_loss: -16.701272583007814
epoch: 146 training_loss -16.780808410644532 test_loss: -16.752491760253907
epoch: 147 training_loss -16.78826889038086 test_loss: -16.74414978027344
epoch: 148 training_loss -16.872207775115967 test_loss: -16.758454895019533
epoch: 149 training_loss -16.84735319137573 test_loss: -16.884515380859376
-1.560706426354691
episode: 0 training return: tensor(-2.9657e+11, device='cuda:0')
episode: 1 training return: tensor(-8.6975e+11, device='cuda:0')
episode: 2 training return: tensor(-35430.4883, device='cuda:0')
episode: 3 training return: tensor(-5.7180e+11, device='cuda:0')
epoch: 1 test_true_pfm: -45.75729981556274
episode: 4 training return: tensor(-68091.6562, device='cuda:0')
episode: 5 training return: tensor(-5.2797e+10, device='cuda:0')
episode: 6 training return: tensor(-115725.6094, device='cuda:0')
episode: 7 training return: tensor(-8452.5488, device='cuda:0')
epoch: 2 test_true_pfm: -72.44221827983337
episode: 8 training return: tensor(-1.6313e+12, device='cuda:0')
episode: 9 training return: tensor(-414439.8750, device='cuda:0')
episode: 10 training return: tensor(-49463.4570, device='cuda:0')
episode: 11 training return: tensor(-49396.0703, device='cuda:0')
epoch: 3 test_true_pfm: -45.327212440482135
episode: 12 training return: tensor(-49543.5391, device='cuda:0')
episode: 13 training return: tensor(-49427.6016, device='cuda:0')
episode: 14 training return: tensor(-49002.4453, device='cuda:0')
episode: 15 training return: tensor(-49456.7656, device='cuda:0')
epoch: 4 test_true_pfm: -45.67211288862626
episode: 16 training return: tensor(-49301.9219, device='cuda:0')
episode: 17 training return: tensor(-49254.5391, device='cuda:0')
episode: 18 training return: tensor(-49322.2031, device='cuda:0')
episode: 19 training return: tensor(-49149.1289, device='cuda:0')
epoch: 5 test_true_pfm: -47.048857666985036
episode: 20 training return: tensor(-49327.2500, device='cuda:0')
episode: 21 training return: tensor(-49389.9883, device='cuda:0')
episode: 22 training return: tensor(-49379.5000, device='cuda:0')
episode: 23 training return: tensor(-49536.4805, device='cuda:0')
epoch: 6 test_true_pfm: -43.252744093162825
episode: 24 training return: tensor(-49206.8008, device='cuda:0')
episode: 25 training return: tensor(-49352.2930, device='cuda:0')
episode: 26 training return: tensor(-50245.8008, device='cuda:0')
episode: 27 training return: tensor(-49407.8945, device='cuda:0')
epoch: 7 test_true_pfm: -89.89165115490947
episode: 28 training return: tensor(-48763.7578, device='cuda:0')
episode: 29 training return: tensor(-49499.5352, device='cuda:0')
episode: 30 training return: tensor(-49513.9922, device='cuda:0')
episode: 31 training return: tensor(-49531.4453, device='cuda:0')
epoch: 8 test_true_pfm: -69.34251258342631
episode: 32 training return: tensor(-48641.3945, device='cuda:0')
episode: 33 training return: tensor(-49365.4883, device='cuda:0')
episode: 34 training return: tensor(-49518.1172, device='cuda:0')
episode: 35 training return: tensor(-49464.1953, device='cuda:0')
epoch: 9 test_true_pfm: -51.730590721562855
episode: 36 training return: tensor(-49490.4961, device='cuda:0')
episode: 37 training return: tensor(-49106.3359, device='cuda:0')
episode: 38 training return: tensor(-49114.5234, device='cuda:0')
episode: 39 training return: tensor(-49734.4102, device='cuda:0')
epoch: 10 test_true_pfm: -44.15613236359624
episode: 40 training return: tensor(-49476.2227, device='cuda:0')
episode: 41 training return: tensor(-49999.3281, device='cuda:0')
episode: 42 training return: tensor(-49316.2891, device='cuda:0')
episode: 43 training return: tensor(-49358.9141, device='cuda:0')
epoch: 11 test_true_pfm: -38.542566454445655
episode: 44 training return: tensor(-49461.0703, device='cuda:0')
episode: 45 training return: tensor(-49295.8945, device='cuda:0')
episode: 46 training return: tensor(-49296.5195, device='cuda:0')
episode: 47 training return: tensor(-49443.5664, device='cuda:0')
epoch: 12 test_true_pfm: -48.968655897274495
episode: 48 training return: tensor(-49239.7969, device='cuda:0')
episode: 49 training return: tensor(-49120.4766, device='cuda:0')
episode: 50 training return: tensor(-49212.6445, device='cuda:0')
episode: 51 training return: tensor(-49445.2734, device='cuda:0')
epoch: 13 test_true_pfm: -43.707691399360534
episode: 52 training return: tensor(-49528.6914, device='cuda:0')
episode: 53 training return: tensor(-49958.3789, device='cuda:0')
episode: 54 training return: tensor(-51782.6484, device='cuda:0')
episode: 55 training return: tensor(-48931.7070, device='cuda:0')
epoch: 14 test_true_pfm: -71.38890041477968
episode: 56 training return: tensor(-49323.3242, device='cuda:0')
episode: 57 training return: tensor(-49469.2773, device='cuda:0')
episode: 58 training return: tensor(-49452.7930, device='cuda:0')
episode: 59 training return: tensor(-49319.5781, device='cuda:0')
epoch: 15 test_true_pfm: -41.589989506028985
episode: 60 training return: tensor(-50951.2578, device='cuda:0')
episode: 61 training return: tensor(-49256.7930, device='cuda:0')
episode: 62 training return: tensor(-49307.8906, device='cuda:0')
episode: 63 training return: tensor(-49452.7305, device='cuda:0')
epoch: 16 test_true_pfm: -39.34084890995214
episode: 64 training return: tensor(-49327.3047, device='cuda:0')
episode: 65 training return: tensor(-49409.7578, device='cuda:0')
episode: 66 training return: tensor(-49489.8047, device='cuda:0')
episode: 67 training return: tensor(-49290.6992, device='cuda:0')
epoch: 17 test_true_pfm: -39.05462768977507
episode: 68 training return: tensor(-49542.2383, device='cuda:0')
episode: 69 training return: tensor(-49402.9570, device='cuda:0')
episode: 70 training return: tensor(-49436.9023, device='cuda:0')
episode: 71 training return: tensor(-49491.5000, device='cuda:0')
epoch: 18 test_true_pfm: -40.61292846218015
episode: 72 training return: tensor(-49538.8633, device='cuda:0')
episode: 73 training return: tensor(-49316.8086, device='cuda:0')
episode: 74 training return: tensor(-49476.2031, device='cuda:0')
episode: 75 training return: tensor(-49353.1602, device='cuda:0')
epoch: 19 test_true_pfm: -40.19194325285829
episode: 76 training return: tensor(-49466.6680, device='cuda:0')
episode: 77 training return: tensor(-49341.9102, device='cuda:0')
episode: 78 training return: tensor(-49033.5156, device='cuda:0')
episode: 79 training return: tensor(-49086.0898, device='cuda:0')
epoch: 20 test_true_pfm: -40.86022715101122
episode: 80 training return: tensor(-49417.2891, device='cuda:0')
episode: 81 training return: tensor(-49514.7695, device='cuda:0')
episode: 82 training return: tensor(-61316.2891, device='cuda:0')
episode: 83 training return: tensor(-96374.1094, device='cuda:0')
epoch: 21 test_true_pfm: -45.78241713926152
episode: 84 training return: tensor(-51212.9922, device='cuda:0')
episode: 85 training return: tensor(-49248.5547, device='cuda:0')
episode: 86 training return: tensor(-49229.3750, device='cuda:0')
episode: 87 training return: tensor(-51055.5195, device='cuda:0')
epoch: 22 test_true_pfm: -74.17309720462032
episode: 88 training return: tensor(-53774.1758, device='cuda:0')
episode: 89 training return: tensor(-49442.0938, device='cuda:0')
episode: 90 training return: tensor(-49277.0703, device='cuda:0')
episode: 91 training return: tensor(-49245.2852, device='cuda:0')
epoch: 23 test_true_pfm: -41.202524686296044
episode: 92 training return: tensor(-53765.1953, device='cuda:0')
episode: 93 training return: tensor(-49354.4844, device='cuda:0')
episode: 94 training return: tensor(-49318.1289, device='cuda:0')
episode: 95 training return: tensor(-49481.6836, device='cuda:0')
epoch: 24 test_true_pfm: -40.86863651605571
episode: 96 training return: tensor(-49284.9258, device='cuda:0')
episode: 97 training return: tensor(-66217.3125, device='cuda:0')
episode: 98 training return: tensor(-49485.3398, device='cuda:0')
episode: 99 training return: tensor(-72491.6719, device='cuda:0')
epoch: 25 test_true_pfm: -2.428440089477062
episode: 100 training return: tensor(-49683.3320, device='cuda:0')
episode: 101 training return: tensor(-50540.1523, device='cuda:0')
episode: 102 training return: tensor(-49639.4375, device='cuda:0')
episode: 103 training return: tensor(-50811.6875, device='cuda:0')
epoch: 26 test_true_pfm: -46.38806001431587
episode: 104 training return: tensor(-61968.4961, device='cuda:0')
episode: 105 training return: tensor(-50761.1289, device='cuda:0')
episode: 106 training return: tensor(-75314.9297, device='cuda:0')
episode: 107 training return: tensor(-53442.8477, device='cuda:0')
epoch: 27 test_true_pfm: -44.27202215236713
episode: 108 training return: tensor(-49224.0820, device='cuda:0')
episode: 109 training return: tensor(-50266.7734, device='cuda:0')
episode: 110 training return: tensor(-51445.0273, device='cuda:0')
episode: 111 training return: tensor(-53275.0664, device='cuda:0')
epoch: 28 test_true_pfm: -76.97666932596694
episode: 112 training return: tensor(-48595.6094, device='cuda:0')
episode: 113 training return: tensor(-49424.9023, device='cuda:0')
episode: 114 training return: tensor(-49287.0391, device='cuda:0')
episode: 115 training return: tensor(-49659.7305, device='cuda:0')
epoch: 29 test_true_pfm: -76.2809079958285
episode: 116 training return: tensor(-48679.2109, device='cuda:0')
episode: 117 training return: tensor(-47783.1172, device='cuda:0')
episode: 118 training return: tensor(-47412.5352, device='cuda:0')
episode: 119 training return: tensor(-47412.1602, device='cuda:0')
epoch: 30 test_true_pfm: -41.15725461156267
episode: 120 training return: tensor(-49678.4961, device='cuda:0')
episode: 121 training return: tensor(-49289.2461, device='cuda:0')
episode: 122 training return: tensor(-49536.3242, device='cuda:0')
episode: 123 training return: tensor(-49205.9375, device='cuda:0')
epoch: 31 test_true_pfm: -44.667618162084814
episode: 124 training return: tensor(-1.3720e+08, device='cuda:0')
episode: 125 training return: tensor(-49440.2500, device='cuda:0')
episode: 126 training return: tensor(-49317.4922, device='cuda:0')
episode: 127 training return: tensor(-49305.8125, device='cuda:0')
epoch: 32 test_true_pfm: -40.93085891210108
episode: 128 training return: tensor(-49186.9766, device='cuda:0')
episode: 129 training return: tensor(-49340.2852, device='cuda:0')
episode: 130 training return: tensor(-49273.4258, device='cuda:0')
episode: 131 training return: tensor(-49308.8086, device='cuda:0')
epoch: 33 test_true_pfm: -38.020437463592636
episode: 132 training return: tensor(-49644.4961, device='cuda:0')
episode: 133 training return: tensor(-49240.4102, device='cuda:0')
episode: 134 training return: tensor(-84864., device='cuda:0')
episode: 135 training return: tensor(-49470.1602, device='cuda:0')
epoch: 34 test_true_pfm: -43.06101463766722
episode: 136 training return: tensor(-125664.6406, device='cuda:0')
episode: 137 training return: tensor(-49318.2656, device='cuda:0')
episode: 138 training return: tensor(-49321.9688, device='cuda:0')
episode: 139 training return: tensor(-49577.5000, device='cuda:0')
epoch: 35 test_true_pfm: -297.06622790362553
episode: 140 training return: tensor(-41080.4258, device='cuda:0')
episode: 141 training return: tensor(-39578.1523, device='cuda:0')
episode: 142 training return: tensor(-39556.0195, device='cuda:0')
episode: 143 training return: tensor(-48024.0508, device='cuda:0')
epoch: 36 test_true_pfm: -39.855128506737906
episode: 144 training return: tensor(-49303.4414, device='cuda:0')
episode: 145 training return: tensor(-49428.6328, device='cuda:0')
episode: 146 training return: tensor(-49708.7930, device='cuda:0')
episode: 147 training return: tensor(-98324.0078, device='cuda:0')
epoch: 37 test_true_pfm: -38.887063788269764
episode: 148 training return: tensor(-49506.1367, device='cuda:0')
episode: 149 training return: tensor(-49286.9570, device='cuda:0')
episode: 150 training return: tensor(-53287.7383, device='cuda:0')
episode: 151 training return: tensor(-48798.0039, device='cuda:0')
epoch: 38 test_true_pfm: -70.79100968264365
episode: 152 training return: tensor(-87090.6250, device='cuda:0')
episode: 153 training return: tensor(-83430.1953, device='cuda:0')
episode: 154 training return: tensor(-82806.6641, device='cuda:0')
episode: 155 training return: tensor(-81579.5391, device='cuda:0')
epoch: 39 test_true_pfm: -376.8830436055352
episode: 156 training return: tensor(-58025.9102, device='cuda:0')
episode: 157 training return: tensor(-49416.3281, device='cuda:0')
episode: 158 training return: tensor(-49494.3945, device='cuda:0')
episode: 159 training return: tensor(-49489.2773, device='cuda:0')
epoch: 40 test_true_pfm: -41.68304168855095
episode: 160 training return: tensor(-49312.7148, device='cuda:0')
episode: 161 training return: tensor(-49281.5859, device='cuda:0')
episode: 162 training return: tensor(-57630.6641, device='cuda:0')
episode: 163 training return: tensor(-62878.3438, device='cuda:0')
epoch: 41 test_true_pfm: -39.78355134744343
episode: 164 training return: tensor(-49488.6172, device='cuda:0')
episode: 165 training return: tensor(-49333.8984, device='cuda:0')
episode: 166 training return: tensor(-51643.7578, device='cuda:0')
episode: 167 training return: tensor(-49513.4375, device='cuda:0')
epoch: 42 test_true_pfm: -36.67563554885262
episode: 168 training return: tensor(-49504.5977, device='cuda:0')
episode: 169 training return: tensor(-49483.3750, device='cuda:0')
episode: 170 training return: tensor(-49498.4688, device='cuda:0')
episode: 171 training return: tensor(-49461.7539, device='cuda:0')
epoch: 43 test_true_pfm: -43.31629025166914
episode: 172 training return: tensor(-48842.3359, device='cuda:0')
episode: 173 training return: tensor(-49334.8711, device='cuda:0')
episode: 174 training return: tensor(-49516.4922, device='cuda:0')
episode: 175 training return: tensor(-49451.6328, device='cuda:0')
epoch: 44 test_true_pfm: -36.935928363889325
episode: 176 training return: tensor(-49198.9961, device='cuda:0')
episode: 177 training return: tensor(-54795.6172, device='cuda:0')
episode: 178 training return: tensor(-49415.2578, device='cuda:0')
episode: 179 training return: tensor(-49436.2148, device='cuda:0')
epoch: 45 test_true_pfm: -45.78735927443529
episode: 180 training return: tensor(-51079.1758, device='cuda:0')
episode: 181 training return: tensor(-59013.0938, device='cuda:0')
episode: 182 training return: tensor(-49341.4531, device='cuda:0')
episode: 183 training return: tensor(-49446.3516, device='cuda:0')
epoch: 46 test_true_pfm: 40.334439235802456
episode: 184 training return: tensor(-1375665.7500, device='cuda:0')
episode: 185 training return: tensor(-49352.5820, device='cuda:0')
episode: 186 training return: tensor(-48276.5078, device='cuda:0')
episode: 187 training return: tensor(-64607.4219, device='cuda:0')
epoch: 47 test_true_pfm: -54.88809280181536
episode: 188 training return: tensor(-61262.7539, device='cuda:0')
episode: 189 training return: tensor(-53477.3320, device='cuda:0')
episode: 190 training return: tensor(-46949.4688, device='cuda:0')
episode: 191 training return: tensor(-49312.1719, device='cuda:0')
epoch: 48 test_true_pfm: -35.0809641922964
episode: 192 training return: tensor(-45838.9883, device='cuda:0')
episode: 193 training return: tensor(-67343.3047, device='cuda:0')
episode: 194 training return: tensor(-48744.8945, device='cuda:0')
episode: 195 training return: tensor(-46947.2266, device='cuda:0')
epoch: 49 test_true_pfm: -45.87318664834137
episode: 196 training return: tensor(-49512.8672, device='cuda:0')
episode: 197 training return: tensor(-49546.1055, device='cuda:0')
episode: 198 training return: tensor(-49552.9258, device='cuda:0')
episode: 199 training return: tensor(-49552.4922, device='cuda:0')
epoch: 50 test_true_pfm: -37.499786152898835
episode: 200 training return: tensor(-44889.4102, device='cuda:0')
episode: 201 training return: tensor(-45504.5000, device='cuda:0')
episode: 202 training return: tensor(-47877.6211, device='cuda:0')
episode: 203 training return: tensor(-46288.6055, device='cuda:0')
epoch: 51 test_true_pfm: -34.998165454633586
episode: 204 training return: tensor(-51826.2773, device='cuda:0')
episode: 205 training return: tensor(-56005.5352, device='cuda:0')
episode: 206 training return: tensor(-4.4121e+12, device='cuda:0')
episode: 207 training return: tensor(-47041.3438, device='cuda:0')
epoch: 52 test_true_pfm: -40.97609178461607
episode: 208 training return: tensor(-49209.5156, device='cuda:0')
episode: 209 training return: tensor(-48709.5625, device='cuda:0')
episode: 210 training return: tensor(-54084.9609, device='cuda:0')
episode: 211 training return: tensor(-49480.2500, device='cuda:0')
epoch: 53 test_true_pfm: -38.72284451644702
episode: 212 training return: tensor(-49336.1875, device='cuda:0')
episode: 213 training return: tensor(-49261.5273, device='cuda:0')
episode: 214 training return: tensor(-49283.5000, device='cuda:0')
episode: 215 training return: tensor(-49361.3750, device='cuda:0')
epoch: 54 test_true_pfm: -64.94006891197112
episode: 216 training return: tensor(-50627.7383, device='cuda:0')
episode: 217 training return: tensor(-50665.2031, device='cuda:0')
episode: 218 training return: tensor(-85712.4375, device='cuda:0')
episode: 219 training return: tensor(-49566.6172, device='cuda:0')
epoch: 55 test_true_pfm: -59.451883663115154
episode: 220 training return: tensor(-49178.0820, device='cuda:0')
episode: 221 training return: tensor(-49121.6211, device='cuda:0')
episode: 222 training return: tensor(-51426.0352, device='cuda:0')
episode: 223 training return: tensor(-49205.5508, device='cuda:0')
epoch: 56 test_true_pfm: -55.456452368736954
episode: 224 training return: tensor(-49260.0977, device='cuda:0')
episode: 225 training return: tensor(-49608.4336, device='cuda:0')
episode: 226 training return: tensor(-51208.5586, device='cuda:0')
episode: 227 training return: tensor(-112112.5391, device='cuda:0')
epoch: 57 test_true_pfm: -214.242456431288
episode: 228 training return: tensor(-128015.4141, device='cuda:0')
episode: 229 training return: tensor(-135217.7969, device='cuda:0')
episode: 230 training return: tensor(-138962.4219, device='cuda:0')
episode: 231 training return: tensor(-47285.3594, device='cuda:0')
epoch: 58 test_true_pfm: -61.226127590046225
episode: 232 training return: tensor(-47120.9453, device='cuda:0')
episode: 233 training return: tensor(-46907.4922, device='cuda:0')
episode: 234 training return: tensor(-46831.7930, device='cuda:0')
episode: 235 training return: tensor(-46804.5586, device='cuda:0')
epoch: 59 test_true_pfm: -60.740554940729055
episode: 236 training return: tensor(-47135.1797, device='cuda:0')
episode: 237 training return: tensor(-46848.6992, device='cuda:0')
episode: 238 training return: tensor(-46872.4531, device='cuda:0')
episode: 239 training return: tensor(-46819.7734, device='cuda:0')
epoch: 60 test_true_pfm: -62.89693182516933
episode: 240 training return: tensor(-46862.4883, device='cuda:0')
episode: 241 training return: tensor(-46893.5547, device='cuda:0')
episode: 242 training return: tensor(-47502.1719, device='cuda:0')
episode: 243 training return: tensor(-46922.0352, device='cuda:0')
epoch: 61 test_true_pfm: -61.51199554255013
episode: 244 training return: tensor(-46885.3516, device='cuda:0')
episode: 245 training return: tensor(-47444.1641, device='cuda:0')
episode: 246 training return: tensor(-56191.3086, device='cuda:0')
episode: 247 training return: tensor(-47052.9258, device='cuda:0')
epoch: 62 test_true_pfm: -65.77312961194414
episode: 248 training return: tensor(-47635.3555, device='cuda:0')
episode: 249 training return: tensor(-47421.3594, device='cuda:0')
episode: 250 training return: tensor(-50881.1055, device='cuda:0')
episode: 251 training return: tensor(-50235.6094, device='cuda:0')
epoch: 63 test_true_pfm: -48.76735160156926
episode: 252 training return: tensor(-51942., device='cuda:0')
episode: 253 training return: tensor(-49098.5156, device='cuda:0')
episode: 254 training return: tensor(-48778.8398, device='cuda:0')
episode: 255 training return: tensor(-47179.6055, device='cuda:0')
epoch: 64 test_true_pfm: -65.41013383324174
episode: 256 training return: tensor(-47073.2109, device='cuda:0')
episode: 257 training return: tensor(-49304.1758, device='cuda:0')
episode: 258 training return: tensor(-47107.5469, device='cuda:0')
episode: 259 training return: tensor(-47040.6523, device='cuda:0')
epoch: 65 test_true_pfm: -64.49704886090173
episode: 260 training return: tensor(-47361.7188, device='cuda:0')
episode: 261 training return: tensor(-47175.1641, device='cuda:0')
episode: 262 training return: tensor(-53445.0547, device='cuda:0')
episode: 263 training return: tensor(-48127.2500, device='cuda:0')
epoch: 66 test_true_pfm: -56.11242075071012
episode: 264 training return: tensor(-47169.4688, device='cuda:0')
episode: 265 training return: tensor(-47361.0977, device='cuda:0')
episode: 266 training return: tensor(-47012.0547, device='cuda:0')
episode: 267 training return: tensor(-46827.8398, device='cuda:0')
epoch: 67 test_true_pfm: -64.27685388954103
episode: 268 training return: tensor(-46963.9141, device='cuda:0')
episode: 269 training return: tensor(-46910.1797, device='cuda:0')
episode: 270 training return: tensor(-46963.9414, device='cuda:0')
episode: 271 training return: tensor(-47090.4102, device='cuda:0')
epoch: 68 test_true_pfm: -74.44585760421714
episode: 272 training return: tensor(-47067.4180, device='cuda:0')
episode: 273 training return: tensor(-46886.3281, device='cuda:0')
episode: 274 training return: tensor(-46904.3242, device='cuda:0')
episode: 275 training return: tensor(-46928.9609, device='cuda:0')
epoch: 69 test_true_pfm: -63.576418466070265
episode: 276 training return: tensor(-47003.4766, device='cuda:0')
episode: 277 training return: tensor(-46923.5586, device='cuda:0')
episode: 278 training return: tensor(-47030.3633, device='cuda:0')
episode: 279 training return: tensor(-46993.5586, device='cuda:0')
epoch: 70 test_true_pfm: -64.50083711693897
episode: 280 training return: tensor(-46819.8320, device='cuda:0')
episode: 281 training return: tensor(-46898.9961, device='cuda:0')
episode: 282 training return: tensor(-46904.7500, device='cuda:0')
episode: 283 training return: tensor(-61611.9102, device='cuda:0')
epoch: 71 test_true_pfm: -35.73972320659675
episode: 284 training return: tensor(-67625.7422, device='cuda:0')
episode: 285 training return: tensor(-67034.0469, device='cuda:0')
episode: 286 training return: tensor(-50881.1641, device='cuda:0')
episode: 287 training return: tensor(-47206.7305, device='cuda:0')
epoch: 72 test_true_pfm: -61.473697149232464
episode: 288 training return: tensor(-46868.0391, device='cuda:0')
episode: 289 training return: tensor(-46938.4102, device='cuda:0')
episode: 290 training return: tensor(-46902.0391, device='cuda:0')
episode: 291 training return: tensor(-46858.9102, device='cuda:0')
epoch: 73 test_true_pfm: -61.42625694097066
episode: 292 training return: tensor(-49557.9141, device='cuda:0')
episode: 293 training return: tensor(-128572.3984, device='cuda:0')
episode: 294 training return: tensor(-49128.7812, device='cuda:0')
episode: 295 training return: tensor(-47005.1562, device='cuda:0')
epoch: 74 test_true_pfm: -62.14548171930276
episode: 296 training return: tensor(-46974.7930, device='cuda:0')
episode: 297 training return: tensor(-46928.3086, device='cuda:0')
episode: 298 training return: tensor(-46988.6641, device='cuda:0')
episode: 299 training return: tensor(-46939.6133, device='cuda:0')
epoch: 75 test_true_pfm: -62.15755160186834
episode: 300 training return: tensor(-46894.7695, device='cuda:0')
episode: 301 training return: tensor(-46855.7031, device='cuda:0')
episode: 302 training return: tensor(-50282.3906, device='cuda:0')
episode: 303 training return: tensor(-53992.4414, device='cuda:0')
epoch: 76 test_true_pfm: -72.57033626531303
episode: 304 training return: tensor(-48000.3398, device='cuda:0')
episode: 305 training return: tensor(-1.8917e+08, device='cuda:0')
episode: 306 training return: tensor(-57332.1250, device='cuda:0')
episode: 307 training return: tensor(-48940.9570, device='cuda:0')
epoch: 77 test_true_pfm: 175.31245247195886
episode: 308 training return: tensor(-70388.9219, device='cuda:0')
episode: 309 training return: tensor(-46172.1406, device='cuda:0')
episode: 310 training return: tensor(-48071.8281, device='cuda:0')
episode: 311 training return: tensor(-54788.0742, device='cuda:0')
epoch: 78 test_true_pfm: -78.0454299553481
episode: 312 training return: tensor(-46346.2539, device='cuda:0')
episode: 313 training return: tensor(-47449.6875, device='cuda:0')
episode: 314 training return: tensor(-47241.0703, device='cuda:0')
episode: 315 training return: tensor(-47034.2500, device='cuda:0')
epoch: 79 test_true_pfm: -98.82632747586713
episode: 316 training return: tensor(-47393.8320, device='cuda:0')
episode: 317 training return: tensor(-1512667.7500, device='cuda:0')
episode: 318 training return: tensor(-49464.7852, device='cuda:0')
episode: 319 training return: tensor(-54386.0078, device='cuda:0')
epoch: 80 test_true_pfm: 244.32793017690676
episode: 320 training return: tensor(-44533.5664, device='cuda:0')
episode: 321 training return: tensor(-61110.0312, device='cuda:0')
episode: 322 training return: tensor(-50880.8945, device='cuda:0')
episode: 323 training return: tensor(-47232.3594, device='cuda:0')
epoch: 81 test_true_pfm: -66.12333818838063
episode: 324 training return: tensor(-68086.3125, device='cuda:0')
episode: 325 training return: tensor(-47146.6641, device='cuda:0')
episode: 326 training return: tensor(-48301.0859, device='cuda:0')
episode: 327 training return: tensor(-32779.8516, device='cuda:0')
epoch: 82 test_true_pfm: 127.51690210882879
episode: 328 training return: tensor(-2.7939e+08, device='cuda:0')
episode: 329 training return: tensor(-46666.8633, device='cuda:0')
episode: 330 training return: tensor(-54320.0664, device='cuda:0')
episode: 331 training return: tensor(-48769.3711, device='cuda:0')
epoch: 83 test_true_pfm: -64.43705633214908
episode: 332 training return: tensor(-49459.5469, device='cuda:0')
episode: 333 training return: tensor(-51893.3672, device='cuda:0')
episode: 334 training return: tensor(-52298.5078, device='cuda:0')
episode: 335 training return: tensor(-47760.5547, device='cuda:0')
epoch: 84 test_true_pfm: -25.18926071631219
episode: 336 training return: tensor(-50746.1094, device='cuda:0')
episode: 337 training return: tensor(-54347.7734, device='cuda:0')
episode: 338 training return: tensor(-50812.1797, device='cuda:0')
episode: 339 training return: tensor(-50529.2734, device='cuda:0')
epoch: 85 test_true_pfm: -30.171611183774672
episode: 340 training return: tensor(-49402.0742, device='cuda:0')
episode: 341 training return: tensor(-50515.9688, device='cuda:0')
episode: 342 training return: tensor(-51138.9336, device='cuda:0')
episode: 343 training return: tensor(-50913.5078, device='cuda:0')
epoch: 86 test_true_pfm: -61.23466695372996
episode: 344 training return: tensor(-49866.2695, device='cuda:0')
episode: 345 training return: tensor(-47874.6641, device='cuda:0')
episode: 346 training return: tensor(-504430.9375, device='cuda:0')
episode: 347 training return: tensor(-91280.6094, device='cuda:0')
epoch: 87 test_true_pfm: -67.05365282640123
episode: 348 training return: tensor(-48342.8672, device='cuda:0')
episode: 349 training return: tensor(-48114.1641, device='cuda:0')
episode: 350 training return: tensor(-49302.7031, device='cuda:0')
episode: 351 training return: tensor(-51217.2930, device='cuda:0')
epoch: 88 test_true_pfm: -77.38092288007694
episode: 352 training return: tensor(-50727.5898, device='cuda:0')
episode: 353 training return: tensor(-50672.1602, device='cuda:0')
episode: 354 training return: tensor(-50557.9180, device='cuda:0')
episode: 355 training return: tensor(-63229932., device='cuda:0')
epoch: 89 test_true_pfm: -61.96034636392104
episode: 356 training return: tensor(-47064.7109, device='cuda:0')
episode: 357 training return: tensor(-47656.1797, device='cuda:0')
episode: 358 training return: tensor(-47359.1133, device='cuda:0')
episode: 359 training return: tensor(-47218.1875, device='cuda:0')
epoch: 90 test_true_pfm: -81.08041395838444
episode: 360 training return: tensor(-47135.6797, device='cuda:0')
episode: 361 training return: tensor(-47032.0156, device='cuda:0')
episode: 362 training return: tensor(-48516.8672, device='cuda:0')
episode: 363 training return: tensor(-55734.6797, device='cuda:0')
epoch: 91 test_true_pfm: -173.94361310149102
episode: 364 training return: tensor(-72828.5312, device='cuda:0')
episode: 365 training return: tensor(-27660.2578, device='cuda:0')
episode: 366 training return: tensor(-30536.6230, device='cuda:0')
episode: 367 training return: tensor(-29429.8477, device='cuda:0')
epoch: 92 test_true_pfm: 175.10257645257784
episode: 368 training return: tensor(-35972.4883, device='cuda:0')
episode: 369 training return: tensor(-47145.7344, device='cuda:0')
episode: 370 training return: tensor(-34617.0234, device='cuda:0')
episode: 371 training return: tensor(-47060.6094, device='cuda:0')
epoch: 93 test_true_pfm: -39.64346163623582
episode: 372 training return: tensor(-49023.5391, device='cuda:0')
episode: 373 training return: tensor(-49146.6055, device='cuda:0')
episode: 374 training return: tensor(-53154.7773, device='cuda:0')
episode: 375 training return: tensor(-50183.6367, device='cuda:0')
epoch: 94 test_true_pfm: -65.1704584198272
episode: 376 training return: tensor(-53633.9922, device='cuda:0')
episode: 377 training return: tensor(-189856.7031, device='cuda:0')
episode: 378 training return: tensor(-49505.2422, device='cuda:0')
episode: 379 training return: tensor(-61918.2969, device='cuda:0')
epoch: 95 test_true_pfm: -55.18884210552716
episode: 380 training return: tensor(-46883.7930, device='cuda:0')
episode: 381 training return: tensor(-47780.1289, device='cuda:0')
episode: 382 training return: tensor(-47399.0781, device='cuda:0')
episode: 383 training return: tensor(-13275817., device='cuda:0')
epoch: 96 test_true_pfm: -39.436957539073745
episode: 384 training return: tensor(-50036.5195, device='cuda:0')
episode: 385 training return: tensor(-46475.3750, device='cuda:0')
episode: 386 training return: tensor(-46415.5039, device='cuda:0')
episode: 387 training return: tensor(-212025.5312, device='cuda:0')
epoch: 97 test_true_pfm: -113.08237615517601
episode: 388 training return: tensor(-207776.0312, device='cuda:0')
episode: 389 training return: tensor(-213654.3438, device='cuda:0')
episode: 390 training return: tensor(-86064.7969, device='cuda:0')
episode: 391 training return: tensor(-47033.7148, device='cuda:0')
epoch: 98 test_true_pfm: -54.59649156606676
episode: 392 training return: tensor(-47114.5898, device='cuda:0')
episode: 393 training return: tensor(-47014.6172, device='cuda:0')
episode: 394 training return: tensor(-60357.5430, device='cuda:0')
episode: 395 training return: tensor(-47080.4375, device='cuda:0')
epoch: 99 test_true_pfm: -68.9051244314159
episode: 396 training return: tensor(-46437.2305, device='cuda:0')
episode: 397 training return: tensor(-45972.3906, device='cuda:0')
episode: 398 training return: tensor(-45330.9570, device='cuda:0')
episode: 399 training return: tensor(-26863.5801, device='cuda:0')
epoch: 100 test_true_pfm: -59.72920116833016
episode: 400 training return: tensor(-2.6797e+08, device='cuda:0')
episode: 401 training return: tensor(-49635.2734, device='cuda:0')
episode: 402 training return: tensor(-14373.4385, device='cuda:0')
episode: 403 training return: tensor(-281050.2188, device='cuda:0')
epoch: 101 test_true_pfm: -384.8223572353782
episode: 404 training return: tensor(-4.1554e+08, device='cuda:0')
episode: 405 training return: tensor(-4.7490e+10, device='cuda:0')
episode: 406 training return: tensor(-22860.6504, device='cuda:0')
episode: 407 training return: tensor(-2.0644e+08, device='cuda:0')
epoch: 102 test_true_pfm: 475.70186474012036
episode: 408 training return: tensor(-74207.2578, device='cuda:0')
episode: 409 training return: tensor(-1.1187e+10, device='cuda:0')
episode: 410 training return: tensor(-78152.3359, device='cuda:0')
episode: 411 training return: tensor(-1.5228e+08, device='cuda:0')
epoch: 103 test_true_pfm: 163.6844840375331
episode: 412 training return: tensor(-43594.2227, device='cuda:0')
episode: 413 training return: tensor(-46766.2695, device='cuda:0')
episode: 414 training return: tensor(-46862.3555, device='cuda:0')
episode: 415 training return: tensor(-47019.8945, device='cuda:0')
epoch: 104 test_true_pfm: -69.43995929006623
episode: 416 training return: tensor(-48065.7617, device='cuda:0')
episode: 417 training return: tensor(-47513.1328, device='cuda:0')
episode: 418 training return: tensor(-51303.9297, device='cuda:0')
episode: 419 training return: tensor(-5.0943e+08, device='cuda:0')
epoch: 105 test_true_pfm: -117.24503253890627
episode: 420 training return: tensor(-91736.4688, device='cuda:0')
episode: 421 training return: tensor(-1.1393e+09, device='cuda:0')
episode: 422 training return: tensor(-12870.0879, device='cuda:0')
episode: 423 training return: tensor(-63838.8906, device='cuda:0')
epoch: 106 test_true_pfm: 149.10755594848413
episode: 424 training return: tensor(-4.3181e+08, device='cuda:0')
episode: 425 training return: tensor(-14903.6104, device='cuda:0')
episode: 426 training return: tensor(-13008.0420, device='cuda:0')
episode: 427 training return: tensor(-10455.9814, device='cuda:0')
epoch: 107 test_true_pfm: 291.74367706622985
episode: 428 training return: tensor(-100997.8281, device='cuda:0')
episode: 429 training return: tensor(-487786.5938, device='cuda:0')
episode: 430 training return: tensor(-11589.9502, device='cuda:0')
episode: 431 training return: tensor(-93015.8906, device='cuda:0')
epoch: 108 test_true_pfm: 363.05189508261054
episode: 432 training return: tensor(-102625.4453, device='cuda:0')
episode: 433 training return: tensor(-85708.7188, device='cuda:0')
episode: 434 training return: tensor(-121074.7422, device='cuda:0')
episode: 435 training return: tensor(-97998.1953, device='cuda:0')
epoch: 109 test_true_pfm: 39.060165657413535
episode: 436 training return: tensor(-8564.8564, device='cuda:0')
episode: 437 training return: tensor(-8457.4111, device='cuda:0')
episode: 438 training return: tensor(-153299.0938, device='cuda:0')
episode: 439 training return: tensor(-17428.4434, device='cuda:0')
epoch: 110 test_true_pfm: 268.02147851638057
episode: 440 training return: tensor(-22382.5859, device='cuda:0')
episode: 441 training return: tensor(-22386.9961, device='cuda:0')
episode: 442 training return: tensor(-37111.4492, device='cuda:0')
episode: 443 training return: tensor(-23647.5957, device='cuda:0')
epoch: 111 test_true_pfm: -36.54596487512031
episode: 444 training return: tensor(-16177.2939, device='cuda:0')
episode: 445 training return: tensor(-53169.3750, device='cuda:0')
episode: 446 training return: tensor(-15948.5420, device='cuda:0')
episode: 447 training return: tensor(-9562.6348, device='cuda:0')
epoch: 112 test_true_pfm: -26.523766538790493
episode: 448 training return: tensor(-17293.2422, device='cuda:0')
episode: 449 training return: tensor(-119874.9688, device='cuda:0')
episode: 450 training return: tensor(-40708.8086, device='cuda:0')
episode: 451 training return: tensor(-26617.8828, device='cuda:0')
epoch: 113 test_true_pfm: -591.3737678883186
episode: 452 training return: tensor(-15064.4980, device='cuda:0')
episode: 453 training return: tensor(-38300.2656, device='cuda:0')
episode: 454 training return: tensor(-9593.3564, device='cuda:0')
episode: 455 training return: tensor(-48781.0898, device='cuda:0')
epoch: 114 test_true_pfm: 114.62940307282179
episode: 456 training return: tensor(-9196.5312, device='cuda:0')
episode: 457 training return: tensor(-9838540., device='cuda:0')
episode: 458 training return: tensor(-9719178., device='cuda:0')
episode: 459 training return: tensor(-1.2168e+08, device='cuda:0')
epoch: 115 test_true_pfm: -257.83897036313783
episode: 460 training return: tensor(-9398904., device='cuda:0')
episode: 461 training return: tensor(-398891.8438, device='cuda:0')
episode: 462 training return: tensor(-8452.3730, device='cuda:0')
episode: 463 training return: tensor(-8912634., device='cuda:0')
epoch: 116 test_true_pfm: -80.14964440682462
episode: 464 training return: tensor(-9961762., device='cuda:0')
episode: 465 training return: tensor(-11309.4990, device='cuda:0')
episode: 466 training return: tensor(-9024944., device='cuda:0')
episode: 467 training return: tensor(-9468927., device='cuda:0')
epoch: 117 test_true_pfm: -72.46137851508843
episode: 468 training return: tensor(-92391568., device='cuda:0')
episode: 469 training return: tensor(-10978.6875, device='cuda:0')
episode: 470 training return: tensor(-49354624., device='cuda:0')
episode: 471 training return: tensor(-9801607., device='cuda:0')
epoch: 118 test_true_pfm: -162.95595073975855
episode: 472 training return: tensor(-65862.5312, device='cuda:0')
episode: 473 training return: tensor(-1.5776e+08, device='cuda:0')
episode: 474 training return: tensor(-41877592., device='cuda:0')
episode: 475 training return: tensor(-8842676., device='cuda:0')
epoch: 119 test_true_pfm: -286.53980096951904
episode: 476 training return: tensor(-152445.6094, device='cuda:0')
episode: 477 training return: tensor(-127606.1094, device='cuda:0')
episode: 478 training return: tensor(-129115.2812, device='cuda:0')
episode: 479 training return: tensor(-152939.0312, device='cuda:0')
epoch: 120 test_true_pfm: 186.2809916750924
episode: 480 training return: tensor(-35607.8320, device='cuda:0')
episode: 481 training return: tensor(-7021197., device='cuda:0')
episode: 482 training return: tensor(-3207683.7500, device='cuda:0')
episode: 483 training return: tensor(-3225738.5000, device='cuda:0')
epoch: 121 test_true_pfm: 140.64683389431738
episode: 484 training return: tensor(-2822800., device='cuda:0')
episode: 485 training return: tensor(-2624793.7500, device='cuda:0')
episode: 486 training return: tensor(-3195222.2500, device='cuda:0')
episode: 487 training return: tensor(-4181.8271, device='cuda:0')
epoch: 122 test_true_pfm: -70.01444011135915
episode: 488 training return: tensor(-18590.7168, device='cuda:0')
episode: 489 training return: tensor(-11344.2734, device='cuda:0')
episode: 490 training return: tensor(-13382.1758, device='cuda:0')
episode: 491 training return: tensor(-13128.2725, device='cuda:0')
epoch: 123 test_true_pfm: 72.9646078460561
episode: 492 training return: tensor(-11424.7451, device='cuda:0')
episode: 493 training return: tensor(-12267.0771, device='cuda:0')
episode: 494 training return: tensor(-10279.2646, device='cuda:0')
episode: 495 training return: tensor(-5462.4258, device='cuda:0')
epoch: 124 test_true_pfm: -81.70812412419833
episode: 496 training return: tensor(-28153058., device='cuda:0')
episode: 497 training return: tensor(-24497.7617, device='cuda:0')
episode: 498 training return: tensor(-53614948., device='cuda:0')
episode: 499 training return: tensor(-12404.0645, device='cuda:0')
epoch: 125 test_true_pfm: 77.24968295998184
episode: 500 training return: tensor(-22684.2578, device='cuda:0')
episode: 501 training return: tensor(-15855.7588, device='cuda:0')
episode: 502 training return: tensor(-16005.3496, device='cuda:0')
episode: 503 training return: tensor(-16100.8574, device='cuda:0')
epoch: 126 test_true_pfm: 107.51470871828697
episode: 504 training return: tensor(-37600.7422, device='cuda:0')
episode: 505 training return: tensor(-16263479., device='cuda:0')
episode: 506 training return: tensor(-16483.4590, device='cuda:0')
episode: 507 training return: tensor(-16461.6152, device='cuda:0')
epoch: 127 test_true_pfm: 60.29516047137286
episode: 508 training return: tensor(-13796.2139, device='cuda:0')
episode: 509 training return: tensor(-15608.1670, device='cuda:0')
episode: 510 training return: tensor(-9437659., device='cuda:0')
episode: 511 training return: tensor(-41795132., device='cuda:0')
epoch: 128 test_true_pfm: 418.0659141530839
episode: 512 training return: tensor(-19470.0234, device='cuda:0')
episode: 513 training return: tensor(-96575.1328, device='cuda:0')
episode: 514 training return: tensor(-4341013., device='cuda:0')
episode: 515 training return: tensor(-103895.6797, device='cuda:0')
epoch: 129 test_true_pfm: -311.5264013794246
episode: 516 training return: tensor(-2297886.7500, device='cuda:0')
episode: 517 training return: tensor(-4889.2827, device='cuda:0')
episode: 518 training return: tensor(-3973.5540, device='cuda:0')
episode: 519 training return: tensor(-3743.0576, device='cuda:0')
epoch: 130 test_true_pfm: 33.63647023299372
episode: 520 training return: tensor(-4474.0518, device='cuda:0')
episode: 521 training return: tensor(-27428.9473, device='cuda:0')
episode: 522 training return: tensor(-4200.9746, device='cuda:0')
episode: 523 training return: tensor(-3098307.5000, device='cuda:0')
epoch: 131 test_true_pfm: 70.49857757409694
episode: 524 training return: tensor(-11493.5244, device='cuda:0')
episode: 525 training return: tensor(-9505844., device='cuda:0')
episode: 526 training return: tensor(-305890.4062, device='cuda:0')
episode: 527 training return: tensor(-6951089.5000, device='cuda:0')
epoch: 132 test_true_pfm: 103.78050297021436
episode: 528 training return: tensor(-23556.8594, device='cuda:0')
episode: 529 training return: tensor(-16084.5762, device='cuda:0')
episode: 530 training return: tensor(-1275786.5000, device='cuda:0')
episode: 531 training return: tensor(-5340005., device='cuda:0')
epoch: 133 test_true_pfm: -32.77205683083313
episode: 532 training return: tensor(-10983.6436, device='cuda:0')
episode: 533 training return: tensor(-3883.3118, device='cuda:0')
episode: 534 training return: tensor(-3850.8279, device='cuda:0')
episode: 535 training return: tensor(-4016.5671, device='cuda:0')
epoch: 134 test_true_pfm: 26.219282388552074
episode: 536 training return: tensor(-3691.3247, device='cuda:0')
episode: 537 training return: tensor(-9452.7002, device='cuda:0')
episode: 538 training return: tensor(-10932.1270, device='cuda:0')
episode: 539 training return: tensor(-10935.3848, device='cuda:0')
epoch: 135 test_true_pfm: -30.14116370255978
episode: 540 training return: tensor(-16214.8115, device='cuda:0')
episode: 541 training return: tensor(-12022.5859, device='cuda:0')
episode: 542 training return: tensor(-6058781.5000, device='cuda:0')
episode: 543 training return: tensor(-11156.9199, device='cuda:0')
epoch: 136 test_true_pfm: -24.647580587483322
episode: 544 training return: tensor(-10871.2197, device='cuda:0')
episode: 545 training return: tensor(-10937.3506, device='cuda:0')
episode: 546 training return: tensor(-11003.2314, device='cuda:0')
episode: 547 training return: tensor(-11008.7256, device='cuda:0')
epoch: 137 test_true_pfm: -45.403727080105966
episode: 548 training return: tensor(-11199.6875, device='cuda:0')
episode: 549 training return: tensor(-10868.5654, device='cuda:0')
episode: 550 training return: tensor(-11049.4092, device='cuda:0')
episode: 551 training return: tensor(-12759.5039, device='cuda:0')
epoch: 138 test_true_pfm: -13.729082784437722
episode: 552 training return: tensor(-16722.0801, device='cuda:0')
episode: 553 training return: tensor(-18075.3145, device='cuda:0')
episode: 554 training return: tensor(-19271.4609, device='cuda:0')
episode: 555 training return: tensor(-24654884., device='cuda:0')
epoch: 139 test_true_pfm: -44.18482599603933
episode: 556 training return: tensor(-15979.7832, device='cuda:0')
episode: 557 training return: tensor(-16501.6387, device='cuda:0')
episode: 558 training return: tensor(-16388.0879, device='cuda:0')
episode: 559 training return: tensor(-37867.7305, device='cuda:0')
epoch: 140 test_true_pfm: -35.97694733895654
episode: 560 training return: tensor(-38568.5820, device='cuda:0')
episode: 561 training return: tensor(-8341.2041, device='cuda:0')
episode: 562 training return: tensor(-8885311., device='cuda:0')
episode: 563 training return: tensor(-7789036.5000, device='cuda:0')
epoch: 141 test_true_pfm: 412.05743465737487
episode: 564 training return: tensor(-8736224., device='cuda:0')
episode: 565 training return: tensor(-9322243., device='cuda:0')
episode: 566 training return: tensor(-6953433.5000, device='cuda:0')
episode: 567 training return: tensor(-9936.6230, device='cuda:0')
epoch: 142 test_true_pfm: 27.311260338660997
episode: 568 training return: tensor(-12848.6338, device='cuda:0')
episode: 569 training return: tensor(-6109.6177, device='cuda:0')
episode: 570 training return: tensor(-6545042., device='cuda:0')
episode: 571 training return: tensor(-5746.3398, device='cuda:0')
epoch: 143 test_true_pfm: -85.82584447903257
episode: 572 training return: tensor(-15816.9219, device='cuda:0')
episode: 573 training return: tensor(-9396.6562, device='cuda:0')
episode: 574 training return: tensor(-32790.6328, device='cuda:0')
episode: 575 training return: tensor(-27559.1934, device='cuda:0')
epoch: 144 test_true_pfm: -88.594878245555
episode: 576 training return: tensor(-15902.7412, device='cuda:0')
episode: 577 training return: tensor(-29753.2559, device='cuda:0')
episode: 578 training return: tensor(-92841.8594, device='cuda:0')
episode: 579 training return: tensor(-28150.3535, device='cuda:0')
epoch: 145 test_true_pfm: -81.56541865894162
episode: 580 training return: tensor(-18886.0586, device='cuda:0')
episode: 581 training return: tensor(-59879.2734, device='cuda:0')
episode: 582 training return: tensor(-15898.8311, device='cuda:0')
episode: 583 training return: tensor(-3.8309e+08, device='cuda:0')
epoch: 146 test_true_pfm: 88.36968415597191
episode: 584 training return: tensor(-9185026., device='cuda:0')
episode: 585 training return: tensor(-4.6761e+09, device='cuda:0')
episode: 586 training return: tensor(-22032104., device='cuda:0')
episode: 587 training return: tensor(-10267.9014, device='cuda:0')
epoch: 147 test_true_pfm: 140.92583673574975
episode: 588 training return: tensor(-11507.6387, device='cuda:0')
episode: 589 training return: tensor(-4478800., device='cuda:0')
episode: 590 training return: tensor(-63922.5430, device='cuda:0')
episode: 591 training return: tensor(-82795.4609, device='cuda:0')
epoch: 148 test_true_pfm: 26.792644730182406
episode: 592 training return: tensor(-651584., device='cuda:0')
episode: 593 training return: tensor(-145608.9688, device='cuda:0')
episode: 594 training return: tensor(-6.4461e+08, device='cuda:0')
episode: 595 training return: tensor(-9.3359e+08, device='cuda:0')
epoch: 149 test_true_pfm: -124.04606938148753
episode: 596 training return: tensor(-3445186.2500, device='cuda:0')
episode: 597 training return: tensor(-7.2502e+08, device='cuda:0')
episode: 598 training return: tensor(-7.0104e+08, device='cuda:0')
episode: 599 training return: tensor(-1.4765e+10, device='cuda:0')
epoch: 150 test_true_pfm: 21.484606290224956
