['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'expert', '--seed', '1']
epoch: 0 training_loss 0.3387726014852524 test_loss: 0.24311726093292235
epoch: 1 training_loss 0.21098714649677278 test_loss: 0.19898947477340698
epoch: 2 training_loss 0.1958730448037386 test_loss: 0.20083889961242676
epoch: 3 training_loss 0.17313927240669727 test_loss: 0.17038347721099853
epoch: 4 training_loss 0.16131151683628558 test_loss: 0.1516830801963806
epoch: 5 training_loss 0.15211306631565094 test_loss: 0.15600097179412842
epoch: 6 training_loss 0.15424505792558194 test_loss: 0.1279153347015381
epoch: 7 training_loss 0.14458035629242658 test_loss: 0.1185030460357666
epoch: 8 training_loss 0.14247733905911444 test_loss: 0.14891985654830933
epoch: 9 training_loss 0.13493342097848654 test_loss: 0.1317140817642212
epoch: 10 training_loss 0.13294142886996269 test_loss: 0.10656713247299195
epoch: 11 training_loss 0.1404756512492895 test_loss: 0.1308828830718994
epoch: 12 training_loss 0.12853311840444803 test_loss: 0.11813772916793823
epoch: 13 training_loss 0.13107653852552176 test_loss: 0.11753603219985961
epoch: 14 training_loss 0.12802430488169192 test_loss: 0.14570374488830568
epoch: 15 training_loss 0.12828479455783964 test_loss: 0.13837053775787353
epoch: 16 training_loss 0.13123486757278444 test_loss: 0.12252247333526611
epoch: 17 training_loss 0.11343461252748967 test_loss: 0.1254451632499695
epoch: 18 training_loss 0.12478654023259877 test_loss: 0.11460647583007813
epoch: 19 training_loss 0.12541206229478122 test_loss: 0.137579345703125
epoch: 20 training_loss 0.11716829314827919 test_loss: 0.1040128231048584
epoch: 21 training_loss 0.12048947650939226 test_loss: 0.11675478219985962
epoch: 22 training_loss 0.11602230461314321 test_loss: 0.12740886211395264
epoch: 23 training_loss 0.11940019816160202 test_loss: 0.11868689060211182
epoch: 24 training_loss 0.1162295901030302 test_loss: 0.12959221601486207
epoch: 25 training_loss 0.1176738366484642 test_loss: 0.11174914836883545
epoch: 26 training_loss 0.11496605634689332 test_loss: 0.11725062131881714
epoch: 27 training_loss 0.11877105243504048 test_loss: 0.11737544536590576
epoch: 28 training_loss 0.10925087939947843 test_loss: 0.11336736679077149
epoch: 29 training_loss 0.11418040849268436 test_loss: 0.12736151218414307
epoch: 30 training_loss 0.11336571175605059 test_loss: 0.11682348251342774
epoch: 31 training_loss 0.1109638423472643 test_loss: 0.10142661333084106
epoch: 32 training_loss 0.113766911290586 test_loss: 0.11683247089385987
epoch: 33 training_loss 0.11743856478482485 test_loss: 0.11840540170669556
epoch: 34 training_loss 0.10013199850916862 test_loss: 0.10591708421707154
epoch: 35 training_loss 0.11699892710894347 test_loss: 0.11530075073242188
epoch: 36 training_loss 0.11732362914830446 test_loss: 0.12142297029495239
epoch: 37 training_loss 0.10970982145518064 test_loss: 0.11535425186157226
epoch: 38 training_loss 0.10598728723824025 test_loss: 0.11459060907363891
epoch: 39 training_loss 0.11733509425073863 test_loss: 0.11518375873565674
epoch: 40 training_loss 0.11872422602027655 test_loss: 0.10161306858062744
epoch: 41 training_loss 0.11169476808980107 test_loss: 0.11471660137176513
epoch: 42 training_loss 0.1045301972515881 test_loss: 0.12026678323745728
epoch: 43 training_loss 0.1086757292598486 test_loss: 0.11300473213195801
epoch: 44 training_loss 0.11111729979515075 test_loss: 0.10305085182189941
epoch: 45 training_loss 0.11133799705654383 test_loss: 0.1388596773147583
epoch: 46 training_loss 0.10994757939130068 test_loss: 0.11946345567703247
epoch: 47 training_loss 0.10609202779829502 test_loss: 0.10809023380279541
epoch: 48 training_loss 0.1114595633931458 test_loss: 0.10244570970535279
epoch: 49 training_loss 0.11769162558019161 test_loss: 0.09599139094352722
epoch: 50 training_loss 0.1243081296980381 test_loss: 0.12091610431671143
epoch: 51 training_loss 0.10854320857673884 test_loss: 0.11410943269729615
epoch: 52 training_loss 0.10844034355133772 test_loss: 0.10990041494369507
epoch: 53 training_loss 0.10993979413062334 test_loss: 0.11003917455673218
epoch: 54 training_loss 0.1180684781447053 test_loss: 0.10355409383773803
epoch: 55 training_loss 0.11042883625254035 test_loss: 0.11339417695999146
epoch: 56 training_loss 0.11602017462253571 test_loss: 0.11029345989227295
epoch: 57 training_loss 0.11122491162270308 test_loss: 0.11678084135055541
epoch: 58 training_loss 0.11419358044862747 test_loss: 0.09883860349655152
epoch: 59 training_loss 0.10769696490839124 test_loss: 0.11570756435394287
epoch: 60 training_loss 0.11086591944098473 test_loss: 0.10361338853836059
epoch: 61 training_loss 0.11805707391351461 test_loss: 0.12182139158248902
epoch: 62 training_loss 0.10713867865502834 test_loss: 0.11380207538604736
epoch: 63 training_loss 0.12303705137223005 test_loss: 0.11173708438873291
epoch: 64 training_loss 0.10998839551582933 test_loss: 0.12340079545974732
epoch: 65 training_loss 0.11656127959489822 test_loss: 0.12163735628128051
epoch: 66 training_loss 0.10895570896565915 test_loss: 0.11962215900421143
epoch: 67 training_loss 0.11371287304908037 test_loss: 0.10586894750595092
epoch: 68 training_loss 0.10892895437777042 test_loss: 0.10609362125396729
epoch: 69 training_loss 0.10621303029358387 test_loss: 0.12356418371200562
epoch: 70 training_loss 0.11320926286280156 test_loss: 0.09763914346694946
epoch: 71 training_loss 0.11157946281135082 test_loss: 0.1043984055519104
epoch: 72 training_loss 0.10342189783230424 test_loss: 0.11300673484802246
epoch: 73 training_loss 0.11138532925397157 test_loss: 0.10983918905258179
epoch: 74 training_loss 0.11472722381353379 test_loss: 0.0989259660243988
epoch: 75 training_loss 0.11339410200715065 test_loss: 0.11858687400817872
epoch: 76 training_loss 0.10794052511453628 test_loss: 0.09835150837898254
epoch: 77 training_loss 0.10917092882096767 test_loss: 0.10479010343551635
epoch: 78 training_loss 0.10440710809081793 test_loss: 0.10617066621780395
epoch: 79 training_loss 0.10903144303709268 test_loss: 0.09926320314407348
epoch: 80 training_loss 0.11458156459033489 test_loss: 0.11103291511535644
epoch: 81 training_loss 0.1113708969578147 test_loss: 0.1048469066619873
epoch: 82 training_loss 0.11222985733300447 test_loss: 0.10002269744873046
epoch: 83 training_loss 0.1060179740563035 test_loss: 0.13184211254119874
epoch: 84 training_loss 0.11193614143878222 test_loss: 0.10548840761184693
epoch: 85 training_loss 0.10558489359915256 test_loss: 0.10579344034194946
epoch: 86 training_loss 0.10851907808333636 test_loss: 0.1175496220588684
epoch: 87 training_loss 0.10450724367052316 test_loss: 0.101271653175354
epoch: 88 training_loss 0.10886973399668932 test_loss: 0.10546931028366088
epoch: 89 training_loss 0.11141862086951733 test_loss: 0.11075609922409058
epoch: 90 training_loss 0.10691400613635778 test_loss: 0.10424717664718627
epoch: 91 training_loss 0.11624016277492047 test_loss: 0.10872892141342164
epoch: 92 training_loss 0.10760650018230081 test_loss: 0.10842716693878174
epoch: 93 training_loss 0.10586781742051243 test_loss: 0.09430912733078003
epoch: 94 training_loss 0.11408410239964724 test_loss: 0.11011641025543213
epoch: 95 training_loss 0.11132972853258252 test_loss: 0.11754215955734253
epoch: 96 training_loss 0.11200279727578163 test_loss: 0.11164684295654297
epoch: 97 training_loss 0.11026244629174471 test_loss: 0.1179950475692749
epoch: 98 training_loss 0.10659061877056956 test_loss: 0.09788950681686401
epoch: 99 training_loss 0.10589910246431827 test_loss: 0.11258965730667114
epoch: 100 training_loss 0.10228996295481921 test_loss: 0.10175994634628296
epoch: 101 training_loss 0.10715999715030193 test_loss: 0.10729458332061767
epoch: 102 training_loss 0.11080307479947805 test_loss: 0.11939377784729004
epoch: 103 training_loss 0.11024235859513283 test_loss: 0.11017724275588989
epoch: 104 training_loss 0.10419963572174311 test_loss: 0.09280675649642944
epoch: 105 training_loss 0.11362996995449066 test_loss: 0.12144683599472046
epoch: 106 training_loss 0.11603269025683403 test_loss: 0.10093836784362793
epoch: 107 training_loss 0.1116700293123722 test_loss: 0.10017157793045044
epoch: 108 training_loss 0.10743168178945779 test_loss: 0.11167997121810913
epoch: 109 training_loss 0.10937984298914671 test_loss: 0.11816307306289672
epoch: 110 training_loss 0.10569019047543407 test_loss: 0.11156752109527587
epoch: 111 training_loss 0.10713920827955008 test_loss: 0.09252851605415344
epoch: 112 training_loss 0.10877062868326902 test_loss: 0.10178189277648926
epoch: 113 training_loss 0.10324372414499522 test_loss: 0.0956694483757019
epoch: 114 training_loss 0.10606698773801326 test_loss: 0.08685773015022277
epoch: 115 training_loss 0.10932063233107328 test_loss: 0.11063227653503419
epoch: 116 training_loss 0.10871496371924877 test_loss: 0.10947959423065186
epoch: 117 training_loss 0.10913313554599881 test_loss: 0.1099177360534668
epoch: 118 training_loss 0.10561025895178318 test_loss: 0.09747022986412049
epoch: 119 training_loss 0.10718280605971814 test_loss: 0.1033437728881836
epoch: 120 training_loss 0.10614053677767515 test_loss: 0.10257710218429565
epoch: 121 training_loss 0.11611283306032419 test_loss: 0.09270419478416443
epoch: 122 training_loss 0.10904926106333733 test_loss: 0.09947887659072877
epoch: 123 training_loss 0.10165427666157484 test_loss: 0.10376737117767335
epoch: 124 training_loss 0.11214826220646501 test_loss: 0.10357815027236938
epoch: 125 training_loss 0.10914137043058872 test_loss: 0.11062651872634888
epoch: 126 training_loss 0.10838981855660677 test_loss: 0.10936752557754517
epoch: 127 training_loss 0.11080540396273136 test_loss: 0.11448477506637574
epoch: 128 training_loss 0.11283843968063594 test_loss: 0.09855251908302307
epoch: 129 training_loss 0.10218446515500546 test_loss: 0.0837751567363739
epoch: 130 training_loss 0.10619255907833576 test_loss: 0.11340144872665406
epoch: 131 training_loss 0.11139189515262843 test_loss: 0.10388293266296386
epoch: 132 training_loss 0.11299512784928084 test_loss: 0.12430881261825562
epoch: 133 training_loss 0.106721902936697 test_loss: 0.10640466213226318
epoch: 134 training_loss 0.11654455371201039 test_loss: 0.11429891586303711
epoch: 135 training_loss 0.11038363303989172 test_loss: 0.10560864210128784
epoch: 136 training_loss 0.10917588137090206 test_loss: 0.11990196704864502
epoch: 137 training_loss 0.10892591655254363 test_loss: 0.12206686735153198
epoch: 138 training_loss 0.11099397983402014 test_loss: 0.10397099256515503
epoch: 139 training_loss 0.10982723111286759 test_loss: 0.09098992943763733
epoch: 140 training_loss 0.10864399064332247 test_loss: 0.0924554169178009
epoch: 141 training_loss 0.10797262378036976 test_loss: 0.098850679397583
epoch: 142 training_loss 0.10663673501461744 test_loss: 0.10443835258483887
epoch: 143 training_loss 0.10541469927877188 test_loss: 0.10645871162414551
epoch: 144 training_loss 0.10878646275028586 test_loss: 0.1086394190788269
epoch: 145 training_loss 0.11218577083200217 test_loss: 0.09685980677604675
epoch: 146 training_loss 0.10307760706171393 test_loss: 0.09917514324188233
epoch: 147 training_loss 0.10616971882060171 test_loss: 0.10762987136840821
epoch: 148 training_loss 0.11042147049680352 test_loss: 0.10094249248504639
epoch: 149 training_loss 0.09777366328984499 test_loss: 0.10792092084884644
epoch: 0 training_loss 46.90962724685669 test_loss: 26.00102844238281
epoch: 1 training_loss 21.271364097595214 test_loss: 18.09069061279297
epoch: 2 training_loss 16.32497422218323 test_loss: 15.434370422363282
epoch: 3 training_loss 14.020843210220336 test_loss: 12.842103576660156
epoch: 4 training_loss 12.089767732620238 test_loss: 11.084286499023438
epoch: 5 training_loss 10.572263898849487 test_loss: 9.859727478027343
epoch: 6 training_loss 9.372062187194825 test_loss: 8.881830596923828
epoch: 7 training_loss 8.617248516082764 test_loss: 8.219713592529297
epoch: 8 training_loss 8.169422988891602 test_loss: 7.5070343017578125
epoch: 9 training_loss 7.551941394805908 test_loss: 7.219432067871094
epoch: 10 training_loss 7.028273768424988 test_loss: 6.6800178527832035
epoch: 11 training_loss 6.592255206108093 test_loss: 6.317858505249023
epoch: 12 training_loss 6.248280177116394 test_loss: 6.295164108276367
epoch: 13 training_loss 5.861264066696167 test_loss: 5.915903854370117
epoch: 14 training_loss 5.767610836029053 test_loss: 5.2772361755371096
epoch: 15 training_loss 5.458146185874939 test_loss: 5.344832229614258
epoch: 16 training_loss 5.2890576171875 test_loss: 5.361270523071289
epoch: 17 training_loss 5.043140738010407 test_loss: 4.875595474243164
epoch: 18 training_loss 4.853896028995514 test_loss: 4.598144912719727
epoch: 19 training_loss 4.761763725280762 test_loss: 4.688255310058594
epoch: 20 training_loss 4.590053031444549 test_loss: 4.395342254638672
epoch: 21 training_loss 4.5075205492973325 test_loss: 4.576644515991211
epoch: 22 training_loss 4.448096451759338 test_loss: 4.483627700805664
epoch: 23 training_loss 4.321691298484803 test_loss: 4.226232528686523
epoch: 24 training_loss 4.105501277446747 test_loss: 4.045708465576172
epoch: 25 training_loss 4.122666153907776 test_loss: 4.1148223876953125
epoch: 26 training_loss 3.9944116258621216 test_loss: 3.95721549987793
epoch: 27 training_loss 4.032039651870727 test_loss: 3.983612060546875
epoch: 28 training_loss 3.987670340538025 test_loss: 3.75404052734375
epoch: 29 training_loss 3.869778673648834 test_loss: 3.7051151275634764
epoch: 30 training_loss 3.7639875078201293 test_loss: 3.747505950927734
epoch: 31 training_loss 3.697405734062195 test_loss: 3.7789703369140626
epoch: 32 training_loss 3.5926248478889464 test_loss: 3.583180618286133
epoch: 33 training_loss 3.550356912612915 test_loss: 3.487196350097656
epoch: 34 training_loss 3.437732002735138 test_loss: 3.5623008728027346
epoch: 35 training_loss 3.4619547390937804 test_loss: 3.2841747283935545
epoch: 36 training_loss 3.341915473937988 test_loss: 3.289484405517578
epoch: 37 training_loss 3.4198207592964174 test_loss: 3.288456344604492
epoch: 38 training_loss 3.3002720284461975 test_loss: 3.46234130859375
epoch: 39 training_loss 3.252953667640686 test_loss: 3.2860366821289064
epoch: 40 training_loss 3.244385142326355 test_loss: 3.165077018737793
epoch: 41 training_loss 3.1706146502494814 test_loss: 2.997492218017578
epoch: 42 training_loss 3.180047731399536 test_loss: 3.1416248321533202
epoch: 43 training_loss 3.1365674543380737 test_loss: 3.072026252746582
epoch: 44 training_loss 3.0416474509239197 test_loss: 3.045161819458008
epoch: 45 training_loss 3.1541425490379336 test_loss: 3.0145387649536133
epoch: 46 training_loss 2.9804034948349 test_loss: 2.96080322265625
epoch: 47 training_loss 2.948178036212921 test_loss: 3.0161972045898438
epoch: 48 training_loss 2.8736829829216 test_loss: 2.9771331787109374
epoch: 49 training_loss 2.8849404215812684 test_loss: 2.8942018508911134
epoch: 50 training_loss 2.880422179698944 test_loss: 2.8775279998779295
epoch: 51 training_loss 2.8705701494216918 test_loss: 2.7263162612915037
epoch: 52 training_loss 2.8414183259010315 test_loss: 2.742925834655762
epoch: 53 training_loss 2.8208646786212923 test_loss: 2.8118354797363283
epoch: 54 training_loss 2.8146286725997927 test_loss: 2.7707822799682615
epoch: 55 training_loss 2.736378810405731 test_loss: 2.663465118408203
epoch: 56 training_loss 2.690477728843689 test_loss: 2.6887304306030275
epoch: 57 training_loss 2.813394696712494 test_loss: 2.7003774642944336
epoch: 58 training_loss 2.6266593647003176 test_loss: 2.7059581756591795
epoch: 59 training_loss 2.6910283041000365 test_loss: 2.6729597091674804
epoch: 60 training_loss 2.6362611556053164 test_loss: 2.6099241256713865
epoch: 61 training_loss 2.6343743276596068 test_loss: 2.4997957229614256
epoch: 62 training_loss 2.589480843544006 test_loss: 2.5339435577392577
epoch: 63 training_loss 2.591021304130554 test_loss: 2.4896242141723635
epoch: 64 training_loss 2.492710928916931 test_loss: 2.5597291946411134
epoch: 65 training_loss 2.4791325187683104 test_loss: 2.473276901245117
epoch: 66 training_loss 2.538748228549957 test_loss: 2.447245216369629
epoch: 67 training_loss 2.4416827356815336 test_loss: 2.563326835632324
epoch: 68 training_loss 2.429545236825943 test_loss: 2.3846914291381838
epoch: 69 training_loss 2.423149869441986 test_loss: 2.3413862228393554
epoch: 70 training_loss 2.3681331765651703 test_loss: 2.4657772064208983
epoch: 71 training_loss 2.3437243056297303 test_loss: 2.4861202239990234
epoch: 72 training_loss 2.46634547829628 test_loss: 2.4062137603759766
epoch: 73 training_loss 2.3371949672698973 test_loss: 2.4551462173461913
epoch: 74 training_loss 2.330381897687912 test_loss: 2.2753530502319337
epoch: 75 training_loss 2.378661962747574 test_loss: 2.283693313598633
epoch: 76 training_loss 2.314512506723404 test_loss: 2.325809860229492
epoch: 77 training_loss 2.3006149590015412 test_loss: 2.3403604507446287
epoch: 78 training_loss 2.2926387190818787 test_loss: 2.266534614562988
epoch: 79 training_loss 2.2820899522304536 test_loss: 2.2753536224365236
epoch: 80 training_loss 2.203454923629761 test_loss: 2.3730005264282226
epoch: 81 training_loss 2.309672837257385 test_loss: 2.2704925537109375
epoch: 82 training_loss 2.2194182896614074 test_loss: 2.268993949890137
epoch: 83 training_loss 2.263553389310837 test_loss: 2.273081398010254
epoch: 84 training_loss 2.2383449482917785 test_loss: 2.1982345581054688
epoch: 85 training_loss 2.1707758224010467 test_loss: 2.2562929153442384
epoch: 86 training_loss 2.2085682117938994 test_loss: 2.1657735824584963
epoch: 87 training_loss 2.162203713655472 test_loss: 2.2232568740844725
epoch: 88 training_loss 2.161803225278854 test_loss: 2.1792240142822266
epoch: 89 training_loss 2.140617861747742 test_loss: 2.0155359268188477
epoch: 90 training_loss 2.0968423783779144 test_loss: 2.0152406692504883
epoch: 91 training_loss 2.1372609519958496 test_loss: 2.0502296447753907
epoch: 92 training_loss 2.0842112481594084 test_loss: 2.1459478378295898
epoch: 93 training_loss 2.125684999227524 test_loss: 2.107477378845215
epoch: 94 training_loss 2.142532293796539 test_loss: 2.151461601257324
epoch: 95 training_loss 2.1070562660694123 test_loss: 2.129355239868164
epoch: 96 training_loss 2.0387676656246185 test_loss: 2.1376089096069335
epoch: 97 training_loss 2.044682022333145 test_loss: 2.150875473022461
epoch: 98 training_loss 2.102936853170395 test_loss: 2.0103055953979494
epoch: 99 training_loss 2.0865445268154144 test_loss: 2.0434896469116213
epoch: 100 training_loss 2.0974356818199156 test_loss: 1.9982181549072267
epoch: 101 training_loss 2.001383420228958 test_loss: 2.02144775390625
epoch: 102 training_loss 2.0048057317733763 test_loss: 2.0271926879882813
epoch: 103 training_loss 1.9801112747192382 test_loss: 2.021823501586914
epoch: 104 training_loss 2.08027362704277 test_loss: 2.057868766784668
epoch: 105 training_loss 1.9812948143482207 test_loss: 2.02676944732666
epoch: 106 training_loss 2.0171537709236147 test_loss: 2.0568004608154298
epoch: 107 training_loss 2.003362448215485 test_loss: 1.9547351837158202
epoch: 108 training_loss 1.9894478881359101 test_loss: 1.9824947357177733
epoch: 109 training_loss 1.987786500453949 test_loss: 2.058390235900879
epoch: 110 training_loss 1.9652215802669526 test_loss: 1.953390121459961
epoch: 111 training_loss 1.937818228006363 test_loss: 2.0535383224487305
epoch: 112 training_loss 1.9375437211990356 test_loss: 1.8617109298706054
epoch: 113 training_loss 1.9511291885375976 test_loss: 2.084780693054199
epoch: 114 training_loss 1.8869313716888427 test_loss: 1.8693309783935548
epoch: 115 training_loss 1.9405700039863587 test_loss: 1.9816526412963866
epoch: 116 training_loss 1.9763482999801636 test_loss: 1.9742321014404296
epoch: 117 training_loss 1.9145894515514374 test_loss: 1.9656763076782227
epoch: 118 training_loss 1.9259903860092162 test_loss: 2.0093175888061525
epoch: 119 training_loss 1.9080080473423005 test_loss: 1.8561058044433594
epoch: 120 training_loss 1.8681417071819306 test_loss: 1.9428724288940429
epoch: 121 training_loss 1.920583426952362 test_loss: 1.85730037689209
epoch: 122 training_loss 1.85494313955307 test_loss: 1.8351018905639649
epoch: 123 training_loss 1.9029827392101288 test_loss: 1.8249065399169921
epoch: 124 training_loss 1.888873575925827 test_loss: 1.814291763305664
epoch: 125 training_loss 1.8889631569385528 test_loss: 2.006390380859375
epoch: 126 training_loss 1.8554862368106841 test_loss: 1.865107536315918
epoch: 127 training_loss 1.8450397956371307 test_loss: 1.901102066040039
epoch: 128 training_loss 1.8839521086215973 test_loss: 1.9086481094360352
epoch: 129 training_loss 1.9083045780658723 test_loss: 1.8291980743408203
epoch: 130 training_loss 1.816880452632904 test_loss: 1.8111783981323242
epoch: 131 training_loss 1.8327173388004303 test_loss: 1.7627973556518555
epoch: 132 training_loss 1.8573205268383026 test_loss: 1.7850475311279297
epoch: 133 training_loss 1.8423284208774566 test_loss: 1.8334028244018554
epoch: 134 training_loss 1.8117340993881226 test_loss: 1.812697410583496
epoch: 135 training_loss 1.8036782228946686 test_loss: 1.8833942413330078
epoch: 136 training_loss 1.8093975389003754 test_loss: 1.6887971878051757
epoch: 137 training_loss 1.768471268415451 test_loss: 1.7646671295166017
epoch: 138 training_loss 1.7963227427005768 test_loss: 1.8097444534301759
epoch: 139 training_loss 1.7843489515781403 test_loss: 1.7967140197753906
epoch: 140 training_loss 1.786998598575592 test_loss: 1.752352523803711
epoch: 141 training_loss 1.7575694584846497 test_loss: 1.8550413131713868
epoch: 142 training_loss 1.7994517493247986 test_loss: 1.6853946685791015
epoch: 143 training_loss 1.7958113980293273 test_loss: 1.7621559143066405
epoch: 144 training_loss 1.7867162454128265 test_loss: 1.7169683456420899
epoch: 145 training_loss 1.7488562977313995 test_loss: 1.6343551635742188
epoch: 146 training_loss 1.7440610826015472 test_loss: 1.8846712112426758
epoch: 147 training_loss 1.7420177233219147 test_loss: 1.6999172210693358
epoch: 148 training_loss 1.738276618719101 test_loss: 1.685347557067871
epoch: 149 training_loss 1.7359710037708282 test_loss: 1.6488275527954102
8577.632079367168
episode: 0 training return: tensor(-991.9265, device='cuda:0')
episode: 1 training return: tensor(-321.5733, device='cuda:0')
episode: 2 training return: tensor(-342.1479, device='cuda:0')
episode: 3 training return: tensor(-499.6694, device='cuda:0')
epoch: 1 test_true_pfm: 10008.135934837266 sim_pfm: -589.258000989561
episode: 4 training return: tensor(-764.5817, device='cuda:0')
episode: 5 training return: tensor(-831.5190, device='cuda:0')
episode: 6 training return: tensor(-696.0063, device='cuda:0')
episode: 7 training return: tensor(-227.7329, device='cuda:0')
epoch: 2 test_true_pfm: 6579.642257606782 sim_pfm: -529.117268429099
episode: 8 training return: tensor(-990.5380, device='cuda:0')
episode: 9 training return: tensor(-407.4883, device='cuda:0')
episode: 10 training return: tensor(-722.6655, device='cuda:0')
episode: 11 training return: tensor(-999.8735, device='cuda:0')
epoch: 3 test_true_pfm: 3044.757370535179 sim_pfm: -349.6472980165854
episode: 12 training return: tensor(-609.4536, device='cuda:0')
episode: 13 training return: tensor(-850.4564, device='cuda:0')
episode: 14 training return: tensor(-375.9225, device='cuda:0')
episode: 15 training return: tensor(-620.3824, device='cuda:0')
epoch: 4 test_true_pfm: 6549.290590721775 sim_pfm: -354.05907123875414
episode: 16 training return: tensor(-590.0188, device='cuda:0')
episode: 17 training return: tensor(-984.4073, device='cuda:0')
episode: 18 training return: tensor(-394.1949, device='cuda:0')
episode: 19 training return: tensor(-992.6152, device='cuda:0')
epoch: 5 test_true_pfm: 8427.260510288146 sim_pfm: -510.79163649418234
episode: 20 training return: tensor(-963.2718, device='cuda:0')
episode: 21 training return: tensor(-555.3340, device='cuda:0')
episode: 22 training return: tensor(-999.7662, device='cuda:0')
episode: 23 training return: tensor(-494.1437, device='cuda:0')
epoch: 6 test_true_pfm: 7992.975438533638 sim_pfm: -457.7078733778908
episode: 24 training return: tensor(-835.8382, device='cuda:0')
episode: 25 training return: tensor(-393.5926, device='cuda:0')
episode: 26 training return: tensor(-746.3090, device='cuda:0')
episode: 27 training return: tensor(-988.2358, device='cuda:0')
epoch: 7 test_true_pfm: 6750.123295689645 sim_pfm: -501.5989707301099
episode: 28 training return: tensor(-214.2099, device='cuda:0')
episode: 29 training return: tensor(-240.8607, device='cuda:0')
episode: 30 training return: tensor(-303.8033, device='cuda:0')
episode: 31 training return: tensor(-873.4849, device='cuda:0')
epoch: 8 test_true_pfm: 3395.0572994063073 sim_pfm: -547.4542064190609
episode: 32 training return: tensor(-995.2889, device='cuda:0')
episode: 33 training return: tensor(31.3071, device='cuda:0')
episode: 34 training return: tensor(-107.9431, device='cuda:0')
episode: 35 training return: tensor(-489.3918, device='cuda:0')
epoch: 9 test_true_pfm: 7203.35075795383 sim_pfm: -350.0810957910726
episode: 36 training return: tensor(-187.5058, device='cuda:0')
episode: 37 training return: tensor(-395.1850, device='cuda:0')
episode: 38 training return: tensor(-512.6102, device='cuda:0')
episode: 39 training return: tensor(-844.1599, device='cuda:0')
epoch: 10 test_true_pfm: 9749.197975045185 sim_pfm: -697.0895028264882
episode: 40 training return: tensor(-999.9985, device='cuda:0')
episode: 41 training return: tensor(-651.6544, device='cuda:0')
episode: 42 training return: tensor(-283.5889, device='cuda:0')
episode: 43 training return: tensor(-178.8405, device='cuda:0')
epoch: 11 test_true_pfm: 10179.537481446388 sim_pfm: -234.5161789134921
episode: 44 training return: tensor(-116.4942, device='cuda:0')
episode: 45 training return: tensor(-136.3186, device='cuda:0')
episode: 46 training return: tensor(136.5914, device='cuda:0')
episode: 47 training return: tensor(-468.7066, device='cuda:0')
epoch: 12 test_true_pfm: 10167.89076751565 sim_pfm: -551.6619560925174
episode: 48 training return: tensor(-262.3584, device='cuda:0')
episode: 49 training return: tensor(-266.6536, device='cuda:0')
episode: 50 training return: tensor(-599.1704, device='cuda:0')
episode: 51 training return: tensor(-228.4771, device='cuda:0')
epoch: 13 test_true_pfm: 6687.425320032859 sim_pfm: -7.346987010028291
episode: 52 training return: tensor(-999.9338, device='cuda:0')
episode: 53 training return: tensor(-999.9999, device='cuda:0')
episode: 54 training return: tensor(-361.3689, device='cuda:0')
episode: 55 training return: tensor(-99.0636, device='cuda:0')
epoch: 14 test_true_pfm: 10295.95066412366 sim_pfm: 39.91662450907946
episode: 56 training return: tensor(-42.6841, device='cuda:0')
episode: 57 training return: tensor(-218.1547, device='cuda:0')
episode: 58 training return: tensor(-972.7495, device='cuda:0')
episode: 59 training return: tensor(-999.7200, device='cuda:0')
epoch: 15 test_true_pfm: 5741.520953672749 sim_pfm: -22.152668357399914
episode: 60 training return: tensor(-353.6775, device='cuda:0')
episode: 61 training return: tensor(-305.9646, device='cuda:0')
episode: 62 training return: tensor(-146.8619, device='cuda:0')
episode: 63 training return: tensor(-999.8689, device='cuda:0')
epoch: 16 test_true_pfm: 10339.808319779257 sim_pfm: 31.38211285712896
episode: 64 training return: tensor(-958.1404, device='cuda:0')
episode: 65 training return: tensor(-989.3907, device='cuda:0')
episode: 66 training return: tensor(-216.0120, device='cuda:0')
episode: 67 training return: tensor(-131.8737, device='cuda:0')
epoch: 17 test_true_pfm: 10158.880279564217 sim_pfm: -43.05753525329055
episode: 68 training return: tensor(-148.1444, device='cuda:0')
episode: 69 training return: tensor(-35.5524, device='cuda:0')
episode: 70 training return: tensor(-120.6162, device='cuda:0')
episode: 71 training return: tensor(78.5177, device='cuda:0')
epoch: 18 test_true_pfm: 6439.3693306560845 sim_pfm: -409.3677167384788
episode: 72 training return: tensor(-404.0979, device='cuda:0')
episode: 73 training return: tensor(-113.6317, device='cuda:0')
episode: 74 training return: tensor(-499.1248, device='cuda:0')
episode: 75 training return: tensor(-710.8022, device='cuda:0')
epoch: 19 test_true_pfm: 6769.926972441744 sim_pfm: -374.3508684618864
episode: 76 training return: tensor(-159.1129, device='cuda:0')
episode: 77 training return: tensor(-1000., device='cuda:0')
episode: 78 training return: tensor(-180.7591, device='cuda:0')
episode: 79 training return: tensor(-460.0052, device='cuda:0')
epoch: 20 test_true_pfm: 9294.252569568653 sim_pfm: -571.8009652115483
episode: 80 training return: tensor(-53.2967, device='cuda:0')
episode: 81 training return: tensor(-405.9605, device='cuda:0')
episode: 82 training return: tensor(-243.2926, device='cuda:0')
episode: 83 training return: tensor(-194.1588, device='cuda:0')
epoch: 21 test_true_pfm: 8362.244552494154 sim_pfm: -87.5401809643566
episode: 84 training return: tensor(-392.4431, device='cuda:0')
episode: 85 training return: tensor(-181.9117, device='cuda:0')
episode: 86 training return: tensor(-71.0469, device='cuda:0')
episode: 87 training return: tensor(-257.1288, device='cuda:0')
epoch: 22 test_true_pfm: 10074.993498161117 sim_pfm: -302.143336458869
episode: 88 training return: tensor(-325.1063, device='cuda:0')
episode: 89 training return: tensor(-42.2679, device='cuda:0')
episode: 90 training return: tensor(-748.3019, device='cuda:0')
episode: 91 training return: tensor(-143.9704, device='cuda:0')
epoch: 23 test_true_pfm: 8840.749879729025 sim_pfm: 235.39471056712014
episode: 92 training return: tensor(-867.1329, device='cuda:0')
episode: 93 training return: tensor(31.6519, device='cuda:0')
episode: 94 training return: tensor(115.3092, device='cuda:0')
episode: 95 training return: tensor(-236.5897, device='cuda:0')
epoch: 24 test_true_pfm: 7616.895047010578 sim_pfm: 54.144700538696874
episode: 96 training return: tensor(-838.4483, device='cuda:0')
episode: 97 training return: tensor(60.2189, device='cuda:0')
episode: 98 training return: tensor(-54.6797, device='cuda:0')
episode: 99 training return: tensor(-92.0332, device='cuda:0')
epoch: 25 test_true_pfm: 9456.289822454783 sim_pfm: 33.73067664314294
episode: 100 training return: tensor(-68.6051, device='cuda:0')
episode: 101 training return: tensor(-381.2466, device='cuda:0')
episode: 102 training return: tensor(-821.4380, device='cuda:0')
episode: 103 training return: tensor(-191.0996, device='cuda:0')
epoch: 26 test_true_pfm: 8374.39531589953 sim_pfm: -22.907766874200508
episode: 104 training return: tensor(-809.0134, device='cuda:0')
episode: 105 training return: tensor(-895.1337, device='cuda:0')
episode: 106 training return: tensor(-173.0250, device='cuda:0')
episode: 107 training return: tensor(-266.9089, device='cuda:0')
epoch: 27 test_true_pfm: 8116.812282225156 sim_pfm: -266.6090335736905
episode: 108 training return: tensor(-19.1349, device='cuda:0')
episode: 109 training return: tensor(-51.4631, device='cuda:0')
episode: 110 training return: tensor(-102.6370, device='cuda:0')
episode: 111 training return: tensor(-95.3153, device='cuda:0')
epoch: 28 test_true_pfm: 10242.085486526297 sim_pfm: -159.94923642242793
episode: 112 training return: tensor(-186.5642, device='cuda:0')
episode: 113 training return: tensor(-82.1550, device='cuda:0')
episode: 114 training return: tensor(-84.6588, device='cuda:0')
episode: 115 training return: tensor(-436.3747, device='cuda:0')
epoch: 29 test_true_pfm: 10356.102351851756 sim_pfm: -48.07293186962488
episode: 116 training return: tensor(-153.3486, device='cuda:0')
episode: 117 training return: tensor(-277.9780, device='cuda:0')
episode: 118 training return: tensor(-974.9028, device='cuda:0')
episode: 119 training return: tensor(-998.9818, device='cuda:0')
epoch: 30 test_true_pfm: 8916.970429993127 sim_pfm: 46.05200644398186
episode: 120 training return: tensor(-255.4556, device='cuda:0')
episode: 121 training return: tensor(-123.5026, device='cuda:0')
episode: 122 training return: tensor(-637.2671, device='cuda:0')
episode: 123 training return: tensor(-256.6892, device='cuda:0')
epoch: 31 test_true_pfm: 9222.784621775798 sim_pfm: -316.8763521059882
episode: 124 training return: tensor(-942.7055, device='cuda:0')
episode: 125 training return: tensor(-119.2914, device='cuda:0')
episode: 126 training return: tensor(43.8931, device='cuda:0')
episode: 127 training return: tensor(-120.6067, device='cuda:0')
epoch: 32 test_true_pfm: 6800.714809724069 sim_pfm: 109.61757582925686
episode: 128 training return: tensor(-784.4050, device='cuda:0')
episode: 129 training return: tensor(-999.9965, device='cuda:0')
episode: 130 training return: tensor(-8.4357, device='cuda:0')
episode: 131 training return: tensor(-999.9996, device='cuda:0')
epoch: 33 test_true_pfm: 6696.248706713581 sim_pfm: 149.76317157124868
episode: 132 training return: tensor(-193.9478, device='cuda:0')
episode: 133 training return: tensor(-237.0996, device='cuda:0')
episode: 134 training return: tensor(-233.2914, device='cuda:0')
episode: 135 training return: tensor(-351.3037, device='cuda:0')
epoch: 34 test_true_pfm: 5967.914385146381 sim_pfm: -156.11044437783616
episode: 136 training return: tensor(-82.7152, device='cuda:0')
episode: 137 training return: tensor(-78.0851, device='cuda:0')
episode: 138 training return: tensor(-462.9141, device='cuda:0')
episode: 139 training return: tensor(-132.7414, device='cuda:0')
epoch: 35 test_true_pfm: 10471.351221125688 sim_pfm: 138.47817387109777
episode: 140 training return: tensor(-133.9677, device='cuda:0')
episode: 141 training return: tensor(-87.5727, device='cuda:0')
episode: 142 training return: tensor(-868.7653, device='cuda:0')
episode: 143 training return: tensor(-822.3346, device='cuda:0')
epoch: 36 test_true_pfm: 7764.247553271013 sim_pfm: -171.46496463248818
episode: 144 training return: tensor(-628.2241, device='cuda:0')
episode: 145 training return: tensor(-115.1484, device='cuda:0')
episode: 146 training return: tensor(-698.3291, device='cuda:0')
episode: 147 training return: tensor(-162.5364, device='cuda:0')
epoch: 37 test_true_pfm: 6355.050947090123 sim_pfm: -287.10207581302774
episode: 148 training return: tensor(-791.6548, device='cuda:0')
episode: 149 training return: tensor(-281.1436, device='cuda:0')
episode: 150 training return: tensor(-156.3051, device='cuda:0')
episode: 151 training return: tensor(116.2689, device='cuda:0')
epoch: 38 test_true_pfm: 9035.155818835045 sim_pfm: -53.333228024401855
episode: 152 training return: tensor(-290.1058, device='cuda:0')
episode: 153 training return: tensor(-999.9996, device='cuda:0')
episode: 154 training return: tensor(-321.9506, device='cuda:0')
episode: 155 training return: tensor(-647.3447, device='cuda:0')
epoch: 39 test_true_pfm: 6243.610809920909 sim_pfm: -251.21133790731741
episode: 156 training return: tensor(-54.0239, device='cuda:0')
episode: 157 training return: tensor(-134.6419, device='cuda:0')
episode: 158 training return: tensor(44.0627, device='cuda:0')
episode: 159 training return: tensor(-680.8874, device='cuda:0')
epoch: 40 test_true_pfm: 4122.4853984167285 sim_pfm: 40.88825205568961
episode: 160 training return: tensor(119.8659, device='cuda:0')
episode: 161 training return: tensor(-994.2369, device='cuda:0')
episode: 162 training return: tensor(-136.6902, device='cuda:0')
episode: 163 training return: tensor(-12.4007, device='cuda:0')
epoch: 41 test_true_pfm: 10210.358721832019 sim_pfm: -173.49208621389698
episode: 164 training return: tensor(-170.9279, device='cuda:0')
episode: 165 training return: tensor(-116.2626, device='cuda:0')
episode: 166 training return: tensor(-382.8577, device='cuda:0')
episode: 167 training return: tensor(-533.6278, device='cuda:0')
epoch: 42 test_true_pfm: 10203.807048705212 sim_pfm: 99.22103828863085
episode: 168 training return: tensor(-77.4360, device='cuda:0')
episode: 169 training return: tensor(-843.7976, device='cuda:0')
episode: 170 training return: tensor(-897.2642, device='cuda:0')
episode: 171 training return: tensor(-998.8953, device='cuda:0')
epoch: 43 test_true_pfm: 10416.34096620151 sim_pfm: 122.82479601337884
episode: 172 training return: tensor(189.3214, device='cuda:0')
episode: 173 training return: tensor(-12.3123, device='cuda:0')
episode: 174 training return: tensor(61.9493, device='cuda:0')
episode: 175 training return: tensor(33.8532, device='cuda:0')
epoch: 44 test_true_pfm: 10358.035544151426 sim_pfm: 93.1104370606384
episode: 176 training return: tensor(-266.9962, device='cuda:0')
episode: 177 training return: tensor(157.6932, device='cuda:0')
episode: 178 training return: tensor(-999.9990, device='cuda:0')
episode: 179 training return: tensor(-258.4767, device='cuda:0')
epoch: 45 test_true_pfm: 10517.500143643874 sim_pfm: -361.60722235936555
episode: 180 training return: tensor(13.2808, device='cuda:0')
episode: 181 training return: tensor(-933.9260, device='cuda:0')
episode: 182 training return: tensor(-382.2326, device='cuda:0')
episode: 183 training return: tensor(-999.9999, device='cuda:0')
epoch: 46 test_true_pfm: 10331.53637749972 sim_pfm: 210.21160855852455
episode: 184 training return: tensor(-69.3529, device='cuda:0')
episode: 185 training return: tensor(-89.0702, device='cuda:0')
episode: 186 training return: tensor(-797.1323, device='cuda:0')
episode: 187 training return: tensor(-38.5395, device='cuda:0')
epoch: 47 test_true_pfm: 10103.250227586948 sim_pfm: 136.43199324056818
episode: 188 training return: tensor(-999.9981, device='cuda:0')
episode: 189 training return: tensor(-184.5425, device='cuda:0')
episode: 190 training return: tensor(-999.9990, device='cuda:0')
episode: 191 training return: tensor(-726.8784, device='cuda:0')
epoch: 48 test_true_pfm: 10121.699768690489 sim_pfm: -218.09506228168416
episode: 192 training return: tensor(-219.5650, device='cuda:0')
episode: 193 training return: tensor(-222.1860, device='cuda:0')
episode: 194 training return: tensor(-254.0058, device='cuda:0')
episode: 195 training return: tensor(-711.6491, device='cuda:0')
epoch: 49 test_true_pfm: 6836.252653889732 sim_pfm: -6.740381859970512
episode: 196 training return: tensor(-999.2618, device='cuda:0')
episode: 197 training return: tensor(-267.0399, device='cuda:0')
episode: 198 training return: tensor(-342.4832, device='cuda:0')
episode: 199 training return: tensor(-373.2253, device='cuda:0')
epoch: 50 test_true_pfm: 10232.574060335619 sim_pfm: -277.4500629810694
episode: 200 training return: tensor(-179.9691, device='cuda:0')
episode: 201 training return: tensor(-97.5572, device='cuda:0')
episode: 202 training return: tensor(-999.0820, device='cuda:0')
episode: 203 training return: tensor(-341.7721, device='cuda:0')
epoch: 51 test_true_pfm: 6714.513349567443 sim_pfm: 203.41287569029373
episode: 204 training return: tensor(-102.6569, device='cuda:0')
episode: 205 training return: tensor(-748.3428, device='cuda:0')
episode: 206 training return: tensor(145.3485, device='cuda:0')
episode: 207 training return: tensor(-187.8697, device='cuda:0')
epoch: 52 test_true_pfm: 4108.438256732928 sim_pfm: 111.86455047031632
episode: 208 training return: tensor(-161.3267, device='cuda:0')
episode: 209 training return: tensor(57.7663, device='cuda:0')
episode: 210 training return: tensor(-158.8829, device='cuda:0')
episode: 211 training return: tensor(7.4466, device='cuda:0')
epoch: 53 test_true_pfm: 10268.980452544796 sim_pfm: 99.40318121799889
episode: 212 training return: tensor(-212.7290, device='cuda:0')
episode: 213 training return: tensor(-693.1628, device='cuda:0')
episode: 214 training return: tensor(52.9798, device='cuda:0')
episode: 215 training return: tensor(70.0148, device='cuda:0')
epoch: 54 test_true_pfm: 10261.190718206077 sim_pfm: 88.08032058111469
episode: 216 training return: tensor(-556.1761, device='cuda:0')
episode: 217 training return: tensor(-620.0153, device='cuda:0')
episode: 218 training return: tensor(-60.9088, device='cuda:0')
episode: 219 training return: tensor(-999.9767, device='cuda:0')
epoch: 55 test_true_pfm: 8712.260209126971 sim_pfm: -8.135826788638951
episode: 220 training return: tensor(-998.4841, device='cuda:0')
episode: 221 training return: tensor(-12.7091, device='cuda:0')
episode: 222 training return: tensor(-64.2782, device='cuda:0')
episode: 223 training return: tensor(-212.9139, device='cuda:0')
epoch: 56 test_true_pfm: 10262.790609068696 sim_pfm: 157.30301076034084
episode: 224 training return: tensor(-88.4377, device='cuda:0')
episode: 225 training return: tensor(48.4271, device='cuda:0')
episode: 226 training return: tensor(-999.9994, device='cuda:0')
episode: 227 training return: tensor(-105.9075, device='cuda:0')
epoch: 57 test_true_pfm: 10381.318376863643 sim_pfm: -10.245211888997195
episode: 228 training return: tensor(-251.4240, device='cuda:0')
episode: 229 training return: tensor(13.1520, device='cuda:0')
episode: 230 training return: tensor(-685.6614, device='cuda:0')
episode: 231 training return: tensor(-2.9400, device='cuda:0')
epoch: 58 test_true_pfm: 10490.831345245428 sim_pfm: 178.437782790182
episode: 232 training return: tensor(-111.0094, device='cuda:0')
episode: 233 training return: tensor(-86.9747, device='cuda:0')
episode: 234 training return: tensor(-478.2289, device='cuda:0')
episode: 235 training return: tensor(-218.7528, device='cuda:0')
epoch: 59 test_true_pfm: 10281.60252233816 sim_pfm: -449.5918955433881
episode: 236 training return: tensor(-61.0442, device='cuda:0')
episode: 237 training return: tensor(-983.2550, device='cuda:0')
episode: 238 training return: tensor(-245.6057, device='cuda:0')
episode: 239 training return: tensor(-165.2506, device='cuda:0')
epoch: 60 test_true_pfm: 10259.643128353579 sim_pfm: -46.13787333542132
episode: 240 training return: tensor(-91.1636, device='cuda:0')
episode: 241 training return: tensor(-71.5831, device='cuda:0')
episode: 242 training return: tensor(-999.9878, device='cuda:0')
episode: 243 training return: tensor(-590.8652, device='cuda:0')
epoch: 61 test_true_pfm: 7962.930060266827 sim_pfm: 66.05101294225703
episode: 244 training return: tensor(17.2143, device='cuda:0')
episode: 245 training return: tensor(-104.9022, device='cuda:0')
episode: 246 training return: tensor(76.2721, device='cuda:0')
episode: 247 training return: tensor(-264.4225, device='cuda:0')
epoch: 62 test_true_pfm: 10488.877468647755 sim_pfm: 22.365821454518784
episode: 248 training return: tensor(108.6448, device='cuda:0')
episode: 249 training return: tensor(-999.9996, device='cuda:0')
episode: 250 training return: tensor(-64.5102, device='cuda:0')
episode: 251 training return: tensor(-171.2666, device='cuda:0')
epoch: 63 test_true_pfm: 7532.757276689345 sim_pfm: 107.76579803885154
episode: 252 training return: tensor(-298.2307, device='cuda:0')
episode: 253 training return: tensor(-99.9450, device='cuda:0')
episode: 254 training return: tensor(-999.9992, device='cuda:0')
episode: 255 training return: tensor(77.6183, device='cuda:0')
epoch: 64 test_true_pfm: 10407.168345168096 sim_pfm: -3.7259114908520132
episode: 256 training return: tensor(-29.7407, device='cuda:0')
episode: 257 training return: tensor(-966.3068, device='cuda:0')
episode: 258 training return: tensor(-181.8296, device='cuda:0')
episode: 259 training return: tensor(-72.8485, device='cuda:0')
epoch: 65 test_true_pfm: 10042.74832752756 sim_pfm: -460.2282375806826
episode: 260 training return: tensor(-999.9970, device='cuda:0')
episode: 261 training return: tensor(-999.9994, device='cuda:0')
episode: 262 training return: tensor(-999.9553, device='cuda:0')
episode: 263 training return: tensor(-984.8944, device='cuda:0')
epoch: 66 test_true_pfm: 6891.12095972698 sim_pfm: 164.90616031672107
episode: 264 training return: tensor(53.8962, device='cuda:0')
episode: 265 training return: tensor(-34.7684, device='cuda:0')
episode: 266 training return: tensor(-108.9338, device='cuda:0')
episode: 267 training return: tensor(-707.2747, device='cuda:0')
epoch: 67 test_true_pfm: 10530.98635230966 sim_pfm: -328.45312138165656
episode: 268 training return: tensor(138.9309, device='cuda:0')
episode: 269 training return: tensor(-999.9993, device='cuda:0')
episode: 270 training return: tensor(-357.5387, device='cuda:0')
episode: 271 training return: tensor(150.7466, device='cuda:0')
epoch: 68 test_true_pfm: 7998.990213100027 sim_pfm: 136.94838348772223
episode: 272 training return: tensor(-494.7473, device='cuda:0')
episode: 273 training return: tensor(91.8502, device='cuda:0')
episode: 274 training return: tensor(-217.4818, device='cuda:0')
episode: 275 training return: tensor(122.3992, device='cuda:0')
epoch: 69 test_true_pfm: 9135.281471351162 sim_pfm: -263.40112575456925
episode: 276 training return: tensor(111.1103, device='cuda:0')
episode: 277 training return: tensor(-71.2537, device='cuda:0')
episode: 278 training return: tensor(37.6570, device='cuda:0')
episode: 279 training return: tensor(-992.7639, device='cuda:0')
epoch: 70 test_true_pfm: 9965.233673470082 sim_pfm: -659.4639819697477
episode: 280 training return: tensor(-999.9911, device='cuda:0')
episode: 281 training return: tensor(-39.8131, device='cuda:0')
episode: 282 training return: tensor(-431.1453, device='cuda:0')
episode: 283 training return: tensor(-67.8305, device='cuda:0')
epoch: 71 test_true_pfm: 10526.981018642486 sim_pfm: 160.9697130045388
episode: 284 training return: tensor(-999.9998, device='cuda:0')
episode: 285 training return: tensor(-113.4643, device='cuda:0')
episode: 286 training return: tensor(113.5512, device='cuda:0')
episode: 287 training return: tensor(-189.3807, device='cuda:0')
epoch: 72 test_true_pfm: 6938.678803307071 sim_pfm: -217.73457868841555
episode: 288 training return: tensor(222.6647, device='cuda:0')
episode: 289 training return: tensor(-46.0644, device='cuda:0')
episode: 290 training return: tensor(-333.7703, device='cuda:0')
episode: 291 training return: tensor(-999.9999, device='cuda:0')
epoch: 73 test_true_pfm: 9976.6634936579 sim_pfm: 57.208478072832804
episode: 292 training return: tensor(-117.3468, device='cuda:0')
episode: 293 training return: tensor(110.4472, device='cuda:0')
episode: 294 training return: tensor(-990.8751, device='cuda:0')
episode: 295 training return: tensor(17.1457, device='cuda:0')
epoch: 74 test_true_pfm: 10461.102693915689 sim_pfm: 264.21034351844963
episode: 296 training return: tensor(19.7867, device='cuda:0')
episode: 297 training return: tensor(-560.2178, device='cuda:0')
episode: 298 training return: tensor(88.2571, device='cuda:0')
episode: 299 training return: tensor(-153.8474, device='cuda:0')
epoch: 75 test_true_pfm: 6890.271612132485 sim_pfm: -555.9033712094339
episode: 300 training return: tensor(-999.9999, device='cuda:0')
episode: 301 training return: tensor(-999.9994, device='cuda:0')
episode: 302 training return: tensor(-588.0264, device='cuda:0')
episode: 303 training return: tensor(-999.9814, device='cuda:0')
epoch: 76 test_true_pfm: 10463.824245116522 sim_pfm: -152.2279963879458
episode: 304 training return: tensor(84.7968, device='cuda:0')
episode: 305 training return: tensor(27.8044, device='cuda:0')
episode: 306 training return: tensor(-171.5020, device='cuda:0')
episode: 307 training return: tensor(-712.8644, device='cuda:0')
epoch: 77 test_true_pfm: 9942.215958937966 sim_pfm: -99.88346381516506
episode: 308 training return: tensor(-28.6041, device='cuda:0')
episode: 309 training return: tensor(-996.3190, device='cuda:0')
episode: 310 training return: tensor(3.9069, device='cuda:0')
episode: 311 training return: tensor(12.1856, device='cuda:0')
epoch: 78 test_true_pfm: 10445.10668886542 sim_pfm: -124.6257660506526
episode: 312 training return: tensor(74.5896, device='cuda:0')
episode: 313 training return: tensor(53.9998, device='cuda:0')
episode: 314 training return: tensor(-44.2278, device='cuda:0')
episode: 315 training return: tensor(-25.7270, device='cuda:0')
epoch: 79 test_true_pfm: 10455.397072993686 sim_pfm: 114.33702755712632
episode: 316 training return: tensor(-28.1928, device='cuda:0')
episode: 317 training return: tensor(151.8649, device='cuda:0')
episode: 318 training return: tensor(-143.6205, device='cuda:0')
episode: 319 training return: tensor(129.7047, device='cuda:0')
epoch: 80 test_true_pfm: 10183.16775346051 sim_pfm: -446.74908984057646
episode: 320 training return: tensor(-86.3416, device='cuda:0')
episode: 321 training return: tensor(-779.0010, device='cuda:0')
episode: 322 training return: tensor(-95.2718, device='cuda:0')
episode: 323 training return: tensor(-135.5142, device='cuda:0')
epoch: 81 test_true_pfm: 3181.201411662311 sim_pfm: 146.810238550844
episode: 324 training return: tensor(-135.4252, device='cuda:0')
episode: 325 training return: tensor(71.0322, device='cuda:0')
episode: 326 training return: tensor(103.1873, device='cuda:0')
episode: 327 training return: tensor(57.1368, device='cuda:0')
epoch: 82 test_true_pfm: 7404.219251965279 sim_pfm: 184.92547249631025
episode: 328 training return: tensor(-566.8851, device='cuda:0')
episode: 329 training return: tensor(110.5563, device='cuda:0')
episode: 330 training return: tensor(21.4180, device='cuda:0')
episode: 331 training return: tensor(33.1955, device='cuda:0')
epoch: 83 test_true_pfm: 10369.819325915581 sim_pfm: -215.23965753356848
episode: 332 training return: tensor(-191.9357, device='cuda:0')
episode: 333 training return: tensor(-113.5984, device='cuda:0')
episode: 334 training return: tensor(-999.6575, device='cuda:0')
episode: 335 training return: tensor(14.9083, device='cuda:0')
epoch: 84 test_true_pfm: 10499.283025483024 sim_pfm: 216.5976199430685
episode: 336 training return: tensor(-15.2676, device='cuda:0')
episode: 337 training return: tensor(-284.3804, device='cuda:0')
episode: 338 training return: tensor(66.7523, device='cuda:0')
episode: 339 training return: tensor(-33.2597, device='cuda:0')
epoch: 85 test_true_pfm: 10175.54214068541 sim_pfm: 120.64609644752151
episode: 340 training return: tensor(-999.9994, device='cuda:0')
episode: 341 training return: tensor(102.5926, device='cuda:0')
episode: 342 training return: tensor(-18.4252, device='cuda:0')
episode: 343 training return: tensor(-999.4387, device='cuda:0')
epoch: 86 test_true_pfm: 9577.539801559005 sim_pfm: 340.81979141648236
episode: 344 training return: tensor(51.1865, device='cuda:0')
episode: 345 training return: tensor(-78.3271, device='cuda:0')
episode: 346 training return: tensor(-201.9796, device='cuda:0')
episode: 347 training return: tensor(-559.4527, device='cuda:0')
epoch: 87 test_true_pfm: 10415.102083825375 sim_pfm: -242.9810031753344
episode: 348 training return: tensor(-83.2310, device='cuda:0')
episode: 349 training return: tensor(-999.8610, device='cuda:0')
episode: 350 training return: tensor(-37.3076, device='cuda:0')
episode: 351 training return: tensor(-149.9176, device='cuda:0')
epoch: 88 test_true_pfm: 7416.802895957193 sim_pfm: 142.57430369365224
episode: 352 training return: tensor(-702.5275, device='cuda:0')
episode: 353 training return: tensor(86.8232, device='cuda:0')
episode: 354 training return: tensor(-133.3519, device='cuda:0')
episode: 355 training return: tensor(-372.3030, device='cuda:0')
epoch: 89 test_true_pfm: 10430.347164037268 sim_pfm: 142.6394965502162
episode: 356 training return: tensor(-999.7809, device='cuda:0')
episode: 357 training return: tensor(-51.6029, device='cuda:0')
episode: 358 training return: tensor(-91.1058, device='cuda:0')
episode: 359 training return: tensor(-41.1070, device='cuda:0')
epoch: 90 test_true_pfm: 7563.501243591207 sim_pfm: -238.92464962923745
episode: 360 training return: tensor(-224.8891, device='cuda:0')
episode: 361 training return: tensor(53.1051, device='cuda:0')
episode: 362 training return: tensor(-887.7791, device='cuda:0')
episode: 363 training return: tensor(84.1785, device='cuda:0')
epoch: 91 test_true_pfm: 10318.933836066411 sim_pfm: 123.51711593859363
episode: 364 training return: tensor(199.6941, device='cuda:0')
episode: 365 training return: tensor(-60.1079, device='cuda:0')
episode: 366 training return: tensor(-342.9882, device='cuda:0')
episode: 367 training return: tensor(-96.2609, device='cuda:0')
epoch: 92 test_true_pfm: 6614.844636660382 sim_pfm: -336.8595392534626
episode: 368 training return: tensor(-999.9998, device='cuda:0')
episode: 369 training return: tensor(-83.1564, device='cuda:0')
episode: 370 training return: tensor(-39.5285, device='cuda:0')
episode: 371 training return: tensor(-95.3913, device='cuda:0')
epoch: 93 test_true_pfm: 10599.860781638074 sim_pfm: -157.9171141628176
episode: 372 training return: tensor(-999.9754, device='cuda:0')
episode: 373 training return: tensor(-998.4247, device='cuda:0')
episode: 374 training return: tensor(-996.9838, device='cuda:0')
episode: 375 training return: tensor(-118.8946, device='cuda:0')
epoch: 94 test_true_pfm: 3643.5308201697903 sim_pfm: -253.6996766697072
episode: 376 training return: tensor(-0.0579, device='cuda:0')
episode: 377 training return: tensor(9.0236, device='cuda:0')
episode: 378 training return: tensor(-10.2773, device='cuda:0')
episode: 379 training return: tensor(-999.9999, device='cuda:0')
epoch: 95 test_true_pfm: 10252.829087924882 sim_pfm: 210.68292410195377
episode: 380 training return: tensor(93.2128, device='cuda:0')
episode: 381 training return: tensor(-103.1079, device='cuda:0')
episode: 382 training return: tensor(-362.2320, device='cuda:0')
episode: 383 training return: tensor(118.0278, device='cuda:0')
epoch: 96 test_true_pfm: 6682.472403275836 sim_pfm: 170.85784741087505
episode: 384 training return: tensor(8.7681, device='cuda:0')
episode: 385 training return: tensor(-999.9670, device='cuda:0')
episode: 386 training return: tensor(-994.6963, device='cuda:0')
episode: 387 training return: tensor(73.1774, device='cuda:0')
epoch: 97 test_true_pfm: 10375.620967282031 sim_pfm: 246.31399008018585
episode: 388 training return: tensor(-999.7144, device='cuda:0')
episode: 389 training return: tensor(57.8815, device='cuda:0')
episode: 390 training return: tensor(-999.9999, device='cuda:0')
episode: 391 training return: tensor(97.3295, device='cuda:0')
epoch: 98 test_true_pfm: 5603.877029887739 sim_pfm: 261.20937739188474
episode: 392 training return: tensor(104.0512, device='cuda:0')
episode: 393 training return: tensor(68.9261, device='cuda:0')
episode: 394 training return: tensor(76.0921, device='cuda:0')
episode: 395 training return: tensor(45.3349, device='cuda:0')
epoch: 99 test_true_pfm: 10497.581183155811 sim_pfm: 202.15695667061178
episode: 396 training return: tensor(104.1446, device='cuda:0')
episode: 397 training return: tensor(-103.6482, device='cuda:0')
episode: 398 training return: tensor(-15.9577, device='cuda:0')
episode: 399 training return: tensor(-999.9832, device='cuda:0')
epoch: 100 test_true_pfm: 9165.16811849594 sim_pfm: 155.03597320336848
episode: 400 training return: tensor(61.0690, device='cuda:0')
episode: 401 training return: tensor(-999.9993, device='cuda:0')
episode: 402 training return: tensor(9.7645, device='cuda:0')
episode: 403 training return: tensor(274.2581, device='cuda:0')
epoch: 101 test_true_pfm: 10496.17140016212 sim_pfm: 243.2646123082183
episode: 404 training return: tensor(-301.1531, device='cuda:0')
episode: 405 training return: tensor(-50.7039, device='cuda:0')
episode: 406 training return: tensor(40.2548, device='cuda:0')
episode: 407 training return: tensor(-65.0378, device='cuda:0')
epoch: 102 test_true_pfm: 9999.78425491221 sim_pfm: 222.54108780168463
episode: 408 training return: tensor(154.9280, device='cuda:0')
episode: 409 training return: tensor(-286.3684, device='cuda:0')
episode: 410 training return: tensor(-91.9605, device='cuda:0')
episode: 411 training return: tensor(11.6197, device='cuda:0')
epoch: 103 test_true_pfm: 6651.070494708838 sim_pfm: 79.75991875806358
episode: 412 training return: tensor(-120.0440, device='cuda:0')
episode: 413 training return: tensor(-999.9730, device='cuda:0')
episode: 414 training return: tensor(137.6213, device='cuda:0')
episode: 415 training return: tensor(130.5227, device='cuda:0')
epoch: 104 test_true_pfm: 10438.977673229108 sim_pfm: 205.8491113515726
episode: 416 training return: tensor(-52.3931, device='cuda:0')
episode: 417 training return: tensor(145.9716, device='cuda:0')
episode: 418 training return: tensor(132.8163, device='cuda:0')
episode: 419 training return: tensor(1.9104, device='cuda:0')
epoch: 105 test_true_pfm: 6778.756370810833 sim_pfm: 182.16195865310146
episode: 420 training return: tensor(-248.6868, device='cuda:0')
episode: 421 training return: tensor(-196.1430, device='cuda:0')
episode: 422 training return: tensor(35.3445, device='cuda:0')
episode: 423 training return: tensor(65.4333, device='cuda:0')
epoch: 106 test_true_pfm: 6929.555955258235 sim_pfm: 104.35443850949135
episode: 424 training return: tensor(-158.5179, device='cuda:0')
episode: 425 training return: tensor(123.5118, device='cuda:0')
episode: 426 training return: tensor(99.2133, device='cuda:0')
episode: 427 training return: tensor(-100.3050, device='cuda:0')
epoch: 107 test_true_pfm: 3329.05920079858 sim_pfm: -221.69085379534712
episode: 428 training return: tensor(-98.1510, device='cuda:0')
episode: 429 training return: tensor(-49.1786, device='cuda:0')
episode: 430 training return: tensor(194.7548, device='cuda:0')
episode: 431 training return: tensor(197.5430, device='cuda:0')
epoch: 108 test_true_pfm: 10509.9090819358 sim_pfm: 222.10673694093325
episode: 432 training return: tensor(233.8556, device='cuda:0')
episode: 433 training return: tensor(-870.5580, device='cuda:0')
episode: 434 training return: tensor(-118.5827, device='cuda:0')
episode: 435 training return: tensor(62.5140, device='cuda:0')
epoch: 109 test_true_pfm: 6906.03405389154 sim_pfm: -152.92476194225796
episode: 436 training return: tensor(7.0822, device='cuda:0')
episode: 437 training return: tensor(51.1445, device='cuda:0')
episode: 438 training return: tensor(-999.8813, device='cuda:0')
episode: 439 training return: tensor(-83.1112, device='cuda:0')
epoch: 110 test_true_pfm: 10529.684910054702 sim_pfm: 229.3177000687186
episode: 440 training return: tensor(-112.2287, device='cuda:0')
episode: 441 training return: tensor(-83.4499, device='cuda:0')
episode: 442 training return: tensor(60.2235, device='cuda:0')
episode: 443 training return: tensor(136.4366, device='cuda:0')
epoch: 111 test_true_pfm: 10373.479756811246 sim_pfm: 213.49272501002997
episode: 444 training return: tensor(60.7557, device='cuda:0')
episode: 445 training return: tensor(-18.7346, device='cuda:0')
episode: 446 training return: tensor(-17.5889, device='cuda:0')
episode: 447 training return: tensor(-322.0563, device='cuda:0')
epoch: 112 test_true_pfm: 8845.08555536294 sim_pfm: -259.380742058934
episode: 448 training return: tensor(-116.2243, device='cuda:0')
episode: 449 training return: tensor(-999.9995, device='cuda:0')
episode: 450 training return: tensor(252.3208, device='cuda:0')
episode: 451 training return: tensor(55.8316, device='cuda:0')
epoch: 113 test_true_pfm: 10356.6034820101 sim_pfm: 207.1842922177942
episode: 452 training return: tensor(-805.6541, device='cuda:0')
episode: 453 training return: tensor(35.9111, device='cuda:0')
episode: 454 training return: tensor(-763.2845, device='cuda:0')
episode: 455 training return: tensor(-190.1397, device='cuda:0')
epoch: 114 test_true_pfm: 10351.566643668464 sim_pfm: 157.30616546201054
episode: 456 training return: tensor(-125.1573, device='cuda:0')
episode: 457 training return: tensor(20.1364, device='cuda:0')
episode: 458 training return: tensor(-165.5815, device='cuda:0')
episode: 459 training return: tensor(-951.0823, device='cuda:0')
epoch: 115 test_true_pfm: 8054.878987605286 sim_pfm: -95.86377390823327
episode: 460 training return: tensor(-65.3445, device='cuda:0')
episode: 461 training return: tensor(98.4959, device='cuda:0')
episode: 462 training return: tensor(-413.2483, device='cuda:0')
episode: 463 training return: tensor(-100.5704, device='cuda:0')
epoch: 116 test_true_pfm: 6885.095298098492 sim_pfm: 94.13158389830885
episode: 464 training return: tensor(7.0434, device='cuda:0')
episode: 465 training return: tensor(3.5873, device='cuda:0')
episode: 466 training return: tensor(-198.9299, device='cuda:0')
episode: 467 training return: tensor(-546.2261, device='cuda:0')
epoch: 117 test_true_pfm: 10518.13983225719 sim_pfm: 220.14338934662132
episode: 468 training return: tensor(-345.4809, device='cuda:0')
episode: 469 training return: tensor(47.8052, device='cuda:0')
episode: 470 training return: tensor(175.7259, device='cuda:0')
episode: 471 training return: tensor(29.5774, device='cuda:0')
epoch: 118 test_true_pfm: 7778.827781020347 sim_pfm: 174.81016481560073
episode: 472 training return: tensor(-58.2716, device='cuda:0')
episode: 473 training return: tensor(9.6043, device='cuda:0')
episode: 474 training return: tensor(-999.9570, device='cuda:0')
episode: 475 training return: tensor(-568.8370, device='cuda:0')
epoch: 119 test_true_pfm: 10330.134369260659 sim_pfm: -182.61988739315225
episode: 476 training return: tensor(-403.4759, device='cuda:0')
episode: 477 training return: tensor(-80.2889, device='cuda:0')
episode: 478 training return: tensor(198.5779, device='cuda:0')
episode: 479 training return: tensor(-102.3652, device='cuda:0')
epoch: 120 test_true_pfm: 10524.556753403172 sim_pfm: -26.928474731403792
episode: 480 training return: tensor(96.5247, device='cuda:0')
episode: 481 training return: tensor(-7.0390, device='cuda:0')
episode: 482 training return: tensor(175.1321, device='cuda:0')
episode: 483 training return: tensor(-80.1909, device='cuda:0')
epoch: 121 test_true_pfm: 10525.927455898542 sim_pfm: -45.754345308756456
episode: 484 training return: tensor(82.6016, device='cuda:0')
episode: 485 training return: tensor(-33.6454, device='cuda:0')
episode: 486 training return: tensor(-81.0676, device='cuda:0')
episode: 487 training return: tensor(-83.0474, device='cuda:0')
epoch: 122 test_true_pfm: 10537.723484401547 sim_pfm: 282.8126200312108
episode: 488 training return: tensor(243.2313, device='cuda:0')
episode: 489 training return: tensor(150.1939, device='cuda:0')
episode: 490 training return: tensor(5.6859, device='cuda:0')
episode: 491 training return: tensor(50.1877, device='cuda:0')
epoch: 123 test_true_pfm: 7167.414275965613 sim_pfm: -159.0672281118265
episode: 492 training return: tensor(247.8083, device='cuda:0')
episode: 493 training return: tensor(-651.2199, device='cuda:0')
episode: 494 training return: tensor(-133.1213, device='cuda:0')
episode: 495 training return: tensor(185.1619, device='cuda:0')
epoch: 124 test_true_pfm: 6933.2650419815545 sim_pfm: -127.41252446452079
episode: 496 training return: tensor(111.0940, device='cuda:0')
episode: 497 training return: tensor(-999.9912, device='cuda:0')
episode: 498 training return: tensor(-106.1048, device='cuda:0')
episode: 499 training return: tensor(124.4969, device='cuda:0')
epoch: 125 test_true_pfm: 10589.759190646253 sim_pfm: -61.373341154268324
episode: 500 training return: tensor(23.9598, device='cuda:0')
episode: 501 training return: tensor(2.0570, device='cuda:0')
episode: 502 training return: tensor(-84.6064, device='cuda:0')
episode: 503 training return: tensor(207.0739, device='cuda:0')
epoch: 126 test_true_pfm: 10565.242165559563 sim_pfm: 266.85810810824233
episode: 504 training return: tensor(150.2107, device='cuda:0')
episode: 505 training return: tensor(-479.7086, device='cuda:0')
episode: 506 training return: tensor(-999.9999, device='cuda:0')
episode: 507 training return: tensor(-999.9354, device='cuda:0')
epoch: 127 test_true_pfm: 6950.3430949809945 sim_pfm: -413.2976243541537
episode: 508 training return: tensor(62.8949, device='cuda:0')
episode: 509 training return: tensor(-999.9943, device='cuda:0')
episode: 510 training return: tensor(-102.1304, device='cuda:0')
episode: 511 training return: tensor(106.9007, device='cuda:0')
epoch: 128 test_true_pfm: 10370.373309031604 sim_pfm: -99.13679988738538
episode: 512 training return: tensor(-4.2244, device='cuda:0')
episode: 513 training return: tensor(31.9943, device='cuda:0')
episode: 514 training return: tensor(200.6143, device='cuda:0')
episode: 515 training return: tensor(-133.8177, device='cuda:0')
epoch: 129 test_true_pfm: 10432.613669580687 sim_pfm: 80.46264870418236
episode: 516 training return: tensor(-999.3867, device='cuda:0')
episode: 517 training return: tensor(-16.4456, device='cuda:0')
episode: 518 training return: tensor(228.8976, device='cuda:0')
episode: 519 training return: tensor(-493.4490, device='cuda:0')
epoch: 130 test_true_pfm: 10617.551061166081 sim_pfm: -10.031442701622533
episode: 520 training return: tensor(13.6364, device='cuda:0')
episode: 521 training return: tensor(-791.3746, device='cuda:0')
episode: 522 training return: tensor(48.4920, device='cuda:0')
episode: 523 training return: tensor(202.0594, device='cuda:0')
epoch: 131 test_true_pfm: 7010.791277258083 sim_pfm: -418.43171248604386
episode: 524 training return: tensor(142.0955, device='cuda:0')
episode: 525 training return: tensor(-998.2137, device='cuda:0')
episode: 526 training return: tensor(140.8576, device='cuda:0')
episode: 527 training return: tensor(27.6073, device='cuda:0')
epoch: 132 test_true_pfm: 7079.858408158124 sim_pfm: -170.19839681081552
episode: 528 training return: tensor(39.0277, device='cuda:0')
episode: 529 training return: tensor(-73.6306, device='cuda:0')
episode: 530 training return: tensor(54.8847, device='cuda:0')
episode: 531 training return: tensor(-317.5856, device='cuda:0')
epoch: 133 test_true_pfm: 8280.788915618657 sim_pfm: -214.07028705526804
episode: 532 training return: tensor(163.8311, device='cuda:0')
episode: 533 training return: tensor(99.8840, device='cuda:0')
episode: 534 training return: tensor(100.1652, device='cuda:0')
episode: 535 training return: tensor(45.1317, device='cuda:0')
epoch: 134 test_true_pfm: 10430.129982698549 sim_pfm: -5.774018493092929
episode: 536 training return: tensor(65.0107, device='cuda:0')
episode: 537 training return: tensor(-903.2535, device='cuda:0')
episode: 538 training return: tensor(-67.6099, device='cuda:0')
episode: 539 training return: tensor(175.0911, device='cuda:0')
epoch: 135 test_true_pfm: 10484.8961258383 sim_pfm: -453.27247511570266
episode: 540 training return: tensor(82.4299, device='cuda:0')
episode: 541 training return: tensor(-998.4752, device='cuda:0')
episode: 542 training return: tensor(243.0101, device='cuda:0')
episode: 543 training return: tensor(-177.7963, device='cuda:0')
epoch: 136 test_true_pfm: 10546.268970420746 sim_pfm: 211.0981927763205
episode: 544 training return: tensor(91.1416, device='cuda:0')
episode: 545 training return: tensor(222.1942, device='cuda:0')
episode: 546 training return: tensor(-164.6323, device='cuda:0')
episode: 547 training return: tensor(45.4370, device='cuda:0')
epoch: 137 test_true_pfm: 10562.626801049271 sim_pfm: -311.15572482159286
episode: 548 training return: tensor(108.0007, device='cuda:0')
episode: 549 training return: tensor(101.5845, device='cuda:0')
episode: 550 training return: tensor(150.4189, device='cuda:0')
episode: 551 training return: tensor(39.6719, device='cuda:0')
epoch: 138 test_true_pfm: 8331.919729955918 sim_pfm: -143.44666702203298
episode: 552 training return: tensor(-32.2971, device='cuda:0')
episode: 553 training return: tensor(123.4703, device='cuda:0')
episode: 554 training return: tensor(97.9898, device='cuda:0')
episode: 555 training return: tensor(-102.9290, device='cuda:0')
epoch: 139 test_true_pfm: 10128.93706668139 sim_pfm: 134.1927211356427
episode: 556 training return: tensor(-103.3468, device='cuda:0')
episode: 557 training return: tensor(-44.3195, device='cuda:0')
episode: 558 training return: tensor(151.1133, device='cuda:0')
episode: 559 training return: tensor(95.5873, device='cuda:0')
epoch: 140 test_true_pfm: 10614.599502860277 sim_pfm: -137.73346845638784
episode: 560 training return: tensor(-999.4272, device='cuda:0')
episode: 561 training return: tensor(-999.9987, device='cuda:0')
episode: 562 training return: tensor(212.5726, device='cuda:0')
episode: 563 training return: tensor(166.6613, device='cuda:0')
epoch: 141 test_true_pfm: 7588.473038249976 sim_pfm: 311.4278169755319
episode: 564 training return: tensor(-999.9645, device='cuda:0')
episode: 565 training return: tensor(136.4514, device='cuda:0')
episode: 566 training return: tensor(-630.3102, device='cuda:0')
episode: 567 training return: tensor(209.1517, device='cuda:0')
epoch: 142 test_true_pfm: 10349.041736933685 sim_pfm: -407.14172228898195
episode: 568 training return: tensor(-97.9866, device='cuda:0')
episode: 569 training return: tensor(114.5547, device='cuda:0')
episode: 570 training return: tensor(85.0776, device='cuda:0')
episode: 571 training return: tensor(63.9848, device='cuda:0')
epoch: 143 test_true_pfm: 6970.507597328404 sim_pfm: 286.0324413144651
episode: 572 training return: tensor(-998.7028, device='cuda:0')
episode: 573 training return: tensor(-999.9756, device='cuda:0')
episode: 574 training return: tensor(-999.9419, device='cuda:0')
episode: 575 training return: tensor(162.0739, device='cuda:0')
epoch: 144 test_true_pfm: 10517.50098138779 sim_pfm: 212.60423796679243
episode: 576 training return: tensor(-589.9215, device='cuda:0')
episode: 577 training return: tensor(-999.9976, device='cuda:0')
episode: 578 training return: tensor(133.2062, device='cuda:0')
episode: 579 training return: tensor(42.7594, device='cuda:0')
epoch: 145 test_true_pfm: 7459.456454807623 sim_pfm: 258.52116317343706
episode: 580 training return: tensor(223.9663, device='cuda:0')
episode: 581 training return: tensor(91.0698, device='cuda:0')
episode: 582 training return: tensor(-152.0788, device='cuda:0')
episode: 583 training return: tensor(140.1874, device='cuda:0')
epoch: 146 test_true_pfm: 3411.09046526833 sim_pfm: -127.54108218152153
episode: 584 training return: tensor(-816.4340, device='cuda:0')
episode: 585 training return: tensor(223.5653, device='cuda:0')
episode: 586 training return: tensor(208.8577, device='cuda:0')
episode: 587 training return: tensor(-14.8699, device='cuda:0')
epoch: 147 test_true_pfm: 10513.583808356178 sim_pfm: 275.16905008479563
episode: 588 training return: tensor(161.9671, device='cuda:0')
episode: 589 training return: tensor(89.9212, device='cuda:0')
episode: 590 training return: tensor(-751.3811, device='cuda:0')
episode: 591 training return: tensor(-41.4443, device='cuda:0')
epoch: 148 test_true_pfm: 10557.464818153698 sim_pfm: -228.441946162687
episode: 592 training return: tensor(-27.3340, device='cuda:0')
episode: 593 training return: tensor(85.0149, device='cuda:0')
episode: 594 training return: tensor(217.3429, device='cuda:0')
episode: 595 training return: tensor(-212.8893, device='cuda:0')
epoch: 149 test_true_pfm: 10505.234969921248 sim_pfm: 185.9213015733209
episode: 596 training return: tensor(131.8772, device='cuda:0')
episode: 597 training return: tensor(-451.9507, device='cuda:0')
episode: 598 training return: tensor(-998.4850, device='cuda:0')
episode: 599 training return: tensor(-997.3885, device='cuda:0')
epoch: 150 test_true_pfm: 6893.213223270686 sim_pfm: -78.0831814869598
