['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'mixed', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 0.2069826103001833 test_loss: 0.1497640609741211
epoch: 1 training_loss 0.13250700432807208 test_loss: 0.135538911819458
epoch: 2 training_loss 0.12817601226270198 test_loss: 0.15569124221801758
epoch: 3 training_loss 0.12784115590155123 test_loss: 0.12306746244430541
epoch: 4 training_loss 0.1155332824215293 test_loss: 0.11503969430923462
epoch: 5 training_loss 0.1138159229233861 test_loss: 0.12395628690719604
epoch: 6 training_loss 0.10917080793529749 test_loss: 0.09176281094551086
epoch: 7 training_loss 0.11381491005420685 test_loss: 0.10373870134353638
epoch: 8 training_loss 0.11096709612756968 test_loss: 0.10725008249282837
epoch: 9 training_loss 0.10870785597711802 test_loss: 0.10870680809020997
epoch: 10 training_loss 0.10895337278023362 test_loss: 0.09834709167480468
epoch: 11 training_loss 0.11302187610417605 test_loss: 0.1255939245223999
epoch: 12 training_loss 0.10928064938634634 test_loss: 0.12503722906112671
epoch: 13 training_loss 0.11068109514191747 test_loss: 0.11379561424255372
epoch: 14 training_loss 0.1057678385078907 test_loss: 0.10914003849029541
epoch: 15 training_loss 0.10244738139212131 test_loss: 0.0980688214302063
epoch: 16 training_loss 0.11334662191569805 test_loss: 0.1286921262741089
epoch: 17 training_loss 0.10001042515039443 test_loss: 0.11812996864318848
epoch: 18 training_loss 0.10525527961552143 test_loss: 0.10176922082901001
epoch: 19 training_loss 0.09880600485950708 test_loss: 0.0954902708530426
epoch: 20 training_loss 0.11190132696181536 test_loss: 0.10275348424911498
epoch: 21 training_loss 0.11374719409272074 test_loss: 0.10034126043319702
epoch: 22 training_loss 0.09827877957373858 test_loss: 0.11422108411788941
epoch: 23 training_loss 0.10346450675278902 test_loss: 0.11010847091674805
epoch: 24 training_loss 0.10396278489381075 test_loss: 0.11132855415344238
epoch: 25 training_loss 0.10624816864728928 test_loss: 0.11789709329605103
epoch: 26 training_loss 0.10031073693186045 test_loss: 0.11010532379150391
epoch: 27 training_loss 0.10185246897861361 test_loss: 0.10930674076080323
epoch: 28 training_loss 0.10170198544859886 test_loss: 0.11052333116531372
epoch: 29 training_loss 0.09717700235545636 test_loss: 0.09929355382919311
epoch: 30 training_loss 0.09936111439019442 test_loss: 0.1000748634338379
epoch: 31 training_loss 0.09951404569670558 test_loss: 0.10042067766189575
epoch: 32 training_loss 0.10245354406535626 test_loss: 0.09697412848472595
epoch: 33 training_loss 0.1063579010590911 test_loss: 0.11048033237457275
epoch: 34 training_loss 0.10071350462734699 test_loss: 0.11250368356704712
epoch: 35 training_loss 0.10135279055684805 test_loss: 0.09715554118156433
epoch: 36 training_loss 0.10754725879058241 test_loss: 0.10399577617645264
epoch: 37 training_loss 0.10369509130716324 test_loss: 0.09531364440917969
epoch: 38 training_loss 0.10279452949762344 test_loss: 0.12561091184616088
epoch: 39 training_loss 0.10175021182745696 test_loss: 0.09619958996772766
epoch: 40 training_loss 0.09705241024494171 test_loss: 0.09952386617660522
epoch: 41 training_loss 0.10068009940907359 test_loss: 0.103425133228302
epoch: 42 training_loss 0.0985867440700531 test_loss: 0.09158670902252197
epoch: 43 training_loss 0.10041758982464671 test_loss: 0.09738205075263977
epoch: 44 training_loss 0.1026212421245873 test_loss: 0.08680577278137207
epoch: 45 training_loss 0.09914783660322428 test_loss: 0.1237521767616272
epoch: 46 training_loss 0.10011572241783143 test_loss: 0.10374281406402588
epoch: 47 training_loss 0.10169197488576173 test_loss: 0.10953638553619385
epoch: 48 training_loss 0.10327372804284096 test_loss: 0.105655038356781
epoch: 49 training_loss 0.104046062361449 test_loss: 0.10545620918273926
epoch: 50 training_loss 0.10196094838902355 test_loss: 0.10444804430007934
epoch: 51 training_loss 0.10036348685622215 test_loss: 0.10440957546234131
epoch: 52 training_loss 0.1037789398804307 test_loss: 0.10452213287353515
epoch: 53 training_loss 0.11012074463069439 test_loss: 0.10171077251434327
epoch: 54 training_loss 0.09730212945491075 test_loss: 0.10949273109436035
epoch: 55 training_loss 0.10247045757248997 test_loss: 0.10189493894577026
epoch: 56 training_loss 0.10378081187605857 test_loss: 0.1174154281616211
epoch: 57 training_loss 0.10462644580751658 test_loss: 0.11043963432312012
epoch: 58 training_loss 0.09962381470948457 test_loss: 0.1014869213104248
epoch: 59 training_loss 0.0939193757250905 test_loss: 0.10366547107696533
epoch: 60 training_loss 0.10599181305617095 test_loss: 0.10486531257629395
epoch: 61 training_loss 0.1050912506505847 test_loss: 0.10874546766281128
epoch: 62 training_loss 0.09788588061928749 test_loss: 0.11217632293701171
epoch: 63 training_loss 0.10060446612536907 test_loss: 0.09133310914039612
epoch: 64 training_loss 0.1024023275449872 test_loss: 0.09740402698516845
epoch: 65 training_loss 0.0994555051997304 test_loss: 0.0919021189212799
epoch: 66 training_loss 0.0989540671184659 test_loss: 0.10052731037139892
epoch: 67 training_loss 0.10042817953974009 test_loss: 0.10001342296600342
epoch: 68 training_loss 0.10159963998943568 test_loss: 0.11456193923950195
epoch: 69 training_loss 0.1024223243445158 test_loss: 0.09540886878967285
epoch: 70 training_loss 0.0998090835660696 test_loss: 0.08897200226783752
epoch: 71 training_loss 0.1011743351444602 test_loss: 0.09341355562210082
epoch: 72 training_loss 0.10034318996593355 test_loss: 0.1053680419921875
epoch: 73 training_loss 0.10459297116845846 test_loss: 0.10317186117172242
epoch: 74 training_loss 0.10271908465772867 test_loss: 0.10776885747909545
epoch: 75 training_loss 0.09639838520437478 test_loss: 0.09405617713928223
epoch: 76 training_loss 0.0994583804346621 test_loss: 0.11103235483169556
epoch: 77 training_loss 0.09898055270314217 test_loss: 0.1038059115409851
epoch: 78 training_loss 0.10047414492815733 test_loss: 0.11256065368652343
epoch: 79 training_loss 0.09930040918290615 test_loss: 0.10716067552566529
epoch: 80 training_loss 0.10188415128737688 test_loss: 0.09271523952484131
epoch: 81 training_loss 0.09914568614214658 test_loss: 0.11380990743637084
epoch: 82 training_loss 0.09766793545335531 test_loss: 0.08960419297218322
epoch: 83 training_loss 0.10146408833563328 test_loss: 0.09982360005378724
epoch: 84 training_loss 0.09628925148397684 test_loss: 0.10391361713409424
epoch: 85 training_loss 0.10901811627671122 test_loss: 0.09719967246055602
epoch: 86 training_loss 0.10116782931610942 test_loss: 0.11669021844863892
epoch: 87 training_loss 0.10156576611101627 test_loss: 0.1131319284439087
epoch: 88 training_loss 0.10225108589977026 test_loss: 0.11979161500930786
epoch: 89 training_loss 0.0969461790844798 test_loss: 0.11605321168899536
epoch: 90 training_loss 0.101253657117486 test_loss: 0.0922747015953064
epoch: 91 training_loss 0.09448427375406027 test_loss: 0.1036689043045044
epoch: 92 training_loss 0.09570664567872882 test_loss: 0.09776627421379089
epoch: 93 training_loss 0.09718443941324949 test_loss: 0.09513792991638184
epoch: 94 training_loss 0.10507818136364222 test_loss: 0.09555572271347046
epoch: 95 training_loss 0.1026312799192965 test_loss: 0.09370969533920288
epoch: 96 training_loss 0.09604139622300863 test_loss: 0.10142886638641357
epoch: 97 training_loss 0.09862065210938453 test_loss: 0.09806809425354004
epoch: 98 training_loss 0.09726571843028069 test_loss: 0.10032578706741332
epoch: 99 training_loss 0.09361254012212157 test_loss: 0.08759508728981018
epoch: 100 training_loss 0.0941041027661413 test_loss: 0.10007952451705933
epoch: 101 training_loss 0.09414104647934436 test_loss: 0.09876528978347779
epoch: 102 training_loss 0.090341758094728 test_loss: 0.12205908298492432
epoch: 103 training_loss 0.10670190811157226 test_loss: 0.0969275951385498
epoch: 104 training_loss 0.10224123831838369 test_loss: 0.11040481328964233
epoch: 105 training_loss 0.0954792008921504 test_loss: 0.10375397205352783
epoch: 106 training_loss 0.099647805262357 test_loss: 0.09242831468582154
epoch: 107 training_loss 0.10283027838915587 test_loss: 0.11565592288970947
epoch: 108 training_loss 0.10449011143296957 test_loss: 0.09551886916160583
epoch: 109 training_loss 0.0945004877820611 test_loss: 0.0974832832813263
epoch: 110 training_loss 0.09970836505293847 test_loss: 0.10132507085800171
epoch: 111 training_loss 0.09907454989850521 test_loss: 0.10645154714584351
epoch: 112 training_loss 0.09847228582948446 test_loss: 0.1113999366760254
epoch: 113 training_loss 0.0981819923594594 test_loss: 0.10535954236984253
epoch: 114 training_loss 0.10725688360631466 test_loss: 0.10772887468338013
epoch: 115 training_loss 0.09362471634522081 test_loss: 0.11757858991622924
epoch: 116 training_loss 0.10347707472741603 test_loss: 0.104270339012146
epoch: 117 training_loss 0.1033664758503437 test_loss: 0.11024285554885864
epoch: 118 training_loss 0.10511378437280655 test_loss: 0.10273553133010864
epoch: 119 training_loss 0.1009267371147871 test_loss: 0.10828784704208375
epoch: 120 training_loss 0.09571298116818071 test_loss: 0.1018332839012146
epoch: 121 training_loss 0.1011837779916823 test_loss: 0.09674053192138672
epoch: 122 training_loss 0.10027601003646851 test_loss: 0.10294165611267089
epoch: 123 training_loss 0.10188495226204396 test_loss: 0.11681053638458253
epoch: 124 training_loss 0.09905395608395338 test_loss: 0.10000593662261963
epoch: 125 training_loss 0.09435537558048963 test_loss: 0.11247435808181763
epoch: 126 training_loss 0.09169018886983395 test_loss: 0.10766400098800659
epoch: 127 training_loss 0.09663381444290281 test_loss: 0.08892985582351684
epoch: 128 training_loss 0.10358448155224323 test_loss: 0.09785844683647156
epoch: 129 training_loss 0.10234572008252144 test_loss: 0.10681657791137696
epoch: 130 training_loss 0.09788653248921036 test_loss: 0.09276314973831176
epoch: 131 training_loss 0.10160180244594813 test_loss: 0.09818572402000428
epoch: 132 training_loss 0.1027329732850194 test_loss: 0.09201086163520814
epoch: 133 training_loss 0.08994503777474165 test_loss: 0.09753313064575195
epoch: 134 training_loss 0.09340262336656452 test_loss: 0.10162622928619384
epoch: 135 training_loss 0.09320212017744779 test_loss: 0.10064550638198852
epoch: 136 training_loss 0.10095928054302931 test_loss: 0.11111105680465698
epoch: 137 training_loss 0.09731583904474973 test_loss: 0.10954387187957763
epoch: 138 training_loss 0.10109918039292097 test_loss: 0.09880666732788086
epoch: 139 training_loss 0.10455107824876904 test_loss: 0.10964843034744262
epoch: 140 training_loss 0.10122165828943253 test_loss: 0.09394636154174804
epoch: 141 training_loss 0.09879892885684967 test_loss: 0.0942252516746521
epoch: 142 training_loss 0.09725638780742883 test_loss: 0.0773497998714447
epoch: 143 training_loss 0.0988857446424663 test_loss: 0.09285424947738648
epoch: 144 training_loss 0.09917474109679461 test_loss: 0.10260274410247802
epoch: 145 training_loss 0.1001058454066515 test_loss: 0.10681884288787842
epoch: 146 training_loss 0.0976630999520421 test_loss: 0.10939295291900634
epoch: 147 training_loss 0.09840932119637728 test_loss: 0.1022098183631897
epoch: 148 training_loss 0.10125420358031988 test_loss: 0.09717005491256714
epoch: 149 training_loss 0.09608149908483028 test_loss: 0.10553348064422607
epoch: 0 training_loss 8.607111296653748 test_loss: 6.13215446472168
epoch: 1 training_loss 5.3043120574951175 test_loss: 4.410768508911133
epoch: 2 training_loss 3.9337038135528566 test_loss: 3.5502933502197265
epoch: 3 training_loss 3.33655686378479 test_loss: 3.099653434753418
epoch: 4 training_loss 2.874628713130951 test_loss: 2.6937232971191407
epoch: 5 training_loss 2.5366105151176455 test_loss: 2.4576318740844725
epoch: 6 training_loss 2.3991125655174255 test_loss: 2.2224502563476562
epoch: 7 training_loss 2.158198823928833 test_loss: 2.081641960144043
epoch: 8 training_loss 2.078067238330841 test_loss: 1.9561786651611328
epoch: 9 training_loss 1.9614850330352782 test_loss: 1.9659036636352538
epoch: 10 training_loss 1.8976184141635895 test_loss: 1.8161115646362305
epoch: 11 training_loss 1.8014285933971406 test_loss: 1.7343057632446288
epoch: 12 training_loss 1.7361663854122162 test_loss: 1.7279558181762695
epoch: 13 training_loss 1.7403824186325074 test_loss: 1.7509494781494142
epoch: 14 training_loss 1.6736303782463073 test_loss: 1.6405641555786132
epoch: 15 training_loss 1.6122285616397858 test_loss: 1.5931147575378417
epoch: 16 training_loss 1.5655203402042388 test_loss: 1.6250787734985352
epoch: 17 training_loss 1.531985628604889 test_loss: 1.4221702575683595
epoch: 18 training_loss 1.510481173992157 test_loss: 1.5174531936645508
epoch: 19 training_loss 1.4810099148750304 test_loss: 1.4869624137878419
epoch: 20 training_loss 1.4362056589126586 test_loss: 1.4361760139465332
epoch: 21 training_loss 1.4424724030494689 test_loss: 1.3938974380493163
epoch: 22 training_loss 1.4395884335041047 test_loss: 1.3340795516967774
epoch: 23 training_loss 1.4038869392871858 test_loss: 1.3526621818542481
epoch: 24 training_loss 1.3602830290794372 test_loss: 1.3967700958251954
epoch: 25 training_loss 1.3431568253040314 test_loss: 1.3699496269226075
epoch: 26 training_loss 1.336026953458786 test_loss: 1.387593936920166
epoch: 27 training_loss 1.3445574450492859 test_loss: 1.3281743049621582
epoch: 28 training_loss 1.319878625869751 test_loss: 1.2748066902160644
epoch: 29 training_loss 1.2825694811344146 test_loss: 1.3193513870239257
epoch: 30 training_loss 1.267224932909012 test_loss: 1.2197507858276366
epoch: 31 training_loss 1.2504912149906158 test_loss: 1.2249846458435059
epoch: 32 training_loss 1.2375954413414 test_loss: 1.2706542015075684
epoch: 33 training_loss 1.2152958816289903 test_loss: 1.2175987243652344
epoch: 34 training_loss 1.1925786447525024 test_loss: 1.2090316772460938
epoch: 35 training_loss 1.1988555687665938 test_loss: 1.160894775390625
epoch: 36 training_loss 1.183690124154091 test_loss: 1.1506239891052246
epoch: 37 training_loss 1.1545949518680572 test_loss: 1.132894515991211
epoch: 38 training_loss 1.1561116003990173 test_loss: 1.123384380340576
epoch: 39 training_loss 1.1653956204652787 test_loss: 1.1541539192199708
epoch: 40 training_loss 1.1623981100320817 test_loss: 1.1361318588256837
epoch: 41 training_loss 1.149728850722313 test_loss: 1.1457598686218262
epoch: 42 training_loss 1.127288384437561 test_loss: 1.1003032684326173
epoch: 43 training_loss 1.1298298519849777 test_loss: 1.0911273956298828
epoch: 44 training_loss 1.1140792453289032 test_loss: 1.1214912414550782
epoch: 45 training_loss 1.1008579725027083 test_loss: 1.1352697372436524
epoch: 46 training_loss 1.086950899362564 test_loss: 1.07855224609375
epoch: 47 training_loss 1.1047745615243911 test_loss: 1.1084668159484863
epoch: 48 training_loss 1.0876283222436904 test_loss: 1.1023253440856933
epoch: 49 training_loss 1.0831046760082246 test_loss: 1.067049503326416
epoch: 50 training_loss 1.065521336197853 test_loss: 1.1241888046264648
epoch: 51 training_loss 1.0537514346837997 test_loss: 1.0696906089782714
epoch: 52 training_loss 1.0480795621871948 test_loss: 1.061147403717041
epoch: 53 training_loss 1.0415488916635514 test_loss: 1.1040799140930175
epoch: 54 training_loss 1.0159195154905318 test_loss: 1.0537970542907715
epoch: 55 training_loss 1.0296109223365784 test_loss: 1.055363368988037
epoch: 56 training_loss 1.0337389075756074 test_loss: 1.0284308433532714
epoch: 57 training_loss 1.0315319794416427 test_loss: 1.0472524642944336
epoch: 58 training_loss 1.0181488329172135 test_loss: 0.9923609733581543
epoch: 59 training_loss 1.0331763565540313 test_loss: 1.027442741394043
epoch: 60 training_loss 1.0018994337320328 test_loss: 1.041065788269043
epoch: 61 training_loss 1.0048148530721663 test_loss: 1.0000704765319823
epoch: 62 training_loss 0.9963687229156494 test_loss: 0.9997638702392578
epoch: 63 training_loss 0.9941118788719178 test_loss: 0.9614931106567383
epoch: 64 training_loss 0.9776621139049531 test_loss: 0.9952134132385254
epoch: 65 training_loss 1.0013385289907455 test_loss: 0.9790979385375976
epoch: 66 training_loss 0.9848500740528107 test_loss: 0.9886750221252442
epoch: 67 training_loss 0.9772782200574874 test_loss: 0.9491204261779785
epoch: 68 training_loss 0.9612328773736953 test_loss: 0.9575943946838379
epoch: 69 training_loss 0.9710164666175842 test_loss: 0.9722964286804199
epoch: 70 training_loss 0.9486206996440888 test_loss: 0.9869975090026856
epoch: 71 training_loss 0.9489256668090821 test_loss: 0.9893073081970215
epoch: 72 training_loss 0.9498530429601669 test_loss: 0.9818258285522461
epoch: 73 training_loss 0.9482509171962739 test_loss: 0.9353859901428223
epoch: 74 training_loss 0.9432020366191864 test_loss: 0.9964504241943359
epoch: 75 training_loss 0.9494811391830444 test_loss: 0.9291998863220214
epoch: 76 training_loss 0.9483724474906922 test_loss: 0.9304099082946777
epoch: 77 training_loss 0.9114297699928283 test_loss: 0.9552097320556641
epoch: 78 training_loss 0.9512155157327652 test_loss: 0.9513710021972657
epoch: 79 training_loss 0.909457351565361 test_loss: 0.9068491935729981
epoch: 80 training_loss 0.9127508985996247 test_loss: 0.9072311401367188
epoch: 81 training_loss 0.9409591555595398 test_loss: 0.9097013473510742
epoch: 82 training_loss 0.9304519683122635 test_loss: 0.9215244293212891
epoch: 83 training_loss 0.9139539748430252 test_loss: 0.9228330612182617
epoch: 84 training_loss 0.8948545598983765 test_loss: 0.935085678100586
epoch: 85 training_loss 0.8913787001371384 test_loss: 0.9316343307495117
epoch: 86 training_loss 0.9054212653636933 test_loss: 0.9058834075927734
epoch: 87 training_loss 0.9221945565938949 test_loss: 0.9281136512756347
epoch: 88 training_loss 0.9125745898485184 test_loss: 0.8898765563964843
epoch: 89 training_loss 0.9171196520328522 test_loss: 0.9334060668945312
epoch: 90 training_loss 0.9175160586833954 test_loss: 0.8593897819519043
epoch: 91 training_loss 0.8804739528894424 test_loss: 0.9064346313476562
epoch: 92 training_loss 0.8942823475599289 test_loss: 0.9053435325622559
epoch: 93 training_loss 0.9215333020687103 test_loss: 0.8919223785400391
epoch: 94 training_loss 0.8742872893810272 test_loss: 0.8710092544555664
epoch: 95 training_loss 0.865425986647606 test_loss: 0.8641109466552734
epoch: 96 training_loss 0.9012107783555985 test_loss: 0.8752169609069824
epoch: 97 training_loss 0.8916549587249756 test_loss: 0.906471061706543
epoch: 98 training_loss 0.8629681444168091 test_loss: 0.8864373207092285
epoch: 99 training_loss 0.8789305198192596 test_loss: 0.8794375419616699
epoch: 100 training_loss 0.8949845457077026 test_loss: 0.8723212242126465
epoch: 101 training_loss 0.8734800606966019 test_loss: 0.8634194374084473
epoch: 102 training_loss 0.8783398604393006 test_loss: 0.844904899597168
epoch: 103 training_loss 0.8590685427188873 test_loss: 0.8694581985473633
epoch: 104 training_loss 0.8672190850973129 test_loss: 0.8565010070800781
epoch: 105 training_loss 0.8600458312034607 test_loss: 0.8429116249084473
epoch: 106 training_loss 0.8573404526710511 test_loss: 0.8676730155944824
epoch: 107 training_loss 0.8657014691829681 test_loss: 0.8728884696960449
epoch: 108 training_loss 0.8519163048267364 test_loss: 0.8351441383361816
epoch: 109 training_loss 0.8289553105831147 test_loss: 0.8517797470092774
epoch: 110 training_loss 0.8446706634759903 test_loss: 0.8322330474853515
epoch: 111 training_loss 0.8432736444473267 test_loss: 0.8581904411315918
epoch: 112 training_loss 0.8524131685495376 test_loss: 0.8456858634948731
epoch: 113 training_loss 0.8494124209880829 test_loss: 0.8056839942932129
epoch: 114 training_loss 0.8286802411079407 test_loss: 0.8676643371582031
epoch: 115 training_loss 0.8416979938745499 test_loss: 0.8357306480407715
epoch: 116 training_loss 0.8501518434286117 test_loss: 0.8221132278442382
epoch: 117 training_loss 0.8335899460315704 test_loss: 0.8448371887207031
epoch: 118 training_loss 0.8153588986396789 test_loss: 0.8391335487365723
epoch: 119 training_loss 0.824356142282486 test_loss: 0.8108882904052734
epoch: 120 training_loss 0.8385806560516358 test_loss: 0.837333869934082
epoch: 121 training_loss 0.8099862569570542 test_loss: 0.815097427368164
epoch: 122 training_loss 0.8145238256454468 test_loss: 0.8197291374206543
epoch: 123 training_loss 0.825246114730835 test_loss: 0.8477392196655273
epoch: 124 training_loss 0.8163692516088485 test_loss: 0.8123284339904785
epoch: 125 training_loss 0.8260472881793975 test_loss: 0.8597135543823242
epoch: 126 training_loss 0.8256662863492966 test_loss: 0.839760684967041
epoch: 127 training_loss 0.8060210412740707 test_loss: 0.8092305183410644
epoch: 128 training_loss 0.8250087869167327 test_loss: 0.8600237846374512
epoch: 129 training_loss 0.8190281021595002 test_loss: 0.8590299606323242
epoch: 130 training_loss 0.8194181650876999 test_loss: 0.8046090126037597
epoch: 131 training_loss 0.7976471823453903 test_loss: 0.7977948665618897
epoch: 132 training_loss 0.8115393882989883 test_loss: 0.8257659912109375
epoch: 133 training_loss 0.8110052716732025 test_loss: 0.8232090950012207
epoch: 134 training_loss 0.8030797827243805 test_loss: 0.8188514709472656
epoch: 135 training_loss 0.8036590266227722 test_loss: 0.7806363105773926
epoch: 136 training_loss 0.794970549941063 test_loss: 0.7995305538177491
epoch: 137 training_loss 0.7904087841510773 test_loss: 0.8097434997558594
epoch: 138 training_loss 0.804243540763855 test_loss: 0.8025167465209961
epoch: 139 training_loss 0.7792720049619675 test_loss: 0.7997951507568359
epoch: 140 training_loss 0.7756260228157044 test_loss: 0.771096134185791
epoch: 141 training_loss 0.7799697929620742 test_loss: 0.8349616050720214
epoch: 142 training_loss 0.786757161617279 test_loss: 0.7896228313446045
epoch: 143 training_loss 0.783887295126915 test_loss: 0.7845459461212159
epoch: 144 training_loss 0.7765641403198242 test_loss: 0.7920101165771485
epoch: 145 training_loss 0.7728785556554795 test_loss: 0.7786947727203369
epoch: 146 training_loss 0.8042245960235596 test_loss: 0.7746626853942871
epoch: 147 training_loss 0.789955911040306 test_loss: 0.78154296875
epoch: 148 training_loss 0.7672007036209106 test_loss: 0.7821519851684571
epoch: 149 training_loss 0.7852361458539963 test_loss: 0.7554727077484131
1928.0276780920199
episode: 0 training return: tensor(-471.5204, device='cuda:0')
episode: 1 training return: tensor(-246.1871, device='cuda:0')
episode: 2 training return: tensor(81.9061, device='cuda:0')
episode: 3 training return: tensor(-278.7850, device='cuda:0')
epoch: 1 test_true_pfm: 1112.3890210987308 sim_pfm: -560.8680379625876
episode: 4 training return: tensor(-267.4598, device='cuda:0')
episode: 5 training return: tensor(211.0411, device='cuda:0')
episode: 6 training return: tensor(12.8478, device='cuda:0')
episode: 7 training return: tensor(-54.3020, device='cuda:0')
epoch: 2 test_true_pfm: 1247.0785454423776 sim_pfm: -446.9731720293251
episode: 8 training return: tensor(-292.6806, device='cuda:0')
episode: 9 training return: tensor(-537.4206, device='cuda:0')
episode: 10 training return: tensor(-592.5030, device='cuda:0')
episode: 11 training return: tensor(-614.7385, device='cuda:0')
epoch: 3 test_true_pfm: 991.680338869101 sim_pfm: -515.9612085699724
episode: 12 training return: tensor(-453.0671, device='cuda:0')
episode: 13 training return: tensor(-405.3022, device='cuda:0')
episode: 14 training return: tensor(-402.9842, device='cuda:0')
episode: 15 training return: tensor(-479.4306, device='cuda:0')
epoch: 4 test_true_pfm: 1223.207161199364 sim_pfm: -495.9442991077667
episode: 16 training return: tensor(-456.3358, device='cuda:0')
episode: 17 training return: tensor(-442.0452, device='cuda:0')
episode: 18 training return: tensor(-470.1727, device='cuda:0')
episode: 19 training return: tensor(-176.8736, device='cuda:0')
epoch: 5 test_true_pfm: 1628.4089059755686 sim_pfm: -442.946232440571
episode: 20 training return: tensor(-466.2827, device='cuda:0')
episode: 21 training return: tensor(-281.3127, device='cuda:0')
episode: 22 training return: tensor(-484.7433, device='cuda:0')
episode: 23 training return: tensor(-427.9096, device='cuda:0')
epoch: 6 test_true_pfm: 1644.3908506820953 sim_pfm: -422.8033207577343
episode: 24 training return: tensor(-446.7095, device='cuda:0')
episode: 25 training return: tensor(-379.0435, device='cuda:0')
episode: 26 training return: tensor(-441.5721, device='cuda:0')
episode: 27 training return: tensor(-466.2032, device='cuda:0')
epoch: 7 test_true_pfm: 1290.3834272555948 sim_pfm: -399.8202200574451
episode: 28 training return: tensor(-548.8930, device='cuda:0')
episode: 29 training return: tensor(-581.4418, device='cuda:0')
episode: 30 training return: tensor(-569.2428, device='cuda:0')
episode: 31 training return: tensor(-459.9798, device='cuda:0')
epoch: 8 test_true_pfm: 846.5500674722845 sim_pfm: -583.0969154059421
episode: 32 training return: tensor(-489.9503, device='cuda:0')
episode: 33 training return: tensor(-394.4962, device='cuda:0')
episode: 34 training return: tensor(-436.6542, device='cuda:0')
episode: 35 training return: tensor(-523.6122, device='cuda:0')
epoch: 9 test_true_pfm: 1017.1749738395038 sim_pfm: -540.4459482288221
episode: 36 training return: tensor(-442.7297, device='cuda:0')
episode: 37 training return: tensor(-577.3451, device='cuda:0')
episode: 38 training return: tensor(-383.4642, device='cuda:0')
episode: 39 training return: tensor(-261.7392, device='cuda:0')
epoch: 10 test_true_pfm: 936.355293712355 sim_pfm: -592.3975423103742
episode: 40 training return: tensor(-467.6556, device='cuda:0')
episode: 41 training return: tensor(-404.1737, device='cuda:0')
episode: 42 training return: tensor(-417.3289, device='cuda:0')
episode: 43 training return: tensor(-501.1318, device='cuda:0')
epoch: 11 test_true_pfm: 863.4892419522625 sim_pfm: -592.8165483656727
episode: 44 training return: tensor(-537.6938, device='cuda:0')
episode: 45 training return: tensor(-549.3186, device='cuda:0')
episode: 46 training return: tensor(-418.2045, device='cuda:0')
episode: 47 training return: tensor(-491.5203, device='cuda:0')
epoch: 12 test_true_pfm: 1661.961984960402 sim_pfm: -237.0607153820844
episode: 48 training return: tensor(-464.2187, device='cuda:0')
episode: 49 training return: tensor(189.2511, device='cuda:0')
episode: 50 training return: tensor(-360.7748, device='cuda:0')
episode: 51 training return: tensor(-146.9612, device='cuda:0')
epoch: 13 test_true_pfm: 2053.057977390352 sim_pfm: -455.57702457639
episode: 52 training return: tensor(-395.2405, device='cuda:0')
episode: 53 training return: tensor(265.9753, device='cuda:0')
episode: 54 training return: tensor(-433.8769, device='cuda:0')
episode: 55 training return: tensor(-517.9387, device='cuda:0')
epoch: 14 test_true_pfm: 1946.8522605735106 sim_pfm: -108.08913801434876
episode: 56 training return: tensor(-522.7817, device='cuda:0')
episode: 57 training return: tensor(-505.5280, device='cuda:0')
episode: 58 training return: tensor(-60.6977, device='cuda:0')
episode: 59 training return: tensor(-105.5838, device='cuda:0')
epoch: 15 test_true_pfm: 1689.6652220450305 sim_pfm: -19.962382636556868
episode: 60 training return: tensor(-462.7616, device='cuda:0')
episode: 61 training return: tensor(-579.5663, device='cuda:0')
episode: 62 training return: tensor(-498.7111, device='cuda:0')
episode: 63 training return: tensor(-593.5591, device='cuda:0')
epoch: 16 test_true_pfm: 1878.0854095035872 sim_pfm: -258.9935820156825
episode: 64 training return: tensor(-490.3825, device='cuda:0')
episode: 65 training return: tensor(-200.4415, device='cuda:0')
episode: 66 training return: tensor(-413.4708, device='cuda:0')
episode: 67 training return: tensor(-553.6314, device='cuda:0')
epoch: 17 test_true_pfm: 1673.6714907734688 sim_pfm: -385.97163272866356
episode: 68 training return: tensor(-370.2697, device='cuda:0')
episode: 69 training return: tensor(-526.4634, device='cuda:0')
episode: 70 training return: tensor(-461.1030, device='cuda:0')
episode: 71 training return: tensor(65.5033, device='cuda:0')
epoch: 18 test_true_pfm: 1965.933753724257 sim_pfm: -184.17370648658834
episode: 72 training return: tensor(-492.7711, device='cuda:0')
episode: 73 training return: tensor(-495.2458, device='cuda:0')
episode: 74 training return: tensor(-31.9768, device='cuda:0')
episode: 75 training return: tensor(-508.4215, device='cuda:0')
epoch: 19 test_true_pfm: 1819.8477047020558 sim_pfm: -155.02789595199283
episode: 76 training return: tensor(-593.1435, device='cuda:0')
episode: 77 training return: tensor(-218.4115, device='cuda:0')
episode: 78 training return: tensor(-306.8723, device='cuda:0')
episode: 79 training return: tensor(-444.5107, device='cuda:0')
epoch: 20 test_true_pfm: 2176.937404343196 sim_pfm: -174.88060786842834
episode: 80 training return: tensor(-129.7360, device='cuda:0')
episode: 81 training return: tensor(-83.4675, device='cuda:0')
episode: 82 training return: tensor(-591.5797, device='cuda:0')
episode: 83 training return: tensor(226.4837, device='cuda:0')
epoch: 21 test_true_pfm: 2444.430566614395 sim_pfm: -134.04959380626678
episode: 84 training return: tensor(215.3869, device='cuda:0')
episode: 85 training return: tensor(-176.5716, device='cuda:0')
episode: 86 training return: tensor(27.3878, device='cuda:0')
episode: 87 training return: tensor(60.7990, device='cuda:0')
epoch: 22 test_true_pfm: 2662.0312836799444 sim_pfm: -216.57052436620384
episode: 88 training return: tensor(-533.0322, device='cuda:0')
episode: 89 training return: tensor(-260.0521, device='cuda:0')
episode: 90 training return: tensor(-554.9769, device='cuda:0')
episode: 91 training return: tensor(-284.5368, device='cuda:0')
epoch: 23 test_true_pfm: 2273.297357518279 sim_pfm: 147.1238555010253
episode: 92 training return: tensor(-181.5522, device='cuda:0')
episode: 93 training return: tensor(-581.4466, device='cuda:0')
episode: 94 training return: tensor(-466.2173, device='cuda:0')
episode: 95 training return: tensor(-400.8891, device='cuda:0')
epoch: 24 test_true_pfm: 1801.6780701389398 sim_pfm: -127.4363488574163
episode: 96 training return: tensor(-546.6069, device='cuda:0')
episode: 97 training return: tensor(-466.3601, device='cuda:0')
episode: 98 training return: tensor(-12.3557, device='cuda:0')
episode: 99 training return: tensor(-359.4728, device='cuda:0')
epoch: 25 test_true_pfm: 2455.987166559394 sim_pfm: -30.65786309748849
episode: 100 training return: tensor(120.6706, device='cuda:0')
episode: 101 training return: tensor(-116.5781, device='cuda:0')
episode: 102 training return: tensor(-506.9959, device='cuda:0')
episode: 103 training return: tensor(-425.7555, device='cuda:0')
epoch: 26 test_true_pfm: 2435.1309113517445 sim_pfm: -186.4702736348457
episode: 104 training return: tensor(-377.5110, device='cuda:0')
episode: 105 training return: tensor(235.5000, device='cuda:0')
episode: 106 training return: tensor(-485.4894, device='cuda:0')
episode: 107 training return: tensor(-394.1848, device='cuda:0')
epoch: 27 test_true_pfm: 2238.3015218153655 sim_pfm: 120.6332306752447
episode: 108 training return: tensor(-335.0292, device='cuda:0')
episode: 109 training return: tensor(-444.8583, device='cuda:0')
episode: 110 training return: tensor(-375.5576, device='cuda:0')
episode: 111 training return: tensor(-172.0804, device='cuda:0')
epoch: 28 test_true_pfm: 2072.9674702558964 sim_pfm: -15.384146650360586
episode: 112 training return: tensor(-199.6104, device='cuda:0')
episode: 113 training return: tensor(-444.4379, device='cuda:0')
episode: 114 training return: tensor(-368.4459, device='cuda:0')
episode: 115 training return: tensor(-556.4045, device='cuda:0')
epoch: 29 test_true_pfm: 2295.5894061622616 sim_pfm: 13.8752491826696
episode: 116 training return: tensor(-362.5286, device='cuda:0')
episode: 117 training return: tensor(-430.6300, device='cuda:0')
episode: 118 training return: tensor(-305.8459, device='cuda:0')
episode: 119 training return: tensor(-437.6901, device='cuda:0')
epoch: 30 test_true_pfm: 2823.1249393949415 sim_pfm: 74.94066179400154
episode: 120 training return: tensor(-359.3190, device='cuda:0')
episode: 121 training return: tensor(-216.6160, device='cuda:0')
episode: 122 training return: tensor(-513.0602, device='cuda:0')
episode: 123 training return: tensor(-414.4237, device='cuda:0')
epoch: 31 test_true_pfm: 2828.1675121059034 sim_pfm: 9.911081510401951
episode: 124 training return: tensor(-294.7297, device='cuda:0')
episode: 125 training return: tensor(-536.6950, device='cuda:0')
episode: 126 training return: tensor(-502.0537, device='cuda:0')
episode: 127 training return: tensor(-464.2310, device='cuda:0')
epoch: 32 test_true_pfm: 2828.3741769974117 sim_pfm: 129.49093913401398
episode: 128 training return: tensor(-283.3025, device='cuda:0')
episode: 129 training return: tensor(-466.2437, device='cuda:0')
episode: 130 training return: tensor(131.4914, device='cuda:0')
episode: 131 training return: tensor(-158.8791, device='cuda:0')
epoch: 33 test_true_pfm: 2628.1484463818383 sim_pfm: -311.38662526504294
episode: 132 training return: tensor(-30.3002, device='cuda:0')
episode: 133 training return: tensor(-285.4377, device='cuda:0')
episode: 134 training return: tensor(-465.9328, device='cuda:0')
episode: 135 training return: tensor(-424.4591, device='cuda:0')
epoch: 34 test_true_pfm: 1906.3932182623878 sim_pfm: -221.42487232329827
episode: 136 training return: tensor(300.2683, device='cuda:0')
episode: 137 training return: tensor(-140.1694, device='cuda:0')
episode: 138 training return: tensor(-119.2395, device='cuda:0')
episode: 139 training return: tensor(10.0853, device='cuda:0')
epoch: 35 test_true_pfm: 1875.040065693601 sim_pfm: 15.902945195072485
episode: 140 training return: tensor(-578.0262, device='cuda:0')
episode: 141 training return: tensor(-425.4430, device='cuda:0')
episode: 142 training return: tensor(-159.6057, device='cuda:0')
episode: 143 training return: tensor(-558.6257, device='cuda:0')
epoch: 36 test_true_pfm: 1913.8416636983482 sim_pfm: -256.1131259831309
episode: 144 training return: tensor(120.8936, device='cuda:0')
episode: 145 training return: tensor(-444.8103, device='cuda:0')
episode: 146 training return: tensor(-416.1614, device='cuda:0')
episode: 147 training return: tensor(-465.7164, device='cuda:0')
epoch: 37 test_true_pfm: 2102.0259755395714 sim_pfm: -214.40681918958822
episode: 148 training return: tensor(-429.7719, device='cuda:0')
episode: 149 training return: tensor(-573.0786, device='cuda:0')
episode: 150 training return: tensor(-484.1325, device='cuda:0')
episode: 151 training return: tensor(-429.8846, device='cuda:0')
epoch: 38 test_true_pfm: 1988.5791584182807 sim_pfm: -269.11355700489366
episode: 152 training return: tensor(-169.3298, device='cuda:0')
episode: 153 training return: tensor(-532.7039, device='cuda:0')
episode: 154 training return: tensor(-374.7895, device='cuda:0')
episode: 155 training return: tensor(-553.3390, device='cuda:0')
epoch: 39 test_true_pfm: 2079.3049186600238 sim_pfm: -240.31400411576033
episode: 156 training return: tensor(126.6967, device='cuda:0')
episode: 157 training return: tensor(-373.7539, device='cuda:0')
episode: 158 training return: tensor(-579.3936, device='cuda:0')
episode: 159 training return: tensor(-422.4226, device='cuda:0')
epoch: 40 test_true_pfm: 2902.238535045197 sim_pfm: 123.64361983753042
episode: 160 training return: tensor(-192.6010, device='cuda:0')
episode: 161 training return: tensor(-365.0158, device='cuda:0')
episode: 162 training return: tensor(-50.6770, device='cuda:0')
episode: 163 training return: tensor(-164.8939, device='cuda:0')
epoch: 41 test_true_pfm: 2508.233380693673 sim_pfm: -46.58928593616778
episode: 164 training return: tensor(-462.8207, device='cuda:0')
episode: 165 training return: tensor(-367.2801, device='cuda:0')
episode: 166 training return: tensor(-112.0416, device='cuda:0')
episode: 167 training return: tensor(-206.4153, device='cuda:0')
epoch: 42 test_true_pfm: 2290.9391423010516 sim_pfm: -281.99943519870675
episode: 168 training return: tensor(-456.3117, device='cuda:0')
episode: 169 training return: tensor(-429.3202, device='cuda:0')
episode: 170 training return: tensor(-372.1875, device='cuda:0')
episode: 171 training return: tensor(-436.9153, device='cuda:0')
epoch: 43 test_true_pfm: 1985.8301992861677 sim_pfm: 182.39482260891236
episode: 172 training return: tensor(-448.4778, device='cuda:0')
episode: 173 training return: tensor(-137.8424, device='cuda:0')
episode: 174 training return: tensor(-539.8503, device='cuda:0')
episode: 175 training return: tensor(-425.9054, device='cuda:0')
epoch: 44 test_true_pfm: 1949.2733196388642 sim_pfm: -152.38672330311965
episode: 176 training return: tensor(-531.7468, device='cuda:0')
episode: 177 training return: tensor(-477.5005, device='cuda:0')
episode: 178 training return: tensor(262.0061, device='cuda:0')
episode: 179 training return: tensor(-457.5185, device='cuda:0')
epoch: 45 test_true_pfm: 2835.397052184391 sim_pfm: 220.21363094072635
episode: 180 training return: tensor(253.0656, device='cuda:0')
episode: 181 training return: tensor(-84.1952, device='cuda:0')
episode: 182 training return: tensor(-572.1296, device='cuda:0')
episode: 183 training return: tensor(-235.8338, device='cuda:0')
epoch: 46 test_true_pfm: 1853.6981578795467 sim_pfm: -262.3887455839819
episode: 184 training return: tensor(-544.1350, device='cuda:0')
episode: 185 training return: tensor(-531.0292, device='cuda:0')
episode: 186 training return: tensor(-546.8445, device='cuda:0')
episode: 187 training return: tensor(-428.0844, device='cuda:0')
epoch: 47 test_true_pfm: 2386.120178071384 sim_pfm: -122.3125694115879
episode: 188 training return: tensor(-498.8909, device='cuda:0')
episode: 189 training return: tensor(-470.3552, device='cuda:0')
episode: 190 training return: tensor(-260.2610, device='cuda:0')
episode: 191 training return: tensor(-257.6494, device='cuda:0')
epoch: 48 test_true_pfm: 1649.81532825054 sim_pfm: -237.26361366129518
episode: 192 training return: tensor(-28.3916, device='cuda:0')
episode: 193 training return: tensor(-202.8392, device='cuda:0')
episode: 194 training return: tensor(-191.4945, device='cuda:0')
episode: 195 training return: tensor(-412.3628, device='cuda:0')
epoch: 49 test_true_pfm: 1855.6933753463707 sim_pfm: -52.077846755293045
episode: 196 training return: tensor(-417.1954, device='cuda:0')
episode: 197 training return: tensor(-516.3914, device='cuda:0')
episode: 198 training return: tensor(-355.6458, device='cuda:0')
episode: 199 training return: tensor(-113.3358, device='cuda:0')
epoch: 50 test_true_pfm: 1990.2763509384692 sim_pfm: -89.15560654984438
episode: 200 training return: tensor(-416.6170, device='cuda:0')
episode: 201 training return: tensor(-278.4435, device='cuda:0')
episode: 202 training return: tensor(-194.1832, device='cuda:0')
episode: 203 training return: tensor(-17.7026, device='cuda:0')
epoch: 51 test_true_pfm: 1759.2866331473103 sim_pfm: -261.0317281119642
episode: 204 training return: tensor(115.4327, device='cuda:0')
episode: 205 training return: tensor(-456.7399, device='cuda:0')
episode: 206 training return: tensor(-501.8585, device='cuda:0')
episode: 207 training return: tensor(-490.2685, device='cuda:0')
epoch: 52 test_true_pfm: 1501.9003796639006 sim_pfm: -180.12068799774474
episode: 208 training return: tensor(-255.2306, device='cuda:0')
episode: 209 training return: tensor(-570.3644, device='cuda:0')
episode: 210 training return: tensor(-485.1778, device='cuda:0')
episode: 211 training return: tensor(-105.4660, device='cuda:0')
epoch: 53 test_true_pfm: 2637.192037228475 sim_pfm: -111.88172484053455
episode: 212 training return: tensor(-516.0104, device='cuda:0')
episode: 213 training return: tensor(118.9073, device='cuda:0')
episode: 214 training return: tensor(-299.2575, device='cuda:0')
episode: 215 training return: tensor(-359.5767, device='cuda:0')
epoch: 54 test_true_pfm: 1981.218428325364 sim_pfm: -181.65438036878672
episode: 216 training return: tensor(-49.5879, device='cuda:0')
episode: 217 training return: tensor(-423.8256, device='cuda:0')
episode: 218 training return: tensor(-432.8405, device='cuda:0')
episode: 219 training return: tensor(-269.4782, device='cuda:0')
epoch: 55 test_true_pfm: 1850.8567443288814 sim_pfm: -187.41146750526968
episode: 220 training return: tensor(-264.2977, device='cuda:0')
episode: 221 training return: tensor(-433.3599, device='cuda:0')
episode: 222 training return: tensor(-295.0016, device='cuda:0')
episode: 223 training return: tensor(-555.1353, device='cuda:0')
epoch: 56 test_true_pfm: 1976.485566347793 sim_pfm: -302.7504472059857
episode: 224 training return: tensor(-568.6408, device='cuda:0')
episode: 225 training return: tensor(-110.4750, device='cuda:0')
episode: 226 training return: tensor(-377.8021, device='cuda:0')
episode: 227 training return: tensor(-472.4168, device='cuda:0')
epoch: 57 test_true_pfm: 1943.9594465620482 sim_pfm: -221.1713154220488
episode: 228 training return: tensor(-274.8174, device='cuda:0')
episode: 229 training return: tensor(-112.7442, device='cuda:0')
episode: 230 training return: tensor(-267.7549, device='cuda:0')
episode: 231 training return: tensor(-307.4146, device='cuda:0')
epoch: 58 test_true_pfm: 1866.31396256443 sim_pfm: -318.3773653230455
episode: 232 training return: tensor(-333.9064, device='cuda:0')
episode: 233 training return: tensor(-360.0319, device='cuda:0')
episode: 234 training return: tensor(-458.1988, device='cuda:0')
episode: 235 training return: tensor(-584.1081, device='cuda:0')
epoch: 59 test_true_pfm: 1421.2887078941374 sim_pfm: -398.11775376015186
episode: 236 training return: tensor(-273.5368, device='cuda:0')
episode: 237 training return: tensor(-108.6579, device='cuda:0')
episode: 238 training return: tensor(132.1797, device='cuda:0')
episode: 239 training return: tensor(-149.7094, device='cuda:0')
epoch: 60 test_true_pfm: 1861.063013362231 sim_pfm: -77.36428368601871
episode: 240 training return: tensor(-554.5424, device='cuda:0')
episode: 241 training return: tensor(-504.5748, device='cuda:0')
episode: 242 training return: tensor(-245.0332, device='cuda:0')
episode: 243 training return: tensor(-448.1325, device='cuda:0')
epoch: 61 test_true_pfm: 1823.4533520754994 sim_pfm: -172.22716460376978
episode: 244 training return: tensor(-362.5772, device='cuda:0')
episode: 245 training return: tensor(-455.0307, device='cuda:0')
episode: 246 training return: tensor(-576.7941, device='cuda:0')
episode: 247 training return: tensor(-424.7687, device='cuda:0')
epoch: 62 test_true_pfm: 2144.3647639873325 sim_pfm: -28.69585501041729
episode: 248 training return: tensor(-458.5521, device='cuda:0')
episode: 249 training return: tensor(-411.5468, device='cuda:0')
episode: 250 training return: tensor(-484.2328, device='cuda:0')
episode: 251 training return: tensor(-264.7826, device='cuda:0')
epoch: 63 test_true_pfm: 1895.5363403059018 sim_pfm: -218.12792256555986
episode: 252 training return: tensor(-361.6846, device='cuda:0')
episode: 253 training return: tensor(-504.0505, device='cuda:0')
episode: 254 training return: tensor(-143.1494, device='cuda:0')
episode: 255 training return: tensor(-277.5799, device='cuda:0')
epoch: 64 test_true_pfm: 2098.1972180108237 sim_pfm: -276.25011783149483
episode: 256 training return: tensor(-582.7501, device='cuda:0')
episode: 257 training return: tensor(-273.0706, device='cuda:0')
episode: 258 training return: tensor(-276.8000, device='cuda:0')
episode: 259 training return: tensor(-437.3507, device='cuda:0')
epoch: 65 test_true_pfm: 1412.4493701091797 sim_pfm: -439.4334102361463
episode: 260 training return: tensor(-500.5641, device='cuda:0')
episode: 261 training return: tensor(125.1774, device='cuda:0')
episode: 262 training return: tensor(-560.2383, device='cuda:0')
episode: 263 training return: tensor(110.5732, device='cuda:0')
epoch: 66 test_true_pfm: 1786.4863094596622 sim_pfm: -253.57290766803393
episode: 264 training return: tensor(-258.4959, device='cuda:0')
episode: 265 training return: tensor(-476.6649, device='cuda:0')
episode: 266 training return: tensor(-479.4218, device='cuda:0')
episode: 267 training return: tensor(-313.0264, device='cuda:0')
epoch: 67 test_true_pfm: 1733.4057514492742 sim_pfm: -193.98472018579682
episode: 268 training return: tensor(-478.0518, device='cuda:0')
episode: 269 training return: tensor(-567.0694, device='cuda:0')
episode: 270 training return: tensor(-504.9652, device='cuda:0')
episode: 271 training return: tensor(-339.2081, device='cuda:0')
epoch: 68 test_true_pfm: 1549.8871692740329 sim_pfm: -354.0521461369256
episode: 272 training return: tensor(-384.1992, device='cuda:0')
episode: 273 training return: tensor(-247.9153, device='cuda:0')
episode: 274 training return: tensor(-381.3691, device='cuda:0')
episode: 275 training return: tensor(221.9463, device='cuda:0')
epoch: 69 test_true_pfm: 1908.122975084598 sim_pfm: -323.7142377695224
episode: 276 training return: tensor(-287.0653, device='cuda:0')
episode: 277 training return: tensor(-363.0639, device='cuda:0')
episode: 278 training return: tensor(-554.8891, device='cuda:0')
episode: 279 training return: tensor(-507.4196, device='cuda:0')
epoch: 70 test_true_pfm: 2055.972164269439 sim_pfm: -172.81639738113154
episode: 280 training return: tensor(-289.7726, device='cuda:0')
episode: 281 training return: tensor(-560.6470, device='cuda:0')
episode: 282 training return: tensor(-442.1363, device='cuda:0')
episode: 283 training return: tensor(-33.2408, device='cuda:0')
epoch: 71 test_true_pfm: 1176.81805397292 sim_pfm: -489.3789429608344
episode: 284 training return: tensor(-413.9540, device='cuda:0')
episode: 285 training return: tensor(-452.4655, device='cuda:0')
episode: 286 training return: tensor(-279.4896, device='cuda:0')
episode: 287 training return: tensor(-456.1937, device='cuda:0')
epoch: 72 test_true_pfm: 1096.965537418347 sim_pfm: -442.67909689395066
episode: 288 training return: tensor(-518.2009, device='cuda:0')
episode: 289 training return: tensor(-383.4574, device='cuda:0')
episode: 290 training return: tensor(-230.9893, device='cuda:0')
episode: 291 training return: tensor(-353.6385, device='cuda:0')
epoch: 73 test_true_pfm: 1685.2735164601352 sim_pfm: -269.84428559144726
episode: 292 training return: tensor(-331.8761, device='cuda:0')
episode: 293 training return: tensor(-361.1192, device='cuda:0')
episode: 294 training return: tensor(-11.2352, device='cuda:0')
episode: 295 training return: tensor(-523.3240, device='cuda:0')
epoch: 74 test_true_pfm: 2031.6801773972381 sim_pfm: -245.1357779543226
episode: 296 training return: tensor(-466.2900, device='cuda:0')
episode: 297 training return: tensor(-267.2870, device='cuda:0')
episode: 298 training return: tensor(-409.1972, device='cuda:0')
episode: 299 training return: tensor(-577.3123, device='cuda:0')
epoch: 75 test_true_pfm: 2289.0077644793478 sim_pfm: -226.5720167691858
episode: 300 training return: tensor(-588.0820, device='cuda:0')
episode: 301 training return: tensor(-629.9332, device='cuda:0')
episode: 302 training return: tensor(-486.7822, device='cuda:0')
episode: 303 training return: tensor(-292.6747, device='cuda:0')
epoch: 76 test_true_pfm: 1568.3409308898736 sim_pfm: -298.2334358280059
episode: 304 training return: tensor(-593.1416, device='cuda:0')
episode: 305 training return: tensor(-544.6277, device='cuda:0')
episode: 306 training return: tensor(-444.0295, device='cuda:0')
episode: 307 training return: tensor(-251.9512, device='cuda:0')
epoch: 77 test_true_pfm: 1716.4709053831127 sim_pfm: -189.82354729808867
episode: 308 training return: tensor(-259.8193, device='cuda:0')
episode: 309 training return: tensor(-242.8924, device='cuda:0')
episode: 310 training return: tensor(-430.8852, device='cuda:0')
episode: 311 training return: tensor(-288.1189, device='cuda:0')
epoch: 78 test_true_pfm: 1882.9274801632553 sim_pfm: -284.6561569771341
episode: 312 training return: tensor(-262.5373, device='cuda:0')
episode: 313 training return: tensor(-431.6608, device='cuda:0')
episode: 314 training return: tensor(-452.1078, device='cuda:0')
episode: 315 training return: tensor(-250.8791, device='cuda:0')
epoch: 79 test_true_pfm: 1931.4484987827689 sim_pfm: -144.90998142724857
episode: 316 training return: tensor(-509.8807, device='cuda:0')
episode: 317 training return: tensor(-310.4037, device='cuda:0')
episode: 318 training return: tensor(-465.7268, device='cuda:0')
episode: 319 training return: tensor(-426.0915, device='cuda:0')
epoch: 80 test_true_pfm: 1931.7703895142697 sim_pfm: -296.19695872648543
episode: 320 training return: tensor(-579.6321, device='cuda:0')
episode: 321 training return: tensor(122.8973, device='cuda:0')
episode: 322 training return: tensor(-576.8607, device='cuda:0')
episode: 323 training return: tensor(-388.9855, device='cuda:0')
epoch: 81 test_true_pfm: 2008.3479519546734 sim_pfm: -206.0415200254356
episode: 324 training return: tensor(-530.2865, device='cuda:0')
episode: 325 training return: tensor(-125.5280, device='cuda:0')
episode: 326 training return: tensor(-12.9225, device='cuda:0')
episode: 327 training return: tensor(-81.3060, device='cuda:0')
epoch: 82 test_true_pfm: 1725.0935286059812 sim_pfm: -236.39139329083264
episode: 328 training return: tensor(-428.6187, device='cuda:0')
episode: 329 training return: tensor(-590.9336, device='cuda:0')
episode: 330 training return: tensor(-438.0060, device='cuda:0')
episode: 331 training return: tensor(-531.1139, device='cuda:0')
epoch: 83 test_true_pfm: 1760.5774693712192 sim_pfm: -211.17297707280764
episode: 332 training return: tensor(-100.3873, device='cuda:0')
episode: 333 training return: tensor(-405.3696, device='cuda:0')
episode: 334 training return: tensor(-553.6157, device='cuda:0')
episode: 335 training return: tensor(-171.3527, device='cuda:0')
epoch: 84 test_true_pfm: 2051.8934305332054 sim_pfm: -162.3165743786764
episode: 336 training return: tensor(-527.5875, device='cuda:0')
episode: 337 training return: tensor(-465.8403, device='cuda:0')
episode: 338 training return: tensor(-229.5536, device='cuda:0')
episode: 339 training return: tensor(-571.8496, device='cuda:0')
epoch: 85 test_true_pfm: 1232.8209530452016 sim_pfm: -382.7304943766988
episode: 340 training return: tensor(-429.7654, device='cuda:0')
episode: 341 training return: tensor(-463.3394, device='cuda:0')
episode: 342 training return: tensor(-537.2137, device='cuda:0')
episode: 343 training return: tensor(-584.2674, device='cuda:0')
epoch: 86 test_true_pfm: 1958.0829787925024 sim_pfm: -405.363180764912
episode: 344 training return: tensor(-554.1693, device='cuda:0')
episode: 345 training return: tensor(106.6911, device='cuda:0')
episode: 346 training return: tensor(-558.6771, device='cuda:0')
episode: 347 training return: tensor(-473.1885, device='cuda:0')
epoch: 87 test_true_pfm: 1967.2655608060293 sim_pfm: -206.21155714045744
episode: 348 training return: tensor(-488.7755, device='cuda:0')
episode: 349 training return: tensor(-339.0865, device='cuda:0')
episode: 350 training return: tensor(-234.0179, device='cuda:0')
episode: 351 training return: tensor(-426.9270, device='cuda:0')
epoch: 88 test_true_pfm: 3279.3640082436827 sim_pfm: -148.820307920813
episode: 352 training return: tensor(-91.2586, device='cuda:0')
episode: 353 training return: tensor(-322.7827, device='cuda:0')
episode: 354 training return: tensor(-434.1691, device='cuda:0')
episode: 355 training return: tensor(-400.8186, device='cuda:0')
epoch: 89 test_true_pfm: 2095.9188159584623 sim_pfm: -312.6331513740976
episode: 356 training return: tensor(-447.6653, device='cuda:0')
episode: 357 training return: tensor(-555.6680, device='cuda:0')
episode: 358 training return: tensor(-374.6459, device='cuda:0')
episode: 359 training return: tensor(-371.1545, device='cuda:0')
epoch: 90 test_true_pfm: 2111.046746768438 sim_pfm: -142.96498136090426
episode: 360 training return: tensor(-422.0329, device='cuda:0')
episode: 361 training return: tensor(-373.5222, device='cuda:0')
episode: 362 training return: tensor(-426.1181, device='cuda:0')
episode: 363 training return: tensor(-340.7495, device='cuda:0')
epoch: 91 test_true_pfm: 1927.7732892826116 sim_pfm: -152.30346953339176
episode: 364 training return: tensor(-477.2208, device='cuda:0')
episode: 365 training return: tensor(-586.8978, device='cuda:0')
episode: 366 training return: tensor(-617.2798, device='cuda:0')
episode: 367 training return: tensor(-491.1389, device='cuda:0')
epoch: 92 test_true_pfm: 2034.13009771194 sim_pfm: -235.99447228456847
episode: 368 training return: tensor(-572.1609, device='cuda:0')
episode: 369 training return: tensor(-550.4288, device='cuda:0')
episode: 370 training return: tensor(-426.7983, device='cuda:0')
episode: 371 training return: tensor(-301.3138, device='cuda:0')
epoch: 93 test_true_pfm: 1554.5695988272244 sim_pfm: -429.92804870158824
episode: 372 training return: tensor(-144.1294, device='cuda:0')
episode: 373 training return: tensor(-379.4534, device='cuda:0')
episode: 374 training return: tensor(-522.9606, device='cuda:0')
episode: 375 training return: tensor(-440.8207, device='cuda:0')
epoch: 94 test_true_pfm: 1325.584584476386 sim_pfm: -433.13620302131557
episode: 376 training return: tensor(-345.9331, device='cuda:0')
episode: 377 training return: tensor(-161.0709, device='cuda:0')
episode: 378 training return: tensor(-361.9679, device='cuda:0')
episode: 379 training return: tensor(-495.9228, device='cuda:0')
epoch: 95 test_true_pfm: 1639.7515464314554 sim_pfm: -239.63280196406413
episode: 380 training return: tensor(-326.5915, device='cuda:0')
episode: 381 training return: tensor(-454.9655, device='cuda:0')
episode: 382 training return: tensor(-361.2380, device='cuda:0')
episode: 383 training return: tensor(-480.2924, device='cuda:0')
epoch: 96 test_true_pfm: 2045.2239824683559 sim_pfm: -314.3776210684737
episode: 384 training return: tensor(-373.2528, device='cuda:0')
episode: 385 training return: tensor(-152.4381, device='cuda:0')
episode: 386 training return: tensor(-210.2985, device='cuda:0')
episode: 387 training return: tensor(-403.9515, device='cuda:0')
epoch: 97 test_true_pfm: 1221.7170729839745 sim_pfm: -473.3451369807978
episode: 388 training return: tensor(-389.0308, device='cuda:0')
episode: 389 training return: tensor(-168.3855, device='cuda:0')
episode: 390 training return: tensor(-501.8114, device='cuda:0')
episode: 391 training return: tensor(-601.3946, device='cuda:0')
epoch: 98 test_true_pfm: 1359.4963076195502 sim_pfm: -295.6654027521533
episode: 392 training return: tensor(-511.7110, device='cuda:0')
episode: 393 training return: tensor(-515.9748, device='cuda:0')
episode: 394 training return: tensor(-548.8218, device='cuda:0')
episode: 395 training return: tensor(-285.9696, device='cuda:0')
epoch: 99 test_true_pfm: 2171.2141910150476 sim_pfm: -264.8439940607641
episode: 396 training return: tensor(-367.8644, device='cuda:0')
episode: 397 training return: tensor(-478.5500, device='cuda:0')
episode: 398 training return: tensor(-462.1781, device='cuda:0')
episode: 399 training return: tensor(-428.3622, device='cuda:0')
epoch: 100 test_true_pfm: 1357.5758238411865 sim_pfm: -481.1178777508321
episode: 400 training return: tensor(-150.0638, device='cuda:0')
episode: 401 training return: tensor(-523.2598, device='cuda:0')
episode: 402 training return: tensor(-519.3984, device='cuda:0')
episode: 403 training return: tensor(-575.2269, device='cuda:0')
epoch: 101 test_true_pfm: 1328.8567083730368 sim_pfm: -420.8578740764642
episode: 404 training return: tensor(-284.3846, device='cuda:0')
episode: 405 training return: tensor(-573.4698, device='cuda:0')
episode: 406 training return: tensor(-544.1851, device='cuda:0')
episode: 407 training return: tensor(-527.6346, device='cuda:0')
epoch: 102 test_true_pfm: 1637.2940371650996 sim_pfm: -264.724057876049
episode: 408 training return: tensor(-505.3450, device='cuda:0')
episode: 409 training return: tensor(-301.2255, device='cuda:0')
episode: 410 training return: tensor(-377.7182, device='cuda:0')
episode: 411 training return: tensor(-571.5782, device='cuda:0')
epoch: 103 test_true_pfm: 1828.6434266844517 sim_pfm: -163.98101243233154
episode: 412 training return: tensor(-6.2746, device='cuda:0')
episode: 413 training return: tensor(-220.8020, device='cuda:0')
episode: 414 training return: tensor(-279.0413, device='cuda:0')
episode: 415 training return: tensor(-561.3151, device='cuda:0')
epoch: 104 test_true_pfm: 1278.9582672854606 sim_pfm: -410.54225516955677
episode: 416 training return: tensor(-366.6765, device='cuda:0')
episode: 417 training return: tensor(-428.0859, device='cuda:0')
episode: 418 training return: tensor(-250.5676, device='cuda:0')
episode: 419 training return: tensor(-489.5310, device='cuda:0')
epoch: 105 test_true_pfm: 1761.703886274249 sim_pfm: -441.80688885517884
episode: 420 training return: tensor(-358.1589, device='cuda:0')
episode: 421 training return: tensor(-494.6154, device='cuda:0')
episode: 422 training return: tensor(-214.3773, device='cuda:0')
episode: 423 training return: tensor(-325.7143, device='cuda:0')
epoch: 106 test_true_pfm: 1480.861095888702 sim_pfm: -373.450563331057
episode: 424 training return: tensor(-289.1932, device='cuda:0')
episode: 425 training return: tensor(-529.0882, device='cuda:0')
episode: 426 training return: tensor(-574.6491, device='cuda:0')
episode: 427 training return: tensor(-461.0821, device='cuda:0')
epoch: 107 test_true_pfm: 1273.4992191158674 sim_pfm: -420.16302262960625
episode: 428 training return: tensor(-481.8759, device='cuda:0')
episode: 429 training return: tensor(67.7254, device='cuda:0')
episode: 430 training return: tensor(-205.3348, device='cuda:0')
episode: 431 training return: tensor(-596.6247, device='cuda:0')
epoch: 108 test_true_pfm: 2130.8770160382464 sim_pfm: -174.3129559129593
episode: 432 training return: tensor(-355.8418, device='cuda:0')
episode: 433 training return: tensor(-412.1966, device='cuda:0')
episode: 434 training return: tensor(-82.3320, device='cuda:0')
episode: 435 training return: tensor(-525.2380, device='cuda:0')
epoch: 109 test_true_pfm: 1679.5223686219342 sim_pfm: -303.7520809523606
episode: 436 training return: tensor(-461.3026, device='cuda:0')
episode: 437 training return: tensor(-583.2700, device='cuda:0')
episode: 438 training return: tensor(-203.4191, device='cuda:0')
episode: 439 training return: tensor(-467.8795, device='cuda:0')
epoch: 110 test_true_pfm: 1144.4282439923043 sim_pfm: -512.3682318126279
episode: 440 training return: tensor(-437.0069, device='cuda:0')
episode: 441 training return: tensor(-45.6419, device='cuda:0')
episode: 442 training return: tensor(-178.7064, device='cuda:0')
episode: 443 training return: tensor(-406.8781, device='cuda:0')
epoch: 111 test_true_pfm: 1289.7330884813207 sim_pfm: -499.6087785116785
episode: 444 training return: tensor(-22.0382, device='cuda:0')
episode: 445 training return: tensor(-255.7099, device='cuda:0')
episode: 446 training return: tensor(-216.7888, device='cuda:0')
episode: 447 training return: tensor(-561.7300, device='cuda:0')
epoch: 112 test_true_pfm: 1845.1064000978085 sim_pfm: -210.66887499128157
episode: 448 training return: tensor(-515.8882, device='cuda:0')
episode: 449 training return: tensor(-178.1124, device='cuda:0')
episode: 450 training return: tensor(-347.2288, device='cuda:0')
episode: 451 training return: tensor(-524.9081, device='cuda:0')
epoch: 113 test_true_pfm: 1162.2303475902888 sim_pfm: -506.80948298661195
episode: 452 training return: tensor(-513.2527, device='cuda:0')
episode: 453 training return: tensor(-370.2337, device='cuda:0')
episode: 454 training return: tensor(-516.7286, device='cuda:0')
episode: 455 training return: tensor(-430.6686, device='cuda:0')
epoch: 114 test_true_pfm: 1967.2473332540328 sim_pfm: -18.932739258831134
episode: 456 training return: tensor(-411.2163, device='cuda:0')
episode: 457 training return: tensor(-511.8568, device='cuda:0')
episode: 458 training return: tensor(-251.5598, device='cuda:0')
episode: 459 training return: tensor(-313.6756, device='cuda:0')
epoch: 115 test_true_pfm: 2083.747015895763 sim_pfm: -246.39480746525805
episode: 460 training return: tensor(-581.9012, device='cuda:0')
episode: 461 training return: tensor(-345.6626, device='cuda:0')
episode: 462 training return: tensor(-314.8806, device='cuda:0')
episode: 463 training return: tensor(-395.5652, device='cuda:0')
epoch: 116 test_true_pfm: 1242.1430940327018 sim_pfm: -447.68055465697154
episode: 464 training return: tensor(-447.8560, device='cuda:0')
episode: 465 training return: tensor(-410.0933, device='cuda:0')
episode: 466 training return: tensor(-523.9280, device='cuda:0')
episode: 467 training return: tensor(-448.4338, device='cuda:0')
epoch: 117 test_true_pfm: 1349.4064660929419 sim_pfm: -355.3808433158168
episode: 468 training return: tensor(-510.3813, device='cuda:0')
episode: 469 training return: tensor(-573.1320, device='cuda:0')
episode: 470 training return: tensor(-313.2143, device='cuda:0')
episode: 471 training return: tensor(-578.9240, device='cuda:0')
epoch: 118 test_true_pfm: 1723.759870315643 sim_pfm: -202.9884227271347
episode: 472 training return: tensor(-185.1174, device='cuda:0')
episode: 473 training return: tensor(-302.6236, device='cuda:0')
episode: 474 training return: tensor(-322.7415, device='cuda:0')
episode: 475 training return: tensor(-217.6290, device='cuda:0')
epoch: 119 test_true_pfm: 1106.7660269015669 sim_pfm: -525.1181757995606
episode: 476 training return: tensor(-398.7652, device='cuda:0')
episode: 477 training return: tensor(-359.6104, device='cuda:0')
episode: 478 training return: tensor(-430.9643, device='cuda:0')
episode: 479 training return: tensor(-300.4309, device='cuda:0')
epoch: 120 test_true_pfm: 1480.789154034647 sim_pfm: -415.1043531834924
episode: 480 training return: tensor(-470.4785, device='cuda:0')
episode: 481 training return: tensor(-383.3657, device='cuda:0')
episode: 482 training return: tensor(-367.8781, device='cuda:0')
episode: 483 training return: tensor(-422.4611, device='cuda:0')
epoch: 121 test_true_pfm: 1469.8081764601172 sim_pfm: -258.0729112669748
episode: 484 training return: tensor(-513.5349, device='cuda:0')
episode: 485 training return: tensor(-576.0767, device='cuda:0')
episode: 486 training return: tensor(-100.7246, device='cuda:0')
episode: 487 training return: tensor(-587.4760, device='cuda:0')
epoch: 122 test_true_pfm: 1450.9774624301738 sim_pfm: -317.6371004214355
episode: 488 training return: tensor(-369.7710, device='cuda:0')
episode: 489 training return: tensor(-577.8079, device='cuda:0')
episode: 490 training return: tensor(-366.1676, device='cuda:0')
episode: 491 training return: tensor(-547.0541, device='cuda:0')
epoch: 123 test_true_pfm: 1312.1417606482917 sim_pfm: -449.35509206961916
episode: 492 training return: tensor(-432.1237, device='cuda:0')
episode: 493 training return: tensor(-352.1398, device='cuda:0')
episode: 494 training return: tensor(-437.0661, device='cuda:0')
episode: 495 training return: tensor(-488.5768, device='cuda:0')
epoch: 124 test_true_pfm: 1868.3729745686571 sim_pfm: -290.46070374253515
episode: 496 training return: tensor(-155.6945, device='cuda:0')
episode: 497 training return: tensor(-586.0835, device='cuda:0')
episode: 498 training return: tensor(-430.5617, device='cuda:0')
episode: 499 training return: tensor(-314.3505, device='cuda:0')
epoch: 125 test_true_pfm: 1191.7570675806219 sim_pfm: -502.71501448035514
episode: 500 training return: tensor(-462.2210, device='cuda:0')
episode: 501 training return: tensor(-344.0044, device='cuda:0')
episode: 502 training return: tensor(-246.4144, device='cuda:0')
episode: 503 training return: tensor(-445.4528, device='cuda:0')
epoch: 126 test_true_pfm: 1735.8004359470062 sim_pfm: -301.686374271987
episode: 504 training return: tensor(-452.8767, device='cuda:0')
episode: 505 training return: tensor(-572.9197, device='cuda:0')
episode: 506 training return: tensor(-385.5020, device='cuda:0')
episode: 507 training return: tensor(-570.4498, device='cuda:0')
epoch: 127 test_true_pfm: 1349.954004768216 sim_pfm: -382.6667446712963
episode: 508 training return: tensor(-435.4622, device='cuda:0')
episode: 509 training return: tensor(-183.7368, device='cuda:0')
episode: 510 training return: tensor(-428.3710, device='cuda:0')
episode: 511 training return: tensor(-279.1789, device='cuda:0')
epoch: 128 test_true_pfm: 1331.4889772483264 sim_pfm: -414.05759198132245
episode: 512 training return: tensor(-600.6380, device='cuda:0')
episode: 513 training return: tensor(-429.9068, device='cuda:0')
episode: 514 training return: tensor(-425.6297, device='cuda:0')
episode: 515 training return: tensor(-373.9605, device='cuda:0')
epoch: 129 test_true_pfm: 1906.6587117178333 sim_pfm: -375.8977605597174
episode: 516 training return: tensor(-450.4282, device='cuda:0')
episode: 517 training return: tensor(-427.1175, device='cuda:0')
episode: 518 training return: tensor(-384.7024, device='cuda:0')
episode: 519 training return: tensor(-297.5034, device='cuda:0')
epoch: 130 test_true_pfm: 1714.7459226568983 sim_pfm: -251.37760346763147
episode: 520 training return: tensor(-476.0430, device='cuda:0')
episode: 521 training return: tensor(-51.5011, device='cuda:0')
episode: 522 training return: tensor(-550.2559, device='cuda:0')
episode: 523 training return: tensor(-456.5760, device='cuda:0')
epoch: 131 test_true_pfm: 1380.2094336806329 sim_pfm: -348.5051327208639
episode: 524 training return: tensor(-333.9327, device='cuda:0')
episode: 525 training return: tensor(-499.6003, device='cuda:0')
episode: 526 training return: tensor(-526.1325, device='cuda:0')
episode: 527 training return: tensor(-579.5578, device='cuda:0')
epoch: 132 test_true_pfm: 1037.1269942133088 sim_pfm: -584.2366197243488
episode: 528 training return: tensor(-557.6437, device='cuda:0')
episode: 529 training return: tensor(-5.3641, device='cuda:0')
episode: 530 training return: tensor(-581.6443, device='cuda:0')
episode: 531 training return: tensor(-547.3073, device='cuda:0')
epoch: 133 test_true_pfm: 1464.7056382830394 sim_pfm: -335.8408119321102
episode: 532 training return: tensor(-532.6607, device='cuda:0')
episode: 533 training return: tensor(-470.6284, device='cuda:0')
episode: 534 training return: tensor(-459.3369, device='cuda:0')
episode: 535 training return: tensor(-83.7664, device='cuda:0')
epoch: 134 test_true_pfm: 1408.4825279834433 sim_pfm: -420.17211762885563
episode: 536 training return: tensor(-523.5834, device='cuda:0')
episode: 537 training return: tensor(-584.9439, device='cuda:0')
episode: 538 training return: tensor(-269.3984, device='cuda:0')
episode: 539 training return: tensor(-308.9685, device='cuda:0')
epoch: 135 test_true_pfm: 1061.2865598910269 sim_pfm: -520.4262963517298
episode: 540 training return: tensor(-230.8294, device='cuda:0')
episode: 541 training return: tensor(-300.9122, device='cuda:0')
episode: 542 training return: tensor(-532.6866, device='cuda:0')
episode: 543 training return: tensor(-426.7707, device='cuda:0')
epoch: 136 test_true_pfm: 1320.11224690848 sim_pfm: -323.80015605284524
episode: 544 training return: tensor(-508.4664, device='cuda:0')
episode: 545 training return: tensor(-198.4935, device='cuda:0')
episode: 546 training return: tensor(-460.0912, device='cuda:0')
episode: 547 training return: tensor(-525.1603, device='cuda:0')
epoch: 137 test_true_pfm: 1448.5124754632322 sim_pfm: -415.42213625540415
episode: 548 training return: tensor(-488.3661, device='cuda:0')
episode: 549 training return: tensor(-587.0245, device='cuda:0')
episode: 550 training return: tensor(-569.2170, device='cuda:0')
episode: 551 training return: tensor(-488.9500, device='cuda:0')
epoch: 138 test_true_pfm: 1794.9436441539412 sim_pfm: -202.17335777458115
episode: 552 training return: tensor(-365.5143, device='cuda:0')
episode: 553 training return: tensor(-509.4299, device='cuda:0')
episode: 554 training return: tensor(-311.7084, device='cuda:0')
episode: 555 training return: tensor(-580.9313, device='cuda:0')
epoch: 139 test_true_pfm: 1160.7065282582714 sim_pfm: -461.7899965432783
episode: 556 training return: tensor(-238.9033, device='cuda:0')
episode: 557 training return: tensor(-372.6921, device='cuda:0')
episode: 558 training return: tensor(-321.1791, device='cuda:0')
episode: 559 training return: tensor(-486.1525, device='cuda:0')
epoch: 140 test_true_pfm: 1836.9851980267522 sim_pfm: -318.38845051347744
episode: 560 training return: tensor(-567.8046, device='cuda:0')
episode: 561 training return: tensor(-573.1408, device='cuda:0')
episode: 562 training return: tensor(-320.3052, device='cuda:0')
episode: 563 training return: tensor(-476.6497, device='cuda:0')
epoch: 141 test_true_pfm: 1075.7514370856372 sim_pfm: -475.2238024128601
episode: 564 training return: tensor(-575.3534, device='cuda:0')
episode: 565 training return: tensor(-571.8395, device='cuda:0')
episode: 566 training return: tensor(-582.9186, device='cuda:0')
episode: 567 training return: tensor(-575.6074, device='cuda:0')
epoch: 142 test_true_pfm: 1786.7229928826202 sim_pfm: -270.0730572420871
episode: 568 training return: tensor(-398.5562, device='cuda:0')
episode: 569 training return: tensor(-312.5859, device='cuda:0')
episode: 570 training return: tensor(-608.4879, device='cuda:0')
episode: 571 training return: tensor(-515.2869, device='cuda:0')
epoch: 143 test_true_pfm: 1270.1418138292345 sim_pfm: -458.93255190574564
episode: 572 training return: tensor(-516.9149, device='cuda:0')
episode: 573 training return: tensor(-423.1880, device='cuda:0')
episode: 574 training return: tensor(-551.7936, device='cuda:0')
episode: 575 training return: tensor(-484.6762, device='cuda:0')
epoch: 144 test_true_pfm: 1379.0148755953724 sim_pfm: -427.11533600829233
episode: 576 training return: tensor(-585.2939, device='cuda:0')
episode: 577 training return: tensor(-383.8750, device='cuda:0')
episode: 578 training return: tensor(-619.9648, device='cuda:0')
episode: 579 training return: tensor(-583.8915, device='cuda:0')
epoch: 145 test_true_pfm: 1387.9553324435599 sim_pfm: -466.00485326256603
episode: 580 training return: tensor(-442.0815, device='cuda:0')
episode: 581 training return: tensor(-571.5104, device='cuda:0')
episode: 582 training return: tensor(-586.2384, device='cuda:0')
episode: 583 training return: tensor(-571.5603, device='cuda:0')
epoch: 146 test_true_pfm: 1785.6316619981142 sim_pfm: -316.4468126180679
episode: 584 training return: tensor(-413.2740, device='cuda:0')
episode: 585 training return: tensor(-526.3458, device='cuda:0')
episode: 586 training return: tensor(-341.1929, device='cuda:0')
episode: 587 training return: tensor(-39.6807, device='cuda:0')
epoch: 147 test_true_pfm: 1483.5799055810337 sim_pfm: -438.7730297053543
episode: 588 training return: tensor(-261.7143, device='cuda:0')
episode: 589 training return: tensor(-561.9301, device='cuda:0')
episode: 590 training return: tensor(-379.3269, device='cuda:0')
episode: 591 training return: tensor(-242.4766, device='cuda:0')
epoch: 148 test_true_pfm: 1835.6705067077255 sim_pfm: -250.4269766129825
episode: 592 training return: tensor(-586.6251, device='cuda:0')
episode: 593 training return: tensor(-608.1082, device='cuda:0')
episode: 594 training return: tensor(55.7381, device='cuda:0')
episode: 595 training return: tensor(-565.8498, device='cuda:0')
epoch: 149 test_true_pfm: 882.2000967894128 sim_pfm: -606.1322985558849
episode: 596 training return: tensor(-375.1540, device='cuda:0')
episode: 597 training return: tensor(-461.9865, device='cuda:0')
episode: 598 training return: tensor(-444.4940, device='cuda:0')
episode: 599 training return: tensor(-402.4099, device='cuda:0')
epoch: 150 test_true_pfm: 1602.1349090921922 sim_pfm: -390.7776607462826
