['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'expert', '--seed', '2']
epoch: 0 training_loss 0.34470291554927823 test_loss: 0.26640365123748777
epoch: 1 training_loss 0.22940242297947408 test_loss: 0.21054913997650146
epoch: 2 training_loss 0.19821607507765293 test_loss: 0.17374581098556519
epoch: 3 training_loss 0.19232731007039547 test_loss: 0.16398583650588988
epoch: 4 training_loss 0.17604666337370872 test_loss: 0.14572707414627076
epoch: 5 training_loss 0.14929077781736852 test_loss: 0.1620108962059021
epoch: 6 training_loss 0.16814097955822946 test_loss: 0.19684408903121947
epoch: 7 training_loss 0.15689628027379512 test_loss: 0.12605396509170533
epoch: 8 training_loss 0.13849439319223167 test_loss: 0.13518456220626832
epoch: 9 training_loss 0.13617586793377995 test_loss: 0.11176548004150391
epoch: 10 training_loss 0.13614168874919413 test_loss: 0.11835390329360962
epoch: 11 training_loss 0.1323082296922803 test_loss: 0.1176639199256897
epoch: 12 training_loss 0.1339941716566682 test_loss: 0.12935028076171876
epoch: 13 training_loss 0.12941799651831387 test_loss: 0.13226909637451173
epoch: 14 training_loss 0.13420485258102416 test_loss: 0.11083451509475709
epoch: 15 training_loss 0.12988043744117023 test_loss: 0.09941205382347107
epoch: 16 training_loss 0.12325980167835951 test_loss: 0.11896015405654907
epoch: 17 training_loss 0.1231460191681981 test_loss: 0.12212604284286499
epoch: 18 training_loss 0.11952163077890873 test_loss: 0.11700022220611572
epoch: 19 training_loss 0.11831440038979053 test_loss: 0.10945711135864258
epoch: 20 training_loss 0.12230258021503687 test_loss: 0.11179243326187134
epoch: 21 training_loss 0.11779022067785264 test_loss: 0.12687758207321168
epoch: 22 training_loss 0.11664479888975621 test_loss: 0.1257651925086975
epoch: 23 training_loss 0.12066061504185199 test_loss: 0.10924217700958253
epoch: 24 training_loss 0.12799707040190697 test_loss: 0.11503956317901612
epoch: 25 training_loss 0.12100827973335981 test_loss: 0.10311200618743896
epoch: 26 training_loss 0.11437549704685807 test_loss: 0.13114819526672364
epoch: 27 training_loss 0.1216791794449091 test_loss: 0.10602174997329712
epoch: 28 training_loss 0.11802135802805423 test_loss: 0.1147336483001709
epoch: 29 training_loss 0.1170024324208498 test_loss: 0.13802319765090942
epoch: 30 training_loss 0.10825807521119714 test_loss: 0.11232813596725463
epoch: 31 training_loss 0.11765107735991479 test_loss: 0.11617273092269897
epoch: 32 training_loss 0.11417349096387625 test_loss: 0.11332428455352783
epoch: 33 training_loss 0.1118525305762887 test_loss: 0.11831607818603515
epoch: 34 training_loss 0.11071440510451794 test_loss: 0.12322663068771363
epoch: 35 training_loss 0.12322384083643556 test_loss: 0.1205304741859436
epoch: 36 training_loss 0.11235351346433163 test_loss: 0.12026019096374511
epoch: 37 training_loss 0.11310204392299056 test_loss: 0.10386867523193359
epoch: 38 training_loss 0.10648611892014742 test_loss: 0.1362246036529541
epoch: 39 training_loss 0.11668315287679434 test_loss: 0.11199733018875122
epoch: 40 training_loss 0.10772420909255744 test_loss: 0.11298539638519287
epoch: 41 training_loss 0.11547585856169462 test_loss: 0.09660090804100037
epoch: 42 training_loss 0.11359452437609434 test_loss: 0.11289104223251342
epoch: 43 training_loss 0.11503919653594494 test_loss: 0.09972838163375855
epoch: 44 training_loss 0.11145177680999041 test_loss: 0.1160226583480835
epoch: 45 training_loss 0.11329134833067656 test_loss: 0.11825469732284546
epoch: 46 training_loss 0.11056551745161414 test_loss: 0.10394279956817627
epoch: 47 training_loss 0.11533551029860974 test_loss: 0.10069875717163086
epoch: 48 training_loss 0.11306877385824919 test_loss: 0.11519812345504761
epoch: 49 training_loss 0.12117344882339239 test_loss: 0.11050541400909424
epoch: 50 training_loss 0.11074560072273015 test_loss: 0.10035425424575806
epoch: 51 training_loss 0.11286371923983098 test_loss: 0.11438878774642944
epoch: 52 training_loss 0.11649410661309957 test_loss: 0.11405220031738281
epoch: 53 training_loss 0.10985841050744057 test_loss: 0.10196048021316528
epoch: 54 training_loss 0.11215583797544242 test_loss: 0.10477317571640014
epoch: 55 training_loss 0.11299712788313628 test_loss: 0.12096941471099854
epoch: 56 training_loss 0.10266164369881153 test_loss: 0.10189704895019532
epoch: 57 training_loss 0.10343431033194066 test_loss: 0.10088937282562256
epoch: 58 training_loss 0.11423124648630618 test_loss: 0.1098402976989746
epoch: 59 training_loss 0.10867842711508274 test_loss: 0.12242906093597412
epoch: 60 training_loss 0.11167780496180058 test_loss: 0.11572579145431519
epoch: 61 training_loss 0.1159773263707757 test_loss: 0.11074844598770142
epoch: 62 training_loss 0.11463058046996594 test_loss: 0.09992277026176452
epoch: 63 training_loss 0.10817377842962742 test_loss: 0.11823985576629639
epoch: 64 training_loss 0.11829329993575811 test_loss: 0.11232941150665283
epoch: 65 training_loss 0.10712534792721272 test_loss: 0.10426883697509766
epoch: 66 training_loss 0.11673927308991551 test_loss: 0.1018099308013916
epoch: 67 training_loss 0.10611700108274817 test_loss: 0.0940746545791626
epoch: 68 training_loss 0.10979609264060855 test_loss: 0.09319383502006531
epoch: 69 training_loss 0.10851030822843313 test_loss: 0.10559805631637573
epoch: 70 training_loss 0.10579553103074431 test_loss: 0.12143775224685668
epoch: 71 training_loss 0.10566705290228129 test_loss: 0.09327421188354493
epoch: 72 training_loss 0.10706067126244306 test_loss: 0.10770351886749267
epoch: 73 training_loss 0.11545882670208812 test_loss: 0.11396546363830566
epoch: 74 training_loss 0.10954871490597724 test_loss: 0.11159087419509887
epoch: 75 training_loss 0.11111119214445353 test_loss: 0.11021175384521484
epoch: 76 training_loss 0.10896805364638568 test_loss: 0.11970189809799195
epoch: 77 training_loss 0.11577918626368046 test_loss: 0.08377465605735779
epoch: 78 training_loss 0.11486303109675645 test_loss: 0.11725118160247802
epoch: 79 training_loss 0.10671322921290993 test_loss: 0.10264003276824951
epoch: 80 training_loss 0.11612467214465141 test_loss: 0.13790793418884278
epoch: 81 training_loss 0.10743587836623192 test_loss: 0.1387563943862915
epoch: 82 training_loss 0.11533248975872994 test_loss: 0.09943050146102905
epoch: 83 training_loss 0.10625816777348518 test_loss: 0.10843113660812378
epoch: 84 training_loss 0.1111339020356536 test_loss: 0.11565202474594116
epoch: 85 training_loss 0.10855831079185009 test_loss: 0.11779325008392334
epoch: 86 training_loss 0.11287995748221874 test_loss: 0.11177206039428711
epoch: 87 training_loss 0.11327381636947394 test_loss: 0.11902860403060914
epoch: 88 training_loss 0.11177858632057905 test_loss: 0.10280687808990478
epoch: 89 training_loss 0.10843167018145322 test_loss: 0.0970228672027588
epoch: 90 training_loss 0.10296583458781243 test_loss: 0.10115581750869751
epoch: 91 training_loss 0.1107903153449297 test_loss: 0.10497334003448486
epoch: 92 training_loss 0.10839390864595772 test_loss: 0.1081702470779419
epoch: 93 training_loss 0.11003615349531173 test_loss: 0.1033824324607849
epoch: 94 training_loss 0.11422430947422982 test_loss: 0.11855721473693848
epoch: 95 training_loss 0.1138397329673171 test_loss: 0.11619426012039184
epoch: 96 training_loss 0.10957455221563578 test_loss: 0.10576395988464356
epoch: 97 training_loss 0.11330861244350672 test_loss: 0.10895934104919433
epoch: 98 training_loss 0.11213185474276542 test_loss: 0.1035006046295166
epoch: 99 training_loss 0.11192703224718571 test_loss: 0.11580448150634766
epoch: 100 training_loss 0.10909622218459844 test_loss: 0.11328879594802857
epoch: 101 training_loss 0.1121102337911725 test_loss: 0.11388257741928101
epoch: 102 training_loss 0.11070198375731706 test_loss: 0.11902103424072266
epoch: 103 training_loss 0.10987134631723165 test_loss: 0.1065829873085022
epoch: 104 training_loss 0.11284000337123871 test_loss: 0.09815015792846679
epoch: 105 training_loss 0.10728141322731971 test_loss: 0.10122334957122803
epoch: 106 training_loss 0.10613526113331317 test_loss: 0.10671665668487548
epoch: 107 training_loss 0.11327632870525121 test_loss: 0.10221540927886963
epoch: 108 training_loss 0.10353986004367471 test_loss: 0.10957401990890503
epoch: 109 training_loss 0.10740487897768616 test_loss: 0.1170414924621582
epoch: 110 training_loss 0.10209976837038993 test_loss: 0.1255203127861023
epoch: 111 training_loss 0.11464637249708176 test_loss: 0.11144275665283203
epoch: 112 training_loss 0.11106649447232485 test_loss: 0.10564534664154053
epoch: 113 training_loss 0.11048939026892185 test_loss: 0.10577309131622314
epoch: 114 training_loss 0.10822514966130256 test_loss: 0.09791962504386902
epoch: 115 training_loss 0.11248962171375751 test_loss: 0.11160348653793335
epoch: 116 training_loss 0.11782620258629323 test_loss: 0.11053773164749145
epoch: 117 training_loss 0.1091426146775484 test_loss: 0.12784316539764404
epoch: 118 training_loss 0.12033294465392828 test_loss: 0.11252152919769287
epoch: 119 training_loss 0.11361761271953583 test_loss: 0.10437103509902954
epoch: 120 training_loss 0.10264051299542189 test_loss: 0.11370422840118408
epoch: 121 training_loss 0.1148287895694375 test_loss: 0.09943459630012512
epoch: 122 training_loss 0.1067014217376709 test_loss: 0.10703014135360718
epoch: 123 training_loss 0.10691679250448942 test_loss: 0.10233405828475953
epoch: 124 training_loss 0.11616908047348261 test_loss: 0.09795790910720825
epoch: 125 training_loss 0.10650669228285552 test_loss: 0.10456794500350952
epoch: 126 training_loss 0.10974811010062695 test_loss: 0.10877003669738769
epoch: 127 training_loss 0.10571637235581875 test_loss: 0.10436842441558838
epoch: 128 training_loss 0.10628015864640475 test_loss: 0.10705835819244384
epoch: 129 training_loss 0.10927001416683196 test_loss: 0.09928163886070251
epoch: 130 training_loss 0.09686228394508362 test_loss: 0.11567066907882691
epoch: 131 training_loss 0.11122156102210283 test_loss: 0.11150685548782349
epoch: 132 training_loss 0.11034829381853342 test_loss: 0.10141788721084595
epoch: 133 training_loss 0.10971306559629738 test_loss: 0.10704990625381469
epoch: 134 training_loss 0.10546768095344305 test_loss: 0.10508874654769898
epoch: 135 training_loss 0.11553386330604554 test_loss: 0.11284418106079101
epoch: 136 training_loss 0.1147432467713952 test_loss: 0.11739521026611328
epoch: 137 training_loss 0.10480026632547379 test_loss: 0.1054184079170227
epoch: 138 training_loss 0.10805106922984123 test_loss: 0.10992648601531982
epoch: 139 training_loss 0.10935918506234885 test_loss: 0.11111875772476196
epoch: 140 training_loss 0.11061857899650931 test_loss: 0.1071118712425232
epoch: 141 training_loss 0.11743070419877767 test_loss: 0.10343888998031617
epoch: 142 training_loss 0.10279436584562063 test_loss: 0.10691810846328735
epoch: 143 training_loss 0.10824355099350214 test_loss: 0.09194149374961853
epoch: 144 training_loss 0.11573443848639726 test_loss: 0.11597539186477661
epoch: 145 training_loss 0.10262145172804595 test_loss: 0.11061582565307618
epoch: 146 training_loss 0.10477648355066776 test_loss: 0.1001784324645996
epoch: 147 training_loss 0.10396803479641675 test_loss: 0.09687597751617431
epoch: 148 training_loss 0.11576400637626648 test_loss: 0.11631969213485718
epoch: 149 training_loss 0.10728914488106966 test_loss: 0.11981314420700073
epoch: 0 training_loss 45.57889101028442 test_loss: 25.76494140625
epoch: 1 training_loss 21.04554630279541 test_loss: 18.110049438476562
epoch: 2 training_loss 16.195756978988648 test_loss: 14.871177673339844
epoch: 3 training_loss 13.585850744247436 test_loss: 12.304810333251954
epoch: 4 training_loss 11.573147773742676 test_loss: 10.8493408203125
epoch: 5 training_loss 10.26587109565735 test_loss: 9.345599365234374
epoch: 6 training_loss 9.038097205162048 test_loss: 8.608065032958985
epoch: 7 training_loss 8.142608022689819 test_loss: 7.514051818847657
epoch: 8 training_loss 7.617999753952026 test_loss: 6.977334594726562
epoch: 9 training_loss 6.9556643629074095 test_loss: 6.578899383544922
epoch: 10 training_loss 6.65828866481781 test_loss: 6.11693000793457
epoch: 11 training_loss 6.076439938545227 test_loss: 6.136025238037109
epoch: 12 training_loss 5.779738144874573 test_loss: 5.731946182250977
epoch: 13 training_loss 5.492643990516663 test_loss: 5.422983932495117
epoch: 14 training_loss 5.214985690116882 test_loss: 5.111447143554687
epoch: 15 training_loss 5.114650392532349 test_loss: 5.06871223449707
epoch: 16 training_loss 4.937975792884827 test_loss: 4.973500823974609
epoch: 17 training_loss 4.762292947769165 test_loss: 4.731432342529297
epoch: 18 training_loss 4.645720860958099 test_loss: 4.586114883422852
epoch: 19 training_loss 4.488809835910797 test_loss: 4.332918167114258
epoch: 20 training_loss 4.464965906143188 test_loss: 4.384312438964844
epoch: 21 training_loss 4.3543677282333375 test_loss: 4.147015380859375
epoch: 22 training_loss 4.257162928581238 test_loss: 4.219274520874023
epoch: 23 training_loss 3.982032747268677 test_loss: 3.8054550170898436
epoch: 24 training_loss 4.050099880695343 test_loss: 3.8248668670654298
epoch: 25 training_loss 3.9165711212158203 test_loss: 3.876780319213867
epoch: 26 training_loss 3.8914471316337584 test_loss: 3.9085418701171877
epoch: 27 training_loss 3.834110713005066 test_loss: 3.7465572357177734
epoch: 28 training_loss 3.8149314880371095 test_loss: 3.8055183410644533
epoch: 29 training_loss 3.6794901371002195 test_loss: 3.541909408569336
epoch: 30 training_loss 3.7180127334594726 test_loss: 3.7864757537841798
epoch: 31 training_loss 3.572146122455597 test_loss: 3.738922119140625
epoch: 32 training_loss 3.56440819978714 test_loss: 3.6987236022949217
epoch: 33 training_loss 3.5828102827072144 test_loss: 3.287751007080078
epoch: 34 training_loss 3.4469967246055604 test_loss: 3.344676208496094
epoch: 35 training_loss 3.3881902551651 test_loss: 3.345216751098633
epoch: 36 training_loss 3.3426771426200865 test_loss: 3.4806766510009766
epoch: 37 training_loss 3.2657147312164305 test_loss: 3.2141075134277344
epoch: 38 training_loss 3.30444944858551 test_loss: 3.3887210845947267
epoch: 39 training_loss 3.237315492630005 test_loss: 3.2536964416503906
epoch: 40 training_loss 3.20002756357193 test_loss: 3.148041534423828
epoch: 41 training_loss 3.177687795162201 test_loss: 3.2848628997802733
epoch: 42 training_loss 3.094078629016876 test_loss: 3.1136859893798827
epoch: 43 training_loss 3.1347971391677856 test_loss: 2.873856544494629
epoch: 44 training_loss 3.1488108372688295 test_loss: 3.1099288940429686
epoch: 45 training_loss 3.0266751265525818 test_loss: 3.0236364364624024
epoch: 46 training_loss 3.039570574760437 test_loss: 2.9037343978881838
epoch: 47 training_loss 2.943855438232422 test_loss: 3.0425643920898438
epoch: 48 training_loss 2.9157959175109864 test_loss: 2.8207393646240235
epoch: 49 training_loss 2.876935927867889 test_loss: 2.99910945892334
epoch: 50 training_loss 2.813503611087799 test_loss: 2.794456672668457
epoch: 51 training_loss 2.8020074129104615 test_loss: 2.757283592224121
epoch: 52 training_loss 2.8375236439704894 test_loss: 2.8162071228027346
epoch: 53 training_loss 2.855499360561371 test_loss: 2.7981874465942385
epoch: 54 training_loss 2.7648687887191774 test_loss: 2.7521732330322264
epoch: 55 training_loss 2.8049513292312622 test_loss: 2.6088626861572264
epoch: 56 training_loss 2.7007225060462954 test_loss: 2.7867088317871094
epoch: 57 training_loss 2.797422387599945 test_loss: 2.8137407302856445
epoch: 58 training_loss 2.6933417654037477 test_loss: 2.7302759170532225
epoch: 59 training_loss 2.6867275166511537 test_loss: 2.719465446472168
epoch: 60 training_loss 2.6882626605033875 test_loss: 2.4985612869262694
epoch: 61 training_loss 2.650574405193329 test_loss: 2.6667722702026366
epoch: 62 training_loss 2.6879245829582215 test_loss: 2.8373672485351564
epoch: 63 training_loss 2.632974648475647 test_loss: 2.5488279342651365
epoch: 64 training_loss 2.6047202134132386 test_loss: 2.5451581954956053
epoch: 65 training_loss 2.5438642835617067 test_loss: 2.433498001098633
epoch: 66 training_loss 2.5142524218559266 test_loss: 2.703022766113281
epoch: 67 training_loss 2.5700335371494294 test_loss: 2.475344657897949
epoch: 68 training_loss 2.5241522359848023 test_loss: 2.6039216995239256
epoch: 69 training_loss 2.449477651119232 test_loss: 2.481953811645508
epoch: 70 training_loss 2.5561341464519503 test_loss: 2.43184757232666
epoch: 71 training_loss 2.4703611719608305 test_loss: 2.3085247039794923
epoch: 72 training_loss 2.4369773948192597 test_loss: 2.6571962356567385
epoch: 73 training_loss 2.4667927372455596 test_loss: 2.3987041473388673
epoch: 74 training_loss 2.4285466516017915 test_loss: 2.5134435653686524
epoch: 75 training_loss 2.4407268381118774 test_loss: 2.335483169555664
epoch: 76 training_loss 2.3853863513469697 test_loss: 2.378195381164551
epoch: 77 training_loss 2.364631676673889 test_loss: 2.392019271850586
epoch: 78 training_loss 2.3707072722911833 test_loss: 2.4409408569335938
epoch: 79 training_loss 2.449686807394028 test_loss: 2.4281429290771483
epoch: 80 training_loss 2.269627126455307 test_loss: 2.322682571411133
epoch: 81 training_loss 2.3005924117565155 test_loss: 2.283906364440918
epoch: 82 training_loss 2.3151934218406676 test_loss: 2.381820487976074
epoch: 83 training_loss 2.3272153186798095 test_loss: 2.4181652069091797
epoch: 84 training_loss 2.3427613401412963 test_loss: 2.2438644409179687
epoch: 85 training_loss 2.3106040716171266 test_loss: 2.225101661682129
epoch: 86 training_loss 2.281476057767868 test_loss: 2.197539138793945
epoch: 87 training_loss 2.2662721455097197 test_loss: 2.161642837524414
epoch: 88 training_loss 2.208641440868378 test_loss: 2.3428443908691405
epoch: 89 training_loss 2.2137722933292387 test_loss: 2.3656328201293944
epoch: 90 training_loss 2.204372011423111 test_loss: 2.21689395904541
epoch: 91 training_loss 2.2217781937122343 test_loss: 2.112954330444336
epoch: 92 training_loss 2.1959975039958954 test_loss: 2.351374626159668
epoch: 93 training_loss 2.225899453163147 test_loss: 2.1448186874389648
epoch: 94 training_loss 2.237527673244476 test_loss: 2.129179000854492
epoch: 95 training_loss 2.219610387086868 test_loss: 2.1254812240600587
epoch: 96 training_loss 2.1833542585372925 test_loss: 2.084813690185547
epoch: 97 training_loss 2.1369559454917906 test_loss: 2.083628845214844
epoch: 98 training_loss 2.067581522464752 test_loss: 2.204825782775879
epoch: 99 training_loss 2.138325468301773 test_loss: 2.276413917541504
epoch: 100 training_loss 2.09538355588913 test_loss: 1.9841941833496093
epoch: 101 training_loss 2.1408163189888 test_loss: 2.226542282104492
epoch: 102 training_loss 2.127043182849884 test_loss: 2.0160818099975586
epoch: 103 training_loss 2.0952058148384096 test_loss: 2.0148067474365234
epoch: 104 training_loss 2.0891392731666567 test_loss: 1.938727569580078
epoch: 105 training_loss 2.0549548125267028 test_loss: 2.1065868377685546
epoch: 106 training_loss 2.1109744715690613 test_loss: 2.0790271759033203
epoch: 107 training_loss 2.041638561487198 test_loss: 2.0876049041748046
epoch: 108 training_loss 2.081004478931427 test_loss: 2.112807846069336
epoch: 109 training_loss 2.0873626947402952 test_loss: 2.090121269226074
epoch: 110 training_loss 2.0981644189357755 test_loss: 2.119083213806152
epoch: 111 training_loss 2.0889889109134674 test_loss: 2.044052505493164
epoch: 112 training_loss 1.9933396244049073 test_loss: 1.9633380889892578
epoch: 113 training_loss 2.0023276877403258 test_loss: 2.0654836654663087
epoch: 114 training_loss 2.04693261384964 test_loss: 2.071884346008301
epoch: 115 training_loss 1.9824820101261138 test_loss: 1.9763166427612304
epoch: 116 training_loss 1.9757330310344696 test_loss: 2.001785469055176
epoch: 117 training_loss 1.9872769546508788 test_loss: 2.051662635803223
epoch: 118 training_loss 1.999649852514267 test_loss: 1.9230928421020508
epoch: 119 training_loss 2.027568210363388 test_loss: 1.9241092681884766
epoch: 120 training_loss 1.9911770665645598 test_loss: 1.9591766357421876
epoch: 121 training_loss 1.9273308849334716 test_loss: 1.9493864059448243
epoch: 122 training_loss 2.045287346839905 test_loss: 2.0711193084716797
epoch: 123 training_loss 1.9675617516040802 test_loss: 2.1132274627685548
epoch: 124 training_loss 1.9150002789497376 test_loss: 1.8717145919799805
epoch: 125 training_loss 1.9260955023765565 test_loss: 1.9205097198486327
epoch: 126 training_loss 1.9704488170146943 test_loss: 1.892919158935547
epoch: 127 training_loss 1.931381378173828 test_loss: 1.941809844970703
epoch: 128 training_loss 1.9383812963962554 test_loss: 2.0106483459472657
epoch: 129 training_loss 1.9494616305828094 test_loss: 1.934544563293457
epoch: 130 training_loss 1.9803950810432434 test_loss: 2.0130144119262696
epoch: 131 training_loss 1.901419597864151 test_loss: 1.9578908920288085
epoch: 132 training_loss 1.9206265366077424 test_loss: 1.9516534805297852
epoch: 133 training_loss 1.8990712440013886 test_loss: 1.9315740585327148
epoch: 134 training_loss 1.8855425989627839 test_loss: 1.9054069519042969
epoch: 135 training_loss 1.8810603439807891 test_loss: 1.8662256240844726
epoch: 136 training_loss 1.891882758140564 test_loss: 1.971770668029785
epoch: 137 training_loss 1.8887138426303864 test_loss: 1.8950372695922852
epoch: 138 training_loss 1.8762630140781402 test_loss: 1.8071504592895509
epoch: 139 training_loss 1.8285569894313811 test_loss: 1.7827945709228517
epoch: 140 training_loss 1.8758015596866608 test_loss: 1.942612075805664
epoch: 141 training_loss 1.8389514577388764 test_loss: 1.8888772964477538
epoch: 142 training_loss 1.8470746552944184 test_loss: 1.8082540512084961
epoch: 143 training_loss 1.8548891043663025 test_loss: 1.817416000366211
epoch: 144 training_loss 1.8344057404994965 test_loss: 1.807270622253418
epoch: 145 training_loss 1.8735835242271424 test_loss: 1.8815599441528321
epoch: 146 training_loss 1.8335889875888824 test_loss: 1.8661428451538087
epoch: 147 training_loss 1.8247071635723113 test_loss: 1.842961883544922
epoch: 148 training_loss 1.8491584694385528 test_loss: 1.8871158599853515
epoch: 149 training_loss 1.8271435821056365 test_loss: 1.7209768295288086
8137.474715422497
episode: 0 training return: tensor(-999.9954, device='cuda:0')
episode: 1 training return: tensor(-480.4660, device='cuda:0')
episode: 2 training return: tensor(-383.5019, device='cuda:0')
episode: 3 training return: tensor(-538.3892, device='cuda:0')
epoch: 1 test_true_pfm: 10257.210974000705 sim_pfm: -186.88067090590872
episode: 4 training return: tensor(-693.3186, device='cuda:0')
episode: 5 training return: tensor(-999.9833, device='cuda:0')
episode: 6 training return: tensor(-486.1128, device='cuda:0')
episode: 7 training return: tensor(-732.0741, device='cuda:0')
epoch: 2 test_true_pfm: 3859.984698233793 sim_pfm: -395.1744637308487
episode: 8 training return: tensor(-999.9943, device='cuda:0')
episode: 9 training return: tensor(-405.0993, device='cuda:0')
episode: 10 training return: tensor(-308.8027, device='cuda:0')
episode: 11 training return: tensor(-912.0052, device='cuda:0')
epoch: 3 test_true_pfm: 6880.219070606691 sim_pfm: -481.52294687847217
episode: 12 training return: tensor(-826.9321, device='cuda:0')
episode: 13 training return: tensor(-373.7881, device='cuda:0')
episode: 14 training return: tensor(-365.1946, device='cuda:0')
episode: 15 training return: tensor(-441.1672, device='cuda:0')
epoch: 4 test_true_pfm: 9705.965069412156 sim_pfm: -179.39197297599944
episode: 16 training return: tensor(-995.5300, device='cuda:0')
episode: 17 training return: tensor(-999.9971, device='cuda:0')
episode: 18 training return: tensor(-510.7061, device='cuda:0')
episode: 19 training return: tensor(-632.8127, device='cuda:0')
epoch: 5 test_true_pfm: 3501.0593548113247 sim_pfm: -533.6275290765334
episode: 20 training return: tensor(-832.6155, device='cuda:0')
episode: 21 training return: tensor(-892.2103, device='cuda:0')
episode: 22 training return: tensor(-325.5059, device='cuda:0')
episode: 23 training return: tensor(-397.4722, device='cuda:0')
epoch: 6 test_true_pfm: 9929.433027126208 sim_pfm: -954.7446340825409
episode: 24 training return: tensor(-873.9923, device='cuda:0')
episode: 25 training return: tensor(-271.8553, device='cuda:0')
episode: 26 training return: tensor(-999.6152, device='cuda:0')
episode: 27 training return: tensor(-376.9493, device='cuda:0')
epoch: 7 test_true_pfm: 4048.0727451016915 sim_pfm: -369.1799406069719
episode: 28 training return: tensor(-999.9998, device='cuda:0')
episode: 29 training return: tensor(-63.1343, device='cuda:0')
episode: 30 training return: tensor(-655.9635, device='cuda:0')
episode: 31 training return: tensor(-998.7926, device='cuda:0')
epoch: 8 test_true_pfm: 6818.3184315880135 sim_pfm: -355.29508588684257
episode: 32 training return: tensor(-803.6694, device='cuda:0')
episode: 33 training return: tensor(-185.3544, device='cuda:0')
episode: 34 training return: tensor(-968.6030, device='cuda:0')
episode: 35 training return: tensor(-999.8967, device='cuda:0')
epoch: 9 test_true_pfm: 9671.591552987067 sim_pfm: -110.6585793478298
episode: 36 training return: tensor(-998.7489, device='cuda:0')
episode: 37 training return: tensor(-607.6534, device='cuda:0')
episode: 38 training return: tensor(-636.1652, device='cuda:0')
episode: 39 training return: tensor(-260.4261, device='cuda:0')
epoch: 10 test_true_pfm: 6596.544053031186 sim_pfm: -16.78145229513757
episode: 40 training return: tensor(-402.1950, device='cuda:0')
episode: 41 training return: tensor(-660.7873, device='cuda:0')
episode: 42 training return: tensor(-999.9996, device='cuda:0')
episode: 43 training return: tensor(-281.6226, device='cuda:0')
epoch: 11 test_true_pfm: -189.48986376936168 sim_pfm: -236.99801675808462
episode: 44 training return: tensor(-952.1031, device='cuda:0')
episode: 45 training return: tensor(-361.4102, device='cuda:0')
episode: 46 training return: tensor(-659.0286, device='cuda:0')
episode: 47 training return: tensor(-323.0478, device='cuda:0')
epoch: 12 test_true_pfm: 6513.234254449548 sim_pfm: -521.1019339552537
episode: 48 training return: tensor(-999.4738, device='cuda:0')
episode: 49 training return: tensor(-999.6566, device='cuda:0')
episode: 50 training return: tensor(-310.9893, device='cuda:0')
episode: 51 training return: tensor(-999.8429, device='cuda:0')
epoch: 13 test_true_pfm: 9014.90641245211 sim_pfm: -243.84475131133027
episode: 52 training return: tensor(-307.9095, device='cuda:0')
episode: 53 training return: tensor(-323.7362, device='cuda:0')
episode: 54 training return: tensor(-828.8411, device='cuda:0')
episode: 55 training return: tensor(-982.4624, device='cuda:0')
epoch: 14 test_true_pfm: 7576.666227815395 sim_pfm: -474.1030714188625
episode: 56 training return: tensor(-355.2685, device='cuda:0')
episode: 57 training return: tensor(-328.2589, device='cuda:0')
episode: 58 training return: tensor(-999.9999, device='cuda:0')
episode: 59 training return: tensor(-831.1392, device='cuda:0')
epoch: 15 test_true_pfm: 8609.233105558727 sim_pfm: -17.96574104411411
episode: 60 training return: tensor(-159.3523, device='cuda:0')
episode: 61 training return: tensor(-999.3531, device='cuda:0')
episode: 62 training return: tensor(-232.0091, device='cuda:0')
episode: 63 training return: tensor(-409.8632, device='cuda:0')
epoch: 16 test_true_pfm: 8564.742051201372 sim_pfm: -95.47520050932265
episode: 64 training return: tensor(-544.1438, device='cuda:0')
episode: 65 training return: tensor(-263.1598, device='cuda:0')
episode: 66 training return: tensor(-200.1709, device='cuda:0')
episode: 67 training return: tensor(-783.6714, device='cuda:0')
epoch: 17 test_true_pfm: 6327.76298327254 sim_pfm: -427.259366586882
episode: 68 training return: tensor(-943.7208, device='cuda:0')
episode: 69 training return: tensor(-323.6047, device='cuda:0')
episode: 70 training return: tensor(-386.9655, device='cuda:0')
episode: 71 training return: tensor(-217.8358, device='cuda:0')
epoch: 18 test_true_pfm: 10108.49036474876 sim_pfm: -308.5511876527841
episode: 72 training return: tensor(-704.5594, device='cuda:0')
episode: 73 training return: tensor(-999.4932, device='cuda:0')
episode: 74 training return: tensor(-985.7466, device='cuda:0')
episode: 75 training return: tensor(-373.1345, device='cuda:0')
epoch: 19 test_true_pfm: 543.3892829114303 sim_pfm: -163.135345243422
episode: 76 training return: tensor(-360.1864, device='cuda:0')
episode: 77 training return: tensor(-185.4236, device='cuda:0')
episode: 78 training return: tensor(-453.3549, device='cuda:0')
episode: 79 training return: tensor(-232.9545, device='cuda:0')
epoch: 20 test_true_pfm: 6697.18379398407 sim_pfm: -124.90056936482627
episode: 80 training return: tensor(-363.6440, device='cuda:0')
episode: 81 training return: tensor(-459.1783, device='cuda:0')
episode: 82 training return: tensor(-148.5513, device='cuda:0')
episode: 83 training return: tensor(-799.4900, device='cuda:0')
epoch: 21 test_true_pfm: 9622.674266719696 sim_pfm: -130.686756682524
episode: 84 training return: tensor(-332.9426, device='cuda:0')
episode: 85 training return: tensor(-328.1589, device='cuda:0')
episode: 86 training return: tensor(-65.6348, device='cuda:0')
episode: 87 training return: tensor(-999.9979, device='cuda:0')
epoch: 22 test_true_pfm: 7960.211339175454 sim_pfm: -246.64294260856695
episode: 88 training return: tensor(-999.9984, device='cuda:0')
episode: 89 training return: tensor(-805.0219, device='cuda:0')
episode: 90 training return: tensor(-202.7386, device='cuda:0')
episode: 91 training return: tensor(-175.4639, device='cuda:0')
epoch: 23 test_true_pfm: 8095.25732891971 sim_pfm: -127.6947855168255
episode: 92 training return: tensor(-276.0797, device='cuda:0')
episode: 93 training return: tensor(-346.4239, device='cuda:0')
episode: 94 training return: tensor(-308.8512, device='cuda:0')
episode: 95 training return: tensor(-254.5017, device='cuda:0')
epoch: 24 test_true_pfm: 9281.178781087548 sim_pfm: -153.30943839105507
episode: 96 training return: tensor(-124.5679, device='cuda:0')
episode: 97 training return: tensor(-534.2892, device='cuda:0')
episode: 98 training return: tensor(-222.0223, device='cuda:0')
episode: 99 training return: tensor(-898.7687, device='cuda:0')
epoch: 25 test_true_pfm: 9979.525441906326 sim_pfm: -586.3595758806429
episode: 100 training return: tensor(-325.1049, device='cuda:0')
episode: 101 training return: tensor(-495.9652, device='cuda:0')
episode: 102 training return: tensor(-609.3236, device='cuda:0')
episode: 103 training return: tensor(-283.1280, device='cuda:0')
epoch: 26 test_true_pfm: 9897.25670862373 sim_pfm: -412.8207166575303
episode: 104 training return: tensor(-999.4880, device='cuda:0')
episode: 105 training return: tensor(-218.0490, device='cuda:0')
episode: 106 training return: tensor(-326.2906, device='cuda:0')
episode: 107 training return: tensor(-253.6354, device='cuda:0')
epoch: 27 test_true_pfm: 8443.131826165516 sim_pfm: -399.4196995740252
episode: 108 training return: tensor(-198.8705, device='cuda:0')
episode: 109 training return: tensor(-160.0750, device='cuda:0')
episode: 110 training return: tensor(-986.3519, device='cuda:0')
episode: 111 training return: tensor(-231.6098, device='cuda:0')
epoch: 28 test_true_pfm: 8917.526754777919 sim_pfm: -218.83855442421432
episode: 112 training return: tensor(-848.7221, device='cuda:0')
episode: 113 training return: tensor(-999.5110, device='cuda:0')
episode: 114 training return: tensor(-259.1034, device='cuda:0')
episode: 115 training return: tensor(-608.0717, device='cuda:0')
epoch: 29 test_true_pfm: 8474.314861740075 sim_pfm: -420.72605864300084
episode: 116 training return: tensor(-255.4344, device='cuda:0')
episode: 117 training return: tensor(-712.9574, device='cuda:0')
episode: 118 training return: tensor(-13.6094, device='cuda:0')
episode: 119 training return: tensor(-999.5155, device='cuda:0')
epoch: 30 test_true_pfm: 6162.539748481814 sim_pfm: -233.09353912920537
episode: 120 training return: tensor(-999.7125, device='cuda:0')
episode: 121 training return: tensor(-337.1068, device='cuda:0')
episode: 122 training return: tensor(-194.5189, device='cuda:0')
episode: 123 training return: tensor(-999.9955, device='cuda:0')
epoch: 31 test_true_pfm: 10290.689664505333 sim_pfm: -75.08218642132124
episode: 124 training return: tensor(-162.0221, device='cuda:0')
episode: 125 training return: tensor(-318.6053, device='cuda:0')
episode: 126 training return: tensor(-555.2623, device='cuda:0')
episode: 127 training return: tensor(-300.1456, device='cuda:0')
epoch: 32 test_true_pfm: 10389.89332380153 sim_pfm: -156.43874549603788
episode: 128 training return: tensor(-999.8836, device='cuda:0')
episode: 129 training return: tensor(-146.4672, device='cuda:0')
episode: 130 training return: tensor(-216.3112, device='cuda:0')
episode: 131 training return: tensor(-875.6113, device='cuda:0')
epoch: 33 test_true_pfm: 6725.081477171316 sim_pfm: -84.29697169545882
episode: 132 training return: tensor(-748.9108, device='cuda:0')
episode: 133 training return: tensor(-999.7819, device='cuda:0')
episode: 134 training return: tensor(-504.7159, device='cuda:0')
episode: 135 training return: tensor(-326.7358, device='cuda:0')
epoch: 34 test_true_pfm: 6810.292793082917 sim_pfm: -10.085966071434086
episode: 136 training return: tensor(-386.5943, device='cuda:0')
episode: 137 training return: tensor(-320.8942, device='cuda:0')
episode: 138 training return: tensor(-507.7195, device='cuda:0')
episode: 139 training return: tensor(-246.8923, device='cuda:0')
epoch: 35 test_true_pfm: 10537.688434944774 sim_pfm: -135.06824216250484
episode: 140 training return: tensor(-179.3619, device='cuda:0')
episode: 141 training return: tensor(-11.1832, device='cuda:0')
episode: 142 training return: tensor(-65.1743, device='cuda:0')
episode: 143 training return: tensor(-588.7292, device='cuda:0')
epoch: 36 test_true_pfm: 10323.469468896299 sim_pfm: -290.324256461395
episode: 144 training return: tensor(-360.4532, device='cuda:0')
episode: 145 training return: tensor(-713.6301, device='cuda:0')
episode: 146 training return: tensor(-193.2830, device='cuda:0')
episode: 147 training return: tensor(-857.0018, device='cuda:0')
epoch: 37 test_true_pfm: 10000.073886467382 sim_pfm: -40.8030633519326
episode: 148 training return: tensor(-999.9964, device='cuda:0')
episode: 149 training return: tensor(-242.6422, device='cuda:0')
episode: 150 training return: tensor(-150.1315, device='cuda:0')
episode: 151 training return: tensor(-78.5620, device='cuda:0')
epoch: 38 test_true_pfm: 8559.646726152272 sim_pfm: -383.08987342598266
episode: 152 training return: tensor(-223.6711, device='cuda:0')
episode: 153 training return: tensor(-999.9959, device='cuda:0')
episode: 154 training return: tensor(-117.3342, device='cuda:0')
episode: 155 training return: tensor(-349.1181, device='cuda:0')
epoch: 39 test_true_pfm: 10425.327560177413 sim_pfm: -13.711229268442063
episode: 156 training return: tensor(-315.6135, device='cuda:0')
episode: 157 training return: tensor(-999.9574, device='cuda:0')
episode: 158 training return: tensor(-198.4673, device='cuda:0')
episode: 159 training return: tensor(-850.5281, device='cuda:0')
epoch: 40 test_true_pfm: 7186.8733001463725 sim_pfm: 16.79005744618674
episode: 160 training return: tensor(-83.4839, device='cuda:0')
episode: 161 training return: tensor(-999.9780, device='cuda:0')
episode: 162 training return: tensor(-139.6955, device='cuda:0')
episode: 163 training return: tensor(-248.8610, device='cuda:0')
epoch: 41 test_true_pfm: 10101.84279297891 sim_pfm: 28.020551249346074
episode: 164 training return: tensor(-161.1669, device='cuda:0')
episode: 165 training return: tensor(-140.3341, device='cuda:0')
episode: 166 training return: tensor(-401.0281, device='cuda:0')
episode: 167 training return: tensor(-443.0711, device='cuda:0')
epoch: 42 test_true_pfm: 10262.202640554791 sim_pfm: -14.130204513930948
episode: 168 training return: tensor(-581.4401, device='cuda:0')
episode: 169 training return: tensor(-889.4436, device='cuda:0')
episode: 170 training return: tensor(-87.3600, device='cuda:0')
episode: 171 training return: tensor(-216.7787, device='cuda:0')
epoch: 43 test_true_pfm: 10130.902037131238 sim_pfm: 47.136609026968166
episode: 172 training return: tensor(-223.3972, device='cuda:0')
episode: 173 training return: tensor(-703.9728, device='cuda:0')
episode: 174 training return: tensor(-398.8651, device='cuda:0')
episode: 175 training return: tensor(-211.9407, device='cuda:0')
epoch: 44 test_true_pfm: 10306.745001098943 sim_pfm: -384.03397283749655
episode: 176 training return: tensor(-395.3134, device='cuda:0')
episode: 177 training return: tensor(-94.7969, device='cuda:0')
episode: 178 training return: tensor(-999.9996, device='cuda:0')
episode: 179 training return: tensor(-265.3203, device='cuda:0')
epoch: 45 test_true_pfm: 10294.91381957121 sim_pfm: 69.59208910932648
episode: 180 training return: tensor(-507.9500, device='cuda:0')
episode: 181 training return: tensor(-143.1598, device='cuda:0')
episode: 182 training return: tensor(-999.9943, device='cuda:0')
episode: 183 training return: tensor(-91.3928, device='cuda:0')
epoch: 46 test_true_pfm: 8371.435047824829 sim_pfm: -164.32152307823222
episode: 184 training return: tensor(-275.9088, device='cuda:0')
episode: 185 training return: tensor(-198.3396, device='cuda:0')
episode: 186 training return: tensor(21.5804, device='cuda:0')
episode: 187 training return: tensor(-156.9796, device='cuda:0')
epoch: 47 test_true_pfm: 10200.987229601873 sim_pfm: -341.3886531703174
episode: 188 training return: tensor(-646.8527, device='cuda:0')
episode: 189 training return: tensor(-651.2133, device='cuda:0')
episode: 190 training return: tensor(-275.9090, device='cuda:0')
episode: 191 training return: tensor(-67.7091, device='cuda:0')
epoch: 48 test_true_pfm: 6209.641661298515 sim_pfm: -30.635164949131042
episode: 192 training return: tensor(-132.6957, device='cuda:0')
episode: 193 training return: tensor(-213.4634, device='cuda:0')
episode: 194 training return: tensor(-205.3143, device='cuda:0')
episode: 195 training return: tensor(-185.5926, device='cuda:0')
epoch: 49 test_true_pfm: 6209.722255068526 sim_pfm: 21.61075999122113
episode: 196 training return: tensor(-999.9965, device='cuda:0')
episode: 197 training return: tensor(-237.2620, device='cuda:0')
episode: 198 training return: tensor(-335.0861, device='cuda:0')
episode: 199 training return: tensor(-999.9205, device='cuda:0')
epoch: 50 test_true_pfm: 9572.685418190706 sim_pfm: 52.53674247977324
episode: 200 training return: tensor(-672.6384, device='cuda:0')
episode: 201 training return: tensor(-999.6029, device='cuda:0')
episode: 202 training return: tensor(-246.5668, device='cuda:0')
episode: 203 training return: tensor(-999.7739, device='cuda:0')
epoch: 51 test_true_pfm: 7696.755345786107 sim_pfm: -615.5589247758035
episode: 204 training return: tensor(-126.3200, device='cuda:0')
episode: 205 training return: tensor(-999.9913, device='cuda:0')
episode: 206 training return: tensor(-289.8878, device='cuda:0')
episode: 207 training return: tensor(-246.7604, device='cuda:0')
epoch: 52 test_true_pfm: 9525.007814881053 sim_pfm: 22.834618606371805
episode: 208 training return: tensor(-999.7986, device='cuda:0')
episode: 209 training return: tensor(-999.9990, device='cuda:0')
episode: 210 training return: tensor(-748.7057, device='cuda:0')
episode: 211 training return: tensor(-203.0101, device='cuda:0')
epoch: 53 test_true_pfm: 10461.713815127105 sim_pfm: 13.76597774119
episode: 212 training return: tensor(-995.9733, device='cuda:0')
episode: 213 training return: tensor(-142.6813, device='cuda:0')
episode: 214 training return: tensor(46.5720, device='cuda:0')
episode: 215 training return: tensor(-999.4937, device='cuda:0')
epoch: 54 test_true_pfm: 6765.3523274799845 sim_pfm: -374.17942722399795
episode: 216 training return: tensor(-528.1450, device='cuda:0')
episode: 217 training return: tensor(-238.8303, device='cuda:0')
episode: 218 training return: tensor(-497.9556, device='cuda:0')
episode: 219 training return: tensor(-882.9507, device='cuda:0')
epoch: 55 test_true_pfm: 10199.104985723736 sim_pfm: -222.38600634769924
episode: 220 training return: tensor(-73.2457, device='cuda:0')
episode: 221 training return: tensor(-657.2314, device='cuda:0')
episode: 222 training return: tensor(-279.9447, device='cuda:0')
episode: 223 training return: tensor(-63.3125, device='cuda:0')
epoch: 56 test_true_pfm: 9978.505700855296 sim_pfm: -326.52194402902387
episode: 224 training return: tensor(-165.1967, device='cuda:0')
episode: 225 training return: tensor(-300.7680, device='cuda:0')
episode: 226 training return: tensor(-60.5687, device='cuda:0')
episode: 227 training return: tensor(-999.9988, device='cuda:0')
epoch: 57 test_true_pfm: 10561.855361717733 sim_pfm: -562.0857907965934
episode: 228 training return: tensor(-116.6554, device='cuda:0')
episode: 229 training return: tensor(-999.9990, device='cuda:0')
episode: 230 training return: tensor(-184.2782, device='cuda:0')
episode: 231 training return: tensor(-615.8241, device='cuda:0')
epoch: 58 test_true_pfm: 10507.570907870739 sim_pfm: -609.0931572023643
episode: 232 training return: tensor(-142.9690, device='cuda:0')
episode: 233 training return: tensor(-711.9810, device='cuda:0')
episode: 234 training return: tensor(-142.5253, device='cuda:0')
episode: 235 training return: tensor(-56.5857, device='cuda:0')
epoch: 59 test_true_pfm: 10231.970244361873 sim_pfm: -31.956678003577206
episode: 236 training return: tensor(-217.6798, device='cuda:0')
episode: 237 training return: tensor(-32.4322, device='cuda:0')
episode: 238 training return: tensor(37.4658, device='cuda:0')
episode: 239 training return: tensor(-353.2970, device='cuda:0')
epoch: 60 test_true_pfm: 10308.801430909325 sim_pfm: 44.04021866256759
episode: 240 training return: tensor(-123.2399, device='cuda:0')
episode: 241 training return: tensor(-999.9962, device='cuda:0')
episode: 242 training return: tensor(-64.3570, device='cuda:0')
episode: 243 training return: tensor(-999.3303, device='cuda:0')
epoch: 61 test_true_pfm: 7209.417166197835 sim_pfm: -668.9562925480617
episode: 244 training return: tensor(-165.9376, device='cuda:0')
episode: 245 training return: tensor(-999.7268, device='cuda:0')
episode: 246 training return: tensor(-999.7778, device='cuda:0')
episode: 247 training return: tensor(-746.2842, device='cuda:0')
epoch: 62 test_true_pfm: 6794.066835549257 sim_pfm: 35.751702667223675
episode: 248 training return: tensor(-166.3448, device='cuda:0')
episode: 249 training return: tensor(-999.9899, device='cuda:0')
episode: 250 training return: tensor(-135.2853, device='cuda:0')
episode: 251 training return: tensor(-956.6763, device='cuda:0')
epoch: 63 test_true_pfm: 10522.19411156729 sim_pfm: -32.56315991691857
episode: 252 training return: tensor(-303.9599, device='cuda:0')
episode: 253 training return: tensor(40.0048, device='cuda:0')
episode: 254 training return: tensor(-557.4090, device='cuda:0')
episode: 255 training return: tensor(-999.9798, device='cuda:0')
epoch: 64 test_true_pfm: 10440.523427374237 sim_pfm: -356.1752035397415
episode: 256 training return: tensor(4.5287, device='cuda:0')
episode: 257 training return: tensor(-858.0501, device='cuda:0')
episode: 258 training return: tensor(-999.3846, device='cuda:0')
episode: 259 training return: tensor(-999.3665, device='cuda:0')
epoch: 65 test_true_pfm: 10505.84886009884 sim_pfm: -666.3745136158929
episode: 260 training return: tensor(-177.5803, device='cuda:0')
episode: 261 training return: tensor(21.1924, device='cuda:0')
episode: 262 training return: tensor(-489.7542, device='cuda:0')
episode: 263 training return: tensor(-270.0226, device='cuda:0')
epoch: 66 test_true_pfm: 10497.696531312316 sim_pfm: -74.1778113009932
episode: 264 training return: tensor(-100.6562, device='cuda:0')
episode: 265 training return: tensor(-996.4866, device='cuda:0')
episode: 266 training return: tensor(-136.8368, device='cuda:0')
episode: 267 training return: tensor(-161.0088, device='cuda:0')
epoch: 67 test_true_pfm: 8345.809300361412 sim_pfm: -46.52710351053005
episode: 268 training return: tensor(-84.4000, device='cuda:0')
episode: 269 training return: tensor(-186.0917, device='cuda:0')
episode: 270 training return: tensor(-414.7943, device='cuda:0')
episode: 271 training return: tensor(-246.8362, device='cuda:0')
epoch: 68 test_true_pfm: 10267.05630025854 sim_pfm: -102.88777005608426
episode: 272 training return: tensor(-999.3934, device='cuda:0')
episode: 273 training return: tensor(-136.8834, device='cuda:0')
episode: 274 training return: tensor(121.3742, device='cuda:0')
episode: 275 training return: tensor(-999.9905, device='cuda:0')
epoch: 69 test_true_pfm: 6540.277522359577 sim_pfm: -506.0832619438297
episode: 276 training return: tensor(-269.1767, device='cuda:0')
episode: 277 training return: tensor(60.6091, device='cuda:0')
episode: 278 training return: tensor(-166.6991, device='cuda:0')
episode: 279 training return: tensor(-13.6432, device='cuda:0')
epoch: 70 test_true_pfm: 7013.233083941403 sim_pfm: -400.913389630053
episode: 280 training return: tensor(-220.5968, device='cuda:0')
episode: 281 training return: tensor(-999.8919, device='cuda:0')
episode: 282 training return: tensor(-98.8309, device='cuda:0')
episode: 283 training return: tensor(-115.7572, device='cuda:0')
epoch: 71 test_true_pfm: 10524.814730828495 sim_pfm: -47.649569610288985
episode: 284 training return: tensor(-999.5912, device='cuda:0')
episode: 285 training return: tensor(-125.7861, device='cuda:0')
episode: 286 training return: tensor(-126.9438, device='cuda:0')
episode: 287 training return: tensor(-117.5636, device='cuda:0')
epoch: 72 test_true_pfm: 10497.55921286768 sim_pfm: -214.04655638113036
episode: 288 training return: tensor(-205.7146, device='cuda:0')
episode: 289 training return: tensor(-182.1781, device='cuda:0')
episode: 290 training return: tensor(-122.9213, device='cuda:0')
episode: 291 training return: tensor(-999.9805, device='cuda:0')
epoch: 73 test_true_pfm: 9770.973792942663 sim_pfm: -385.39021581989556
episode: 292 training return: tensor(-132.2312, device='cuda:0')
episode: 293 training return: tensor(-257.5905, device='cuda:0')
episode: 294 training return: tensor(-999.9858, device='cuda:0')
episode: 295 training return: tensor(-952.6434, device='cuda:0')
epoch: 74 test_true_pfm: 6887.118467420148 sim_pfm: 38.566309886674084
episode: 296 training return: tensor(-499.0795, device='cuda:0')
episode: 297 training return: tensor(-846.7972, device='cuda:0')
episode: 298 training return: tensor(-999.9993, device='cuda:0')
episode: 299 training return: tensor(-126.4186, device='cuda:0')
epoch: 75 test_true_pfm: 9373.449841617754 sim_pfm: -267.5546251228273
episode: 300 training return: tensor(-941.6333, device='cuda:0')
episode: 301 training return: tensor(-76.6412, device='cuda:0')
episode: 302 training return: tensor(-21.6665, device='cuda:0')
episode: 303 training return: tensor(-148.2905, device='cuda:0')
epoch: 76 test_true_pfm: 10284.820535236473 sim_pfm: 55.794919749110704
episode: 304 training return: tensor(-353.0926, device='cuda:0')
episode: 305 training return: tensor(-999.9518, device='cuda:0')
episode: 306 training return: tensor(-8.0606, device='cuda:0')
episode: 307 training return: tensor(134.3207, device='cuda:0')
epoch: 77 test_true_pfm: 10211.677515583548 sim_pfm: -302.5839516070361
episode: 308 training return: tensor(-43.7361, device='cuda:0')
episode: 309 training return: tensor(-999.9896, device='cuda:0')
episode: 310 training return: tensor(-49.4936, device='cuda:0')
episode: 311 training return: tensor(-141.9781, device='cuda:0')
epoch: 78 test_true_pfm: 6740.856806379422 sim_pfm: 9.317628398809271
episode: 312 training return: tensor(-207.4382, device='cuda:0')
episode: 313 training return: tensor(-200.2327, device='cuda:0')
episode: 314 training return: tensor(-908.2210, device='cuda:0')
episode: 315 training return: tensor(-81.6129, device='cuda:0')
epoch: 79 test_true_pfm: 10295.660085027723 sim_pfm: -447.74955695001216
episode: 316 training return: tensor(-999.9235, device='cuda:0')
episode: 317 training return: tensor(-706.0303, device='cuda:0')
episode: 318 training return: tensor(-999.9969, device='cuda:0')
episode: 319 training return: tensor(-137.1662, device='cuda:0')
epoch: 80 test_true_pfm: 7026.610314750578 sim_pfm: 58.2558096209929
episode: 320 training return: tensor(-188.4159, device='cuda:0')
episode: 321 training return: tensor(-999.9976, device='cuda:0')
episode: 322 training return: tensor(-148.7936, device='cuda:0')
episode: 323 training return: tensor(-999.1504, device='cuda:0')
epoch: 81 test_true_pfm: 6943.179910923995 sim_pfm: -257.2556509466861
episode: 324 training return: tensor(-657.9001, device='cuda:0')
episode: 325 training return: tensor(-996.1689, device='cuda:0')
episode: 326 training return: tensor(-86.1469, device='cuda:0')
episode: 327 training return: tensor(1.6565, device='cuda:0')
epoch: 82 test_true_pfm: 10326.376962197684 sim_pfm: 24.80558516199623
episode: 328 training return: tensor(-43.1788, device='cuda:0')
episode: 329 training return: tensor(-102.3194, device='cuda:0')
episode: 330 training return: tensor(-999.5892, device='cuda:0')
episode: 331 training return: tensor(-999.9979, device='cuda:0')
epoch: 83 test_true_pfm: 6923.818607461591 sim_pfm: 99.59873610267339
episode: 332 training return: tensor(-573.1810, device='cuda:0')
episode: 333 training return: tensor(-219.5547, device='cuda:0')
episode: 334 training return: tensor(-849.3405, device='cuda:0')
episode: 335 training return: tensor(-79.8844, device='cuda:0')
epoch: 84 test_true_pfm: 10242.54312093069 sim_pfm: -281.77735877555097
episode: 336 training return: tensor(35.5580, device='cuda:0')
episode: 337 training return: tensor(-39.1161, device='cuda:0')
episode: 338 training return: tensor(-98.8377, device='cuda:0')
episode: 339 training return: tensor(-527.6178, device='cuda:0')
epoch: 85 test_true_pfm: 7390.989721094865 sim_pfm: 4.481873832410201
episode: 340 training return: tensor(-939.0499, device='cuda:0')
episode: 341 training return: tensor(-75.9759, device='cuda:0')
episode: 342 training return: tensor(-289.8342, device='cuda:0')
episode: 343 training return: tensor(-141.2830, device='cuda:0')
epoch: 86 test_true_pfm: 10385.056946509934 sim_pfm: 48.93187933488904
episode: 344 training return: tensor(-999.9957, device='cuda:0')
episode: 345 training return: tensor(39.5940, device='cuda:0')
episode: 346 training return: tensor(-156.9134, device='cuda:0')
episode: 347 training return: tensor(-681.5666, device='cuda:0')
epoch: 87 test_true_pfm: 6901.845065720481 sim_pfm: 144.07509084242824
episode: 348 training return: tensor(-25.1457, device='cuda:0')
episode: 349 training return: tensor(-74.8829, device='cuda:0')
episode: 350 training return: tensor(-999.9970, device='cuda:0')
episode: 351 training return: tensor(-999.9541, device='cuda:0')
epoch: 88 test_true_pfm: 8287.827707464106 sim_pfm: 132.39562254057577
episode: 352 training return: tensor(-999.9956, device='cuda:0')
episode: 353 training return: tensor(-63.8333, device='cuda:0')
episode: 354 training return: tensor(-773.5151, device='cuda:0')
episode: 355 training return: tensor(-209.9912, device='cuda:0')
epoch: 89 test_true_pfm: 10261.974533080675 sim_pfm: -127.65291815212306
episode: 356 training return: tensor(-705.2887, device='cuda:0')
episode: 357 training return: tensor(-437.1302, device='cuda:0')
episode: 358 training return: tensor(44.7096, device='cuda:0')
episode: 359 training return: tensor(-88.1815, device='cuda:0')
epoch: 90 test_true_pfm: 10514.00008529348 sim_pfm: -314.99393268018804
episode: 360 training return: tensor(-999.9957, device='cuda:0')
episode: 361 training return: tensor(-111.5542, device='cuda:0')
episode: 362 training return: tensor(-999.8674, device='cuda:0')
episode: 363 training return: tensor(-749.1746, device='cuda:0')
epoch: 91 test_true_pfm: 9629.593309538193 sim_pfm: -244.884124204497
episode: 364 training return: tensor(-999.8500, device='cuda:0')
episode: 365 training return: tensor(-999.4601, device='cuda:0')
episode: 366 training return: tensor(-999.8939, device='cuda:0')
episode: 367 training return: tensor(-217.3507, device='cuda:0')
epoch: 92 test_true_pfm: 3375.911484978853 sim_pfm: -0.12238062291483705
episode: 368 training return: tensor(24.3030, device='cuda:0')
episode: 369 training return: tensor(-68.8192, device='cuda:0')
episode: 370 training return: tensor(-150.9027, device='cuda:0')
episode: 371 training return: tensor(21.3462, device='cuda:0')
epoch: 93 test_true_pfm: 4999.279611150032 sim_pfm: -253.87167969423658
episode: 372 training return: tensor(-130.0885, device='cuda:0')
episode: 373 training return: tensor(-46.5186, device='cuda:0')
episode: 374 training return: tensor(26.6988, device='cuda:0')
episode: 375 training return: tensor(-87.0284, device='cuda:0')
epoch: 94 test_true_pfm: 10376.384889868097 sim_pfm: -278.6497602367308
episode: 376 training return: tensor(-998.3287, device='cuda:0')
episode: 377 training return: tensor(-199.0754, device='cuda:0')
episode: 378 training return: tensor(-72.7692, device='cuda:0')
episode: 379 training return: tensor(-309.2962, device='cuda:0')
epoch: 95 test_true_pfm: 10407.690178714633 sim_pfm: -681.4281001467801
episode: 380 training return: tensor(-226.4911, device='cuda:0')
episode: 381 training return: tensor(-9.8562, device='cuda:0')
episode: 382 training return: tensor(-14.0167, device='cuda:0')
episode: 383 training return: tensor(-59.8292, device='cuda:0')
epoch: 96 test_true_pfm: 6721.016204817621 sim_pfm: 66.38813417411681
episode: 384 training return: tensor(-875.8053, device='cuda:0')
episode: 385 training return: tensor(-95.9133, device='cuda:0')
episode: 386 training return: tensor(-124.0133, device='cuda:0')
episode: 387 training return: tensor(-427.3137, device='cuda:0')
epoch: 97 test_true_pfm: 10581.311822378038 sim_pfm: -554.9565002907378
episode: 388 training return: tensor(-77.3014, device='cuda:0')
episode: 389 training return: tensor(-220.9471, device='cuda:0')
episode: 390 training return: tensor(-224.7550, device='cuda:0')
episode: 391 training return: tensor(-92.1118, device='cuda:0')
epoch: 98 test_true_pfm: 6693.068517558614 sim_pfm: -313.2685838728018
episode: 392 training return: tensor(-255.9007, device='cuda:0')
episode: 393 training return: tensor(37.7029, device='cuda:0')
episode: 394 training return: tensor(-115.9942, device='cuda:0')
episode: 395 training return: tensor(-173.4909, device='cuda:0')
epoch: 99 test_true_pfm: 10489.165591143203 sim_pfm: -0.9936890349102517
episode: 396 training return: tensor(27.0419, device='cuda:0')
episode: 397 training return: tensor(-113.1398, device='cuda:0')
episode: 398 training return: tensor(-999.9937, device='cuda:0')
episode: 399 training return: tensor(-196.6462, device='cuda:0')
epoch: 100 test_true_pfm: 10544.891754480639 sim_pfm: 72.8467968069599
episode: 400 training return: tensor(-997.7068, device='cuda:0')
episode: 401 training return: tensor(19.0858, device='cuda:0')
episode: 402 training return: tensor(-203.7177, device='cuda:0')
episode: 403 training return: tensor(-805.2333, device='cuda:0')
epoch: 101 test_true_pfm: 10464.691800566825 sim_pfm: 72.85726258189727
episode: 404 training return: tensor(-131.8909, device='cuda:0')
episode: 405 training return: tensor(-156.2973, device='cuda:0')
episode: 406 training return: tensor(-54.7942, device='cuda:0')
episode: 407 training return: tensor(-286.2732, device='cuda:0')
epoch: 102 test_true_pfm: 10308.189398802422 sim_pfm: -264.16097515321843
episode: 408 training return: tensor(-180.2410, device='cuda:0')
episode: 409 training return: tensor(-124.2375, device='cuda:0')
episode: 410 training return: tensor(-11.0489, device='cuda:0')
episode: 411 training return: tensor(-139.3760, device='cuda:0')
epoch: 103 test_true_pfm: 10153.911584193396 sim_pfm: 102.29537629906554
episode: 412 training return: tensor(-162.9822, device='cuda:0')
episode: 413 training return: tensor(-259.2115, device='cuda:0')
episode: 414 training return: tensor(-998.3859, device='cuda:0')
episode: 415 training return: tensor(-177.7639, device='cuda:0')
epoch: 104 test_true_pfm: 10549.90666326025 sim_pfm: 174.77366160591677
episode: 416 training return: tensor(-124.4150, device='cuda:0')
episode: 417 training return: tensor(-59.1999, device='cuda:0')
episode: 418 training return: tensor(-67.4416, device='cuda:0')
episode: 419 training return: tensor(-999.8891, device='cuda:0')
epoch: 105 test_true_pfm: 9777.579109931232 sim_pfm: 81.30881659698207
episode: 420 training return: tensor(-406.8104, device='cuda:0')
episode: 421 training return: tensor(-141.3190, device='cuda:0')
episode: 422 training return: tensor(-75.3590, device='cuda:0')
episode: 423 training return: tensor(9.6540, device='cuda:0')
epoch: 106 test_true_pfm: 10521.748109843948 sim_pfm: -480.0209996488023
episode: 424 training return: tensor(-156.8369, device='cuda:0')
episode: 425 training return: tensor(-150.2475, device='cuda:0')
episode: 426 training return: tensor(-283.0268, device='cuda:0')
episode: 427 training return: tensor(-18.5031, device='cuda:0')
epoch: 107 test_true_pfm: 6888.881963781331 sim_pfm: 23.319948657532223
episode: 428 training return: tensor(-98.3096, device='cuda:0')
episode: 429 training return: tensor(-256.3576, device='cuda:0')
episode: 430 training return: tensor(-279.9627, device='cuda:0')
episode: 431 training return: tensor(-224.5721, device='cuda:0')
epoch: 108 test_true_pfm: 10277.434761679615 sim_pfm: -641.1686272281999
episode: 432 training return: tensor(-209.5366, device='cuda:0')
episode: 433 training return: tensor(23.3129, device='cuda:0')
episode: 434 training return: tensor(-999.6286, device='cuda:0')
episode: 435 training return: tensor(-8.0777, device='cuda:0')
epoch: 109 test_true_pfm: 10427.093772878305 sim_pfm: 84.44390581814999
episode: 436 training return: tensor(-999.9992, device='cuda:0')
episode: 437 training return: tensor(-160.3135, device='cuda:0')
episode: 438 training return: tensor(-130.4705, device='cuda:0')
episode: 439 training return: tensor(-60.4140, device='cuda:0')
epoch: 110 test_true_pfm: 7490.131296431196 sim_pfm: -218.9947990072736
episode: 440 training return: tensor(67.8066, device='cuda:0')
episode: 441 training return: tensor(36.8444, device='cuda:0')
episode: 442 training return: tensor(-118.5334, device='cuda:0')
episode: 443 training return: tensor(-28.9340, device='cuda:0')
epoch: 111 test_true_pfm: 10462.502541805494 sim_pfm: -32.060330682511754
episode: 444 training return: tensor(-999.9059, device='cuda:0')
episode: 445 training return: tensor(-999.8622, device='cuda:0')
episode: 446 training return: tensor(-999.1471, device='cuda:0')
episode: 447 training return: tensor(-187.4660, device='cuda:0')
epoch: 112 test_true_pfm: 10274.226589463278 sim_pfm: 142.46178182133977
episode: 448 training return: tensor(-175.7357, device='cuda:0')
episode: 449 training return: tensor(-191.0458, device='cuda:0')
episode: 450 training return: tensor(-243.6679, device='cuda:0')
episode: 451 training return: tensor(-228.0022, device='cuda:0')
epoch: 113 test_true_pfm: 7905.079998592996 sim_pfm: -71.46441586816218
episode: 452 training return: tensor(43.3757, device='cuda:0')
episode: 453 training return: tensor(-78.3397, device='cuda:0')
episode: 454 training return: tensor(-126.9106, device='cuda:0')
episode: 455 training return: tensor(-143.2970, device='cuda:0')
epoch: 114 test_true_pfm: 10448.675955648185 sim_pfm: 73.36699162443013
episode: 456 training return: tensor(34.8044, device='cuda:0')
episode: 457 training return: tensor(-766.5682, device='cuda:0')
episode: 458 training return: tensor(-999.5846, device='cuda:0')
episode: 459 training return: tensor(-158.7181, device='cuda:0')
epoch: 115 test_true_pfm: 6889.277303086329 sim_pfm: 16.52054355488508
episode: 460 training return: tensor(-703.5286, device='cuda:0')
episode: 461 training return: tensor(-102.4544, device='cuda:0')
episode: 462 training return: tensor(-152.7629, device='cuda:0')
episode: 463 training return: tensor(-221.5740, device='cuda:0')
epoch: 116 test_true_pfm: 7032.547425801914 sim_pfm: 87.3053388594999
episode: 464 training return: tensor(-90.8491, device='cuda:0')
episode: 465 training return: tensor(-872.4730, device='cuda:0')
episode: 466 training return: tensor(-24.7844, device='cuda:0')
episode: 467 training return: tensor(-181.8188, device='cuda:0')
epoch: 117 test_true_pfm: 10207.11693952063 sim_pfm: -72.74129433904697
episode: 468 training return: tensor(-4.1288, device='cuda:0')
episode: 469 training return: tensor(-4.8132, device='cuda:0')
episode: 470 training return: tensor(-98.3940, device='cuda:0')
episode: 471 training return: tensor(-999.9404, device='cuda:0')
epoch: 118 test_true_pfm: 10381.494948952086 sim_pfm: 13.481325253194276
episode: 472 training return: tensor(-999.8474, device='cuda:0')
episode: 473 training return: tensor(-37.9801, device='cuda:0')
episode: 474 training return: tensor(-785.9163, device='cuda:0')
episode: 475 training return: tensor(-206.7100, device='cuda:0')
epoch: 119 test_true_pfm: 10561.00123806376 sim_pfm: -272.133947753134
episode: 476 training return: tensor(-100.8282, device='cuda:0')
episode: 477 training return: tensor(-999.9937, device='cuda:0')
episode: 478 training return: tensor(-50.4176, device='cuda:0')
episode: 479 training return: tensor(-437.1708, device='cuda:0')
epoch: 120 test_true_pfm: 7626.792888135841 sim_pfm: -362.05547532481916
episode: 480 training return: tensor(-134.4956, device='cuda:0')
episode: 481 training return: tensor(-143.1605, device='cuda:0')
episode: 482 training return: tensor(-99.5175, device='cuda:0')
episode: 483 training return: tensor(17.8966, device='cuda:0')
epoch: 121 test_true_pfm: 10386.643764761951 sim_pfm: 37.089307325727226
episode: 484 training return: tensor(-109.4481, device='cuda:0')
episode: 485 training return: tensor(-365.1599, device='cuda:0')
episode: 486 training return: tensor(-65.5286, device='cuda:0')
episode: 487 training return: tensor(-40.0086, device='cuda:0')
epoch: 122 test_true_pfm: 6930.589338654958 sim_pfm: -83.20762148631427
episode: 488 training return: tensor(-86.3028, device='cuda:0')
episode: 489 training return: tensor(57.1937, device='cuda:0')
episode: 490 training return: tensor(-112.5150, device='cuda:0')
episode: 491 training return: tensor(-19.4909, device='cuda:0')
epoch: 123 test_true_pfm: 8524.740304832354 sim_pfm: 76.16085869933401
episode: 492 training return: tensor(-999.9968, device='cuda:0')
episode: 493 training return: tensor(-102.8824, device='cuda:0')
episode: 494 training return: tensor(-406.9112, device='cuda:0')
episode: 495 training return: tensor(-229.4864, device='cuda:0')
epoch: 124 test_true_pfm: 6932.268001768832 sim_pfm: -306.7143687775824
episode: 496 training return: tensor(154.2654, device='cuda:0')
episode: 497 training return: tensor(-843.8594, device='cuda:0')
episode: 498 training return: tensor(-999.8469, device='cuda:0')
episode: 499 training return: tensor(-206.6192, device='cuda:0')
epoch: 125 test_true_pfm: 10388.882840348566 sim_pfm: 81.86097986383054
episode: 500 training return: tensor(-65.9205, device='cuda:0')
episode: 501 training return: tensor(-233.3647, device='cuda:0')
episode: 502 training return: tensor(-257.5028, device='cuda:0')
episode: 503 training return: tensor(-100.1303, device='cuda:0')
epoch: 126 test_true_pfm: 10484.595172916475 sim_pfm: -573.6162032473754
episode: 504 training return: tensor(-480.7695, device='cuda:0')
episode: 505 training return: tensor(69.2282, device='cuda:0')
episode: 506 training return: tensor(94.5654, device='cuda:0')
episode: 507 training return: tensor(-999.9917, device='cuda:0')
epoch: 127 test_true_pfm: 10464.714699438935 sim_pfm: 59.03245906632704
episode: 508 training return: tensor(-999.8564, device='cuda:0')
episode: 509 training return: tensor(-5.8251, device='cuda:0')
episode: 510 training return: tensor(-2.6892, device='cuda:0')
episode: 511 training return: tensor(-999.6154, device='cuda:0')
epoch: 128 test_true_pfm: 10669.270365261713 sim_pfm: -195.7152080506397
episode: 512 training return: tensor(-74.1559, device='cuda:0')
episode: 513 training return: tensor(49.8180, device='cuda:0')
episode: 514 training return: tensor(30.6616, device='cuda:0')
episode: 515 training return: tensor(-109.0179, device='cuda:0')
epoch: 129 test_true_pfm: 9802.087484574784 sim_pfm: 54.856695851883465
episode: 516 training return: tensor(-78.3227, device='cuda:0')
episode: 517 training return: tensor(36.9200, device='cuda:0')
episode: 518 training return: tensor(-171.9789, device='cuda:0')
episode: 519 training return: tensor(-18.0183, device='cuda:0')
epoch: 130 test_true_pfm: 10476.840194838229 sim_pfm: 109.05676126435476
episode: 520 training return: tensor(-112.0786, device='cuda:0')
episode: 521 training return: tensor(-144.2634, device='cuda:0')
episode: 522 training return: tensor(-26.0678, device='cuda:0')
episode: 523 training return: tensor(-999.9970, device='cuda:0')
epoch: 131 test_true_pfm: 10347.633370640659 sim_pfm: -129.07202496670652
episode: 524 training return: tensor(-236.1617, device='cuda:0')
episode: 525 training return: tensor(11.6645, device='cuda:0')
episode: 526 training return: tensor(-22.8575, device='cuda:0')
episode: 527 training return: tensor(-44.6364, device='cuda:0')
epoch: 132 test_true_pfm: 10559.789080802077 sim_pfm: -289.78211935902556
episode: 528 training return: tensor(72.2628, device='cuda:0')
episode: 529 training return: tensor(-59.3210, device='cuda:0')
episode: 530 training return: tensor(-65.0605, device='cuda:0')
episode: 531 training return: tensor(-999.9988, device='cuda:0')
epoch: 133 test_true_pfm: 10577.186807808292 sim_pfm: 161.04492721881252
episode: 532 training return: tensor(-173.1953, device='cuda:0')
episode: 533 training return: tensor(-100.9038, device='cuda:0')
episode: 534 training return: tensor(-999.9180, device='cuda:0')
episode: 535 training return: tensor(-999.9996, device='cuda:0')
epoch: 134 test_true_pfm: 8941.219030034083 sim_pfm: 38.921805153484456
episode: 536 training return: tensor(-604.8528, device='cuda:0')
episode: 537 training return: tensor(-154.1541, device='cuda:0')
episode: 538 training return: tensor(-999.8499, device='cuda:0')
episode: 539 training return: tensor(-53.5498, device='cuda:0')
epoch: 135 test_true_pfm: 10489.796531765589 sim_pfm: -122.69627763158253
episode: 540 training return: tensor(-122.7893, device='cuda:0')
episode: 541 training return: tensor(-20.3303, device='cuda:0')
episode: 542 training return: tensor(-101.0417, device='cuda:0')
episode: 543 training return: tensor(101.7897, device='cuda:0')
epoch: 136 test_true_pfm: 10579.202093415253 sim_pfm: -133.13080207111003
episode: 544 training return: tensor(-34.8779, device='cuda:0')
episode: 545 training return: tensor(-247.6713, device='cuda:0')
episode: 546 training return: tensor(52.3283, device='cuda:0')
episode: 547 training return: tensor(-25.9153, device='cuda:0')
epoch: 137 test_true_pfm: 10450.48578512234 sim_pfm: 23.693210261156008
episode: 548 training return: tensor(-36.0299, device='cuda:0')
episode: 549 training return: tensor(-121.1010, device='cuda:0')
episode: 550 training return: tensor(-100.5484, device='cuda:0')
episode: 551 training return: tensor(-172.0871, device='cuda:0')
epoch: 138 test_true_pfm: 10532.785501640852 sim_pfm: 113.6141530512444
episode: 552 training return: tensor(-802.5650, device='cuda:0')
episode: 553 training return: tensor(-54.8380, device='cuda:0')
episode: 554 training return: tensor(-309.7182, device='cuda:0')
episode: 555 training return: tensor(-999.9476, device='cuda:0')
epoch: 139 test_true_pfm: 9309.91049140525 sim_pfm: -306.1392399369991
episode: 556 training return: tensor(44.8670, device='cuda:0')
episode: 557 training return: tensor(-194.4070, device='cuda:0')
episode: 558 training return: tensor(-71.5810, device='cuda:0')
episode: 559 training return: tensor(-999.9030, device='cuda:0')
epoch: 140 test_true_pfm: 6911.04142910039 sim_pfm: -19.212136498468073
episode: 560 training return: tensor(-75.8842, device='cuda:0')
episode: 561 training return: tensor(-14.0315, device='cuda:0')
episode: 562 training return: tensor(-9.2897, device='cuda:0')
episode: 563 training return: tensor(-58.1336, device='cuda:0')
epoch: 141 test_true_pfm: 10524.804424831595 sim_pfm: 102.66829022322781
episode: 564 training return: tensor(-96.6015, device='cuda:0')
episode: 565 training return: tensor(14.7223, device='cuda:0')
episode: 566 training return: tensor(-270.9513, device='cuda:0')
episode: 567 training return: tensor(-999.8138, device='cuda:0')
epoch: 142 test_true_pfm: 10603.652544126084 sim_pfm: 34.054444681523215
episode: 568 training return: tensor(-998.8851, device='cuda:0')
episode: 569 training return: tensor(-78.6396, device='cuda:0')
episode: 570 training return: tensor(-49.5706, device='cuda:0')
episode: 571 training return: tensor(-38.9866, device='cuda:0')
epoch: 143 test_true_pfm: 10641.426755578697 sim_pfm: -322.557964182226
episode: 572 training return: tensor(-107.6007, device='cuda:0')
episode: 573 training return: tensor(81.2016, device='cuda:0')
episode: 574 training return: tensor(-999.2714, device='cuda:0')
episode: 575 training return: tensor(-589.9006, device='cuda:0')
epoch: 144 test_true_pfm: 10542.208371603136 sim_pfm: -297.6717463527942
episode: 576 training return: tensor(-291.2593, device='cuda:0')
episode: 577 training return: tensor(-36.8569, device='cuda:0')
episode: 578 training return: tensor(-16.6732, device='cuda:0')
episode: 579 training return: tensor(19.2053, device='cuda:0')
epoch: 145 test_true_pfm: 10637.90170984222 sim_pfm: -479.6126808860281
episode: 580 training return: tensor(-16.0806, device='cuda:0')
episode: 581 training return: tensor(-109.4582, device='cuda:0')
episode: 582 training return: tensor(-717.3224, device='cuda:0')
episode: 583 training return: tensor(-999.9935, device='cuda:0')
epoch: 146 test_true_pfm: 7053.957077640119 sim_pfm: 118.61323897777281
episode: 584 training return: tensor(-102.2835, device='cuda:0')
episode: 585 training return: tensor(-100.0150, device='cuda:0')
episode: 586 training return: tensor(-163.5807, device='cuda:0')
episode: 587 training return: tensor(-247.5180, device='cuda:0')
epoch: 147 test_true_pfm: 10503.97352172608 sim_pfm: 38.7836328446283
episode: 588 training return: tensor(-224.1024, device='cuda:0')
episode: 589 training return: tensor(-25.0319, device='cuda:0')
episode: 590 training return: tensor(-999.9933, device='cuda:0')
episode: 591 training return: tensor(-999.9306, device='cuda:0')
epoch: 148 test_true_pfm: 7973.394450311298 sim_pfm: 80.15138451713331
episode: 592 training return: tensor(87.6561, device='cuda:0')
episode: 593 training return: tensor(-538.6651, device='cuda:0')
episode: 594 training return: tensor(-50.7813, device='cuda:0')
episode: 595 training return: tensor(-105.1031, device='cuda:0')
epoch: 149 test_true_pfm: 5927.213770558341 sim_pfm: 76.37548261422974
episode: 596 training return: tensor(-4.7200, device='cuda:0')
episode: 597 training return: tensor(-287.3763, device='cuda:0')
episode: 598 training return: tensor(71.4920, device='cuda:0')
episode: 599 training return: tensor(-96.1626, device='cuda:0')
epoch: 150 test_true_pfm: 6832.2798494209665 sim_pfm: 6.265070356137585
