['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '8', '--data', '100000', '--regu', '0.2']
epoch: 0 training_loss 0.32850350335240364 test_loss: 0.2057039976119995
epoch: 1 training_loss 0.18184721641242504 test_loss: 0.15906697511672974
epoch: 2 training_loss 0.15109279967844486 test_loss: 0.15434082746505737
epoch: 3 training_loss 0.13480251874774696 test_loss: 0.14194246530532836
epoch: 4 training_loss 0.12585532981902361 test_loss: 0.13581146001815797
epoch: 5 training_loss 0.12338089551776647 test_loss: 0.12889763116836547
epoch: 6 training_loss 0.13419285532087089 test_loss: 0.12124060392379761
epoch: 7 training_loss 0.12929653070867062 test_loss: 0.11361432075500488
epoch: 8 training_loss 0.11134852427989245 test_loss: 0.11362780332565307
epoch: 9 training_loss 0.12199529904872179 test_loss: 0.1326116919517517
epoch: 10 training_loss 0.1135279031842947 test_loss: 0.1293325424194336
epoch: 11 training_loss 0.11358766229823232 test_loss: 0.1041336178779602
epoch: 12 training_loss 0.11016715612262487 test_loss: 0.138339626789093
epoch: 13 training_loss 0.1110421770811081 test_loss: 0.11073695421218872
epoch: 14 training_loss 0.11185005037114024 test_loss: 0.10664564371109009
epoch: 15 training_loss 0.11531430128961802 test_loss: 0.11813246011734009
epoch: 16 training_loss 0.10966061122715473 test_loss: 0.11071252822875977
epoch: 17 training_loss 0.11530211497098207 test_loss: 0.11622307300567628
epoch: 18 training_loss 0.1156041288934648 test_loss: 0.13528623580932617
epoch: 19 training_loss 0.10866010677069425 test_loss: 0.13586664199829102
epoch: 20 training_loss 0.1097142232209444 test_loss: 0.11693702936172486
epoch: 21 training_loss 0.10887907814234495 test_loss: 0.09762582182884216
epoch: 22 training_loss 0.10777824178338051 test_loss: 0.10955324172973632
epoch: 23 training_loss 0.1086590949818492 test_loss: 0.11001482009887695
epoch: 24 training_loss 0.10643126649782061 test_loss: 0.12560741901397704
epoch: 25 training_loss 0.11411725785583257 test_loss: 0.10634092092514039
epoch: 26 training_loss 0.11699714131653309 test_loss: 0.11600524187088013
epoch: 27 training_loss 0.11133028570562602 test_loss: 0.11369102001190186
epoch: 28 training_loss 0.10760230101644992 test_loss: 0.10356851816177368
epoch: 29 training_loss 0.11732435137033463 test_loss: 0.11938828229904175
epoch: 30 training_loss 0.10410217870026826 test_loss: 0.1135012149810791
epoch: 31 training_loss 0.10497161511331797 test_loss: 0.13830848932266235
epoch: 32 training_loss 0.10718348652124404 test_loss: 0.10919814109802246
epoch: 33 training_loss 0.10891395933926105 test_loss: 0.11347606182098388
epoch: 34 training_loss 0.10254406031221151 test_loss: 0.1164739489555359
epoch: 35 training_loss 0.11575214654207229 test_loss: 0.10081301927566529
epoch: 36 training_loss 0.1031479412317276 test_loss: 0.11923248767852783
epoch: 37 training_loss 0.11066546738147735 test_loss: 0.11287803649902343
epoch: 38 training_loss 0.10996218338608742 test_loss: 0.10090529918670654
epoch: 39 training_loss 0.10967863850295544 test_loss: 0.1128629207611084
epoch: 40 training_loss 0.09700512634590268 test_loss: 0.1070320725440979
epoch: 41 training_loss 0.10417000055313111 test_loss: 0.10757383108139038
epoch: 42 training_loss 0.10853733671829105 test_loss: 0.11240996122360229
epoch: 43 training_loss 0.10930244516581297 test_loss: 0.14263161420822143
epoch: 44 training_loss 0.11149454135447741 test_loss: 0.10829309225082398
epoch: 45 training_loss 0.10367888789623976 test_loss: 0.11877321004867554
epoch: 46 training_loss 0.10071290420368313 test_loss: 0.11361682415008545
epoch: 47 training_loss 0.11169782660901546 test_loss: 0.10287894010543823
epoch: 48 training_loss 0.11063824541866779 test_loss: 0.10419938564300538
epoch: 49 training_loss 0.10598789118230342 test_loss: 0.09782779812812806
epoch: 50 training_loss 0.10303862851113081 test_loss: 0.11014972925186158
epoch: 51 training_loss 0.10107226559892296 test_loss: 0.12406537532806397
epoch: 52 training_loss 0.11195186687633395 test_loss: 0.1153969407081604
epoch: 53 training_loss 0.10107633523643017 test_loss: 0.12390224933624268
epoch: 54 training_loss 0.11035985099151731 test_loss: 0.09801481366157531
epoch: 55 training_loss 0.1104383510351181 test_loss: 0.10897436141967773
epoch: 56 training_loss 0.10883495571091771 test_loss: 0.08953420519828796
epoch: 57 training_loss 0.1045984748378396 test_loss: 0.1041806697845459
epoch: 58 training_loss 0.11019487123936415 test_loss: 0.11295349597930908
epoch: 59 training_loss 0.11064696293324232 test_loss: 0.13272972106933595
epoch: 60 training_loss 0.11035449475049973 test_loss: 0.11723275184631347
epoch: 61 training_loss 0.10677459176629782 test_loss: 0.1126481056213379
epoch: 62 training_loss 0.10234461788088084 test_loss: 0.11903833150863648
epoch: 63 training_loss 0.10202026817947626 test_loss: 0.11840646266937256
epoch: 64 training_loss 0.1033058088645339 test_loss: 0.10572319030761719
epoch: 65 training_loss 0.10712652599439025 test_loss: 0.11848762035369872
epoch: 66 training_loss 0.11139942601323127 test_loss: 0.11005315780639649
epoch: 67 training_loss 0.1015761498361826 test_loss: 0.10302289724349975
epoch: 68 training_loss 0.10044198172166943 test_loss: 0.12221801280975342
epoch: 69 training_loss 0.10599038887768984 test_loss: 0.12108085155487061
epoch: 70 training_loss 0.10449664317071437 test_loss: 0.1120897650718689
epoch: 71 training_loss 0.10908508801832795 test_loss: 0.1198880434036255
epoch: 72 training_loss 0.10224799788556993 test_loss: 0.10578684806823731
epoch: 73 training_loss 0.10861443817615508 test_loss: 0.11536468267440796
epoch: 74 training_loss 0.10634262256324291 test_loss: 0.11297765970230103
epoch: 75 training_loss 0.11096371272578835 test_loss: 0.09854467511177063
epoch: 76 training_loss 0.10841880097985268 test_loss: 0.1112160325050354
epoch: 77 training_loss 0.11400181852281094 test_loss: 0.11599661111831665
epoch: 78 training_loss 0.11488008350133896 test_loss: 0.11209278106689453
epoch: 79 training_loss 0.10959258794784546 test_loss: 0.11408315896987915
epoch: 80 training_loss 0.10053352234885096 test_loss: 0.10812468528747558
epoch: 81 training_loss 0.09783850464969873 test_loss: 0.11475753784179688
epoch: 82 training_loss 0.10641432754695415 test_loss: 0.10999906063079834
epoch: 83 training_loss 0.10198590498417616 test_loss: 0.12145477533340454
epoch: 84 training_loss 0.10209718756377698 test_loss: 0.09671595692634583
epoch: 85 training_loss 0.1094104851782322 test_loss: 0.1169610857963562
epoch: 86 training_loss 0.10301917426288128 test_loss: 0.11569260358810425
epoch: 87 training_loss 0.10713086847215891 test_loss: 0.11505566835403443
epoch: 88 training_loss 0.11326609015464782 test_loss: 0.12106847763061523
epoch: 89 training_loss 0.11575254812836647 test_loss: 0.11066234111785889
epoch: 90 training_loss 0.10339349705725909 test_loss: 0.11644147634506226
epoch: 91 training_loss 0.10273436110466719 test_loss: 0.11056098937988282
epoch: 92 training_loss 0.10156933601945639 test_loss: 0.11352403163909912
epoch: 93 training_loss 0.10185535555705429 test_loss: 0.08932639360427856
epoch: 94 training_loss 0.10461759895086288 test_loss: 0.11255109310150146
epoch: 95 training_loss 0.10618722043931485 test_loss: 0.12114500999450684
epoch: 96 training_loss 0.1092572695761919 test_loss: 0.10445090532302856
epoch: 97 training_loss 0.102132817376405 test_loss: 0.10499194860458375
epoch: 98 training_loss 0.10085411278530955 test_loss: 0.12535310983657838
epoch: 99 training_loss 0.10757224356755614 test_loss: 0.09835715293884277
epoch: 100 training_loss 0.10843139175325632 test_loss: 0.10260254144668579
epoch: 101 training_loss 0.10020206652581692 test_loss: 0.11793273687362671
epoch: 102 training_loss 0.1126643954962492 test_loss: 0.11476726531982422
epoch: 103 training_loss 0.10877230616286397 test_loss: 0.10295034646987915
epoch: 104 training_loss 0.1028305908292532 test_loss: 0.11053699254989624
epoch: 105 training_loss 0.11263960646465421 test_loss: 0.11974670886993408
epoch: 106 training_loss 0.0996614702604711 test_loss: 0.13595794439315795
epoch: 107 training_loss 0.09999680072069168 test_loss: 0.11001896858215332
epoch: 108 training_loss 0.10701348247006535 test_loss: 0.10177801847457886
epoch: 109 training_loss 0.10786573320627213 test_loss: 0.09707220792770385
epoch: 110 training_loss 0.10220439203083515 test_loss: 0.1135747790336609
epoch: 111 training_loss 0.10968663340434431 test_loss: 0.10614572763442993
epoch: 112 training_loss 0.09742614336311817 test_loss: 0.11539313793182374
epoch: 113 training_loss 0.10738422475755215 test_loss: 0.1018221378326416
epoch: 114 training_loss 0.10419157695025205 test_loss: 0.10255197286605836
epoch: 115 training_loss 0.10097097747027874 test_loss: 0.1274179458618164
epoch: 116 training_loss 0.10369439315050841 test_loss: 0.11610525846481323
epoch: 117 training_loss 0.106975865252316 test_loss: 0.11124278306961059
epoch: 118 training_loss 0.09319993883371352 test_loss: 0.10578949451446533
epoch: 119 training_loss 0.10008733741939067 test_loss: 0.10583196878433228
epoch: 120 training_loss 0.10797512788325549 test_loss: 0.10399914979934692
epoch: 121 training_loss 0.10940189629793168 test_loss: 0.11699391603469848
epoch: 122 training_loss 0.09860693397000432 test_loss: 0.12567992210388185
epoch: 123 training_loss 0.10548174746334553 test_loss: 0.10515470504760742
epoch: 124 training_loss 0.11012272115796805 test_loss: 0.11951380968093872
epoch: 125 training_loss 0.10592116974294186 test_loss: 0.11385319232940674
epoch: 126 training_loss 0.10076842719689011 test_loss: 0.12641822099685668
epoch: 127 training_loss 0.10488616485148668 test_loss: 0.11107398271560669
epoch: 128 training_loss 0.09597693590447307 test_loss: 0.1168542742729187
epoch: 129 training_loss 0.1026236929371953 test_loss: 0.09579668045043946
epoch: 130 training_loss 0.103197429664433 test_loss: 0.11549465656280518
epoch: 131 training_loss 0.09915298983454704 test_loss: 0.10679243803024292
epoch: 132 training_loss 0.10306433260440827 test_loss: 0.10741744041442872
epoch: 133 training_loss 0.09878179647028446 test_loss: 0.12276746034622192
epoch: 134 training_loss 0.10189764073118568 test_loss: 0.11920591592788696
epoch: 135 training_loss 0.10288889180868864 test_loss: 0.11708025932312012
epoch: 136 training_loss 0.10020629581063986 test_loss: 0.10050808191299439
epoch: 137 training_loss 0.09816559471189976 test_loss: 0.10316861867904663
epoch: 138 training_loss 0.10615699540823698 test_loss: 0.11141813993453979
epoch: 139 training_loss 0.10215509470552206 test_loss: 0.11711822748184204
epoch: 140 training_loss 0.11160754164680839 test_loss: 0.09514588713645936
epoch: 141 training_loss 0.1020565453544259 test_loss: 0.10119298696517945
epoch: 142 training_loss 0.10339290430769324 test_loss: 0.11471704244613648
epoch: 143 training_loss 0.11014772649854422 test_loss: 0.11641104221343994
epoch: 144 training_loss 0.10278016759082675 test_loss: 0.13158044815063477
epoch: 145 training_loss 0.1058934679813683 test_loss: 0.10702532529830933
epoch: 146 training_loss 0.09836086567491292 test_loss: 0.10419703722000122
epoch: 147 training_loss 0.09944190422073007 test_loss: 0.10290082693099975
epoch: 148 training_loss 0.09783991813659668 test_loss: 0.1024621605873108
epoch: 149 training_loss 0.11144821476191283 test_loss: 0.10268667936325074
epoch: 0 training_loss 54.77158596038819 test_loss: 27.215948486328124
epoch: 1 training_loss 20.10397261619568 test_loss: 15.911543273925782
epoch: 2 training_loss 13.829928874969482 test_loss: 12.426549530029297
epoch: 3 training_loss 11.019583549499512 test_loss: 9.88671646118164
epoch: 4 training_loss 9.157027769088746 test_loss: 8.623136138916015
epoch: 5 training_loss 7.873605823516845 test_loss: 7.0846435546875
epoch: 6 training_loss 6.846214919090271 test_loss: 6.4241081237792965
epoch: 7 training_loss 6.13603036403656 test_loss: 5.696657180786133
epoch: 8 training_loss 5.617419118881226 test_loss: 5.549733734130859
epoch: 9 training_loss 5.102752981185913 test_loss: 5.149406433105469
epoch: 10 training_loss 4.832949678897858 test_loss: 4.779758453369141
epoch: 11 training_loss 4.541813228130341 test_loss: 4.490211105346679
epoch: 12 training_loss 4.299982330799103 test_loss: 4.204067993164062
epoch: 13 training_loss 4.068966753482819 test_loss: 4.012952041625977
epoch: 14 training_loss 3.8842191815376284 test_loss: 3.833803176879883
epoch: 15 training_loss 3.8353128957748415 test_loss: 3.4591190338134767
epoch: 16 training_loss 3.618167824745178 test_loss: 3.528834915161133
epoch: 17 training_loss 3.471810505390167 test_loss: 3.2962486267089846
epoch: 18 training_loss 3.3711817193031313 test_loss: 3.210105133056641
epoch: 19 training_loss 3.251586320400238 test_loss: 3.1887622833251954
epoch: 20 training_loss 3.124678633213043 test_loss: 3.081558418273926
epoch: 21 training_loss 3.0687616300582885 test_loss: 3.0826389312744142
epoch: 22 training_loss 2.925103406906128 test_loss: 2.867592620849609
epoch: 23 training_loss 2.8536185097694395 test_loss: 2.9342594146728516
epoch: 24 training_loss 2.9195517563819884 test_loss: 2.8973051071166993
epoch: 25 training_loss 2.806676816940308 test_loss: 2.757227325439453
epoch: 26 training_loss 2.678732018470764 test_loss: 2.7625844955444334
epoch: 27 training_loss 2.63590469121933 test_loss: 2.577593994140625
epoch: 28 training_loss 2.660705876350403 test_loss: 2.5615865707397463
epoch: 29 training_loss 2.519174346923828 test_loss: 2.4561689376831053
epoch: 30 training_loss 2.532393515110016 test_loss: 2.648956298828125
epoch: 31 training_loss 2.465967730283737 test_loss: 2.5668853759765624
epoch: 32 training_loss 2.444480459690094 test_loss: 2.376765823364258
epoch: 33 training_loss 2.4232678866386412 test_loss: 2.343020439147949
epoch: 34 training_loss 2.395896625518799 test_loss: 2.364996337890625
epoch: 35 training_loss 2.330403767824173 test_loss: 2.249137115478516
epoch: 36 training_loss 2.3334448027610777 test_loss: 2.2467002868652344
epoch: 37 training_loss 2.2515672039985657 test_loss: 2.2170473098754884
epoch: 38 training_loss 2.237705947160721 test_loss: 2.1956104278564452
epoch: 39 training_loss 2.266283868551254 test_loss: 2.301514434814453
epoch: 40 training_loss 2.220207097530365 test_loss: 2.110891342163086
epoch: 41 training_loss 2.189702956676483 test_loss: 2.151835250854492
epoch: 42 training_loss 2.180974736213684 test_loss: 2.151931953430176
epoch: 43 training_loss 2.1296925246715546 test_loss: 2.1120250701904295
epoch: 44 training_loss 2.168787429332733 test_loss: 2.0991853713989257
epoch: 45 training_loss 2.0980807745456698 test_loss: 2.152342987060547
epoch: 46 training_loss 2.091589484214783 test_loss: 2.150311088562012
epoch: 47 training_loss 2.07254301905632 test_loss: 2.156011390686035
epoch: 48 training_loss 2.040905637741089 test_loss: 2.100765037536621
epoch: 49 training_loss 2.028178782463074 test_loss: 2.0120882034301757
epoch: 50 training_loss 2.0477338361740114 test_loss: 2.026668739318848
epoch: 51 training_loss 1.9972247231006621 test_loss: 2.0460842132568358
epoch: 52 training_loss 1.98769668340683 test_loss: 1.9968385696411133
epoch: 53 training_loss 1.955347000360489 test_loss: 1.941538429260254
epoch: 54 training_loss 1.9396051573753357 test_loss: 1.9303640365600585
epoch: 55 training_loss 1.972398716211319 test_loss: 1.8680715560913086
epoch: 56 training_loss 1.9492310333251952 test_loss: 1.870663833618164
epoch: 57 training_loss 1.9110882771015167 test_loss: 1.9466081619262696
epoch: 58 training_loss 1.8922425317764282 test_loss: 1.9326784133911132
epoch: 59 training_loss 1.9049701333045959 test_loss: 1.9324846267700195
epoch: 60 training_loss 1.8973816657066345 test_loss: 1.9744165420532227
epoch: 61 training_loss 1.90022944688797 test_loss: 1.9907529830932618
epoch: 62 training_loss 1.8771946001052857 test_loss: 1.7890752792358398
epoch: 63 training_loss 1.859499877691269 test_loss: 1.8829486846923829
epoch: 64 training_loss 1.8031546568870545 test_loss: 1.8403755187988282
epoch: 65 training_loss 1.8274233973026275 test_loss: 1.878957748413086
epoch: 66 training_loss 1.8350362229347228 test_loss: 1.8879777908325195
epoch: 67 training_loss 1.8517200756072998 test_loss: 1.8959821701049804
epoch: 68 training_loss 1.8530080258846282 test_loss: 1.8523353576660155
epoch: 69 training_loss 1.8095144152641296 test_loss: 1.781808090209961
epoch: 70 training_loss 1.8001681137084962 test_loss: 1.822625732421875
epoch: 71 training_loss 1.7880933582782745 test_loss: 1.757341194152832
epoch: 72 training_loss 1.7959764122962951 test_loss: 1.751362419128418
epoch: 73 training_loss 1.7730925929546357 test_loss: 1.788149070739746
epoch: 74 training_loss 1.7602322959899903 test_loss: 1.771136474609375
epoch: 75 training_loss 1.743685051202774 test_loss: 1.7456933975219726
epoch: 76 training_loss 1.7343647038936616 test_loss: 1.8501958847045898
epoch: 77 training_loss 1.7361993086338043 test_loss: 1.7139856338500976
epoch: 78 training_loss 1.7151465868949891 test_loss: 1.7142679214477539
epoch: 79 training_loss 1.7402775800228119 test_loss: 1.6952402114868164
epoch: 80 training_loss 1.764381686449051 test_loss: 1.7165081024169921
epoch: 81 training_loss 1.7081888020038605 test_loss: 1.745931625366211
epoch: 82 training_loss 1.7018812656402589 test_loss: 1.7873710632324218
epoch: 83 training_loss 1.6848695039749146 test_loss: 1.6443710327148438
epoch: 84 training_loss 1.6713416719436645 test_loss: 1.7285757064819336
epoch: 85 training_loss 1.6768173158168793 test_loss: 1.6771072387695312
epoch: 86 training_loss 1.6607263720035552 test_loss: 1.683563232421875
epoch: 87 training_loss 1.6687054431438446 test_loss: 1.7021982192993164
epoch: 88 training_loss 1.6532705450057983 test_loss: 1.6733421325683593
epoch: 89 training_loss 1.6791496503353118 test_loss: 1.7113380432128906
epoch: 90 training_loss 1.6742644321918487 test_loss: 1.703141975402832
epoch: 91 training_loss 1.6785188686847687 test_loss: 1.6550926208496093
epoch: 92 training_loss 1.659259420633316 test_loss: 1.6840503692626954
epoch: 93 training_loss 1.6372282660007478 test_loss: 1.6456680297851562
epoch: 94 training_loss 1.6407493364810943 test_loss: 1.675160026550293
epoch: 95 training_loss 1.6746470594406129 test_loss: 1.6246768951416015
epoch: 96 training_loss 1.6260113275051118 test_loss: 1.6508087158203124
epoch: 97 training_loss 1.6263629639148711 test_loss: 1.5916471481323242
epoch: 98 training_loss 1.6185920906066895 test_loss: 1.6542282104492188
epoch: 99 training_loss 1.6090971291065217 test_loss: 1.6261754989624024
epoch: 100 training_loss 1.6488062536716461 test_loss: 1.5974086761474608
epoch: 101 training_loss 1.591636997461319 test_loss: 1.5908016204833983
epoch: 102 training_loss 1.574109981060028 test_loss: 1.6271167755126954
epoch: 103 training_loss 1.5849289858341218 test_loss: 1.5973612785339355
epoch: 104 training_loss 1.5927129888534546 test_loss: 1.5719757080078125
epoch: 105 training_loss 1.5801508843898773 test_loss: 1.5832261085510253
epoch: 106 training_loss 1.5794025945663452 test_loss: 1.6652702331542968
epoch: 107 training_loss 1.5811989653110503 test_loss: 1.619725799560547
epoch: 108 training_loss 1.5781800246238709 test_loss: 1.5918822288513184
epoch: 109 training_loss 1.5898552858829498 test_loss: 1.5684236526489257
epoch: 110 training_loss 1.5832898592948914 test_loss: 1.5767688751220703
epoch: 111 training_loss 1.5620215737819672 test_loss: 1.5212462425231934
epoch: 112 training_loss 1.575533654689789 test_loss: 1.539933681488037
epoch: 113 training_loss 1.588387680053711 test_loss: 1.6005283355712892
epoch: 114 training_loss 1.584389215707779 test_loss: 1.5745176315307616
epoch: 115 training_loss 1.558574570417404 test_loss: 1.556064510345459
epoch: 116 training_loss 1.5459089577198029 test_loss: 1.546257781982422
epoch: 117 training_loss 1.5413513553142548 test_loss: 1.568817138671875
epoch: 118 training_loss 1.514903736114502 test_loss: 1.5888997077941895
epoch: 119 training_loss 1.540819091796875 test_loss: 1.501543140411377
epoch: 120 training_loss 1.5953040826320648 test_loss: 1.501673412322998
epoch: 121 training_loss 1.55375559091568 test_loss: 1.5876969337463378
epoch: 122 training_loss 1.5455583810806275 test_loss: 1.5322583198547364
epoch: 123 training_loss 1.5310805833339691 test_loss: 1.5858720779418944
epoch: 124 training_loss 1.526371306180954 test_loss: 1.5111764907836913
epoch: 125 training_loss 1.5206751263141631 test_loss: 1.472743606567383
epoch: 126 training_loss 1.5363163220882416 test_loss: 1.533878231048584
epoch: 127 training_loss 1.5149356484413148 test_loss: 1.5328691482543946
epoch: 128 training_loss 1.5193158829212188 test_loss: 1.544658088684082
epoch: 129 training_loss 1.5248811531066895 test_loss: 1.488480281829834
epoch: 130 training_loss 1.5152499854564667 test_loss: 1.5384875297546388
epoch: 131 training_loss 1.5066634273529054 test_loss: 1.5356623649597168
epoch: 132 training_loss 1.497870408296585 test_loss: 1.463465690612793
epoch: 133 training_loss 1.5144438183307647 test_loss: 1.564179039001465
epoch: 134 training_loss 1.5222558748722077 test_loss: 1.5731392860412599
epoch: 135 training_loss 1.5084559643268585 test_loss: 1.5194078445434571
epoch: 136 training_loss 1.5039745891094207 test_loss: 1.498420810699463
epoch: 137 training_loss 1.4899489426612853 test_loss: 1.513350009918213
epoch: 138 training_loss 1.481086757183075 test_loss: 1.4806201934814454
epoch: 139 training_loss 1.5161498725414275 test_loss: 1.4745206832885742
epoch: 140 training_loss 1.5285423719882965 test_loss: 1.5086417198181152
epoch: 141 training_loss 1.494463200569153 test_loss: 1.5174017906188966
epoch: 142 training_loss 1.4915339648723602 test_loss: 1.4488211631774903
epoch: 143 training_loss 1.4732918202877046 test_loss: 1.5080583572387696
epoch: 144 training_loss 1.4949668216705323 test_loss: 1.5041363716125489
epoch: 145 training_loss 1.484439014196396 test_loss: 1.4311084747314453
epoch: 146 training_loss 1.4651409554481507 test_loss: 1.5065173149108886
epoch: 147 training_loss 1.4730100059509277 test_loss: 1.4713595390319825
epoch: 148 training_loss 1.4691850304603578 test_loss: 1.4589460372924805
epoch: 149 training_loss 1.4533778512477875 test_loss: 1.442655372619629
5144.143448270923
episode: 0 training return: tensor(-216.4130, device='cuda:0')
episode: 1 training return: tensor(-212.0180, device='cuda:0')
episode: 2 training return: tensor(-377.9510, device='cuda:0')
episode: 3 training return: tensor(-366.8605, device='cuda:0')
epoch: 1 test_true_pfm: 5163.561010534452 sim_pfm: -39.847080653281104
episode: 4 training return: tensor(-315.7105, device='cuda:0')
episode: 5 training return: tensor(-223.7783, device='cuda:0')
episode: 6 training return: tensor(-382.4252, device='cuda:0')
episode: 7 training return: tensor(-181.6862, device='cuda:0')
epoch: 2 test_true_pfm: 5002.500045069128 sim_pfm: -145.7238317594359
episode: 8 training return: tensor(-314.8989, device='cuda:0')
episode: 9 training return: tensor(-306.5565, device='cuda:0')
episode: 10 training return: tensor(-235.1662, device='cuda:0')
episode: 11 training return: tensor(-209.9065, device='cuda:0')
epoch: 3 test_true_pfm: 4995.695652322977 sim_pfm: -95.15149907465093
episode: 12 training return: tensor(-143.9833, device='cuda:0')
episode: 13 training return: tensor(-262.7682, device='cuda:0')
episode: 14 training return: tensor(-94.3740, device='cuda:0')
episode: 15 training return: tensor(-117.3164, device='cuda:0')
epoch: 4 test_true_pfm: 5179.249131502638 sim_pfm: -112.35799143917393
episode: 16 training return: tensor(-260.7527, device='cuda:0')
episode: 17 training return: tensor(-259.9106, device='cuda:0')
episode: 18 training return: tensor(-203.3566, device='cuda:0')
episode: 19 training return: tensor(-269.4876, device='cuda:0')
epoch: 5 test_true_pfm: 5137.925684824346 sim_pfm: -70.50701935959903
episode: 20 training return: tensor(-301.1752, device='cuda:0')
episode: 21 training return: tensor(-234.5879, device='cuda:0')
episode: 22 training return: tensor(-231.5651, device='cuda:0')
episode: 23 training return: tensor(-102.8374, device='cuda:0')
epoch: 6 test_true_pfm: 5169.52544159684 sim_pfm: -42.87507526299063
episode: 24 training return: tensor(-196.7741, device='cuda:0')
episode: 25 training return: tensor(-124.4439, device='cuda:0')
episode: 26 training return: tensor(-254.7486, device='cuda:0')
episode: 27 training return: tensor(-93.4112, device='cuda:0')
epoch: 7 test_true_pfm: 5190.0972646820255 sim_pfm: -25.380695410179516
episode: 28 training return: tensor(-169.9626, device='cuda:0')
episode: 29 training return: tensor(-80.9669, device='cuda:0')
episode: 30 training return: tensor(-186.3039, device='cuda:0')
episode: 31 training return: tensor(-229.8618, device='cuda:0')
epoch: 8 test_true_pfm: 5101.710804615678 sim_pfm: -46.66717795322378
episode: 32 training return: tensor(-255.5017, device='cuda:0')
episode: 33 training return: tensor(-223.4191, device='cuda:0')
episode: 34 training return: tensor(-64.4539, device='cuda:0')
episode: 35 training return: tensor(-69.5537, device='cuda:0')
epoch: 9 test_true_pfm: 5245.414580106178 sim_pfm: -81.16155265065997
episode: 36 training return: tensor(-221.8807, device='cuda:0')
episode: 37 training return: tensor(-113.0064, device='cuda:0')
episode: 38 training return: tensor(-201.4760, device='cuda:0')
episode: 39 training return: tensor(-115.2356, device='cuda:0')
epoch: 10 test_true_pfm: 5215.758474438861 sim_pfm: 62.50899738049096
episode: 40 training return: tensor(-170.4915, device='cuda:0')
episode: 41 training return: tensor(-178.4522, device='cuda:0')
episode: 42 training return: tensor(-267.7760, device='cuda:0')
episode: 43 training return: tensor(-96.1669, device='cuda:0')
epoch: 11 test_true_pfm: 5359.751583928454 sim_pfm: 71.98956485376887
episode: 44 training return: tensor(-160.2962, device='cuda:0')
episode: 45 training return: tensor(-132.1833, device='cuda:0')
episode: 46 training return: tensor(-96.5102, device='cuda:0')
episode: 47 training return: tensor(-71.6832, device='cuda:0')
epoch: 12 test_true_pfm: 5309.5905851926445 sim_pfm: 93.82440955620648
episode: 48 training return: tensor(-142.9048, device='cuda:0')
episode: 49 training return: tensor(-43.3024, device='cuda:0')
episode: 50 training return: tensor(-24.1865, device='cuda:0')
episode: 51 training return: tensor(-127.9237, device='cuda:0')
epoch: 13 test_true_pfm: 5407.06780582386 sim_pfm: 103.20971179575038
episode: 52 training return: tensor(-138.4336, device='cuda:0')
episode: 53 training return: tensor(-46.4605, device='cuda:0')
episode: 54 training return: tensor(-97.0377, device='cuda:0')
episode: 55 training return: tensor(-67.6582, device='cuda:0')
epoch: 14 test_true_pfm: 5418.56490069466 sim_pfm: 131.5393018439645
episode: 56 training return: tensor(-19.2693, device='cuda:0')
episode: 57 training return: tensor(-114.1955, device='cuda:0')
episode: 58 training return: tensor(32.4291, device='cuda:0')
episode: 59 training return: tensor(-12.5275, device='cuda:0')
epoch: 15 test_true_pfm: 5390.836816736362 sim_pfm: 195.93113115757782
episode: 60 training return: tensor(-173.1221, device='cuda:0')
episode: 61 training return: tensor(71.0963, device='cuda:0')
episode: 62 training return: tensor(-44.4944, device='cuda:0')
episode: 63 training return: tensor(-24.0759, device='cuda:0')
epoch: 16 test_true_pfm: 5525.103496374144 sim_pfm: 204.4541603329029
episode: 64 training return: tensor(-7.4819, device='cuda:0')
episode: 65 training return: tensor(8.8340, device='cuda:0')
episode: 66 training return: tensor(116.3845, device='cuda:0')
episode: 67 training return: tensor(37.4198, device='cuda:0')
epoch: 17 test_true_pfm: 5565.954999460005 sim_pfm: 240.30203184322454
episode: 68 training return: tensor(7.7520, device='cuda:0')
episode: 69 training return: tensor(-107.9049, device='cuda:0')
episode: 70 training return: tensor(-90.3601, device='cuda:0')
episode: 71 training return: tensor(18.0991, device='cuda:0')
epoch: 18 test_true_pfm: 5563.425607891394 sim_pfm: 190.17098628071835
episode: 72 training return: tensor(74.3195, device='cuda:0')
episode: 73 training return: tensor(109.6645, device='cuda:0')
episode: 74 training return: tensor(38.4634, device='cuda:0')
episode: 75 training return: tensor(108.0402, device='cuda:0')
epoch: 19 test_true_pfm: 5547.808702018449 sim_pfm: 276.075263130789
episode: 76 training return: tensor(66.2242, device='cuda:0')
episode: 77 training return: tensor(123.7165, device='cuda:0')
episode: 78 training return: tensor(5.5359, device='cuda:0')
episode: 79 training return: tensor(56.7123, device='cuda:0')
epoch: 20 test_true_pfm: 5623.9283643243425 sim_pfm: 280.76617553453735
episode: 80 training return: tensor(103.2714, device='cuda:0')
episode: 81 training return: tensor(70.0860, device='cuda:0')
episode: 82 training return: tensor(119.8370, device='cuda:0')
episode: 83 training return: tensor(145.5950, device='cuda:0')
epoch: 21 test_true_pfm: 5662.890498218647 sim_pfm: 246.81323256479422
episode: 84 training return: tensor(205.1306, device='cuda:0')
episode: 85 training return: tensor(121.7273, device='cuda:0')
episode: 86 training return: tensor(197.1517, device='cuda:0')
episode: 87 training return: tensor(-172.3243, device='cuda:0')
epoch: 22 test_true_pfm: 5589.633351816966 sim_pfm: 297.97326953350176
episode: 88 training return: tensor(127.7308, device='cuda:0')
episode: 89 training return: tensor(90.6538, device='cuda:0')
episode: 90 training return: tensor(80.8008, device='cuda:0')
episode: 91 training return: tensor(281.2210, device='cuda:0')
epoch: 23 test_true_pfm: 5684.894557268945 sim_pfm: 294.5009799928036
episode: 92 training return: tensor(57.1039, device='cuda:0')
episode: 93 training return: tensor(53.9326, device='cuda:0')
episode: 94 training return: tensor(77.2309, device='cuda:0')
episode: 95 training return: tensor(156.1117, device='cuda:0')
epoch: 24 test_true_pfm: 5609.543555252592 sim_pfm: 329.0847533317089
episode: 96 training return: tensor(260.3875, device='cuda:0')
episode: 97 training return: tensor(37.6749, device='cuda:0')
episode: 98 training return: tensor(186.4918, device='cuda:0')
episode: 99 training return: tensor(101.4088, device='cuda:0')
epoch: 25 test_true_pfm: 5690.349285658133 sim_pfm: 259.5959007270867
episode: 100 training return: tensor(136.6945, device='cuda:0')
episode: 101 training return: tensor(124.6387, device='cuda:0')
episode: 102 training return: tensor(140.4506, device='cuda:0')
episode: 103 training return: tensor(76.7255, device='cuda:0')
epoch: 26 test_true_pfm: 5642.418566285065 sim_pfm: 344.5178217337816
episode: 104 training return: tensor(145.8847, device='cuda:0')
episode: 105 training return: tensor(161.6006, device='cuda:0')
episode: 106 training return: tensor(157.3790, device='cuda:0')
episode: 107 training return: tensor(160.3710, device='cuda:0')
epoch: 27 test_true_pfm: 5652.708675166202 sim_pfm: 299.64770159460994
episode: 108 training return: tensor(137.8537, device='cuda:0')
episode: 109 training return: tensor(237.0599, device='cuda:0')
episode: 110 training return: tensor(203.3695, device='cuda:0')
episode: 111 training return: tensor(144.1548, device='cuda:0')
epoch: 28 test_true_pfm: 5750.527353728081 sim_pfm: 278.5716506302124
episode: 112 training return: tensor(100.1230, device='cuda:0')
episode: 113 training return: tensor(55.4080, device='cuda:0')
episode: 114 training return: tensor(43.9720, device='cuda:0')
episode: 115 training return: tensor(98.1372, device='cuda:0')
epoch: 29 test_true_pfm: 5760.883280334398 sim_pfm: 300.4315902979967
episode: 116 training return: tensor(206.9640, device='cuda:0')
episode: 117 training return: tensor(195.1100, device='cuda:0')
episode: 118 training return: tensor(180.3059, device='cuda:0')
episode: 119 training return: tensor(181.6927, device='cuda:0')
epoch: 30 test_true_pfm: 5672.23267748845 sim_pfm: 385.27067997486057
episode: 120 training return: tensor(246.6442, device='cuda:0')
episode: 121 training return: tensor(66.3308, device='cuda:0')
episode: 122 training return: tensor(306.3000, device='cuda:0')
episode: 123 training return: tensor(158.3604, device='cuda:0')
epoch: 31 test_true_pfm: 5743.003048535222 sim_pfm: 415.653145977781
episode: 124 training return: tensor(132.3771, device='cuda:0')
episode: 125 training return: tensor(168.7187, device='cuda:0')
episode: 126 training return: tensor(235.9338, device='cuda:0')
episode: 127 training return: tensor(309.7966, device='cuda:0')
epoch: 32 test_true_pfm: 5618.611443454502 sim_pfm: 333.16421555316384
episode: 128 training return: tensor(221.4845, device='cuda:0')
episode: 129 training return: tensor(190.5301, device='cuda:0')
episode: 130 training return: tensor(278.2602, device='cuda:0')
episode: 131 training return: tensor(116.7806, device='cuda:0')
epoch: 33 test_true_pfm: 5779.761300891968 sim_pfm: 412.41025620964746
episode: 132 training return: tensor(241.9997, device='cuda:0')
episode: 133 training return: tensor(124.7535, device='cuda:0')
episode: 134 training return: tensor(296.6890, device='cuda:0')
episode: 135 training return: tensor(264.4360, device='cuda:0')
epoch: 34 test_true_pfm: 5817.450032403528 sim_pfm: 428.3224509985691
episode: 136 training return: tensor(187.9535, device='cuda:0')
episode: 137 training return: tensor(263.6169, device='cuda:0')
episode: 138 training return: tensor(252.6161, device='cuda:0')
episode: 139 training return: tensor(323.0794, device='cuda:0')
epoch: 35 test_true_pfm: 5790.433838400201 sim_pfm: 360.4851040711122
episode: 140 training return: tensor(267.7515, device='cuda:0')
episode: 141 training return: tensor(137.5254, device='cuda:0')
episode: 142 training return: tensor(173.9526, device='cuda:0')
episode: 143 training return: tensor(301.8917, device='cuda:0')
epoch: 36 test_true_pfm: 5765.853147086996 sim_pfm: 425.1263934193412
episode: 144 training return: tensor(213.2955, device='cuda:0')
episode: 145 training return: tensor(244.2826, device='cuda:0')
episode: 146 training return: tensor(213.0799, device='cuda:0')
episode: 147 training return: tensor(169.4844, device='cuda:0')
epoch: 37 test_true_pfm: 5847.302377327719 sim_pfm: 393.9561832174659
episode: 148 training return: tensor(186.0351, device='cuda:0')
episode: 149 training return: tensor(256.0817, device='cuda:0')
episode: 150 training return: tensor(261.5993, device='cuda:0')
episode: 151 training return: tensor(113.8071, device='cuda:0')
epoch: 38 test_true_pfm: 5772.1195852802075 sim_pfm: 389.41623492647585
episode: 152 training return: tensor(367.9482, device='cuda:0')
episode: 153 training return: tensor(306.3344, device='cuda:0')
episode: 154 training return: tensor(307.1942, device='cuda:0')
episode: 155 training return: tensor(323.7278, device='cuda:0')
epoch: 39 test_true_pfm: 5847.919390147469 sim_pfm: 480.4254127717577
episode: 156 training return: tensor(319.7228, device='cuda:0')
episode: 157 training return: tensor(291.6602, device='cuda:0')
episode: 158 training return: tensor(282.6221, device='cuda:0')
episode: 159 training return: tensor(322.2437, device='cuda:0')
epoch: 40 test_true_pfm: 5835.628205422351 sim_pfm: 424.9067047695765
episode: 160 training return: tensor(326.2741, device='cuda:0')
episode: 161 training return: tensor(337.8747, device='cuda:0')
episode: 162 training return: tensor(387.8502, device='cuda:0')
episode: 163 training return: tensor(278.1533, device='cuda:0')
epoch: 41 test_true_pfm: 5862.148858402127 sim_pfm: 421.149075191662
episode: 164 training return: tensor(346.2721, device='cuda:0')
episode: 165 training return: tensor(232.2154, device='cuda:0')
episode: 166 training return: tensor(280.1935, device='cuda:0')
episode: 167 training return: tensor(320.7952, device='cuda:0')
epoch: 42 test_true_pfm: 5860.964161799409 sim_pfm: 462.09030780459096
episode: 168 training return: tensor(262.1899, device='cuda:0')
episode: 169 training return: tensor(199.3279, device='cuda:0')
episode: 170 training return: tensor(307.0522, device='cuda:0')
episode: 171 training return: tensor(226.2899, device='cuda:0')
epoch: 43 test_true_pfm: 5875.243860055242 sim_pfm: 407.4570794030151
episode: 172 training return: tensor(246.4822, device='cuda:0')
episode: 173 training return: tensor(317.4486, device='cuda:0')
episode: 174 training return: tensor(236.1107, device='cuda:0')
episode: 175 training return: tensor(284.7025, device='cuda:0')
epoch: 44 test_true_pfm: 5914.750775952426 sim_pfm: 470.90213528442354
episode: 176 training return: tensor(369.1084, device='cuda:0')
episode: 177 training return: tensor(418.0869, device='cuda:0')
episode: 178 training return: tensor(297.5252, device='cuda:0')
episode: 179 training return: tensor(244.7386, device='cuda:0')
epoch: 45 test_true_pfm: 5900.299327083473 sim_pfm: 439.1329762266444
episode: 180 training return: tensor(220.1603, device='cuda:0')
episode: 181 training return: tensor(272.4811, device='cuda:0')
episode: 182 training return: tensor(268.1875, device='cuda:0')
episode: 183 training return: tensor(389.3483, device='cuda:0')
epoch: 46 test_true_pfm: 5887.546395851419 sim_pfm: 486.0866269998757
episode: 184 training return: tensor(295.4291, device='cuda:0')
episode: 185 training return: tensor(290.9073, device='cuda:0')
episode: 186 training return: tensor(252.2483, device='cuda:0')
episode: 187 training return: tensor(241.7339, device='cuda:0')
epoch: 47 test_true_pfm: 5898.2115388875945 sim_pfm: 469.1315251378498
episode: 188 training return: tensor(288.3473, device='cuda:0')
episode: 189 training return: tensor(183.9344, device='cuda:0')
episode: 190 training return: tensor(302.2661, device='cuda:0')
episode: 191 training return: tensor(352.9625, device='cuda:0')
epoch: 48 test_true_pfm: 5882.771339244544 sim_pfm: 420.89458847185597
episode: 192 training return: tensor(272.8044, device='cuda:0')
episode: 193 training return: tensor(331.0023, device='cuda:0')
episode: 194 training return: tensor(300.3213, device='cuda:0')
episode: 195 training return: tensor(269.1019, device='cuda:0')
epoch: 49 test_true_pfm: 5888.056360628921 sim_pfm: 479.94217674143147
episode: 196 training return: tensor(324.5882, device='cuda:0')
episode: 197 training return: tensor(272.9873, device='cuda:0')
episode: 198 training return: tensor(380.2947, device='cuda:0')
episode: 199 training return: tensor(328.0222, device='cuda:0')
epoch: 50 test_true_pfm: 5905.7739336905515 sim_pfm: 432.7720487602831
episode: 200 training return: tensor(233.2834, device='cuda:0')
episode: 201 training return: tensor(262.2660, device='cuda:0')
episode: 202 training return: tensor(297.5561, device='cuda:0')
episode: 203 training return: tensor(259.8300, device='cuda:0')
epoch: 51 test_true_pfm: 5889.110806284025 sim_pfm: 493.0212989137993
episode: 204 training return: tensor(425.9455, device='cuda:0')
episode: 205 training return: tensor(298.5964, device='cuda:0')
episode: 206 training return: tensor(351.7784, device='cuda:0')
episode: 207 training return: tensor(328.0201, device='cuda:0')
epoch: 52 test_true_pfm: 5947.572049904743 sim_pfm: 479.1433085348496
episode: 208 training return: tensor(281.2676, device='cuda:0')
episode: 209 training return: tensor(323.7903, device='cuda:0')
episode: 210 training return: tensor(254.3700, device='cuda:0')
episode: 211 training return: tensor(341.2063, device='cuda:0')
epoch: 53 test_true_pfm: 5945.260804959144 sim_pfm: 485.2484799994272
episode: 212 training return: tensor(383.0677, device='cuda:0')
episode: 213 training return: tensor(337.8970, device='cuda:0')
episode: 214 training return: tensor(384.4065, device='cuda:0')
episode: 215 training return: tensor(372.8767, device='cuda:0')
epoch: 54 test_true_pfm: 5967.849052379326 sim_pfm: 472.8312160159694
episode: 216 training return: tensor(267.8833, device='cuda:0')
episode: 217 training return: tensor(222.8657, device='cuda:0')
episode: 218 training return: tensor(388.4229, device='cuda:0')
episode: 219 training return: tensor(323.5715, device='cuda:0')
epoch: 55 test_true_pfm: 5910.522406094984 sim_pfm: 509.33378348931245
episode: 220 training return: tensor(236.3748, device='cuda:0')
episode: 221 training return: tensor(379.6284, device='cuda:0')
episode: 222 training return: tensor(345.4802, device='cuda:0')
episode: 223 training return: tensor(394.7727, device='cuda:0')
epoch: 56 test_true_pfm: 5934.312407662335 sim_pfm: 489.8403052773986
episode: 224 training return: tensor(356.5897, device='cuda:0')
episode: 225 training return: tensor(328.0065, device='cuda:0')
episode: 226 training return: tensor(323.0969, device='cuda:0')
episode: 227 training return: tensor(218.9670, device='cuda:0')
epoch: 57 test_true_pfm: 5981.424816021012 sim_pfm: 521.7492792051635
episode: 228 training return: tensor(263.5897, device='cuda:0')
episode: 229 training return: tensor(292.3276, device='cuda:0')
episode: 230 training return: tensor(345.9510, device='cuda:0')
episode: 231 training return: tensor(376.5891, device='cuda:0')
epoch: 58 test_true_pfm: 5991.187802285108 sim_pfm: 484.7849766989627
episode: 232 training return: tensor(408.4030, device='cuda:0')
episode: 233 training return: tensor(361.8893, device='cuda:0')
episode: 234 training return: tensor(402.2426, device='cuda:0')
episode: 235 training return: tensor(350.5333, device='cuda:0')
epoch: 59 test_true_pfm: 5965.075910129227 sim_pfm: 538.0970564673189
episode: 236 training return: tensor(341.8309, device='cuda:0')
episode: 237 training return: tensor(382.2595, device='cuda:0')
episode: 238 training return: tensor(354.2034, device='cuda:0')
episode: 239 training return: tensor(308.3929, device='cuda:0')
epoch: 60 test_true_pfm: 5966.191303613124 sim_pfm: 507.463770780043
episode: 240 training return: tensor(321.5929, device='cuda:0')
episode: 241 training return: tensor(357.1548, device='cuda:0')
episode: 242 training return: tensor(404.8163, device='cuda:0')
episode: 243 training return: tensor(364.7679, device='cuda:0')
epoch: 61 test_true_pfm: 6015.349555125849 sim_pfm: 494.2395170723418
episode: 244 training return: tensor(377.3020, device='cuda:0')
episode: 245 training return: tensor(371.3821, device='cuda:0')
episode: 246 training return: tensor(407.4470, device='cuda:0')
episode: 247 training return: tensor(340.9738, device='cuda:0')
epoch: 62 test_true_pfm: 5928.6230663253455 sim_pfm: 504.95019445726456
episode: 248 training return: tensor(366.1642, device='cuda:0')
episode: 249 training return: tensor(332.5664, device='cuda:0')
episode: 250 training return: tensor(333.2542, device='cuda:0')
episode: 251 training return: tensor(341.8812, device='cuda:0')
epoch: 63 test_true_pfm: 5947.354493396077 sim_pfm: 492.23349013663636
episode: 252 training return: tensor(405.3579, device='cuda:0')
episode: 253 training return: tensor(364.3141, device='cuda:0')
episode: 254 training return: tensor(337.1204, device='cuda:0')
episode: 255 training return: tensor(256.1532, device='cuda:0')
epoch: 64 test_true_pfm: 6005.416463392951 sim_pfm: 517.1898254120412
episode: 256 training return: tensor(325.5775, device='cuda:0')
episode: 257 training return: tensor(338.6115, device='cuda:0')
episode: 258 training return: tensor(339.4817, device='cuda:0')
episode: 259 training return: tensor(418.7609, device='cuda:0')
epoch: 65 test_true_pfm: 5953.003498892526 sim_pfm: 526.5236672650402
episode: 260 training return: tensor(362.0900, device='cuda:0')
episode: 261 training return: tensor(300.4754, device='cuda:0')
episode: 262 training return: tensor(380.1508, device='cuda:0')
episode: 263 training return: tensor(351.4325, device='cuda:0')
epoch: 66 test_true_pfm: 5977.079612624671 sim_pfm: 504.570300542245
episode: 264 training return: tensor(396.2688, device='cuda:0')
episode: 265 training return: tensor(351.5286, device='cuda:0')
episode: 266 training return: tensor(435.7059, device='cuda:0')
episode: 267 training return: tensor(385.4810, device='cuda:0')
epoch: 67 test_true_pfm: 6020.111992960175 sim_pfm: 549.4543398850365
episode: 268 training return: tensor(349.0700, device='cuda:0')
episode: 269 training return: tensor(364.8798, device='cuda:0')
episode: 270 training return: tensor(356.1392, device='cuda:0')
episode: 271 training return: tensor(345.8088, device='cuda:0')
epoch: 68 test_true_pfm: 5984.078299702585 sim_pfm: 491.9220532575079
episode: 272 training return: tensor(344.8849, device='cuda:0')
episode: 273 training return: tensor(330.8788, device='cuda:0')
episode: 274 training return: tensor(348.4731, device='cuda:0')
episode: 275 training return: tensor(413.0316, device='cuda:0')
epoch: 69 test_true_pfm: 5934.321531068642 sim_pfm: 483.0212071848412
episode: 276 training return: tensor(378.1059, device='cuda:0')
episode: 277 training return: tensor(269.7254, device='cuda:0')
episode: 278 training return: tensor(414.8524, device='cuda:0')
episode: 279 training return: tensor(327.8183, device='cuda:0')
epoch: 70 test_true_pfm: 5974.094674635214 sim_pfm: 464.2257204791531
episode: 280 training return: tensor(400.7036, device='cuda:0')
episode: 281 training return: tensor(410.2490, device='cuda:0')
episode: 282 training return: tensor(420.2932, device='cuda:0')
episode: 283 training return: tensor(395.5892, device='cuda:0')
epoch: 71 test_true_pfm: 6073.867128692767 sim_pfm: 514.6046487496156
episode: 284 training return: tensor(354.6573, device='cuda:0')
episode: 285 training return: tensor(365.1463, device='cuda:0')
episode: 286 training return: tensor(432.4681, device='cuda:0')
episode: 287 training return: tensor(419.3201, device='cuda:0')
epoch: 72 test_true_pfm: 5994.362925154547 sim_pfm: 523.2331218846763
episode: 288 training return: tensor(296.0599, device='cuda:0')
episode: 289 training return: tensor(295.7884, device='cuda:0')
episode: 290 training return: tensor(324.4138, device='cuda:0')
episode: 291 training return: tensor(352.5703, device='cuda:0')
epoch: 73 test_true_pfm: 5987.844171852989 sim_pfm: 508.1906601676407
episode: 292 training return: tensor(322.3920, device='cuda:0')
episode: 293 training return: tensor(391.9332, device='cuda:0')
episode: 294 training return: tensor(369.2985, device='cuda:0')
episode: 295 training return: tensor(335.6263, device='cuda:0')
epoch: 74 test_true_pfm: 6042.700518968907 sim_pfm: 541.98954584077
episode: 296 training return: tensor(414.3967, device='cuda:0')
episode: 297 training return: tensor(358.8069, device='cuda:0')
episode: 298 training return: tensor(425.5752, device='cuda:0')
episode: 299 training return: tensor(398.5360, device='cuda:0')
epoch: 75 test_true_pfm: 6062.807087554816 sim_pfm: 513.5531675458478
episode: 300 training return: tensor(341.6928, device='cuda:0')
episode: 301 training return: tensor(448.5031, device='cuda:0')
episode: 302 training return: tensor(327.2411, device='cuda:0')
episode: 303 training return: tensor(323.6362, device='cuda:0')
epoch: 76 test_true_pfm: 6008.643731310123 sim_pfm: 509.7107283140455
episode: 304 training return: tensor(424.9282, device='cuda:0')
episode: 305 training return: tensor(448.9815, device='cuda:0')
episode: 306 training return: tensor(374.1577, device='cuda:0')
episode: 307 training return: tensor(343.2143, device='cuda:0')
epoch: 77 test_true_pfm: 5921.745874829191 sim_pfm: 526.2101620278942
episode: 308 training return: tensor(384.5751, device='cuda:0')
episode: 309 training return: tensor(314.1071, device='cuda:0')
episode: 310 training return: tensor(369.6651, device='cuda:0')
episode: 311 training return: tensor(414.0316, device='cuda:0')
epoch: 78 test_true_pfm: 6008.258524542042 sim_pfm: 508.8933811313861
episode: 312 training return: tensor(340.2129, device='cuda:0')
episode: 313 training return: tensor(426.0843, device='cuda:0')
episode: 314 training return: tensor(392.6292, device='cuda:0')
episode: 315 training return: tensor(441.6242, device='cuda:0')
epoch: 79 test_true_pfm: 6043.045209567455 sim_pfm: 552.9515999026286
episode: 316 training return: tensor(434.9567, device='cuda:0')
episode: 317 training return: tensor(396.6516, device='cuda:0')
episode: 318 training return: tensor(292.4497, device='cuda:0')
episode: 319 training return: tensor(375.8233, device='cuda:0')
epoch: 80 test_true_pfm: 5969.528714216471 sim_pfm: 548.3312111364212
episode: 320 training return: tensor(348.2033, device='cuda:0')
episode: 321 training return: tensor(304.5479, device='cuda:0')
episode: 322 training return: tensor(385.5880, device='cuda:0')
episode: 323 training return: tensor(397.1477, device='cuda:0')
epoch: 81 test_true_pfm: 6026.921589493098 sim_pfm: 552.4205482026542
episode: 324 training return: tensor(433.3368, device='cuda:0')
episode: 325 training return: tensor(357.7849, device='cuda:0')
episode: 326 training return: tensor(456.0595, device='cuda:0')
episode: 327 training return: tensor(348.2786, device='cuda:0')
epoch: 82 test_true_pfm: 6071.968366720844 sim_pfm: 561.5705845774306
episode: 328 training return: tensor(442.5321, device='cuda:0')
episode: 329 training return: tensor(357.7986, device='cuda:0')
episode: 330 training return: tensor(273.9156, device='cuda:0')
episode: 331 training return: tensor(444.1464, device='cuda:0')
epoch: 83 test_true_pfm: 6003.152534864369 sim_pfm: 541.7241655732505
episode: 332 training return: tensor(383.2572, device='cuda:0')
episode: 333 training return: tensor(421.4890, device='cuda:0')
episode: 334 training return: tensor(411.5515, device='cuda:0')
episode: 335 training return: tensor(246.7275, device='cuda:0')
epoch: 84 test_true_pfm: 5988.974266927343 sim_pfm: 517.8968040450903
episode: 336 training return: tensor(403.6739, device='cuda:0')
episode: 337 training return: tensor(402.0104, device='cuda:0')
episode: 338 training return: tensor(381.8951, device='cuda:0')
episode: 339 training return: tensor(421.8623, device='cuda:0')
epoch: 85 test_true_pfm: 6074.047195743254 sim_pfm: 511.9803892914594
episode: 340 training return: tensor(349.3026, device='cuda:0')
episode: 341 training return: tensor(395.6284, device='cuda:0')
episode: 342 training return: tensor(417.2228, device='cuda:0')
episode: 343 training return: tensor(348.5415, device='cuda:0')
epoch: 86 test_true_pfm: 5965.036767621034 sim_pfm: 491.07487136883236
episode: 344 training return: tensor(377.5072, device='cuda:0')
episode: 345 training return: tensor(455.2877, device='cuda:0')
episode: 346 training return: tensor(368.1309, device='cuda:0')
episode: 347 training return: tensor(417.7315, device='cuda:0')
epoch: 87 test_true_pfm: 6072.388559255368 sim_pfm: 499.6799447699062
episode: 348 training return: tensor(447.2832, device='cuda:0')
episode: 349 training return: tensor(409.0324, device='cuda:0')
episode: 350 training return: tensor(425.5307, device='cuda:0')
episode: 351 training return: tensor(423.0801, device='cuda:0')
epoch: 88 test_true_pfm: 6046.707229392061 sim_pfm: 521.1695839088643
episode: 352 training return: tensor(439.4495, device='cuda:0')
episode: 353 training return: tensor(378.3130, device='cuda:0')
episode: 354 training return: tensor(468.2441, device='cuda:0')
episode: 355 training return: tensor(369.7648, device='cuda:0')
epoch: 89 test_true_pfm: 6001.501875329434 sim_pfm: 543.480439594811
episode: 356 training return: tensor(400.6992, device='cuda:0')
episode: 357 training return: tensor(373.7546, device='cuda:0')
episode: 358 training return: tensor(442.9289, device='cuda:0')
episode: 359 training return: tensor(379.4032, device='cuda:0')
epoch: 90 test_true_pfm: 6072.016097542935 sim_pfm: 578.4913268246455
episode: 360 training return: tensor(415.5214, device='cuda:0')
episode: 361 training return: tensor(425.0586, device='cuda:0')
episode: 362 training return: tensor(469.5586, device='cuda:0')
episode: 363 training return: tensor(401.5099, device='cuda:0')
epoch: 91 test_true_pfm: 6058.613892654911 sim_pfm: 533.2604044772452
episode: 364 training return: tensor(407.9630, device='cuda:0')
episode: 365 training return: tensor(219.8282, device='cuda:0')
episode: 366 training return: tensor(457.9763, device='cuda:0')
episode: 367 training return: tensor(447.0342, device='cuda:0')
epoch: 92 test_true_pfm: 6066.911389330769 sim_pfm: 543.8415526422614
episode: 368 training return: tensor(349.1390, device='cuda:0')
episode: 369 training return: tensor(365.8024, device='cuda:0')
episode: 370 training return: tensor(483.8560, device='cuda:0')
episode: 371 training return: tensor(405.7538, device='cuda:0')
epoch: 93 test_true_pfm: 6012.198262071772 sim_pfm: 574.5914166941462
episode: 372 training return: tensor(362.6763, device='cuda:0')
episode: 373 training return: tensor(403.8393, device='cuda:0')
episode: 374 training return: tensor(437.5718, device='cuda:0')
episode: 375 training return: tensor(498.3913, device='cuda:0')
epoch: 94 test_true_pfm: 6047.802214982858 sim_pfm: 529.0147966473014
episode: 376 training return: tensor(365.4360, device='cuda:0')
episode: 377 training return: tensor(374.4570, device='cuda:0')
episode: 378 training return: tensor(449.3172, device='cuda:0')
episode: 379 training return: tensor(386.3432, device='cuda:0')
epoch: 95 test_true_pfm: 6010.424263901982 sim_pfm: 518.3461280828536
episode: 380 training return: tensor(430.8943, device='cuda:0')
episode: 381 training return: tensor(427.4370, device='cuda:0')
episode: 382 training return: tensor(447.4026, device='cuda:0')
episode: 383 training return: tensor(434.4657, device='cuda:0')
epoch: 96 test_true_pfm: 6013.619833486464 sim_pfm: 564.3940197264698
episode: 384 training return: tensor(331.5144, device='cuda:0')
episode: 385 training return: tensor(418.0059, device='cuda:0')
episode: 386 training return: tensor(437.0419, device='cuda:0')
episode: 387 training return: tensor(402.1289, device='cuda:0')
epoch: 97 test_true_pfm: 6023.218902608577 sim_pfm: 525.4617192385485
episode: 388 training return: tensor(348.1530, device='cuda:0')
episode: 389 training return: tensor(451.8340, device='cuda:0')
episode: 390 training return: tensor(360.3056, device='cuda:0')
episode: 391 training return: tensor(418.0242, device='cuda:0')
epoch: 98 test_true_pfm: 6039.564757941612 sim_pfm: 556.7782585681804
episode: 392 training return: tensor(435.6741, device='cuda:0')
episode: 393 training return: tensor(383.7922, device='cuda:0')
episode: 394 training return: tensor(384.6413, device='cuda:0')
episode: 395 training return: tensor(406.8142, device='cuda:0')
epoch: 99 test_true_pfm: 6007.314711712977 sim_pfm: 548.9264537767352
episode: 396 training return: tensor(450.4658, device='cuda:0')
episode: 397 training return: tensor(460.4243, device='cuda:0')
episode: 398 training return: tensor(435.9163, device='cuda:0')
episode: 399 training return: tensor(378.5186, device='cuda:0')
epoch: 100 test_true_pfm: 6064.269973566384 sim_pfm: 566.5350589967178
episode: 400 training return: tensor(442.1660, device='cuda:0')
episode: 401 training return: tensor(451.3237, device='cuda:0')
episode: 402 training return: tensor(413.6479, device='cuda:0')
episode: 403 training return: tensor(444.6470, device='cuda:0')
epoch: 101 test_true_pfm: 6037.241686470917 sim_pfm: 554.6135906030735
episode: 404 training return: tensor(466.6557, device='cuda:0')
episode: 405 training return: tensor(488.0913, device='cuda:0')
episode: 406 training return: tensor(367.8186, device='cuda:0')
episode: 407 training return: tensor(401.8382, device='cuda:0')
epoch: 102 test_true_pfm: 6075.728686304227 sim_pfm: 558.0738299461567
episode: 408 training return: tensor(440.1444, device='cuda:0')
episode: 409 training return: tensor(467.5656, device='cuda:0')
episode: 410 training return: tensor(447.1591, device='cuda:0')
episode: 411 training return: tensor(452.0826, device='cuda:0')
epoch: 103 test_true_pfm: 5998.243871888132 sim_pfm: 525.1332436924955
episode: 412 training return: tensor(468.4781, device='cuda:0')
episode: 413 training return: tensor(401.2502, device='cuda:0')
episode: 414 training return: tensor(428.2631, device='cuda:0')
episode: 415 training return: tensor(433.4505, device='cuda:0')
epoch: 104 test_true_pfm: 6019.376083509323 sim_pfm: 547.6587825761331
episode: 416 training return: tensor(407.4840, device='cuda:0')
episode: 417 training return: tensor(448.5342, device='cuda:0')
episode: 418 training return: tensor(490.9923, device='cuda:0')
episode: 419 training return: tensor(392.3054, device='cuda:0')
epoch: 105 test_true_pfm: 6041.411521698661 sim_pfm: 547.4305216351835
episode: 420 training return: tensor(458.6206, device='cuda:0')
episode: 421 training return: tensor(383.6686, device='cuda:0')
episode: 422 training return: tensor(430.2178, device='cuda:0')
episode: 423 training return: tensor(439.7360, device='cuda:0')
epoch: 106 test_true_pfm: 6035.377094264154 sim_pfm: 565.186955049634
episode: 424 training return: tensor(413.4341, device='cuda:0')
episode: 425 training return: tensor(406.6635, device='cuda:0')
episode: 426 training return: tensor(470.6837, device='cuda:0')
episode: 427 training return: tensor(343.6411, device='cuda:0')
epoch: 107 test_true_pfm: 6037.6969062409435 sim_pfm: 544.7510389111316
episode: 428 training return: tensor(388.6183, device='cuda:0')
episode: 429 training return: tensor(430.6717, device='cuda:0')
episode: 430 training return: tensor(410.3139, device='cuda:0')
episode: 431 training return: tensor(383.7567, device='cuda:0')
epoch: 108 test_true_pfm: 6062.983568826153 sim_pfm: 538.4832629962475
episode: 432 training return: tensor(438.5380, device='cuda:0')
episode: 433 training return: tensor(376.0629, device='cuda:0')
episode: 434 training return: tensor(433.5884, device='cuda:0')
episode: 435 training return: tensor(386.4744, device='cuda:0')
epoch: 109 test_true_pfm: 6017.09269786182 sim_pfm: 545.1810722430915
episode: 436 training return: tensor(431.7364, device='cuda:0')
episode: 437 training return: tensor(381.7200, device='cuda:0')
episode: 438 training return: tensor(338.9684, device='cuda:0')
episode: 439 training return: tensor(404.7635, device='cuda:0')
epoch: 110 test_true_pfm: 6049.946594862063 sim_pfm: 559.6145637982214
episode: 440 training return: tensor(462.7837, device='cuda:0')
episode: 441 training return: tensor(494.1519, device='cuda:0')
episode: 442 training return: tensor(400.1591, device='cuda:0')
episode: 443 training return: tensor(400.4153, device='cuda:0')
epoch: 111 test_true_pfm: 6048.857356998223 sim_pfm: 517.7316443916255
episode: 444 training return: tensor(348.6128, device='cuda:0')
episode: 445 training return: tensor(389.7775, device='cuda:0')
episode: 446 training return: tensor(372.7515, device='cuda:0')
episode: 447 training return: tensor(348.7805, device='cuda:0')
epoch: 112 test_true_pfm: 6027.93914861169 sim_pfm: 576.2194560188412
episode: 448 training return: tensor(386.4016, device='cuda:0')
episode: 449 training return: tensor(444.9565, device='cuda:0')
episode: 450 training return: tensor(351.5967, device='cuda:0')
episode: 451 training return: tensor(456.6985, device='cuda:0')
epoch: 113 test_true_pfm: 6047.745070153941 sim_pfm: 567.616759186892
episode: 452 training return: tensor(486.1320, device='cuda:0')
episode: 453 training return: tensor(390.9968, device='cuda:0')
episode: 454 training return: tensor(504.7027, device='cuda:0')
episode: 455 training return: tensor(436.8590, device='cuda:0')
epoch: 114 test_true_pfm: 6137.547135418248 sim_pfm: 564.6174033394394
episode: 456 training return: tensor(493.7749, device='cuda:0')
episode: 457 training return: tensor(407.1559, device='cuda:0')
episode: 458 training return: tensor(352.1009, device='cuda:0')
episode: 459 training return: tensor(370.3557, device='cuda:0')
epoch: 115 test_true_pfm: 6117.32410251445 sim_pfm: 559.4733792199404
episode: 460 training return: tensor(376.5053, device='cuda:0')
episode: 461 training return: tensor(412.8933, device='cuda:0')
episode: 462 training return: tensor(395.7899, device='cuda:0')
episode: 463 training return: tensor(411.6875, device='cuda:0')
epoch: 116 test_true_pfm: 6121.690171240692 sim_pfm: 526.70854024721
episode: 464 training return: tensor(390.9982, device='cuda:0')
episode: 465 training return: tensor(372.3417, device='cuda:0')
episode: 466 training return: tensor(442.9645, device='cuda:0')
episode: 467 training return: tensor(474.2734, device='cuda:0')
epoch: 117 test_true_pfm: 6118.887478699352 sim_pfm: 613.5079570168551
episode: 468 training return: tensor(410.5757, device='cuda:0')
episode: 469 training return: tensor(498.2344, device='cuda:0')
episode: 470 training return: tensor(459.8826, device='cuda:0')
episode: 471 training return: tensor(435.5399, device='cuda:0')
epoch: 118 test_true_pfm: 6080.05019501585 sim_pfm: 585.818637957292
episode: 472 training return: tensor(457.0338, device='cuda:0')
episode: 473 training return: tensor(458.7446, device='cuda:0')
episode: 474 training return: tensor(366.3151, device='cuda:0')
episode: 475 training return: tensor(396.7896, device='cuda:0')
epoch: 119 test_true_pfm: 6087.693228868774 sim_pfm: 601.6351453819856
episode: 476 training return: tensor(435.0819, device='cuda:0')
episode: 477 training return: tensor(438.8289, device='cuda:0')
episode: 478 training return: tensor(425.8740, device='cuda:0')
episode: 479 training return: tensor(489.6675, device='cuda:0')
epoch: 120 test_true_pfm: 6089.946071716619 sim_pfm: 566.6433095110309
episode: 480 training return: tensor(437.2262, device='cuda:0')
episode: 481 training return: tensor(372.0886, device='cuda:0')
episode: 482 training return: tensor(416.9840, device='cuda:0')
episode: 483 training return: tensor(443.6314, device='cuda:0')
epoch: 121 test_true_pfm: 6085.209503647737 sim_pfm: 553.5275167792182
episode: 484 training return: tensor(407.7182, device='cuda:0')
episode: 485 training return: tensor(471.7111, device='cuda:0')
episode: 486 training return: tensor(435.7458, device='cuda:0')
episode: 487 training return: tensor(419.5146, device='cuda:0')
epoch: 122 test_true_pfm: 6029.385917171698 sim_pfm: 557.0732858242118
episode: 488 training return: tensor(385.8233, device='cuda:0')
episode: 489 training return: tensor(274.9344, device='cuda:0')
episode: 490 training return: tensor(470.3774, device='cuda:0')
episode: 491 training return: tensor(405.6061, device='cuda:0')
epoch: 123 test_true_pfm: 6123.335323910806 sim_pfm: 605.9951667770123
episode: 492 training return: tensor(366.6830, device='cuda:0')
episode: 493 training return: tensor(439.0527, device='cuda:0')
episode: 494 training return: tensor(431.0483, device='cuda:0')
episode: 495 training return: tensor(402.7112, device='cuda:0')
epoch: 124 test_true_pfm: 6096.645839832105 sim_pfm: 537.3874521244046
episode: 496 training return: tensor(440.2256, device='cuda:0')
episode: 497 training return: tensor(434.0302, device='cuda:0')
episode: 498 training return: tensor(461.8326, device='cuda:0')
episode: 499 training return: tensor(475.3690, device='cuda:0')
epoch: 125 test_true_pfm: 6073.921355144943 sim_pfm: 570.3971358292038
episode: 500 training return: tensor(420.3546, device='cuda:0')
episode: 501 training return: tensor(361.2521, device='cuda:0')
episode: 502 training return: tensor(413.9339, device='cuda:0')
episode: 503 training return: tensor(468.4025, device='cuda:0')
epoch: 126 test_true_pfm: 6087.965454109767 sim_pfm: 557.3509725602344
episode: 504 training return: tensor(415.3574, device='cuda:0')
episode: 505 training return: tensor(456.5820, device='cuda:0')
episode: 506 training return: tensor(460.2592, device='cuda:0')
episode: 507 training return: tensor(433.0694, device='cuda:0')
epoch: 127 test_true_pfm: 6112.390279521121 sim_pfm: 526.2409446984142
episode: 508 training return: tensor(485.3622, device='cuda:0')
episode: 509 training return: tensor(469.2147, device='cuda:0')
episode: 510 training return: tensor(317.1147, device='cuda:0')
episode: 511 training return: tensor(432.2998, device='cuda:0')
epoch: 128 test_true_pfm: 6077.663212238396 sim_pfm: 564.0431270827152
episode: 512 training return: tensor(404.8089, device='cuda:0')
episode: 513 training return: tensor(438.4266, device='cuda:0')
episode: 514 training return: tensor(430.0752, device='cuda:0')
episode: 515 training return: tensor(491.1643, device='cuda:0')
epoch: 129 test_true_pfm: 6059.335395789712 sim_pfm: 612.0340598882273
episode: 516 training return: tensor(445.0985, device='cuda:0')
episode: 517 training return: tensor(484.3975, device='cuda:0')
episode: 518 training return: tensor(386.4141, device='cuda:0')
episode: 519 training return: tensor(500.1666, device='cuda:0')
epoch: 130 test_true_pfm: 6140.449668456618 sim_pfm: 544.1785619322327
episode: 520 training return: tensor(436.4799, device='cuda:0')
episode: 521 training return: tensor(448.2918, device='cuda:0')
episode: 522 training return: tensor(434.6162, device='cuda:0')
episode: 523 training return: tensor(434.6783, device='cuda:0')
epoch: 131 test_true_pfm: 5988.878526733497 sim_pfm: 577.781408271403
episode: 524 training return: tensor(502.2748, device='cuda:0')
episode: 525 training return: tensor(375.6822, device='cuda:0')
episode: 526 training return: tensor(361.7432, device='cuda:0')
episode: 527 training return: tensor(462.8715, device='cuda:0')
epoch: 132 test_true_pfm: 6074.864827841965 sim_pfm: 560.0912097640394
episode: 528 training return: tensor(404.0207, device='cuda:0')
episode: 529 training return: tensor(384.7890, device='cuda:0')
episode: 530 training return: tensor(405.2399, device='cuda:0')
episode: 531 training return: tensor(421.4405, device='cuda:0')
epoch: 133 test_true_pfm: 6051.965596741714 sim_pfm: 568.8776527210526
episode: 532 training return: tensor(386.8189, device='cuda:0')
episode: 533 training return: tensor(420.5117, device='cuda:0')
episode: 534 training return: tensor(406.5512, device='cuda:0')
episode: 535 training return: tensor(501.2302, device='cuda:0')
epoch: 134 test_true_pfm: 6102.965225280591 sim_pfm: 596.3126889821142
episode: 536 training return: tensor(419.7257, device='cuda:0')
episode: 537 training return: tensor(416.6500, device='cuda:0')
episode: 538 training return: tensor(481.3860, device='cuda:0')
episode: 539 training return: tensor(508.1904, device='cuda:0')
epoch: 135 test_true_pfm: 6127.659459110604 sim_pfm: 579.4833815052407
episode: 540 training return: tensor(452.3892, device='cuda:0')
episode: 541 training return: tensor(447.0809, device='cuda:0')
episode: 542 training return: tensor(490.0583, device='cuda:0')
episode: 543 training return: tensor(403.0858, device='cuda:0')
epoch: 136 test_true_pfm: 6156.708796371175 sim_pfm: 566.8702797866426
episode: 544 training return: tensor(363.3761, device='cuda:0')
episode: 545 training return: tensor(433.3044, device='cuda:0')
episode: 546 training return: tensor(478.5378, device='cuda:0')
episode: 547 training return: tensor(455.6039, device='cuda:0')
epoch: 137 test_true_pfm: 6103.958123584092 sim_pfm: 586.7946301618746
episode: 548 training return: tensor(348.2976, device='cuda:0')
episode: 549 training return: tensor(421.6945, device='cuda:0')
episode: 550 training return: tensor(450.7625, device='cuda:0')
episode: 551 training return: tensor(393.5889, device='cuda:0')
epoch: 138 test_true_pfm: 6068.6407122158025 sim_pfm: 555.1515597773929
episode: 552 training return: tensor(427.6956, device='cuda:0')
episode: 553 training return: tensor(421.4419, device='cuda:0')
episode: 554 training return: tensor(446.3839, device='cuda:0')
episode: 555 training return: tensor(450.4960, device='cuda:0')
epoch: 139 test_true_pfm: 6121.671436521235 sim_pfm: 578.4709916207261
episode: 556 training return: tensor(399.0277, device='cuda:0')
episode: 557 training return: tensor(430.4412, device='cuda:0')
episode: 558 training return: tensor(456.3990, device='cuda:0')
episode: 559 training return: tensor(424.1758, device='cuda:0')
epoch: 140 test_true_pfm: 6049.0604894326 sim_pfm: 567.474750915193
episode: 560 training return: tensor(453.9340, device='cuda:0')
episode: 561 training return: tensor(461.7133, device='cuda:0')
episode: 562 training return: tensor(375.8882, device='cuda:0')
episode: 563 training return: tensor(449.9559, device='cuda:0')
epoch: 141 test_true_pfm: 6087.96046713685 sim_pfm: 583.6021739967886
episode: 564 training return: tensor(462.0265, device='cuda:0')
episode: 565 training return: tensor(431.9977, device='cuda:0')
episode: 566 training return: tensor(418.8672, device='cuda:0')
episode: 567 training return: tensor(376.1026, device='cuda:0')
epoch: 142 test_true_pfm: 6028.445506107898 sim_pfm: 567.3582097756056
episode: 568 training return: tensor(442.8649, device='cuda:0')
episode: 569 training return: tensor(446.2659, device='cuda:0')
episode: 570 training return: tensor(441.3138, device='cuda:0')
episode: 571 training return: tensor(433.6112, device='cuda:0')
epoch: 143 test_true_pfm: 6131.038508176597 sim_pfm: 602.1437731685
episode: 572 training return: tensor(448.1686, device='cuda:0')
episode: 573 training return: tensor(397.7071, device='cuda:0')
episode: 574 training return: tensor(407.5929, device='cuda:0')
episode: 575 training return: tensor(450.0896, device='cuda:0')
epoch: 144 test_true_pfm: 6123.095455510484 sim_pfm: 594.7207141418476
episode: 576 training return: tensor(410.7216, device='cuda:0')
episode: 577 training return: tensor(516.4211, device='cuda:0')
episode: 578 training return: tensor(433.8380, device='cuda:0')
episode: 579 training return: tensor(490.5294, device='cuda:0')
epoch: 145 test_true_pfm: 6106.090395710987 sim_pfm: 588.519183061008
episode: 580 training return: tensor(430.4300, device='cuda:0')
episode: 581 training return: tensor(453.9981, device='cuda:0')
episode: 582 training return: tensor(389.9367, device='cuda:0')
episode: 583 training return: tensor(388.7238, device='cuda:0')
epoch: 146 test_true_pfm: 6153.088853877957 sim_pfm: 609.9179728109739
episode: 584 training return: tensor(446.8427, device='cuda:0')
episode: 585 training return: tensor(469.3389, device='cuda:0')
episode: 586 training return: tensor(407.5479, device='cuda:0')
episode: 587 training return: tensor(510.0125, device='cuda:0')
epoch: 147 test_true_pfm: 6145.210669837495 sim_pfm: 583.1796640051762
episode: 588 training return: tensor(383.3257, device='cuda:0')
episode: 589 training return: tensor(323.1099, device='cuda:0')
episode: 590 training return: tensor(508.3691, device='cuda:0')
episode: 591 training return: tensor(441.1078, device='cuda:0')
epoch: 148 test_true_pfm: 6084.411969418433 sim_pfm: 535.6089784291011
episode: 592 training return: tensor(386.4183, device='cuda:0')
episode: 593 training return: tensor(480.6367, device='cuda:0')
episode: 594 training return: tensor(448.5661, device='cuda:0')
episode: 595 training return: tensor(487.4489, device='cuda:0')
epoch: 149 test_true_pfm: 6160.464579289889 sim_pfm: 587.5118790549653
episode: 596 training return: tensor(440.7821, device='cuda:0')
episode: 597 training return: tensor(411.6381, device='cuda:0')
episode: 598 training return: tensor(371.1921, device='cuda:0')
episode: 599 training return: tensor(510.9980, device='cuda:0')
epoch: 150 test_true_pfm: 6038.354628879405 sim_pfm: 588.180723734365
