['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'expert', '--seed', '3']
epoch: 0 training_loss 0.33527669876813887 test_loss: 0.27495827674865725
epoch: 1 training_loss 0.2246123956888914 test_loss: 0.1878595232963562
epoch: 2 training_loss 0.1949882161617279 test_loss: 0.192629075050354
epoch: 3 training_loss 0.1720542711019516 test_loss: 0.1666496992111206
epoch: 4 training_loss 0.16244756244122982 test_loss: 0.1674458622932434
epoch: 5 training_loss 0.1513741447031498 test_loss: 0.1469202995300293
epoch: 6 training_loss 0.14829925764352084 test_loss: 0.13733631372451782
epoch: 7 training_loss 0.1412685162946582 test_loss: 0.13379589319229127
epoch: 8 training_loss 0.13749952856451272 test_loss: 0.15339590311050416
epoch: 9 training_loss 0.13142939444631338 test_loss: 0.12605141401290892
epoch: 10 training_loss 0.13637732941657305 test_loss: 0.1263597011566162
epoch: 11 training_loss 0.12733854930847882 test_loss: 0.12456080913543702
epoch: 12 training_loss 0.12821443438529967 test_loss: 0.13154804706573486
epoch: 13 training_loss 0.13225672237575054 test_loss: 0.13065701723098755
epoch: 14 training_loss 0.12353495296090841 test_loss: 0.12520434856414794
epoch: 15 training_loss 0.11893409319221973 test_loss: 0.12126250267028808
epoch: 16 training_loss 0.12517269495874644 test_loss: 0.13498035669326783
epoch: 17 training_loss 0.12054197534918785 test_loss: 0.10280508995056152
epoch: 18 training_loss 0.12167724698781968 test_loss: 0.1111607551574707
epoch: 19 training_loss 0.12216659914702177 test_loss: 0.11648961305618286
epoch: 20 training_loss 0.11959166817367077 test_loss: 0.12028329372406006
epoch: 21 training_loss 0.12604257691651582 test_loss: 0.11918649673461915
epoch: 22 training_loss 0.11403676576912403 test_loss: 0.13114789724349976
epoch: 23 training_loss 0.11741340842097997 test_loss: 0.10621610879898072
epoch: 24 training_loss 0.11525086391717196 test_loss: 0.12606871128082275
epoch: 25 training_loss 0.10968241017311811 test_loss: 0.11598670482635498
epoch: 26 training_loss 0.11112340945750475 test_loss: 0.13616490364074707
epoch: 27 training_loss 0.1235446309670806 test_loss: 0.11350762844085693
epoch: 28 training_loss 0.10590279281139374 test_loss: 0.10937010049819947
epoch: 29 training_loss 0.11607512339949608 test_loss: 0.1403337836265564
epoch: 30 training_loss 0.11772559478878974 test_loss: 0.11264036893844605
epoch: 31 training_loss 0.11507982823997737 test_loss: 0.09515284299850464
epoch: 32 training_loss 0.11191290222108365 test_loss: 0.1083025336265564
epoch: 33 training_loss 0.11403516095131636 test_loss: 0.1119457721710205
epoch: 34 training_loss 0.11320134170353413 test_loss: 0.13068438768386842
epoch: 35 training_loss 0.11503973737359047 test_loss: 0.11921082735061646
epoch: 36 training_loss 0.10845999244600535 test_loss: 0.10891708135604858
epoch: 37 training_loss 0.115840316824615 test_loss: 0.1268840551376343
epoch: 38 training_loss 0.11195536747574807 test_loss: 0.14574495553970337
epoch: 39 training_loss 0.11173568740487098 test_loss: 0.11967234611511231
epoch: 40 training_loss 0.12133737318217755 test_loss: 0.09950652122497558
epoch: 41 training_loss 0.1173455261439085 test_loss: 0.13263972997665405
epoch: 42 training_loss 0.11724799001589417 test_loss: 0.11397993564605713
epoch: 43 training_loss 0.11393891166895628 test_loss: 0.10749611854553223
epoch: 44 training_loss 0.1093599833548069 test_loss: 0.11558613777160645
epoch: 45 training_loss 0.10150687463581562 test_loss: 0.09326438307762146
epoch: 46 training_loss 0.11415688375011086 test_loss: 0.11214210987091064
epoch: 47 training_loss 0.1125794992223382 test_loss: 0.12800219058990478
epoch: 48 training_loss 0.1130571261420846 test_loss: 0.1067466378211975
epoch: 49 training_loss 0.11100387820973992 test_loss: 0.11651326417922973
epoch: 50 training_loss 0.10508800311014056 test_loss: 0.12320326566696167
epoch: 51 training_loss 0.11296897374093533 test_loss: 0.11553686857223511
epoch: 52 training_loss 0.11284803114831447 test_loss: 0.12247017621994019
epoch: 53 training_loss 0.11070819081738591 test_loss: 0.12368292808532715
epoch: 54 training_loss 0.10558343660086393 test_loss: 0.10986051559448243
epoch: 55 training_loss 0.10162755385041237 test_loss: 0.10466209650039673
epoch: 56 training_loss 0.11043646942824126 test_loss: 0.10824391841888428
epoch: 57 training_loss 0.11004620481282473 test_loss: 0.1272693991661072
epoch: 58 training_loss 0.1175184291601181 test_loss: 0.1110079288482666
epoch: 59 training_loss 0.10765970509499312 test_loss: 0.12479796409606933
epoch: 60 training_loss 0.11156050480902195 test_loss: 0.10782562494277954
epoch: 61 training_loss 0.1178015686199069 test_loss: 0.11205483675003051
epoch: 62 training_loss 0.10542823184281587 test_loss: 0.12826042175292968
epoch: 63 training_loss 0.11162662209942936 test_loss: 0.11224905252456666
epoch: 64 training_loss 0.10420151621103287 test_loss: 0.1174115777015686
epoch: 65 training_loss 0.11864276371896267 test_loss: 0.10347403287887573
epoch: 66 training_loss 0.10545884162187576 test_loss: 0.11678913831710816
epoch: 67 training_loss 0.10616551507264375 test_loss: 0.11523793935775757
epoch: 68 training_loss 0.10509114256128668 test_loss: 0.10773791074752807
epoch: 69 training_loss 0.10717334691435099 test_loss: 0.10923596620559692
epoch: 70 training_loss 0.11112895652651787 test_loss: 0.10539394617080688
epoch: 71 training_loss 0.10984698582440615 test_loss: 0.11502513885498047
epoch: 72 training_loss 0.11294296525418758 test_loss: 0.1347096800804138
epoch: 73 training_loss 0.10569093301892281 test_loss: 0.10779550075531005
epoch: 74 training_loss 0.11842937633395195 test_loss: 0.10400654077529907
epoch: 75 training_loss 0.11062926612794399 test_loss: 0.13289350271224976
epoch: 76 training_loss 0.11177956603467465 test_loss: 0.1180830717086792
epoch: 77 training_loss 0.10655064556747675 test_loss: 0.1000781536102295
epoch: 78 training_loss 0.10466337323188782 test_loss: 0.11485599279403687
epoch: 79 training_loss 0.11124820470809936 test_loss: 0.11448808908462524
epoch: 80 training_loss 0.10196121837943792 test_loss: 0.1328525185585022
epoch: 81 training_loss 0.11313703309744597 test_loss: 0.1175918698310852
epoch: 82 training_loss 0.1147735495865345 test_loss: 0.12657783031463624
epoch: 83 training_loss 0.11383077323436737 test_loss: 0.11152374744415283
epoch: 84 training_loss 0.11133654044941067 test_loss: 0.11385140419006348
epoch: 85 training_loss 0.10999271085485816 test_loss: 0.1073148250579834
epoch: 86 training_loss 0.10717989277094603 test_loss: 0.10402529239654541
epoch: 87 training_loss 0.10458213090896606 test_loss: 0.11777728796005249
epoch: 88 training_loss 0.10407314367592335 test_loss: 0.10084928274154663
epoch: 89 training_loss 0.1112728538736701 test_loss: 0.09687515497207641
epoch: 90 training_loss 0.11060746850445867 test_loss: 0.1349451184272766
epoch: 91 training_loss 0.10663422238081693 test_loss: 0.09818695187568664
epoch: 92 training_loss 0.11331403642892837 test_loss: 0.11602052450180053
epoch: 93 training_loss 0.11528818577528 test_loss: 0.1189038634300232
epoch: 94 training_loss 0.10512822076678276 test_loss: 0.10931339263916015
epoch: 95 training_loss 0.11361744591966272 test_loss: 0.10086849927902222
epoch: 96 training_loss 0.10510552871972323 test_loss: 0.10588771104812622
epoch: 97 training_loss 0.10520021252334118 test_loss: 0.1103284478187561
epoch: 98 training_loss 0.10646040957421064 test_loss: 0.107174551486969
epoch: 99 training_loss 0.11425651270896196 test_loss: 0.12368202209472656
epoch: 100 training_loss 0.10642585935071111 test_loss: 0.1089095115661621
epoch: 101 training_loss 0.11240640673786402 test_loss: 0.10970594882965087
epoch: 102 training_loss 0.10166169116273523 test_loss: 0.1100853681564331
epoch: 103 training_loss 0.11253144301474094 test_loss: 0.10698070526123046
epoch: 104 training_loss 0.10586783815175295 test_loss: 0.12782272100448608
epoch: 105 training_loss 0.1053708234988153 test_loss: 0.11464891433715821
epoch: 106 training_loss 0.10423999855294824 test_loss: 0.11376144886016845
epoch: 107 training_loss 0.10978408645838499 test_loss: 0.11413620710372925
epoch: 108 training_loss 0.10541970212012529 test_loss: 0.11500712633132934
epoch: 109 training_loss 0.10512487765401601 test_loss: 0.12838611602783204
epoch: 110 training_loss 0.1097128914296627 test_loss: 0.11707127094268799
epoch: 111 training_loss 0.1019165650382638 test_loss: 0.09835741519927979
epoch: 112 training_loss 0.10670870799571276 test_loss: 0.12930335998535156
epoch: 113 training_loss 0.10458627285435795 test_loss: 0.0939279854297638
epoch: 114 training_loss 0.10248494781553745 test_loss: 0.1139952301979065
epoch: 115 training_loss 0.10075683439150453 test_loss: 0.11621764898300171
epoch: 116 training_loss 0.10586153551936149 test_loss: 0.09892994165420532
epoch: 117 training_loss 0.10967950072139501 test_loss: 0.12782348394393922
epoch: 118 training_loss 0.10691446248441934 test_loss: 0.10861738920211791
epoch: 119 training_loss 0.10287290919572115 test_loss: 0.11113605499267579
epoch: 120 training_loss 0.11234043036587536 test_loss: 0.10152097940444946
epoch: 121 training_loss 0.11032610844820738 test_loss: 0.10991406440734863
epoch: 122 training_loss 0.1079561361297965 test_loss: 0.10866795778274536
epoch: 123 training_loss 0.1098866069689393 test_loss: 0.12148568630218506
epoch: 124 training_loss 0.1017897741869092 test_loss: 0.10530226230621338
epoch: 125 training_loss 0.10435676490887999 test_loss: 0.11465431451797485
epoch: 126 training_loss 0.10929115675389767 test_loss: 0.10755422115325927
epoch: 127 training_loss 0.10268317531794309 test_loss: 0.10805995464324951
epoch: 128 training_loss 0.12155119888484478 test_loss: 0.10626274347305298
epoch: 129 training_loss 0.11054146025329828 test_loss: 0.11373047828674317
epoch: 130 training_loss 0.11130652777850628 test_loss: 0.10421168804168701
epoch: 131 training_loss 0.11165467694401741 test_loss: 0.12859143018722535
epoch: 132 training_loss 0.10827028822153807 test_loss: 0.10427424907684327
epoch: 133 training_loss 0.11130039475858211 test_loss: 0.1051891565322876
epoch: 134 training_loss 0.10909319050610065 test_loss: 0.10362917184829712
epoch: 135 training_loss 0.10323637412860989 test_loss: 0.11762580871582032
epoch: 136 training_loss 0.11570991344749927 test_loss: 0.12292462587356567
epoch: 137 training_loss 0.11058099325746298 test_loss: 0.11079068183898926
epoch: 138 training_loss 0.10269691932946444 test_loss: 0.1252148985862732
epoch: 139 training_loss 0.10734381400048733 test_loss: 0.1084179162979126
epoch: 140 training_loss 0.10764681562781334 test_loss: 0.09972382187843323
epoch: 141 training_loss 0.10611824510619045 test_loss: 0.13593024015426636
epoch: 142 training_loss 0.10852956857532263 test_loss: 0.0946672260761261
epoch: 143 training_loss 0.10615262571722268 test_loss: 0.1065504789352417
epoch: 144 training_loss 0.10705722386017441 test_loss: 0.11973319053649903
epoch: 145 training_loss 0.11218632284551859 test_loss: 0.12031654119491578
epoch: 146 training_loss 0.10931584998965263 test_loss: 0.12220145463943481
epoch: 147 training_loss 0.105511443503201 test_loss: 0.10428136587142944
epoch: 148 training_loss 0.1110050660930574 test_loss: 0.1015128254890442
epoch: 149 training_loss 0.10218663778156042 test_loss: 0.10311146974563598
epoch: 0 training_loss 46.2856791305542 test_loss: 26.45026550292969
epoch: 1 training_loss 21.647687797546386 test_loss: 18.0353759765625
epoch: 2 training_loss 16.454749689102172 test_loss: 14.716995239257812
epoch: 3 training_loss 13.788023700714112 test_loss: 12.53891830444336
epoch: 4 training_loss 12.006619529724121 test_loss: 11.23531265258789
epoch: 5 training_loss 10.475848302841186 test_loss: 9.952568817138673
epoch: 6 training_loss 9.281353430747986 test_loss: 8.464189147949218
epoch: 7 training_loss 8.275169310569764 test_loss: 7.885181427001953
epoch: 8 training_loss 7.843991446495056 test_loss: 7.272631072998047
epoch: 9 training_loss 7.0857327365875244 test_loss: 6.810803985595703
epoch: 10 training_loss 6.64009617805481 test_loss: 6.576385498046875
epoch: 11 training_loss 6.362586140632629 test_loss: 6.40538330078125
epoch: 12 training_loss 6.075808792114258 test_loss: 5.902079010009766
epoch: 13 training_loss 5.7764591884613035 test_loss: 5.545228576660156
epoch: 14 training_loss 5.478796367645264 test_loss: 5.476320266723633
epoch: 15 training_loss 5.446758213043213 test_loss: 5.246079635620117
epoch: 16 training_loss 5.288075914382935 test_loss: 5.047005462646484
epoch: 17 training_loss 5.105452499389648 test_loss: 5.007488250732422
epoch: 18 training_loss 4.914264557361602 test_loss: 4.7127830505371096
epoch: 19 training_loss 4.8630890774726865 test_loss: 4.720803451538086
epoch: 20 training_loss 4.810801751613617 test_loss: 4.7004741668701175
epoch: 21 training_loss 4.652587156295777 test_loss: 4.607216644287109
epoch: 22 training_loss 4.540262379646301 test_loss: 4.488805770874023
epoch: 23 training_loss 4.318226928710938 test_loss: 4.195187759399414
epoch: 24 training_loss 4.245040519237518 test_loss: 4.389731979370117
epoch: 25 training_loss 4.262145891189575 test_loss: 4.339251327514648
epoch: 26 training_loss 4.221314651966095 test_loss: 3.994076156616211
epoch: 27 training_loss 4.0086520123481755 test_loss: 3.9326480865478515
epoch: 28 training_loss 4.069589407444 test_loss: 4.071434020996094
epoch: 29 training_loss 3.8550233149528506 test_loss: 4.061957168579101
epoch: 30 training_loss 3.925867621898651 test_loss: 3.725143051147461
epoch: 31 training_loss 3.8902842712402346 test_loss: 3.930116653442383
epoch: 32 training_loss 3.744913964271545 test_loss: 3.599134063720703
epoch: 33 training_loss 3.7535970568656922 test_loss: 3.393275833129883
epoch: 34 training_loss 3.5524742007255554 test_loss: 3.3436309814453127
epoch: 35 training_loss 3.698549146652222 test_loss: 3.6581546783447267
epoch: 36 training_loss 3.6134397554397584 test_loss: 3.5152328491210936
epoch: 37 training_loss 3.4773303031921388 test_loss: 3.427821731567383
epoch: 38 training_loss 3.528389925956726 test_loss: 3.508550262451172
epoch: 39 training_loss 3.526917772293091 test_loss: 3.466327667236328
epoch: 40 training_loss 3.375668604373932 test_loss: 3.167983627319336
epoch: 41 training_loss 3.3249457931518553 test_loss: 3.2863090515136717
epoch: 42 training_loss 3.349507772922516 test_loss: 3.305596923828125
epoch: 43 training_loss 3.366367292404175 test_loss: 3.2401561737060547
epoch: 44 training_loss 3.2965664577484133 test_loss: 3.2719352722167967
epoch: 45 training_loss 3.1930604147911072 test_loss: 3.1910625457763673
epoch: 46 training_loss 3.1089255881309508 test_loss: 3.302322769165039
epoch: 47 training_loss 3.083739233016968 test_loss: 3.123836135864258
epoch: 48 training_loss 3.0994786334037783 test_loss: 3.138738250732422
epoch: 49 training_loss 3.052993505001068 test_loss: 2.9315690994262695
epoch: 50 training_loss 3.0088969683647155 test_loss: 3.0758682250976563
epoch: 51 training_loss 3.0559956288337706 test_loss: 2.93658561706543
epoch: 52 training_loss 3.0300309443473816 test_loss: 3.02152042388916
epoch: 53 training_loss 2.997885422706604 test_loss: 2.9857234954833984
epoch: 54 training_loss 2.9301734662055967 test_loss: 2.893338203430176
epoch: 55 training_loss 2.9849789571762084 test_loss: 2.9550750732421873
epoch: 56 training_loss 2.868682014942169 test_loss: 2.9255775451660155
epoch: 57 training_loss 2.859329459667206 test_loss: 2.8664995193481446
epoch: 58 training_loss 2.867015311717987 test_loss: 2.815180206298828
epoch: 59 training_loss 2.849019916057587 test_loss: 2.7881681442260744
epoch: 60 training_loss 2.81489444732666 test_loss: 2.6822309494018555
epoch: 61 training_loss 2.878329916000366 test_loss: 2.9775470733642577
epoch: 62 training_loss 2.8054891419410706 test_loss: 2.724933624267578
epoch: 63 training_loss 2.810566349029541 test_loss: 2.6831991195678713
epoch: 64 training_loss 2.7109130907058714 test_loss: 2.670481872558594
epoch: 65 training_loss 2.6598180294036866 test_loss: 2.703904151916504
epoch: 66 training_loss 2.6643843054771423 test_loss: 2.749259185791016
epoch: 67 training_loss 2.7367130374908446 test_loss: 2.539842414855957
epoch: 68 training_loss 2.6785436725616454 test_loss: 2.725519561767578
epoch: 69 training_loss 2.6522288799285887 test_loss: 2.5272537231445313
epoch: 70 training_loss 2.620563759803772 test_loss: 2.6488466262817383
epoch: 71 training_loss 2.682889628410339 test_loss: 2.7269287109375
epoch: 72 training_loss 2.531505763530731 test_loss: 2.7274356842041017
epoch: 73 training_loss 2.567733873128891 test_loss: 2.4197587966918945
epoch: 74 training_loss 2.5421863794326782 test_loss: 2.4516645431518556
epoch: 75 training_loss 2.5665719723701477 test_loss: 2.6188837051391602
epoch: 76 training_loss 2.5625556325912475 test_loss: 2.4882648468017576
epoch: 77 training_loss 2.5516571950912477 test_loss: 2.700268363952637
epoch: 78 training_loss 2.5121469259262086 test_loss: 2.33455753326416
epoch: 79 training_loss 2.384144389629364 test_loss: 2.5631900787353517
epoch: 80 training_loss 2.4816001892089843 test_loss: 2.4235612869262697
epoch: 81 training_loss 2.4607259488105773 test_loss: 2.6183565139770506
epoch: 82 training_loss 2.4486370348930357 test_loss: 2.3770111083984373
epoch: 83 training_loss 2.3991172432899477 test_loss: 2.3275861740112305
epoch: 84 training_loss 2.3803069031238557 test_loss: 2.3722070693969726
epoch: 85 training_loss 2.3907105803489683 test_loss: 2.5776201248168946
epoch: 86 training_loss 2.4184026956558227 test_loss: 2.4452613830566405
epoch: 87 training_loss 2.3863529074192047 test_loss: 2.282476043701172
epoch: 88 training_loss 2.362624754905701 test_loss: 2.3728620529174806
epoch: 89 training_loss 2.3473321628570556 test_loss: 2.272644805908203
epoch: 90 training_loss 2.3951157343387606 test_loss: 2.270565223693848
epoch: 91 training_loss 2.359992039203644 test_loss: 2.2353553771972656
epoch: 92 training_loss 2.303604106903076 test_loss: 2.2576740264892576
epoch: 93 training_loss 2.3694225227832795 test_loss: 2.2891986846923826
epoch: 94 training_loss 2.2779736328125 test_loss: 2.2331308364868163
epoch: 95 training_loss 2.269251412153244 test_loss: 2.467744827270508
epoch: 96 training_loss 2.2944798243045805 test_loss: 2.250170135498047
epoch: 97 training_loss 2.2518784952163697 test_loss: 2.1637905120849608
epoch: 98 training_loss 2.238926968574524 test_loss: 2.24501953125
epoch: 99 training_loss 2.189896968603134 test_loss: 2.0944869995117186
epoch: 100 training_loss 2.20651633143425 test_loss: 2.2992982864379883
epoch: 101 training_loss 2.2797076642513274 test_loss: 2.1225852966308594
epoch: 102 training_loss 2.233480525016785 test_loss: 2.144710159301758
epoch: 103 training_loss 2.17937150478363 test_loss: 2.1534448623657227
epoch: 104 training_loss 2.143425545692444 test_loss: 2.232108497619629
epoch: 105 training_loss 2.174034523963928 test_loss: 2.101192283630371
epoch: 106 training_loss 2.1800029993057253 test_loss: 2.3020036697387694
epoch: 107 training_loss 2.2006665050983427 test_loss: 2.1482839584350586
epoch: 108 training_loss 2.1263432478904725 test_loss: 2.110791778564453
epoch: 109 training_loss 2.139606684446335 test_loss: 2.288075256347656
epoch: 110 training_loss 2.1390264439582825 test_loss: 2.140562057495117
epoch: 111 training_loss 2.080055996179581 test_loss: 2.1448390960693358
epoch: 112 training_loss 2.101100032329559 test_loss: 2.0018123626708983
epoch: 113 training_loss 2.0704819524288176 test_loss: 2.124043273925781
epoch: 114 training_loss 2.051009479761124 test_loss: 2.1077320098876955
epoch: 115 training_loss 2.110136493444443 test_loss: 2.168160820007324
epoch: 116 training_loss 2.0597985768318177 test_loss: 2.0238824844360352
epoch: 117 training_loss 2.078475978374481 test_loss: 1.9698541641235352
epoch: 118 training_loss 2.099891711473465 test_loss: 2.0694387435913084
epoch: 119 training_loss 2.046921268701553 test_loss: 1.9622177124023437
epoch: 120 training_loss 2.0048670637607575 test_loss: 1.9682439804077148
epoch: 121 training_loss 2.0196419036388398 test_loss: 1.9507936477661132
epoch: 122 training_loss 2.0323973286151884 test_loss: 2.0029071807861327
epoch: 123 training_loss 2.0373609578609466 test_loss: 1.9945369720458985
epoch: 124 training_loss 2.004227869510651 test_loss: 1.9029285430908203
epoch: 125 training_loss 2.018330899477005 test_loss: 2.0065032958984377
epoch: 126 training_loss 2.0090411007404327 test_loss: 2.0194412231445313
epoch: 127 training_loss 1.9379017102718352 test_loss: 1.9565866470336915
epoch: 128 training_loss 1.9566390419006348 test_loss: 2.0389171600341798
epoch: 129 training_loss 1.9734207153320313 test_loss: 2.0148460388183596
epoch: 130 training_loss 1.9301311302185058 test_loss: 1.9193437576293946
epoch: 131 training_loss 2.0048887979984285 test_loss: 2.008426475524902
epoch: 132 training_loss 1.9913774085044862 test_loss: 1.9439374923706054
epoch: 133 training_loss 1.935331562757492 test_loss: 1.8601263046264649
epoch: 134 training_loss 1.9671263825893401 test_loss: 1.9104179382324218
epoch: 135 training_loss 1.958098750114441 test_loss: 1.9799921035766601
epoch: 136 training_loss 1.9304239308834077 test_loss: 1.8701421737670898
epoch: 137 training_loss 1.9344515466690064 test_loss: 1.9457147598266602
epoch: 138 training_loss 1.9273184633255005 test_loss: 1.930740737915039
epoch: 139 training_loss 1.9265412163734437 test_loss: 1.9209421157836915
epoch: 140 training_loss 1.9406328201293945 test_loss: 1.8598051071166992
epoch: 141 training_loss 1.8922789740562438 test_loss: 1.8764410018920898
epoch: 142 training_loss 1.8911477267742156 test_loss: 1.9191370010375977
epoch: 143 training_loss 1.8882635235786438 test_loss: 1.9617382049560548
epoch: 144 training_loss 1.862496166229248 test_loss: 1.8346307754516602
epoch: 145 training_loss 1.8957519960403442 test_loss: 1.9662818908691406
epoch: 146 training_loss 1.8649737560749053 test_loss: 2.003348731994629
epoch: 147 training_loss 1.8472698152065277 test_loss: 1.877495002746582
epoch: 148 training_loss 1.8482951271533965 test_loss: 1.833826446533203
epoch: 149 training_loss 1.8686506712436677 test_loss: 1.8551198959350585
7526.185476946948
episode: 0 training return: tensor(-413.7149, device='cuda:0')
episode: 1 training return: tensor(-513.2414, device='cuda:0')
episode: 2 training return: tensor(-341.1835, device='cuda:0')
episode: 3 training return: tensor(-836.5054, device='cuda:0')
epoch: 1 test_true_pfm: 4717.872512549578 sim_pfm: -562.893914223086
episode: 4 training return: tensor(-495.1821, device='cuda:0')
episode: 5 training return: tensor(-593.2739, device='cuda:0')
episode: 6 training return: tensor(-454.2285, device='cuda:0')
episode: 7 training return: tensor(-926.1660, device='cuda:0')
epoch: 2 test_true_pfm: 9587.72127891615 sim_pfm: -510.8418146329641
episode: 8 training return: tensor(-999.9986, device='cuda:0')
episode: 9 training return: tensor(-377.7845, device='cuda:0')
episode: 10 training return: tensor(-488.2816, device='cuda:0')
episode: 11 training return: tensor(-807.3870, device='cuda:0')
epoch: 3 test_true_pfm: 1549.0356920979775 sim_pfm: -655.9918178806935
episode: 12 training return: tensor(-737.5261, device='cuda:0')
episode: 13 training return: tensor(-600.6779, device='cuda:0')
episode: 14 training return: tensor(-534.4375, device='cuda:0')
episode: 15 training return: tensor(-380.1259, device='cuda:0')
epoch: 4 test_true_pfm: 3668.095290928613 sim_pfm: -585.9652588298001
episode: 16 training return: tensor(-522.9164, device='cuda:0')
episode: 17 training return: tensor(-914.8127, device='cuda:0')
episode: 18 training return: tensor(-999.9970, device='cuda:0')
episode: 19 training return: tensor(-368.9201, device='cuda:0')
epoch: 5 test_true_pfm: 9916.322107930826 sim_pfm: -439.4270176358793
episode: 20 training return: tensor(-999.9996, device='cuda:0')
episode: 21 training return: tensor(-999.9872, device='cuda:0')
episode: 22 training return: tensor(-548.2050, device='cuda:0')
episode: 23 training return: tensor(-581.2546, device='cuda:0')
epoch: 6 test_true_pfm: 7669.922020534596 sim_pfm: -773.2613076150495
episode: 24 training return: tensor(-651.2953, device='cuda:0')
episode: 25 training return: tensor(-471.5139, device='cuda:0')
episode: 26 training return: tensor(-552.4828, device='cuda:0')
episode: 27 training return: tensor(-543.0996, device='cuda:0')
epoch: 7 test_true_pfm: 6636.7414746651275 sim_pfm: -611.9272272788997
episode: 28 training return: tensor(-459.2056, device='cuda:0')
episode: 29 training return: tensor(-278.7939, device='cuda:0')
episode: 30 training return: tensor(-598.6375, device='cuda:0')
episode: 31 training return: tensor(-991.8663, device='cuda:0')
epoch: 8 test_true_pfm: 3636.6085250298897 sim_pfm: -357.8275906874721
episode: 32 training return: tensor(-989.8818, device='cuda:0')
episode: 33 training return: tensor(-999.9521, device='cuda:0')
episode: 34 training return: tensor(-508.6525, device='cuda:0')
episode: 35 training return: tensor(-999.9996, device='cuda:0')
epoch: 9 test_true_pfm: 6578.206164051393 sim_pfm: -420.5638851877302
episode: 36 training return: tensor(-814.1801, device='cuda:0')
episode: 37 training return: tensor(-546.2877, device='cuda:0')
episode: 38 training return: tensor(-386.1766, device='cuda:0')
episode: 39 training return: tensor(-359.3990, device='cuda:0')
epoch: 10 test_true_pfm: 7656.611054639207 sim_pfm: -259.9250562513868
episode: 40 training return: tensor(-490.1856, device='cuda:0')
episode: 41 training return: tensor(-999.9993, device='cuda:0')
episode: 42 training return: tensor(-916.4838, device='cuda:0')
episode: 43 training return: tensor(-499.8148, device='cuda:0')
epoch: 11 test_true_pfm: 5830.283445353667 sim_pfm: -459.94910676878254
episode: 44 training return: tensor(-396.8748, device='cuda:0')
episode: 45 training return: tensor(-999.9906, device='cuda:0')
episode: 46 training return: tensor(-421.9695, device='cuda:0')
episode: 47 training return: tensor(-399.5710, device='cuda:0')
epoch: 12 test_true_pfm: 8019.976487132189 sim_pfm: -149.27184663297763
episode: 48 training return: tensor(-999.7415, device='cuda:0')
episode: 49 training return: tensor(-999.2443, device='cuda:0')
episode: 50 training return: tensor(-262.9494, device='cuda:0')
episode: 51 training return: tensor(-310.0028, device='cuda:0')
epoch: 13 test_true_pfm: 9715.416703351011 sim_pfm: -185.41719028019966
episode: 52 training return: tensor(-329.9859, device='cuda:0')
episode: 53 training return: tensor(-485.9845, device='cuda:0')
episode: 54 training return: tensor(-998.2148, device='cuda:0')
episode: 55 training return: tensor(-999.9658, device='cuda:0')
epoch: 14 test_true_pfm: 6474.945978044186 sim_pfm: -215.03404608023507
episode: 56 training return: tensor(-412.2089, device='cuda:0')
episode: 57 training return: tensor(-353.6444, device='cuda:0')
episode: 58 training return: tensor(-153.4262, device='cuda:0')
episode: 59 training return: tensor(-377.7652, device='cuda:0')
epoch: 15 test_true_pfm: 10141.520123718996 sim_pfm: -148.04558244910245
episode: 60 training return: tensor(-198.5377, device='cuda:0')
episode: 61 training return: tensor(-999.3263, device='cuda:0')
episode: 62 training return: tensor(-199.2551, device='cuda:0')
episode: 63 training return: tensor(-427.0316, device='cuda:0')
epoch: 16 test_true_pfm: 9949.770844439156 sim_pfm: -430.15495145817596
episode: 64 training return: tensor(-509.3940, device='cuda:0')
episode: 65 training return: tensor(-999.9993, device='cuda:0')
episode: 66 training return: tensor(-999.9998, device='cuda:0')
episode: 67 training return: tensor(-422.9586, device='cuda:0')
epoch: 17 test_true_pfm: 6614.224405764545 sim_pfm: -306.4482708995153
episode: 68 training return: tensor(-391.4521, device='cuda:0')
episode: 69 training return: tensor(-210.0053, device='cuda:0')
episode: 70 training return: tensor(-233.2681, device='cuda:0')
episode: 71 training return: tensor(-264.1555, device='cuda:0')
epoch: 18 test_true_pfm: 6534.965725769569 sim_pfm: -71.3285972607943
episode: 72 training return: tensor(-399.2495, device='cuda:0')
episode: 73 training return: tensor(-186.7202, device='cuda:0')
episode: 74 training return: tensor(-366.5406, device='cuda:0')
episode: 75 training return: tensor(-315.2906, device='cuda:0')
epoch: 19 test_true_pfm: 6956.3428554936945 sim_pfm: -425.43236894969596
episode: 76 training return: tensor(-999.9994, device='cuda:0')
episode: 77 training return: tensor(-411.8357, device='cuda:0')
episode: 78 training return: tensor(-321.3506, device='cuda:0')
episode: 79 training return: tensor(-422.8371, device='cuda:0')
epoch: 20 test_true_pfm: 6634.779237010247 sim_pfm: -647.9628365178747
episode: 80 training return: tensor(-999.9999, device='cuda:0')
episode: 81 training return: tensor(-999.9993, device='cuda:0')
episode: 82 training return: tensor(-727.8665, device='cuda:0')
episode: 83 training return: tensor(-387.2794, device='cuda:0')
epoch: 21 test_true_pfm: 3417.501733842291 sim_pfm: -752.5194858566101
episode: 84 training return: tensor(-202.6343, device='cuda:0')
episode: 85 training return: tensor(-292.9684, device='cuda:0')
episode: 86 training return: tensor(-999.9945, device='cuda:0')
episode: 87 training return: tensor(-200.4103, device='cuda:0')
epoch: 22 test_true_pfm: 10213.450766054484 sim_pfm: -227.06414658475356
episode: 88 training return: tensor(-271.6313, device='cuda:0')
episode: 89 training return: tensor(-309.8872, device='cuda:0')
episode: 90 training return: tensor(-252.7810, device='cuda:0')
episode: 91 training return: tensor(-470.7066, device='cuda:0')
epoch: 23 test_true_pfm: 10126.369655690252 sim_pfm: -452.17983492075774
episode: 92 training return: tensor(-999.9944, device='cuda:0')
episode: 93 training return: tensor(-999.9995, device='cuda:0')
episode: 94 training return: tensor(-224.0252, device='cuda:0')
episode: 95 training return: tensor(-999.9625, device='cuda:0')
epoch: 24 test_true_pfm: 10196.606722917593 sim_pfm: -86.3359409943029
episode: 96 training return: tensor(-291.1943, device='cuda:0')
episode: 97 training return: tensor(-432.7009, device='cuda:0')
episode: 98 training return: tensor(-317.7441, device='cuda:0')
episode: 99 training return: tensor(-334.5788, device='cuda:0')
epoch: 25 test_true_pfm: 8760.027655574033 sim_pfm: -66.86110136141845
episode: 100 training return: tensor(-999.9974, device='cuda:0')
episode: 101 training return: tensor(-233.1588, device='cuda:0')
episode: 102 training return: tensor(-914.9585, device='cuda:0')
episode: 103 training return: tensor(-999.9735, device='cuda:0')
epoch: 26 test_true_pfm: 3113.826813453626 sim_pfm: -347.2276224096325
episode: 104 training return: tensor(-289.2350, device='cuda:0')
episode: 105 training return: tensor(-292.6286, device='cuda:0')
episode: 106 training return: tensor(-999.7101, device='cuda:0')
episode: 107 training return: tensor(-325.0389, device='cuda:0')
epoch: 27 test_true_pfm: 6663.756875374454 sim_pfm: -26.413095808781993
episode: 108 training return: tensor(-357.4500, device='cuda:0')
episode: 109 training return: tensor(-297.5155, device='cuda:0')
episode: 110 training return: tensor(-173.7501, device='cuda:0')
episode: 111 training return: tensor(-997.6474, device='cuda:0')
epoch: 28 test_true_pfm: 10319.108001422448 sim_pfm: -999.9534754355749
episode: 112 training return: tensor(-999.9241, device='cuda:0')
episode: 113 training return: tensor(-132.2841, device='cuda:0')
episode: 114 training return: tensor(-571.3082, device='cuda:0')
episode: 115 training return: tensor(-999.9812, device='cuda:0')
epoch: 29 test_true_pfm: 10018.384302373657 sim_pfm: -285.35971494902816
episode: 116 training return: tensor(-998.4449, device='cuda:0')
episode: 117 training return: tensor(-577.1835, device='cuda:0')
episode: 118 training return: tensor(-5.0215, device='cuda:0')
episode: 119 training return: tensor(-999.9996, device='cuda:0')
epoch: 30 test_true_pfm: 7821.180489320265 sim_pfm: -41.87143989413744
episode: 120 training return: tensor(-935.6997, device='cuda:0')
episode: 121 training return: tensor(-628.5026, device='cuda:0')
episode: 122 training return: tensor(-83.3578, device='cuda:0')
episode: 123 training return: tensor(-407.5775, device='cuda:0')
epoch: 31 test_true_pfm: 6700.976308469242 sim_pfm: -359.55241565288935
episode: 124 training return: tensor(-166.7791, device='cuda:0')
episode: 125 training return: tensor(-999.9993, device='cuda:0')
episode: 126 training return: tensor(-321.6629, device='cuda:0')
episode: 127 training return: tensor(-384.3488, device='cuda:0')
epoch: 32 test_true_pfm: 6762.74845044364 sim_pfm: -0.7328919422191879
episode: 128 training return: tensor(-326.3341, device='cuda:0')
episode: 129 training return: tensor(-247.0164, device='cuda:0')
episode: 130 training return: tensor(-744.4296, device='cuda:0')
episode: 131 training return: tensor(-403.5501, device='cuda:0')
epoch: 33 test_true_pfm: 10225.842666528266 sim_pfm: -51.214649653527886
episode: 132 training return: tensor(-999.9970, device='cuda:0')
episode: 133 training return: tensor(-301.3882, device='cuda:0')
episode: 134 training return: tensor(-287.1733, device='cuda:0')
episode: 135 training return: tensor(-999.9991, device='cuda:0')
epoch: 34 test_true_pfm: 6748.839378425148 sim_pfm: -354.50845370410633
episode: 136 training return: tensor(-410.3056, device='cuda:0')
episode: 137 training return: tensor(-946.4546, device='cuda:0')
episode: 138 training return: tensor(-112.2575, device='cuda:0')
episode: 139 training return: tensor(-282.1514, device='cuda:0')
epoch: 35 test_true_pfm: 6630.254688455231 sim_pfm: -999.7419873277346
episode: 140 training return: tensor(-81.6074, device='cuda:0')
episode: 141 training return: tensor(-430.8302, device='cuda:0')
episode: 142 training return: tensor(-196.2574, device='cuda:0')
episode: 143 training return: tensor(-996.9464, device='cuda:0')
epoch: 36 test_true_pfm: 8518.74570634138 sim_pfm: -424.0797948984352
episode: 144 training return: tensor(-999.9298, device='cuda:0')
episode: 145 training return: tensor(-727.5253, device='cuda:0')
episode: 146 training return: tensor(-999.9762, device='cuda:0')
episode: 147 training return: tensor(-289.0962, device='cuda:0')
epoch: 37 test_true_pfm: 10157.756816070674 sim_pfm: -150.6970577624937
episode: 148 training return: tensor(-269.0847, device='cuda:0')
episode: 149 training return: tensor(-118.4146, device='cuda:0')
episode: 150 training return: tensor(-999.9984, device='cuda:0')
episode: 151 training return: tensor(-999.9855, device='cuda:0')
epoch: 38 test_true_pfm: 6743.205917243933 sim_pfm: -43.56036301075559
episode: 152 training return: tensor(-342.3153, device='cuda:0')
episode: 153 training return: tensor(-365.6129, device='cuda:0')
episode: 154 training return: tensor(-233.5792, device='cuda:0')
episode: 155 training return: tensor(-999.9831, device='cuda:0')
epoch: 39 test_true_pfm: 6680.51969026238 sim_pfm: -604.4810020770528
episode: 156 training return: tensor(-365.8044, device='cuda:0')
episode: 157 training return: tensor(-293.2290, device='cuda:0')
episode: 158 training return: tensor(-999.9951, device='cuda:0')
episode: 159 training return: tensor(-711.3647, device='cuda:0')
epoch: 40 test_true_pfm: 6749.104301950708 sim_pfm: -355.8820201918231
episode: 160 training return: tensor(-325.9158, device='cuda:0')
episode: 161 training return: tensor(-471.5455, device='cuda:0')
episode: 162 training return: tensor(-999.9985, device='cuda:0')
episode: 163 training return: tensor(-999.9805, device='cuda:0')
epoch: 41 test_true_pfm: 6693.646806914866 sim_pfm: -110.53071815848428
episode: 164 training return: tensor(-999.9999, device='cuda:0')
episode: 165 training return: tensor(-386.9152, device='cuda:0')
episode: 166 training return: tensor(-520.7001, device='cuda:0')
episode: 167 training return: tensor(-272.2288, device='cuda:0')
epoch: 42 test_true_pfm: 6821.801093807267 sim_pfm: -654.6258187230366
episode: 168 training return: tensor(-230.1894, device='cuda:0')
episode: 169 training return: tensor(-124.7952, device='cuda:0')
episode: 170 training return: tensor(-988.4957, device='cuda:0')
episode: 171 training return: tensor(-245.3439, device='cuda:0')
epoch: 43 test_true_pfm: 10533.719013911155 sim_pfm: -512.9003438386231
episode: 172 training return: tensor(-999.5876, device='cuda:0')
episode: 173 training return: tensor(-309.7811, device='cuda:0')
episode: 174 training return: tensor(-312.2527, device='cuda:0')
episode: 175 training return: tensor(-999.9373, device='cuda:0')
epoch: 44 test_true_pfm: 10268.834647583473 sim_pfm: -680.332416057509
episode: 176 training return: tensor(-171.2510, device='cuda:0')
episode: 177 training return: tensor(-162.6035, device='cuda:0')
episode: 178 training return: tensor(-999.9999, device='cuda:0')
episode: 179 training return: tensor(-997.9119, device='cuda:0')
epoch: 45 test_true_pfm: 10329.58794570888 sim_pfm: -584.4383522533657
episode: 180 training return: tensor(-999.9820, device='cuda:0')
episode: 181 training return: tensor(-191.5969, device='cuda:0')
episode: 182 training return: tensor(-278.7139, device='cuda:0')
episode: 183 training return: tensor(-998.1425, device='cuda:0')
epoch: 46 test_true_pfm: 10328.654518302441 sim_pfm: -79.15007537661586
episode: 184 training return: tensor(-209.8680, device='cuda:0')
episode: 185 training return: tensor(-125.6398, device='cuda:0')
episode: 186 training return: tensor(-239.1948, device='cuda:0')
episode: 187 training return: tensor(-159.8690, device='cuda:0')
epoch: 47 test_true_pfm: 6473.613643789307 sim_pfm: -357.4407794546375
episode: 188 training return: tensor(-217.8621, device='cuda:0')
episode: 189 training return: tensor(-252.2532, device='cuda:0')
episode: 190 training return: tensor(-131.4718, device='cuda:0')
episode: 191 training return: tensor(-78.3512, device='cuda:0')
epoch: 48 test_true_pfm: 10344.539502202599 sim_pfm: 55.993454421986826
episode: 192 training return: tensor(-293.8351, device='cuda:0')
episode: 193 training return: tensor(-216.4037, device='cuda:0')
episode: 194 training return: tensor(-999.8381, device='cuda:0')
episode: 195 training return: tensor(-991.5373, device='cuda:0')
epoch: 49 test_true_pfm: 10355.852322675024 sim_pfm: -390.9011176091153
episode: 196 training return: tensor(-999.9879, device='cuda:0')
episode: 197 training return: tensor(-999.8232, device='cuda:0')
episode: 198 training return: tensor(-35.7902, device='cuda:0')
episode: 199 training return: tensor(-230.8339, device='cuda:0')
epoch: 50 test_true_pfm: 6832.1521490602145 sim_pfm: -54.83667095042377
episode: 200 training return: tensor(-346.5393, device='cuda:0')
episode: 201 training return: tensor(-197.2379, device='cuda:0')
episode: 202 training return: tensor(-258.0229, device='cuda:0')
episode: 203 training return: tensor(-253.3851, device='cuda:0')
epoch: 51 test_true_pfm: 10269.85690664295 sim_pfm: -342.2736807684026
episode: 204 training return: tensor(-212.7855, device='cuda:0')
episode: 205 training return: tensor(-999.9994, device='cuda:0')
episode: 206 training return: tensor(-208.2462, device='cuda:0')
episode: 207 training return: tensor(-90.8947, device='cuda:0')
epoch: 52 test_true_pfm: 6688.115445845804 sim_pfm: -271.99746628115344
episode: 208 training return: tensor(-574.7900, device='cuda:0')
episode: 209 training return: tensor(-247.5286, device='cuda:0')
episode: 210 training return: tensor(-207.1678, device='cuda:0')
episode: 211 training return: tensor(-999.9956, device='cuda:0')
epoch: 53 test_true_pfm: 6795.555877919581 sim_pfm: -694.3641586471349
episode: 212 training return: tensor(-284.0070, device='cuda:0')
episode: 213 training return: tensor(-999.9993, device='cuda:0')
episode: 214 training return: tensor(-999.9993, device='cuda:0')
episode: 215 training return: tensor(-147.7041, device='cuda:0')
epoch: 54 test_true_pfm: 10270.943974395295 sim_pfm: -710.3081265507886
episode: 216 training return: tensor(-220.6156, device='cuda:0')
episode: 217 training return: tensor(-989.6591, device='cuda:0')
episode: 218 training return: tensor(-343.2729, device='cuda:0')
episode: 219 training return: tensor(-999.9997, device='cuda:0')
epoch: 55 test_true_pfm: 10008.040876458179 sim_pfm: -402.62974075519014
episode: 220 training return: tensor(-427.1031, device='cuda:0')
episode: 221 training return: tensor(-67.2577, device='cuda:0')
episode: 222 training return: tensor(-110.0522, device='cuda:0')
episode: 223 training return: tensor(-999.9205, device='cuda:0')
epoch: 56 test_true_pfm: 6901.488915791288 sim_pfm: -364.88722358997137
episode: 224 training return: tensor(-297.2037, device='cuda:0')
episode: 225 training return: tensor(-998.5156, device='cuda:0')
episode: 226 training return: tensor(-999.9756, device='cuda:0')
episode: 227 training return: tensor(-146.7289, device='cuda:0')
epoch: 57 test_true_pfm: 6796.984257263292 sim_pfm: -662.6758173807951
episode: 228 training return: tensor(-999.9360, device='cuda:0')
episode: 229 training return: tensor(-999.4241, device='cuda:0')
episode: 230 training return: tensor(-255.3738, device='cuda:0')
episode: 231 training return: tensor(-999.5565, device='cuda:0')
epoch: 58 test_true_pfm: 3140.524345180831 sim_pfm: -403.12098799436353
episode: 232 training return: tensor(-999.7227, device='cuda:0')
episode: 233 training return: tensor(-25.7467, device='cuda:0')
episode: 234 training return: tensor(-133.9173, device='cuda:0')
episode: 235 training return: tensor(-626.0188, device='cuda:0')
epoch: 59 test_true_pfm: 3252.5959132647295 sim_pfm: -693.373914278578
episode: 236 training return: tensor(-808.0051, device='cuda:0')
episode: 237 training return: tensor(-97.6740, device='cuda:0')
episode: 238 training return: tensor(-999.9938, device='cuda:0')
episode: 239 training return: tensor(-205.9438, device='cuda:0')
epoch: 60 test_true_pfm: 6500.534522116711 sim_pfm: -133.7222419641524
episode: 240 training return: tensor(-336.6140, device='cuda:0')
episode: 241 training return: tensor(-731.8922, device='cuda:0')
episode: 242 training return: tensor(-728.8040, device='cuda:0')
episode: 243 training return: tensor(-999.8981, device='cuda:0')
epoch: 61 test_true_pfm: 6796.618449408909 sim_pfm: -655.620425998602
episode: 244 training return: tensor(-442.8004, device='cuda:0')
episode: 245 training return: tensor(-999.9963, device='cuda:0')
episode: 246 training return: tensor(-391.7324, device='cuda:0')
episode: 247 training return: tensor(-55.6677, device='cuda:0')
epoch: 62 test_true_pfm: 3109.6782385658285 sim_pfm: -356.9252926923412
episode: 248 training return: tensor(-999.9750, device='cuda:0')
episode: 249 training return: tensor(-337.2292, device='cuda:0')
episode: 250 training return: tensor(-999.9999, device='cuda:0')
episode: 251 training return: tensor(-354.3279, device='cuda:0')
epoch: 63 test_true_pfm: 3233.651140779435 sim_pfm: -653.8569696495737
episode: 252 training return: tensor(-999.9995, device='cuda:0')
episode: 253 training return: tensor(-999.9996, device='cuda:0')
episode: 254 training return: tensor(-30.2845, device='cuda:0')
episode: 255 training return: tensor(-810.1234, device='cuda:0')
epoch: 64 test_true_pfm: 10165.694156044903 sim_pfm: 8.892425473003337
episode: 256 training return: tensor(-314.7726, device='cuda:0')
episode: 257 training return: tensor(-62.9705, device='cuda:0')
episode: 258 training return: tensor(-999.9989, device='cuda:0')
episode: 259 training return: tensor(-999.9985, device='cuda:0')
epoch: 65 test_true_pfm: 9193.378668086034 sim_pfm: -391.54208812800545
episode: 260 training return: tensor(-237.7819, device='cuda:0')
episode: 261 training return: tensor(-853.8715, device='cuda:0')
episode: 262 training return: tensor(-96.8965, device='cuda:0')
episode: 263 training return: tensor(-995.3278, device='cuda:0')
epoch: 66 test_true_pfm: 3236.8368950742733 sim_pfm: 7.7183905795100145
episode: 264 training return: tensor(-999.7952, device='cuda:0')
episode: 265 training return: tensor(-997.7106, device='cuda:0')
episode: 266 training return: tensor(-719.0718, device='cuda:0')
episode: 267 training return: tensor(-184.2270, device='cuda:0')
epoch: 67 test_true_pfm: 10349.82141457026 sim_pfm: -458.72750489679555
episode: 268 training return: tensor(-219.4651, device='cuda:0')
episode: 269 training return: tensor(48.8809, device='cuda:0')
episode: 270 training return: tensor(-999.9988, device='cuda:0')
episode: 271 training return: tensor(-188.6341, device='cuda:0')
epoch: 68 test_true_pfm: 6817.578264353065 sim_pfm: 43.247436885091396
episode: 272 training return: tensor(-204.6908, device='cuda:0')
episode: 273 training return: tensor(-403.0681, device='cuda:0')
episode: 274 training return: tensor(-512.6414, device='cuda:0')
episode: 275 training return: tensor(-200.6902, device='cuda:0')
epoch: 69 test_true_pfm: 6772.179651350639 sim_pfm: 106.9353738890107
episode: 276 training return: tensor(-333.7473, device='cuda:0')
episode: 277 training return: tensor(-998.3527, device='cuda:0')
episode: 278 training return: tensor(-804.0234, device='cuda:0')
episode: 279 training return: tensor(-332.5569, device='cuda:0')
epoch: 70 test_true_pfm: 3237.0691903754996 sim_pfm: 52.52059703834433
episode: 280 training return: tensor(-136.9787, device='cuda:0')
episode: 281 training return: tensor(-109.1625, device='cuda:0')
episode: 282 training return: tensor(-999.9996, device='cuda:0')
episode: 283 training return: tensor(-999.9996, device='cuda:0')
epoch: 71 test_true_pfm: 6497.63516209651 sim_pfm: -30.62902697645283
episode: 284 training return: tensor(-69.7109, device='cuda:0')
episode: 285 training return: tensor(-132.7791, device='cuda:0')
episode: 286 training return: tensor(-999.9998, device='cuda:0')
episode: 287 training return: tensor(-101.8752, device='cuda:0')
epoch: 72 test_true_pfm: 6742.634340809429 sim_pfm: 13.704075585507477
episode: 288 training return: tensor(-79.8483, device='cuda:0')
episode: 289 training return: tensor(-999.9000, device='cuda:0')
episode: 290 training return: tensor(-252.9657, device='cuda:0')
episode: 291 training return: tensor(-74.5506, device='cuda:0')
epoch: 73 test_true_pfm: 10280.40812618621 sim_pfm: -298.69413129265496
episode: 292 training return: tensor(-181.8341, device='cuda:0')
episode: 293 training return: tensor(-112.3811, device='cuda:0')
episode: 294 training return: tensor(-107.9631, device='cuda:0')
episode: 295 training return: tensor(-308.4041, device='cuda:0')
epoch: 74 test_true_pfm: 6911.788943333406 sim_pfm: 35.109991020561814
episode: 296 training return: tensor(-53.6357, device='cuda:0')
episode: 297 training return: tensor(-68.5021, device='cuda:0')
episode: 298 training return: tensor(-183.9589, device='cuda:0')
episode: 299 training return: tensor(-999.9991, device='cuda:0')
epoch: 75 test_true_pfm: 3243.107964604728 sim_pfm: -665.4074022875478
episode: 300 training return: tensor(-196.8582, device='cuda:0')
episode: 301 training return: tensor(-996.2560, device='cuda:0')
episode: 302 training return: tensor(-166.1448, device='cuda:0')
episode: 303 training return: tensor(-999.9922, device='cuda:0')
epoch: 76 test_true_pfm: 3356.173008421774 sim_pfm: 107.63781545501358
episode: 304 training return: tensor(-999.9996, device='cuda:0')
episode: 305 training return: tensor(-693.7910, device='cuda:0')
episode: 306 training return: tensor(-343.6121, device='cuda:0')
episode: 307 training return: tensor(-244.4711, device='cuda:0')
epoch: 77 test_true_pfm: 6750.4128369062055 sim_pfm: -333.80869669030653
episode: 308 training return: tensor(-118.6086, device='cuda:0')
episode: 309 training return: tensor(-64.2416, device='cuda:0')
episode: 310 training return: tensor(-396.2185, device='cuda:0')
episode: 311 training return: tensor(-94.0263, device='cuda:0')
epoch: 78 test_true_pfm: 10178.65051858429 sim_pfm: -472.60575050104916
episode: 312 training return: tensor(-999.5057, device='cuda:0')
episode: 313 training return: tensor(-999.9978, device='cuda:0')
episode: 314 training return: tensor(-999.9555, device='cuda:0')
episode: 315 training return: tensor(-275.7524, device='cuda:0')
epoch: 79 test_true_pfm: 6714.554898681407 sim_pfm: -360.13231971344794
episode: 316 training return: tensor(-999.3668, device='cuda:0')
episode: 317 training return: tensor(-999.9991, device='cuda:0')
episode: 318 training return: tensor(-123.4318, device='cuda:0')
episode: 319 training return: tensor(-139.8016, device='cuda:0')
epoch: 80 test_true_pfm: 1229.5776919991451 sim_pfm: -385.56869230861776
episode: 320 training return: tensor(-169.5977, device='cuda:0')
episode: 321 training return: tensor(-471.1221, device='cuda:0')
episode: 322 training return: tensor(-255.2405, device='cuda:0')
episode: 323 training return: tensor(-256.1331, device='cuda:0')
epoch: 81 test_true_pfm: 6784.726689444252 sim_pfm: -304.86142990445177
episode: 324 training return: tensor(-409.5811, device='cuda:0')
episode: 325 training return: tensor(-999.9954, device='cuda:0')
episode: 326 training return: tensor(-185.1416, device='cuda:0')
episode: 327 training return: tensor(-262.2268, device='cuda:0')
epoch: 82 test_true_pfm: 7993.641575814389 sim_pfm: 50.03467330608206
episode: 328 training return: tensor(-210.5244, device='cuda:0')
episode: 329 training return: tensor(-999.9633, device='cuda:0')
episode: 330 training return: tensor(-999.9957, device='cuda:0')
episode: 331 training return: tensor(-222.3734, device='cuda:0')
epoch: 83 test_true_pfm: 6772.240326493236 sim_pfm: 55.15042327522921
episode: 332 training return: tensor(-999.9976, device='cuda:0')
episode: 333 training return: tensor(-234.5370, device='cuda:0')
episode: 334 training return: tensor(-197.5514, device='cuda:0')
episode: 335 training return: tensor(-247.4429, device='cuda:0')
epoch: 84 test_true_pfm: 6822.809387437671 sim_pfm: -112.32127644416566
episode: 336 training return: tensor(-337.6862, device='cuda:0')
episode: 337 training return: tensor(-998.1121, device='cuda:0')
episode: 338 training return: tensor(-72.1550, device='cuda:0')
episode: 339 training return: tensor(-999.9998, device='cuda:0')
epoch: 85 test_true_pfm: 6763.220074478362 sim_pfm: 114.7244158279306
episode: 340 training return: tensor(-572.8315, device='cuda:0')
episode: 341 training return: tensor(-78.4792, device='cuda:0')
episode: 342 training return: tensor(-121.4485, device='cuda:0')
episode: 343 training return: tensor(-61.4277, device='cuda:0')
epoch: 86 test_true_pfm: 10353.254613741055 sim_pfm: -321.0181661970176
episode: 344 training return: tensor(51.8000, device='cuda:0')
episode: 345 training return: tensor(-99.4625, device='cuda:0')
episode: 346 training return: tensor(-999.9212, device='cuda:0')
episode: 347 training return: tensor(-72.1562, device='cuda:0')
epoch: 87 test_true_pfm: 10291.031125635858 sim_pfm: 64.80510500927146
episode: 348 training return: tensor(-996.0442, device='cuda:0')
episode: 349 training return: tensor(-999.9996, device='cuda:0')
episode: 350 training return: tensor(-186.8327, device='cuda:0')
episode: 351 training return: tensor(-90.2411, device='cuda:0')
epoch: 88 test_true_pfm: 8233.526968076894 sim_pfm: -382.4568601665281
episode: 352 training return: tensor(-143.8575, device='cuda:0')
episode: 353 training return: tensor(-849.4359, device='cuda:0')
episode: 354 training return: tensor(-1.6902, device='cuda:0')
episode: 355 training return: tensor(-84.8367, device='cuda:0')
epoch: 89 test_true_pfm: 10152.81607913508 sim_pfm: -379.1899801442535
episode: 356 training return: tensor(-243.5705, device='cuda:0')
episode: 357 training return: tensor(-18.5620, device='cuda:0')
episode: 358 training return: tensor(-54.8779, device='cuda:0')
episode: 359 training return: tensor(-999.9945, device='cuda:0')
epoch: 90 test_true_pfm: 4587.446377108726 sim_pfm: -316.0751736416714
episode: 360 training return: tensor(-186.3951, device='cuda:0')
episode: 361 training return: tensor(-243.3256, device='cuda:0')
episode: 362 training return: tensor(-353.2404, device='cuda:0')
episode: 363 training return: tensor(-999.9894, device='cuda:0')
epoch: 91 test_true_pfm: 4980.334237482434 sim_pfm: -312.75071724092896
episode: 364 training return: tensor(-999.9349, device='cuda:0')
episode: 365 training return: tensor(-160.1705, device='cuda:0')
episode: 366 training return: tensor(-152.4675, device='cuda:0')
episode: 367 training return: tensor(-999.9988, device='cuda:0')
epoch: 92 test_true_pfm: 10476.69655104147 sim_pfm: -657.2696877195364
episode: 368 training return: tensor(-176.0418, device='cuda:0')
episode: 369 training return: tensor(-208.4869, device='cuda:0')
episode: 370 training return: tensor(-2.9101, device='cuda:0')
episode: 371 training return: tensor(86.9764, device='cuda:0')
epoch: 93 test_true_pfm: 10330.720743281536 sim_pfm: -324.4592725674641
episode: 372 training return: tensor(-162.7704, device='cuda:0')
episode: 373 training return: tensor(-167.4942, device='cuda:0')
episode: 374 training return: tensor(-999.9996, device='cuda:0')
episode: 375 training return: tensor(-999.5835, device='cuda:0')
epoch: 94 test_true_pfm: 6698.165678382834 sim_pfm: -703.4453706414594
episode: 376 training return: tensor(-140.4009, device='cuda:0')
episode: 377 training return: tensor(-321.5215, device='cuda:0')
episode: 378 training return: tensor(-84.6469, device='cuda:0')
episode: 379 training return: tensor(-184.8938, device='cuda:0')
epoch: 95 test_true_pfm: 10455.971788477531 sim_pfm: -253.03929251767113
episode: 380 training return: tensor(-653.8113, device='cuda:0')
episode: 381 training return: tensor(-402.4160, device='cuda:0')
episode: 382 training return: tensor(-998.4608, device='cuda:0')
episode: 383 training return: tensor(-58.8139, device='cuda:0')
epoch: 96 test_true_pfm: 10422.423666299042 sim_pfm: -287.73493646954495
episode: 384 training return: tensor(-999.8903, device='cuda:0')
episode: 385 training return: tensor(-67.1733, device='cuda:0')
episode: 386 training return: tensor(-136.2985, device='cuda:0')
episode: 387 training return: tensor(-240.6602, device='cuda:0')
epoch: 97 test_true_pfm: 10175.060159482107 sim_pfm: -386.4645725491767
episode: 388 training return: tensor(-83.4609, device='cuda:0')
episode: 389 training return: tensor(-170.5242, device='cuda:0')
episode: 390 training return: tensor(-214.3622, device='cuda:0')
episode: 391 training return: tensor(-124.0522, device='cuda:0')
epoch: 98 test_true_pfm: 6810.966976982436 sim_pfm: -460.68882271860883
episode: 392 training return: tensor(-80.4026, device='cuda:0')
episode: 393 training return: tensor(-400.1135, device='cuda:0')
episode: 394 training return: tensor(24.8172, device='cuda:0')
episode: 395 training return: tensor(-183.7934, device='cuda:0')
epoch: 99 test_true_pfm: 10356.652915772815 sim_pfm: 54.597017081124555
episode: 396 training return: tensor(-11.9442, device='cuda:0')
episode: 397 training return: tensor(-373.8294, device='cuda:0')
episode: 398 training return: tensor(-999.9992, device='cuda:0')
episode: 399 training return: tensor(-124.0738, device='cuda:0')
epoch: 100 test_true_pfm: 10171.219688419938 sim_pfm: -381.33393472989945
episode: 400 training return: tensor(-844.3872, device='cuda:0')
episode: 401 training return: tensor(-243.5485, device='cuda:0')
episode: 402 training return: tensor(-999.9999, device='cuda:0')
episode: 403 training return: tensor(-860.6038, device='cuda:0')
epoch: 101 test_true_pfm: 10284.922572148605 sim_pfm: -310.5291783734768
episode: 404 training return: tensor(3.9902, device='cuda:0')
episode: 405 training return: tensor(-999.9835, device='cuda:0')
episode: 406 training return: tensor(-999.8726, device='cuda:0')
episode: 407 training return: tensor(-135.9373, device='cuda:0')
epoch: 102 test_true_pfm: 4314.463929967687 sim_pfm: 47.20975064666709
episode: 408 training return: tensor(-51.2716, device='cuda:0')
episode: 409 training return: tensor(-66.1835, device='cuda:0')
episode: 410 training return: tensor(-90.3199, device='cuda:0')
episode: 411 training return: tensor(-999.9965, device='cuda:0')
epoch: 103 test_true_pfm: 10495.220020843117 sim_pfm: -636.2964376975724
episode: 412 training return: tensor(-999.9990, device='cuda:0')
episode: 413 training return: tensor(-137.6687, device='cuda:0')
episode: 414 training return: tensor(-249.6298, device='cuda:0')
episode: 415 training return: tensor(-999.9973, device='cuda:0')
epoch: 104 test_true_pfm: 10439.336186910376 sim_pfm: -227.1125568661761
episode: 416 training return: tensor(-264.0880, device='cuda:0')
episode: 417 training return: tensor(-999.9993, device='cuda:0')
episode: 418 training return: tensor(-419.9004, device='cuda:0')
episode: 419 training return: tensor(-153.3061, device='cuda:0')
epoch: 105 test_true_pfm: 3204.4747860562948 sim_pfm: -362.98246543024044
episode: 420 training return: tensor(-140.3467, device='cuda:0')
episode: 421 training return: tensor(-999.9713, device='cuda:0')
episode: 422 training return: tensor(-999.9693, device='cuda:0')
episode: 423 training return: tensor(-120.5864, device='cuda:0')
epoch: 106 test_true_pfm: 10429.443822407178 sim_pfm: -283.7125471665252
episode: 424 training return: tensor(6.7504, device='cuda:0')
episode: 425 training return: tensor(-159.6827, device='cuda:0')
episode: 426 training return: tensor(-24.8069, device='cuda:0')
episode: 427 training return: tensor(-999.9947, device='cuda:0')
epoch: 107 test_true_pfm: 8611.672147118192 sim_pfm: 88.2736356802634
episode: 428 training return: tensor(38.7146, device='cuda:0')
episode: 429 training return: tensor(-60.4296, device='cuda:0')
episode: 430 training return: tensor(-999.9987, device='cuda:0')
episode: 431 training return: tensor(-999.9984, device='cuda:0')
epoch: 108 test_true_pfm: 6841.824967578359 sim_pfm: -315.01892684190534
episode: 432 training return: tensor(-188.7717, device='cuda:0')
episode: 433 training return: tensor(-202.9753, device='cuda:0')
episode: 434 training return: tensor(-999.9916, device='cuda:0')
episode: 435 training return: tensor(-311.6874, device='cuda:0')
epoch: 109 test_true_pfm: 6939.589863682136 sim_pfm: -225.1828925332908
episode: 436 training return: tensor(-73.7210, device='cuda:0')
episode: 437 training return: tensor(-6.2469, device='cuda:0')
episode: 438 training return: tensor(-999.9998, device='cuda:0')
episode: 439 training return: tensor(-139.8217, device='cuda:0')
epoch: 110 test_true_pfm: 10325.58979589388 sim_pfm: -185.69903196157733
episode: 440 training return: tensor(-999.8683, device='cuda:0')
episode: 441 training return: tensor(-999.9952, device='cuda:0')
episode: 442 training return: tensor(-32.4762, device='cuda:0')
episode: 443 training return: tensor(-999.9987, device='cuda:0')
epoch: 111 test_true_pfm: 6744.575531812489 sim_pfm: -257.77663215913344
episode: 444 training return: tensor(-62.0862, device='cuda:0')
episode: 445 training return: tensor(89.4830, device='cuda:0')
episode: 446 training return: tensor(-27.6905, device='cuda:0')
episode: 447 training return: tensor(-166.9078, device='cuda:0')
epoch: 112 test_true_pfm: 6893.215350683137 sim_pfm: 46.02460431945898
episode: 448 training return: tensor(21.4093, device='cuda:0')
episode: 449 training return: tensor(-171.3983, device='cuda:0')
episode: 450 training return: tensor(-975.1240, device='cuda:0')
episode: 451 training return: tensor(-999.9999, device='cuda:0')
epoch: 113 test_true_pfm: 6652.205490982508 sim_pfm: 98.78191126791837
episode: 452 training return: tensor(-142.6714, device='cuda:0')
episode: 453 training return: tensor(-75.1232, device='cuda:0')
episode: 454 training return: tensor(14.0095, device='cuda:0')
episode: 455 training return: tensor(63.0316, device='cuda:0')
epoch: 114 test_true_pfm: 10502.176159064335 sim_pfm: 170.66952658153605
episode: 456 training return: tensor(-234.1382, device='cuda:0')
episode: 457 training return: tensor(-3.8429, device='cuda:0')
episode: 458 training return: tensor(-114.6730, device='cuda:0')
episode: 459 training return: tensor(-161.1844, device='cuda:0')
epoch: 115 test_true_pfm: 10325.848486061794 sim_pfm: -490.077426046395
episode: 460 training return: tensor(11.4274, device='cuda:0')
episode: 461 training return: tensor(-829.6617, device='cuda:0')
episode: 462 training return: tensor(-185.6042, device='cuda:0')
episode: 463 training return: tensor(-41.9635, device='cuda:0')
epoch: 116 test_true_pfm: 10385.107493943056 sim_pfm: -384.930684562821
episode: 464 training return: tensor(-85.2400, device='cuda:0')
episode: 465 training return: tensor(-68.7123, device='cuda:0')
episode: 466 training return: tensor(-14.5619, device='cuda:0')
episode: 467 training return: tensor(-165.0546, device='cuda:0')
epoch: 117 test_true_pfm: 10400.031504731143 sim_pfm: -309.1920878168312
episode: 468 training return: tensor(-129.6445, device='cuda:0')
episode: 469 training return: tensor(-999.9255, device='cuda:0')
episode: 470 training return: tensor(-19.1575, device='cuda:0')
episode: 471 training return: tensor(-66.3940, device='cuda:0')
epoch: 118 test_true_pfm: 6728.275729053818 sim_pfm: 49.543369707932776
episode: 472 training return: tensor(-83.0011, device='cuda:0')
episode: 473 training return: tensor(-166.6455, device='cuda:0')
episode: 474 training return: tensor(-91.1184, device='cuda:0')
episode: 475 training return: tensor(13.4029, device='cuda:0')
epoch: 119 test_true_pfm: 10442.466058592088 sim_pfm: -619.6540527851321
episode: 476 training return: tensor(-257.4284, device='cuda:0')
episode: 477 training return: tensor(-999.9430, device='cuda:0')
episode: 478 training return: tensor(-999.9589, device='cuda:0')
episode: 479 training return: tensor(9.8982, device='cuda:0')
epoch: 120 test_true_pfm: 6660.140953993886 sim_pfm: -335.7316702264167
episode: 480 training return: tensor(-999.9790, device='cuda:0')
episode: 481 training return: tensor(-1.3428, device='cuda:0')
episode: 482 training return: tensor(-507.1202, device='cuda:0')
episode: 483 training return: tensor(-122.7635, device='cuda:0')
epoch: 121 test_true_pfm: 6848.267650241046 sim_pfm: -295.3446620615723
episode: 484 training return: tensor(-200.2219, device='cuda:0')
episode: 485 training return: tensor(-140.0087, device='cuda:0')
episode: 486 training return: tensor(-8.9425, device='cuda:0')
episode: 487 training return: tensor(-217.6414, device='cuda:0')
epoch: 122 test_true_pfm: 10352.965249261462 sim_pfm: -623.7612118847513
episode: 488 training return: tensor(-124.5275, device='cuda:0')
episode: 489 training return: tensor(-125.0870, device='cuda:0')
episode: 490 training return: tensor(-75.5712, device='cuda:0')
episode: 491 training return: tensor(-999.9981, device='cuda:0')
epoch: 123 test_true_pfm: 6968.312253773661 sim_pfm: -370.60398159216857
episode: 492 training return: tensor(-70.4091, device='cuda:0')
episode: 493 training return: tensor(-998.9624, device='cuda:0')
episode: 494 training return: tensor(-214.1097, device='cuda:0')
episode: 495 training return: tensor(22.8352, device='cuda:0')
epoch: 124 test_true_pfm: 10539.624522548002 sim_pfm: 131.51973350858316
episode: 496 training return: tensor(-182.0472, device='cuda:0')
episode: 497 training return: tensor(51.0243, device='cuda:0')
episode: 498 training return: tensor(-999.9895, device='cuda:0')
episode: 499 training return: tensor(-70.5415, device='cuda:0')
epoch: 125 test_true_pfm: 6912.106065440879 sim_pfm: -333.1819649518002
episode: 500 training return: tensor(-34.6035, device='cuda:0')
episode: 501 training return: tensor(84.8033, device='cuda:0')
episode: 502 training return: tensor(-999.9974, device='cuda:0')
episode: 503 training return: tensor(-290.6591, device='cuda:0')
epoch: 126 test_true_pfm: 10281.477838746907 sim_pfm: 148.0160316003603
episode: 504 training return: tensor(-999.9995, device='cuda:0')
episode: 505 training return: tensor(-999.9909, device='cuda:0')
episode: 506 training return: tensor(-39.0108, device='cuda:0')
episode: 507 training return: tensor(-117.8213, device='cuda:0')
epoch: 127 test_true_pfm: 6793.9790665178125 sim_pfm: 49.38007411774015
episode: 508 training return: tensor(39.2877, device='cuda:0')
episode: 509 training return: tensor(19.8397, device='cuda:0')
episode: 510 training return: tensor(-166.1335, device='cuda:0')
episode: 511 training return: tensor(-267.1805, device='cuda:0')
epoch: 128 test_true_pfm: 6790.867955441606 sim_pfm: -68.60465876146918
episode: 512 training return: tensor(-93.6325, device='cuda:0')
episode: 513 training return: tensor(-844.7772, device='cuda:0')
episode: 514 training return: tensor(-65.0615, device='cuda:0')
episode: 515 training return: tensor(-999.6717, device='cuda:0')
epoch: 129 test_true_pfm: 3444.618705402443 sim_pfm: -78.69043359110947
episode: 516 training return: tensor(-999.7669, device='cuda:0')
episode: 517 training return: tensor(-999.9973, device='cuda:0')
episode: 518 training return: tensor(-128.8103, device='cuda:0')
episode: 519 training return: tensor(-108.5998, device='cuda:0')
epoch: 130 test_true_pfm: 6902.764767735861 sim_pfm: 16.890022869144257
episode: 520 training return: tensor(-10.2481, device='cuda:0')
episode: 521 training return: tensor(-973.8399, device='cuda:0')
episode: 522 training return: tensor(-999.9929, device='cuda:0')
episode: 523 training return: tensor(-37.1227, device='cuda:0')
epoch: 131 test_true_pfm: 10363.537425005377 sim_pfm: -249.21537730877753
episode: 524 training return: tensor(2.7174, device='cuda:0')
episode: 525 training return: tensor(-999.8748, device='cuda:0')
episode: 526 training return: tensor(-100.8307, device='cuda:0')
episode: 527 training return: tensor(-999.9969, device='cuda:0')
epoch: 132 test_true_pfm: 6808.749936037323 sim_pfm: -301.6208641188762
episode: 528 training return: tensor(-999.9969, device='cuda:0')
episode: 529 training return: tensor(-75.6204, device='cuda:0')
episode: 530 training return: tensor(-162.7564, device='cuda:0')
episode: 531 training return: tensor(-407.9365, device='cuda:0')
epoch: 133 test_true_pfm: 10131.640733261025 sim_pfm: 35.906160116156876
episode: 532 training return: tensor(-142.6113, device='cuda:0')
episode: 533 training return: tensor(-106.3143, device='cuda:0')
episode: 534 training return: tensor(-87.0786, device='cuda:0')
episode: 535 training return: tensor(-190.1508, device='cuda:0')
epoch: 134 test_true_pfm: 10343.153768794522 sim_pfm: -484.0657491994789
episode: 536 training return: tensor(-71.1680, device='cuda:0')
episode: 537 training return: tensor(-59.3678, device='cuda:0')
episode: 538 training return: tensor(-72.3161, device='cuda:0')
episode: 539 training return: tensor(-94.4179, device='cuda:0')
epoch: 135 test_true_pfm: 3303.0571865245024 sim_pfm: -335.0567864763434
episode: 540 training return: tensor(-359.8529, device='cuda:0')
episode: 541 training return: tensor(-928.3396, device='cuda:0')
episode: 542 training return: tensor(-134.4814, device='cuda:0')
episode: 543 training return: tensor(-999.9999, device='cuda:0')
epoch: 136 test_true_pfm: 10348.968781373356 sim_pfm: 7.586222738570844
episode: 544 training return: tensor(-162.7500, device='cuda:0')
episode: 545 training return: tensor(-31.0212, device='cuda:0')
episode: 546 training return: tensor(-999.7758, device='cuda:0')
episode: 547 training return: tensor(68.9254, device='cuda:0')
epoch: 137 test_true_pfm: 8803.855285637945 sim_pfm: -370.55344500072533
episode: 548 training return: tensor(-999.9992, device='cuda:0')
episode: 549 training return: tensor(-155.1768, device='cuda:0')
episode: 550 training return: tensor(-999.9991, device='cuda:0')
episode: 551 training return: tensor(-70.1619, device='cuda:0')
epoch: 138 test_true_pfm: 9086.057224374386 sim_pfm: -687.8609103216635
episode: 552 training return: tensor(-135.7645, device='cuda:0')
episode: 553 training return: tensor(-246.8906, device='cuda:0')
episode: 554 training return: tensor(-999.7294, device='cuda:0')
episode: 555 training return: tensor(22.4685, device='cuda:0')
epoch: 139 test_true_pfm: 10344.138578154272 sim_pfm: 15.637927480255408
episode: 556 training return: tensor(-999.9916, device='cuda:0')
episode: 557 training return: tensor(-142.1333, device='cuda:0')
episode: 558 training return: tensor(-246.4994, device='cuda:0')
episode: 559 training return: tensor(-189.2948, device='cuda:0')
epoch: 140 test_true_pfm: 8232.919785256492 sim_pfm: -277.5592926532457
episode: 560 training return: tensor(-149.1274, device='cuda:0')
episode: 561 training return: tensor(-159.7738, device='cuda:0')
episode: 562 training return: tensor(-67.9129, device='cuda:0')
episode: 563 training return: tensor(-959.8719, device='cuda:0')
epoch: 141 test_true_pfm: 10507.074613779829 sim_pfm: -358.4363451374423
episode: 564 training return: tensor(58.6161, device='cuda:0')
episode: 565 training return: tensor(-9.8047, device='cuda:0')
episode: 566 training return: tensor(-146.4799, device='cuda:0')
episode: 567 training return: tensor(-216.8093, device='cuda:0')
epoch: 142 test_true_pfm: 4152.521206714459 sim_pfm: -390.2174086149316
episode: 568 training return: tensor(-999.9976, device='cuda:0')
episode: 569 training return: tensor(-999.9855, device='cuda:0')
episode: 570 training return: tensor(6.9744, device='cuda:0')
episode: 571 training return: tensor(-87.1355, device='cuda:0')
epoch: 143 test_true_pfm: 10300.890761651432 sim_pfm: 62.50461158376614
episode: 572 training return: tensor(5.8138, device='cuda:0')
episode: 573 training return: tensor(-60.9799, device='cuda:0')
episode: 574 training return: tensor(-104.5866, device='cuda:0')
episode: 575 training return: tensor(-999.9589, device='cuda:0')
epoch: 144 test_true_pfm: 8470.754792054222 sim_pfm: -351.90269380876754
episode: 576 training return: tensor(-34.2504, device='cuda:0')
episode: 577 training return: tensor(-74.6751, device='cuda:0')
episode: 578 training return: tensor(-999.9703, device='cuda:0')
episode: 579 training return: tensor(-252.7102, device='cuda:0')
epoch: 145 test_true_pfm: 6588.0386152544515 sim_pfm: -16.957004635468667
episode: 580 training return: tensor(-452.5705, device='cuda:0')
episode: 581 training return: tensor(-224.3777, device='cuda:0')
episode: 582 training return: tensor(-120.1865, device='cuda:0')
episode: 583 training return: tensor(-72.8136, device='cuda:0')
epoch: 146 test_true_pfm: 6836.764035826144 sim_pfm: -659.2049544958863
episode: 584 training return: tensor(-999.9692, device='cuda:0')
episode: 585 training return: tensor(90.9068, device='cuda:0')
episode: 586 training return: tensor(-31.8233, device='cuda:0')
episode: 587 training return: tensor(-111.6003, device='cuda:0')
epoch: 147 test_true_pfm: 3078.5184892376296 sim_pfm: -256.7988176109017
episode: 588 training return: tensor(-121.5937, device='cuda:0')
episode: 589 training return: tensor(-963.2050, device='cuda:0')
episode: 590 training return: tensor(-93.2062, device='cuda:0')
episode: 591 training return: tensor(-42.8235, device='cuda:0')
epoch: 148 test_true_pfm: 10173.046714275317 sim_pfm: -372.0383256107355
episode: 592 training return: tensor(-999.9271, device='cuda:0')
episode: 593 training return: tensor(-142.7977, device='cuda:0')
episode: 594 training return: tensor(-255.1868, device='cuda:0')
episode: 595 training return: tensor(-160.6063, device='cuda:0')
epoch: 149 test_true_pfm: 10492.508342876616 sim_pfm: -245.6583213560904
episode: 596 training return: tensor(-240.0392, device='cuda:0')
episode: 597 training return: tensor(18.9286, device='cuda:0')
episode: 598 training return: tensor(-235.0726, device='cuda:0')
episode: 599 training return: tensor(-79.4319, device='cuda:0')
epoch: 150 test_true_pfm: 10294.451966011879 sim_pfm: -99.84991687058937
