['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'behavior', '--traj', 'expert', '--seed', '3']
epoch: 0 training_loss 0.2073359864205122 test_loss: 0.1487908959388733
epoch: 1 training_loss 0.14318077992647887 test_loss: 0.12832566499710082
epoch: 2 training_loss 0.1327489076182246 test_loss: 0.15965623855590821
epoch: 3 training_loss 0.13740689881145954 test_loss: 0.12495436668395996
epoch: 4 training_loss 0.13018305465579033 test_loss: 0.12256127595901489
epoch: 5 training_loss 0.13026508532464504 test_loss: 0.13365718126296997
epoch: 6 training_loss 0.1242498053982854 test_loss: 0.12555009126663208
epoch: 7 training_loss 0.12418540909886361 test_loss: 0.12056893110275269
epoch: 8 training_loss 0.12672969169914722 test_loss: 0.12912126779556274
epoch: 9 training_loss 0.12718835830688477 test_loss: 0.1297245740890503
epoch: 10 training_loss 0.12877291142940522 test_loss: 0.12493240833282471
epoch: 11 training_loss 0.12092596363276244 test_loss: 0.11693750619888306
epoch: 12 training_loss 0.12445791278034449 test_loss: 0.11332578659057617
epoch: 13 training_loss 0.11345964401960373 test_loss: 0.13015891313552858
epoch: 14 training_loss 0.11330103874206543 test_loss: 0.11904507875442505
epoch: 15 training_loss 0.12284270912408829 test_loss: 0.11181954145431519
epoch: 16 training_loss 0.1147905782610178 test_loss: 0.1094924807548523
epoch: 17 training_loss 0.12180386781692505 test_loss: 0.1217841625213623
epoch: 18 training_loss 0.1189181899651885 test_loss: 0.1229623556137085
epoch: 19 training_loss 0.11419284582138062 test_loss: 0.11930316686630249
epoch: 20 training_loss 0.11501048658043146 test_loss: 0.11268712282180786
epoch: 21 training_loss 0.11352409083396196 test_loss: 0.12291566133499146
epoch: 22 training_loss 0.12027013681828976 test_loss: 0.12230579853057862
epoch: 23 training_loss 0.1102398445084691 test_loss: 0.12661091089248658
epoch: 24 training_loss 0.1226545238122344 test_loss: 0.12644745111465455
epoch: 25 training_loss 0.11741109002381563 test_loss: 0.10603368282318115
epoch: 26 training_loss 0.1207490060850978 test_loss: 0.11164355278015137
epoch: 27 training_loss 0.11923569668084383 test_loss: 0.11974055767059326
epoch: 28 training_loss 0.12646319799125194 test_loss: 0.11014440059661865
epoch: 29 training_loss 0.11884493675082922 test_loss: 0.10982842445373535
epoch: 30 training_loss 0.11493765532970429 test_loss: 0.1106886625289917
epoch: 31 training_loss 0.12084836311638356 test_loss: 0.1118769884109497
epoch: 32 training_loss 0.11566863916814327 test_loss: 0.1125446081161499
epoch: 33 training_loss 0.12102535344660283 test_loss: 0.10502774715423584
epoch: 34 training_loss 0.12218112375587226 test_loss: 0.12186428308486938
epoch: 35 training_loss 0.12008080564439297 test_loss: 0.111003839969635
epoch: 36 training_loss 0.11625773303210735 test_loss: 0.11436055898666382
epoch: 37 training_loss 0.11636972922831773 test_loss: 0.11798634529113769
epoch: 38 training_loss 0.12155965350568294 test_loss: 0.11432207822799682
epoch: 39 training_loss 0.11877236723899841 test_loss: 0.10804811716079712
epoch: 40 training_loss 0.11528401844203472 test_loss: 0.11759696006774903
epoch: 41 training_loss 0.11589137312024832 test_loss: 0.12416743040084839
epoch: 42 training_loss 0.11520623680204153 test_loss: 0.12010025978088379
epoch: 43 training_loss 0.11403769686818123 test_loss: 0.12025156021118164
epoch: 44 training_loss 0.11945124946534634 test_loss: 0.11702409982681275
epoch: 45 training_loss 0.1181065320968628 test_loss: 0.11058292388916016
epoch: 46 training_loss 0.11455421790480613 test_loss: 0.1087766408920288
epoch: 47 training_loss 0.12091314930468798 test_loss: 0.10684462785720825
epoch: 48 training_loss 0.11355987902730703 test_loss: 0.13372901678085328
epoch: 49 training_loss 0.11461962025612593 test_loss: 0.13276135921478271
epoch: 50 training_loss 0.11839609757065774 test_loss: 0.11704176664352417
epoch: 51 training_loss 0.11563793826848269 test_loss: 0.11757038831710816
epoch: 52 training_loss 0.11135476000607014 test_loss: 0.11286543607711792
epoch: 53 training_loss 0.11578723482787609 test_loss: 0.1081779956817627
epoch: 54 training_loss 0.12002112228423358 test_loss: 0.12650651931762696
epoch: 55 training_loss 0.11649317122995853 test_loss: 0.12252656221389771
epoch: 56 training_loss 0.11425862554460764 test_loss: 0.1117634892463684
epoch: 57 training_loss 0.11216066863387823 test_loss: 0.11898980140686036
epoch: 58 training_loss 0.11597747538238763 test_loss: 0.11133558750152588
epoch: 59 training_loss 0.11296493977308274 test_loss: 0.11998867988586426
epoch: 60 training_loss 0.11899521078914405 test_loss: 0.11907719373703003
epoch: 61 training_loss 0.11451317217200994 test_loss: 0.11146392822265624
epoch: 62 training_loss 0.11411179777234792 test_loss: 0.12260435819625855
epoch: 63 training_loss 0.11923792835325003 test_loss: 0.11830277442932129
epoch: 64 training_loss 0.1186886640638113 test_loss: 0.1174057126045227
epoch: 65 training_loss 0.1119875467196107 test_loss: 0.11226537227630615
epoch: 66 training_loss 0.1136445839330554 test_loss: 0.10777151584625244
epoch: 67 training_loss 0.11622682634741067 test_loss: 0.10819323062896728
epoch: 68 training_loss 0.11490566652268171 test_loss: 0.10657677650451661
epoch: 69 training_loss 0.11783224839717149 test_loss: 0.1146880030632019
epoch: 70 training_loss 0.11162170093506575 test_loss: 0.12079423666000366
epoch: 71 training_loss 0.11345686808228493 test_loss: 0.12145092487335205
epoch: 72 training_loss 0.11227409951388836 test_loss: 0.12091660499572754
epoch: 73 training_loss 0.11467352148145438 test_loss: 0.11431311368942261
epoch: 74 training_loss 0.11129901517182589 test_loss: 0.11783162355422974
epoch: 75 training_loss 0.11930541753768921 test_loss: 0.11028813123703003
epoch: 76 training_loss 0.11818198077380657 test_loss: 0.11476185321807861
epoch: 77 training_loss 0.11277696374803782 test_loss: 0.11194332838058471
epoch: 78 training_loss 0.11368566453456878 test_loss: 0.1103473424911499
epoch: 79 training_loss 0.11865260053426027 test_loss: 0.10898027420043946
epoch: 80 training_loss 0.11196215912699699 test_loss: 0.10699582099914551
epoch: 81 training_loss 0.1221707084774971 test_loss: 0.11390985250473022
epoch: 82 training_loss 0.12072413101792336 test_loss: 0.09565019607543945
epoch: 83 training_loss 0.11400902904570102 test_loss: 0.111407470703125
epoch: 84 training_loss 0.10853486247360707 test_loss: 0.11055797338485718
epoch: 85 training_loss 0.11629830248653888 test_loss: 0.11746338605880738
epoch: 86 training_loss 0.11351562332361936 test_loss: 0.10508881807327271
epoch: 87 training_loss 0.11527129445225 test_loss: 0.10992637872695923
epoch: 88 training_loss 0.1161241852119565 test_loss: 0.1132582187652588
epoch: 89 training_loss 0.12028396099805833 test_loss: 0.12025120258331298
epoch: 90 training_loss 0.11180692911148071 test_loss: 0.11277371644973755
epoch: 91 training_loss 0.11213687613606453 test_loss: 0.11433627605438232
epoch: 92 training_loss 0.1192298025265336 test_loss: 0.11568827629089355
epoch: 93 training_loss 0.11444705005735159 test_loss: 0.10565969944000245
epoch: 94 training_loss 0.11128578681498766 test_loss: 0.1113469123840332
epoch: 95 training_loss 0.11225274998694658 test_loss: 0.1183479905128479
epoch: 96 training_loss 0.11470326077193022 test_loss: 0.1228192925453186
epoch: 97 training_loss 0.11343493964523077 test_loss: 0.10896432399749756
epoch: 98 training_loss 0.11720396049320697 test_loss: 0.10946513414382934
epoch: 99 training_loss 0.11625873077660799 test_loss: 0.11673270463943482
epoch: 100 training_loss 0.11552864529192447 test_loss: 0.108479905128479
epoch: 101 training_loss 0.11857737578451634 test_loss: 0.10383169651031494
epoch: 102 training_loss 0.10917901586741209 test_loss: 0.12225068807601928
epoch: 103 training_loss 0.11205776419490576 test_loss: 0.11131892204284669
epoch: 104 training_loss 0.1154651989042759 test_loss: 0.11688159704208374
epoch: 105 training_loss 0.11437496960163117 test_loss: 0.11522939205169677
epoch: 106 training_loss 0.10570956813171506 test_loss: 0.10341479778289794
epoch: 107 training_loss 0.11561852533370257 test_loss: 0.11190667152404785
epoch: 108 training_loss 0.10698529817163945 test_loss: 0.11545175313949585
epoch: 109 training_loss 0.11360016025602818 test_loss: 0.10368313789367675
epoch: 110 training_loss 0.1112706507369876 test_loss: 0.1149634838104248
epoch: 111 training_loss 0.11030735854059458 test_loss: 0.10403318405151367
epoch: 112 training_loss 0.11336103182286024 test_loss: 0.10611355304718018
epoch: 113 training_loss 0.11262346904724836 test_loss: 0.11769425868988037
epoch: 114 training_loss 0.11234919928014278 test_loss: 0.1272202491760254
epoch: 115 training_loss 0.1111259352043271 test_loss: 0.11044574975967407
epoch: 116 training_loss 0.11674377482384443 test_loss: 0.11423658132553101
epoch: 117 training_loss 0.11329550560563803 test_loss: 0.12426302433013917
epoch: 118 training_loss 0.11917986303567886 test_loss: 0.10420027971267701
epoch: 119 training_loss 0.10872648637741804 test_loss: 0.11481596231460571
epoch: 120 training_loss 0.10988531094044447 test_loss: 0.11448092460632324
epoch: 121 training_loss 0.11730401389300824 test_loss: 0.10967906713485717
epoch: 122 training_loss 0.10625301137566566 test_loss: 0.11431677341461181
epoch: 123 training_loss 0.11684925306588412 test_loss: 0.11032958030700683
epoch: 124 training_loss 0.11516190327703953 test_loss: 0.10070786476135254
epoch: 125 training_loss 0.11231763515621423 test_loss: 0.09902148246765137
epoch: 126 training_loss 0.1099988417327404 test_loss: 0.12207269668579102
epoch: 127 training_loss 0.1129351169988513 test_loss: 0.1169397473335266
epoch: 128 training_loss 0.11440017163753509 test_loss: 0.12376813888549805
epoch: 129 training_loss 0.11251126013696194 test_loss: 0.10853004455566406
epoch: 130 training_loss 0.11450948245823384 test_loss: 0.11884312629699707
epoch: 131 training_loss 0.11729230243712664 test_loss: 0.10970839262008666
epoch: 132 training_loss 0.10991514775902032 test_loss: 0.12150617837905883
epoch: 133 training_loss 0.11571766182780266 test_loss: 0.09981215596199036
epoch: 134 training_loss 0.114215252622962 test_loss: 0.11573916673660278
epoch: 135 training_loss 0.10973155580461025 test_loss: 0.11606830358505249
epoch: 136 training_loss 0.11733109250664711 test_loss: 0.11160666942596435
epoch: 137 training_loss 0.1190590376406908 test_loss: 0.11114046573638917
epoch: 138 training_loss 0.10999655060470104 test_loss: 0.09710087776184081
epoch: 139 training_loss 0.11462104052305222 test_loss: 0.11789299249649048
epoch: 140 training_loss 0.11209388583898544 test_loss: 0.11447539329528808
epoch: 141 training_loss 0.1133552210777998 test_loss: 0.1121471643447876
epoch: 142 training_loss 0.10897087469696999 test_loss: 0.12060426473617554
epoch: 143 training_loss 0.11529051210731268 test_loss: 0.118519127368927
epoch: 144 training_loss 0.11267326928675175 test_loss: 0.11075888872146607
epoch: 145 training_loss 0.11233092162758113 test_loss: 0.10835597515106202
epoch: 146 training_loss 0.11736894324421883 test_loss: 0.12854911088943483
epoch: 147 training_loss 0.11109985481947661 test_loss: 0.10867668390274048
epoch: 148 training_loss 0.10463359478861094 test_loss: 0.11145870685577393
epoch: 149 training_loss 0.11433577764779329 test_loss: 0.11681892871856689
epoch: 0 training_loss 22.088055667877196 test_loss: 15.459825134277343
epoch: 1 training_loss 11.69753936767578 test_loss: 9.188278198242188
epoch: 2 training_loss 8.411764178276062 test_loss: 7.813632965087891
epoch: 3 training_loss 7.5469375467300415 test_loss: 6.911785888671875
epoch: 4 training_loss 6.897576828002929 test_loss: 6.476764678955078
epoch: 5 training_loss 6.337443566322326 test_loss: 6.094015502929688
epoch: 6 training_loss 5.770072360038757 test_loss: 5.663887405395508
epoch: 7 training_loss 5.475272703170776 test_loss: 5.147979354858398
epoch: 8 training_loss 5.003382902145386 test_loss: 4.762960433959961
epoch: 9 training_loss 4.854637231826782 test_loss: 4.635428237915039
epoch: 10 training_loss 4.5879621505737305 test_loss: 4.5868995666503904
epoch: 11 training_loss 4.364560613632202 test_loss: 4.424137878417969
epoch: 12 training_loss 4.229086775779724 test_loss: 4.12630615234375
epoch: 13 training_loss 4.199807708263397 test_loss: 3.9156204223632813
epoch: 14 training_loss 4.191422436237335 test_loss: 3.953719711303711
epoch: 15 training_loss 3.9477283644676207 test_loss: 3.7045818328857423
epoch: 16 training_loss 3.925296995639801 test_loss: 3.7218360900878906
epoch: 17 training_loss 3.8139701461791993 test_loss: 3.591250991821289
epoch: 18 training_loss 3.7123972749710084 test_loss: 3.5834083557128906
epoch: 19 training_loss 3.665374233722687 test_loss: 3.6482654571533204
epoch: 20 training_loss 3.6131326723098756 test_loss: 3.5073970794677733
epoch: 21 training_loss 3.4831164979934695 test_loss: 3.3340774536132813
epoch: 22 training_loss 3.515881018638611 test_loss: 3.443439483642578
epoch: 23 training_loss 3.403689696788788 test_loss: 3.330169677734375
epoch: 24 training_loss 3.2880813097953796 test_loss: 3.311822509765625
epoch: 25 training_loss 3.2986544728279115 test_loss: 3.3181121826171873
epoch: 26 training_loss 3.3432500314712525 test_loss: 3.161734390258789
epoch: 27 training_loss 3.1989596581459043 test_loss: 3.041438865661621
epoch: 28 training_loss 3.2595387053489686 test_loss: 3.0612071990966796
epoch: 29 training_loss 3.221084291934967 test_loss: 3.098201370239258
epoch: 30 training_loss 3.2076374077796936 test_loss: 3.0584714889526365
epoch: 31 training_loss 3.050509901046753 test_loss: 3.037177085876465
epoch: 32 training_loss 3.0583840107917784 test_loss: 3.157668113708496
epoch: 33 training_loss 3.179147744178772 test_loss: 2.8889797210693358
epoch: 34 training_loss 3.006348900794983 test_loss: 2.9075918197631836
epoch: 35 training_loss 3.0775137531757353 test_loss: 3.136783409118652
epoch: 36 training_loss 3.013970921039581 test_loss: 2.8873430252075196
epoch: 37 training_loss 3.0252014994621277 test_loss: 3.013993835449219
epoch: 38 training_loss 3.0171502017974854 test_loss: 2.8487665176391603
epoch: 39 training_loss 2.9158347392082216 test_loss: 2.9633481979370115
epoch: 40 training_loss 2.901280815601349 test_loss: 2.8118354797363283
epoch: 41 training_loss 2.842318208217621 test_loss: 2.8051836013793947
epoch: 42 training_loss 2.9169012618064882 test_loss: 3.001008224487305
epoch: 43 training_loss 2.8609761095046995 test_loss: 2.7911283493041994
epoch: 44 training_loss 2.8582217645645143 test_loss: 2.836286926269531
epoch: 45 training_loss 2.840647315979004 test_loss: 2.8004074096679688
epoch: 46 training_loss 2.7830536580085754 test_loss: 2.785091209411621
epoch: 47 training_loss 2.7966793036460875 test_loss: 2.659262466430664
epoch: 48 training_loss 2.7127404272556306 test_loss: 2.8535552978515626
epoch: 49 training_loss 2.7666798400878907 test_loss: 2.5782323837280274
epoch: 50 training_loss 2.5126038444042207 test_loss: 2.2537988662719726
epoch: 51 training_loss 2.4401166892051696 test_loss: 2.4007692337036133
epoch: 52 training_loss 2.3520128750801086 test_loss: 2.1735687255859375
epoch: 53 training_loss 2.343872036933899 test_loss: 2.336343002319336
epoch: 54 training_loss 2.3684730648994448 test_loss: 2.2715475082397463
epoch: 55 training_loss 2.3120897603034973 test_loss: 2.2115474700927735
epoch: 56 training_loss 2.337564251422882 test_loss: 2.2291133880615233
epoch: 57 training_loss 2.364344207048416 test_loss: 2.222548484802246
epoch: 58 training_loss 2.2853891324996947 test_loss: 2.0590112686157225
epoch: 59 training_loss 2.3573831129074097 test_loss: 2.2297714233398436
epoch: 60 training_loss 2.2453618443012235 test_loss: 2.291337585449219
epoch: 61 training_loss 2.2415102028846743 test_loss: 2.0394638061523436
epoch: 62 training_loss 2.24780908703804 test_loss: 2.3190011978149414
epoch: 63 training_loss 2.243950629234314 test_loss: 2.094784164428711
epoch: 64 training_loss 2.299714996814728 test_loss: 2.129340934753418
epoch: 65 training_loss 2.139822483062744 test_loss: 2.1605926513671876
epoch: 66 training_loss 2.1626962292194367 test_loss: 2.0518932342529297
epoch: 67 training_loss 2.1668988239765166 test_loss: 2.1062862396240236
epoch: 68 training_loss 2.1080876898765566 test_loss: 1.953166389465332
epoch: 69 training_loss 2.127004246711731 test_loss: 2.02900390625
epoch: 70 training_loss 2.1291484129428864 test_loss: 2.0404808044433596
epoch: 71 training_loss 2.082785849571228 test_loss: 1.9034164428710938
epoch: 72 training_loss 2.141599485874176 test_loss: 1.9774702072143555
epoch: 73 training_loss 2.057195475101471 test_loss: 2.0763614654541014
epoch: 74 training_loss 2.048251819610596 test_loss: 1.9751857757568358
epoch: 75 training_loss 2.0272288978099824 test_loss: 2.0771915435791017
epoch: 76 training_loss 2.0809875965118407 test_loss: 1.9915796279907227
epoch: 77 training_loss 2.0024631178379058 test_loss: 1.9939027786254884
epoch: 78 training_loss 2.0410734367370607 test_loss: 2.0948341369628904
epoch: 79 training_loss 2.0612185370922087 test_loss: 1.9497400283813477
epoch: 80 training_loss 2.0097147679328917 test_loss: 2.067416191101074
epoch: 81 training_loss 2.023268721103668 test_loss: 2.053420829772949
epoch: 82 training_loss 1.9332165026664734 test_loss: 1.901629638671875
epoch: 83 training_loss 2.005610612630844 test_loss: 2.1218338012695312
epoch: 84 training_loss 1.9709193050861358 test_loss: 1.8922439575195313
epoch: 85 training_loss 1.9797731244564056 test_loss: 1.9152236938476563
epoch: 86 training_loss 1.9922500693798064 test_loss: 1.934708595275879
epoch: 87 training_loss 1.9995513272285461 test_loss: 2.0362749099731445
epoch: 88 training_loss 1.9661290073394775 test_loss: 1.8439489364624024
epoch: 89 training_loss 1.9324057173728943 test_loss: 1.8087173461914063
epoch: 90 training_loss 1.9604483819007874 test_loss: 1.9225000381469726
epoch: 91 training_loss 1.8596451795101165 test_loss: 1.8275854110717773
epoch: 92 training_loss 1.924018119573593 test_loss: 1.8557641983032227
epoch: 93 training_loss 1.8911787116527556 test_loss: 1.782491111755371
epoch: 94 training_loss 1.9399163460731506 test_loss: 1.881525421142578
epoch: 95 training_loss 1.8981367659568786 test_loss: 1.8078573226928711
epoch: 96 training_loss 1.9027326643466949 test_loss: 1.8132829666137695
epoch: 97 training_loss 1.912671011686325 test_loss: 1.7201499938964844
epoch: 98 training_loss 1.8546894192695618 test_loss: 1.7788909912109374
epoch: 99 training_loss 1.820108333826065 test_loss: 1.7995832443237305
epoch: 100 training_loss 1.8814481055736543 test_loss: 1.8269445419311523
epoch: 101 training_loss 1.844160214662552 test_loss: 1.8079681396484375
epoch: 102 training_loss 1.8490399420261383 test_loss: 2.03031005859375
epoch: 103 training_loss 1.8279124879837036 test_loss: 1.8134014129638671
epoch: 104 training_loss 1.7987758362293242 test_loss: 1.7597766876220704
epoch: 105 training_loss 1.8450535941123962 test_loss: 1.9122274398803711
epoch: 106 training_loss 1.8394600284099578 test_loss: 1.8435230255126953
epoch: 107 training_loss 1.8986785006523133 test_loss: 1.7610530853271484
epoch: 108 training_loss 1.755658539533615 test_loss: 1.730854606628418
epoch: 109 training_loss 1.868635697364807 test_loss: 1.7904775619506836
epoch: 110 training_loss 1.837068876028061 test_loss: 1.7381074905395508
epoch: 111 training_loss 1.8159933280944824 test_loss: 1.7674318313598634
epoch: 112 training_loss 1.8444564497470857 test_loss: 2.034122657775879
epoch: 113 training_loss 1.7632623612880707 test_loss: 1.7603042602539063
epoch: 114 training_loss 1.7734434306621552 test_loss: 1.830674171447754
epoch: 115 training_loss 1.805890153646469 test_loss: 1.677943229675293
epoch: 116 training_loss 1.7983267652988433 test_loss: 1.7342121124267578
epoch: 117 training_loss 1.7917208874225616 test_loss: 1.5342223167419433
epoch: 118 training_loss 1.7501025652885438 test_loss: 1.8136381149291991
epoch: 119 training_loss 1.7389422595500945 test_loss: 1.7911544799804688
epoch: 120 training_loss 1.7243284809589385 test_loss: 1.6367765426635743
epoch: 121 training_loss 1.8175672090053558 test_loss: 1.7210933685302734
epoch: 122 training_loss 1.8155796098709107 test_loss: 1.7619640350341796
epoch: 123 training_loss 1.7848159897327422 test_loss: 1.740030860900879
epoch: 124 training_loss 1.6881834363937378 test_loss: 1.725485610961914
epoch: 125 training_loss 1.73104762673378 test_loss: 1.6001554489135743
epoch: 126 training_loss 1.7286132335662843 test_loss: 1.8224929809570312
epoch: 127 training_loss 1.7343940114974976 test_loss: 1.6668546676635743
epoch: 128 training_loss 1.7763434696197509 test_loss: 1.5765439033508302
epoch: 129 training_loss 1.7064235532283782 test_loss: 1.6526876449584962
epoch: 130 training_loss 1.7018302989006042 test_loss: 1.6883329391479491
epoch: 131 training_loss 1.7606561589241028 test_loss: 1.6420421600341797
epoch: 132 training_loss 1.7131589937210083 test_loss: 1.6150127410888673
epoch: 133 training_loss 1.7510199093818664 test_loss: 1.8294048309326172
epoch: 134 training_loss 1.7338106894493104 test_loss: 1.598210620880127
epoch: 135 training_loss 1.7521220231056214 test_loss: 1.6460121154785157
epoch: 136 training_loss 1.7367431282997132 test_loss: 1.6049612045288086
epoch: 137 training_loss 1.7363195645809173 test_loss: 1.5011881828308105
epoch: 138 training_loss 1.6935568809509278 test_loss: 1.6119857788085938
epoch: 139 training_loss 1.6894148564338685 test_loss: 1.5025646209716796
epoch: 140 training_loss 1.6419897747039796 test_loss: 1.69397029876709
epoch: 141 training_loss 1.6816738080978393 test_loss: 1.5710112571716308
epoch: 142 training_loss 1.6456662791967391 test_loss: 1.6449731826782226
epoch: 143 training_loss 1.6675887495279311 test_loss: 1.710215187072754
epoch: 144 training_loss 1.6595866179466248 test_loss: 1.7219598770141602
epoch: 145 training_loss 1.6714901328086853 test_loss: 1.6085817337036132
epoch: 146 training_loss 1.6893496870994569 test_loss: 1.4925382614135743
epoch: 147 training_loss 1.5965986943244934 test_loss: 1.6083429336547852
epoch: 148 training_loss 1.6266906571388244 test_loss: 1.7004207611083983
epoch: 149 training_loss 1.7011031126976013 test_loss: 1.5030458450317383
132.72979181585112
episode: 0 training return: tensor(110.6505, device='cuda:0')
episode: 1 training return: tensor(123.5481, device='cuda:0')
episode: 2 training return: tensor(124.2345, device='cuda:0')
episode: 3 training return: tensor(116.6099, device='cuda:0')
epoch: 1 test_true_pfm: 131.06680269693555 sim_pfm: 115.42070800175425
episode: 4 training return: tensor(110.9947, device='cuda:0')
episode: 5 training return: tensor(119.8973, device='cuda:0')
episode: 6 training return: tensor(106.8635, device='cuda:0')
episode: 7 training return: tensor(118.1917, device='cuda:0')
epoch: 2 test_true_pfm: 129.1498537875798 sim_pfm: 116.09859916813438
episode: 8 training return: tensor(107.9171, device='cuda:0')
episode: 9 training return: tensor(112.2561, device='cuda:0')
episode: 10 training return: tensor(122.1269, device='cuda:0')
episode: 11 training return: tensor(107.7470, device='cuda:0')
epoch: 3 test_true_pfm: 132.64301981552399 sim_pfm: 105.81849300296744
episode: 12 training return: tensor(115.3951, device='cuda:0')
episode: 13 training return: tensor(119.1583, device='cuda:0')
episode: 14 training return: tensor(112.9797, device='cuda:0')
episode: 15 training return: tensor(113.1306, device='cuda:0')
epoch: 4 test_true_pfm: 133.66826577388025 sim_pfm: 108.14170843183528
episode: 16 training return: tensor(116.8796, device='cuda:0')
episode: 17 training return: tensor(121.2386, device='cuda:0')
episode: 18 training return: tensor(111.0043, device='cuda:0')
episode: 19 training return: tensor(101.8903, device='cuda:0')
epoch: 5 test_true_pfm: 129.9737640711167 sim_pfm: 100.69300291736144
episode: 20 training return: tensor(104.2187, device='cuda:0')
episode: 21 training return: tensor(110.5833, device='cuda:0')
episode: 22 training return: tensor(104.6776, device='cuda:0')
episode: 23 training return: tensor(133.0683, device='cuda:0')
epoch: 6 test_true_pfm: 135.8298302468774 sim_pfm: 113.29106980519718
episode: 24 training return: tensor(115.5702, device='cuda:0')
episode: 25 training return: tensor(110.5379, device='cuda:0')
episode: 26 training return: tensor(125.2745, device='cuda:0')
episode: 27 training return: tensor(124.6033, device='cuda:0')
epoch: 7 test_true_pfm: 131.16840903393583 sim_pfm: 109.0806337787304
episode: 28 training return: tensor(127.9694, device='cuda:0')
episode: 29 training return: tensor(108.9476, device='cuda:0')
episode: 30 training return: tensor(117.2195, device='cuda:0')
episode: 31 training return: tensor(125.1588, device='cuda:0')
epoch: 8 test_true_pfm: 131.58396342632184 sim_pfm: 121.71466279887245
episode: 32 training return: tensor(118.0687, device='cuda:0')
episode: 33 training return: tensor(125.1421, device='cuda:0')
episode: 34 training return: tensor(123.3830, device='cuda:0')
episode: 35 training return: tensor(146.8410, device='cuda:0')
epoch: 9 test_true_pfm: 136.26957516044843 sim_pfm: 129.09945709429448
episode: 36 training return: tensor(114.4782, device='cuda:0')
episode: 37 training return: tensor(117.4078, device='cuda:0')
episode: 38 training return: tensor(121.5545, device='cuda:0')
episode: 39 training return: tensor(127.2051, device='cuda:0')
epoch: 10 test_true_pfm: 130.1118599301956 sim_pfm: 119.81459222287522
episode: 40 training return: tensor(107.4768, device='cuda:0')
episode: 41 training return: tensor(132.2078, device='cuda:0')
episode: 42 training return: tensor(131.8190, device='cuda:0')
episode: 43 training return: tensor(108.0417, device='cuda:0')
epoch: 11 test_true_pfm: 129.76000188584678 sim_pfm: 127.20472576530884
episode: 44 training return: tensor(114.0112, device='cuda:0')
episode: 45 training return: tensor(111.2842, device='cuda:0')
episode: 46 training return: tensor(119.9521, device='cuda:0')
episode: 47 training return: tensor(132.5450, device='cuda:0')
epoch: 12 test_true_pfm: 134.62641634299604 sim_pfm: 125.86987073244526
episode: 48 training return: tensor(119.5906, device='cuda:0')
episode: 49 training return: tensor(126.3870, device='cuda:0')
episode: 50 training return: tensor(125.1799, device='cuda:0')
episode: 51 training return: tensor(120.2501, device='cuda:0')
epoch: 13 test_true_pfm: 132.84658003345388 sim_pfm: 118.23091252137674
episode: 52 training return: tensor(131.0281, device='cuda:0')
episode: 53 training return: tensor(104.0075, device='cuda:0')
episode: 54 training return: tensor(113.3230, device='cuda:0')
episode: 55 training return: tensor(123.2120, device='cuda:0')
epoch: 14 test_true_pfm: 131.4254204754307 sim_pfm: 124.07518374228384
episode: 56 training return: tensor(122.6912, device='cuda:0')
episode: 57 training return: tensor(120.9905, device='cuda:0')
episode: 58 training return: tensor(121.3999, device='cuda:0')
episode: 59 training return: tensor(122.1882, device='cuda:0')
epoch: 15 test_true_pfm: 129.4772350265764 sim_pfm: 128.9341927736008
episode: 60 training return: tensor(122.7210, device='cuda:0')
episode: 61 training return: tensor(115.8055, device='cuda:0')
episode: 62 training return: tensor(133.5269, device='cuda:0')
episode: 63 training return: tensor(119.7789, device='cuda:0')
epoch: 16 test_true_pfm: 132.7644456815937 sim_pfm: 117.26173462354927
episode: 64 training return: tensor(120.9498, device='cuda:0')
episode: 65 training return: tensor(128.0126, device='cuda:0')
episode: 66 training return: tensor(115.3576, device='cuda:0')
episode: 67 training return: tensor(120.9097, device='cuda:0')
epoch: 17 test_true_pfm: 131.41245581729663 sim_pfm: 125.59440758996644
episode: 68 training return: tensor(121.9686, device='cuda:0')
episode: 69 training return: tensor(125.1528, device='cuda:0')
episode: 70 training return: tensor(115.2735, device='cuda:0')
episode: 71 training return: tensor(125.4456, device='cuda:0')
epoch: 18 test_true_pfm: 129.1627836922284 sim_pfm: 120.7766073760693
episode: 72 training return: tensor(133.2323, device='cuda:0')
episode: 73 training return: tensor(116.1681, device='cuda:0')
episode: 74 training return: tensor(121.7190, device='cuda:0')
episode: 75 training return: tensor(119.6695, device='cuda:0')
epoch: 19 test_true_pfm: 130.34556764170514 sim_pfm: 129.16086764691863
episode: 76 training return: tensor(128.4729, device='cuda:0')
episode: 77 training return: tensor(128.6317, device='cuda:0')
episode: 78 training return: tensor(132.6593, device='cuda:0')
episode: 79 training return: tensor(112.6575, device='cuda:0')
epoch: 20 test_true_pfm: 129.06574458236898 sim_pfm: 114.22518470297219
episode: 80 training return: tensor(123.4091, device='cuda:0')
episode: 81 training return: tensor(121.9532, device='cuda:0')
episode: 82 training return: tensor(121.8672, device='cuda:0')
episode: 83 training return: tensor(112.6890, device='cuda:0')
epoch: 21 test_true_pfm: 130.17848937320576 sim_pfm: 118.11313935184735
episode: 84 training return: tensor(127.1116, device='cuda:0')
episode: 85 training return: tensor(118.5882, device='cuda:0')
episode: 86 training return: tensor(129.1411, device='cuda:0')
episode: 87 training return: tensor(125.0261, device='cuda:0')
epoch: 22 test_true_pfm: 130.2244695665093 sim_pfm: 119.65574788408121
episode: 88 training return: tensor(101.8943, device='cuda:0')
episode: 89 training return: tensor(122.2912, device='cuda:0')
episode: 90 training return: tensor(120.8751, device='cuda:0')
episode: 91 training return: tensor(121.0297, device='cuda:0')
epoch: 23 test_true_pfm: 128.26579931461148 sim_pfm: 119.29952161223628
episode: 92 training return: tensor(128.8173, device='cuda:0')
episode: 93 training return: tensor(125.8208, device='cuda:0')
episode: 94 training return: tensor(122.9763, device='cuda:0')
episode: 95 training return: tensor(127.5645, device='cuda:0')
epoch: 24 test_true_pfm: 125.99243874763279 sim_pfm: 117.68124311364954
episode: 96 training return: tensor(126.1421, device='cuda:0')
episode: 97 training return: tensor(122.5383, device='cuda:0')
episode: 98 training return: tensor(114.7980, device='cuda:0')
episode: 99 training return: tensor(128.5084, device='cuda:0')
epoch: 25 test_true_pfm: 126.72442561459953 sim_pfm: 116.65339914037031
episode: 100 training return: tensor(128.6599, device='cuda:0')
episode: 101 training return: tensor(119.4852, device='cuda:0')
episode: 102 training return: tensor(116.8062, device='cuda:0')
episode: 103 training return: tensor(131.5366, device='cuda:0')
epoch: 26 test_true_pfm: 127.61499178754471 sim_pfm: 119.98789792599855
episode: 104 training return: tensor(126.9481, device='cuda:0')
episode: 105 training return: tensor(103.2781, device='cuda:0')
episode: 106 training return: tensor(121.0845, device='cuda:0')
episode: 107 training return: tensor(121.1180, device='cuda:0')
epoch: 27 test_true_pfm: 124.63125281996281 sim_pfm: 118.10524101981427
episode: 108 training return: tensor(124.6001, device='cuda:0')
episode: 109 training return: tensor(118.5121, device='cuda:0')
episode: 110 training return: tensor(130.1325, device='cuda:0')
episode: 111 training return: tensor(126.9460, device='cuda:0')
epoch: 28 test_true_pfm: 130.00216322655697 sim_pfm: 130.82080660857028
episode: 112 training return: tensor(118.2230, device='cuda:0')
episode: 113 training return: tensor(121.9409, device='cuda:0')
episode: 114 training return: tensor(119.9003, device='cuda:0')
episode: 115 training return: tensor(134.0496, device='cuda:0')
epoch: 29 test_true_pfm: 126.29126233159911 sim_pfm: 123.33148149639601
episode: 116 training return: tensor(122.9341, device='cuda:0')
episode: 117 training return: tensor(128.2296, device='cuda:0')
episode: 118 training return: tensor(124.2657, device='cuda:0')
episode: 119 training return: tensor(111.2625, device='cuda:0')
epoch: 30 test_true_pfm: 127.86354963001311 sim_pfm: 122.71494869043818
episode: 120 training return: tensor(126.7498, device='cuda:0')
episode: 121 training return: tensor(113.9850, device='cuda:0')
episode: 122 training return: tensor(121.4497, device='cuda:0')
episode: 123 training return: tensor(125.9536, device='cuda:0')
epoch: 31 test_true_pfm: 127.05946414573482 sim_pfm: 122.82854781848145
episode: 124 training return: tensor(118.2200, device='cuda:0')
episode: 125 training return: tensor(127.0722, device='cuda:0')
episode: 126 training return: tensor(127.7231, device='cuda:0')
episode: 127 training return: tensor(116.9292, device='cuda:0')
epoch: 32 test_true_pfm: 130.44416429339933 sim_pfm: 124.8483726484119
episode: 128 training return: tensor(120.8100, device='cuda:0')
episode: 129 training return: tensor(122.4430, device='cuda:0')
episode: 130 training return: tensor(136.5585, device='cuda:0')
episode: 131 training return: tensor(130.3416, device='cuda:0')
epoch: 33 test_true_pfm: 128.2311774781034 sim_pfm: 124.16414183382294
episode: 132 training return: tensor(117.5694, device='cuda:0')
episode: 133 training return: tensor(124.6767, device='cuda:0')
episode: 134 training return: tensor(131.3247, device='cuda:0')
episode: 135 training return: tensor(123.4268, device='cuda:0')
epoch: 34 test_true_pfm: 125.85437182185083 sim_pfm: 127.94398705321946
episode: 136 training return: tensor(122.3778, device='cuda:0')
episode: 137 training return: tensor(124.4719, device='cuda:0')
episode: 138 training return: tensor(128.3478, device='cuda:0')
episode: 139 training return: tensor(109.9887, device='cuda:0')
epoch: 35 test_true_pfm: 131.73406717282768 sim_pfm: 113.96847390520853
episode: 140 training return: tensor(127.5036, device='cuda:0')
episode: 141 training return: tensor(122.9573, device='cuda:0')
episode: 142 training return: tensor(131.0524, device='cuda:0')
episode: 143 training return: tensor(119.8837, device='cuda:0')
epoch: 36 test_true_pfm: 128.25242063908553 sim_pfm: 128.25100248351228
episode: 144 training return: tensor(114.7897, device='cuda:0')
episode: 145 training return: tensor(128.9106, device='cuda:0')
episode: 146 training return: tensor(130.8623, device='cuda:0')
episode: 147 training return: tensor(128.2317, device='cuda:0')
epoch: 37 test_true_pfm: 128.8515371551876 sim_pfm: 129.16188965828042
episode: 148 training return: tensor(119.0356, device='cuda:0')
episode: 149 training return: tensor(130.1943, device='cuda:0')
episode: 150 training return: tensor(131.4840, device='cuda:0')
episode: 151 training return: tensor(122.4228, device='cuda:0')
epoch: 38 test_true_pfm: 131.167376373001 sim_pfm: 127.71925370526151
episode: 152 training return: tensor(118.8716, device='cuda:0')
episode: 153 training return: tensor(133.5996, device='cuda:0')
episode: 154 training return: tensor(123.8392, device='cuda:0')
episode: 155 training return: tensor(136.4798, device='cuda:0')
epoch: 39 test_true_pfm: 131.39116599307914 sim_pfm: 124.90150552215054
episode: 156 training return: tensor(120.6409, device='cuda:0')
episode: 157 training return: tensor(117.9101, device='cuda:0')
episode: 158 training return: tensor(126.0130, device='cuda:0')
episode: 159 training return: tensor(130.9588, device='cuda:0')
epoch: 40 test_true_pfm: 131.35003284310324 sim_pfm: 116.2013440543611
episode: 160 training return: tensor(134.5520, device='cuda:0')
episode: 161 training return: tensor(114.1547, device='cuda:0')
episode: 162 training return: tensor(132.2135, device='cuda:0')
episode: 163 training return: tensor(132.5938, device='cuda:0')
epoch: 41 test_true_pfm: 132.0924476490887 sim_pfm: 123.46039443608024
episode: 164 training return: tensor(126.5029, device='cuda:0')
episode: 165 training return: tensor(125.9467, device='cuda:0')
episode: 166 training return: tensor(113.3524, device='cuda:0')
episode: 167 training return: tensor(131.0841, device='cuda:0')
epoch: 42 test_true_pfm: 130.15751007019827 sim_pfm: 119.07392229488468
episode: 168 training return: tensor(128.5193, device='cuda:0')
episode: 169 training return: tensor(123.4591, device='cuda:0')
episode: 170 training return: tensor(133.5940, device='cuda:0')
episode: 171 training return: tensor(123.2870, device='cuda:0')
epoch: 43 test_true_pfm: 129.04117586736214 sim_pfm: 128.59040697026066
episode: 172 training return: tensor(136.6682, device='cuda:0')
episode: 173 training return: tensor(139.5939, device='cuda:0')
episode: 174 training return: tensor(119.9493, device='cuda:0')
episode: 175 training return: tensor(132.5281, device='cuda:0')
epoch: 44 test_true_pfm: 130.05124399549305 sim_pfm: 122.96571701313951
episode: 176 training return: tensor(120.0576, device='cuda:0')
episode: 177 training return: tensor(125.4692, device='cuda:0')
episode: 178 training return: tensor(133.5211, device='cuda:0')
episode: 179 training return: tensor(121.5880, device='cuda:0')
epoch: 45 test_true_pfm: 130.49825310307068 sim_pfm: 118.82390868365182
episode: 180 training return: tensor(125.5256, device='cuda:0')
episode: 181 training return: tensor(120.5970, device='cuda:0')
episode: 182 training return: tensor(135.9018, device='cuda:0')
episode: 183 training return: tensor(126.6323, device='cuda:0')
epoch: 46 test_true_pfm: 130.60841736944312 sim_pfm: 127.36107605848228
episode: 184 training return: tensor(115.6644, device='cuda:0')
episode: 185 training return: tensor(128.5351, device='cuda:0')
episode: 186 training return: tensor(120.0874, device='cuda:0')
episode: 187 training return: tensor(122.8931, device='cuda:0')
epoch: 47 test_true_pfm: 128.63959912873952 sim_pfm: 127.47606886388967
episode: 188 training return: tensor(122.4221, device='cuda:0')
episode: 189 training return: tensor(125.4080, device='cuda:0')
episode: 190 training return: tensor(122.0744, device='cuda:0')
episode: 191 training return: tensor(131.9309, device='cuda:0')
epoch: 48 test_true_pfm: 128.90105794184313 sim_pfm: 121.11432326924987
episode: 192 training return: tensor(137.3299, device='cuda:0')
episode: 193 training return: tensor(106.0303, device='cuda:0')
episode: 194 training return: tensor(123.0312, device='cuda:0')
episode: 195 training return: tensor(134.9719, device='cuda:0')
epoch: 49 test_true_pfm: 131.36495467374806 sim_pfm: 126.91660996502033
episode: 196 training return: tensor(127.9250, device='cuda:0')
episode: 197 training return: tensor(134.1837, device='cuda:0')
episode: 198 training return: tensor(134.2920, device='cuda:0')
episode: 199 training return: tensor(132.1120, device='cuda:0')
epoch: 50 test_true_pfm: 127.35739980095104 sim_pfm: 124.8257579307945
episode: 200 training return: tensor(134.6536, device='cuda:0')
episode: 201 training return: tensor(131.2577, device='cuda:0')
episode: 202 training return: tensor(122.7218, device='cuda:0')
episode: 203 training return: tensor(124.1476, device='cuda:0')
epoch: 51 test_true_pfm: 130.2051101516838 sim_pfm: 133.07743451870047
episode: 204 training return: tensor(124.2001, device='cuda:0')
episode: 205 training return: tensor(126.3139, device='cuda:0')
episode: 206 training return: tensor(131.4404, device='cuda:0')
episode: 207 training return: tensor(134.9162, device='cuda:0')
epoch: 52 test_true_pfm: 131.0101931009402 sim_pfm: 130.00214722251985
episode: 208 training return: tensor(127.8187, device='cuda:0')
episode: 209 training return: tensor(125.6028, device='cuda:0')
episode: 210 training return: tensor(122.2011, device='cuda:0')
episode: 211 training return: tensor(120.9306, device='cuda:0')
epoch: 53 test_true_pfm: 129.91760364291437 sim_pfm: 134.54237484728802
episode: 212 training return: tensor(129.2574, device='cuda:0')
episode: 213 training return: tensor(130.9203, device='cuda:0')
episode: 214 training return: tensor(133.2719, device='cuda:0')
episode: 215 training return: tensor(136.8452, device='cuda:0')
epoch: 54 test_true_pfm: 131.19444963998313 sim_pfm: 130.8312179845816
episode: 216 training return: tensor(134.2937, device='cuda:0')
episode: 217 training return: tensor(128.3788, device='cuda:0')
episode: 218 training return: tensor(129.5060, device='cuda:0')
episode: 219 training return: tensor(126.6834, device='cuda:0')
epoch: 55 test_true_pfm: 129.21256442231333 sim_pfm: 135.16963852008922
episode: 220 training return: tensor(126.5763, device='cuda:0')
episode: 221 training return: tensor(132.4611, device='cuda:0')
episode: 222 training return: tensor(118.4607, device='cuda:0')
episode: 223 training return: tensor(123.5242, device='cuda:0')
epoch: 56 test_true_pfm: 128.93050425441402 sim_pfm: 131.72064140706206
episode: 224 training return: tensor(122.7325, device='cuda:0')
episode: 225 training return: tensor(123.6877, device='cuda:0')
episode: 226 training return: tensor(131.5544, device='cuda:0')
episode: 227 training return: tensor(132.6886, device='cuda:0')
epoch: 57 test_true_pfm: 131.17085177708316 sim_pfm: 125.28338690606179
episode: 228 training return: tensor(115.7361, device='cuda:0')
episode: 229 training return: tensor(132.6969, device='cuda:0')
episode: 230 training return: tensor(124.9180, device='cuda:0')
episode: 231 training return: tensor(132.1345, device='cuda:0')
epoch: 58 test_true_pfm: 131.86770883931408 sim_pfm: 129.7755417307373
episode: 232 training return: tensor(140.8109, device='cuda:0')
episode: 233 training return: tensor(130.6344, device='cuda:0')
episode: 234 training return: tensor(111.0822, device='cuda:0')
episode: 235 training return: tensor(127.9673, device='cuda:0')
epoch: 59 test_true_pfm: 127.75433033583163 sim_pfm: 126.08882803475134
episode: 236 training return: tensor(122.3721, device='cuda:0')
episode: 237 training return: tensor(131.2451, device='cuda:0')
episode: 238 training return: tensor(134.6198, device='cuda:0')
episode: 239 training return: tensor(123.4936, device='cuda:0')
epoch: 60 test_true_pfm: 130.93927868246868 sim_pfm: 128.3393001265067
episode: 240 training return: tensor(123.2914, device='cuda:0')
episode: 241 training return: tensor(123.5804, device='cuda:0')
episode: 242 training return: tensor(119.9809, device='cuda:0')
episode: 243 training return: tensor(138.1691, device='cuda:0')
epoch: 61 test_true_pfm: 131.10579375397566 sim_pfm: 133.1457057742635
episode: 244 training return: tensor(134.1027, device='cuda:0')
episode: 245 training return: tensor(131.9413, device='cuda:0')
episode: 246 training return: tensor(125.2130, device='cuda:0')
episode: 247 training return: tensor(130.4775, device='cuda:0')
epoch: 62 test_true_pfm: 132.6169025940921 sim_pfm: 124.451054004248
episode: 248 training return: tensor(119.5132, device='cuda:0')
episode: 249 training return: tensor(118.7458, device='cuda:0')
episode: 250 training return: tensor(136.7210, device='cuda:0')
episode: 251 training return: tensor(133.1807, device='cuda:0')
epoch: 63 test_true_pfm: 129.80800038296235 sim_pfm: 121.9779122254462
episode: 252 training return: tensor(130.4453, device='cuda:0')
episode: 253 training return: tensor(117.7041, device='cuda:0')
episode: 254 training return: tensor(133.2583, device='cuda:0')
episode: 255 training return: tensor(133.9687, device='cuda:0')
epoch: 64 test_true_pfm: 130.52655226286942 sim_pfm: 125.96515795760789
episode: 256 training return: tensor(129.9933, device='cuda:0')
episode: 257 training return: tensor(125.1460, device='cuda:0')
episode: 258 training return: tensor(130.2966, device='cuda:0')
episode: 259 training return: tensor(135.1203, device='cuda:0')
epoch: 65 test_true_pfm: 129.30241625119677 sim_pfm: 131.62035377233406
episode: 260 training return: tensor(129.1864, device='cuda:0')
episode: 261 training return: tensor(113.8346, device='cuda:0')
episode: 262 training return: tensor(120.6023, device='cuda:0')
episode: 263 training return: tensor(132.4568, device='cuda:0')
epoch: 66 test_true_pfm: 133.6058521055342 sim_pfm: 126.88071605002624
episode: 264 training return: tensor(122.2199, device='cuda:0')
episode: 265 training return: tensor(128.7859, device='cuda:0')
episode: 266 training return: tensor(114.9263, device='cuda:0')
episode: 267 training return: tensor(127.5925, device='cuda:0')
epoch: 67 test_true_pfm: 130.10695677941672 sim_pfm: 133.18980701445835
episode: 268 training return: tensor(132.6937, device='cuda:0')
episode: 269 training return: tensor(127.4598, device='cuda:0')
episode: 270 training return: tensor(120.3484, device='cuda:0')
episode: 271 training return: tensor(123.8757, device='cuda:0')
epoch: 68 test_true_pfm: 127.80913018123815 sim_pfm: 127.44651750389603
episode: 272 training return: tensor(130.9533, device='cuda:0')
episode: 273 training return: tensor(133.9087, device='cuda:0')
episode: 274 training return: tensor(122.4123, device='cuda:0')
episode: 275 training return: tensor(129.3949, device='cuda:0')
epoch: 69 test_true_pfm: 130.30183240190064 sim_pfm: 126.31133589552482
episode: 276 training return: tensor(123.5837, device='cuda:0')
episode: 277 training return: tensor(130.6584, device='cuda:0')
episode: 278 training return: tensor(127.5717, device='cuda:0')
episode: 279 training return: tensor(123.9380, device='cuda:0')
epoch: 70 test_true_pfm: 131.47599734996953 sim_pfm: 131.91876194520154
episode: 280 training return: tensor(130.7167, device='cuda:0')
episode: 281 training return: tensor(125.4084, device='cuda:0')
episode: 282 training return: tensor(115.9814, device='cuda:0')
episode: 283 training return: tensor(134.7605, device='cuda:0')
epoch: 71 test_true_pfm: 129.93687018460085 sim_pfm: 124.89348871539114
episode: 284 training return: tensor(133.6215, device='cuda:0')
episode: 285 training return: tensor(132.6360, device='cuda:0')
episode: 286 training return: tensor(131.2280, device='cuda:0')
episode: 287 training return: tensor(130.8071, device='cuda:0')
epoch: 72 test_true_pfm: 130.29654985409897 sim_pfm: 128.3237324320944
episode: 288 training return: tensor(119.7950, device='cuda:0')
episode: 289 training return: tensor(128.8892, device='cuda:0')
episode: 290 training return: tensor(131.1881, device='cuda:0')
episode: 291 training return: tensor(125.6039, device='cuda:0')
epoch: 73 test_true_pfm: 128.61026786751214 sim_pfm: 122.39578601296526
episode: 292 training return: tensor(134.8994, device='cuda:0')
episode: 293 training return: tensor(128.5024, device='cuda:0')
episode: 294 training return: tensor(117.5269, device='cuda:0')
episode: 295 training return: tensor(132.2523, device='cuda:0')
epoch: 74 test_true_pfm: 131.02405075915908 sim_pfm: 132.71774027016946
episode: 296 training return: tensor(122.5750, device='cuda:0')
episode: 297 training return: tensor(130.9133, device='cuda:0')
episode: 298 training return: tensor(122.6995, device='cuda:0')
episode: 299 training return: tensor(127.4690, device='cuda:0')
epoch: 75 test_true_pfm: 130.35550214598976 sim_pfm: 130.61473265461973
episode: 300 training return: tensor(136.0482, device='cuda:0')
episode: 301 training return: tensor(129.7664, device='cuda:0')
episode: 302 training return: tensor(123.0623, device='cuda:0')
episode: 303 training return: tensor(120.3428, device='cuda:0')
epoch: 76 test_true_pfm: 132.14742413056402 sim_pfm: 133.30302153521916
episode: 304 training return: tensor(130.8473, device='cuda:0')
episode: 305 training return: tensor(130.6761, device='cuda:0')
episode: 306 training return: tensor(124.0263, device='cuda:0')
episode: 307 training return: tensor(140.1553, device='cuda:0')
epoch: 77 test_true_pfm: 128.90004854476234 sim_pfm: 121.4862255691085
episode: 308 training return: tensor(131.0045, device='cuda:0')
episode: 309 training return: tensor(131.1671, device='cuda:0')
episode: 310 training return: tensor(116.9447, device='cuda:0')
episode: 311 training return: tensor(127.8662, device='cuda:0')
epoch: 78 test_true_pfm: 129.85761908249677 sim_pfm: 124.69141364315874
episode: 312 training return: tensor(118.0506, device='cuda:0')
episode: 313 training return: tensor(130.9896, device='cuda:0')
episode: 314 training return: tensor(132.8906, device='cuda:0')
episode: 315 training return: tensor(116.6753, device='cuda:0')
epoch: 79 test_true_pfm: 130.1506817901955 sim_pfm: 128.0108846420422
episode: 316 training return: tensor(133.4642, device='cuda:0')
episode: 317 training return: tensor(125.2370, device='cuda:0')
episode: 318 training return: tensor(131.9205, device='cuda:0')
episode: 319 training return: tensor(131.1373, device='cuda:0')
epoch: 80 test_true_pfm: 129.49708065354753 sim_pfm: 130.4651987359044
episode: 320 training return: tensor(118.1060, device='cuda:0')
episode: 321 training return: tensor(123.6595, device='cuda:0')
episode: 322 training return: tensor(129.0002, device='cuda:0')
episode: 323 training return: tensor(124.0891, device='cuda:0')
epoch: 81 test_true_pfm: 127.34236431104746 sim_pfm: 123.35171965117915
episode: 324 training return: tensor(139.9125, device='cuda:0')
episode: 325 training return: tensor(135.5251, device='cuda:0')
episode: 326 training return: tensor(139.4737, device='cuda:0')
episode: 327 training return: tensor(126.9418, device='cuda:0')
epoch: 82 test_true_pfm: 130.3134288183092 sim_pfm: 131.13992657274358
episode: 328 training return: tensor(133.5141, device='cuda:0')
episode: 329 training return: tensor(130.3734, device='cuda:0')
episode: 330 training return: tensor(128.9809, device='cuda:0')
episode: 331 training return: tensor(119.8189, device='cuda:0')
epoch: 83 test_true_pfm: 131.72589209195843 sim_pfm: 127.83208665925777
episode: 332 training return: tensor(125.8333, device='cuda:0')
episode: 333 training return: tensor(133.4843, device='cuda:0')
episode: 334 training return: tensor(141.0923, device='cuda:0')
episode: 335 training return: tensor(139.3884, device='cuda:0')
epoch: 84 test_true_pfm: 128.44530565287795 sim_pfm: 130.10312685998505
episode: 336 training return: tensor(130.5175, device='cuda:0')
episode: 337 training return: tensor(129.4139, device='cuda:0')
episode: 338 training return: tensor(133.1328, device='cuda:0')
episode: 339 training return: tensor(124.5806, device='cuda:0')
epoch: 85 test_true_pfm: 128.84761379180173 sim_pfm: 122.84790881298832
episode: 340 training return: tensor(136.0877, device='cuda:0')
episode: 341 training return: tensor(126.4488, device='cuda:0')
episode: 342 training return: tensor(126.8269, device='cuda:0')
episode: 343 training return: tensor(126.6649, device='cuda:0')
epoch: 86 test_true_pfm: 130.37901415874487 sim_pfm: 129.5270342419273
episode: 344 training return: tensor(123.3192, device='cuda:0')
episode: 345 training return: tensor(122.7187, device='cuda:0')
episode: 346 training return: tensor(119.2114, device='cuda:0')
episode: 347 training return: tensor(116.9323, device='cuda:0')
epoch: 87 test_true_pfm: 129.15222322441485 sim_pfm: 127.96165959403734
episode: 348 training return: tensor(123.1975, device='cuda:0')
episode: 349 training return: tensor(133.6781, device='cuda:0')
episode: 350 training return: tensor(128.2275, device='cuda:0')
episode: 351 training return: tensor(133.1656, device='cuda:0')
epoch: 88 test_true_pfm: 129.71001153721733 sim_pfm: 132.89052652576356
episode: 352 training return: tensor(122.2225, device='cuda:0')
episode: 353 training return: tensor(126.2663, device='cuda:0')
episode: 354 training return: tensor(129.4796, device='cuda:0')
episode: 355 training return: tensor(135.2067, device='cuda:0')
epoch: 89 test_true_pfm: 131.8756476644079 sim_pfm: 130.01769990056056
episode: 356 training return: tensor(129.1727, device='cuda:0')
episode: 357 training return: tensor(121.0857, device='cuda:0')
episode: 358 training return: tensor(140.4743, device='cuda:0')
episode: 359 training return: tensor(119.8955, device='cuda:0')
epoch: 90 test_true_pfm: 128.74695110963535 sim_pfm: 126.52609608536586
episode: 360 training return: tensor(135.5779, device='cuda:0')
episode: 361 training return: tensor(130.5775, device='cuda:0')
episode: 362 training return: tensor(134.3357, device='cuda:0')
episode: 363 training return: tensor(117.2662, device='cuda:0')
epoch: 91 test_true_pfm: 130.49514440889297 sim_pfm: 130.94391149106667
episode: 364 training return: tensor(131.4671, device='cuda:0')
episode: 365 training return: tensor(116.3672, device='cuda:0')
episode: 366 training return: tensor(122.2344, device='cuda:0')
episode: 367 training return: tensor(124.5820, device='cuda:0')
epoch: 92 test_true_pfm: 130.6437425588752 sim_pfm: 127.72398625254282
episode: 368 training return: tensor(121.8301, device='cuda:0')
episode: 369 training return: tensor(135.3259, device='cuda:0')
episode: 370 training return: tensor(123.5373, device='cuda:0')
episode: 371 training return: tensor(133.0185, device='cuda:0')
epoch: 93 test_true_pfm: 132.11988826483918 sim_pfm: 131.44903774166013
episode: 372 training return: tensor(131.4834, device='cuda:0')
episode: 373 training return: tensor(121.4063, device='cuda:0')
episode: 374 training return: tensor(135.1095, device='cuda:0')
episode: 375 training return: tensor(120.2029, device='cuda:0')
epoch: 94 test_true_pfm: 131.88418297344305 sim_pfm: 125.01571169319213
episode: 376 training return: tensor(125.0328, device='cuda:0')
episode: 377 training return: tensor(126.9856, device='cuda:0')
episode: 378 training return: tensor(129.5823, device='cuda:0')
episode: 379 training return: tensor(120.8868, device='cuda:0')
epoch: 95 test_true_pfm: 129.62037594292676 sim_pfm: 127.87620700154221
episode: 380 training return: tensor(125.3464, device='cuda:0')
episode: 381 training return: tensor(122.5134, device='cuda:0')
episode: 382 training return: tensor(123.5437, device='cuda:0')
episode: 383 training return: tensor(123.0636, device='cuda:0')
epoch: 96 test_true_pfm: 128.42485838898227 sim_pfm: 131.96426176925306
episode: 384 training return: tensor(134.0518, device='cuda:0')
episode: 385 training return: tensor(132.4933, device='cuda:0')
episode: 386 training return: tensor(114.1647, device='cuda:0')
episode: 387 training return: tensor(117.0775, device='cuda:0')
epoch: 97 test_true_pfm: 131.48479913449728 sim_pfm: 132.74143596334616
episode: 388 training return: tensor(135.5250, device='cuda:0')
episode: 389 training return: tensor(131.2064, device='cuda:0')
episode: 390 training return: tensor(120.4829, device='cuda:0')
episode: 391 training return: tensor(125.1525, device='cuda:0')
epoch: 98 test_true_pfm: 127.41928031933404 sim_pfm: 126.31220628714655
episode: 392 training return: tensor(135.3527, device='cuda:0')
episode: 393 training return: tensor(128.3494, device='cuda:0')
episode: 394 training return: tensor(138.7627, device='cuda:0')
episode: 395 training return: tensor(138.2601, device='cuda:0')
epoch: 99 test_true_pfm: 129.77525030603658 sim_pfm: 133.9654590631253
episode: 396 training return: tensor(120.4755, device='cuda:0')
episode: 397 training return: tensor(130.3639, device='cuda:0')
episode: 398 training return: tensor(127.2848, device='cuda:0')
episode: 399 training return: tensor(121.5703, device='cuda:0')
epoch: 100 test_true_pfm: 128.36535184958035 sim_pfm: 127.04506118535065
episode: 400 training return: tensor(133.8135, device='cuda:0')
episode: 401 training return: tensor(112.6159, device='cuda:0')
episode: 402 training return: tensor(125.7609, device='cuda:0')
episode: 403 training return: tensor(113.9825, device='cuda:0')
epoch: 101 test_true_pfm: 130.82000422021025 sim_pfm: 126.4874732547847
episode: 404 training return: tensor(120.8905, device='cuda:0')
episode: 405 training return: tensor(121.1218, device='cuda:0')
episode: 406 training return: tensor(124.4522, device='cuda:0')
episode: 407 training return: tensor(127.0888, device='cuda:0')
epoch: 102 test_true_pfm: 128.08421497114463 sim_pfm: 127.2178415598988
episode: 408 training return: tensor(129.8103, device='cuda:0')
episode: 409 training return: tensor(125.7944, device='cuda:0')
episode: 410 training return: tensor(133.8953, device='cuda:0')
episode: 411 training return: tensor(120.9720, device='cuda:0')
epoch: 103 test_true_pfm: 129.37752985904473 sim_pfm: 128.59047208030825
episode: 412 training return: tensor(125.0269, device='cuda:0')
episode: 413 training return: tensor(113.8753, device='cuda:0')
episode: 414 training return: tensor(129.4104, device='cuda:0')
episode: 415 training return: tensor(132.2489, device='cuda:0')
epoch: 104 test_true_pfm: 130.3536828168291 sim_pfm: 133.5605382139911
episode: 416 training return: tensor(131.4742, device='cuda:0')
episode: 417 training return: tensor(119.2416, device='cuda:0')
episode: 418 training return: tensor(132.3589, device='cuda:0')
episode: 419 training return: tensor(130.9688, device='cuda:0')
epoch: 105 test_true_pfm: 132.26821466463474 sim_pfm: 131.79036796976578
episode: 420 training return: tensor(124.3761, device='cuda:0')
episode: 421 training return: tensor(132.1152, device='cuda:0')
episode: 422 training return: tensor(119.6217, device='cuda:0')
episode: 423 training return: tensor(124.0098, device='cuda:0')
epoch: 106 test_true_pfm: 132.72207216428004 sim_pfm: 128.03179523794097
episode: 424 training return: tensor(133.6893, device='cuda:0')
episode: 425 training return: tensor(120.3695, device='cuda:0')
episode: 426 training return: tensor(129.3762, device='cuda:0')
episode: 427 training return: tensor(119.9676, device='cuda:0')
epoch: 107 test_true_pfm: 131.48279641949426 sim_pfm: 131.53082493706023
episode: 428 training return: tensor(133.3692, device='cuda:0')
episode: 429 training return: tensor(134.4171, device='cuda:0')
episode: 430 training return: tensor(127.1759, device='cuda:0')
episode: 431 training return: tensor(130.5969, device='cuda:0')
epoch: 108 test_true_pfm: 132.22953546367543 sim_pfm: 128.40439742452
episode: 432 training return: tensor(134.0794, device='cuda:0')
episode: 433 training return: tensor(137.9881, device='cuda:0')
episode: 434 training return: tensor(133.5746, device='cuda:0')
episode: 435 training return: tensor(147.3999, device='cuda:0')
epoch: 109 test_true_pfm: 127.71582227099802 sim_pfm: 130.92207890537102
episode: 436 training return: tensor(130.8522, device='cuda:0')
episode: 437 training return: tensor(131.1555, device='cuda:0')
episode: 438 training return: tensor(124.9067, device='cuda:0')
episode: 439 training return: tensor(137.6709, device='cuda:0')
epoch: 110 test_true_pfm: 126.96873227501112 sim_pfm: 124.30524745939765
episode: 440 training return: tensor(137.3400, device='cuda:0')
episode: 441 training return: tensor(130.0692, device='cuda:0')
episode: 442 training return: tensor(129.9657, device='cuda:0')
episode: 443 training return: tensor(117.3310, device='cuda:0')
epoch: 111 test_true_pfm: 134.88484610829104 sim_pfm: 113.77882334081224
episode: 444 training return: tensor(110.3806, device='cuda:0')
episode: 445 training return: tensor(116.5969, device='cuda:0')
episode: 446 training return: tensor(122.2336, device='cuda:0')
episode: 447 training return: tensor(115.4875, device='cuda:0')
epoch: 112 test_true_pfm: 130.6911695461412 sim_pfm: 122.63994827951538
episode: 448 training return: tensor(123.4062, device='cuda:0')
episode: 449 training return: tensor(130.4563, device='cuda:0')
episode: 450 training return: tensor(122.5171, device='cuda:0')
episode: 451 training return: tensor(119.5647, device='cuda:0')
epoch: 113 test_true_pfm: 130.53838257853857 sim_pfm: 124.01892374294111
episode: 452 training return: tensor(138.9415, device='cuda:0')
episode: 453 training return: tensor(131.6186, device='cuda:0')
episode: 454 training return: tensor(125.0043, device='cuda:0')
episode: 455 training return: tensor(134.2803, device='cuda:0')
epoch: 114 test_true_pfm: 132.74060270079156 sim_pfm: 125.55015818540706
episode: 456 training return: tensor(135.0614, device='cuda:0')
episode: 457 training return: tensor(129.8086, device='cuda:0')
episode: 458 training return: tensor(133.0966, device='cuda:0')
episode: 459 training return: tensor(125.9102, device='cuda:0')
epoch: 115 test_true_pfm: 131.46392617529506 sim_pfm: 129.0020547077642
episode: 460 training return: tensor(116.7821, device='cuda:0')
episode: 461 training return: tensor(131.5324, device='cuda:0')
episode: 462 training return: tensor(127.3266, device='cuda:0')
episode: 463 training return: tensor(132.8629, device='cuda:0')
epoch: 116 test_true_pfm: 130.49528068989054 sim_pfm: 125.48034010374104
episode: 464 training return: tensor(128.1358, device='cuda:0')
episode: 465 training return: tensor(146.6558, device='cuda:0')
episode: 466 training return: tensor(132.2294, device='cuda:0')
episode: 467 training return: tensor(128.3827, device='cuda:0')
epoch: 117 test_true_pfm: 129.50380256712546 sim_pfm: 130.54352561635315
episode: 468 training return: tensor(138.2355, device='cuda:0')
episode: 469 training return: tensor(137.1240, device='cuda:0')
episode: 470 training return: tensor(118.9606, device='cuda:0')
episode: 471 training return: tensor(137.3903, device='cuda:0')
epoch: 118 test_true_pfm: 131.63055839590277 sim_pfm: 114.28227405855432
episode: 472 training return: tensor(133.6794, device='cuda:0')
episode: 473 training return: tensor(122.5267, device='cuda:0')
episode: 474 training return: tensor(124.6843, device='cuda:0')
episode: 475 training return: tensor(132.8308, device='cuda:0')
epoch: 119 test_true_pfm: 131.4316152868551 sim_pfm: 132.13509476967738
episode: 476 training return: tensor(131.6059, device='cuda:0')
episode: 477 training return: tensor(124.1285, device='cuda:0')
episode: 478 training return: tensor(125.2050, device='cuda:0')
episode: 479 training return: tensor(127.9116, device='cuda:0')
epoch: 120 test_true_pfm: 130.39144085713752 sim_pfm: 129.86347421358806
episode: 480 training return: tensor(117.1563, device='cuda:0')
episode: 481 training return: tensor(136.3165, device='cuda:0')
episode: 482 training return: tensor(135.8802, device='cuda:0')
episode: 483 training return: tensor(122.8295, device='cuda:0')
epoch: 121 test_true_pfm: 129.65397975995222 sim_pfm: 130.31543029018795
episode: 484 training return: tensor(125.9315, device='cuda:0')
episode: 485 training return: tensor(118.2326, device='cuda:0')
episode: 486 training return: tensor(119.9107, device='cuda:0')
episode: 487 training return: tensor(133.4864, device='cuda:0')
epoch: 122 test_true_pfm: 132.26348371523324 sim_pfm: 129.44893284794526
episode: 488 training return: tensor(130.5451, device='cuda:0')
episode: 489 training return: tensor(129.5979, device='cuda:0')
episode: 490 training return: tensor(109.1159, device='cuda:0')
episode: 491 training return: tensor(133.7852, device='cuda:0')
epoch: 123 test_true_pfm: 131.35236485228683 sim_pfm: 132.18102403323866
episode: 492 training return: tensor(131.9083, device='cuda:0')
episode: 493 training return: tensor(126.6447, device='cuda:0')
episode: 494 training return: tensor(123.7244, device='cuda:0')
episode: 495 training return: tensor(130.2152, device='cuda:0')
epoch: 124 test_true_pfm: 131.7303496503963 sim_pfm: 127.00361561998724
episode: 496 training return: tensor(128.3796, device='cuda:0')
episode: 497 training return: tensor(121.6778, device='cuda:0')
episode: 498 training return: tensor(124.1531, device='cuda:0')
episode: 499 training return: tensor(137.4971, device='cuda:0')
epoch: 125 test_true_pfm: 132.57161501955562 sim_pfm: 136.13840740025043
episode: 500 training return: tensor(133.2906, device='cuda:0')
episode: 501 training return: tensor(140.0930, device='cuda:0')
episode: 502 training return: tensor(138.0748, device='cuda:0')
episode: 503 training return: tensor(128.7811, device='cuda:0')
epoch: 126 test_true_pfm: 129.3423507432963 sim_pfm: 131.34199656988494
episode: 504 training return: tensor(110.9156, device='cuda:0')
episode: 505 training return: tensor(129.8740, device='cuda:0')
episode: 506 training return: tensor(126.5278, device='cuda:0')
episode: 507 training return: tensor(132.0997, device='cuda:0')
epoch: 127 test_true_pfm: 129.5176917815471 sim_pfm: 130.2975828153896
episode: 508 training return: tensor(137.4830, device='cuda:0')
episode: 509 training return: tensor(133.5329, device='cuda:0')
episode: 510 training return: tensor(135.8226, device='cuda:0')
episode: 511 training return: tensor(124.4202, device='cuda:0')
epoch: 128 test_true_pfm: 130.4319871566442 sim_pfm: 132.39620862827869
episode: 512 training return: tensor(125.4686, device='cuda:0')
episode: 513 training return: tensor(136.6173, device='cuda:0')
episode: 514 training return: tensor(129.7915, device='cuda:0')
episode: 515 training return: tensor(132.2086, device='cuda:0')
epoch: 129 test_true_pfm: 130.45443080335806 sim_pfm: 129.4195737528149
episode: 516 training return: tensor(121.6275, device='cuda:0')
episode: 517 training return: tensor(138.1820, device='cuda:0')
episode: 518 training return: tensor(131.8563, device='cuda:0')
episode: 519 training return: tensor(127.8642, device='cuda:0')
epoch: 130 test_true_pfm: 129.75718669893055 sim_pfm: 133.34199555629166
episode: 520 training return: tensor(125.4722, device='cuda:0')
episode: 521 training return: tensor(122.9530, device='cuda:0')
episode: 522 training return: tensor(125.8028, device='cuda:0')
episode: 523 training return: tensor(125.1533, device='cuda:0')
epoch: 131 test_true_pfm: 130.34601629184257 sim_pfm: 118.06915090703406
episode: 524 training return: tensor(130.9455, device='cuda:0')
episode: 525 training return: tensor(134.9671, device='cuda:0')
episode: 526 training return: tensor(131.1059, device='cuda:0')
episode: 527 training return: tensor(136.2043, device='cuda:0')
epoch: 132 test_true_pfm: 129.18337899979886 sim_pfm: 129.7149390535429
episode: 528 training return: tensor(127.5092, device='cuda:0')
episode: 529 training return: tensor(142.0878, device='cuda:0')
episode: 530 training return: tensor(132.0490, device='cuda:0')
episode: 531 training return: tensor(133.9356, device='cuda:0')
epoch: 133 test_true_pfm: 130.68455156788647 sim_pfm: 126.4861256787146
episode: 532 training return: tensor(132.6596, device='cuda:0')
episode: 533 training return: tensor(129.4246, device='cuda:0')
episode: 534 training return: tensor(123.7810, device='cuda:0')
episode: 535 training return: tensor(118.2798, device='cuda:0')
epoch: 134 test_true_pfm: 131.8559709536693 sim_pfm: 132.17343988123466
episode: 536 training return: tensor(133.4926, device='cuda:0')
episode: 537 training return: tensor(143.0237, device='cuda:0')
episode: 538 training return: tensor(137.5396, device='cuda:0')
episode: 539 training return: tensor(115.8500, device='cuda:0')
epoch: 135 test_true_pfm: 128.7063851306529 sim_pfm: 126.91893852360081
episode: 540 training return: tensor(130.2307, device='cuda:0')
episode: 541 training return: tensor(121.4390, device='cuda:0')
episode: 542 training return: tensor(140.2857, device='cuda:0')
episode: 543 training return: tensor(139.0791, device='cuda:0')
epoch: 136 test_true_pfm: 130.9351679800167 sim_pfm: 133.86091574371093
episode: 544 training return: tensor(139.1977, device='cuda:0')
episode: 545 training return: tensor(132.2766, device='cuda:0')
episode: 546 training return: tensor(129.6631, device='cuda:0')
episode: 547 training return: tensor(137.4769, device='cuda:0')
epoch: 137 test_true_pfm: 129.52574986202413 sim_pfm: 124.50066320751212
episode: 548 training return: tensor(125.2459, device='cuda:0')
episode: 549 training return: tensor(135.8333, device='cuda:0')
episode: 550 training return: tensor(126.5835, device='cuda:0')
episode: 551 training return: tensor(117.0771, device='cuda:0')
epoch: 138 test_true_pfm: 130.64098569826953 sim_pfm: 131.51455087976646
episode: 552 training return: tensor(142.5012, device='cuda:0')
episode: 553 training return: tensor(136.6290, device='cuda:0')
episode: 554 training return: tensor(111.3593, device='cuda:0')
episode: 555 training return: tensor(132.7382, device='cuda:0')
epoch: 139 test_true_pfm: 131.02190024593514 sim_pfm: 130.23128514176352
episode: 556 training return: tensor(137.7681, device='cuda:0')
episode: 557 training return: tensor(124.6805, device='cuda:0')
episode: 558 training return: tensor(123.1219, device='cuda:0')
episode: 559 training return: tensor(135.5120, device='cuda:0')
epoch: 140 test_true_pfm: 130.4367584448084 sim_pfm: 125.49804159171181
episode: 560 training return: tensor(128.3815, device='cuda:0')
episode: 561 training return: tensor(120.8521, device='cuda:0')
episode: 562 training return: tensor(123.9302, device='cuda:0')
episode: 563 training return: tensor(131.4468, device='cuda:0')
epoch: 141 test_true_pfm: 130.3039652214522 sim_pfm: 131.36473857670208
episode: 564 training return: tensor(123.7172, device='cuda:0')
episode: 565 training return: tensor(128.0845, device='cuda:0')
episode: 566 training return: tensor(128.0641, device='cuda:0')
episode: 567 training return: tensor(125.5112, device='cuda:0')
epoch: 142 test_true_pfm: 130.09243153467582 sim_pfm: 136.42928412962937
episode: 568 training return: tensor(134.2896, device='cuda:0')
episode: 569 training return: tensor(134.8123, device='cuda:0')
episode: 570 training return: tensor(124.1041, device='cuda:0')
episode: 571 training return: tensor(132.3608, device='cuda:0')
epoch: 143 test_true_pfm: 129.3737935534067 sim_pfm: 123.6644904776942
episode: 572 training return: tensor(134.5584, device='cuda:0')
episode: 573 training return: tensor(130.0311, device='cuda:0')
episode: 574 training return: tensor(135.9516, device='cuda:0')
episode: 575 training return: tensor(120.6158, device='cuda:0')
epoch: 144 test_true_pfm: 130.05492732659053 sim_pfm: 128.20245311221805
episode: 576 training return: tensor(128.6370, device='cuda:0')
episode: 577 training return: tensor(127.9698, device='cuda:0')
episode: 578 training return: tensor(134.8271, device='cuda:0')
episode: 579 training return: tensor(136.0931, device='cuda:0')
epoch: 145 test_true_pfm: 129.2275715374198 sim_pfm: 134.9416474717669
episode: 580 training return: tensor(129.5246, device='cuda:0')
episode: 581 training return: tensor(140.0285, device='cuda:0')
episode: 582 training return: tensor(128.3168, device='cuda:0')
episode: 583 training return: tensor(133.0308, device='cuda:0')
epoch: 146 test_true_pfm: 130.66636960188936 sim_pfm: 125.79444311342085
episode: 584 training return: tensor(131.9028, device='cuda:0')
episode: 585 training return: tensor(123.6188, device='cuda:0')
episode: 586 training return: tensor(134.0571, device='cuda:0')
episode: 587 training return: tensor(130.1670, device='cuda:0')
epoch: 147 test_true_pfm: 129.56409177761168 sim_pfm: 134.80422471419442
episode: 588 training return: tensor(136.7265, device='cuda:0')
episode: 589 training return: tensor(133.3698, device='cuda:0')
episode: 590 training return: tensor(129.2013, device='cuda:0')
episode: 591 training return: tensor(124.8344, device='cuda:0')
epoch: 148 test_true_pfm: 128.95254442517032 sim_pfm: 128.55688323930954
episode: 592 training return: tensor(132.3450, device='cuda:0')
episode: 593 training return: tensor(124.8142, device='cuda:0')
episode: 594 training return: tensor(128.1909, device='cuda:0')
episode: 595 training return: tensor(141.8268, device='cuda:0')
epoch: 149 test_true_pfm: 128.42059751970527 sim_pfm: 126.3192764657957
episode: 596 training return: tensor(134.1973, device='cuda:0')
episode: 597 training return: tensor(121.2285, device='cuda:0')
episode: 598 training return: tensor(119.6370, device='cuda:0')
episode: 599 training return: tensor(132.5007, device='cuda:0')
epoch: 150 test_true_pfm: 130.36228926678262 sim_pfm: 130.6813708263391
