['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '1', '--data', '3000']
epoch: 0 training_loss 0.30773769795894623 test_loss: 0.10542325973510742
epoch: 1 training_loss 0.1692501711845398 test_loss: 0.07598187923431396
epoch: 2 training_loss 0.12660427760332824 test_loss: 0.07849026918411255
epoch: 3 training_loss 0.11495154358446598 test_loss: 0.07968202829360962
epoch: 4 training_loss 0.09892382819205522 test_loss: 0.060753893852233884
epoch: 5 training_loss 0.08518475234508514 test_loss: 0.058519613742828366
epoch: 6 training_loss 0.07775391731411219 test_loss: 0.0626165509223938
epoch: 7 training_loss 0.07449490373954176 test_loss: 0.0658560574054718
epoch: 8 training_loss 0.07937200525775552 test_loss: 0.07453282475471497
epoch: 9 training_loss 0.06752051772549748 test_loss: 0.05554121732711792
epoch: 10 training_loss 0.06583542812615634 test_loss: 0.0744544506072998
epoch: 11 training_loss 0.057407936435192826 test_loss: 0.06044022440910339
epoch: 12 training_loss 0.05875096827745438 test_loss: 0.07667467594146729
epoch: 13 training_loss 0.05256752518936992 test_loss: 0.07298370003700257
epoch: 14 training_loss 0.04746827726252377 test_loss: 0.07066412568092346
epoch: 15 training_loss 0.04395370392128825 test_loss: 0.07244471907615661
epoch: 16 training_loss 0.055901922676712276 test_loss: 0.08892001509666443
epoch: 17 training_loss 0.04529452231712639 test_loss: 0.06694510579109192
epoch: 18 training_loss 0.03736845210194588 test_loss: 0.07457152009010315
epoch: 19 training_loss 0.03343142295256257 test_loss: 0.07244134545326233
epoch: 20 training_loss 0.030189387621358036 test_loss: 0.07836306095123291
epoch: 21 training_loss 0.030904504745267333 test_loss: 0.07181383371353149
epoch: 22 training_loss 0.032142823971807954 test_loss: 0.06904745101928711
epoch: 23 training_loss 0.026591832893900574 test_loss: 0.07929595112800598
epoch: 24 training_loss 0.032545571699738506 test_loss: 0.0785853385925293
epoch: 25 training_loss 0.036261028721928594 test_loss: 0.07446101903915406
epoch: 26 training_loss 0.024983582496643068 test_loss: 0.07374164462089539
epoch: 27 training_loss 0.02004400433972478 test_loss: 0.07831228971481323
epoch: 28 training_loss 0.020899947616271675 test_loss: 0.08307608366012573
epoch: 29 training_loss 0.018752234578132628 test_loss: 0.0818329095840454
epoch: 30 training_loss 0.021878290339373052 test_loss: 0.07751408815383912
epoch: 31 training_loss 0.022025906885974108 test_loss: 0.08353819847106933
epoch: 32 training_loss 0.017683213509153573 test_loss: 0.08420271873474121
epoch: 33 training_loss 0.013870199788361787 test_loss: 0.09038071632385254
epoch: 34 training_loss 0.012481050912756473 test_loss: 0.08640674352645875
epoch: 35 training_loss 0.020785715533420443 test_loss: 0.10062674283981324
epoch: 36 training_loss 0.044195780334994196 test_loss: 0.09293252229690552
epoch: 37 training_loss 0.03434001645538956 test_loss: 0.09169764518737793
epoch: 38 training_loss 0.021305243712849916 test_loss: 0.08481674194335938
epoch: 39 training_loss 0.012424156495835632 test_loss: 0.08892574906349182
epoch: 40 training_loss 0.011387671222910284 test_loss: 0.08767321705818176
epoch: 41 training_loss 0.009793585548177362 test_loss: 0.07692226767539978
epoch: 42 training_loss 0.00969017322640866 test_loss: 0.08405597805976868
epoch: 43 training_loss 0.007978119982872157 test_loss: 0.089847731590271
epoch: 44 training_loss 0.009114633379504085 test_loss: 0.08899319767951966
epoch: 45 training_loss 0.008860271910671146 test_loss: 0.09014910459518433
epoch: 46 training_loss 0.009345513433218003 test_loss: 0.08588098287582398
epoch: 47 training_loss 0.007808134169317782 test_loss: 0.09377464056015014
epoch: 48 training_loss 0.010016101845540106 test_loss: 0.07958572506904601
epoch: 49 training_loss 0.0530344991851598 test_loss: 0.11363306045532226
epoch: 50 training_loss 0.040323612261563536 test_loss: 0.07500523924827576
epoch: 51 training_loss 0.017229888557922096 test_loss: 0.08682835102081299
epoch: 52 training_loss 0.010245504679623992 test_loss: 0.08556588888168334
epoch: 53 training_loss 0.0075203060102649035 test_loss: 0.08562979698181153
epoch: 54 training_loss 0.006802084560040385 test_loss: 0.08617160320281983
epoch: 55 training_loss 0.006546527189202607 test_loss: 0.08504303097724915
epoch: 56 training_loss 0.006057274101767689 test_loss: 0.08214126825332642
epoch: 57 training_loss 0.0059277998493053015 test_loss: 0.08797073364257812
epoch: 58 training_loss 0.005422074210364372 test_loss: 0.08723062872886658
epoch: 59 training_loss 0.005348745634546503 test_loss: 0.09402233958244324
epoch: 60 training_loss 0.004855575284454972 test_loss: 0.09244273900985718
epoch: 61 training_loss 0.004683906494174153 test_loss: 0.09299117922782899
epoch: 62 training_loss 0.005217035074019804 test_loss: 0.09402800798416137
epoch: 63 training_loss 0.03226424922468141 test_loss: 0.10764175653457642
epoch: 64 training_loss 0.0925074777007103 test_loss: 0.07807961106300354
epoch: 65 training_loss 0.02548777441959828 test_loss: 0.07881125211715698
epoch: 66 training_loss 0.014182561971247197 test_loss: 0.09025858640670777
epoch: 67 training_loss 0.011365844171959906 test_loss: 0.08655169010162353
epoch: 68 training_loss 0.007210392795968801 test_loss: 0.08693199753761291
epoch: 69 training_loss 0.006168019147589803 test_loss: 0.08995816111564636
epoch: 70 training_loss 0.0052183026296552274 test_loss: 0.09086009263992309
epoch: 71 training_loss 0.0055791881843470035 test_loss: 0.09447779655456542
epoch: 72 training_loss 0.0613874107436277 test_loss: 0.09544734954833985
epoch: 73 training_loss 0.03318813208839856 test_loss: 0.07576813697814941
epoch: 74 training_loss 0.009480490908026695 test_loss: 0.07999234199523926
epoch: 75 training_loss 0.008478806319180876 test_loss: 0.08043313622474671
epoch: 76 training_loss 0.005780614073155448 test_loss: 0.0869266927242279
epoch: 77 training_loss 0.005130395025480539 test_loss: 0.0876223623752594
epoch: 78 training_loss 0.0041778636246453974 test_loss: 0.09061744213104247
epoch: 79 training_loss 0.004111865357263014 test_loss: 0.09358003139495849
epoch: 80 training_loss 0.003923914809711278 test_loss: 0.09224542379379272
epoch: 81 training_loss 0.003719270140863955 test_loss: 0.096083265542984
epoch: 82 training_loss 0.003264784732600674 test_loss: 0.09608431458473206
epoch: 83 training_loss 0.003185491723706946 test_loss: 0.09746122360229492
epoch: 84 training_loss 0.0033266203466337176 test_loss: 0.10056887865066529
epoch: 85 training_loss 0.002676011094590649 test_loss: 0.09905184507369995
epoch: 86 training_loss 0.0026441401906777172 test_loss: 0.10325843095779419
epoch: 87 training_loss 0.0024999669258249924 test_loss: 0.10077779293060303
epoch: 88 training_loss 0.002766693918965757 test_loss: 0.10252317190170288
epoch: 89 training_loss 0.002598178976913914 test_loss: 0.10731762647628784
epoch: 90 training_loss 0.0024249210860580205 test_loss: 0.10375266075134278
epoch: 91 training_loss 0.0025248424836900084 test_loss: 0.10800271034240723
epoch: 92 training_loss 0.0023871678888099266 test_loss: 0.10471861362457276
epoch: 93 training_loss 0.002189647099585272 test_loss: 0.10861282348632813
epoch: 94 training_loss 0.002057478257920593 test_loss: 0.11053850650787353
epoch: 95 training_loss 0.002236324190162122 test_loss: 0.11291102170944214
epoch: 96 training_loss 0.0020679968857439234 test_loss: 0.10849196910858154
epoch: 97 training_loss 0.048484036712907255 test_loss: 0.17760841846466063
epoch: 98 training_loss 0.14850831979885698 test_loss: 0.06518939137458801
epoch: 99 training_loss 0.03518430963158607 test_loss: 0.0699293851852417
epoch: 100 training_loss 0.018882247009314596 test_loss: 0.07839061617851258
epoch: 101 training_loss 0.012041252451017498 test_loss: 0.07515243887901306
epoch: 102 training_loss 0.007965313998283818 test_loss: 0.08509225845336914
epoch: 103 training_loss 0.006345981779741123 test_loss: 0.08010421991348267
epoch: 104 training_loss 0.00623911366565153 test_loss: 0.08762011528015137
epoch: 105 training_loss 0.004893820462748408 test_loss: 0.08530381917953492
epoch: 106 training_loss 0.0040921930933836845 test_loss: 0.08704546093940735
epoch: 107 training_loss 0.004000834717880934 test_loss: 0.08951413631439209
epoch: 108 training_loss 0.0033459395298268647 test_loss: 0.09098924398422241
epoch: 109 training_loss 0.0032077392732026058 test_loss: 0.09223526120185851
epoch: 110 training_loss 0.0032073082803981377 test_loss: 0.10098192691802979
epoch: 111 training_loss 0.0026029489177744835 test_loss: 0.09751995801925659
epoch: 112 training_loss 0.0025260865222662687 test_loss: 0.10272825956344604
epoch: 113 training_loss 0.002487294300226495 test_loss: 0.10271538496017456
epoch: 114 training_loss 0.0023592064005788418 test_loss: 0.10375015735626221
epoch: 115 training_loss 0.00215579028474167 test_loss: 0.10012962818145751
epoch: 116 training_loss 0.002381927635287866 test_loss: 0.09852607250213623
epoch: 117 training_loss 0.0025811086571775376 test_loss: 0.10651243925094604
epoch: 118 training_loss 0.0021754110319307076 test_loss: 0.10463395118713378
epoch: 119 training_loss 0.0035553214873652907 test_loss: 0.11835026741027832
epoch: 120 training_loss 0.12055249480530619 test_loss: 0.11095534563064575
epoch: 121 training_loss 0.05191039849072695 test_loss: 0.08112595081329346
epoch: 122 training_loss 0.021086357487365603 test_loss: 0.09089512825012207
epoch: 123 training_loss 0.01047040881589055 test_loss: 0.09024947881698608
epoch: 124 training_loss 0.0069653225596994165 test_loss: 0.09567558765411377
epoch: 125 training_loss 0.0059523583366535605 test_loss: 0.09335167407989502
epoch: 126 training_loss 0.004621604004641995 test_loss: 0.09543986916542054
epoch: 127 training_loss 0.003985425365390256 test_loss: 0.09579285383224487
epoch: 128 training_loss 0.0037058204505592586 test_loss: 0.09685598015785217
epoch: 129 training_loss 0.0032063555088825526 test_loss: 0.09987945556640625
epoch: 130 training_loss 0.0034221017104573547 test_loss: 0.1017792820930481
epoch: 131 training_loss 0.002739235739572905 test_loss: 0.10197637081146241
epoch: 132 training_loss 0.002875982057885267 test_loss: 0.10481095314025879
epoch: 133 training_loss 0.002444553740788251 test_loss: 0.1099508285522461
epoch: 134 training_loss 0.002464665378211066 test_loss: 0.11089766025543213
epoch: 135 training_loss 0.0021199853980215268 test_loss: 0.11205447912216186
epoch: 136 training_loss 0.0021920892572961747 test_loss: 0.11144015789031983
epoch: 137 training_loss 0.0020818816835526375 test_loss: 0.11875149011611938
epoch: 138 training_loss 0.001993936261860654 test_loss: 0.11772924661636353
epoch: 139 training_loss 0.0018480674058082513 test_loss: 0.11922315359115601
epoch: 140 training_loss 0.0021441201528068633 test_loss: 0.11787035465240478
epoch: 141 training_loss 0.001611752025783062 test_loss: 0.12439953088760376
epoch: 142 training_loss 0.0021651399368420245 test_loss: 0.12313321828842164
epoch: 143 training_loss 0.0033770519075915217 test_loss: 0.11537946462631225
epoch: 144 training_loss 0.1884496959974058 test_loss: 0.09144450426101684
epoch: 145 training_loss 0.05772073687985539 test_loss: 0.09847331047058105
epoch: 146 training_loss 0.026144923949614166 test_loss: 0.08663206100463867
epoch: 147 training_loss 0.014760924885049462 test_loss: 0.08771865367889405
epoch: 148 training_loss 0.009386233864352106 test_loss: 0.08864005208015442
epoch: 149 training_loss 0.006942749412264675 test_loss: 0.09309864640235901
epoch: 0 training_loss 50.375332317352296 test_loss: 11.158361053466797
epoch: 1 training_loss 18.092090721130372 test_loss: 6.992331695556641
epoch: 2 training_loss 13.012528877258301 test_loss: 5.470432281494141
epoch: 3 training_loss 10.276291542053222 test_loss: 4.437186813354492
epoch: 4 training_loss 8.356852130889893 test_loss: 3.847583770751953
epoch: 5 training_loss 7.287826099395752 test_loss: 3.3522499084472654
epoch: 6 training_loss 6.443384404182434 test_loss: 2.997648239135742
epoch: 7 training_loss 5.869391012191772 test_loss: 2.7381505966186523
epoch: 8 training_loss 5.362390518188477 test_loss: 2.5344655990600584
epoch: 9 training_loss 5.007848443984986 test_loss: 2.3688451766967775
epoch: 10 training_loss 4.715779495239258 test_loss: 2.2250429153442384
epoch: 11 training_loss 4.3156847524642945 test_loss: 2.0805654525756836
epoch: 12 training_loss 4.1181511926651 test_loss: 1.9691408157348633
epoch: 13 training_loss 3.983190667629242 test_loss: 1.8738336563110352
epoch: 14 training_loss 3.7636562752723695 test_loss: 1.8041566848754882
epoch: 15 training_loss 3.6011138916015626 test_loss: 1.7471536636352538
epoch: 16 training_loss 3.5137505173683166 test_loss: 1.6651979446411134
epoch: 17 training_loss 3.3547776126861573 test_loss: 1.6412864685058595
epoch: 18 training_loss 3.2918459463119505 test_loss: 1.5850156784057616
epoch: 19 training_loss 3.2189290380477904 test_loss: 1.5345229148864745
epoch: 20 training_loss 3.0785955023765563 test_loss: 1.4889691352844239
epoch: 21 training_loss 2.983314266204834 test_loss: 1.456321620941162
epoch: 22 training_loss 2.913312633037567 test_loss: 1.4395821571350098
epoch: 23 training_loss 2.8671460580825805 test_loss: 1.392583179473877
epoch: 24 training_loss 2.7193326926231385 test_loss: 1.3625164031982422
epoch: 25 training_loss 2.689643769264221 test_loss: 1.337701416015625
epoch: 26 training_loss 2.625261137485504 test_loss: 1.3148513793945313
epoch: 27 training_loss 2.6046693658828737 test_loss: 1.2825965881347656
epoch: 28 training_loss 2.5698195576667784 test_loss: 1.258018970489502
epoch: 29 training_loss 2.479819667339325 test_loss: 1.2580881118774414
epoch: 30 training_loss 2.4411055660247802 test_loss: 1.2356647491455077
epoch: 31 training_loss 2.3922976779937746 test_loss: 1.219247531890869
epoch: 32 training_loss 2.405721640586853 test_loss: 1.2056665420532227
epoch: 33 training_loss 2.339924031496048 test_loss: 1.1683317184448243
epoch: 34 training_loss 2.361024736166 test_loss: 1.1598575592041016
epoch: 35 training_loss 2.291293236017227 test_loss: 1.1437617301940919
epoch: 36 training_loss 2.284748339653015 test_loss: 1.1379992485046386
epoch: 37 training_loss 2.2245462882518767 test_loss: 1.1229883193969727
epoch: 38 training_loss 2.162657561302185 test_loss: 1.1157249450683593
epoch: 39 training_loss 2.1861603331565855 test_loss: 1.1017313957214356
epoch: 40 training_loss 2.1639412796497344 test_loss: 1.12306489944458
epoch: 41 training_loss 2.154488078355789 test_loss: 1.0745696067810058
epoch: 42 training_loss 2.174704163074493 test_loss: 1.07284574508667
epoch: 43 training_loss 2.1573843812942504 test_loss: 1.0572322845458983
epoch: 44 training_loss 2.1006766080856325 test_loss: 1.0781888961791992
epoch: 45 training_loss 2.05509791135788 test_loss: 1.0501519203186036
epoch: 46 training_loss 2.0403509068489076 test_loss: 1.038944625854492
epoch: 47 training_loss 2.0345671868324278 test_loss: 1.0310770988464355
epoch: 48 training_loss 2.0081583321094514 test_loss: 1.034234619140625
epoch: 49 training_loss 1.9902613961696625 test_loss: 1.0178789138793944
epoch: 50 training_loss 1.9456875932216644 test_loss: 1.0086733818054199
epoch: 51 training_loss 1.9718181145191194 test_loss: 1.0233906745910644
epoch: 52 training_loss 1.9812336575984955 test_loss: 1.0213726043701172
epoch: 53 training_loss 1.9177618324756622 test_loss: 0.9963001251220703
epoch: 54 training_loss 1.9168705999851228 test_loss: 1.0058445930480957
epoch: 55 training_loss 1.919981311559677 test_loss: 0.9875175476074218
epoch: 56 training_loss 1.8955229997634888 test_loss: 0.975056266784668
epoch: 57 training_loss 1.9091359663009644 test_loss: 0.9850418090820312
epoch: 58 training_loss 1.857562074661255 test_loss: 0.9600212097167968
epoch: 59 training_loss 1.8998423993587494 test_loss: 0.962162971496582
epoch: 60 training_loss 1.8442411589622498 test_loss: 0.9635636329650878
epoch: 61 training_loss 1.8322735381126405 test_loss: 0.9564115524291992
epoch: 62 training_loss 1.8788761985301972 test_loss: 0.9572050094604492
epoch: 63 training_loss 1.8643325996398925 test_loss: 0.9466437339782715
epoch: 64 training_loss 1.77511039018631 test_loss: 0.9344845771789551
epoch: 65 training_loss 1.818961319923401 test_loss: 0.9244510650634765
epoch: 66 training_loss 1.7815706849098205 test_loss: 0.9250882148742676
epoch: 67 training_loss 1.7965791380405427 test_loss: 0.9493728637695312
epoch: 68 training_loss 1.7782106852531434 test_loss: 0.9305473327636719
epoch: 69 training_loss 1.7822343719005584 test_loss: 0.9356549263000489
epoch: 70 training_loss 1.7544096052646636 test_loss: 0.899998664855957
epoch: 71 training_loss 1.7328101646900178 test_loss: 0.9343477249145508
epoch: 72 training_loss 1.7412021231651307 test_loss: 0.8987343788146973
epoch: 73 training_loss 1.7305492401123046 test_loss: 0.8909763336181641
epoch: 74 training_loss 1.7347244501113892 test_loss: 0.8862048149108886
epoch: 75 training_loss 1.7198327744007111 test_loss: 0.8850984573364258
epoch: 76 training_loss 1.6993995797634125 test_loss: 0.8762893676757812
epoch: 77 training_loss 1.668919757604599 test_loss: 0.8793285369873047
epoch: 78 training_loss 1.678695729970932 test_loss: 0.8805203437805176
epoch: 79 training_loss 1.698346655368805 test_loss: 0.8700023651123047
epoch: 80 training_loss 1.664636993408203 test_loss: 0.8716293334960937
epoch: 81 training_loss 1.6504615974426269 test_loss: 0.8829631805419922
epoch: 82 training_loss 1.6727574479579925 test_loss: 0.8591687202453613
epoch: 83 training_loss 1.7018923449516297 test_loss: 0.8686150550842285
epoch: 84 training_loss 1.6367347073554992 test_loss: 0.8550678253173828
epoch: 85 training_loss 1.6744434690475465 test_loss: 0.8586935997009277
epoch: 86 training_loss 1.6299292504787446 test_loss: 0.8615072250366211
epoch: 87 training_loss 1.6293483126163482 test_loss: 0.8519614219665528
epoch: 88 training_loss 1.6419810569286346 test_loss: 0.8463381767272949
epoch: 89 training_loss 1.6225952982902527 test_loss: 0.84298677444458
epoch: 90 training_loss 1.592871197462082 test_loss: 0.8549240112304688
epoch: 91 training_loss 1.620054256916046 test_loss: 0.8377976417541504
epoch: 92 training_loss 1.620289716720581 test_loss: 0.8458551406860352
epoch: 93 training_loss 1.5988605904579163 test_loss: 0.8334713935852051
epoch: 94 training_loss 1.6056268692016602 test_loss: 0.8449335098266602
epoch: 95 training_loss 1.6159575510025024 test_loss: 0.8349435806274415
epoch: 96 training_loss 1.6030033147335052 test_loss: 0.8413702964782714
epoch: 97 training_loss 1.6057332527637482 test_loss: 0.8377608299255371
epoch: 98 training_loss 1.5831492364406585 test_loss: 0.8227761268615723
epoch: 99 training_loss 1.5774689280986787 test_loss: 0.8244893074035644
epoch: 100 training_loss 1.568749428987503 test_loss: 0.8246408462524414
epoch: 101 training_loss 1.5933173894882202 test_loss: 0.8203107833862304
epoch: 102 training_loss 1.5819851350784302 test_loss: 0.8167870521545411
epoch: 103 training_loss 1.5510102570056916 test_loss: 0.8127869606018067
epoch: 104 training_loss 1.537785645723343 test_loss: 0.8135168075561523
epoch: 105 training_loss 1.538853462934494 test_loss: 0.8111110687255859
epoch: 106 training_loss 1.550639101266861 test_loss: 0.8184741020202637
epoch: 107 training_loss 1.5461828136444091 test_loss: 0.810515308380127
epoch: 108 training_loss 1.5469420337677002 test_loss: 0.8067233085632324
epoch: 109 training_loss 1.5260908317565918 test_loss: 0.8101358413696289
epoch: 110 training_loss 1.5110876035690308 test_loss: 0.8098424911499024
epoch: 111 training_loss 1.5249537408351899 test_loss: 0.8028307914733886
epoch: 112 training_loss 1.5266971230506896 test_loss: 0.7988653182983398
epoch: 113 training_loss 1.5233938264846802 test_loss: 0.8037356376647949
epoch: 114 training_loss 1.5415274894237518 test_loss: 0.8102730751037598
epoch: 115 training_loss 1.5197520422935487 test_loss: 0.8011053085327149
epoch: 116 training_loss 1.515606632232666 test_loss: 0.7998257637023926
epoch: 117 training_loss 1.5010155522823334 test_loss: 0.7947266101837158
epoch: 118 training_loss 1.5327917110919953 test_loss: 0.7976451396942139
epoch: 119 training_loss 1.5178933918476105 test_loss: 0.7991092681884766
epoch: 120 training_loss 1.5038735032081605 test_loss: 0.7871788024902344
epoch: 121 training_loss 1.5084560585021973 test_loss: 0.794002103805542
epoch: 122 training_loss 1.5039377880096436 test_loss: 0.7830516815185546
epoch: 123 training_loss 1.473122318983078 test_loss: 0.7804830074310303
epoch: 124 training_loss 1.4802156794071197 test_loss: 0.8013476371765137
epoch: 125 training_loss 1.4946605896949767 test_loss: 0.7825551986694336
epoch: 126 training_loss 1.4710601544380189 test_loss: 0.7803036212921143
epoch: 127 training_loss 1.507570468187332 test_loss: 0.783845329284668
epoch: 128 training_loss 1.4817613327503205 test_loss: 0.7784831047058105
epoch: 129 training_loss 1.4578257298469544 test_loss: 0.7811518669128418
epoch: 130 training_loss 1.46919459939003 test_loss: 0.7760043621063233
epoch: 131 training_loss 1.4786737227439881 test_loss: 0.7829827785491943
epoch: 132 training_loss 1.449140818119049 test_loss: 0.7687407016754151
epoch: 133 training_loss 1.4799852526187898 test_loss: 0.7841292381286621
epoch: 134 training_loss 1.4676848399639129 test_loss: 0.7656690120697022
epoch: 135 training_loss 1.462131178379059 test_loss: 0.7772058963775634
epoch: 136 training_loss 1.478233096599579 test_loss: 0.7779037952423096
epoch: 137 training_loss 1.4597732675075532 test_loss: 0.7673154354095459
epoch: 138 training_loss 1.4598320281505586 test_loss: 0.7759969711303711
epoch: 139 training_loss 1.4808317065238952 test_loss: 0.7703861236572266
epoch: 140 training_loss 1.455420776605606 test_loss: 0.7640992164611816
epoch: 141 training_loss 1.4572937297821045 test_loss: 0.7772077083587646
epoch: 142 training_loss 1.4364240217208861 test_loss: 0.7692666530609131
epoch: 143 training_loss 1.4455066227912903 test_loss: 0.7526796340942383
epoch: 144 training_loss 1.4836509013175965 test_loss: 0.7931904315948486
epoch: 145 training_loss 1.4490298354625701 test_loss: 0.7616814613342285
epoch: 146 training_loss 1.4283443641662599 test_loss: 0.7631866931915283
epoch: 147 training_loss 1.4593518888950348 test_loss: 0.7625338554382324
epoch: 148 training_loss 1.4266906344890595 test_loss: 0.7646089553833008
epoch: 149 training_loss 1.4295640242099763 test_loss: 0.7559330463409424
5072.618210791039
episode: 0 training return: tensor(56.8103, device='cuda:0')
episode: 1 training return: tensor(-197.3973, device='cuda:0')
episode: 2 training return: tensor(-195.3143, device='cuda:0')
episode: 3 training return: tensor(138.1178, device='cuda:0')
epoch: 1 test_true_pfm: 5034.551956763513 sim_pfm: -16.04732829805774
episode: 4 training return: tensor(-119.3536, device='cuda:0')
episode: 5 training return: tensor(-10.5543, device='cuda:0')
episode: 6 training return: tensor(32.9861, device='cuda:0')
episode: 7 training return: tensor(-90.9327, device='cuda:0')
epoch: 2 test_true_pfm: 5038.030785012698 sim_pfm: -108.57216066630401
episode: 8 training return: tensor(10.5824, device='cuda:0')
episode: 9 training return: tensor(92.8827, device='cuda:0')
episode: 10 training return: tensor(162.9359, device='cuda:0')
episode: 11 training return: tensor(-45.6391, device='cuda:0')
epoch: 3 test_true_pfm: 5234.064136937383 sim_pfm: -0.12507657811511308
episode: 12 training return: tensor(79.5286, device='cuda:0')
episode: 13 training return: tensor(190.7303, device='cuda:0')
episode: 14 training return: tensor(5.1783, device='cuda:0')
episode: 15 training return: tensor(-32.3640, device='cuda:0')
epoch: 4 test_true_pfm: 5048.008272932605 sim_pfm: 105.90546042585629
episode: 16 training return: tensor(-3.1471, device='cuda:0')
episode: 17 training return: tensor(31.6121, device='cuda:0')
episode: 18 training return: tensor(39.1325, device='cuda:0')
episode: 19 training return: tensor(-38.3718, device='cuda:0')
epoch: 5 test_true_pfm: 5150.833562274969 sim_pfm: 78.67659513225469
episode: 20 training return: tensor(16.5642, device='cuda:0')
episode: 21 training return: tensor(-71.1018, device='cuda:0')
episode: 22 training return: tensor(75.0835, device='cuda:0')
episode: 23 training return: tensor(-119.2136, device='cuda:0')
epoch: 6 test_true_pfm: 5153.030986006317 sim_pfm: 16.512288106760632
episode: 24 training return: tensor(17.8700, device='cuda:0')
episode: 25 training return: tensor(69.4106, device='cuda:0')
episode: 26 training return: tensor(136.7723, device='cuda:0')
episode: 27 training return: tensor(154.9380, device='cuda:0')
epoch: 7 test_true_pfm: 5174.913362220893 sim_pfm: 31.732255072371725
episode: 28 training return: tensor(24.6340, device='cuda:0')
episode: 29 training return: tensor(6.6298, device='cuda:0')
episode: 30 training return: tensor(102.4133, device='cuda:0')
episode: 31 training return: tensor(32.0190, device='cuda:0')
epoch: 8 test_true_pfm: 5261.681437474816 sim_pfm: 51.13194956669273
episode: 32 training return: tensor(208.0762, device='cuda:0')
episode: 33 training return: tensor(178.9093, device='cuda:0')
episode: 34 training return: tensor(-378.3181, device='cuda:0')
episode: 35 training return: tensor(11.4253, device='cuda:0')
epoch: 9 test_true_pfm: 5202.489749220634 sim_pfm: 261.4916093092082
episode: 36 training return: tensor(-525.6595, device='cuda:0')
episode: 37 training return: tensor(242.5533, device='cuda:0')
episode: 38 training return: tensor(198.6287, device='cuda:0')
episode: 39 training return: tensor(-168.6403, device='cuda:0')
epoch: 10 test_true_pfm: 5142.316915823153 sim_pfm: 151.13185281936117
episode: 40 training return: tensor(-68.0061, device='cuda:0')
episode: 41 training return: tensor(83.5805, device='cuda:0')
episode: 42 training return: tensor(119.0379, device='cuda:0')
episode: 43 training return: tensor(162.3213, device='cuda:0')
epoch: 11 test_true_pfm: 5183.808596901611 sim_pfm: 49.163337903931584
episode: 44 training return: tensor(98.8478, device='cuda:0')
episode: 45 training return: tensor(53.5630, device='cuda:0')
episode: 46 training return: tensor(68.4060, device='cuda:0')
episode: 47 training return: tensor(197.1421, device='cuda:0')
epoch: 12 test_true_pfm: 5232.1120497336115 sim_pfm: 174.66001047432655
episode: 48 training return: tensor(78.2494, device='cuda:0')
episode: 49 training return: tensor(156.1686, device='cuda:0')
episode: 50 training return: tensor(153.3053, device='cuda:0')
episode: 51 training return: tensor(184.7254, device='cuda:0')
epoch: 13 test_true_pfm: 5295.49552492509 sim_pfm: 115.07246135819393
episode: 52 training return: tensor(210.6153, device='cuda:0')
episode: 53 training return: tensor(70.7544, device='cuda:0')
episode: 54 training return: tensor(66.3401, device='cuda:0')
episode: 55 training return: tensor(49.3169, device='cuda:0')
epoch: 14 test_true_pfm: 5264.1404340878535 sim_pfm: 296.0395933559242
episode: 56 training return: tensor(174.2407, device='cuda:0')
episode: 57 training return: tensor(179.0864, device='cuda:0')
episode: 58 training return: tensor(22.7142, device='cuda:0')
episode: 59 training return: tensor(304.0104, device='cuda:0')
epoch: 15 test_true_pfm: 5252.434212613335 sim_pfm: 146.47169513998475
episode: 60 training return: tensor(-78.0934, device='cuda:0')
episode: 61 training return: tensor(310.5632, device='cuda:0')
episode: 62 training return: tensor(0.5703, device='cuda:0')
episode: 63 training return: tensor(-33.7822, device='cuda:0')
epoch: 16 test_true_pfm: 5303.652422358892 sim_pfm: 236.39774380244003
episode: 64 training return: tensor(142.7263, device='cuda:0')
episode: 65 training return: tensor(186.1359, device='cuda:0')
episode: 66 training return: tensor(189.5174, device='cuda:0')
episode: 67 training return: tensor(-64.2852, device='cuda:0')
epoch: 17 test_true_pfm: 5302.54233818296 sim_pfm: 296.6999277434467
episode: 68 training return: tensor(166.7639, device='cuda:0')
episode: 69 training return: tensor(36.6050, device='cuda:0')
episode: 70 training return: tensor(93.0792, device='cuda:0')
episode: 71 training return: tensor(44.4684, device='cuda:0')
epoch: 18 test_true_pfm: 5219.660699503652 sim_pfm: 185.3111800670158
episode: 72 training return: tensor(170.9447, device='cuda:0')
episode: 73 training return: tensor(127.2642, device='cuda:0')
episode: 74 training return: tensor(125.1848, device='cuda:0')
episode: 75 training return: tensor(95.3860, device='cuda:0')
epoch: 19 test_true_pfm: 5196.737202798507 sim_pfm: 228.27020778107303
episode: 76 training return: tensor(205.7879, device='cuda:0')
episode: 77 training return: tensor(80.7842, device='cuda:0')
episode: 78 training return: tensor(266.8024, device='cuda:0')
episode: 79 training return: tensor(192.5437, device='cuda:0')
epoch: 20 test_true_pfm: 5297.228858743624 sim_pfm: 179.6856435481168
episode: 80 training return: tensor(125.1620, device='cuda:0')
episode: 81 training return: tensor(165.2121, device='cuda:0')
episode: 82 training return: tensor(180.5942, device='cuda:0')
episode: 83 training return: tensor(310.5986, device='cuda:0')
epoch: 21 test_true_pfm: 5411.977075718766 sim_pfm: 229.5069535248913
episode: 84 training return: tensor(256.0704, device='cuda:0')
episode: 85 training return: tensor(237.5975, device='cuda:0')
episode: 86 training return: tensor(95.4777, device='cuda:0')
episode: 87 training return: tensor(99.8212, device='cuda:0')
epoch: 22 test_true_pfm: 5359.101248720079 sim_pfm: 377.4777942014237
episode: 88 training return: tensor(112.3521, device='cuda:0')
episode: 89 training return: tensor(-26.9749, device='cuda:0')
episode: 90 training return: tensor(46.3197, device='cuda:0')
episode: 91 training return: tensor(91.9711, device='cuda:0')
epoch: 23 test_true_pfm: 5265.287641448877 sim_pfm: 356.1368398023769
episode: 92 training return: tensor(-52.6211, device='cuda:0')
episode: 93 training return: tensor(357.6011, device='cuda:0')
episode: 94 training return: tensor(260.8789, device='cuda:0')
episode: 95 training return: tensor(250.6055, device='cuda:0')
epoch: 24 test_true_pfm: 5371.081860098687 sim_pfm: 228.03724879149618
episode: 96 training return: tensor(320.2806, device='cuda:0')
episode: 97 training return: tensor(34.5409, device='cuda:0')
episode: 98 training return: tensor(119.8242, device='cuda:0')
episode: 99 training return: tensor(247.6803, device='cuda:0')
epoch: 25 test_true_pfm: 5398.557621067545 sim_pfm: 374.79560022628476
episode: 100 training return: tensor(208.6882, device='cuda:0')
episode: 101 training return: tensor(213.0546, device='cuda:0')
episode: 102 training return: tensor(138.7114, device='cuda:0')
episode: 103 training return: tensor(-44.9999, device='cuda:0')
epoch: 26 test_true_pfm: 5325.418769233784 sim_pfm: 365.16687835562817
episode: 104 training return: tensor(289.9167, device='cuda:0')
episode: 105 training return: tensor(162.8270, device='cuda:0')
episode: 106 training return: tensor(254.1726, device='cuda:0')
episode: 107 training return: tensor(131.4402, device='cuda:0')
epoch: 27 test_true_pfm: 5259.659995482272 sim_pfm: 379.49834726343397
episode: 108 training return: tensor(254.9162, device='cuda:0')
episode: 109 training return: tensor(191.9392, device='cuda:0')
episode: 110 training return: tensor(10.8714, device='cuda:0')
episode: 111 training return: tensor(111.5267, device='cuda:0')
epoch: 28 test_true_pfm: 5397.582193547511 sim_pfm: 381.61391827002325
episode: 112 training return: tensor(276.4060, device='cuda:0')
episode: 113 training return: tensor(275.9354, device='cuda:0')
episode: 114 training return: tensor(319.9874, device='cuda:0')
episode: 115 training return: tensor(230.8673, device='cuda:0')
epoch: 29 test_true_pfm: 5416.966516488996 sim_pfm: 400.78936488204636
episode: 116 training return: tensor(148.2267, device='cuda:0')
episode: 117 training return: tensor(292.1746, device='cuda:0')
episode: 118 training return: tensor(315.3945, device='cuda:0')
episode: 119 training return: tensor(306.9844, device='cuda:0')
epoch: 30 test_true_pfm: 5347.576221770727 sim_pfm: 308.70925898690865
episode: 120 training return: tensor(281.4250, device='cuda:0')
episode: 121 training return: tensor(156.7158, device='cuda:0')
episode: 122 training return: tensor(254.3960, device='cuda:0')
episode: 123 training return: tensor(166.9318, device='cuda:0')
epoch: 31 test_true_pfm: 5454.27725523426 sim_pfm: 352.45091433768783
episode: 124 training return: tensor(347.3138, device='cuda:0')
episode: 125 training return: tensor(140.0500, device='cuda:0')
episode: 126 training return: tensor(250.2550, device='cuda:0')
episode: 127 training return: tensor(248.4414, device='cuda:0')
epoch: 32 test_true_pfm: 5502.983180959091 sim_pfm: 262.7660998092809
episode: 128 training return: tensor(35.6182, device='cuda:0')
episode: 129 training return: tensor(232.7311, device='cuda:0')
episode: 130 training return: tensor(313.8358, device='cuda:0')
episode: 131 training return: tensor(221.8792, device='cuda:0')
epoch: 33 test_true_pfm: 5396.57533260611 sim_pfm: 252.2078716971349
episode: 132 training return: tensor(90.6740, device='cuda:0')
episode: 133 training return: tensor(163.0867, device='cuda:0')
episode: 134 training return: tensor(187.4128, device='cuda:0')
episode: 135 training return: tensor(267.6102, device='cuda:0')
epoch: 34 test_true_pfm: 5451.153065595995 sim_pfm: 335.8080321329471
episode: 136 training return: tensor(211.0590, device='cuda:0')
episode: 137 training return: tensor(9.4329, device='cuda:0')
episode: 138 training return: tensor(86.6609, device='cuda:0')
episode: 139 training return: tensor(361.2559, device='cuda:0')
epoch: 35 test_true_pfm: 5426.421666350389 sim_pfm: 350.8449148365374
episode: 140 training return: tensor(161.3497, device='cuda:0')
episode: 141 training return: tensor(150.5099, device='cuda:0')
episode: 142 training return: tensor(172.3295, device='cuda:0')
episode: 143 training return: tensor(40.1628, device='cuda:0')
epoch: 36 test_true_pfm: 5497.732177256665 sim_pfm: 370.8130719783173
episode: 144 training return: tensor(154.9822, device='cuda:0')
episode: 145 training return: tensor(221.6367, device='cuda:0')
episode: 146 training return: tensor(285.3276, device='cuda:0')
episode: 147 training return: tensor(366.6620, device='cuda:0')
epoch: 37 test_true_pfm: 5310.183666234775 sim_pfm: 297.0587490107767
episode: 148 training return: tensor(322.8968, device='cuda:0')
episode: 149 training return: tensor(264.1156, device='cuda:0')
episode: 150 training return: tensor(140.7165, device='cuda:0')
episode: 151 training return: tensor(147.7170, device='cuda:0')
epoch: 38 test_true_pfm: 5506.996898655115 sim_pfm: 384.665709194184
episode: 152 training return: tensor(241.7376, device='cuda:0')
episode: 153 training return: tensor(264.7183, device='cuda:0')
episode: 154 training return: tensor(57.4065, device='cuda:0')
episode: 155 training return: tensor(167.1343, device='cuda:0')
epoch: 39 test_true_pfm: 5397.965432368314 sim_pfm: 314.912906387782
episode: 156 training return: tensor(350.1582, device='cuda:0')
episode: 157 training return: tensor(150.7036, device='cuda:0')
episode: 158 training return: tensor(507.2868, device='cuda:0')
episode: 159 training return: tensor(325.6318, device='cuda:0')
epoch: 40 test_true_pfm: 5493.997021602231 sim_pfm: 422.55945232437807
episode: 160 training return: tensor(258.3755, device='cuda:0')
episode: 161 training return: tensor(225.2941, device='cuda:0')
episode: 162 training return: tensor(224.4330, device='cuda:0')
episode: 163 training return: tensor(359.5256, device='cuda:0')
epoch: 41 test_true_pfm: 5380.246655178431 sim_pfm: 394.8550289690029
episode: 164 training return: tensor(267.2565, device='cuda:0')
episode: 165 training return: tensor(249.2623, device='cuda:0')
episode: 166 training return: tensor(218.9706, device='cuda:0')
episode: 167 training return: tensor(268.9382, device='cuda:0')
epoch: 42 test_true_pfm: 5463.618323694501 sim_pfm: 342.5059480463581
episode: 168 training return: tensor(253.3416, device='cuda:0')
episode: 169 training return: tensor(280.5324, device='cuda:0')
episode: 170 training return: tensor(10.9344, device='cuda:0')
episode: 171 training return: tensor(357.4347, device='cuda:0')
epoch: 43 test_true_pfm: 5591.664251901478 sim_pfm: 437.5245854171614
episode: 172 training return: tensor(80.3566, device='cuda:0')
episode: 173 training return: tensor(316.5210, device='cuda:0')
episode: 174 training return: tensor(262.7057, device='cuda:0')
episode: 175 training return: tensor(250.0179, device='cuda:0')
epoch: 44 test_true_pfm: 5506.679737185528 sim_pfm: 348.57477027981076
episode: 176 training return: tensor(217.7558, device='cuda:0')
episode: 177 training return: tensor(426.9244, device='cuda:0')
episode: 178 training return: tensor(359.1638, device='cuda:0')
episode: 179 training return: tensor(303.5774, device='cuda:0')
epoch: 45 test_true_pfm: 5494.323211318241 sim_pfm: 379.96750183502445
episode: 180 training return: tensor(146.2525, device='cuda:0')
episode: 181 training return: tensor(295.8281, device='cuda:0')
episode: 182 training return: tensor(305.5202, device='cuda:0')
episode: 183 training return: tensor(420.3937, device='cuda:0')
epoch: 46 test_true_pfm: 5451.648529113812 sim_pfm: 413.53339481994044
episode: 184 training return: tensor(348.0333, device='cuda:0')
episode: 185 training return: tensor(413.0866, device='cuda:0')
episode: 186 training return: tensor(269.2805, device='cuda:0')
episode: 187 training return: tensor(404.1492, device='cuda:0')
epoch: 47 test_true_pfm: 5514.894363185617 sim_pfm: -1.0132263309884972
episode: 188 training return: tensor(183.4892, device='cuda:0')
episode: 189 training return: tensor(322.7881, device='cuda:0')
episode: 190 training return: tensor(397.3305, device='cuda:0')
episode: 191 training return: tensor(467.0992, device='cuda:0')
epoch: 48 test_true_pfm: 5543.448812829879 sim_pfm: 518.7988011285585
episode: 192 training return: tensor(238.2513, device='cuda:0')
episode: 193 training return: tensor(366.2760, device='cuda:0')
episode: 194 training return: tensor(299.4945, device='cuda:0')
episode: 195 training return: tensor(361.6884, device='cuda:0')
epoch: 49 test_true_pfm: 5511.143287194073 sim_pfm: 456.5854209023528
episode: 196 training return: tensor(414.5809, device='cuda:0')
episode: 197 training return: tensor(359.4040, device='cuda:0')
episode: 198 training return: tensor(302.2498, device='cuda:0')
episode: 199 training return: tensor(369.9988, device='cuda:0')
epoch: 50 test_true_pfm: 5583.808731317651 sim_pfm: 480.77490257434937
episode: 200 training return: tensor(455.5345, device='cuda:0')
episode: 201 training return: tensor(240.5060, device='cuda:0')
episode: 202 training return: tensor(93.9833, device='cuda:0')
episode: 203 training return: tensor(362.3450, device='cuda:0')
epoch: 51 test_true_pfm: 5552.631964436046 sim_pfm: 490.87695350936457
episode: 204 training return: tensor(180.2808, device='cuda:0')
episode: 205 training return: tensor(355.1627, device='cuda:0')
episode: 206 training return: tensor(374.9769, device='cuda:0')
episode: 207 training return: tensor(237.2521, device='cuda:0')
epoch: 52 test_true_pfm: 5515.532517162785 sim_pfm: 528.6741089229472
episode: 208 training return: tensor(404.8960, device='cuda:0')
episode: 209 training return: tensor(264.1833, device='cuda:0')
episode: 210 training return: tensor(298.3472, device='cuda:0')
episode: 211 training return: tensor(296.7387, device='cuda:0')
epoch: 53 test_true_pfm: 5565.202932911482 sim_pfm: 424.23074660241645
episode: 212 training return: tensor(318.6980, device='cuda:0')
episode: 213 training return: tensor(476.0920, device='cuda:0')
episode: 214 training return: tensor(442.4005, device='cuda:0')
episode: 215 training return: tensor(176.2181, device='cuda:0')
epoch: 54 test_true_pfm: 5546.039517796368 sim_pfm: 428.21544230545015
episode: 216 training return: tensor(242.9767, device='cuda:0')
episode: 217 training return: tensor(322.7422, device='cuda:0')
episode: 218 training return: tensor(372.3246, device='cuda:0')
episode: 219 training return: tensor(315.5173, device='cuda:0')
epoch: 55 test_true_pfm: 5524.285233504866 sim_pfm: 417.06025862693787
episode: 220 training return: tensor(297.5107, device='cuda:0')
episode: 221 training return: tensor(222.6536, device='cuda:0')
episode: 222 training return: tensor(349.4287, device='cuda:0')
episode: 223 training return: tensor(287.1047, device='cuda:0')
epoch: 56 test_true_pfm: 5647.823434452114 sim_pfm: 507.7249177985359
episode: 224 training return: tensor(419.5849, device='cuda:0')
episode: 225 training return: tensor(478.8617, device='cuda:0')
episode: 226 training return: tensor(319.0053, device='cuda:0')
episode: 227 training return: tensor(394.4862, device='cuda:0')
epoch: 57 test_true_pfm: 5538.233668473396 sim_pfm: 527.0453118936857
episode: 228 training return: tensor(148.5435, device='cuda:0')
episode: 229 training return: tensor(349.2821, device='cuda:0')
episode: 230 training return: tensor(373.2487, device='cuda:0')
episode: 231 training return: tensor(128.6726, device='cuda:0')
epoch: 58 test_true_pfm: 5505.9840963399365 sim_pfm: 376.25169613652787
episode: 232 training return: tensor(279.1730, device='cuda:0')
episode: 233 training return: tensor(449.7528, device='cuda:0')
episode: 234 training return: tensor(199.0685, device='cuda:0')
episode: 235 training return: tensor(383.0421, device='cuda:0')
epoch: 59 test_true_pfm: 5577.153405697924 sim_pfm: 475.5873608904658
episode: 236 training return: tensor(279.0172, device='cuda:0')
episode: 237 training return: tensor(302.9677, device='cuda:0')
episode: 238 training return: tensor(368.1808, device='cuda:0')
episode: 239 training return: tensor(270.7333, device='cuda:0')
epoch: 60 test_true_pfm: 5544.411150535364 sim_pfm: 519.0117184585348
episode: 240 training return: tensor(173.0255, device='cuda:0')
episode: 241 training return: tensor(314.5700, device='cuda:0')
episode: 242 training return: tensor(286.4807, device='cuda:0')
episode: 243 training return: tensor(405.5775, device='cuda:0')
epoch: 61 test_true_pfm: 5655.808579049294 sim_pfm: 560.5913818626044
episode: 244 training return: tensor(383.4357, device='cuda:0')
episode: 245 training return: tensor(329.9925, device='cuda:0')
episode: 246 training return: tensor(338.2622, device='cuda:0')
episode: 247 training return: tensor(18.2254, device='cuda:0')
epoch: 62 test_true_pfm: 5477.996439434827 sim_pfm: 506.7891062842294
episode: 248 training return: tensor(396.5868, device='cuda:0')
episode: 249 training return: tensor(313.4193, device='cuda:0')
episode: 250 training return: tensor(302.6090, device='cuda:0')
episode: 251 training return: tensor(236.5221, device='cuda:0')
epoch: 63 test_true_pfm: 5597.235721916967 sim_pfm: 486.8431746551457
episode: 252 training return: tensor(244.2906, device='cuda:0')
episode: 253 training return: tensor(173.4447, device='cuda:0')
episode: 254 training return: tensor(344.1227, device='cuda:0')
episode: 255 training return: tensor(242.9096, device='cuda:0')
epoch: 64 test_true_pfm: 5501.071754091932 sim_pfm: 461.2905006894919
episode: 256 training return: tensor(263.3416, device='cuda:0')
episode: 257 training return: tensor(227.9463, device='cuda:0')
episode: 258 training return: tensor(442.8969, device='cuda:0')
episode: 259 training return: tensor(367.1440, device='cuda:0')
epoch: 65 test_true_pfm: 5497.468713880032 sim_pfm: 437.3395022339925
episode: 260 training return: tensor(354.3008, device='cuda:0')
episode: 261 training return: tensor(333.7354, device='cuda:0')
episode: 262 training return: tensor(268.7687, device='cuda:0')
episode: 263 training return: tensor(363.0927, device='cuda:0')
epoch: 66 test_true_pfm: 5572.018854962797 sim_pfm: 487.22945578812505
episode: 264 training return: tensor(339.3698, device='cuda:0')
episode: 265 training return: tensor(338.1809, device='cuda:0')
episode: 266 training return: tensor(320.2523, device='cuda:0')
episode: 267 training return: tensor(245.3691, device='cuda:0')
epoch: 67 test_true_pfm: 5667.960442688135 sim_pfm: 544.1916795866176
episode: 268 training return: tensor(441.4242, device='cuda:0')
episode: 269 training return: tensor(402.6571, device='cuda:0')
episode: 270 training return: tensor(252.9990, device='cuda:0')
episode: 271 training return: tensor(364.9206, device='cuda:0')
epoch: 68 test_true_pfm: 5581.349955955626 sim_pfm: 544.3834757717559
episode: 272 training return: tensor(404.3751, device='cuda:0')
episode: 273 training return: tensor(278.6116, device='cuda:0')
episode: 274 training return: tensor(328.8247, device='cuda:0')
episode: 275 training return: tensor(314.8434, device='cuda:0')
epoch: 69 test_true_pfm: 5617.5579944846695 sim_pfm: 374.79866053837276
episode: 276 training return: tensor(318.0282, device='cuda:0')
episode: 277 training return: tensor(333.3246, device='cuda:0')
episode: 278 training return: tensor(317.0618, device='cuda:0')
episode: 279 training return: tensor(433.0183, device='cuda:0')
epoch: 70 test_true_pfm: 5578.002216772537 sim_pfm: 394.44282745569944
episode: 280 training return: tensor(290.1598, device='cuda:0')
episode: 281 training return: tensor(408.7011, device='cuda:0')
episode: 282 training return: tensor(298.1939, device='cuda:0')
episode: 283 training return: tensor(332.0681, device='cuda:0')
epoch: 71 test_true_pfm: 5613.964991489339 sim_pfm: 484.1111514634492
episode: 284 training return: tensor(286.6862, device='cuda:0')
episode: 285 training return: tensor(526.1505, device='cuda:0')
episode: 286 training return: tensor(482.1448, device='cuda:0')
episode: 287 training return: tensor(365.3206, device='cuda:0')
epoch: 72 test_true_pfm: 5617.526534409098 sim_pfm: 546.815210777655
episode: 288 training return: tensor(371.2305, device='cuda:0')
episode: 289 training return: tensor(186.9360, device='cuda:0')
episode: 290 training return: tensor(403.8893, device='cuda:0')
episode: 291 training return: tensor(430.1108, device='cuda:0')
epoch: 73 test_true_pfm: 5731.703427520614 sim_pfm: 477.87513526692055
episode: 292 training return: tensor(384.7307, device='cuda:0')
episode: 293 training return: tensor(293.6639, device='cuda:0')
episode: 294 training return: tensor(452.8393, device='cuda:0')
episode: 295 training return: tensor(408.8486, device='cuda:0')
epoch: 74 test_true_pfm: 5577.729684226221 sim_pfm: 502.21109369055677
episode: 296 training return: tensor(503.1436, device='cuda:0')
episode: 297 training return: tensor(415.9892, device='cuda:0')
episode: 298 training return: tensor(253.6219, device='cuda:0')
episode: 299 training return: tensor(443.9125, device='cuda:0')
epoch: 75 test_true_pfm: 5660.810667077653 sim_pfm: 488.1579169941445
episode: 300 training return: tensor(414.9271, device='cuda:0')
episode: 301 training return: tensor(375.8947, device='cuda:0')
episode: 302 training return: tensor(487.5823, device='cuda:0')
episode: 303 training return: tensor(550.7905, device='cuda:0')
epoch: 76 test_true_pfm: 5600.396575586935 sim_pfm: 506.49267055559903
episode: 304 training return: tensor(363.3759, device='cuda:0')
episode: 305 training return: tensor(224.1428, device='cuda:0')
episode: 306 training return: tensor(401.4113, device='cuda:0')
episode: 307 training return: tensor(402.9395, device='cuda:0')
epoch: 77 test_true_pfm: 5599.676142335305 sim_pfm: 565.3485395540969
episode: 308 training return: tensor(429.8004, device='cuda:0')
episode: 309 training return: tensor(420.2858, device='cuda:0')
episode: 310 training return: tensor(309.7903, device='cuda:0')
episode: 311 training return: tensor(79.1012, device='cuda:0')
epoch: 78 test_true_pfm: 5655.274926842365 sim_pfm: 412.0421793111212
episode: 312 training return: tensor(309.0666, device='cuda:0')
episode: 313 training return: tensor(278.3382, device='cuda:0')
episode: 314 training return: tensor(299.3004, device='cuda:0')
episode: 315 training return: tensor(426.0133, device='cuda:0')
epoch: 79 test_true_pfm: 5607.7176352464985 sim_pfm: 443.15703908895375
episode: 316 training return: tensor(439.3440, device='cuda:0')
episode: 317 training return: tensor(359.4529, device='cuda:0')
episode: 318 training return: tensor(278.4564, device='cuda:0')
episode: 319 training return: tensor(442.2914, device='cuda:0')
epoch: 80 test_true_pfm: 5606.407736586058 sim_pfm: 555.587841031161
episode: 320 training return: tensor(451.1321, device='cuda:0')
episode: 321 training return: tensor(506.6033, device='cuda:0')
episode: 322 training return: tensor(428.1495, device='cuda:0')
episode: 323 training return: tensor(405.3145, device='cuda:0')
epoch: 81 test_true_pfm: 5639.45045168438 sim_pfm: 544.5167338010602
episode: 324 training return: tensor(430.6772, device='cuda:0')
episode: 325 training return: tensor(437.8922, device='cuda:0')
episode: 326 training return: tensor(389.4070, device='cuda:0')
episode: 327 training return: tensor(371.4932, device='cuda:0')
epoch: 82 test_true_pfm: 5570.54986352051 sim_pfm: 590.8527218556652
episode: 328 training return: tensor(434.4698, device='cuda:0')
episode: 329 training return: tensor(533.1180, device='cuda:0')
episode: 330 training return: tensor(378.5315, device='cuda:0')
episode: 331 training return: tensor(470.1218, device='cuda:0')
epoch: 83 test_true_pfm: 5507.458589364222 sim_pfm: 460.8869039654771
episode: 332 training return: tensor(324.6282, device='cuda:0')
episode: 333 training return: tensor(402.2258, device='cuda:0')
episode: 334 training return: tensor(478.7714, device='cuda:0')
episode: 335 training return: tensor(392.9564, device='cuda:0')
epoch: 84 test_true_pfm: 5678.223669359334 sim_pfm: 478.3758802784917
episode: 336 training return: tensor(284.0856, device='cuda:0')
episode: 337 training return: tensor(336.5955, device='cuda:0')
episode: 338 training return: tensor(435.5588, device='cuda:0')
episode: 339 training return: tensor(240.3020, device='cuda:0')
epoch: 85 test_true_pfm: 5556.320201746264 sim_pfm: 479.3013151984972
episode: 340 training return: tensor(583.9409, device='cuda:0')
episode: 341 training return: tensor(438.3108, device='cuda:0')
episode: 342 training return: tensor(467.9453, device='cuda:0')
episode: 343 training return: tensor(428.9581, device='cuda:0')
epoch: 86 test_true_pfm: 5667.008843482873 sim_pfm: 529.1007063154442
episode: 344 training return: tensor(335.6939, device='cuda:0')
episode: 345 training return: tensor(295.7068, device='cuda:0')
episode: 346 training return: tensor(368.2534, device='cuda:0')
episode: 347 training return: tensor(464.8618, device='cuda:0')
epoch: 87 test_true_pfm: 5627.091661103969 sim_pfm: 542.0896975384094
episode: 348 training return: tensor(514.0553, device='cuda:0')
episode: 349 training return: tensor(361.0769, device='cuda:0')
episode: 350 training return: tensor(258.3071, device='cuda:0')
episode: 351 training return: tensor(442.0669, device='cuda:0')
epoch: 88 test_true_pfm: 5663.139370446525 sim_pfm: 573.3503370880304
episode: 352 training return: tensor(499.6787, device='cuda:0')
episode: 353 training return: tensor(254.5814, device='cuda:0')
episode: 354 training return: tensor(411.1012, device='cuda:0')
episode: 355 training return: tensor(389.5555, device='cuda:0')
epoch: 89 test_true_pfm: 5568.7675735572375 sim_pfm: 551.5162067813022
episode: 356 training return: tensor(292.7112, device='cuda:0')
episode: 357 training return: tensor(486.4711, device='cuda:0')
episode: 358 training return: tensor(354.3820, device='cuda:0')
episode: 359 training return: tensor(368.7510, device='cuda:0')
epoch: 90 test_true_pfm: 5698.631330768846 sim_pfm: 525.5465354672439
episode: 360 training return: tensor(403.0471, device='cuda:0')
episode: 361 training return: tensor(540.6147, device='cuda:0')
episode: 362 training return: tensor(243.5440, device='cuda:0')
episode: 363 training return: tensor(416.5911, device='cuda:0')
epoch: 91 test_true_pfm: 5626.902324598018 sim_pfm: 425.8073776928165
episode: 364 training return: tensor(404.0776, device='cuda:0')
episode: 365 training return: tensor(392.7005, device='cuda:0')
episode: 366 training return: tensor(472.7755, device='cuda:0')
episode: 367 training return: tensor(566.5593, device='cuda:0')
epoch: 92 test_true_pfm: 5618.273408915565 sim_pfm: 521.508559279299
episode: 368 training return: tensor(476.0099, device='cuda:0')
episode: 369 training return: tensor(490.7202, device='cuda:0')
episode: 370 training return: tensor(268.4174, device='cuda:0')
episode: 371 training return: tensor(490.9094, device='cuda:0')
epoch: 93 test_true_pfm: 5575.690996319132 sim_pfm: 537.3237438331902
episode: 372 training return: tensor(455.6707, device='cuda:0')
episode: 373 training return: tensor(558.2191, device='cuda:0')
episode: 374 training return: tensor(405.5258, device='cuda:0')
episode: 375 training return: tensor(390.2576, device='cuda:0')
epoch: 94 test_true_pfm: 5602.011671618089 sim_pfm: 478.74530792546767
episode: 376 training return: tensor(351.6476, device='cuda:0')
episode: 377 training return: tensor(538.3251, device='cuda:0')
episode: 378 training return: tensor(478.1206, device='cuda:0')
episode: 379 training return: tensor(451.9615, device='cuda:0')
epoch: 95 test_true_pfm: 5691.917163271669 sim_pfm: 582.5383832380176
episode: 380 training return: tensor(524.3475, device='cuda:0')
episode: 381 training return: tensor(368.8211, device='cuda:0')
episode: 382 training return: tensor(324.7921, device='cuda:0')
episode: 383 training return: tensor(324.0637, device='cuda:0')
epoch: 96 test_true_pfm: 5656.470869707508 sim_pfm: 563.2531484175319
episode: 384 training return: tensor(512.8104, device='cuda:0')
episode: 385 training return: tensor(467.1933, device='cuda:0')
episode: 386 training return: tensor(581.7997, device='cuda:0')
episode: 387 training return: tensor(447.5276, device='cuda:0')
epoch: 97 test_true_pfm: 5670.545990384194 sim_pfm: 536.0945723564752
episode: 388 training return: tensor(552.7445, device='cuda:0')
episode: 389 training return: tensor(448.3999, device='cuda:0')
episode: 390 training return: tensor(516.7872, device='cuda:0')
episode: 391 training return: tensor(392.4943, device='cuda:0')
epoch: 98 test_true_pfm: 5698.074422543218 sim_pfm: 636.3733113013828
episode: 392 training return: tensor(382.5783, device='cuda:0')
episode: 393 training return: tensor(412.1182, device='cuda:0')
episode: 394 training return: tensor(287.0966, device='cuda:0')
episode: 395 training return: tensor(435.6464, device='cuda:0')
epoch: 99 test_true_pfm: 5712.2104601463025 sim_pfm: 529.5630141614625
episode: 396 training return: tensor(497.4041, device='cuda:0')
episode: 397 training return: tensor(368.7092, device='cuda:0')
episode: 398 training return: tensor(357.0085, device='cuda:0')
episode: 399 training return: tensor(556.7853, device='cuda:0')
epoch: 100 test_true_pfm: 5633.553183843775 sim_pfm: 528.1406647649516
episode: 400 training return: tensor(448.8269, device='cuda:0')
episode: 401 training return: tensor(501.4799, device='cuda:0')
episode: 402 training return: tensor(371.9146, device='cuda:0')
episode: 403 training return: tensor(391.4380, device='cuda:0')
epoch: 101 test_true_pfm: 5669.615860271008 sim_pfm: 532.7067719077459
episode: 404 training return: tensor(586.7968, device='cuda:0')
episode: 405 training return: tensor(353.7780, device='cuda:0')
episode: 406 training return: tensor(436.3338, device='cuda:0')
episode: 407 training return: tensor(382.9836, device='cuda:0')
epoch: 102 test_true_pfm: 5680.532066330306 sim_pfm: 575.7430265352983
episode: 408 training return: tensor(531.8858, device='cuda:0')
episode: 409 training return: tensor(419.0717, device='cuda:0')
episode: 410 training return: tensor(518.4894, device='cuda:0')
episode: 411 training return: tensor(492.5755, device='cuda:0')
epoch: 103 test_true_pfm: 5640.592835187496 sim_pfm: 503.2361405469128
episode: 412 training return: tensor(457.3207, device='cuda:0')
episode: 413 training return: tensor(213.4635, device='cuda:0')
episode: 414 training return: tensor(356.0386, device='cuda:0')
episode: 415 training return: tensor(340.9988, device='cuda:0')
epoch: 104 test_true_pfm: 5622.3601463230525 sim_pfm: 499.65255163771025
episode: 416 training return: tensor(431.9850, device='cuda:0')
episode: 417 training return: tensor(388.9193, device='cuda:0')
episode: 418 training return: tensor(421.2244, device='cuda:0')
episode: 419 training return: tensor(454.2181, device='cuda:0')
epoch: 105 test_true_pfm: 5716.797662741045 sim_pfm: 545.7910409332835
episode: 420 training return: tensor(192.6700, device='cuda:0')
episode: 421 training return: tensor(535.0493, device='cuda:0')
episode: 422 training return: tensor(453.0730, device='cuda:0')
episode: 423 training return: tensor(460.3426, device='cuda:0')
epoch: 106 test_true_pfm: 5716.36439542122 sim_pfm: 622.7407752461731
episode: 424 training return: tensor(522.3656, device='cuda:0')
episode: 425 training return: tensor(504.9309, device='cuda:0')
episode: 426 training return: tensor(553.0667, device='cuda:0')
episode: 427 training return: tensor(391.1669, device='cuda:0')
epoch: 107 test_true_pfm: 5626.436018909448 sim_pfm: 513.1325337595384
episode: 428 training return: tensor(477.7395, device='cuda:0')
episode: 429 training return: tensor(414.4860, device='cuda:0')
episode: 430 training return: tensor(425.6649, device='cuda:0')
episode: 431 training return: tensor(344.6344, device='cuda:0')
epoch: 108 test_true_pfm: 5623.366764870773 sim_pfm: 532.7974549077238
episode: 432 training return: tensor(348.9570, device='cuda:0')
episode: 433 training return: tensor(498.7069, device='cuda:0')
episode: 434 training return: tensor(347.3905, device='cuda:0')
episode: 435 training return: tensor(482.3217, device='cuda:0')
epoch: 109 test_true_pfm: 5735.971104039116 sim_pfm: 572.9092559275063
episode: 436 training return: tensor(377.4053, device='cuda:0')
episode: 437 training return: tensor(526.7018, device='cuda:0')
episode: 438 training return: tensor(443.5836, device='cuda:0')
episode: 439 training return: tensor(435.9616, device='cuda:0')
epoch: 110 test_true_pfm: 5717.1570226073245 sim_pfm: 541.8318747775435
episode: 440 training return: tensor(541.4404, device='cuda:0')
episode: 441 training return: tensor(485.5500, device='cuda:0')
episode: 442 training return: tensor(464.7634, device='cuda:0')
episode: 443 training return: tensor(376.7275, device='cuda:0')
epoch: 111 test_true_pfm: 5672.481456736567 sim_pfm: 590.5254329740225
episode: 444 training return: tensor(526.9824, device='cuda:0')
episode: 445 training return: tensor(501.9806, device='cuda:0')
episode: 446 training return: tensor(554.2753, device='cuda:0')
episode: 447 training return: tensor(505.3129, device='cuda:0')
epoch: 112 test_true_pfm: 5734.974025233462 sim_pfm: 615.5841181713331
episode: 448 training return: tensor(373.8445, device='cuda:0')
episode: 449 training return: tensor(493.0786, device='cuda:0')
episode: 450 training return: tensor(595.2060, device='cuda:0')
episode: 451 training return: tensor(474.5399, device='cuda:0')
epoch: 113 test_true_pfm: 5620.802372186526 sim_pfm: 576.951018434794
episode: 452 training return: tensor(470.3932, device='cuda:0')
episode: 453 training return: tensor(489.9914, device='cuda:0')
episode: 454 training return: tensor(450.0377, device='cuda:0')
episode: 455 training return: tensor(315.9962, device='cuda:0')
epoch: 114 test_true_pfm: 5669.933618001759 sim_pfm: 589.0036376732556
episode: 456 training return: tensor(477.9272, device='cuda:0')
episode: 457 training return: tensor(462.9323, device='cuda:0')
episode: 458 training return: tensor(393.3843, device='cuda:0')
episode: 459 training return: tensor(445.9576, device='cuda:0')
epoch: 115 test_true_pfm: 5652.71610217603 sim_pfm: 648.764093779726
episode: 460 training return: tensor(393.2825, device='cuda:0')
episode: 461 training return: tensor(415.5462, device='cuda:0')
episode: 462 training return: tensor(457.2488, device='cuda:0')
episode: 463 training return: tensor(401.0580, device='cuda:0')
epoch: 116 test_true_pfm: 5720.21772649964 sim_pfm: 522.135343411103
episode: 464 training return: tensor(323.2430, device='cuda:0')
episode: 465 training return: tensor(405.9141, device='cuda:0')
episode: 466 training return: tensor(498.0215, device='cuda:0')
episode: 467 training return: tensor(504.7821, device='cuda:0')
epoch: 117 test_true_pfm: 5699.211102736818 sim_pfm: 536.0453260528544
episode: 468 training return: tensor(428.2724, device='cuda:0')
episode: 469 training return: tensor(601.8253, device='cuda:0')
episode: 470 training return: tensor(322.2283, device='cuda:0')
episode: 471 training return: tensor(473.2319, device='cuda:0')
epoch: 118 test_true_pfm: 5614.413104267248 sim_pfm: 579.4489538429965
episode: 472 training return: tensor(445.6891, device='cuda:0')
episode: 473 training return: tensor(404.3344, device='cuda:0')
episode: 474 training return: tensor(288.9247, device='cuda:0')
episode: 475 training return: tensor(439.1361, device='cuda:0')
epoch: 119 test_true_pfm: 5669.581163236932 sim_pfm: 575.7306942755046
episode: 476 training return: tensor(558.1832, device='cuda:0')
episode: 477 training return: tensor(567.3396, device='cuda:0')
episode: 478 training return: tensor(538.4017, device='cuda:0')
episode: 479 training return: tensor(507.1479, device='cuda:0')
epoch: 120 test_true_pfm: 5742.906896890101 sim_pfm: 570.5575959978547
episode: 480 training return: tensor(484.3696, device='cuda:0')
episode: 481 training return: tensor(448.8830, device='cuda:0')
episode: 482 training return: tensor(364.8861, device='cuda:0')
episode: 483 training return: tensor(533.4075, device='cuda:0')
epoch: 121 test_true_pfm: 5756.545032959006 sim_pfm: 568.5852422212096
episode: 484 training return: tensor(365.9796, device='cuda:0')
episode: 485 training return: tensor(387.3664, device='cuda:0')
episode: 486 training return: tensor(458.4771, device='cuda:0')
episode: 487 training return: tensor(336.4659, device='cuda:0')
epoch: 122 test_true_pfm: 5685.0960192185885 sim_pfm: 571.1342634946728
episode: 488 training return: tensor(415.9388, device='cuda:0')
episode: 489 training return: tensor(553.0010, device='cuda:0')
episode: 490 training return: tensor(393.0524, device='cuda:0')
episode: 491 training return: tensor(449.2381, device='cuda:0')
epoch: 123 test_true_pfm: 5651.8970900166305 sim_pfm: 621.7732111535346
episode: 492 training return: tensor(514.0793, device='cuda:0')
episode: 493 training return: tensor(477.5188, device='cuda:0')
episode: 494 training return: tensor(527.2447, device='cuda:0')
episode: 495 training return: tensor(472.2672, device='cuda:0')
epoch: 124 test_true_pfm: 5661.235528385471 sim_pfm: 511.0459756141645
episode: 496 training return: tensor(442.7901, device='cuda:0')
episode: 497 training return: tensor(513.3614, device='cuda:0')
episode: 498 training return: tensor(574.4951, device='cuda:0')
episode: 499 training return: tensor(339.2082, device='cuda:0')
epoch: 125 test_true_pfm: 5639.616769400651 sim_pfm: 579.0432246583126
episode: 500 training return: tensor(577.4952, device='cuda:0')
episode: 501 training return: tensor(460.0723, device='cuda:0')
episode: 502 training return: tensor(462.5566, device='cuda:0')
episode: 503 training return: tensor(478.7225, device='cuda:0')
epoch: 126 test_true_pfm: 5785.406520974401 sim_pfm: 600.3128663562238
episode: 504 training return: tensor(404.2152, device='cuda:0')
episode: 505 training return: tensor(528.6182, device='cuda:0')
episode: 506 training return: tensor(350.1131, device='cuda:0')
episode: 507 training return: tensor(488.1331, device='cuda:0')
epoch: 127 test_true_pfm: 5681.0290073179995 sim_pfm: 588.3839846781144
episode: 508 training return: tensor(553.5558, device='cuda:0')
episode: 509 training return: tensor(253.0559, device='cuda:0')
episode: 510 training return: tensor(505.5020, device='cuda:0')
episode: 511 training return: tensor(343.2580, device='cuda:0')
epoch: 128 test_true_pfm: 5731.004087818187 sim_pfm: 487.1846169822772
episode: 512 training return: tensor(485.7826, device='cuda:0')
episode: 513 training return: tensor(391.0768, device='cuda:0')
episode: 514 training return: tensor(242.9161, device='cuda:0')
episode: 515 training return: tensor(527.8315, device='cuda:0')
epoch: 129 test_true_pfm: 5654.64768089111 sim_pfm: 668.5991436167387
episode: 516 training return: tensor(603.8934, device='cuda:0')
episode: 517 training return: tensor(521.7133, device='cuda:0')
episode: 518 training return: tensor(439.1521, device='cuda:0')
episode: 519 training return: tensor(423.5742, device='cuda:0')
epoch: 130 test_true_pfm: 5678.101278297804 sim_pfm: 591.714694752552
episode: 520 training return: tensor(566.2717, device='cuda:0')
episode: 521 training return: tensor(371.2914, device='cuda:0')
episode: 522 training return: tensor(491.1018, device='cuda:0')
episode: 523 training return: tensor(586.8075, device='cuda:0')
epoch: 131 test_true_pfm: 5714.053791483834 sim_pfm: 602.4475819007494
episode: 524 training return: tensor(505.4065, device='cuda:0')
episode: 525 training return: tensor(372.7820, device='cuda:0')
episode: 526 training return: tensor(520.9308, device='cuda:0')
episode: 527 training return: tensor(438.6444, device='cuda:0')
epoch: 132 test_true_pfm: 5679.555636870369 sim_pfm: 588.4559583566928
episode: 528 training return: tensor(493.7777, device='cuda:0')
episode: 529 training return: tensor(539.1815, device='cuda:0')
episode: 530 training return: tensor(413.9361, device='cuda:0')
episode: 531 training return: tensor(554.5593, device='cuda:0')
epoch: 133 test_true_pfm: 5701.192661067034 sim_pfm: 595.3210062081149
episode: 532 training return: tensor(399.8353, device='cuda:0')
episode: 533 training return: tensor(516.9490, device='cuda:0')
episode: 534 training return: tensor(503.5265, device='cuda:0')
episode: 535 training return: tensor(519.7900, device='cuda:0')
epoch: 134 test_true_pfm: 5651.419247211986 sim_pfm: 629.7973643658528
episode: 536 training return: tensor(565.3234, device='cuda:0')
episode: 537 training return: tensor(485.4495, device='cuda:0')
episode: 538 training return: tensor(346.3162, device='cuda:0')
episode: 539 training return: tensor(568.8105, device='cuda:0')
epoch: 135 test_true_pfm: 5674.926380313901 sim_pfm: 601.129025736067
episode: 540 training return: tensor(494.5136, device='cuda:0')
episode: 541 training return: tensor(450.0248, device='cuda:0')
episode: 542 training return: tensor(451.1085, device='cuda:0')
episode: 543 training return: tensor(545.5753, device='cuda:0')
epoch: 136 test_true_pfm: 5701.989308624667 sim_pfm: 606.7367152233297
episode: 544 training return: tensor(365.2152, device='cuda:0')
episode: 545 training return: tensor(558.5769, device='cuda:0')
episode: 546 training return: tensor(544.7847, device='cuda:0')
episode: 547 training return: tensor(460.4684, device='cuda:0')
epoch: 137 test_true_pfm: 5775.170137170154 sim_pfm: 625.8995388878199
episode: 548 training return: tensor(454.4438, device='cuda:0')
episode: 549 training return: tensor(539.8391, device='cuda:0')
episode: 550 training return: tensor(533.8208, device='cuda:0')
episode: 551 training return: tensor(493.0758, device='cuda:0')
epoch: 138 test_true_pfm: 5732.369359527033 sim_pfm: 586.9953928254932
episode: 552 training return: tensor(485.9112, device='cuda:0')
episode: 553 training return: tensor(535.0371, device='cuda:0')
episode: 554 training return: tensor(442.5280, device='cuda:0')
episode: 555 training return: tensor(520.5679, device='cuda:0')
epoch: 139 test_true_pfm: 5632.20468713219 sim_pfm: 595.7506659983968
episode: 556 training return: tensor(442.1966, device='cuda:0')
episode: 557 training return: tensor(499.6147, device='cuda:0')
episode: 558 training return: tensor(546.5324, device='cuda:0')
episode: 559 training return: tensor(473.2031, device='cuda:0')
epoch: 140 test_true_pfm: 5700.252523429334 sim_pfm: 625.5523682161001
episode: 560 training return: tensor(370.4837, device='cuda:0')
episode: 561 training return: tensor(371.6148, device='cuda:0')
episode: 562 training return: tensor(362.6733, device='cuda:0')
episode: 563 training return: tensor(458.9681, device='cuda:0')
epoch: 141 test_true_pfm: 5617.442450228348 sim_pfm: 572.4866975352634
episode: 564 training return: tensor(418.8903, device='cuda:0')
episode: 565 training return: tensor(508.5522, device='cuda:0')
episode: 566 training return: tensor(498.7557, device='cuda:0')
episode: 567 training return: tensor(401.1313, device='cuda:0')
epoch: 142 test_true_pfm: 5668.475635189626 sim_pfm: 598.9592169144113
episode: 568 training return: tensor(562.3704, device='cuda:0')
episode: 569 training return: tensor(446.8266, device='cuda:0')
episode: 570 training return: tensor(515.4777, device='cuda:0')
episode: 571 training return: tensor(361.3878, device='cuda:0')
epoch: 143 test_true_pfm: 5750.520486612676 sim_pfm: 641.8792571843272
episode: 572 training return: tensor(407.8066, device='cuda:0')
episode: 573 training return: tensor(558.7855, device='cuda:0')
episode: 574 training return: tensor(482.5905, device='cuda:0')
episode: 575 training return: tensor(540.2579, device='cuda:0')
epoch: 144 test_true_pfm: 5678.400660298538 sim_pfm: 618.5897150973711
episode: 576 training return: tensor(466.2093, device='cuda:0')
episode: 577 training return: tensor(404.7557, device='cuda:0')
episode: 578 training return: tensor(602.6532, device='cuda:0')
episode: 579 training return: tensor(489.0734, device='cuda:0')
epoch: 145 test_true_pfm: 5558.058438219173 sim_pfm: 599.6963851489903
episode: 580 training return: tensor(367.8601, device='cuda:0')
episode: 581 training return: tensor(485.8130, device='cuda:0')
episode: 582 training return: tensor(336.8759, device='cuda:0')
episode: 583 training return: tensor(519.4124, device='cuda:0')
epoch: 146 test_true_pfm: 5730.029572894065 sim_pfm: 662.2382183530135
episode: 584 training return: tensor(507.6929, device='cuda:0')
episode: 585 training return: tensor(329.4733, device='cuda:0')
episode: 586 training return: tensor(349.7242, device='cuda:0')
episode: 587 training return: tensor(533.4036, device='cuda:0')
epoch: 147 test_true_pfm: 5745.085749180124 sim_pfm: 586.5151240192936
episode: 588 training return: tensor(417.3871, device='cuda:0')
episode: 589 training return: tensor(450.6496, device='cuda:0')
episode: 590 training return: tensor(540.3798, device='cuda:0')
episode: 591 training return: tensor(533.9546, device='cuda:0')
epoch: 148 test_true_pfm: 5754.268278023518 sim_pfm: 539.0424418101708
episode: 592 training return: tensor(515.2600, device='cuda:0')
episode: 593 training return: tensor(376.5006, device='cuda:0')
episode: 594 training return: tensor(429.6492, device='cuda:0')
episode: 595 training return: tensor(478.1765, device='cuda:0')
epoch: 149 test_true_pfm: 5685.05004519068 sim_pfm: 632.9709736856943
episode: 596 training return: tensor(447.3451, device='cuda:0')
episode: 597 training return: tensor(458.7274, device='cuda:0')
episode: 598 training return: tensor(630.2155, device='cuda:0')
episode: 599 training return: tensor(388.5562, device='cuda:0')
epoch: 150 test_true_pfm: 5640.301776682888 sim_pfm: 628.1003160407223
