['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'behavior', '--traj', 'expert', '--seed', '1']
epoch: 0 training_loss 0.21195484466850759 test_loss: 0.15275417566299437
epoch: 1 training_loss 0.14945287190377712 test_loss: 0.14344029426574706
epoch: 2 training_loss 0.13668345760554076 test_loss: 0.13907768726348876
epoch: 3 training_loss 0.13903391279280186 test_loss: 0.1373664379119873
epoch: 4 training_loss 0.1379537757486105 test_loss: 0.14453717470169067
epoch: 5 training_loss 0.12460648328065872 test_loss: 0.14910445213317872
epoch: 6 training_loss 0.1349690568819642 test_loss: 0.1340131163597107
epoch: 7 training_loss 0.12831295847892762 test_loss: 0.12068706750869751
epoch: 8 training_loss 0.11610450528562069 test_loss: 0.125801157951355
epoch: 9 training_loss 0.12260408215224743 test_loss: 0.11764659881591796
epoch: 10 training_loss 0.1275352804735303 test_loss: 0.12798149585723878
epoch: 11 training_loss 0.1234400100260973 test_loss: 0.12644879817962645
epoch: 12 training_loss 0.12138803448528052 test_loss: 0.12359433174133301
epoch: 13 training_loss 0.1196910486370325 test_loss: 0.11690329313278199
epoch: 14 training_loss 0.1195712347328663 test_loss: 0.13190908432006837
epoch: 15 training_loss 0.1152981674298644 test_loss: 0.12422609329223633
epoch: 16 training_loss 0.11837738454341888 test_loss: 0.13594725131988525
epoch: 17 training_loss 0.11947862654924393 test_loss: 0.1154616117477417
epoch: 18 training_loss 0.11897519685328006 test_loss: 0.12024425268173218
epoch: 19 training_loss 0.12064835280179978 test_loss: 0.11451144218444824
epoch: 20 training_loss 0.11552213553339243 test_loss: 0.11605942249298096
epoch: 21 training_loss 0.12067992001771927 test_loss: 0.11197812557220459
epoch: 22 training_loss 0.1183354239165783 test_loss: 0.12100805044174194
epoch: 23 training_loss 0.11715534154325724 test_loss: 0.12035717964172363
epoch: 24 training_loss 0.11515572182834148 test_loss: 0.12150206565856933
epoch: 25 training_loss 0.11867255181074142 test_loss: 0.11941705942153931
epoch: 26 training_loss 0.1106804833561182 test_loss: 0.11988387107849122
epoch: 27 training_loss 0.11091114986687899 test_loss: 0.11846842765808105
epoch: 28 training_loss 0.12420322582125663 test_loss: 0.12068169116973877
epoch: 29 training_loss 0.11632210772484541 test_loss: 0.11310988664627075
epoch: 30 training_loss 0.11811850786209106 test_loss: 0.12579277753829957
epoch: 31 training_loss 0.11410974610596895 test_loss: 0.10884466171264648
epoch: 32 training_loss 0.11036057818681001 test_loss: 0.09917452931404114
epoch: 33 training_loss 0.11278483264148235 test_loss: 0.12020951509475708
epoch: 34 training_loss 0.1118959741294384 test_loss: 0.1282219648361206
epoch: 35 training_loss 0.10712941531091928 test_loss: 0.1248245120048523
epoch: 36 training_loss 0.11845724303275347 test_loss: 0.10926088094711303
epoch: 37 training_loss 0.11326211512088775 test_loss: 0.12056581974029541
epoch: 38 training_loss 0.11372646901756525 test_loss: 0.11941856145858765
epoch: 39 training_loss 0.126209950260818 test_loss: 0.12434980869293213
epoch: 40 training_loss 0.11923797082155943 test_loss: 0.10631433725357056
epoch: 41 training_loss 0.11276420917361975 test_loss: 0.1163406491279602
epoch: 42 training_loss 0.11852587454020977 test_loss: 0.10738095045089721
epoch: 43 training_loss 0.11429593674838542 test_loss: 0.10421503782272339
epoch: 44 training_loss 0.1180002724006772 test_loss: 0.12831006050109864
epoch: 45 training_loss 0.11415970455855132 test_loss: 0.12648863792419435
epoch: 46 training_loss 0.11567801743745804 test_loss: 0.11932939291000366
epoch: 47 training_loss 0.11678776498883962 test_loss: 0.1300896644592285
epoch: 48 training_loss 0.11426864467561244 test_loss: 0.11898608207702636
epoch: 49 training_loss 0.11268705006688834 test_loss: 0.12370513677597046
epoch: 50 training_loss 0.11658725596964359 test_loss: 0.10064796209335328
epoch: 51 training_loss 0.1140110283344984 test_loss: 0.1259139060974121
epoch: 52 training_loss 0.11298483777791261 test_loss: 0.12288892269134521
epoch: 53 training_loss 0.11192430838942528 test_loss: 0.13787535429000855
epoch: 54 training_loss 0.1137002819031477 test_loss: 0.12440530061721802
epoch: 55 training_loss 0.11987412650138139 test_loss: 0.11657555103302002
epoch: 56 training_loss 0.11669848050922155 test_loss: 0.11655580997467041
epoch: 57 training_loss 0.11902388110756874 test_loss: 0.14365904331207274
epoch: 58 training_loss 0.11130751080811024 test_loss: 0.12268974781036376
epoch: 59 training_loss 0.11314909618347883 test_loss: 0.12356648445129395
epoch: 60 training_loss 0.11161398753523827 test_loss: 0.11385687589645385
epoch: 61 training_loss 0.11466431211680174 test_loss: 0.11015008687973023
epoch: 62 training_loss 0.1193666784465313 test_loss: 0.13086060285568238
epoch: 63 training_loss 0.115873908624053 test_loss: 0.11140153408050538
epoch: 64 training_loss 0.11306539919227361 test_loss: 0.1123661994934082
epoch: 65 training_loss 0.11906175781041384 test_loss: 0.12245386838912964
epoch: 66 training_loss 0.1146099167689681 test_loss: 0.1337696671485901
epoch: 67 training_loss 0.11337360475212335 test_loss: 0.1158632516860962
epoch: 68 training_loss 0.11101130370050669 test_loss: 0.12069720029830933
epoch: 69 training_loss 0.11710046987980605 test_loss: 0.11337435245513916
epoch: 70 training_loss 0.11225468579679727 test_loss: 0.1130752444267273
epoch: 71 training_loss 0.12166240852326154 test_loss: 0.10800578594207763
epoch: 72 training_loss 0.11214301567524672 test_loss: 0.13136508464813232
epoch: 73 training_loss 0.11409236188977957 test_loss: 0.11895976066589356
epoch: 74 training_loss 0.1150015938282013 test_loss: 0.11576567888259888
epoch: 75 training_loss 0.10547833319753408 test_loss: 0.12358291149139404
epoch: 76 training_loss 0.10893712982535363 test_loss: 0.11823339462280273
epoch: 77 training_loss 0.10984094418585301 test_loss: 0.12398580312728882
epoch: 78 training_loss 0.10294501263648272 test_loss: 0.1141663908958435
epoch: 79 training_loss 0.11157180538401007 test_loss: 0.12009140253067016
epoch: 80 training_loss 0.1129242967069149 test_loss: 0.11338915824890136
epoch: 81 training_loss 0.12103076007217169 test_loss: 0.11487765312194824
epoch: 82 training_loss 0.11269586462527513 test_loss: 0.12307443618774414
epoch: 83 training_loss 0.11500989459455013 test_loss: 0.11134432554244995
epoch: 84 training_loss 0.11763696290552617 test_loss: 0.10870593786239624
epoch: 85 training_loss 0.1092782298475504 test_loss: 0.11741936206817627
epoch: 86 training_loss 0.11527349647134542 test_loss: 0.10536103248596192
epoch: 87 training_loss 0.11217986956238747 test_loss: 0.10663914680480957
epoch: 88 training_loss 0.12023078344762325 test_loss: 0.11148842573165893
epoch: 89 training_loss 0.11361195724457503 test_loss: 0.11024302244186401
epoch: 90 training_loss 0.11242063000798225 test_loss: 0.11676938533782959
epoch: 91 training_loss 0.11058080743998289 test_loss: 0.12627966403961183
epoch: 92 training_loss 0.12148724064230919 test_loss: 0.11055041551589966
epoch: 93 training_loss 0.10864082347601652 test_loss: 0.11545747518539429
epoch: 94 training_loss 0.11170877080410718 test_loss: 0.11610225439071656
epoch: 95 training_loss 0.11459223169833421 test_loss: 0.12064857482910156
epoch: 96 training_loss 0.10798803295940161 test_loss: 0.10809799432754516
epoch: 97 training_loss 0.11262990050017833 test_loss: 0.12029761075973511
epoch: 98 training_loss 0.11311980504542589 test_loss: 0.10458974838256836
epoch: 99 training_loss 0.11065294176340103 test_loss: 0.11788192987442017
epoch: 100 training_loss 0.1112704648450017 test_loss: 0.10578964948654175
epoch: 101 training_loss 0.10804962698370218 test_loss: 0.12972899675369262
epoch: 102 training_loss 0.1117074989527464 test_loss: 0.12948188781738282
epoch: 103 training_loss 0.11290363676846027 test_loss: 0.11792612075805664
epoch: 104 training_loss 0.11759000025689602 test_loss: 0.12775400876998902
epoch: 105 training_loss 0.1186335877329111 test_loss: 0.10987420082092285
epoch: 106 training_loss 0.1180613973364234 test_loss: 0.11912233829498291
epoch: 107 training_loss 0.11459776189178228 test_loss: 0.1035920262336731
epoch: 108 training_loss 0.10592012789100408 test_loss: 0.1194459319114685
epoch: 109 training_loss 0.1139988668449223 test_loss: 0.11892203092575074
epoch: 110 training_loss 0.11485297333449125 test_loss: 0.11042912006378174
epoch: 111 training_loss 0.11351210463792086 test_loss: 0.13013463020324706
epoch: 112 training_loss 0.11428695272654295 test_loss: 0.11951402425765992
epoch: 113 training_loss 0.10846317660063505 test_loss: 0.11815931797027587
epoch: 114 training_loss 0.10819664664566517 test_loss: 0.11520456075668335
epoch: 115 training_loss 0.11359993323683738 test_loss: 0.10736689567565919
epoch: 116 training_loss 0.10894248820841312 test_loss: 0.12437677383422852
epoch: 117 training_loss 0.11293359756469727 test_loss: 0.11195961236953736
epoch: 118 training_loss 0.11116948872804641 test_loss: 0.10616686344146728
epoch: 119 training_loss 0.11056551653891802 test_loss: 0.11252330541610718
epoch: 120 training_loss 0.1112616940960288 test_loss: 0.11100912094116211
epoch: 121 training_loss 0.11545776780694723 test_loss: 0.1165521502494812
epoch: 122 training_loss 0.11121519315987825 test_loss: 0.10533082485198975
epoch: 123 training_loss 0.10666910532861948 test_loss: 0.11835342645645142
epoch: 124 training_loss 0.11314782805740833 test_loss: 0.10997713804244995
epoch: 125 training_loss 0.11802544217556715 test_loss: 0.09603687524795532
epoch: 126 training_loss 0.10939114477485418 test_loss: 0.1129610776901245
epoch: 127 training_loss 0.11138830184936524 test_loss: 0.11511586904525757
epoch: 128 training_loss 0.10654901474714279 test_loss: 0.11378573179244995
epoch: 129 training_loss 0.10782174333930016 test_loss: 0.12298423051834106
epoch: 130 training_loss 0.11199543543159962 test_loss: 0.12559752464294432
epoch: 131 training_loss 0.11227687235921621 test_loss: 0.11942451000213623
epoch: 132 training_loss 0.11241327397525311 test_loss: 0.10940072536468506
epoch: 133 training_loss 0.10937658831477165 test_loss: 0.11443495750427246
epoch: 134 training_loss 0.10899016786366701 test_loss: 0.133158278465271
epoch: 135 training_loss 0.11328346122056246 test_loss: 0.12234718799591064
epoch: 136 training_loss 0.11773545611649752 test_loss: 0.11331223249435425
epoch: 137 training_loss 0.11518612757325172 test_loss: 0.11465276479721069
epoch: 138 training_loss 0.11529675897210837 test_loss: 0.12399802207946778
epoch: 139 training_loss 0.11015308927744627 test_loss: 0.10971561670303345
epoch: 140 training_loss 0.11070553347468376 test_loss: 0.10249791145324708
epoch: 141 training_loss 0.10847939312458038 test_loss: 0.10813250541687011
epoch: 142 training_loss 0.11252382073551416 test_loss: 0.13290144205093385
epoch: 143 training_loss 0.10848404897376895 test_loss: 0.10527009963989258
epoch: 144 training_loss 0.11378584910184145 test_loss: 0.11804975271224975
epoch: 145 training_loss 0.11450781401246786 test_loss: 0.10708917379379272
epoch: 146 training_loss 0.10783889777958393 test_loss: 0.1376712441444397
epoch: 147 training_loss 0.10985655136406422 test_loss: 0.12339043617248535
epoch: 148 training_loss 0.11080843292176723 test_loss: 0.11821315288543702
epoch: 149 training_loss 0.11375238850712777 test_loss: 0.12627378702163697
epoch: 0 training_loss 20.837657384872436 test_loss: 15.361738586425782
epoch: 1 training_loss 11.653206148147582 test_loss: 9.131842041015625
epoch: 2 training_loss 8.222448654174805 test_loss: 7.508521270751953
epoch: 3 training_loss 7.0183174467086795 test_loss: 6.798623657226562
epoch: 4 training_loss 6.520349063873291 test_loss: 6.151955795288086
epoch: 5 training_loss 5.752137746810913 test_loss: 5.530338287353516
epoch: 6 training_loss 5.373268389701844 test_loss: 5.233786392211914
epoch: 7 training_loss 5.101290225982666 test_loss: 4.912931823730469
epoch: 8 training_loss 4.7996627974510195 test_loss: 4.663897323608398
epoch: 9 training_loss 4.551626832485199 test_loss: 4.61369743347168
epoch: 10 training_loss 4.365632057189941 test_loss: 4.305368804931641
epoch: 11 training_loss 4.327545835971832 test_loss: 4.401770401000976
epoch: 12 training_loss 4.097177336215973 test_loss: 4.094390106201172
epoch: 13 training_loss 3.9920282125473023 test_loss: 4.084008026123047
epoch: 14 training_loss 3.9491909337043762 test_loss: 4.039699935913086
epoch: 15 training_loss 3.8178729581832886 test_loss: 3.8823787689208986
epoch: 16 training_loss 3.675597460269928 test_loss: 3.644818115234375
epoch: 17 training_loss 3.636383891105652 test_loss: 3.5478946685791017
epoch: 18 training_loss 3.4931498408317565 test_loss: 3.657119369506836
epoch: 19 training_loss 3.5076734709739683 test_loss: 3.3978908538818358
epoch: 20 training_loss 3.4693710136413576 test_loss: 3.735016632080078
epoch: 21 training_loss 3.398439500331879 test_loss: 3.4634174346923827
epoch: 22 training_loss 3.337631633281708 test_loss: 3.2041095733642577
epoch: 23 training_loss 3.2959471225738524 test_loss: 3.1423246383666994
epoch: 24 training_loss 3.235319974422455 test_loss: 3.265370178222656
epoch: 25 training_loss 3.187245934009552 test_loss: 3.188867378234863
epoch: 26 training_loss 3.127916491031647 test_loss: 3.150504302978516
epoch: 27 training_loss 3.1491989207267763 test_loss: 3.3839073181152344
epoch: 28 training_loss 3.0929332971572876 test_loss: 3.217967224121094
epoch: 29 training_loss 3.174001522064209 test_loss: 3.04882755279541
epoch: 30 training_loss 3.1028384017944335 test_loss: 3.0751087188720705
epoch: 31 training_loss 3.0660678553581238 test_loss: 2.896994400024414
epoch: 32 training_loss 2.9664995956420896 test_loss: 3.0285837173461916
epoch: 33 training_loss 3.028121626377106 test_loss: 2.8994062423706053
epoch: 34 training_loss 2.918183037042618 test_loss: 3.005925750732422
epoch: 35 training_loss 2.95903626203537 test_loss: 2.829059600830078
epoch: 36 training_loss 2.8399128556251525 test_loss: 2.8277326583862306
epoch: 37 training_loss 2.8466491961479186 test_loss: 2.755655860900879
epoch: 38 training_loss 2.7682499480247498 test_loss: 2.7358976364135743
epoch: 39 training_loss 2.730276565551758 test_loss: 2.807948684692383
epoch: 40 training_loss 2.814491821527481 test_loss: 2.783085060119629
epoch: 41 training_loss 2.7734575617313384 test_loss: 2.711052131652832
epoch: 42 training_loss 2.6723484706878664 test_loss: 2.677089500427246
epoch: 43 training_loss 2.7687018585205077 test_loss: 2.7202072143554688
epoch: 44 training_loss 2.7405407786369325 test_loss: 2.6508163452148437
epoch: 45 training_loss 2.6869434106349943 test_loss: 2.80059871673584
epoch: 46 training_loss 2.6095341277122497 test_loss: 2.878730583190918
epoch: 47 training_loss 2.7356696462631227 test_loss: 2.6595733642578123
epoch: 48 training_loss 2.631348526477814 test_loss: 2.658427429199219
epoch: 49 training_loss 2.587527587413788 test_loss: 2.65633544921875
epoch: 50 training_loss 2.525887916088104 test_loss: 2.6343511581420898
epoch: 51 training_loss 2.6726806676387787 test_loss: 2.5577320098876952
epoch: 52 training_loss 2.5742800414562224 test_loss: 2.5033952713012697
epoch: 53 training_loss 2.5352453112602236 test_loss: 2.473337745666504
epoch: 54 training_loss 2.520767344236374 test_loss: 2.4603796005249023
epoch: 55 training_loss 2.508746699094772 test_loss: 2.7884170532226564
epoch: 56 training_loss 2.538464317321777 test_loss: 2.489653205871582
epoch: 57 training_loss 2.5017082822322845 test_loss: 2.5040477752685546
epoch: 58 training_loss 2.4508116245269775 test_loss: 2.324114227294922
epoch: 59 training_loss 2.4104446005821227 test_loss: 2.3820444107055665
epoch: 60 training_loss 2.4816459858417512 test_loss: 2.854374885559082
epoch: 61 training_loss 2.6130358576774597 test_loss: 2.4383960723876954
epoch: 62 training_loss 2.421820250749588 test_loss: 2.558300018310547
epoch: 63 training_loss 2.42224068403244 test_loss: 2.44226188659668
epoch: 64 training_loss 2.3754144871234892 test_loss: 2.6166391372680664
epoch: 65 training_loss 2.527839617729187 test_loss: 2.450678253173828
epoch: 66 training_loss 2.4037566924095155 test_loss: 2.2563640594482424
epoch: 67 training_loss 2.384243365526199 test_loss: 2.384467124938965
epoch: 68 training_loss 2.3604807686805724 test_loss: 2.3730703353881837
epoch: 69 training_loss 2.4196720898151396 test_loss: 2.382328224182129
epoch: 70 training_loss 2.332839033603668 test_loss: 2.5272701263427733
epoch: 71 training_loss 2.4070362508296967 test_loss: 2.3071321487426757
epoch: 72 training_loss 2.3919158399105074 test_loss: 2.4697547912597657
epoch: 73 training_loss 2.321790980100632 test_loss: 2.746112060546875
epoch: 74 training_loss 2.4746626031398775 test_loss: 2.337281608581543
epoch: 75 training_loss 2.36812318444252 test_loss: 2.1983774185180662
epoch: 76 training_loss 2.3549476933479307 test_loss: 2.3294961929321287
epoch: 77 training_loss 2.2804993081092833 test_loss: 2.2297454833984376
epoch: 78 training_loss 2.324755961894989 test_loss: 2.3162103652954102
epoch: 79 training_loss 2.224263025522232 test_loss: 2.3516494750976564
epoch: 80 training_loss 2.3004076194763186 test_loss: 2.302128028869629
epoch: 81 training_loss 2.2256216979026795 test_loss: 2.3530252456665037
epoch: 82 training_loss 2.30278657078743 test_loss: 2.4088891983032226
epoch: 83 training_loss 2.2700255239009857 test_loss: 2.326666831970215
epoch: 84 training_loss 2.335800701379776 test_loss: 2.167514991760254
epoch: 85 training_loss 2.325984295606613 test_loss: 2.2437986373901366
epoch: 86 training_loss 2.298654842376709 test_loss: 2.3756275177001953
epoch: 87 training_loss 2.269069073200226 test_loss: 2.1506103515625
epoch: 88 training_loss 2.2083145582675936 test_loss: 2.4415109634399412
epoch: 89 training_loss 2.1982487416267396 test_loss: 2.261612892150879
epoch: 90 training_loss 2.2048241722583772 test_loss: 2.2494976043701174
epoch: 91 training_loss 2.2105985140800475 test_loss: 2.3740819931030273
epoch: 92 training_loss 2.281105018854141 test_loss: 2.16943416595459
epoch: 93 training_loss 2.230889530181885 test_loss: 2.147041130065918
epoch: 94 training_loss 2.3174425518512725 test_loss: 2.35648250579834
epoch: 95 training_loss 2.2665920341014862 test_loss: 2.24047737121582
epoch: 96 training_loss 2.180220851898193 test_loss: 2.146580696105957
epoch: 97 training_loss 2.2520003378391267 test_loss: 2.140242576599121
epoch: 98 training_loss 2.136998896598816 test_loss: 2.4163396835327147
epoch: 99 training_loss 2.2206718063354494 test_loss: 2.2268692016601563
epoch: 100 training_loss 2.2160158455371857 test_loss: 2.2276943206787108
epoch: 101 training_loss 2.1271541893482206 test_loss: 2.208806800842285
epoch: 102 training_loss 2.1119082903862 test_loss: 2.073049545288086
epoch: 103 training_loss 2.210223088264465 test_loss: 2.4362424850463866
epoch: 104 training_loss 2.133746621608734 test_loss: 2.136269950866699
epoch: 105 training_loss 2.127996052503586 test_loss: 2.2290676116943358
epoch: 106 training_loss 2.143949415683746 test_loss: 2.257895088195801
epoch: 107 training_loss 2.2149060201644897 test_loss: 2.2614133834838865
epoch: 108 training_loss 2.1405316722393035 test_loss: 2.415848731994629
epoch: 109 training_loss 2.1467869794368744 test_loss: 2.0317815780639648
epoch: 110 training_loss 2.1635984134674073 test_loss: 2.1616722106933595
epoch: 111 training_loss 2.254726597070694 test_loss: 2.1074352264404297
epoch: 112 training_loss 2.161893379688263 test_loss: 2.07171688079834
epoch: 113 training_loss 2.104526606798172 test_loss: 2.028827095031738
epoch: 114 training_loss 2.1546378564834594 test_loss: 2.169207954406738
epoch: 115 training_loss 2.1102122080326082 test_loss: 2.037429428100586
epoch: 116 training_loss 2.1033266186714172 test_loss: 2.1139686584472654
epoch: 117 training_loss 2.139276683330536 test_loss: 2.012582015991211
epoch: 118 training_loss 2.046193931102753 test_loss: 2.025828552246094
epoch: 119 training_loss 2.140202714204788 test_loss: 2.1597518920898438
epoch: 120 training_loss 2.2054474186897277 test_loss: 2.064229202270508
epoch: 121 training_loss 2.0793301284313204 test_loss: 2.111611557006836
epoch: 122 training_loss 2.065297256708145 test_loss: 2.5900596618652343
epoch: 123 training_loss 2.180794997215271 test_loss: 2.3335521697998045
epoch: 124 training_loss 2.111284353733063 test_loss: 2.0759963989257812
epoch: 125 training_loss 2.1286213278770445 test_loss: 2.0327938079833983
epoch: 126 training_loss 2.1054433178901673 test_loss: 2.123280715942383
epoch: 127 training_loss 2.18772780418396 test_loss: 2.1334352493286133
epoch: 128 training_loss 2.073941920995712 test_loss: 2.2433486938476563
epoch: 129 training_loss 2.0886055982112883 test_loss: 2.088020896911621
epoch: 130 training_loss 2.0252247726917267 test_loss: 2.270041084289551
epoch: 131 training_loss 2.0818362724781037 test_loss: 2.08770809173584
epoch: 132 training_loss 2.1107551872730257 test_loss: 2.1699609756469727
epoch: 133 training_loss 2.061054449081421 test_loss: 2.135714530944824
epoch: 134 training_loss 2.066757563352585 test_loss: 2.0388275146484376
epoch: 135 training_loss 2.1242222690582278 test_loss: 1.9575843811035156
epoch: 136 training_loss 2.0461345982551573 test_loss: 2.0615493774414064
epoch: 137 training_loss 2.0965681004524233 test_loss: 1.9701749801635742
epoch: 138 training_loss 2.0771597957611085 test_loss: 2.0621259689331053
epoch: 139 training_loss 2.0918965625762937 test_loss: 2.0348016738891603
epoch: 140 training_loss 2.066955374479294 test_loss: 1.9324090957641602
epoch: 141 training_loss 1.9769813513755798 test_loss: 2.039627265930176
epoch: 142 training_loss 2.0672791957855225 test_loss: 1.9597486495971679
epoch: 143 training_loss 1.9438298177719116 test_loss: 2.113594818115234
epoch: 144 training_loss 1.967115797996521 test_loss: 2.030988311767578
epoch: 145 training_loss 1.947015038728714 test_loss: 1.8680448532104492
epoch: 146 training_loss 1.8726401245594024 test_loss: 1.8361501693725586
epoch: 147 training_loss 1.9863571584224702 test_loss: 2.0079484939575196
epoch: 148 training_loss 1.9999027526378632 test_loss: 2.0009450912475586
epoch: 149 training_loss 1.8955912292003632 test_loss: 1.8992155075073243
133.14151446243665
episode: 0 training return: tensor(169.1126, device='cuda:0')
episode: 1 training return: tensor(174.9380, device='cuda:0')
episode: 2 training return: tensor(172.3322, device='cuda:0')
episode: 3 training return: tensor(188.8431, device='cuda:0')
epoch: 1 test_true_pfm: 131.16761994813152 sim_pfm: 180.18399331633117
episode: 4 training return: tensor(164.6086, device='cuda:0')
episode: 5 training return: tensor(182.5448, device='cuda:0')
episode: 6 training return: tensor(186.9661, device='cuda:0')
episode: 7 training return: tensor(169.3687, device='cuda:0')
epoch: 2 test_true_pfm: 128.46846968017695 sim_pfm: 175.1403522647801
episode: 8 training return: tensor(177.4393, device='cuda:0')
episode: 9 training return: tensor(189.0204, device='cuda:0')
episode: 10 training return: tensor(175.4995, device='cuda:0')
episode: 11 training return: tensor(174.6314, device='cuda:0')
epoch: 3 test_true_pfm: 128.32986679663205 sim_pfm: 179.34345640458633
episode: 12 training return: tensor(181.0946, device='cuda:0')
episode: 13 training return: tensor(175.3022, device='cuda:0')
episode: 14 training return: tensor(188.4924, device='cuda:0')
episode: 15 training return: tensor(171.1438, device='cuda:0')
epoch: 4 test_true_pfm: 128.38454207450363 sim_pfm: 175.5143188643211
episode: 16 training return: tensor(170.3964, device='cuda:0')
episode: 17 training return: tensor(172.3123, device='cuda:0')
episode: 18 training return: tensor(179.3414, device='cuda:0')
episode: 19 training return: tensor(190.4523, device='cuda:0')
epoch: 5 test_true_pfm: 128.66630326703628 sim_pfm: 176.98371531165904
episode: 20 training return: tensor(170.8486, device='cuda:0')
episode: 21 training return: tensor(168.8746, device='cuda:0')
episode: 22 training return: tensor(191.4806, device='cuda:0')
episode: 23 training return: tensor(157.8517, device='cuda:0')
epoch: 6 test_true_pfm: 133.5614949226042 sim_pfm: 154.36370516937458
episode: 24 training return: tensor(160.0875, device='cuda:0')
episode: 25 training return: tensor(156.3395, device='cuda:0')
episode: 26 training return: tensor(166.9992, device='cuda:0')
episode: 27 training return: tensor(154.6518, device='cuda:0')
epoch: 7 test_true_pfm: 135.53305054823832 sim_pfm: 162.8298751689901
episode: 28 training return: tensor(151.2309, device='cuda:0')
episode: 29 training return: tensor(156.6404, device='cuda:0')
episode: 30 training return: tensor(156.0889, device='cuda:0')
episode: 31 training return: tensor(170.3841, device='cuda:0')
epoch: 8 test_true_pfm: 135.745818017412 sim_pfm: 154.36172344064107
episode: 32 training return: tensor(166.5183, device='cuda:0')
episode: 33 training return: tensor(157.8929, device='cuda:0')
episode: 34 training return: tensor(157.5918, device='cuda:0')
episode: 35 training return: tensor(160.1202, device='cuda:0')
epoch: 9 test_true_pfm: 135.68905681837214 sim_pfm: 160.47385144056753
episode: 36 training return: tensor(166.9881, device='cuda:0')
episode: 37 training return: tensor(148.9531, device='cuda:0')
episode: 38 training return: tensor(155.7486, device='cuda:0')
episode: 39 training return: tensor(149.6046, device='cuda:0')
epoch: 10 test_true_pfm: 134.28099977477285 sim_pfm: 157.99480698664556
episode: 40 training return: tensor(165.8203, device='cuda:0')
episode: 41 training return: tensor(161.4415, device='cuda:0')
episode: 42 training return: tensor(159.3800, device='cuda:0')
episode: 43 training return: tensor(156.6105, device='cuda:0')
epoch: 11 test_true_pfm: 134.16970844901525 sim_pfm: 160.2970815558161
episode: 44 training return: tensor(164.2670, device='cuda:0')
episode: 45 training return: tensor(168.8643, device='cuda:0')
episode: 46 training return: tensor(157.3228, device='cuda:0')
episode: 47 training return: tensor(162.9018, device='cuda:0')
epoch: 12 test_true_pfm: 134.63134478887577 sim_pfm: 159.5375687162974
episode: 48 training return: tensor(158.9681, device='cuda:0')
episode: 49 training return: tensor(155.0663, device='cuda:0')
episode: 50 training return: tensor(146.2895, device='cuda:0')
episode: 51 training return: tensor(155.1684, device='cuda:0')
epoch: 13 test_true_pfm: 135.08769536939303 sim_pfm: 156.23404116917519
episode: 52 training return: tensor(162.9168, device='cuda:0')
episode: 53 training return: tensor(156.2592, device='cuda:0')
episode: 54 training return: tensor(161.7384, device='cuda:0')
episode: 55 training return: tensor(162.7418, device='cuda:0')
epoch: 14 test_true_pfm: 131.40293020240884 sim_pfm: 158.69037606737112
episode: 56 training return: tensor(164.0859, device='cuda:0')
episode: 57 training return: tensor(169.6464, device='cuda:0')
episode: 58 training return: tensor(151.0751, device='cuda:0')
episode: 59 training return: tensor(149.9977, device='cuda:0')
epoch: 15 test_true_pfm: 129.88553632997667 sim_pfm: 160.0869717797672
episode: 60 training return: tensor(159.0628, device='cuda:0')
episode: 61 training return: tensor(173.6253, device='cuda:0')
episode: 62 training return: tensor(172.1322, device='cuda:0')
episode: 63 training return: tensor(164.9137, device='cuda:0')
epoch: 16 test_true_pfm: 135.25791538288118 sim_pfm: 158.91804214139702
episode: 64 training return: tensor(158.8084, device='cuda:0')
episode: 65 training return: tensor(168.7868, device='cuda:0')
episode: 66 training return: tensor(155.6779, device='cuda:0')
episode: 67 training return: tensor(165.3600, device='cuda:0')
epoch: 17 test_true_pfm: 129.28460863225644 sim_pfm: 158.83830164127866
episode: 68 training return: tensor(160.0644, device='cuda:0')
episode: 69 training return: tensor(166.9568, device='cuda:0')
episode: 70 training return: tensor(167.3866, device='cuda:0')
episode: 71 training return: tensor(168.6304, device='cuda:0')
epoch: 18 test_true_pfm: 134.2767273517622 sim_pfm: 158.85729748469893
episode: 72 training return: tensor(165.8598, device='cuda:0')
episode: 73 training return: tensor(149.6539, device='cuda:0')
episode: 74 training return: tensor(163.1731, device='cuda:0')
episode: 75 training return: tensor(162.0142, device='cuda:0')
epoch: 19 test_true_pfm: 136.71975452886187 sim_pfm: 154.97819411482197
episode: 76 training return: tensor(158.5271, device='cuda:0')
episode: 77 training return: tensor(162.4478, device='cuda:0')
episode: 78 training return: tensor(160.6935, device='cuda:0')
episode: 79 training return: tensor(172.0876, device='cuda:0')
epoch: 20 test_true_pfm: 135.16440393396834 sim_pfm: 156.097875462909
episode: 80 training return: tensor(169.7280, device='cuda:0')
episode: 81 training return: tensor(140.3705, device='cuda:0')
episode: 82 training return: tensor(159.1560, device='cuda:0')
episode: 83 training return: tensor(153.4995, device='cuda:0')
epoch: 21 test_true_pfm: 134.40023936161583 sim_pfm: 164.04555507927435
episode: 84 training return: tensor(149.6929, device='cuda:0')
episode: 85 training return: tensor(173.3687, device='cuda:0')
episode: 86 training return: tensor(157.5262, device='cuda:0')
episode: 87 training return: tensor(153.0748, device='cuda:0')
epoch: 22 test_true_pfm: 132.10140296333472 sim_pfm: 163.0516506733082
episode: 88 training return: tensor(160.6744, device='cuda:0')
episode: 89 training return: tensor(170.4927, device='cuda:0')
episode: 90 training return: tensor(159.8643, device='cuda:0')
episode: 91 training return: tensor(170.9188, device='cuda:0')
epoch: 23 test_true_pfm: 130.69537834617353 sim_pfm: 155.386000715103
episode: 92 training return: tensor(159.7654, device='cuda:0')
episode: 93 training return: tensor(154.3806, device='cuda:0')
episode: 94 training return: tensor(148.8835, device='cuda:0')
episode: 95 training return: tensor(171.8951, device='cuda:0')
epoch: 24 test_true_pfm: 130.3203159606727 sim_pfm: 156.0484398610075
episode: 96 training return: tensor(151.7331, device='cuda:0')
episode: 97 training return: tensor(186.0312, device='cuda:0')
episode: 98 training return: tensor(188.9008, device='cuda:0')
episode: 99 training return: tensor(182.4416, device='cuda:0')
epoch: 25 test_true_pfm: 130.97675700838192 sim_pfm: 184.48601899124333
episode: 100 training return: tensor(183.6484, device='cuda:0')
episode: 101 training return: tensor(181.1629, device='cuda:0')
episode: 102 training return: tensor(179.8120, device='cuda:0')
episode: 103 training return: tensor(201.9615, device='cuda:0')
epoch: 26 test_true_pfm: 129.40869461877287 sim_pfm: 189.467814283137
episode: 104 training return: tensor(181.1074, device='cuda:0')
episode: 105 training return: tensor(179.9414, device='cuda:0')
episode: 106 training return: tensor(181.0054, device='cuda:0')
episode: 107 training return: tensor(179.2685, device='cuda:0')
epoch: 27 test_true_pfm: 130.69106478827752 sim_pfm: 180.64075335703092
episode: 108 training return: tensor(175.1480, device='cuda:0')
episode: 109 training return: tensor(182.3298, device='cuda:0')
episode: 110 training return: tensor(186.6716, device='cuda:0')
episode: 111 training return: tensor(181.1273, device='cuda:0')
epoch: 28 test_true_pfm: 128.84412886076484 sim_pfm: 180.59511240427963
episode: 112 training return: tensor(177.6593, device='cuda:0')
episode: 113 training return: tensor(158.9075, device='cuda:0')
episode: 114 training return: tensor(172.9193, device='cuda:0')
episode: 115 training return: tensor(190.2551, device='cuda:0')
epoch: 29 test_true_pfm: 129.1589934613045 sim_pfm: 176.24049105335726
episode: 116 training return: tensor(184.1411, device='cuda:0')
episode: 117 training return: tensor(182.3669, device='cuda:0')
episode: 118 training return: tensor(188.0481, device='cuda:0')
episode: 119 training return: tensor(180.1480, device='cuda:0')
epoch: 30 test_true_pfm: 127.695604387734 sim_pfm: 177.60000153852744
episode: 120 training return: tensor(194.4870, device='cuda:0')
episode: 121 training return: tensor(190.4347, device='cuda:0')
episode: 122 training return: tensor(181.7126, device='cuda:0')
episode: 123 training return: tensor(179.8834, device='cuda:0')
epoch: 31 test_true_pfm: 130.13158680051242 sim_pfm: 187.47473020207252
episode: 124 training return: tensor(179.8956, device='cuda:0')
episode: 125 training return: tensor(174.0529, device='cuda:0')
episode: 126 training return: tensor(190.5413, device='cuda:0')
episode: 127 training return: tensor(182.7385, device='cuda:0')
epoch: 32 test_true_pfm: 129.38108622193567 sim_pfm: 184.2893297841365
episode: 128 training return: tensor(177.7745, device='cuda:0')
episode: 129 training return: tensor(179.9130, device='cuda:0')
episode: 130 training return: tensor(174.8062, device='cuda:0')
episode: 131 training return: tensor(182.6111, device='cuda:0')
epoch: 33 test_true_pfm: 129.8450791162217 sim_pfm: 182.7531740976032
episode: 132 training return: tensor(175.2795, device='cuda:0')
episode: 133 training return: tensor(184.7381, device='cuda:0')
episode: 134 training return: tensor(188.6882, device='cuda:0')
episode: 135 training return: tensor(178.7294, device='cuda:0')
epoch: 34 test_true_pfm: 131.01796308306575 sim_pfm: 189.22120576627785
episode: 136 training return: tensor(190.0622, device='cuda:0')
episode: 137 training return: tensor(175.8306, device='cuda:0')
episode: 138 training return: tensor(191.8156, device='cuda:0')
episode: 139 training return: tensor(178.8436, device='cuda:0')
epoch: 35 test_true_pfm: 131.1418473530793 sim_pfm: 184.20559567946475
episode: 140 training return: tensor(182.8178, device='cuda:0')
episode: 141 training return: tensor(190.9860, device='cuda:0')
episode: 142 training return: tensor(181.7079, device='cuda:0')
episode: 143 training return: tensor(179.0064, device='cuda:0')
epoch: 36 test_true_pfm: 130.77594501364 sim_pfm: 182.8659072631155
episode: 144 training return: tensor(179.7383, device='cuda:0')
episode: 145 training return: tensor(187.6428, device='cuda:0')
episode: 146 training return: tensor(190.4743, device='cuda:0')
episode: 147 training return: tensor(181.5663, device='cuda:0')
epoch: 37 test_true_pfm: 130.14622084083507 sim_pfm: 184.4129264123796
episode: 148 training return: tensor(177.2480, device='cuda:0')
episode: 149 training return: tensor(193.7924, device='cuda:0')
episode: 150 training return: tensor(180.3085, device='cuda:0')
episode: 151 training return: tensor(187.1880, device='cuda:0')
epoch: 38 test_true_pfm: 130.15667849259313 sim_pfm: 180.20504320967012
episode: 152 training return: tensor(181.3981, device='cuda:0')
episode: 153 training return: tensor(188.4850, device='cuda:0')
episode: 154 training return: tensor(182.1185, device='cuda:0')
episode: 155 training return: tensor(170.4454, device='cuda:0')
epoch: 39 test_true_pfm: 128.06668372866673 sim_pfm: 178.41451660143213
episode: 156 training return: tensor(179.4911, device='cuda:0')
episode: 157 training return: tensor(183.6358, device='cuda:0')
episode: 158 training return: tensor(185.6515, device='cuda:0')
episode: 159 training return: tensor(190.9304, device='cuda:0')
epoch: 40 test_true_pfm: 127.97880528582157 sim_pfm: 182.60906309265994
episode: 160 training return: tensor(188.6289, device='cuda:0')
episode: 161 training return: tensor(178.4721, device='cuda:0')
episode: 162 training return: tensor(184.4291, device='cuda:0')
episode: 163 training return: tensor(177.8452, device='cuda:0')
epoch: 41 test_true_pfm: 129.56918116967518 sim_pfm: 179.8767833064485
episode: 164 training return: tensor(183.8208, device='cuda:0')
episode: 165 training return: tensor(175.4009, device='cuda:0')
episode: 166 training return: tensor(180.0760, device='cuda:0')
episode: 167 training return: tensor(187.2664, device='cuda:0')
epoch: 42 test_true_pfm: 129.81311110398207 sim_pfm: 180.1701449856977
episode: 168 training return: tensor(179.3735, device='cuda:0')
episode: 169 training return: tensor(178.9875, device='cuda:0')
episode: 170 training return: tensor(188.3957, device='cuda:0')
episode: 171 training return: tensor(175.0128, device='cuda:0')
epoch: 43 test_true_pfm: 135.14955112994176 sim_pfm: 181.25887582874276
episode: 172 training return: tensor(189.0804, device='cuda:0')
episode: 173 training return: tensor(172.1151, device='cuda:0')
episode: 174 training return: tensor(180.6366, device='cuda:0')
episode: 175 training return: tensor(180.3898, device='cuda:0')
epoch: 44 test_true_pfm: 133.53979861191493 sim_pfm: 182.35853762847836
episode: 176 training return: tensor(178.3198, device='cuda:0')
episode: 177 training return: tensor(182.4940, device='cuda:0')
episode: 178 training return: tensor(172.5704, device='cuda:0')
episode: 179 training return: tensor(171.9354, device='cuda:0')
epoch: 45 test_true_pfm: 132.97315571011023 sim_pfm: 175.5338645192096
episode: 180 training return: tensor(179.8611, device='cuda:0')
episode: 181 training return: tensor(177.3403, device='cuda:0')
episode: 182 training return: tensor(175.6079, device='cuda:0')
episode: 183 training return: tensor(180.2511, device='cuda:0')
epoch: 46 test_true_pfm: 131.10802262665658 sim_pfm: 180.36664971453138
episode: 184 training return: tensor(186.3743, device='cuda:0')
episode: 185 training return: tensor(182.6895, device='cuda:0')
episode: 186 training return: tensor(189.1197, device='cuda:0')
episode: 187 training return: tensor(182.7554, device='cuda:0')
epoch: 47 test_true_pfm: 131.8862147105826 sim_pfm: 185.01322650550864
episode: 188 training return: tensor(192.9748, device='cuda:0')
episode: 189 training return: tensor(198.3567, device='cuda:0')
episode: 190 training return: tensor(186.3375, device='cuda:0')
episode: 191 training return: tensor(187.1906, device='cuda:0')
epoch: 48 test_true_pfm: 126.94957384198281 sim_pfm: 183.62960784086027
episode: 192 training return: tensor(185.1981, device='cuda:0')
episode: 193 training return: tensor(189.7081, device='cuda:0')
episode: 194 training return: tensor(184.1160, device='cuda:0')
episode: 195 training return: tensor(190.3867, device='cuda:0')
epoch: 49 test_true_pfm: 130.98656225430403 sim_pfm: 185.59211978469975
episode: 196 training return: tensor(193.0482, device='cuda:0')
episode: 197 training return: tensor(189.2538, device='cuda:0')
episode: 198 training return: tensor(183.8604, device='cuda:0')
episode: 199 training return: tensor(194.8608, device='cuda:0')
epoch: 50 test_true_pfm: 126.46859024074092 sim_pfm: 189.9582874624757
episode: 200 training return: tensor(167.1998, device='cuda:0')
episode: 201 training return: tensor(181.9898, device='cuda:0')
episode: 202 training return: tensor(184.7478, device='cuda:0')
episode: 203 training return: tensor(191.2085, device='cuda:0')
epoch: 51 test_true_pfm: 128.3613606502838 sim_pfm: 180.24218383191618
episode: 204 training return: tensor(185.8148, device='cuda:0')
episode: 205 training return: tensor(174.5486, device='cuda:0')
episode: 206 training return: tensor(188.9408, device='cuda:0')
episode: 207 training return: tensor(194.1265, device='cuda:0')
epoch: 52 test_true_pfm: 128.62624667780344 sim_pfm: 184.96287223819527
episode: 208 training return: tensor(196.2087, device='cuda:0')
episode: 209 training return: tensor(178.5066, device='cuda:0')
episode: 210 training return: tensor(176.8423, device='cuda:0')
episode: 211 training return: tensor(195.4157, device='cuda:0')
epoch: 53 test_true_pfm: 127.43619859016842 sim_pfm: 194.05654252126698
episode: 212 training return: tensor(183.7305, device='cuda:0')
episode: 213 training return: tensor(178.6124, device='cuda:0')
episode: 214 training return: tensor(185.2055, device='cuda:0')
episode: 215 training return: tensor(190.7613, device='cuda:0')
epoch: 54 test_true_pfm: 129.20896798486382 sim_pfm: 187.7402012889739
episode: 216 training return: tensor(190.5253, device='cuda:0')
episode: 217 training return: tensor(196.6013, device='cuda:0')
episode: 218 training return: tensor(189.5298, device='cuda:0')
episode: 219 training return: tensor(197.9186, device='cuda:0')
epoch: 55 test_true_pfm: 129.96588729413799 sim_pfm: 185.48029636625432
episode: 220 training return: tensor(188.7926, device='cuda:0')
episode: 221 training return: tensor(195.7249, device='cuda:0')
episode: 222 training return: tensor(184.6946, device='cuda:0')
episode: 223 training return: tensor(184.8124, device='cuda:0')
epoch: 56 test_true_pfm: 128.87278055302733 sim_pfm: 180.15155388254206
episode: 224 training return: tensor(193.3873, device='cuda:0')
episode: 225 training return: tensor(184.1882, device='cuda:0')
episode: 226 training return: tensor(200.1980, device='cuda:0')
episode: 227 training return: tensor(191.3736, device='cuda:0')
epoch: 57 test_true_pfm: 131.6605194486773 sim_pfm: 192.2478303797834
episode: 228 training return: tensor(198.3697, device='cuda:0')
episode: 229 training return: tensor(183.4894, device='cuda:0')
episode: 230 training return: tensor(185.7484, device='cuda:0')
episode: 231 training return: tensor(181.3047, device='cuda:0')
epoch: 58 test_true_pfm: 130.02137032499974 sim_pfm: 189.7301566608716
episode: 232 training return: tensor(192.1121, device='cuda:0')
episode: 233 training return: tensor(192.5967, device='cuda:0')
episode: 234 training return: tensor(179.9342, device='cuda:0')
episode: 235 training return: tensor(186.1215, device='cuda:0')
epoch: 59 test_true_pfm: 129.30042391816582 sim_pfm: 202.9687365071615
episode: 236 training return: tensor(185.0696, device='cuda:0')
episode: 237 training return: tensor(191.4195, device='cuda:0')
episode: 238 training return: tensor(210.1015, device='cuda:0')
episode: 239 training return: tensor(172.9179, device='cuda:0')
epoch: 60 test_true_pfm: 127.78701136006698 sim_pfm: 172.41097652366733
episode: 240 training return: tensor(183.2301, device='cuda:0')
episode: 241 training return: tensor(169.2935, device='cuda:0')
episode: 242 training return: tensor(185.0377, device='cuda:0')
episode: 243 training return: tensor(189.6844, device='cuda:0')
epoch: 61 test_true_pfm: 130.32183863585306 sim_pfm: 191.61879057206096
episode: 244 training return: tensor(194.7578, device='cuda:0')
episode: 245 training return: tensor(190.6495, device='cuda:0')
episode: 246 training return: tensor(190.6957, device='cuda:0')
episode: 247 training return: tensor(210.7472, device='cuda:0')
epoch: 62 test_true_pfm: 134.6189107911467 sim_pfm: 193.49892764358083
episode: 248 training return: tensor(187.2843, device='cuda:0')
episode: 249 training return: tensor(209.5104, device='cuda:0')
episode: 250 training return: tensor(181.0889, device='cuda:0')
episode: 251 training return: tensor(191.5465, device='cuda:0')
epoch: 63 test_true_pfm: 135.97187995744258 sim_pfm: 203.59519606286776
episode: 252 training return: tensor(189.4450, device='cuda:0')
episode: 253 training return: tensor(180.3605, device='cuda:0')
episode: 254 training return: tensor(185.8083, device='cuda:0')
episode: 255 training return: tensor(207.4328, device='cuda:0')
epoch: 64 test_true_pfm: 128.63694316454735 sim_pfm: 174.2206582951476
episode: 256 training return: tensor(187.0726, device='cuda:0')
episode: 257 training return: tensor(203.9023, device='cuda:0')
episode: 258 training return: tensor(187.9265, device='cuda:0')
episode: 259 training return: tensor(196.2067, device='cuda:0')
epoch: 65 test_true_pfm: 131.21600265489081 sim_pfm: 190.5755107445293
episode: 260 training return: tensor(199.3748, device='cuda:0')
episode: 261 training return: tensor(187.6022, device='cuda:0')
episode: 262 training return: tensor(185.7341, device='cuda:0')
episode: 263 training return: tensor(190.0703, device='cuda:0')
epoch: 66 test_true_pfm: 129.9945578324528 sim_pfm: 186.28204555604606
episode: 264 training return: tensor(185.0964, device='cuda:0')
episode: 265 training return: tensor(198.6190, device='cuda:0')
episode: 266 training return: tensor(183.1331, device='cuda:0')
episode: 267 training return: tensor(200.2992, device='cuda:0')
epoch: 67 test_true_pfm: 129.10649089471573 sim_pfm: 191.36343108104774
episode: 268 training return: tensor(222.7967, device='cuda:0')
episode: 269 training return: tensor(211.1850, device='cuda:0')
episode: 270 training return: tensor(200.7097, device='cuda:0')
episode: 271 training return: tensor(172.6925, device='cuda:0')
epoch: 68 test_true_pfm: 127.94580120014079 sim_pfm: 195.0544800564181
episode: 272 training return: tensor(227.1377, device='cuda:0')
episode: 273 training return: tensor(184.2804, device='cuda:0')
episode: 274 training return: tensor(238.0423, device='cuda:0')
episode: 275 training return: tensor(191.7757, device='cuda:0')
epoch: 69 test_true_pfm: 134.15945225522404 sim_pfm: 194.53627857379615
episode: 276 training return: tensor(190.0029, device='cuda:0')
episode: 277 training return: tensor(226.0336, device='cuda:0')
episode: 278 training return: tensor(191.5865, device='cuda:0')
episode: 279 training return: tensor(205.2886, device='cuda:0')
epoch: 70 test_true_pfm: 130.80236343728797 sim_pfm: 193.77901827975876
episode: 280 training return: tensor(208.4355, device='cuda:0')
episode: 281 training return: tensor(186.8136, device='cuda:0')
episode: 282 training return: tensor(210.4759, device='cuda:0')
episode: 283 training return: tensor(216.0965, device='cuda:0')
epoch: 71 test_true_pfm: 134.23560791011238 sim_pfm: 194.15898758926195
episode: 284 training return: tensor(204.9469, device='cuda:0')
episode: 285 training return: tensor(191.0330, device='cuda:0')
episode: 286 training return: tensor(200.9726, device='cuda:0')
episode: 287 training return: tensor(213.5349, device='cuda:0')
epoch: 72 test_true_pfm: 135.4238137841604 sim_pfm: 213.6463766771194
episode: 288 training return: tensor(189.3244, device='cuda:0')
episode: 289 training return: tensor(187.4759, device='cuda:0')
episode: 290 training return: tensor(209.4771, device='cuda:0')
episode: 291 training return: tensor(213.2971, device='cuda:0')
epoch: 73 test_true_pfm: 129.87705390265288 sim_pfm: 203.9324503582844
episode: 292 training return: tensor(221.5839, device='cuda:0')
episode: 293 training return: tensor(226.1000, device='cuda:0')
episode: 294 training return: tensor(214.9013, device='cuda:0')
episode: 295 training return: tensor(202.5669, device='cuda:0')
epoch: 74 test_true_pfm: 131.9581054073861 sim_pfm: 189.83917434895412
episode: 296 training return: tensor(199.6513, device='cuda:0')
episode: 297 training return: tensor(197.9189, device='cuda:0')
episode: 298 training return: tensor(229.4229, device='cuda:0')
episode: 299 training return: tensor(214.2870, device='cuda:0')
epoch: 75 test_true_pfm: 129.02687678532888 sim_pfm: 205.50176498679792
episode: 300 training return: tensor(201.6427, device='cuda:0')
episode: 301 training return: tensor(185.7128, device='cuda:0')
episode: 302 training return: tensor(186.8164, device='cuda:0')
episode: 303 training return: tensor(228.5057, device='cuda:0')
epoch: 76 test_true_pfm: 132.22558761857118 sim_pfm: 192.31090105237672
episode: 304 training return: tensor(234.2304, device='cuda:0')
episode: 305 training return: tensor(182.1119, device='cuda:0')
episode: 306 training return: tensor(207.4418, device='cuda:0')
episode: 307 training return: tensor(218.7769, device='cuda:0')
epoch: 77 test_true_pfm: 137.06262160102852 sim_pfm: 222.97835848279064
episode: 308 training return: tensor(202.6482, device='cuda:0')
episode: 309 training return: tensor(208.1190, device='cuda:0')
episode: 310 training return: tensor(208.9837, device='cuda:0')
episode: 311 training return: tensor(212.8977, device='cuda:0')
epoch: 78 test_true_pfm: 135.45689989497586 sim_pfm: 218.47633312881808
episode: 312 training return: tensor(230.0827, device='cuda:0')
episode: 313 training return: tensor(227.4071, device='cuda:0')
episode: 314 training return: tensor(223.4818, device='cuda:0')
episode: 315 training return: tensor(196.1413, device='cuda:0')
epoch: 79 test_true_pfm: 134.49083472953635 sim_pfm: 205.2592140101129
episode: 316 training return: tensor(235.7085, device='cuda:0')
episode: 317 training return: tensor(189.0792, device='cuda:0')
episode: 318 training return: tensor(227.6001, device='cuda:0')
episode: 319 training return: tensor(237.8401, device='cuda:0')
epoch: 80 test_true_pfm: 137.34362488602537 sim_pfm: 204.7357491803472
episode: 320 training return: tensor(230.5017, device='cuda:0')
episode: 321 training return: tensor(189.5158, device='cuda:0')
episode: 322 training return: tensor(208.6119, device='cuda:0')
episode: 323 training return: tensor(219.1335, device='cuda:0')
epoch: 81 test_true_pfm: 130.41313548975342 sim_pfm: 209.303421257803
episode: 324 training return: tensor(224.2856, device='cuda:0')
episode: 325 training return: tensor(223.1839, device='cuda:0')
episode: 326 training return: tensor(202.5352, device='cuda:0')
episode: 327 training return: tensor(236.1974, device='cuda:0')
epoch: 82 test_true_pfm: 137.0238662043955 sim_pfm: 207.77564617284807
episode: 328 training return: tensor(212.9698, device='cuda:0')
episode: 329 training return: tensor(225.9573, device='cuda:0')
episode: 330 training return: tensor(231.8766, device='cuda:0')
episode: 331 training return: tensor(229.9524, device='cuda:0')
epoch: 83 test_true_pfm: 139.13808481459463 sim_pfm: 215.98473803132075
episode: 332 training return: tensor(206.8695, device='cuda:0')
episode: 333 training return: tensor(200.8665, device='cuda:0')
episode: 334 training return: tensor(215.3101, device='cuda:0')
episode: 335 training return: tensor(230.8820, device='cuda:0')
epoch: 84 test_true_pfm: 131.3380739317792 sim_pfm: 223.58415395690827
episode: 336 training return: tensor(217.8582, device='cuda:0')
episode: 337 training return: tensor(206.6844, device='cuda:0')
episode: 338 training return: tensor(238.6397, device='cuda:0')
episode: 339 training return: tensor(215.6574, device='cuda:0')
epoch: 85 test_true_pfm: 126.87393478407179 sim_pfm: 220.3429849648499
episode: 340 training return: tensor(237.2292, device='cuda:0')
episode: 341 training return: tensor(229.7901, device='cuda:0')
episode: 342 training return: tensor(221.0850, device='cuda:0')
episode: 343 training return: tensor(200.2740, device='cuda:0')
epoch: 86 test_true_pfm: 127.66138456240428 sim_pfm: 208.5659155168396
episode: 344 training return: tensor(221.0944, device='cuda:0')
episode: 345 training return: tensor(230.4114, device='cuda:0')
episode: 346 training return: tensor(229.3696, device='cuda:0')
episode: 347 training return: tensor(205.9628, device='cuda:0')
epoch: 87 test_true_pfm: 129.00381766717774 sim_pfm: 217.0825358575268
episode: 348 training return: tensor(236.3460, device='cuda:0')
episode: 349 training return: tensor(223.3323, device='cuda:0')
episode: 350 training return: tensor(213.9997, device='cuda:0')
episode: 351 training return: tensor(197.1292, device='cuda:0')
epoch: 88 test_true_pfm: 127.90048022650467 sim_pfm: 232.1806412351434
episode: 352 training return: tensor(212.1674, device='cuda:0')
episode: 353 training return: tensor(213.6406, device='cuda:0')
episode: 354 training return: tensor(227.7490, device='cuda:0')
episode: 355 training return: tensor(232.3274, device='cuda:0')
epoch: 89 test_true_pfm: 133.72520639423902 sim_pfm: 221.42142687607557
episode: 356 training return: tensor(208.3899, device='cuda:0')
episode: 357 training return: tensor(209.5650, device='cuda:0')
episode: 358 training return: tensor(229.6812, device='cuda:0')
episode: 359 training return: tensor(245.1475, device='cuda:0')
epoch: 90 test_true_pfm: 127.70530230322645 sim_pfm: 230.34607543730527
episode: 360 training return: tensor(209.0236, device='cuda:0')
episode: 361 training return: tensor(219.4825, device='cuda:0')
episode: 362 training return: tensor(249.3768, device='cuda:0')
episode: 363 training return: tensor(230.3979, device='cuda:0')
epoch: 91 test_true_pfm: 136.14232455114671 sim_pfm: 219.7085510071018
episode: 364 training return: tensor(235.5667, device='cuda:0')
episode: 365 training return: tensor(225.3121, device='cuda:0')
episode: 366 training return: tensor(239.0835, device='cuda:0')
episode: 367 training return: tensor(230.2994, device='cuda:0')
epoch: 92 test_true_pfm: 135.02277532263048 sim_pfm: 227.33336939049187
episode: 368 training return: tensor(245.4725, device='cuda:0')
episode: 369 training return: tensor(228.3380, device='cuda:0')
episode: 370 training return: tensor(230.9793, device='cuda:0')
episode: 371 training return: tensor(230.9515, device='cuda:0')
epoch: 93 test_true_pfm: 131.4754874676005 sim_pfm: 223.02155637383112
episode: 372 training return: tensor(221.5114, device='cuda:0')
episode: 373 training return: tensor(200.9742, device='cuda:0')
episode: 374 training return: tensor(236.1198, device='cuda:0')
episode: 375 training return: tensor(209.0540, device='cuda:0')
epoch: 94 test_true_pfm: 136.72681033469547 sim_pfm: 240.96932307085953
episode: 376 training return: tensor(186.9603, device='cuda:0')
episode: 377 training return: tensor(193.5128, device='cuda:0')
episode: 378 training return: tensor(233.4093, device='cuda:0')
episode: 379 training return: tensor(216.4932, device='cuda:0')
epoch: 95 test_true_pfm: 131.87281838654468 sim_pfm: 230.5334179203375
episode: 380 training return: tensor(230.9482, device='cuda:0')
episode: 381 training return: tensor(249.5527, device='cuda:0')
episode: 382 training return: tensor(216.0019, device='cuda:0')
episode: 383 training return: tensor(241.6603, device='cuda:0')
epoch: 96 test_true_pfm: 135.45164856500486 sim_pfm: 238.0419900083507
episode: 384 training return: tensor(232.2440, device='cuda:0')
episode: 385 training return: tensor(204.3510, device='cuda:0')
episode: 386 training return: tensor(217.9803, device='cuda:0')
episode: 387 training return: tensor(226.9401, device='cuda:0')
epoch: 97 test_true_pfm: 132.65500137550083 sim_pfm: 234.77220523866126
episode: 388 training return: tensor(184.1891, device='cuda:0')
episode: 389 training return: tensor(226.3952, device='cuda:0')
episode: 390 training return: tensor(214.9645, device='cuda:0')
episode: 391 training return: tensor(247.1463, device='cuda:0')
epoch: 98 test_true_pfm: 132.86151601868212 sim_pfm: 232.2558682247647
episode: 392 training return: tensor(221.0843, device='cuda:0')
episode: 393 training return: tensor(228.8602, device='cuda:0')
episode: 394 training return: tensor(234.2199, device='cuda:0')
episode: 395 training return: tensor(189.6610, device='cuda:0')
epoch: 99 test_true_pfm: 138.00442666905755 sim_pfm: 238.64344795731594
episode: 396 training return: tensor(256.7551, device='cuda:0')
episode: 397 training return: tensor(222.9830, device='cuda:0')
episode: 398 training return: tensor(215.7122, device='cuda:0')
episode: 399 training return: tensor(201.2052, device='cuda:0')
epoch: 100 test_true_pfm: 134.63952842185915 sim_pfm: 236.95856785160723
episode: 400 training return: tensor(229.6711, device='cuda:0')
episode: 401 training return: tensor(214.8885, device='cuda:0')
episode: 402 training return: tensor(222.7938, device='cuda:0')
episode: 403 training return: tensor(218.8531, device='cuda:0')
epoch: 101 test_true_pfm: 135.0038921785471 sim_pfm: 220.26740317268414
episode: 404 training return: tensor(237.0227, device='cuda:0')
episode: 405 training return: tensor(259.2577, device='cuda:0')
episode: 406 training return: tensor(232.0187, device='cuda:0')
episode: 407 training return: tensor(230.3354, device='cuda:0')
epoch: 102 test_true_pfm: 134.77241552331566 sim_pfm: 228.97750052373158
episode: 408 training return: tensor(252.9277, device='cuda:0')
episode: 409 training return: tensor(225.5445, device='cuda:0')
episode: 410 training return: tensor(245.7018, device='cuda:0')
episode: 411 training return: tensor(236.8222, device='cuda:0')
epoch: 103 test_true_pfm: 138.17368117099184 sim_pfm: 233.18005546948407
episode: 412 training return: tensor(244.2554, device='cuda:0')
episode: 413 training return: tensor(217.3893, device='cuda:0')
episode: 414 training return: tensor(260.5098, device='cuda:0')
episode: 415 training return: tensor(236.0950, device='cuda:0')
epoch: 104 test_true_pfm: 134.26659724686996 sim_pfm: 228.33411216803944
episode: 416 training return: tensor(242.9310, device='cuda:0')
episode: 417 training return: tensor(221.8317, device='cuda:0')
episode: 418 training return: tensor(235.9044, device='cuda:0')
episode: 419 training return: tensor(257.9560, device='cuda:0')
epoch: 105 test_true_pfm: 138.94643029996809 sim_pfm: 241.3736675224849
episode: 420 training return: tensor(259.6038, device='cuda:0')
episode: 421 training return: tensor(253.9189, device='cuda:0')
episode: 422 training return: tensor(248.6667, device='cuda:0')
episode: 423 training return: tensor(228.2280, device='cuda:0')
epoch: 106 test_true_pfm: 135.1792686035717 sim_pfm: 242.96025995968958
episode: 424 training return: tensor(236.8812, device='cuda:0')
episode: 425 training return: tensor(244.5925, device='cuda:0')
episode: 426 training return: tensor(229.8533, device='cuda:0')
episode: 427 training return: tensor(219.1169, device='cuda:0')
epoch: 107 test_true_pfm: 134.95051322995923 sim_pfm: 233.35716725021484
episode: 428 training return: tensor(233.6177, device='cuda:0')
episode: 429 training return: tensor(236.7629, device='cuda:0')
episode: 430 training return: tensor(242.1219, device='cuda:0')
episode: 431 training return: tensor(234.7173, device='cuda:0')
epoch: 108 test_true_pfm: 136.5078797594919 sim_pfm: 218.97737286430782
episode: 432 training return: tensor(249.6677, device='cuda:0')
episode: 433 training return: tensor(214.6080, device='cuda:0')
episode: 434 training return: tensor(237.5089, device='cuda:0')
episode: 435 training return: tensor(236.6213, device='cuda:0')
epoch: 109 test_true_pfm: 138.83611722506652 sim_pfm: 225.71913421703502
episode: 436 training return: tensor(241.1099, device='cuda:0')
episode: 437 training return: tensor(242.3249, device='cuda:0')
episode: 438 training return: tensor(253.2245, device='cuda:0')
episode: 439 training return: tensor(236.5746, device='cuda:0')
epoch: 110 test_true_pfm: 134.27206849741037 sim_pfm: 237.39499083501286
episode: 440 training return: tensor(218.8454, device='cuda:0')
episode: 441 training return: tensor(236.2632, device='cuda:0')
episode: 442 training return: tensor(220.7035, device='cuda:0')
episode: 443 training return: tensor(236.4627, device='cuda:0')
epoch: 111 test_true_pfm: 131.24829699386603 sim_pfm: 229.0879408426641
episode: 444 training return: tensor(235.5332, device='cuda:0')
episode: 445 training return: tensor(215.2630, device='cuda:0')
episode: 446 training return: tensor(225.4235, device='cuda:0')
episode: 447 training return: tensor(256.1654, device='cuda:0')
epoch: 112 test_true_pfm: 128.5017996740483 sim_pfm: 244.5797807904426
episode: 448 training return: tensor(242.7599, device='cuda:0')
episode: 449 training return: tensor(244.9668, device='cuda:0')
episode: 450 training return: tensor(217.0970, device='cuda:0')
episode: 451 training return: tensor(239.7595, device='cuda:0')
epoch: 113 test_true_pfm: 138.78884855521594 sim_pfm: 233.85466011029786
episode: 452 training return: tensor(209.1578, device='cuda:0')
episode: 453 training return: tensor(222.0853, device='cuda:0')
episode: 454 training return: tensor(237.5539, device='cuda:0')
episode: 455 training return: tensor(203.7355, device='cuda:0')
epoch: 114 test_true_pfm: 133.71221870426604 sim_pfm: 229.81660520929145
episode: 456 training return: tensor(224.3383, device='cuda:0')
episode: 457 training return: tensor(228.6620, device='cuda:0')
episode: 458 training return: tensor(240.7779, device='cuda:0')
episode: 459 training return: tensor(221.3262, device='cuda:0')
epoch: 115 test_true_pfm: 140.4369776920234 sim_pfm: 229.93742103080731
episode: 460 training return: tensor(248.8873, device='cuda:0')
episode: 461 training return: tensor(236.4729, device='cuda:0')
episode: 462 training return: tensor(256.8860, device='cuda:0')
episode: 463 training return: tensor(226.9166, device='cuda:0')
epoch: 116 test_true_pfm: 136.8952295074883 sim_pfm: 238.16791847023416
episode: 464 training return: tensor(229.6316, device='cuda:0')
episode: 465 training return: tensor(254.0009, device='cuda:0')
episode: 466 training return: tensor(245.3333, device='cuda:0')
episode: 467 training return: tensor(218.9614, device='cuda:0')
epoch: 117 test_true_pfm: 138.0678557791914 sim_pfm: 235.23110442124306
episode: 468 training return: tensor(214.4351, device='cuda:0')
episode: 469 training return: tensor(259.5037, device='cuda:0')
episode: 470 training return: tensor(235.4567, device='cuda:0')
episode: 471 training return: tensor(227.6985, device='cuda:0')
epoch: 118 test_true_pfm: 134.95706364386453 sim_pfm: 233.77224547197693
episode: 472 training return: tensor(243.8005, device='cuda:0')
episode: 473 training return: tensor(206.4095, device='cuda:0')
episode: 474 training return: tensor(248.9452, device='cuda:0')
episode: 475 training return: tensor(237.3321, device='cuda:0')
epoch: 119 test_true_pfm: 136.27964912639695 sim_pfm: 237.7533367596916
episode: 476 training return: tensor(195.5699, device='cuda:0')
episode: 477 training return: tensor(241.0228, device='cuda:0')
episode: 478 training return: tensor(197.6591, device='cuda:0')
episode: 479 training return: tensor(261.6241, device='cuda:0')
epoch: 120 test_true_pfm: 133.82491322507204 sim_pfm: 218.11100867260248
episode: 480 training return: tensor(200.3027, device='cuda:0')
episode: 481 training return: tensor(218.5468, device='cuda:0')
episode: 482 training return: tensor(240.2159, device='cuda:0')
episode: 483 training return: tensor(242.2581, device='cuda:0')
epoch: 121 test_true_pfm: 128.74630782390904 sim_pfm: 216.92599380430767
episode: 484 training return: tensor(241.3134, device='cuda:0')
episode: 485 training return: tensor(229.7173, device='cuda:0')
episode: 486 training return: tensor(218.3109, device='cuda:0')
episode: 487 training return: tensor(226.2506, device='cuda:0')
epoch: 122 test_true_pfm: 137.21282136234387 sim_pfm: 218.45053514108878
episode: 488 training return: tensor(229.4991, device='cuda:0')
episode: 489 training return: tensor(233.0014, device='cuda:0')
episode: 490 training return: tensor(250.3310, device='cuda:0')
episode: 491 training return: tensor(198.6341, device='cuda:0')
epoch: 123 test_true_pfm: 140.24797011972905 sim_pfm: 226.11378407671583
episode: 492 training return: tensor(221.8550, device='cuda:0')
episode: 493 training return: tensor(253.3909, device='cuda:0')
episode: 494 training return: tensor(211.8519, device='cuda:0')
episode: 495 training return: tensor(210.8771, device='cuda:0')
epoch: 124 test_true_pfm: 136.6320319294384 sim_pfm: 231.32248086052132
episode: 496 training return: tensor(252.6993, device='cuda:0')
episode: 497 training return: tensor(229.8419, device='cuda:0')
episode: 498 training return: tensor(219.7346, device='cuda:0')
episode: 499 training return: tensor(253.6376, device='cuda:0')
epoch: 125 test_true_pfm: 134.79234274469744 sim_pfm: 235.02045215849066
episode: 500 training return: tensor(231.4808, device='cuda:0')
episode: 501 training return: tensor(248.5014, device='cuda:0')
episode: 502 training return: tensor(218.0121, device='cuda:0')
episode: 503 training return: tensor(246.5495, device='cuda:0')
epoch: 126 test_true_pfm: 137.25399600496863 sim_pfm: 233.65007897189935
episode: 504 training return: tensor(228.7574, device='cuda:0')
episode: 505 training return: tensor(234.0447, device='cuda:0')
episode: 506 training return: tensor(236.0611, device='cuda:0')
episode: 507 training return: tensor(235.4731, device='cuda:0')
epoch: 127 test_true_pfm: 139.08815246570677 sim_pfm: 223.44389551506612
episode: 508 training return: tensor(235.3941, device='cuda:0')
episode: 509 training return: tensor(232.8575, device='cuda:0')
episode: 510 training return: tensor(250.2913, device='cuda:0')
episode: 511 training return: tensor(204.8204, device='cuda:0')
epoch: 128 test_true_pfm: 138.01408401071285 sim_pfm: 236.89996703122742
episode: 512 training return: tensor(252.8033, device='cuda:0')
episode: 513 training return: tensor(219.9753, device='cuda:0')
episode: 514 training return: tensor(209.7194, device='cuda:0')
episode: 515 training return: tensor(236.3883, device='cuda:0')
epoch: 129 test_true_pfm: 135.21583439967017 sim_pfm: 229.80734360688365
episode: 516 training return: tensor(234.4778, device='cuda:0')
episode: 517 training return: tensor(255.7984, device='cuda:0')
episode: 518 training return: tensor(235.1716, device='cuda:0')
episode: 519 training return: tensor(232.7803, device='cuda:0')
epoch: 130 test_true_pfm: 138.1411273119157 sim_pfm: 241.5625315700192
episode: 520 training return: tensor(232.4518, device='cuda:0')
episode: 521 training return: tensor(246.1555, device='cuda:0')
episode: 522 training return: tensor(249.8138, device='cuda:0')
episode: 523 training return: tensor(217.7952, device='cuda:0')
epoch: 131 test_true_pfm: 133.51016670222347 sim_pfm: 219.53157595823285
episode: 524 training return: tensor(260.3614, device='cuda:0')
episode: 525 training return: tensor(231.4870, device='cuda:0')
episode: 526 training return: tensor(231.2071, device='cuda:0')
episode: 527 training return: tensor(244.2925, device='cuda:0')
epoch: 132 test_true_pfm: 141.4565529969461 sim_pfm: 234.5926083450322
episode: 528 training return: tensor(249.1130, device='cuda:0')
episode: 529 training return: tensor(253.6498, device='cuda:0')
episode: 530 training return: tensor(238.1376, device='cuda:0')
episode: 531 training return: tensor(217.5028, device='cuda:0')
epoch: 133 test_true_pfm: 130.17283301461364 sim_pfm: 229.57858759723604
episode: 532 training return: tensor(189.7402, device='cuda:0')
episode: 533 training return: tensor(232.1125, device='cuda:0')
episode: 534 training return: tensor(236.7313, device='cuda:0')
episode: 535 training return: tensor(188.4748, device='cuda:0')
epoch: 134 test_true_pfm: 142.4468600281312 sim_pfm: 246.29946388123207
episode: 536 training return: tensor(243.2909, device='cuda:0')
episode: 537 training return: tensor(236.2655, device='cuda:0')
episode: 538 training return: tensor(233.5713, device='cuda:0')
episode: 539 training return: tensor(237.7836, device='cuda:0')
epoch: 135 test_true_pfm: 135.41656953730677 sim_pfm: 243.78410830484935
episode: 540 training return: tensor(245.6290, device='cuda:0')
episode: 541 training return: tensor(232.4946, device='cuda:0')
episode: 542 training return: tensor(217.3898, device='cuda:0')
episode: 543 training return: tensor(238.3866, device='cuda:0')
epoch: 136 test_true_pfm: 141.62699494900568 sim_pfm: 226.11615190196318
episode: 544 training return: tensor(239.5311, device='cuda:0')
episode: 545 training return: tensor(254.0075, device='cuda:0')
episode: 546 training return: tensor(222.1584, device='cuda:0')
episode: 547 training return: tensor(218.4088, device='cuda:0')
epoch: 137 test_true_pfm: 139.82060391359863 sim_pfm: 239.3121929818357
episode: 548 training return: tensor(231.7656, device='cuda:0')
episode: 549 training return: tensor(246.3082, device='cuda:0')
episode: 550 training return: tensor(198.0520, device='cuda:0')
episode: 551 training return: tensor(242.0319, device='cuda:0')
epoch: 138 test_true_pfm: 139.00233026247267 sim_pfm: 231.78996121464297
episode: 552 training return: tensor(247.1954, device='cuda:0')
episode: 553 training return: tensor(230.4993, device='cuda:0')
episode: 554 training return: tensor(235.7778, device='cuda:0')
episode: 555 training return: tensor(251.1166, device='cuda:0')
epoch: 139 test_true_pfm: 136.33334940544754 sim_pfm: 231.0912685958436
episode: 556 training return: tensor(229.2695, device='cuda:0')
episode: 557 training return: tensor(234.1857, device='cuda:0')
episode: 558 training return: tensor(242.9905, device='cuda:0')
episode: 559 training return: tensor(201.6599, device='cuda:0')
epoch: 140 test_true_pfm: 132.12132119911195 sim_pfm: 237.63152733136084
episode: 560 training return: tensor(215.7552, device='cuda:0')
episode: 561 training return: tensor(241.5750, device='cuda:0')
episode: 562 training return: tensor(256.2758, device='cuda:0')
episode: 563 training return: tensor(240.3498, device='cuda:0')
epoch: 141 test_true_pfm: 140.38887264748809 sim_pfm: 239.6827581866819
episode: 564 training return: tensor(233.0603, device='cuda:0')
episode: 565 training return: tensor(248.5111, device='cuda:0')
episode: 566 training return: tensor(232.4428, device='cuda:0')
episode: 567 training return: tensor(220.3951, device='cuda:0')
epoch: 142 test_true_pfm: 140.06801549005192 sim_pfm: 233.98004385644452
episode: 568 training return: tensor(243.0084, device='cuda:0')
episode: 569 training return: tensor(249.9097, device='cuda:0')
episode: 570 training return: tensor(236.9552, device='cuda:0')
episode: 571 training return: tensor(213.5541, device='cuda:0')
epoch: 143 test_true_pfm: 136.99770188141378 sim_pfm: 244.52034333482734
episode: 572 training return: tensor(225.8610, device='cuda:0')
episode: 573 training return: tensor(240.1660, device='cuda:0')
episode: 574 training return: tensor(225.0332, device='cuda:0')
episode: 575 training return: tensor(243.6938, device='cuda:0')
epoch: 144 test_true_pfm: 138.0767961835896 sim_pfm: 247.7697169158724
episode: 576 training return: tensor(263.8022, device='cuda:0')
episode: 577 training return: tensor(242.5029, device='cuda:0')
episode: 578 training return: tensor(225.4970, device='cuda:0')
episode: 579 training return: tensor(255.6282, device='cuda:0')
epoch: 145 test_true_pfm: 138.1659494010108 sim_pfm: 238.21348612649598
episode: 580 training return: tensor(238.7142, device='cuda:0')
episode: 581 training return: tensor(229.6311, device='cuda:0')
episode: 582 training return: tensor(234.6804, device='cuda:0')
episode: 583 training return: tensor(262.8013, device='cuda:0')
epoch: 146 test_true_pfm: 133.9502975479433 sim_pfm: 222.55206329538487
episode: 584 training return: tensor(239.8087, device='cuda:0')
episode: 585 training return: tensor(228.8539, device='cuda:0')
episode: 586 training return: tensor(242.3587, device='cuda:0')
episode: 587 training return: tensor(253.3619, device='cuda:0')
epoch: 147 test_true_pfm: 133.41111765410764 sim_pfm: 244.31512069578747
episode: 588 training return: tensor(250.7440, device='cuda:0')
episode: 589 training return: tensor(227.3256, device='cuda:0')
episode: 590 training return: tensor(185.1574, device='cuda:0')
episode: 591 training return: tensor(195.5781, device='cuda:0')
epoch: 148 test_true_pfm: 142.07239260308728 sim_pfm: 235.35369746006327
episode: 592 training return: tensor(233.6911, device='cuda:0')
episode: 593 training return: tensor(242.3988, device='cuda:0')
episode: 594 training return: tensor(245.2809, device='cuda:0')
episode: 595 training return: tensor(231.6507, device='cuda:0')
epoch: 149 test_true_pfm: 140.03788082124575 sim_pfm: 248.75179609507322
episode: 596 training return: tensor(252.4548, device='cuda:0')
episode: 597 training return: tensor(249.0360, device='cuda:0')
episode: 598 training return: tensor(229.6426, device='cuda:0')
episode: 599 training return: tensor(261.8498, device='cuda:0')
epoch: 150 test_true_pfm: 133.17789329254083 sim_pfm: 228.41952554152812
