{
  "aceas_full": {
    "experiment_name": "aceas_full",
    "total_timesteps": 5000,
    "total_time": 217.3913043478261,
    "avg_throughput": 23,
    "train_metrics": [
      {
        "timestep": 0,
        "pass_at_1": 0.0,
        "avg_reward": -0.0897956033936976
      },
      {
        "timestep": 50,
        "pass_at_1": 0.015924387810168795,
        "avg_reward": 0.09637033935598306
      },
      {
        "timestep": 101,
        "pass_at_1": 0.0,
        "avg_reward": -0.03452704593073945
      },
      {
        "timestep": 151,
        "pass_at_1": 0.009057174553668204,
        "avg_reward": -0.01747168017934047
      },
      {
        "timestep": 202,
        "pass_at_1": 0.04348700709626684,
        "avg_reward": -0.07203315763024007
      },
      {
        "timestep": 252,
        "pass_at_1": 0.036669890627988505,
        "avg_reward": 0.12055715399867245
      },
      {
        "timestep": 303,
        "pass_at_1": 0.05178598853204112,
        "avg_reward": 0.036079394497421965
      },
      {
        "timestep": 353,
        "pass_at_1": 0.05195896705845082,
        "avg_reward": 0.09527341712040352
      },
      {
        "timestep": 404,
        "pass_at_1": 0.03713322023070143,
        "avg_reward": -0.04718404353319795
      },
      {
        "timestep": 454,
        "pass_at_1": 0.06581026173141444,
        "avg_reward": 0.08192385482158687
      },
      {
        "timestep": 505,
        "pass_at_1": 0.17236849598341222,
        "avg_reward": 0.15132613469731462
      },
      {
        "timestep": 555,
        "pass_at_1": 0.1737992966500163,
        "avg_reward": 0.23529380373042186
      },
      {
        "timestep": 606,
        "pass_at_1": 0.17065957556381905,
        "avg_reward": 0.13872657391575519
      },
      {
        "timestep": 656,
        "pass_at_1": 0.20979799830323004,
        "avg_reward": 0.163566886073388
      },
      {
        "timestep": 707,
        "pass_at_1": 0.19665515475931744,
        "avg_reward": 0.2978560284745291
      },
      {
        "timestep": 757,
        "pass_at_1": 0.2240549797524333,
        "avg_reward": 0.051381719225631645
      },
      {
        "timestep": 808,
        "pass_at_1": 0.2315622122058796,
        "avg_reward": 0.20727459422846511
      },
      {
        "timestep": 858,
        "pass_at_1": 0.19626911644208822,
        "avg_reward": 0.03184423713423437
      },
      {
        "timestep": 909,
        "pass_at_1": 0.23486416685098052,
        "avg_reward": 0.2699353057779083
      },
      {
        "timestep": 959,
        "pass_at_1": 0.19452259288958884,
        "avg_reward": 0.12211478697310593
      },
      {
        "timestep": 1010,
        "pass_at_1": 0.2258647406444292,
        "avg_reward": 0.19045993262680805
      },
      {
        "timestep": 1060,
        "pass_at_1": 0.23034955977586663,
        "avg_reward": 0.19551313887723193
      },
      {
        "timestep": 1111,
        "pass_at_1": 0.21198093195868078,
        "avg_reward": 0.1573460697140507
      },
      {
        "timestep": 1161,
        "pass_at_1": 0.2163723695811804,
        "avg_reward": 0.2015481591705417
      },
      {
        "timestep": 1212,
        "pass_at_1": 0.24071446807325958,
        "avg_reward": 0.23739576930153516
      },
      {
        "timestep": 1262,
        "pass_at_1": 0.22975803139711964,
        "avg_reward": 0.11072775187326163
      },
      {
        "timestep": 1313,
        "pass_at_1": 0.22220866624856028,
        "avg_reward": 0.1642314117297573
      },
      {
        "timestep": 1363,
        "pass_at_1": 0.23720793886734393,
        "avg_reward": 0.27667402816547887
      },
      {
        "timestep": 1414,
        "pass_at_1": 0.24930198068644158,
        "avg_reward": 0.1874191348068217
      },
      {
        "timestep": 1464,
        "pass_at_1": 0.25488396629516924,
        "avg_reward": 0.13903408965844843
      },
      {
        "timestep": 1515,
        "pass_at_1": 0.24865269976468965,
        "avg_reward": 0.21229960333573455
      },
      {
        "timestep": 1565,
        "pass_at_1": 0.29296677929155823,
        "avg_reward": 0.21360474450704053
      },
      {
        "timestep": 1616,
        "pass_at_1": 0.2500059932467399,
        "avg_reward": 0.15576186471736256
      },
      {
        "timestep": 1666,
        "pass_at_1": 0.24948002757865673,
        "avg_reward": 0.21511576534514742
      },
      {
        "timestep": 1717,
        "pass_at_1": 0.23864283690844157,
        "avg_reward": 0.2630520230582601
      },
      {
        "timestep": 1767,
        "pass_at_1": 0.24177091041397233,
        "avg_reward": 0.20443075947328881
      },
      {
        "timestep": 1818,
        "pass_at_1": 0.31100684725644734,
        "avg_reward": 0.2609615010813908
      },
      {
        "timestep": 1868,
        "pass_at_1": 0.289468840329448,
        "avg_reward": 0.26110800111759314
      },
      {
        "timestep": 1919,
        "pass_at_1": 0.2789971750761063,
        "avg_reward": 0.16316591857694807
      },
      {
        "timestep": 1969,
        "pass_at_1": 0.29522173552280634,
        "avg_reward": 0.22474047488294546
      },
      {
        "timestep": 2020,
        "pass_at_1": 0.27042060186959155,
        "avg_reward": 0.25768728362138144
      },
      {
        "timestep": 2070,
        "pass_at_1": 0.3154987255421803,
        "avg_reward": 0.19802188638508036
      },
      {
        "timestep": 2121,
        "pass_at_1": 0.2605721806663154,
        "avg_reward": 0.1961996441241572
      },
      {
        "timestep": 2171,
        "pass_at_1": 0.32677184487398503,
        "avg_reward": 0.26529270891419376
      },
      {
        "timestep": 2222,
        "pass_at_1": 0.31356899296129803,
        "avg_reward": 0.22618498834456985
      },
      {
        "timestep": 2272,
        "pass_at_1": 0.32141850469513356,
        "avg_reward": 0.31203757856642744
      },
      {
        "timestep": 2323,
        "pass_at_1": 0.3090478462287938,
        "avg_reward": 0.174219051286009
      },
      {
        "timestep": 2373,
        "pass_at_1": 0.3360641337003278,
        "avg_reward": 0.21764884670976203
      },
      {
        "timestep": 2424,
        "pass_at_1": 0.30045806744597275,
        "avg_reward": 0.27972027044065195
      },
      {
        "timestep": 2474,
        "pass_at_1": 0.34169938063940775,
        "avg_reward": 0.22375932787556446
      },
      {
        "timestep": 2525,
        "pass_at_1": 0.35476655276620855,
        "avg_reward": 0.2704317491768996
      },
      {
        "timestep": 2575,
        "pass_at_1": 0.3681240036782842,
        "avg_reward": 0.24651397917393833
      },
      {
        "timestep": 2626,
        "pass_at_1": 0.3350518113179648,
        "avg_reward": 0.2604186712198895
      },
      {
        "timestep": 2676,
        "pass_at_1": 0.34503913106745276,
        "avg_reward": 0.2804936070200078
      },
      {
        "timestep": 2727,
        "pass_at_1": 0.3642663233138885,
        "avg_reward": 0.28064642332920164
      },
      {
        "timestep": 2777,
        "pass_at_1": 0.39751846383494893,
        "avg_reward": 0.3255864492165789
      },
      {
        "timestep": 2828,
        "pass_at_1": 0.3597628891252364,
        "avg_reward": 0.2672706026272303
      },
      {
        "timestep": 2878,
        "pass_at_1": 0.40265457598773685,
        "avg_reward": 0.3242195669701596
      },
      {
        "timestep": 2929,
        "pass_at_1": 0.35899129477656666,
        "avg_reward": 0.25654701022165083
      },
      {
        "timestep": 2979,
        "pass_at_1": 0.3680442092707934,
        "avg_reward": 0.27715819021746096
      },
      {
        "timestep": 3030,
        "pass_at_1": 0.3333942046247909,
        "avg_reward": 0.2638294151860742
      },
      {
        "timestep": 3080,
        "pass_at_1": 0.386669579802508,
        "avg_reward": 0.3407393782360122
      },
      {
        "timestep": 3131,
        "pass_at_1": 0.4105670726829869,
        "avg_reward": 0.4194683753387478
      },
      {
        "timestep": 3181,
        "pass_at_1": 0.42623800200065176,
        "avg_reward": 0.36491329953395424
      },
      {
        "timestep": 3232,
        "pass_at_1": 0.369349461375412,
        "avg_reward": 0.3386120317194564
      },
      {
        "timestep": 3282,
        "pass_at_1": 0.4018268725567688,
        "avg_reward": 0.33898488597713167
      },
      {
        "timestep": 3333,
        "pass_at_1": 0.3787692825850537,
        "avg_reward": 0.24964105235781558
      },
      {
        "timestep": 3383,
        "pass_at_1": 0.40286275224270945,
        "avg_reward": 0.3237574000912541
      },
      {
        "timestep": 3434,
        "pass_at_1": 0.417370243969597,
        "avg_reward": 0.3105291331513297
      },
      {
        "timestep": 3484,
        "pass_at_1": 0.4290328326271697,
        "avg_reward": 0.35398527047608813
      },
      {
        "timestep": 3535,
        "pass_at_1": 0.4099680034796707,
        "avg_reward": 0.2742983968273611
      },
      {
        "timestep": 3585,
        "pass_at_1": 0.4297130506028432,
        "avg_reward": 0.3192828432960496
      },
      {
        "timestep": 3636,
        "pass_at_1": 0.4097892399714879,
        "avg_reward": 0.3138470869897769
      },
      {
        "timestep": 3686,
        "pass_at_1": 0.4757386882060191,
        "avg_reward": 0.40228047169044057
      },
      {
        "timestep": 3737,
        "pass_at_1": 0.42606781053547854,
        "avg_reward": 0.3972603948969765
      },
      {
        "timestep": 3787,
        "pass_at_1": 0.4752595314817283,
        "avg_reward": 0.42328371629113026
      },
      {
        "timestep": 3838,
        "pass_at_1": 0.45469320250505907,
        "avg_reward": 0.33142828779860195
      },
      {
        "timestep": 3888,
        "pass_at_1": 0.4512139031264494,
        "avg_reward": 0.3570707439958486
      },
      {
        "timestep": 3939,
        "pass_at_1": 0.4565757784456037,
        "avg_reward": 0.30030378965874427
      },
      {
        "timestep": 3989,
        "pass_at_1": 0.4704229896180959,
        "avg_reward": 0.3027355800216165
      },
      {
        "timestep": 4040,
        "pass_at_1": 0.44552697131975916,
        "avg_reward": 0.370547924942939
      },
      {
        "timestep": 4090,
        "pass_at_1": 0.44475294675460647,
        "avg_reward": 0.38047491542929235
      },
      {
        "timestep": 4141,
        "pass_at_1": 0.45423599303085,
        "avg_reward": 0.3739396035741215
      },
      {
        "timestep": 4191,
        "pass_at_1": 0.44093892987048666,
        "avg_reward": 0.32998219512416543
      },
      {
        "timestep": 4242,
        "pass_at_1": 0.4375870665471129,
        "avg_reward": 0.3074426572266261
      },
      {
        "timestep": 4292,
        "pass_at_1": 0.5181921298112548,
        "avg_reward": 0.4162285993379549
      },
      {
        "timestep": 4343,
        "pass_at_1": 0.4579375166269524,
        "avg_reward": 0.28104958059818463
      },
      {
        "timestep": 4393,
        "pass_at_1": 0.4587230704632146,
        "avg_reward": 0.3389456011868514
      },
      {
        "timestep": 4444,
        "pass_at_1": 0.47480731570717305,
        "avg_reward": 0.4091906146974055
      },
      {
        "timestep": 4494,
        "pass_at_1": 0.47943001911880845,
        "avg_reward": 0.3061979249597436
      },
      {
        "timestep": 4545,
        "pass_at_1": 0.5055808473806792,
        "avg_reward": 0.4857029172998787
      },
      {
        "timestep": 4595,
        "pass_at_1": 0.46368672701446106,
        "avg_reward": 0.37389004070351217
      },
      {
        "timestep": 4646,
        "pass_at_1": 0.5070818839728887,
        "avg_reward": 0.3445646254999799
      },
      {
        "timestep": 4696,
        "pass_at_1": 0.44671494841423576,
        "avg_reward": 0.28903588367811395
      },
      {
        "timestep": 4747,
        "pass_at_1": 0.5083034986297337,
        "avg_reward": 0.4564898932774475
      },
      {
        "timestep": 4797,
        "pass_at_1": 0.4755037876185144,
        "avg_reward": 0.3756928948298105
      },
      {
        "timestep": 4848,
        "pass_at_1": 0.48658732150592443,
        "avg_reward": 0.3569366987206672
      },
      {
        "timestep": 4898,
        "pass_at_1": 0.5009795799811624,
        "avg_reward": 0.4915934736668202
      },
      {
        "timestep": 4949,
        "pass_at_1": 0.48142923229216544,
        "avg_reward": 0.39643157851268795
      },
      {
        "timestep": 5000,
        "pass_at_1": 0.5219646440173533,
        "avg_reward": 0.36907115644115507
      }
    ],
    "final_pass_at_1": 0.62
  },
  "aceas_no_csc": {
    "experiment_name": "aceas_no_csc",
    "total_timesteps": 5000,
    "total_time": 208.33333333333334,
    "avg_throughput": 24,
    "train_metrics": [
      {
        "timestep": 0,
        "pass_at_1": 0.011406515256941654,
        "avg_reward": 0.06551558049066786
      },
      {
        "timestep": 50,
        "pass_at_1": 0.0,
        "avg_reward": 0.07071786596845016
      },
      {
        "timestep": 101,
        "pass_at_1": 0.04096095283564055,
        "avg_reward": 0.1285046723257058
      },
      {
        "timestep": 151,
        "pass_at_1": 0.029656413117491506,
        "avg_reward": 0.005625233295772986
      },
      {
        "timestep": 202,
        "pass_at_1": 0.028924351415082577,
        "avg_reward": 0.016507458487309157
      },
      {
        "timestep": 252,
        "pass_at_1": 0.03668827720140512,
        "avg_reward": 0.051276565036601274
      },
      {
        "timestep": 303,
        "pass_at_1": 0.0,
        "avg_reward": 0.014021073500916484
      },
      {
        "timestep": 353,
        "pass_at_1": 0.03588325539203523,
        "avg_reward": 0.03208505525647489
      },
      {
        "timestep": 404,
        "pass_at_1": 0.020433633698793908,
        "avg_reward": -0.034340494590855905
      },
      {
        "timestep": 454,
        "pass_at_1": 0.0485016762206936,
        "avg_reward": 0.10792295601010742
      },
      {
        "timestep": 505,
        "pass_at_1": 0.15617904798574597,
        "avg_reward": 0.05844286997350273
      },
      {
        "timestep": 555,
        "pass_at_1": 0.20361442636845442,
        "avg_reward": 0.1709051966886776
      },
      {
        "timestep": 606,
        "pass_at_1": 0.2106562420718469,
        "avg_reward": 0.12722008582177405
      },
      {
        "timestep": 656,
        "pass_at_1": 0.19824730930970086,
        "avg_reward": 0.21476096836150652
      },
      {
        "timestep": 707,
        "pass_at_1": 0.17263810777461194,
        "avg_reward": 0.15051136065664947
      },
      {
        "timestep": 757,
        "pass_at_1": 0.18766494109685813,
        "avg_reward": 0.07238735264900203
      },
      {
        "timestep": 808,
        "pass_at_1": 0.16801009884246956,
        "avg_reward": 0.06101110566290484
      },
      {
        "timestep": 858,
        "pass_at_1": 0.2062378016639879,
        "avg_reward": 0.1487603708185457
      },
      {
        "timestep": 909,
        "pass_at_1": 0.18768719946557932,
        "avg_reward": 0.11983403635713114
      },
      {
        "timestep": 959,
        "pass_at_1": 0.2145529710917955,
        "avg_reward": 0.13169013960225678
      },
      {
        "timestep": 1010,
        "pass_at_1": 0.22673204097633703,
        "avg_reward": 0.22241477470644033
      },
      {
        "timestep": 1060,
        "pass_at_1": 0.19324895127539296,
        "avg_reward": 0.08586000766647861
      },
      {
        "timestep": 1111,
        "pass_at_1": 0.2113259322805082,
        "avg_reward": 0.23755261877041683
      },
      {
        "timestep": 1161,
        "pass_at_1": 0.17644779822591772,
        "avg_reward": 0.12350517483430447
      },
      {
        "timestep": 1212,
        "pass_at_1": 0.17261498422119353,
        "avg_reward": 0.0680534954430876
      },
      {
        "timestep": 1262,
        "pass_at_1": 0.2668946541913143,
        "avg_reward": 0.15587021490039074
      },
      {
        "timestep": 1313,
        "pass_at_1": 0.23194433386559377,
        "avg_reward": 0.14404049461079438
      },
      {
        "timestep": 1363,
        "pass_at_1": 0.17282532964785252,
        "avg_reward": 0.1219162649173443
      },
      {
        "timestep": 1414,
        "pass_at_1": 0.22487234476535947,
        "avg_reward": 0.15003609858933042
      },
      {
        "timestep": 1464,
        "pass_at_1": 0.21432835179982668,
        "avg_reward": 0.13206516354579184
      },
      {
        "timestep": 1515,
        "pass_at_1": 0.24672897216412015,
        "avg_reward": 0.23247390436005397
      },
      {
        "timestep": 1565,
        "pass_at_1": 0.25162226999865683,
        "avg_reward": 0.21840969960681092
      },
      {
        "timestep": 1616,
        "pass_at_1": 0.23283887640825718,
        "avg_reward": 0.13160826004696605
      },
      {
        "timestep": 1666,
        "pass_at_1": 0.2390978183030888,
        "avg_reward": 0.1403680310274672
      },
      {
        "timestep": 1717,
        "pass_at_1": 0.21860155134488313,
        "avg_reward": 0.18568771937394585
      },
      {
        "timestep": 1767,
        "pass_at_1": 0.22619045277051616,
        "avg_reward": 0.11577001817642978
      },
      {
        "timestep": 1818,
        "pass_at_1": 0.24075532521572995,
        "avg_reward": 0.16974378831803025
      },
      {
        "timestep": 1868,
        "pass_at_1": 0.26999837844933505,
        "avg_reward": 0.2269679150308585
      },
      {
        "timestep": 1919,
        "pass_at_1": 0.24419324796431446,
        "avg_reward": 0.15871325172163725
      },
      {
        "timestep": 1969,
        "pass_at_1": 0.25144157601622547,
        "avg_reward": 0.21048402557650006
      },
      {
        "timestep": 2020,
        "pass_at_1": 0.2872183922870545,
        "avg_reward": 0.2180676495615713
      },
      {
        "timestep": 2070,
        "pass_at_1": 0.2519994772161169,
        "avg_reward": 0.08871706437128173
      },
      {
        "timestep": 2121,
        "pass_at_1": 0.2714288800692399,
        "avg_reward": 0.17867719025650425
      },
      {
        "timestep": 2171,
        "pass_at_1": 0.23067681070145588,
        "avg_reward": 0.20313659793780395
      },
      {
        "timestep": 2222,
        "pass_at_1": 0.2858046816594029,
        "avg_reward": 0.14523148435546446
      },
      {
        "timestep": 2272,
        "pass_at_1": 0.26929478669192813,
        "avg_reward": 0.1853380108144988
      },
      {
        "timestep": 2323,
        "pass_at_1": 0.27710420339465475,
        "avg_reward": 0.2008262223822003
      },
      {
        "timestep": 2373,
        "pass_at_1": 0.2930447488597874,
        "avg_reward": 0.2889623696802309
      },
      {
        "timestep": 2424,
        "pass_at_1": 0.2697568287852671,
        "avg_reward": 0.2596431988143824
      },
      {
        "timestep": 2474,
        "pass_at_1": 0.3050170819723229,
        "avg_reward": 0.13408666368025812
      },
      {
        "timestep": 2525,
        "pass_at_1": 0.27618127693201844,
        "avg_reward": 0.17954634458339314
      },
      {
        "timestep": 2575,
        "pass_at_1": 0.30411865173636926,
        "avg_reward": 0.1882512163629958
      },
      {
        "timestep": 2626,
        "pass_at_1": 0.30429220078194796,
        "avg_reward": 0.24434753609960114
      },
      {
        "timestep": 2676,
        "pass_at_1": 0.2826168761989325,
        "avg_reward": 0.18862417346600724
      },
      {
        "timestep": 2727,
        "pass_at_1": 0.2652518923774455,
        "avg_reward": 0.1970887011192754
      },
      {
        "timestep": 2777,
        "pass_at_1": 0.298687174892083,
        "avg_reward": 0.1430464924296465
      },
      {
        "timestep": 2828,
        "pass_at_1": 0.2750690934979481,
        "avg_reward": 0.18265394808838864
      },
      {
        "timestep": 2878,
        "pass_at_1": 0.30298811407459814,
        "avg_reward": 0.18242153471277012
      },
      {
        "timestep": 2929,
        "pass_at_1": 0.27550651074371013,
        "avg_reward": 0.20266863007569108
      },
      {
        "timestep": 2979,
        "pass_at_1": 0.35535879378007335,
        "avg_reward": 0.30317944374740363
      },
      {
        "timestep": 3030,
        "pass_at_1": 0.29568441331915235,
        "avg_reward": 0.26224558164123396
      },
      {
        "timestep": 3080,
        "pass_at_1": 0.30389852086029584,
        "avg_reward": 0.29203719262426975
      },
      {
        "timestep": 3131,
        "pass_at_1": 0.3087457331858423,
        "avg_reward": 0.19846724962774426
      },
      {
        "timestep": 3181,
        "pass_at_1": 0.3238670210490733,
        "avg_reward": 0.36299811934624204
      },
      {
        "timestep": 3232,
        "pass_at_1": 0.3710072949788497,
        "avg_reward": 0.2979400965968963
      },
      {
        "timestep": 3282,
        "pass_at_1": 0.2958033019441673,
        "avg_reward": 0.10212193479824788
      },
      {
        "timestep": 3333,
        "pass_at_1": 0.3075564783374264,
        "avg_reward": 0.2224885957866805
      },
      {
        "timestep": 3383,
        "pass_at_1": 0.3176375407420827,
        "avg_reward": 0.19138516308793635
      },
      {
        "timestep": 3434,
        "pass_at_1": 0.3373061205416856,
        "avg_reward": 0.22119118813062827
      },
      {
        "timestep": 3484,
        "pass_at_1": 0.32476912222752935,
        "avg_reward": 0.24137646467379176
      },
      {
        "timestep": 3535,
        "pass_at_1": 0.32084192076690266,
        "avg_reward": 0.2690033954690508
      },
      {
        "timestep": 3585,
        "pass_at_1": 0.2914953845743415,
        "avg_reward": 0.2640217926215126
      },
      {
        "timestep": 3636,
        "pass_at_1": 0.34875210092447845,
        "avg_reward": 0.23996782678325218
      },
      {
        "timestep": 3686,
        "pass_at_1": 0.34311230052784414,
        "avg_reward": 0.2873609788066253
      },
      {
        "timestep": 3737,
        "pass_at_1": 0.32105789523102696,
        "avg_reward": 0.22715413164080644
      },
      {
        "timestep": 3787,
        "pass_at_1": 0.3050586401173986,
        "avg_reward": 0.2943401985523534
      },
      {
        "timestep": 3838,
        "pass_at_1": 0.343213704255617,
        "avg_reward": 0.22381441611023786
      },
      {
        "timestep": 3888,
        "pass_at_1": 0.36004381431339094,
        "avg_reward": 0.2711660378318696
      },
      {
        "timestep": 3939,
        "pass_at_1": 0.3376161135077459,
        "avg_reward": 0.2433311536589172
      },
      {
        "timestep": 3989,
        "pass_at_1": 0.36538586274536855,
        "avg_reward": 0.265281313911598
      },
      {
        "timestep": 4040,
        "pass_at_1": 0.370919709260362,
        "avg_reward": 0.2621980861809929
      },
      {
        "timestep": 4090,
        "pass_at_1": 0.3457078650507803,
        "avg_reward": 0.31341175935692195
      },
      {
        "timestep": 4141,
        "pass_at_1": 0.3554527959641493,
        "avg_reward": 0.3056034447113403
      },
      {
        "timestep": 4191,
        "pass_at_1": 0.3369565602771416,
        "avg_reward": 0.32904947759035563
      },
      {
        "timestep": 4242,
        "pass_at_1": 0.38577200899473146,
        "avg_reward": 0.2603157424669339
      },
      {
        "timestep": 4292,
        "pass_at_1": 0.3489830082499924,
        "avg_reward": 0.22799111675053732
      },
      {
        "timestep": 4343,
        "pass_at_1": 0.3542048913655659,
        "avg_reward": 0.32096975154872875
      },
      {
        "timestep": 4393,
        "pass_at_1": 0.3752622181217229,
        "avg_reward": 0.22832057209689027
      },
      {
        "timestep": 4444,
        "pass_at_1": 0.3917361658572055,
        "avg_reward": 0.32494359223459
      },
      {
        "timestep": 4494,
        "pass_at_1": 0.39530386474049956,
        "avg_reward": 0.33392141732627234
      },
      {
        "timestep": 4545,
        "pass_at_1": 0.3868753993499065,
        "avg_reward": 0.2744362092090322
      },
      {
        "timestep": 4595,
        "pass_at_1": 0.40055437986819104,
        "avg_reward": 0.3416312749589783
      },
      {
        "timestep": 4646,
        "pass_at_1": 0.32402760881378023,
        "avg_reward": 0.23640327681810305
      },
      {
        "timestep": 4696,
        "pass_at_1": 0.38059764862156126,
        "avg_reward": 0.34057290761600956
      },
      {
        "timestep": 4747,
        "pass_at_1": 0.38543123815858193,
        "avg_reward": 0.34551503334560024
      },
      {
        "timestep": 4797,
        "pass_at_1": 0.3661556206272453,
        "avg_reward": 0.26258862077588596
      },
      {
        "timestep": 4848,
        "pass_at_1": 0.37875354157162466,
        "avg_reward": 0.3251694145149665
      },
      {
        "timestep": 4898,
        "pass_at_1": 0.38589079167993634,
        "avg_reward": 0.2528040164221679
      },
      {
        "timestep": 4949,
        "pass_at_1": 0.38377511300430217,
        "avg_reward": 0.3134628227422775
      },
      {
        "timestep": 5000,
        "pass_at_1": 0.3677175288372665,
        "avg_reward": 0.2804795706410241
      }
    ],
    "final_pass_at_1": 0.52
  },
  "aceas_no_eaas": {
    "experiment_name": "aceas_no_eaas",
    "total_timesteps": 5000,
    "total_time": 277.77777777777777,
    "avg_throughput": 18,
    "train_metrics": [
      {
        "timestep": 0,
        "pass_at_1": 0.0,
        "avg_reward": 0.024693647769665285
      },
      {
        "timestep": 50,
        "pass_at_1": 0.0,
        "avg_reward": -0.011332895744091671
      },
      {
        "timestep": 101,
        "pass_at_1": 0.011829698173700068,
        "avg_reward": 0.0169518061438151
      },
      {
        "timestep": 151,
        "pass_at_1": 0.02302504599564805,
        "avg_reward": -0.008307046291861919
      },
      {
        "timestep": 202,
        "pass_at_1": 0.014796537904553328,
        "avg_reward": 0.039169025411813635
      },
      {
        "timestep": 252,
        "pass_at_1": 0.038015197037558096,
        "avg_reward": 0.07455684260418381
      },
      {
        "timestep": 303,
        "pass_at_1": 0.021453933263190623,
        "avg_reward": 0.0148692833413491
      },
      {
        "timestep": 353,
        "pass_at_1": 0.05101729550472826,
        "avg_reward": 0.015456621274450431
      },
      {
        "timestep": 404,
        "pass_at_1": 0.06929709813526344,
        "avg_reward": 0.006136483034714715
      },
      {
        "timestep": 454,
        "pass_at_1": 0.04520871931952476,
        "avg_reward": 0.007862168961272686
      },
      {
        "timestep": 505,
        "pass_at_1": 0.2042238958614629,
        "avg_reward": 0.0951444862223814
      },
      {
        "timestep": 555,
        "pass_at_1": 0.17990547675939472,
        "avg_reward": 0.19305191803270783
      },
      {
        "timestep": 606,
        "pass_at_1": 0.17386724345935692,
        "avg_reward": 0.16362081097806275
      },
      {
        "timestep": 656,
        "pass_at_1": 0.16531681531222608,
        "avg_reward": 0.2297887281680102
      },
      {
        "timestep": 707,
        "pass_at_1": 0.19802547610329185,
        "avg_reward": 0.1678613499162443
      },
      {
        "timestep": 757,
        "pass_at_1": 0.21284448850565482,
        "avg_reward": 0.1866015671094323
      },
      {
        "timestep": 808,
        "pass_at_1": 0.23892955491225443,
        "avg_reward": 0.15755837440103357
      },
      {
        "timestep": 858,
        "pass_at_1": 0.18383905486610821,
        "avg_reward": 0.08639247902871725
      },
      {
        "timestep": 909,
        "pass_at_1": 0.17292065682147204,
        "avg_reward": 0.1585921886121673
      },
      {
        "timestep": 959,
        "pass_at_1": 0.2171708131798207,
        "avg_reward": 0.16414364591457836
      },
      {
        "timestep": 1010,
        "pass_at_1": 0.22008962146919125,
        "avg_reward": 0.09674360738567178
      },
      {
        "timestep": 1060,
        "pass_at_1": 0.20475207894071698,
        "avg_reward": 0.22856661150661475
      },
      {
        "timestep": 1111,
        "pass_at_1": 0.2070202825213513,
        "avg_reward": 0.29510552465507467
      },
      {
        "timestep": 1161,
        "pass_at_1": 0.23464870697512052,
        "avg_reward": 0.12710222005770577
      },
      {
        "timestep": 1212,
        "pass_at_1": 0.2106871948288938,
        "avg_reward": 0.1282186003414636
      },
      {
        "timestep": 1262,
        "pass_at_1": 0.19162136766833932,
        "avg_reward": 0.16355920362428183
      },
      {
        "timestep": 1313,
        "pass_at_1": 0.2388754608181971,
        "avg_reward": 0.1736958612617259
      },
      {
        "timestep": 1363,
        "pass_at_1": 0.23109220193092564,
        "avg_reward": 0.23923226369721734
      },
      {
        "timestep": 1414,
        "pass_at_1": 0.23568163241579443,
        "avg_reward": 0.18786449949476333
      },
      {
        "timestep": 1464,
        "pass_at_1": 0.23196556795040688,
        "avg_reward": 0.1950057015025576
      },
      {
        "timestep": 1515,
        "pass_at_1": 0.25120924721154025,
        "avg_reward": 0.21148573866366396
      },
      {
        "timestep": 1565,
        "pass_at_1": 0.2817574964400284,
        "avg_reward": 0.2603323686275538
      },
      {
        "timestep": 1616,
        "pass_at_1": 0.23293575116703363,
        "avg_reward": 0.1807654042093011
      },
      {
        "timestep": 1666,
        "pass_at_1": 0.27598695610051555,
        "avg_reward": 0.185290710790641
      },
      {
        "timestep": 1717,
        "pass_at_1": 0.22088498032784035,
        "avg_reward": 0.17305890647367858
      },
      {
        "timestep": 1767,
        "pass_at_1": 0.22759984276063722,
        "avg_reward": 0.2327875187250066
      },
      {
        "timestep": 1818,
        "pass_at_1": 0.235001153683424,
        "avg_reward": 0.18939018526885187
      },
      {
        "timestep": 1868,
        "pass_at_1": 0.2530397698060447,
        "avg_reward": 0.10294765594438955
      },
      {
        "timestep": 1919,
        "pass_at_1": 0.2854023513935002,
        "avg_reward": 0.23904160825967424
      },
      {
        "timestep": 1969,
        "pass_at_1": 0.26204424611939214,
        "avg_reward": 0.24387492684136985
      },
      {
        "timestep": 2020,
        "pass_at_1": 0.23649459778500034,
        "avg_reward": 0.20438954179650615
      },
      {
        "timestep": 2070,
        "pass_at_1": 0.24669308247968427,
        "avg_reward": 0.12772857120201192
      },
      {
        "timestep": 2121,
        "pass_at_1": 0.2621299277875387,
        "avg_reward": 0.19679653410872175
      },
      {
        "timestep": 2171,
        "pass_at_1": 0.30820308568930926,
        "avg_reward": 0.2674364692898678
      },
      {
        "timestep": 2222,
        "pass_at_1": 0.2739592558528771,
        "avg_reward": 0.22524722817346746
      },
      {
        "timestep": 2272,
        "pass_at_1": 0.3117594977421036,
        "avg_reward": 0.23779866125046092
      },
      {
        "timestep": 2323,
        "pass_at_1": 0.25714116323641006,
        "avg_reward": 0.20190937781391347
      },
      {
        "timestep": 2373,
        "pass_at_1": 0.30741350560891245,
        "avg_reward": 0.3369186798957451
      },
      {
        "timestep": 2424,
        "pass_at_1": 0.3068190401461991,
        "avg_reward": 0.22173110218238007
      },
      {
        "timestep": 2474,
        "pass_at_1": 0.29305107714217904,
        "avg_reward": 0.16177721713079005
      },
      {
        "timestep": 2525,
        "pass_at_1": 0.2756707616484507,
        "avg_reward": 0.25812514935662395
      },
      {
        "timestep": 2575,
        "pass_at_1": 0.2846166145153958,
        "avg_reward": 0.3085763243720352
      },
      {
        "timestep": 2626,
        "pass_at_1": 0.291557437322087,
        "avg_reward": 0.20528932743787964
      },
      {
        "timestep": 2676,
        "pass_at_1": 0.3085486606691871,
        "avg_reward": 0.25873029909069173
      },
      {
        "timestep": 2727,
        "pass_at_1": 0.2946797880408391,
        "avg_reward": 0.1656316615843505
      },
      {
        "timestep": 2777,
        "pass_at_1": 0.3481056782917665,
        "avg_reward": 0.28986135135608215
      },
      {
        "timestep": 2828,
        "pass_at_1": 0.37069627217851303,
        "avg_reward": 0.3295687348086468
      },
      {
        "timestep": 2878,
        "pass_at_1": 0.3428762331835674,
        "avg_reward": 0.23290359181177936
      },
      {
        "timestep": 2929,
        "pass_at_1": 0.31213172791549354,
        "avg_reward": 0.24751254034418918
      },
      {
        "timestep": 2979,
        "pass_at_1": 0.31813338698173876,
        "avg_reward": 0.2383816727076693
      },
      {
        "timestep": 3030,
        "pass_at_1": 0.3209808067606371,
        "avg_reward": 0.24784189312186924
      },
      {
        "timestep": 3080,
        "pass_at_1": 0.33953757597232603,
        "avg_reward": 0.21927564954861672
      },
      {
        "timestep": 3131,
        "pass_at_1": 0.3262599119869629,
        "avg_reward": 0.30324354380364066
      },
      {
        "timestep": 3181,
        "pass_at_1": 0.3528756194412362,
        "avg_reward": 0.2411327074435226
      },
      {
        "timestep": 3232,
        "pass_at_1": 0.3683996013071071,
        "avg_reward": 0.35592627475499916
      },
      {
        "timestep": 3282,
        "pass_at_1": 0.3359638346317818,
        "avg_reward": 0.31597420918202906
      },
      {
        "timestep": 3333,
        "pass_at_1": 0.32330150756021586,
        "avg_reward": 0.2820253940113552
      },
      {
        "timestep": 3383,
        "pass_at_1": 0.3189016571293434,
        "avg_reward": 0.2832482057774997
      },
      {
        "timestep": 3434,
        "pass_at_1": 0.3420665800069418,
        "avg_reward": 0.29643609690663153
      },
      {
        "timestep": 3484,
        "pass_at_1": 0.3660831309606215,
        "avg_reward": 0.2696562256796202
      },
      {
        "timestep": 3535,
        "pass_at_1": 0.33709606517953017,
        "avg_reward": 0.28878466166643113
      },
      {
        "timestep": 3585,
        "pass_at_1": 0.33428131794104177,
        "avg_reward": 0.28838951677347563
      },
      {
        "timestep": 3636,
        "pass_at_1": 0.3564424030371582,
        "avg_reward": 0.23993238701492053
      },
      {
        "timestep": 3686,
        "pass_at_1": 0.3433598381731425,
        "avg_reward": 0.24662212398983227
      },
      {
        "timestep": 3737,
        "pass_at_1": 0.3791096632550706,
        "avg_reward": 0.24650553304197068
      },
      {
        "timestep": 3787,
        "pass_at_1": 0.37818154287947764,
        "avg_reward": 0.33716456442886883
      },
      {
        "timestep": 3838,
        "pass_at_1": 0.35361747384969955,
        "avg_reward": 0.28257876948610455
      },
      {
        "timestep": 3888,
        "pass_at_1": 0.355605833731544,
        "avg_reward": 0.31546838044073705
      },
      {
        "timestep": 3939,
        "pass_at_1": 0.3890177519489914,
        "avg_reward": 0.30562141614650984
      },
      {
        "timestep": 3989,
        "pass_at_1": 0.39266509780861397,
        "avg_reward": 0.27346874020803874
      },
      {
        "timestep": 4040,
        "pass_at_1": 0.4210422374448323,
        "avg_reward": 0.3367144594629751
      },
      {
        "timestep": 4090,
        "pass_at_1": 0.4173563767730588,
        "avg_reward": 0.34715256505107683
      },
      {
        "timestep": 4141,
        "pass_at_1": 0.3635049098785491,
        "avg_reward": 0.3360966939231477
      },
      {
        "timestep": 4191,
        "pass_at_1": 0.36555077737326946,
        "avg_reward": 0.21965586825011163
      },
      {
        "timestep": 4242,
        "pass_at_1": 0.3961237250801883,
        "avg_reward": 0.33272999632940764
      },
      {
        "timestep": 4292,
        "pass_at_1": 0.408368394578764,
        "avg_reward": 0.3800521729068872
      },
      {
        "timestep": 4343,
        "pass_at_1": 0.3969272583419605,
        "avg_reward": 0.31853450318938054
      },
      {
        "timestep": 4393,
        "pass_at_1": 0.3874780283096786,
        "avg_reward": 0.3598391473655632
      },
      {
        "timestep": 4444,
        "pass_at_1": 0.4538361265883883,
        "avg_reward": 0.36140075122539317
      },
      {
        "timestep": 4494,
        "pass_at_1": 0.41457777232736376,
        "avg_reward": 0.24375821908296694
      },
      {
        "timestep": 4545,
        "pass_at_1": 0.41602031688220403,
        "avg_reward": 0.29330286964200813
      },
      {
        "timestep": 4595,
        "pass_at_1": 0.4289363313324948,
        "avg_reward": 0.3338941758176009
      },
      {
        "timestep": 4646,
        "pass_at_1": 0.4507205568071342,
        "avg_reward": 0.39710650072482123
      },
      {
        "timestep": 4696,
        "pass_at_1": 0.4003876091629984,
        "avg_reward": 0.3088139455334789
      },
      {
        "timestep": 4747,
        "pass_at_1": 0.4082393474489477,
        "avg_reward": 0.40149344412269566
      },
      {
        "timestep": 4797,
        "pass_at_1": 0.4344124426703927,
        "avg_reward": 0.34825663656146416
      },
      {
        "timestep": 4848,
        "pass_at_1": 0.4037496795350584,
        "avg_reward": 0.35440443123580995
      },
      {
        "timestep": 4898,
        "pass_at_1": 0.41054283080349824,
        "avg_reward": 0.3166196069292125
      },
      {
        "timestep": 4949,
        "pass_at_1": 0.43530834488267434,
        "avg_reward": 0.30989559085322643
      },
      {
        "timestep": 5000,
        "pass_at_1": 0.4442888135179599,
        "avg_reward": 0.3736166649921601
      }
    ],
    "final_pass_at_1": 0.55
  },
  "aceas_no_acb": {
    "experiment_name": "aceas_no_acb",
    "total_timesteps": 5000,
    "total_time": 227.27272727272728,
    "avg_throughput": 22,
    "train_metrics": [
      {
        "timestep": 0,
        "pass_at_1": 0.0009182585419600032,
        "avg_reward": -0.010934209415527172
      },
      {
        "timestep": 50,
        "pass_at_1": 0.0,
        "avg_reward": -0.0021739106052424684
      },
      {
        "timestep": 101,
        "pass_at_1": 0.0,
        "avg_reward": 0.003910665441305423
      },
      {
        "timestep": 151,
        "pass_at_1": 0.036065008620379875,
        "avg_reward": -0.0025927666786852953
      },
      {
        "timestep": 202,
        "pass_at_1": 0.016842731011507198,
        "avg_reward": -0.031774121002756624
      },
      {
        "timestep": 252,
        "pass_at_1": 0.04426511713961781,
        "avg_reward": 0.058495302463501986
      },
      {
        "timestep": 303,
        "pass_at_1": 0.0695155916519522,
        "avg_reward": 0.05708196955417912
      },
      {
        "timestep": 353,
        "pass_at_1": 0.03780215754733522,
        "avg_reward": 0.05986663794527232
      },
      {
        "timestep": 404,
        "pass_at_1": 0.005073993047757221,
        "avg_reward": -0.04322969168380294
      },
      {
        "timestep": 454,
        "pass_at_1": 0.0016200753198738482,
        "avg_reward": -0.05668903991812017
      },
      {
        "timestep": 505,
        "pass_at_1": 0.14208741714200912,
        "avg_reward": 0.15409450824746074
      },
      {
        "timestep": 555,
        "pass_at_1": 0.16993076424927686,
        "avg_reward": 0.20321309677925115
      },
      {
        "timestep": 606,
        "pass_at_1": 0.20204155291953596,
        "avg_reward": 0.20689073076311842
      },
      {
        "timestep": 656,
        "pass_at_1": 0.16708671976126294,
        "avg_reward": 0.14569419294214325
      },
      {
        "timestep": 707,
        "pass_at_1": 0.16738830480215208,
        "avg_reward": 0.12207303147942154
      },
      {
        "timestep": 757,
        "pass_at_1": 0.2119104277032675,
        "avg_reward": 0.20447003610219927
      },
      {
        "timestep": 808,
        "pass_at_1": 0.20800527006619968,
        "avg_reward": 0.18541767836446787
      },
      {
        "timestep": 858,
        "pass_at_1": 0.18321319387624507,
        "avg_reward": 0.07574788209484018
      },
      {
        "timestep": 909,
        "pass_at_1": 0.20136664341171556,
        "avg_reward": 0.17216684296282014
      },
      {
        "timestep": 959,
        "pass_at_1": 0.16924288941459578,
        "avg_reward": 0.143051036822499
      },
      {
        "timestep": 1010,
        "pass_at_1": 0.19475416367117176,
        "avg_reward": 0.17544354488922176
      },
      {
        "timestep": 1060,
        "pass_at_1": 0.22855633161269692,
        "avg_reward": 0.20807258468823203
      },
      {
        "timestep": 1111,
        "pass_at_1": 0.21430271012131463,
        "avg_reward": 0.16677572127241716
      },
      {
        "timestep": 1161,
        "pass_at_1": 0.18735613477617838,
        "avg_reward": 0.09821703505523019
      },
      {
        "timestep": 1212,
        "pass_at_1": 0.20332118995914095,
        "avg_reward": 0.20135438444447712
      },
      {
        "timestep": 1262,
        "pass_at_1": 0.19049708487255082,
        "avg_reward": 0.1492058199468111
      },
      {
        "timestep": 1313,
        "pass_at_1": 0.22705665429091312,
        "avg_reward": 0.17389931963193211
      },
      {
        "timestep": 1363,
        "pass_at_1": 0.2392634911066785,
        "avg_reward": 0.22576733567880808
      },
      {
        "timestep": 1414,
        "pass_at_1": 0.2366500493951576,
        "avg_reward": 0.18879403146119014
      },
      {
        "timestep": 1464,
        "pass_at_1": 0.22173653802968138,
        "avg_reward": 0.20831420477077273
      },
      {
        "timestep": 1515,
        "pass_at_1": 0.1967991093470685,
        "avg_reward": 0.2259127540844011
      },
      {
        "timestep": 1565,
        "pass_at_1": 0.24036288936568054,
        "avg_reward": 0.10641559310902514
      },
      {
        "timestep": 1616,
        "pass_at_1": 0.23864115062223787,
        "avg_reward": 0.25599239879532515
      },
      {
        "timestep": 1666,
        "pass_at_1": 0.20838167163479426,
        "avg_reward": 0.16809617827331985
      },
      {
        "timestep": 1717,
        "pass_at_1": 0.24014304259482688,
        "avg_reward": 0.16982450295585505
      },
      {
        "timestep": 1767,
        "pass_at_1": 0.22165409444433384,
        "avg_reward": 0.19646470951552603
      },
      {
        "timestep": 1818,
        "pass_at_1": 0.26371287073227234,
        "avg_reward": 0.2535313949332178
      },
      {
        "timestep": 1868,
        "pass_at_1": 0.26870189505518727,
        "avg_reward": 0.2007918381485522
      },
      {
        "timestep": 1919,
        "pass_at_1": 0.2201877984817927,
        "avg_reward": 0.20605661649375068
      },
      {
        "timestep": 1969,
        "pass_at_1": 0.21359095007002696,
        "avg_reward": 0.13657954527212687
      },
      {
        "timestep": 2020,
        "pass_at_1": 0.24529488456909393,
        "avg_reward": 0.21398029892347217
      },
      {
        "timestep": 2070,
        "pass_at_1": 0.22297563310453283,
        "avg_reward": 0.12731597942809061
      },
      {
        "timestep": 2121,
        "pass_at_1": 0.23941868994283258,
        "avg_reward": 0.21041943305054167
      },
      {
        "timestep": 2171,
        "pass_at_1": 0.25723117151450925,
        "avg_reward": 0.13641541785168498
      },
      {
        "timestep": 2222,
        "pass_at_1": 0.27275148508069025,
        "avg_reward": 0.25668939409013697
      },
      {
        "timestep": 2272,
        "pass_at_1": 0.22108060170380422,
        "avg_reward": 0.3064247234999592
      },
      {
        "timestep": 2323,
        "pass_at_1": 0.24748023818408854,
        "avg_reward": 0.24289423356158965
      },
      {
        "timestep": 2373,
        "pass_at_1": 0.2456608398665216,
        "avg_reward": 0.19751519837111964
      },
      {
        "timestep": 2424,
        "pass_at_1": 0.27157251117599024,
        "avg_reward": 0.24141108138975054
      },
      {
        "timestep": 2474,
        "pass_at_1": 0.2818646524004681,
        "avg_reward": 0.2248144203344272
      },
      {
        "timestep": 2525,
        "pass_at_1": 0.2796376190014839,
        "avg_reward": 0.267080065814873
      },
      {
        "timestep": 2575,
        "pass_at_1": 0.2755042743633944,
        "avg_reward": 0.2532559054377434
      },
      {
        "timestep": 2626,
        "pass_at_1": 0.28967850195931927,
        "avg_reward": 0.18433753792886068
      },
      {
        "timestep": 2676,
        "pass_at_1": 0.26648272835106324,
        "avg_reward": 0.24435605490158974
      },
      {
        "timestep": 2727,
        "pass_at_1": 0.2711928758244512,
        "avg_reward": 0.15010637018984782
      },
      {
        "timestep": 2777,
        "pass_at_1": 0.26736733475192004,
        "avg_reward": 0.21221917381791192
      },
      {
        "timestep": 2828,
        "pass_at_1": 0.28758180228027175,
        "avg_reward": 0.1737759907362689
      },
      {
        "timestep": 2878,
        "pass_at_1": 0.2944867567937384,
        "avg_reward": 0.2632499490530803
      },
      {
        "timestep": 2929,
        "pass_at_1": 0.29543069002582617,
        "avg_reward": 0.25764341150761827
      },
      {
        "timestep": 2979,
        "pass_at_1": 0.3139157164423433,
        "avg_reward": 0.18988134399579779
      },
      {
        "timestep": 3030,
        "pass_at_1": 0.2681189115113034,
        "avg_reward": 0.20483025211421116
      },
      {
        "timestep": 3080,
        "pass_at_1": 0.2638683809889566,
        "avg_reward": 0.19720368666428134
      },
      {
        "timestep": 3131,
        "pass_at_1": 0.3296221129442544,
        "avg_reward": 0.3240735895492393
      },
      {
        "timestep": 3181,
        "pass_at_1": 0.25733648001601866,
        "avg_reward": 0.24605873968657227
      },
      {
        "timestep": 3232,
        "pass_at_1": 0.30097429983295027,
        "avg_reward": 0.20319956719431068
      },
      {
        "timestep": 3282,
        "pass_at_1": 0.2972691549485211,
        "avg_reward": 0.2800648262262864
      },
      {
        "timestep": 3333,
        "pass_at_1": 0.28591273614318424,
        "avg_reward": 0.2568402360514449
      },
      {
        "timestep": 3383,
        "pass_at_1": 0.31005126096721203,
        "avg_reward": 0.15027282042463097
      },
      {
        "timestep": 3434,
        "pass_at_1": 0.2774884320151784,
        "avg_reward": 0.14940271579101905
      },
      {
        "timestep": 3484,
        "pass_at_1": 0.31830468240683374,
        "avg_reward": 0.27933028479619815
      },
      {
        "timestep": 3535,
        "pass_at_1": 0.3193135369223091,
        "avg_reward": 0.25652623484152604
      },
      {
        "timestep": 3585,
        "pass_at_1": 0.2997919796395488,
        "avg_reward": 0.23806711778323564
      },
      {
        "timestep": 3636,
        "pass_at_1": 0.2900256591790378,
        "avg_reward": 0.1840676743548846
      },
      {
        "timestep": 3686,
        "pass_at_1": 0.31751575988633746,
        "avg_reward": 0.2489098980237688
      },
      {
        "timestep": 3737,
        "pass_at_1": 0.306879843091582,
        "avg_reward": 0.31312894512416983
      },
      {
        "timestep": 3787,
        "pass_at_1": 0.329211809245539,
        "avg_reward": 0.28003211820151563
      },
      {
        "timestep": 3838,
        "pass_at_1": 0.3293517884208194,
        "avg_reward": 0.30194212058754244
      },
      {
        "timestep": 3888,
        "pass_at_1": 0.30119413926752814,
        "avg_reward": 0.2646731821548958
      },
      {
        "timestep": 3939,
        "pass_at_1": 0.32147125629371964,
        "avg_reward": 0.28080886104747466
      },
      {
        "timestep": 3989,
        "pass_at_1": 0.3650900745361708,
        "avg_reward": 0.26872980852248923
      },
      {
        "timestep": 4040,
        "pass_at_1": 0.36620403019944503,
        "avg_reward": 0.29900408803629014
      },
      {
        "timestep": 4090,
        "pass_at_1": 0.32470329995319847,
        "avg_reward": 0.2005968853831241
      },
      {
        "timestep": 4141,
        "pass_at_1": 0.3073871384370688,
        "avg_reward": 0.22934959465173194
      },
      {
        "timestep": 4191,
        "pass_at_1": 0.3410422816273396,
        "avg_reward": 0.24548067897867312
      },
      {
        "timestep": 4242,
        "pass_at_1": 0.3109682636069351,
        "avg_reward": 0.2977825639010776
      },
      {
        "timestep": 4292,
        "pass_at_1": 0.3591052465022301,
        "avg_reward": 0.3347166809449613
      },
      {
        "timestep": 4343,
        "pass_at_1": 0.35116951131510604,
        "avg_reward": 0.24786398001527082
      },
      {
        "timestep": 4393,
        "pass_at_1": 0.35280673824713904,
        "avg_reward": 0.25217618035929107
      },
      {
        "timestep": 4444,
        "pass_at_1": 0.348031851507838,
        "avg_reward": 0.27651855356186106
      },
      {
        "timestep": 4494,
        "pass_at_1": 0.33213419962131135,
        "avg_reward": 0.23082414978092528
      },
      {
        "timestep": 4545,
        "pass_at_1": 0.3615578882593069,
        "avg_reward": 0.18939503530014606
      },
      {
        "timestep": 4595,
        "pass_at_1": 0.37716128471384275,
        "avg_reward": 0.34360622452845885
      },
      {
        "timestep": 4646,
        "pass_at_1": 0.36028843562741225,
        "avg_reward": 0.2454191133554967
      },
      {
        "timestep": 4696,
        "pass_at_1": 0.3301153827842049,
        "avg_reward": 0.13980120600541465
      },
      {
        "timestep": 4747,
        "pass_at_1": 0.3592019647083622,
        "avg_reward": 0.22249488965172415
      },
      {
        "timestep": 4797,
        "pass_at_1": 0.4008837628761112,
        "avg_reward": 0.40369404578646667
      },
      {
        "timestep": 4848,
        "pass_at_1": 0.3894900626220889,
        "avg_reward": 0.28873868171642636
      },
      {
        "timestep": 4898,
        "pass_at_1": 0.3266384834013073,
        "avg_reward": 0.29527224771988875
      },
      {
        "timestep": 4949,
        "pass_at_1": 0.35882312512607517,
        "avg_reward": 0.25408100230025815
      },
      {
        "timestep": 5000,
        "pass_at_1": 0.35056779252019604,
        "avg_reward": 0.26585299530360307
      }
    ],
    "final_pass_at_1": 0.5
  }
}