{
  "baseline": [
    {
      "step": 10,
      "file": "10_16384.jsonl",
      "repetition_rate": 0.06869670645298616,
      "total_responses": 2019,
      "avg_token_length": 1339.890044576523,
      "accuracy": 0.3234274393263992,
      "internal_textual_diversity": 0.9666843214456011,
      "internal_textual_similarity": 0.03331567855439879,
      "internal_unit_count": 48.55324418028727,
      "internal_equational_diversity": 0.6808149764125611,
      "formula_unique_count": 18.778603268945023,
      "formula_total_count": 43.32838038632987,
      "distinct_10gram_count": 348.90193164933135,
      "distinct_10gram_ratio": 0.8918895024671334,
      "step_global_distinct_10gram_count": 689484,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.004254635613289041,
      "step_total_10gram_count": 1271144,
      "global_total_10gram_count": 162054771,
      "entropy": 1.0019879341125488,
      "acc_reward": 0.1103515625
    },
    {
      "step": 20,
      "file": "20_16384.jsonl",
      "repetition_rate": 0.05073213370831672,
      "total_responses": 2019,
      "avg_token_length": 1213.2218920257553,
      "accuracy": 0.37345220406141655,
      "internal_textual_diversity": 0.974203100002226,
      "internal_textual_similarity": 0.025796899997773997,
      "internal_unit_count": 46.873204556711244,
      "internal_equational_diversity": 0.6931347216898753,
      "formula_unique_count": 20.542347696879645,
      "formula_total_count": 39.79990094105993,
      "distinct_10gram_count": 389.098563645369,
      "distinct_10gram_ratio": 0.9158553594562039,
      "step_global_distinct_10gram_count": 763307,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.004710179128265221,
      "step_total_10gram_count": 1197039,
      "global_total_10gram_count": 162054771,
      "entropy": 0.6028540730476379,
      "acc_reward": 0.201171875
    },
    {
      "step": 30,
      "file": "30_16384.jsonl",
      "repetition_rate": 0.03632624234172866,
      "total_responses": 2019,
      "avg_token_length": 1065.5522535908865,
      "accuracy": 0.4239722634967806,
      "internal_textual_diversity": 0.9809617393497693,
      "internal_textual_similarity": 0.019038260650230676,
      "internal_unit_count": 41.48291233283804,
      "internal_equational_diversity": 0.7187248861084015,
      "formula_unique_count": 21.215453194650816,
      "formula_total_count": 36.00099058940069,
      "distinct_10gram_count": 381.9965329370976,
      "distinct_10gram_ratio": 0.9354076498238927,
      "step_global_distinct_10gram_count": 745954,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.004603098047634772,
      "step_total_10gram_count": 1059935,
      "global_total_10gram_count": 162054771,
      "entropy": 0.3434525728225708,
      "acc_reward": 0.2802734375
    },
    {
      "step": 40,
      "file": "40_16384.jsonl",
      "repetition_rate": 0.04065961085032855,
      "total_responses": 2019,
      "avg_token_length": 1066.5641406636948,
      "accuracy": 0.4556711243189698,
      "internal_textual_diversity": 0.9769128937317733,
      "internal_textual_similarity": 0.02308710626822676,
      "internal_unit_count": 38.14462605250124,
      "internal_equational_diversity": 0.7365767423208378,
      "formula_unique_count": 21.75334323922734,
      "formula_total_count": 37.1282813273898,
      "distinct_10gram_count": 368.5854383358098,
      "distinct_10gram_ratio": 0.9359130465352418,
      "step_global_distinct_10gram_count": 718359,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.004432816112522846,
      "step_total_10gram_count": 1064296,
      "global_total_10gram_count": 162054771,
      "entropy": 0.1747453361749649,
      "acc_reward": 0.3017578125
    },
    {
      "step": 50,
      "file": "50_16384.jsonl",
      "repetition_rate": 0.034773023270124014,
      "total_responses": 2019,
      "avg_token_length": 1053.6077265973254,
      "accuracy": 0.4551758296186231,
      "internal_textual_diversity": 0.9801631869937757,
      "internal_textual_similarity": 0.019836813006224294,
      "internal_unit_count": 36.835066864784544,
      "internal_equational_diversity": 0.7403104228075847,
      "formula_unique_count": 22.45517582961862,
      "formula_total_count": 38.399207528479444,
      "distinct_10gram_count": 378.89846458642893,
      "distinct_10gram_ratio": 0.9404513043140399,
      "step_global_distinct_10gram_count": 737281,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.004549579104955818,
      "step_total_10gram_count": 1033279,
      "global_total_10gram_count": 162054771,
      "entropy": 0.14305667579174042,
      "acc_reward": 0.2568359375
    },
    {
      "step": 60,
      "file": "60_16384.jsonl",
      "repetition_rate": 0.035895630685276965,
      "total_responses": 2019,
      "avg_token_length": 1080.7647350173354,
      "accuracy": 0.4749876176324913,
      "internal_textual_diversity": 0.9793665639511007,
      "internal_textual_similarity": 0.02063343604889931,
      "internal_unit_count": 38.34769687964339,
      "internal_equational_diversity": 0.7427051378962041,
      "formula_unique_count": 22.802377414561665,
      "formula_total_count": 39.13174839029222,
      "distinct_10gram_count": 379.5225359088658,
      "distinct_10gram_ratio": 0.9394611797727446,
      "step_global_distinct_10gram_count": 736385,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.004544050110070502,
      "step_total_10gram_count": 1049319,
      "global_total_10gram_count": 162054771,
      "entropy": 0.1302637755870819,
      "acc_reward": 0.310546875
    },
    {
      "step": 70,
      "file": "70_16384.jsonl",
      "repetition_rate": 0.03299264201920716,
      "total_responses": 2019,
      "avg_token_length": 1096.1832590391282,
      "accuracy": 0.4591381872213967,
      "internal_textual_diversity": 0.9789788708125808,
      "internal_textual_similarity": 0.021021129187419167,
      "internal_unit_count": 39.37196631996038,
      "internal_equational_diversity": 0.749594814775883,
      "formula_unique_count": 24.141654284299157,
      "formula_total_count": 39.72412085190688,
      "distinct_10gram_count": 396.92174343734524,
      "distinct_10gram_ratio": 0.9419169151021253,
      "step_global_distinct_10gram_count": 766916,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.004732449376636989,
      "step_total_10gram_count": 1097748,
      "global_total_10gram_count": 162054771,
      "entropy": 0.13579615950584412,
      "acc_reward": 0.2890625
    },
    {
      "step": 80,
      "file": "80_16384.jsonl",
      "repetition_rate": 0.03548824572451928,
      "total_responses": 2019,
      "avg_token_length": 1109.1857355126301,
      "accuracy": 0.4814264487369985,
      "internal_textual_diversity": 0.9781038761804378,
      "internal_textual_similarity": 0.02189612381956221,
      "internal_unit_count": 40.417038137691925,
      "internal_equational_diversity": 0.7411433414426609,
      "formula_unique_count": 23.660723130262507,
      "formula_total_count": 40.31451213472016,
      "distinct_10gram_count": 393.93065874195145,
      "distinct_10gram_ratio": 0.9384334404611951,
      "step_global_distinct_10gram_count": 759328,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.004685625701201972,
      "step_total_10gram_count": 1119630,
      "global_total_10gram_count": 162054771,
      "entropy": 0.10111761838197708,
      "acc_reward": 0.302734375
    },
    {
      "step": 90,
      "file": "90_16384.jsonl",
      "repetition_rate": 0.04206550461188588,
      "total_responses": 2019,
      "avg_token_length": 1188.930163447251,
      "accuracy": 0.4809311540366518,
      "internal_textual_diversity": 0.975468248124774,
      "internal_textual_similarity": 0.02453175187522616,
      "internal_unit_count": 42.21446260525013,
      "internal_equational_diversity": 0.732464354145632,
      "formula_unique_count": 24.828132738979694,
      "formula_total_count": 43.972263496780585,
      "distinct_10gram_count": 411.2377414561664,
      "distinct_10gram_ratio": 0.9333094495453493,
      "step_global_distinct_10gram_count": 791432,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.004883731562583863,
      "step_total_10gram_count": 1204506,
      "global_total_10gram_count": 162054771,
      "entropy": 0.10113760083913803,
      "acc_reward": 0.3583984375
    },
    {
      "step": 100,
      "file": "100_16384.jsonl",
      "repetition_rate": 0.04186151635883946,
      "total_responses": 2019,
      "avg_token_length": 1193.144130757801,
      "accuracy": 0.48340762753838534,
      "internal_textual_diversity": 0.9769465343548456,
      "internal_textual_similarity": 0.023053465645154415,
      "internal_unit_count": 41.68647845468053,
      "internal_equational_diversity": 0.7301242089087346,
      "formula_unique_count": 23.85438335809807,
      "formula_total_count": 42.9470034670629,
      "distinct_10gram_count": 402.4834076275384,
      "distinct_10gram_ratio": 0.9303360680839614,
      "step_global_distinct_10gram_count": 774295,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.004777983364648981,
      "step_total_10gram_count": 1197318,
      "global_total_10gram_count": 162054771,
      "entropy": 0.08198729902505875,
      "acc_reward": 0.3759765625
    },
    {
      "step": 110,
      "file": "110_16384.jsonl",
      "repetition_rate": 0.042742195366164575,
      "total_responses": 2019,
      "avg_token_length": 1234.5894006934125,
      "accuracy": 0.48390292223873205,
      "internal_textual_diversity": 0.9760081888131524,
      "internal_textual_similarity": 0.0239918111868475,
      "internal_unit_count": 43.13273897969292,
      "internal_equational_diversity": 0.7370369767252394,
      "formula_unique_count": 24.773650321941556,
      "formula_total_count": 44.40366518078257,
      "distinct_10gram_count": 416.73848439821694,
      "distinct_10gram_ratio": 0.9328927532582353,
      "step_global_distinct_10gram_count": 799870,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.004935800378256065,
      "step_total_10gram_count": 1232110,
      "global_total_10gram_count": 162054771,
      "entropy": 0.0762253850698471,
      "acc_reward": 0.2666015625
    },
    {
      "step": 120,
      "file": "120_16384.jsonl",
      "repetition_rate": 0.04671103584470593,
      "total_responses": 2019,
      "avg_token_length": 1284.426944031699,
      "accuracy": 0.48390292223873205,
      "internal_textual_diversity": 0.9724569581733982,
      "internal_textual_similarity": 0.027543041826601795,
      "internal_unit_count": 45.896978702327885,
      "internal_equational_diversity": 0.7279417399800433,
      "formula_unique_count": 25.12778603268945,
      "formula_total_count": 45.9247152055473,
      "distinct_10gram_count": 428.7251114413076,
      "distinct_10gram_ratio": 0.9267797460713503,
      "step_global_distinct_10gram_count": 821754,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.005070841141727324,
      "step_total_10gram_count": 1312749,
      "global_total_10gram_count": 162054771,
      "entropy": 0.07086098939180374,
      "acc_reward": 0.314453125
    },
    {
      "step": 130,
      "file": "130_16384.jsonl",
      "repetition_rate": 0.05052782155814565,
      "total_responses": 2019,
      "avg_token_length": 1340.044081228331,
      "accuracy": 0.49777117384843983,
      "internal_textual_diversity": 0.9717916629381382,
      "internal_textual_similarity": 0.02820833706186182,
      "internal_unit_count": 47.74938088162457,
      "internal_equational_diversity": 0.7294875170559849,
      "formula_unique_count": 25.824665676077267,
      "formula_total_count": 47.10203070827142,
      "distinct_10gram_count": 431.0609212481426,
      "distinct_10gram_ratio": 0.9225164630951256,
      "step_global_distinct_10gram_count": 823338,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.0050806156148281495,
      "step_total_10gram_count": 1342018,
      "global_total_10gram_count": 162054771,
      "entropy": 0.06361906230449677,
      "acc_reward": 0.330078125
    },
    {
      "step": 140,
      "file": "140_16384.jsonl",
      "repetition_rate": 0.04466099263785088,
      "total_responses": 2019,
      "avg_token_length": 1284.6740960871718,
      "accuracy": 0.4883605745418524,
      "internal_textual_diversity": 0.9745557110483043,
      "internal_textual_similarity": 0.0254442889516957,
      "internal_unit_count": 46.464091134224866,
      "internal_equational_diversity": 0.7319632996931186,
      "formula_unique_count": 25.283308568598315,
      "formula_total_count": 46.184744923229324,
      "distinct_10gram_count": 429.45121347201587,
      "distinct_10gram_ratio": 0.9276583536106999,
      "step_global_distinct_10gram_count": 820716,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.005064435899884737,
      "step_total_10gram_count": 1305396,
      "global_total_10gram_count": 162054771,
      "entropy": 0.06591621041297913,
      "acc_reward": 0.3330078125
    },
    {
      "step": 150,
      "file": "150_16384.jsonl",
      "repetition_rate": 0.055606684957078746,
      "total_responses": 2019,
      "avg_token_length": 1421.6666666666667,
      "accuracy": 0.4943041109460129,
      "internal_textual_diversity": 0.9692244241315209,
      "internal_textual_similarity": 0.03077557586847911,
      "internal_unit_count": 49.92620108964834,
      "internal_equational_diversity": 0.721671240380268,
      "formula_unique_count": 26.368994551758295,
      "formula_total_count": 48.2332838038633,
      "distinct_10gram_count": 447.147102526003,
      "distinct_10gram_ratio": 0.9169980501752026,
      "step_global_distinct_10gram_count": 857008,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.005288384875752902,
      "step_total_10gram_count": 1432543,
      "global_total_10gram_count": 162054771,
      "entropy": 0.05290662497282028,
      "acc_reward": 0.3935546875
    },
    {
      "step": 160,
      "file": "160_16384.jsonl",
      "repetition_rate": 0.05688061384893166,
      "total_responses": 2019,
      "avg_token_length": 1433.2020802377415,
      "accuracy": 0.4908370480435859,
      "internal_textual_diversity": 0.9727196921136466,
      "internal_textual_similarity": 0.027280307886353466,
      "internal_unit_count": 50.255076770678556,
      "internal_equational_diversity": 0.7206659934443218,
      "formula_unique_count": 26.3531451213472,
      "formula_total_count": 50.3754333828628,
      "distinct_10gram_count": 448.135710747895,
      "distinct_10gram_ratio": 0.9141486426680709,
      "step_global_distinct_10gram_count": 855846,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.005281214460511008,
      "step_total_10gram_count": 1456100,
      "global_total_10gram_count": 162054771,
      "entropy": 0.06373094022274017,
      "acc_reward": 0.376953125
    },
    {
      "step": 170,
      "file": "170_16384.jsonl",
      "repetition_rate": 0.06275512837513846,
      "total_responses": 2019,
      "avg_token_length": 1486.0569588905398,
      "accuracy": 0.4943041109460129,
      "internal_textual_diversity": 0.9706246954362403,
      "internal_textual_similarity": 0.029375304563759648,
      "internal_unit_count": 50.40316988608222,
      "internal_equational_diversity": 0.7197402930043071,
      "formula_unique_count": 25.479940564635957,
      "formula_total_count": 48.48291233283804,
      "distinct_10gram_count": 437.34720158494304,
      "distinct_10gram_ratio": 0.9097501266832448,
      "step_global_distinct_10gram_count": 829138,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.005116405983505416,
      "step_total_10gram_count": 1455430,
      "global_total_10gram_count": 162054771,
      "entropy": 0.05210494622588158,
      "acc_reward": 0.349609375
    },
    {
      "step": 180,
      "file": "180_16384.jsonl",
      "repetition_rate": 0.07480085540876649,
      "total_responses": 2019,
      "avg_token_length": 1616.731550272412,
      "accuracy": 0.487369985141159,
      "internal_textual_diversity": 0.9668291386824314,
      "internal_textual_similarity": 0.033170861317568724,
      "internal_unit_count": 56.21297672114908,
      "internal_equational_diversity": 0.7171167819583539,
      "formula_unique_count": 27.038632986627043,
      "formula_total_count": 55.3902922238732,
      "distinct_10gram_count": 454.92223873204557,
      "distinct_10gram_ratio": 0.897058181515426,
      "step_global_distinct_10gram_count": 869351,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.005364550482750057,
      "step_total_10gram_count": 1617107,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04398287087678909,
      "acc_reward": 0.376953125
    },
    {
      "step": 190,
      "file": "190_16384.jsonl",
      "repetition_rate": 0.07727283005341377,
      "total_responses": 2019,
      "avg_token_length": 1647.4011887072809,
      "accuracy": 0.4918276374442793,
      "internal_textual_diversity": 0.9643906270653619,
      "internal_textual_similarity": 0.035609372934638034,
      "internal_unit_count": 54.865279841505696,
      "internal_equational_diversity": 0.711129517030504,
      "formula_unique_count": 26.931649331352155,
      "formula_total_count": 54.80534918276374,
      "distinct_10gram_count": 455.28083209509657,
      "distinct_10gram_ratio": 0.8935816222904099,
      "step_global_distinct_10gram_count": 867137,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.005350888435120494,
      "step_total_10gram_count": 1636327,
      "global_total_10gram_count": 162054771,
      "entropy": 0.042156122624874115,
      "acc_reward": 0.4033203125
    },
    {
      "step": 200,
      "file": "200_16384.jsonl",
      "repetition_rate": 0.08203047334878495,
      "total_responses": 2019,
      "avg_token_length": 1698.2105002476474,
      "accuracy": 0.4938088162456662,
      "internal_textual_diversity": 0.9606675630259593,
      "internal_textual_similarity": 0.039332436974040624,
      "internal_unit_count": 55.46854878652798,
      "internal_equational_diversity": 0.7094956064868476,
      "formula_unique_count": 26.85586924219911,
      "formula_total_count": 55.62407132243685,
      "distinct_10gram_count": 464.77711738484396,
      "distinct_10gram_ratio": 0.8906750600149447,
      "step_global_distinct_10gram_count": 882597,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.00544628827990507,
      "step_total_10gram_count": 1693641,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04357381537556648,
      "acc_reward": 0.4248046875
    },
    {
      "step": 210,
      "file": "210_16384.jsonl",
      "repetition_rate": 0.06932374738489308,
      "total_responses": 2019,
      "avg_token_length": 1593.3779098563646,
      "accuracy": 0.5007429420505201,
      "internal_textual_diversity": 0.9676238252621799,
      "internal_textual_similarity": 0.03237617473782009,
      "internal_unit_count": 52.21792966815255,
      "internal_equational_diversity": 0.7080919463035824,
      "formula_unique_count": 26.769687964338782,
      "formula_total_count": 53.84299157999009,
      "distinct_10gram_count": 461.2218920257553,
      "distinct_10gram_ratio": 0.9006995559341776,
      "step_global_distinct_10gram_count": 879749,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.005428713974733888,
      "step_total_10gram_count": 1592583,
      "global_total_10gram_count": 162054771,
      "entropy": 0.042426757514476776,
      "acc_reward": 0.341796875
    },
    {
      "step": 220,
      "file": "220_16384.jsonl",
      "repetition_rate": 0.07104325110239508,
      "total_responses": 2019,
      "avg_token_length": 1606.8469539375928,
      "accuracy": 0.49826646854878653,
      "internal_textual_diversity": 0.9667011944170031,
      "internal_textual_similarity": 0.03329880558299674,
      "internal_unit_count": 54.036651807825656,
      "internal_equational_diversity": 0.7083419918394691,
      "formula_unique_count": 26.996037642397226,
      "formula_total_count": 53.557206537890046,
      "distinct_10gram_count": 472.89598811292717,
      "distinct_10gram_ratio": 0.8981758267702062,
      "step_global_distinct_10gram_count": 901563,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.0055633227854797315,
      "step_total_10gram_count": 1595602,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04113100469112396,
      "acc_reward": 0.4052734375
    },
    {
      "step": 230,
      "file": "230_16384.jsonl",
      "repetition_rate": 0.06861140197629792,
      "total_responses": 2019,
      "avg_token_length": 1662.2491332342745,
      "accuracy": 0.49876176324913324,
      "internal_textual_diversity": 0.9680764729140994,
      "internal_textual_similarity": 0.03192352708590073,
      "internal_unit_count": 54.582961862308075,
      "internal_equational_diversity": 0.7024056569950341,
      "formula_unique_count": 28.315007429420504,
      "formula_total_count": 56.12481426448737,
      "distinct_10gram_count": 493.4338781575037,
      "distinct_10gram_ratio": 0.8980201084486367,
      "step_global_distinct_10gram_count": 936303,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.005777694752350118,
      "step_total_10gram_count": 1665033,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04452966898679733,
      "acc_reward": 0.37109375
    },
    {
      "step": 240,
      "file": "240_16384.jsonl",
      "repetition_rate": 0.07676228278475733,
      "total_responses": 2019,
      "avg_token_length": 1721.311540366518,
      "accuracy": 0.5002476473501734,
      "internal_textual_diversity": 0.9646240323297491,
      "internal_textual_similarity": 0.03537596767025087,
      "internal_unit_count": 58.72461614660723,
      "internal_equational_diversity": 0.6881295205607864,
      "formula_unique_count": 28.323427439326398,
      "formula_total_count": 60.34076275383853,
      "distinct_10gram_count": 494.87667161961366,
      "distinct_10gram_ratio": 0.8923883707658794,
      "step_global_distinct_10gram_count": 943091,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.00581958182520896,
      "step_total_10gram_count": 1717653,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04048857465386391,
      "acc_reward": 0.376953125
    },
    {
      "step": 250,
      "file": "250_16384.jsonl",
      "repetition_rate": 0.08423129704943656,
      "total_responses": 2019,
      "avg_token_length": 1767.9891035165924,
      "accuracy": 0.4913323427439326,
      "internal_textual_diversity": 0.9620940407289091,
      "internal_textual_similarity": 0.037905959271090944,
      "internal_unit_count": 60.55225359088658,
      "internal_equational_diversity": 0.6847100425089508,
      "formula_unique_count": 27.823179791976226,
      "formula_total_count": 59.983655274888555,
      "distinct_10gram_count": 494.6404160475483,
      "distinct_10gram_ratio": 0.8858667155222644,
      "step_global_distinct_10gram_count": 935346,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.0057717893415183685,
      "step_total_10gram_count": 1821868,
      "global_total_10gram_count": 162054771,
      "entropy": 0.03871513530611992,
      "acc_reward": 0.4462890625
    },
    {
      "step": 260,
      "file": "260_16384.jsonl",
      "repetition_rate": 0.0793106886664343,
      "total_responses": 2019,
      "avg_token_length": 1740.518573551263,
      "accuracy": 0.5061911837543338,
      "internal_textual_diversity": 0.9652459411110678,
      "internal_textual_similarity": 0.03475405888893229,
      "internal_unit_count": 60.480931154036654,
      "internal_equational_diversity": 0.685055264424576,
      "formula_unique_count": 27.14115898959881,
      "formula_total_count": 59.93016344725111,
      "distinct_10gram_count": 490.18177315502726,
      "distinct_10gram_ratio": 0.8878381420968927,
      "step_global_distinct_10gram_count": 935615,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.005773449274134607,
      "step_total_10gram_count": 1798693,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04356292262673378,
      "acc_reward": 0.3759765625
    },
    {
      "step": 270,
      "file": "270_16384.jsonl",
      "repetition_rate": 0.07338793094330391,
      "total_responses": 2019,
      "avg_token_length": 1691.3234274393265,
      "accuracy": 0.5071817731550272,
      "internal_textual_diversity": 0.9669479422121114,
      "internal_textual_similarity": 0.0330520577878886,
      "internal_unit_count": 59.126795443288756,
      "internal_equational_diversity": 0.6908438186841297,
      "formula_unique_count": 26.837048043585934,
      "formula_total_count": 57.335809806835066,
      "distinct_10gram_count": 486.6433878157504,
      "distinct_10gram_ratio": 0.8917129118113521,
      "step_global_distinct_10gram_count": 926134,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.005714944362853717,
      "step_total_10gram_count": 1707402,
      "global_total_10gram_count": 162054771,
      "entropy": 0.03886816278100014,
      "acc_reward": 0.4482421875
    },
    {
      "step": 280,
      "file": "280_16384.jsonl",
      "repetition_rate": 0.0836358543630424,
      "total_responses": 2019,
      "avg_token_length": 1820.621594848935,
      "accuracy": 0.5106488360574541,
      "internal_textual_diversity": 0.9640844765541295,
      "internal_textual_similarity": 0.0359155234458705,
      "internal_unit_count": 63.797919762258545,
      "internal_equational_diversity": 0.684174260951091,
      "formula_unique_count": 28.596830113917783,
      "formula_total_count": 61.33432392273403,
      "distinct_10gram_count": 507.01733531451214,
      "distinct_10gram_ratio": 0.8818600324264997,
      "step_global_distinct_10gram_count": 963433,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.005945107287214642,
      "step_total_10gram_count": 1867983,
      "global_total_10gram_count": 162054771,
      "entropy": 0.03974568098783493,
      "acc_reward": 0.4189453125
    },
    {
      "step": 290,
      "file": "290_16384.jsonl",
      "repetition_rate": 0.08527829047511214,
      "total_responses": 2019,
      "avg_token_length": 1953.0693412580486,
      "accuracy": 0.5071817731550272,
      "internal_textual_diversity": 0.9648267794072269,
      "internal_textual_similarity": 0.035173220592773176,
      "internal_unit_count": 70.50668647845468,
      "internal_equational_diversity": 0.6808770745626371,
      "formula_unique_count": 30.864289252105003,
      "formula_total_count": 67.30807330361566,
      "distinct_10gram_count": 544.1684001981179,
      "distinct_10gram_ratio": 0.8777616429389689,
      "step_global_distinct_10gram_count": 1042849,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.006435163824951504,
      "step_total_10gram_count": 1961595,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04067077860236168,
      "acc_reward": 0.478515625
    },
    {
      "step": 300,
      "file": "300_16384.jsonl",
      "repetition_rate": 0.09427925813809217,
      "total_responses": 2019,
      "avg_token_length": 2073.5626547795937,
      "accuracy": 0.5101535413571074,
      "internal_textual_diversity": 0.9642257284399646,
      "internal_textual_similarity": 0.035774271560035514,
      "internal_unit_count": 72.92669638434869,
      "internal_equational_diversity": 0.6689874868073266,
      "formula_unique_count": 31.95938583457157,
      "formula_total_count": 70.2184249628529,
      "distinct_10gram_count": 555.7478949975235,
      "distinct_10gram_ratio": 0.8667329615752795,
      "step_global_distinct_10gram_count": 1068311,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.006592283543444703,
      "step_total_10gram_count": 2028820,
      "global_total_10gram_count": 162054771,
      "entropy": 0.03951321169734001,
      "acc_reward": 0.4267578125
    },
    {
      "step": 310,
      "file": "310_16384.jsonl",
      "repetition_rate": 0.09130971380201322,
      "total_responses": 2019,
      "avg_token_length": 2083.925705794948,
      "accuracy": 0.5106488360574541,
      "internal_textual_diversity": 0.9655050016466964,
      "internal_textual_similarity": 0.034494998353303594,
      "internal_unit_count": 76.75978207033185,
      "internal_equational_diversity": 0.666183222393318,
      "formula_unique_count": 32.23873204556711,
      "formula_total_count": 74.21149083704805,
      "distinct_10gram_count": 571.7469044081229,
      "distinct_10gram_ratio": 0.8650443936709499,
      "step_global_distinct_10gram_count": 1099205,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.00678292279343013,
      "step_total_10gram_count": 2093157,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04091840237379074,
      "acc_reward": 0.4609375
    },
    {
      "step": 320,
      "file": "320_16384.jsonl",
      "repetition_rate": 0.09957725873893797,
      "total_responses": 2019,
      "avg_token_length": 2125.852897473997,
      "accuracy": 0.5037147102526003,
      "internal_textual_diversity": 0.9597858666230932,
      "internal_textual_similarity": 0.040214133376906804,
      "internal_unit_count": 75.80881624566616,
      "internal_equational_diversity": 0.6659776662801544,
      "formula_unique_count": 32.06191183754334,
      "formula_total_count": 71.79891035165923,
      "distinct_10gram_count": 558.9024269440317,
      "distinct_10gram_ratio": 0.8593991051195079,
      "step_global_distinct_10gram_count": 1068949,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.0065962204839992026,
      "step_total_10gram_count": 2102506,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04366082325577736,
      "acc_reward": 0.408203125
    },
    {
      "step": 330,
      "file": "330_16384.jsonl",
      "repetition_rate": 0.11188380834729945,
      "total_responses": 2019,
      "avg_token_length": 2221.0619118375434,
      "accuracy": 0.49975235264982665,
      "internal_textual_diversity": 0.9575119050634081,
      "internal_textual_similarity": 0.04248809493659187,
      "internal_unit_count": 80.32590391282814,
      "internal_equational_diversity": 0.6556811833081589,
      "formula_unique_count": 32.178801386825164,
      "formula_total_count": 76.71916790490341,
      "distinct_10gram_count": 564.3427439326399,
      "distinct_10gram_ratio": 0.8469842681074512,
      "step_global_distinct_10gram_count": 1078170,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.006653120999442837,
      "step_total_10gram_count": 2232232,
      "global_total_10gram_count": 162054771,
      "entropy": 0.039984576404094696,
      "acc_reward": 0.4541015625
    },
    {
      "step": 340,
      "file": "340_16384.jsonl",
      "repetition_rate": 0.12650210913396126,
      "total_responses": 2019,
      "avg_token_length": 2325.652798415057,
      "accuracy": 0.4952947003467063,
      "internal_textual_diversity": 0.9579137301171414,
      "internal_textual_similarity": 0.042086269882858525,
      "internal_unit_count": 83.07726597325409,
      "internal_equational_diversity": 0.6551376310678424,
      "formula_unique_count": 30.673105497771175,
      "formula_total_count": 76.47994056463595,
      "distinct_10gram_count": 555.9232293214462,
      "distinct_10gram_ratio": 0.8371401161814189,
      "step_global_distinct_10gram_count": 1064454,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.0065684829482743215,
      "step_total_10gram_count": 2358572,
      "global_total_10gram_count": 162054771,
      "entropy": 0.0360633060336113,
      "acc_reward": 0.3671875
    },
    {
      "step": 350,
      "file": "350_16384.jsonl",
      "repetition_rate": 0.115867738505295,
      "total_responses": 2019,
      "avg_token_length": 2199.9073798910354,
      "accuracy": 0.5215453194650818,
      "internal_textual_diversity": 0.9589701720276512,
      "internal_textual_similarity": 0.041029827972348804,
      "internal_unit_count": 78.9395740465577,
      "internal_equational_diversity": 0.66314896862267,
      "formula_unique_count": 31.128776622090143,
      "formula_total_count": 74.9692917285785,
      "distinct_10gram_count": 542.5834571570084,
      "distinct_10gram_ratio": 0.8451924983250033,
      "step_global_distinct_10gram_count": 1032861,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.006373530341787962,
      "step_total_10gram_count": 2219050,
      "global_total_10gram_count": 162054771,
      "entropy": 0.03896726295351982,
      "acc_reward": 0.3857421875
    },
    {
      "step": 360,
      "file": "360_16384.jsonl",
      "repetition_rate": 0.1086762521409442,
      "total_responses": 2019,
      "avg_token_length": 2182.8281327389795,
      "accuracy": 0.5071817731550272,
      "internal_textual_diversity": 0.9649208046887329,
      "internal_textual_similarity": 0.03507919531126718,
      "internal_unit_count": 80.32590391282814,
      "internal_equational_diversity": 0.65556200886236,
      "formula_unique_count": 31.194155522535908,
      "formula_total_count": 75.10995542347698,
      "distinct_10gram_count": 566.7077761267955,
      "distinct_10gram_ratio": 0.8515991524425421,
      "step_global_distinct_10gram_count": 1076457,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.0066425504991766025,
      "step_total_10gram_count": 2235468,
      "global_total_10gram_count": 162054771,
      "entropy": 0.042809780687093735,
      "acc_reward": 0.4404296875
    },
    {
      "step": 370,
      "file": "370_16384.jsonl",
      "repetition_rate": 0.0967759372870685,
      "total_responses": 2019,
      "avg_token_length": 2022.8841010401188,
      "accuracy": 0.5175829618623081,
      "internal_textual_diversity": 0.9692801658647883,
      "internal_textual_similarity": 0.030719834135211753,
      "internal_unit_count": 74.6849925705795,
      "internal_equational_diversity": 0.6612299129486113,
      "formula_unique_count": 30.665676077265974,
      "formula_total_count": 71.09658246656761,
      "distinct_10gram_count": 555.7379891035166,
      "distinct_10gram_ratio": 0.8653131329455804,
      "step_global_distinct_10gram_count": 1054491,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.006507003733941286,
      "step_total_10gram_count": 2116346,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04236150532960892,
      "acc_reward": 0.4951171875
    },
    {
      "step": 380,
      "file": "380_16384.jsonl",
      "repetition_rate": 0.11561612127215203,
      "total_responses": 2019,
      "avg_token_length": 2249.1792966815256,
      "accuracy": 0.5032194155522536,
      "internal_textual_diversity": 0.9640987043231667,
      "internal_textual_similarity": 0.03590129567683332,
      "internal_unit_count": 83.64487369985142,
      "internal_equational_diversity": 0.6547902034335668,
      "formula_unique_count": 31.69341258048539,
      "formula_total_count": 78.69044081228331,
      "distinct_10gram_count": 575.8429915799901,
      "distinct_10gram_ratio": 0.845216066385399,
      "step_global_distinct_10gram_count": 1097307,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.006771210703818155,
      "step_total_10gram_count": 2378061,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04477997124195099,
      "acc_reward": 0.3486328125
    },
    {
      "step": 390,
      "file": "390_16384.jsonl",
      "repetition_rate": 0.10915964220801745,
      "total_responses": 2019,
      "avg_token_length": 2194.098563645369,
      "accuracy": 0.5156017830609212,
      "internal_textual_diversity": 0.9640629027116123,
      "internal_textual_similarity": 0.03593709728838765,
      "internal_unit_count": 80.80188211986132,
      "internal_equational_diversity": 0.6579936061207331,
      "formula_unique_count": 31.24962852897474,
      "formula_total_count": 76.69935611688955,
      "distinct_10gram_count": 571.8875681030213,
      "distinct_10gram_ratio": 0.8508500460591144,
      "step_global_distinct_10gram_count": 1090448,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.006728885507480678,
      "step_total_10gram_count": 2327527,
      "global_total_10gram_count": 162054771,
      "entropy": 0.042485013604164124,
      "acc_reward": 0.40625
    },
    {
      "step": 400,
      "file": "400_16384.jsonl",
      "repetition_rate": 0.1076669286968575,
      "total_responses": 2019,
      "avg_token_length": 2182.4155522535907,
      "accuracy": 0.5086676572560673,
      "internal_textual_diversity": 0.9681300637110496,
      "internal_textual_similarity": 0.031869936288950386,
      "internal_unit_count": 82.70331847449232,
      "internal_equational_diversity": 0.6569954784322417,
      "formula_unique_count": 30.79841505695889,
      "formula_total_count": 73.94601287766221,
      "distinct_10gram_count": 573.3506686478455,
      "distinct_10gram_ratio": 0.8501659723641045,
      "step_global_distinct_10gram_count": 1086327,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.006703455833460157,
      "step_total_10gram_count": 2331035,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04521636664867401,
      "acc_reward": 0.4384765625
    },
    {
      "step": 410,
      "file": "410_16384.jsonl",
      "repetition_rate": 0.11122629371190502,
      "total_responses": 2019,
      "avg_token_length": 2312.6082218920255,
      "accuracy": 0.5047052996532937,
      "internal_textual_diversity": 0.9704143902958878,
      "internal_textual_similarity": 0.029585609704112118,
      "internal_unit_count": 90.33382862803369,
      "internal_equational_diversity": 0.6495727754165873,
      "formula_unique_count": 33.23922734026746,
      "formula_total_count": 81.87964338781575,
      "distinct_10gram_count": 605.7211490837049,
      "distinct_10gram_ratio": 0.8451796729350298,
      "step_global_distinct_10gram_count": 1153995,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.007121018362365894,
      "step_total_10gram_count": 2457622,
      "global_total_10gram_count": 162054771,
      "entropy": 0.0485348142683506,
      "acc_reward": 0.419921875
    },
    {
      "step": 420,
      "file": "420_16384.jsonl",
      "repetition_rate": 0.1131481500191267,
      "total_responses": 2019,
      "avg_token_length": 2352.1059930658744,
      "accuracy": 0.5116394254581476,
      "internal_textual_diversity": 0.9686072827571105,
      "internal_textual_similarity": 0.031392717242889494,
      "internal_unit_count": 93.74393263992076,
      "internal_equational_diversity": 0.6521622542584783,
      "formula_unique_count": 34.68945022288261,
      "formula_total_count": 82.88509162951956,
      "distinct_10gram_count": 628.3878157503715,
      "distinct_10gram_ratio": 0.8449572194104256,
      "step_global_distinct_10gram_count": 1198809,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.0073975544971767605,
      "step_total_10gram_count": 2463416,
      "global_total_10gram_count": 162054771,
      "entropy": 0.039616987109184265,
      "acc_reward": 0.5126953125
    },
    {
      "step": 430,
      "file": "430_16384.jsonl",
      "repetition_rate": 0.11988640843592956,
      "total_responses": 2019,
      "avg_token_length": 2412.340267459138,
      "accuracy": 0.5190688459633482,
      "internal_textual_diversity": 0.963287262747149,
      "internal_textual_similarity": 0.03671273725285101,
      "internal_unit_count": 98.00198117880139,
      "internal_equational_diversity": 0.6464416532310424,
      "formula_unique_count": 33.87518573551263,
      "formula_total_count": 84.48935116394254,
      "distinct_10gram_count": 626.9752352649826,
      "distinct_10gram_ratio": 0.836947348815292,
      "step_global_distinct_10gram_count": 1192722,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.007359993122325291,
      "step_total_10gram_count": 2527550,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04575519636273384,
      "acc_reward": 0.4580078125
    },
    {
      "step": 440,
      "file": "440_16384.jsonl",
      "repetition_rate": 0.10416154951033305,
      "total_responses": 2019,
      "avg_token_length": 2187.993065874195,
      "accuracy": 0.5180782565626548,
      "internal_textual_diversity": 0.9704522889269599,
      "internal_textual_similarity": 0.029547711073040057,
      "internal_unit_count": 90.54036651807826,
      "internal_equational_diversity": 0.6541217236133577,
      "formula_unique_count": 31.693907875185737,
      "formula_total_count": 76.13174839029223,
      "distinct_10gram_count": 600.4091134224864,
      "distinct_10gram_ratio": 0.8525061857549154,
      "step_global_distinct_10gram_count": 1135215,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.007005131616890193,
      "step_total_10gram_count": 2336031,
      "global_total_10gram_count": 162054771,
      "entropy": 0.046918369829654694,
      "acc_reward": 0.4169921875
    },
    {
      "step": 450,
      "file": "450_16384.jsonl",
      "repetition_rate": 0.0951672211432927,
      "total_responses": 2019,
      "avg_token_length": 2132.204556711243,
      "accuracy": 0.5131253095591877,
      "internal_textual_diversity": 0.9711943132797541,
      "internal_textual_similarity": 0.028805686720245915,
      "internal_unit_count": 87.04408122833085,
      "internal_equational_diversity": 0.6649328530089921,
      "formula_unique_count": 32.48736998514116,
      "formula_total_count": 72.73204556711244,
      "distinct_10gram_count": 610.6017830609212,
      "distinct_10gram_ratio": 0.8642142044042121,
      "step_global_distinct_10gram_count": 1163014,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.007176672385659044,
      "step_total_10gram_count": 2249470,
      "global_total_10gram_count": 162054771,
      "entropy": 0.05426788702607155,
      "acc_reward": 0.4755859375
    },
    {
      "step": 460,
      "file": "460_16384.jsonl",
      "repetition_rate": 0.07730909829262111,
      "total_responses": 2019,
      "avg_token_length": 1981.6191183754333,
      "accuracy": 0.5299653293709757,
      "internal_textual_diversity": 0.9752212426455569,
      "internal_textual_similarity": 0.02477875735444311,
      "internal_unit_count": 79.6097077761268,
      "internal_equational_diversity": 0.6855272106008147,
      "formula_unique_count": 32.719663199603765,
      "formula_total_count": 67.9777117384844,
      "distinct_10gram_count": 610.3100544824171,
      "distinct_10gram_ratio": 0.8835761707969905,
      "step_global_distinct_10gram_count": 1160392,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.007160492670715631,
      "step_total_10gram_count": 2059338,
      "global_total_10gram_count": 162054771,
      "entropy": 0.05740517005324364,
      "acc_reward": 0.5048828125
    },
    {
      "step": 470,
      "file": "470_16384.jsonl",
      "repetition_rate": 0.062195480475873234,
      "total_responses": 2019,
      "avg_token_length": 1966.3907875185735,
      "accuracy": 0.5279841505695889,
      "internal_textual_diversity": 0.9799112719121723,
      "internal_textual_similarity": 0.02008872808782766,
      "internal_unit_count": 79.91976225854383,
      "internal_equational_diversity": 0.6879662085097606,
      "formula_unique_count": 34.47894997523527,
      "formula_total_count": 68.49331352154532,
      "distinct_10gram_count": 647.7805844477464,
      "distinct_10gram_ratio": 0.8951665297726882,
      "step_global_distinct_10gram_count": 1232894,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.0076078846206879035,
      "step_total_10gram_count": 2028790,
      "global_total_10gram_count": 162054771,
      "entropy": 0.053807687014341354,
      "acc_reward": 0.5087890625
    },
    {
      "step": 480,
      "file": "480_16384.jsonl",
      "repetition_rate": 0.07196357826923724,
      "total_responses": 2019,
      "avg_token_length": 2108.045567112432,
      "accuracy": 0.5255076770678554,
      "internal_textual_diversity": 0.9787972996256211,
      "internal_textual_similarity": 0.021202700374378977,
      "internal_unit_count": 86.3476968796434,
      "internal_equational_diversity": 0.678167776180194,
      "formula_unique_count": 35.32491332342744,
      "formula_total_count": 73.77761267954433,
      "distinct_10gram_count": 663.531451213472,
      "distinct_10gram_ratio": 0.8838045664350713,
      "step_global_distinct_10gram_count": 1261901,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.007786879659346777,
      "step_total_10gram_count": 2170114,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04990176483988762,
      "acc_reward": 0.443359375
    },
    {
      "step": 490,
      "file": "490_16384.jsonl",
      "repetition_rate": 0.09393648342611294,
      "total_responses": 2019,
      "avg_token_length": 2383.068350668648,
      "accuracy": 0.5255076770678554,
      "internal_textual_diversity": 0.9769727262346846,
      "internal_textual_similarity": 0.023027273765315267,
      "internal_unit_count": 96.67706785537395,
      "internal_equational_diversity": 0.6596078405307992,
      "formula_unique_count": 36.408122833085685,
      "formula_total_count": 82.47696879643388,
      "distinct_10gram_count": 686.3665180782566,
      "distinct_10gram_ratio": 0.8566998470277987,
      "step_global_distinct_10gram_count": 1306790,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.008063878600649159,
      "step_total_10gram_count": 2459273,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04547511786222458,
      "acc_reward": 0.46875
    },
    {
      "step": 500,
      "file": "500_16384.jsonl",
      "repetition_rate": 0.09456272364388453,
      "total_responses": 2019,
      "avg_token_length": 2397.4323922734025,
      "accuracy": 0.5235264982664686,
      "internal_textual_diversity": 0.9758631620384502,
      "internal_textual_similarity": 0.02413683796154978,
      "internal_unit_count": 99.78949975235265,
      "internal_equational_diversity": 0.6429663852267598,
      "formula_unique_count": 36.73997028231798,
      "formula_total_count": 86.14858841010401,
      "distinct_10gram_count": 702.1887072808321,
      "distinct_10gram_ratio": 0.8528051969416325,
      "step_global_distinct_10gram_count": 1333253,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.008227175243115798,
      "step_total_10gram_count": 2526781,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04868646711111069,
      "acc_reward": 0.455078125
    },
    {
      "step": 510,
      "file": "510_16384.jsonl",
      "repetition_rate": 0.09943203056417124,
      "total_responses": 2019,
      "avg_token_length": 2495.358593363051,
      "accuracy": 0.5220406141654285,
      "internal_textual_diversity": 0.9728943058432991,
      "internal_textual_similarity": 0.02710569415670097,
      "internal_unit_count": 105.12580485388807,
      "internal_equational_diversity": 0.6378330427790246,
      "formula_unique_count": 38.01733531451214,
      "formula_total_count": 89.46508172362556,
      "distinct_10gram_count": 738.591381872214,
      "distinct_10gram_ratio": 0.8515095891810479,
      "step_global_distinct_10gram_count": 1397597,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.008624226188317529,
      "step_total_10gram_count": 2642072,
      "global_total_10gram_count": 162054771,
      "entropy": 0.045929934829473495,
      "acc_reward": 0.462890625
    },
    {
      "step": 520,
      "file": "520_16384.jsonl",
      "repetition_rate": 0.11193821499003256,
      "total_responses": 2019,
      "avg_token_length": 2667.2526002971767,
      "accuracy": 0.5304606240713224,
      "internal_textual_diversity": 0.9747707452660076,
      "internal_textual_similarity": 0.025229254733992332,
      "internal_unit_count": 111.59831599801882,
      "internal_equational_diversity": 0.6192168895688033,
      "formula_unique_count": 37.76770678553739,
      "formula_total_count": 93.71570084200098,
      "distinct_10gram_count": 763.6443784051511,
      "distinct_10gram_ratio": 0.8354349648952274,
      "step_global_distinct_10gram_count": 1452166,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.008960958020791625,
      "step_total_10gram_count": 2896592,
      "global_total_10gram_count": 162054771,
      "entropy": 0.0442916601896286,
      "acc_reward": 0.501953125
    },
    {
      "step": 530,
      "file": "530_16384.jsonl",
      "repetition_rate": 0.10815747837811204,
      "total_responses": 2019,
      "avg_token_length": 2738.909856364537,
      "accuracy": 0.5339276869737494,
      "internal_textual_diversity": 0.9759737662640192,
      "internal_textual_similarity": 0.024026233735980864,
      "internal_unit_count": 114.66666666666667,
      "internal_equational_diversity": 0.6175774715166916,
      "formula_unique_count": 39.077761267954436,
      "formula_total_count": 97.97127290737988,
      "distinct_10gram_count": 795.3060921248143,
      "distinct_10gram_ratio": 0.8348566235882198,
      "step_global_distinct_10gram_count": 1506926,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.009298868467130782,
      "step_total_10gram_count": 2972867,
      "global_total_10gram_count": 162054771,
      "entropy": 0.0485064759850502,
      "acc_reward": 0.4384765625
    },
    {
      "step": 540,
      "file": "540_16384.jsonl",
      "repetition_rate": 0.11769362960716885,
      "total_responses": 2019,
      "avg_token_length": 2875.52897473997,
      "accuracy": 0.5230312035661219,
      "internal_textual_diversity": 0.9729882591893226,
      "internal_textual_similarity": 0.027011740810677257,
      "internal_unit_count": 117.42496285289748,
      "internal_equational_diversity": 0.6071339073085533,
      "formula_unique_count": 39.82516097077761,
      "formula_total_count": 102.07776126795443,
      "distinct_10gram_count": 828.2838038632987,
      "distinct_10gram_ratio": 0.8293661132037653,
      "step_global_distinct_10gram_count": 1577753,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.009735924405459189,
      "step_total_10gram_count": 3083403,
      "global_total_10gram_count": 162054771,
      "entropy": 0.04923596978187561,
      "acc_reward": 0.4775390625
    },
    {
      "step": 550,
      "file": "550_16384.jsonl",
      "repetition_rate": 0.10977853778735748,
      "total_responses": 2019,
      "avg_token_length": 3188.2238732045566,
      "accuracy": 0.5349182763744428,
      "internal_textual_diversity": 0.97618901659702,
      "internal_textual_similarity": 0.023810983402980004,
      "internal_unit_count": 132.24418028727092,
      "internal_equational_diversity": 0.5970672973265673,
      "formula_unique_count": 44.84497275879148,
      "formula_total_count": 115.66963843486874,
      "distinct_10gram_count": 931.5081723625557,
      "distinct_10gram_ratio": 0.8187454167162846,
      "step_global_distinct_10gram_count": 1775907,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.010958683839058339,
      "step_total_10gram_count": 3372071,
      "global_total_10gram_count": 162054771,
      "entropy": 0.050153911113739014,
      "acc_reward": 0.4921875
    },
    {
      "step": 560,
      "file": "560_16384.jsonl",
      "repetition_rate": 0.08653374884302398,
      "total_responses": 2019,
      "avg_token_length": 3121.159980188212,
      "accuracy": 0.5359088657751362,
      "internal_textual_diversity": 0.9811083974149325,
      "internal_textual_similarity": 0.018891602585067526,
      "internal_unit_count": 128.27835562159484,
      "internal_equational_diversity": 0.6121867761260759,
      "formula_unique_count": 47.30757800891531,
      "formula_total_count": 115.19910846953938,
      "distinct_10gram_count": 970.0064388311046,
      "distinct_10gram_ratio": 0.8403400497463632,
      "step_global_distinct_10gram_count": 1856179,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.011454022541551707,
      "step_total_10gram_count": 3252354,
      "global_total_10gram_count": 162054771,
      "entropy": 0.05302621051669121,
      "acc_reward": 0.5205078125
    },
    {
      "step": 570,
      "file": "570_16384.jsonl",
      "repetition_rate": 0.06535477446000226,
      "total_responses": 2019,
      "avg_token_length": 3046.706785537395,
      "accuracy": 0.5299653293709757,
      "internal_textual_diversity": 0.9832364926648819,
      "internal_textual_similarity": 0.016763507335118083,
      "internal_unit_count": 124.68350668647845,
      "internal_equational_diversity": 0.6193311069720417,
      "formula_unique_count": 49.025260029717685,
      "formula_total_count": 112.20802377414562,
      "distinct_10gram_count": 1013.2852897473997,
      "distinct_10gram_ratio": 0.8573123057973276,
      "step_global_distinct_10gram_count": 1941561,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.011980893793000392,
      "step_total_10gram_count": 3142676,
      "global_total_10gram_count": 162054771,
      "entropy": 0.05369521677494049,
      "acc_reward": 0.52734375
    },
    {
      "step": 580,
      "file": "580_16384.jsonl",
      "repetition_rate": 0.08525580924414604,
      "total_responses": 2019,
      "avg_token_length": 3272.213472015849,
      "accuracy": 0.5339276869737494,
      "internal_textual_diversity": 0.981849184195716,
      "internal_textual_similarity": 0.018150815804284095,
      "internal_unit_count": 138.3680039623576,
      "internal_equational_diversity": 0.5938042215581655,
      "formula_unique_count": 49.00643883110451,
      "formula_total_count": 120.4432887568103,
      "distinct_10gram_count": 1067.8023774145618,
      "distinct_10gram_ratio": 0.8388138919004757,
      "step_global_distinct_10gram_count": 2035418,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.012560062177990428,
      "step_total_10gram_count": 3463599,
      "global_total_10gram_count": 162054771,
      "entropy": 0.055034421384334564,
      "acc_reward": 0.5537109375
    },
    {
      "step": 590,
      "file": "590_16384.jsonl",
      "repetition_rate": 0.08091556813075558,
      "total_responses": 2019,
      "avg_token_length": 3238.079742446756,
      "accuracy": 0.5309559187716691,
      "internal_textual_diversity": 0.9822640244861057,
      "internal_textual_similarity": 0.017735975513894393,
      "internal_unit_count": 139.4918276374443,
      "internal_equational_diversity": 0.5926261358927242,
      "formula_unique_count": 47.722139673105495,
      "formula_total_count": 120.06339772164438,
      "distinct_10gram_count": 1061.6721149083705,
      "distinct_10gram_ratio": 0.8418883057187905,
      "step_global_distinct_10gram_count": 2030898,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.01253217037343504,
      "step_total_10gram_count": 3417219,
      "global_total_10gram_count": 162054771,
      "entropy": 0.06514324247837067,
      "acc_reward": 0.478515625
    },
    {
      "step": 600,
      "file": "600_16384.jsonl",
      "repetition_rate": 0.11034282986527988,
      "total_responses": 2019,
      "avg_token_length": 3422.3620604259536,
      "accuracy": 0.5468053491827637,
      "internal_textual_diversity": 0.9786132207568183,
      "internal_textual_similarity": 0.021386779243181647,
      "internal_unit_count": 150.5037147102526,
      "internal_equational_diversity": 0.5735893659080542,
      "formula_unique_count": 45.48588410104012,
      "formula_total_count": 126.9608717186726,
      "distinct_10gram_count": 1042.184249628529,
      "distinct_10gram_ratio": 0.8154689414244694,
      "step_global_distinct_10gram_count": 1992804,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.012297101700264042,
      "step_total_10gram_count": 3657085,
      "global_total_10gram_count": 162054771,
      "entropy": 0.058605123311281204,
      "acc_reward": 0.5517578125
    },
    {
      "step": 610,
      "file": "610_16384.jsonl",
      "repetition_rate": 0.10810241659948767,
      "total_responses": 2019,
      "avg_token_length": 3418.0222882615158,
      "accuracy": 0.5373947498761763,
      "internal_textual_diversity": 0.9781125083970907,
      "internal_textual_similarity": 0.021887491602909197,
      "internal_unit_count": 151.43041109460128,
      "internal_equational_diversity": 0.5697407315898171,
      "formula_unique_count": 45.117880138682516,
      "formula_total_count": 127.4824170381377,
      "distinct_10gram_count": 1052.2808320950967,
      "distinct_10gram_ratio": 0.8149410841122071,
      "step_global_distinct_10gram_count": 2009068,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.012397462830637673,
      "step_total_10gram_count": 3660270,
      "global_total_10gram_count": 162054771,
      "entropy": 0.06703393906354904,
      "acc_reward": 0.4453125
    },
    {
      "step": 620,
      "file": "620_16384.jsonl",
      "repetition_rate": 0.083534552692334,
      "total_responses": 2019,
      "avg_token_length": 3217.5998018821197,
      "accuracy": 0.5512630014858841,
      "internal_textual_diversity": 0.9821517241899413,
      "internal_textual_similarity": 0.01784827581005864,
      "internal_unit_count": 140.72461614660722,
      "internal_equational_diversity": 0.5913265495246612,
      "formula_unique_count": 47.42991579990094,
      "formula_total_count": 121.81178801386825,
      "distinct_10gram_count": 1085.448736998514,
      "distinct_10gram_ratio": 0.8504263509814238,
      "step_global_distinct_10gram_count": 2081974,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.012847347764910914,
      "step_total_10gram_count": 3394137,
      "global_total_10gram_count": 162054771,
      "entropy": 0.07410860061645508,
      "acc_reward": 0.4873046875
    },
    {
      "step": 630,
      "file": "630_16384.jsonl",
      "repetition_rate": 0.07781271786513504,
      "total_responses": 2019,
      "avg_token_length": 3358.3630510153544,
      "accuracy": 0.532937097573056,
      "internal_textual_diversity": 0.9824311443206001,
      "internal_textual_similarity": 0.017568855679399947,
      "internal_unit_count": 147.78306092124814,
      "internal_equational_diversity": 0.5778119560161874,
      "formula_unique_count": 50.64883605745418,
      "formula_total_count": 128.88211986131748,
      "distinct_10gram_count": 1170.698860822189,
      "distinct_10gram_ratio": 0.84864024306993,
      "step_global_distinct_10gram_count": 2248051,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.01387216794746512,
      "step_total_10gram_count": 3542144,
      "global_total_10gram_count": 162054771,
      "entropy": 0.07399960607290268,
      "acc_reward": 0.4912109375
    },
    {
      "step": 640,
      "file": "640_16384.jsonl",
      "repetition_rate": 0.0844972918356981,
      "total_responses": 2019,
      "avg_token_length": 3176.508172362556,
      "accuracy": 0.5408618127786032,
      "internal_textual_diversity": 0.9802589031167497,
      "internal_textual_similarity": 0.019741096883250403,
      "internal_unit_count": 142.0827142149579,
      "internal_equational_diversity": 0.5815655935747341,
      "formula_unique_count": 46.45269935611689,
      "formula_total_count": 123.51163942545814,
      "distinct_10gram_count": 1079.64289252105,
      "distinct_10gram_ratio": 0.8501422418860265,
      "step_global_distinct_10gram_count": 2061940,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.012723722894897059,
      "step_total_10gram_count": 3360392,
      "global_total_10gram_count": 162054771,
      "entropy": 0.07469397038221359,
      "acc_reward": 0.4521484375
    },
    {
      "step": 650,
      "file": "650_16384.jsonl",
      "repetition_rate": 0.07975636133370449,
      "total_responses": 2019,
      "avg_token_length": 3639.518573551263,
      "accuracy": 0.5433382862803368,
      "internal_textual_diversity": 0.9819639612946235,
      "internal_textual_similarity": 0.01803603870537643,
      "internal_unit_count": 156.90638930163448,
      "internal_equational_diversity": 0.5621514826416808,
      "formula_unique_count": 54.38682516097078,
      "formula_total_count": 141.04705299653293,
      "distinct_10gram_count": 1280.5334323922734,
      "distinct_10gram_ratio": 0.84397278755047,
      "step_global_distinct_10gram_count": 2458685,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.015171938381252595,
      "step_total_10gram_count": 3828164,
      "global_total_10gram_count": 162054771,
      "entropy": 0.0789271891117096,
      "acc_reward": 0.54296875
    },
    {
      "step": 660,
      "file": "660_16384.jsonl",
      "repetition_rate": 0.08239342104283655,
      "total_responses": 2019,
      "avg_token_length": 3637.1659237246163,
      "accuracy": 0.5567112431896979,
      "internal_textual_diversity": 0.9815861233833554,
      "internal_textual_similarity": 0.018413876616644592,
      "internal_unit_count": 160.21248142644873,
      "internal_equational_diversity": 0.5617585706351595,
      "formula_unique_count": 54.15700842000991,
      "formula_total_count": 141.82020802377414,
      "distinct_10gram_count": 1286.4799405646359,
      "distinct_10gram_ratio": 0.8434209665529875,
      "step_global_distinct_10gram_count": 2464213,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.015206050305053962,
      "step_total_10gram_count": 3841606,
      "global_total_10gram_count": 162054771,
      "entropy": 0.07324223965406418,
      "acc_reward": 0.5
    },
    {
      "step": 670,
      "file": "670_16384.jsonl",
      "repetition_rate": 0.07841977678348169,
      "total_responses": 2019,
      "avg_token_length": 3535.191679049034,
      "accuracy": 0.5468053491827637,
      "internal_textual_diversity": 0.9815911101306002,
      "internal_textual_similarity": 0.01840888986939973,
      "internal_unit_count": 155.92372461614661,
      "internal_equational_diversity": 0.5612665174623199,
      "formula_unique_count": 52.469539375928676,
      "formula_total_count": 135.85339276869738,
      "distinct_10gram_count": 1225.1154036651808,
      "distinct_10gram_ratio": 0.8389686720114391,
      "step_global_distinct_10gram_count": 2336838,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.014420050613628648,
      "step_total_10gram_count": 3740069,
      "global_total_10gram_count": 162054771,
      "entropy": 0.07158448547124863,
      "acc_reward": 0.517578125
    },
    {
      "step": 680,
      "file": "680_16384.jsonl",
      "repetition_rate": 0.09851440418610279,
      "total_responses": 2019,
      "avg_token_length": 3988.8276374442794,
      "accuracy": 0.5408618127786032,
      "internal_textual_diversity": 0.9797098686772534,
      "internal_textual_similarity": 0.020290131322746555,
      "internal_unit_count": 180.46359583952452,
      "internal_equational_diversity": 0.5355624374382679,
      "formula_unique_count": 55.074294205052006,
      "formula_total_count": 154.4730064388311,
      "distinct_10gram_count": 1329.4076275383854,
      "distinct_10gram_ratio": 0.8097999872897781,
      "step_global_distinct_10gram_count": 2534311,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.01563860776428483,
      "step_total_10gram_count": 4246166,
      "global_total_10gram_count": 162054771,
      "entropy": 0.0737958624958992,
      "acc_reward": 0.525390625
    },
    {
      "step": 690,
      "file": "690_16384.jsonl",
      "repetition_rate": 0.08805734823823805,
      "total_responses": 2019,
      "avg_token_length": 3857.6191183754336,
      "accuracy": 0.54284299157999,
      "internal_textual_diversity": 0.9806327020577013,
      "internal_textual_similarity": 0.019367297942298776,
      "internal_unit_count": 174.9118375433383,
      "internal_equational_diversity": 0.5466997013055678,
      "formula_unique_count": 55.824170381376916,
      "formula_total_count": 149.42595344229818,
      "distinct_10gram_count": 1331.9489846458644,
      "distinct_10gram_ratio": 0.8262267454469614,
      "step_global_distinct_10gram_count": 2547683,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.01572112307634559,
      "step_total_10gram_count": 4104398,
      "global_total_10gram_count": 162054771,
      "entropy": 0.07466119527816772,
      "acc_reward": 0.482421875
    },
    {
      "step": 700,
      "file": "700_16384.jsonl",
      "repetition_rate": 0.07819105937403303,
      "total_responses": 2019,
      "avg_token_length": 3433.6726102030707,
      "accuracy": 0.5373947498761763,
      "internal_textual_diversity": 0.981722426155332,
      "internal_textual_similarity": 0.018277573844667924,
      "internal_unit_count": 154.9162951956414,
      "internal_equational_diversity": 0.570513663315532,
      "formula_unique_count": 51.66617137196632,
      "formula_total_count": 132.15007429420504,
      "distinct_10gram_count": 1184.0846953937594,
      "distinct_10gram_ratio": 0.8451521363264689,
      "step_global_distinct_10gram_count": 2257488,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.01393040134560432,
      "step_total_10gram_count": 3595676,
      "global_total_10gram_count": 162054771,
      "entropy": 0.06912454962730408,
      "acc_reward": 0.474609375
    },
    {
      "step": 710,
      "file": "710_16384.jsonl",
      "repetition_rate": 0.09517570123801408,
      "total_responses": 2019,
      "avg_token_length": 3432.5473006438833,
      "accuracy": 0.5413571074789499,
      "internal_textual_diversity": 0.9786659149815496,
      "internal_textual_similarity": 0.02133408501845027,
      "internal_unit_count": 153.67805844477465,
      "internal_equational_diversity": 0.5626899071209555,
      "formula_unique_count": 48.134720158494304,
      "formula_total_count": 131.62753838533928,
      "distinct_10gram_count": 1124.2773650321942,
      "distinct_10gram_ratio": 0.8292740709305059,
      "step_global_distinct_10gram_count": 2141091,
      "global_distinct_10gram_count": 57511570,
      "step_global_distinct_10gram_ratio": 0.013212144183030563,
      "step_total_10gram_count": 3634605,
      "global_total_10gram_count": 162054771,
      "entropy": 0.06654968112707138,
      "acc_reward": 0.546875
    }
  ],
  "gspo_length": [
    {
      "step": 10,
      "file": "10_16384.jsonl",
      "repetition_rate": 0.08858701175225525,
      "total_responses": 2019,
      "avg_token_length": 1567.0673600792472,
      "accuracy": 0.3333333333333333,
      "internal_textual_diversity": 0.963750249358979,
      "internal_textual_similarity": 0.03624975064102099,
      "internal_unit_count": 61.134720158494304,
      "internal_equational_diversity": 0.6536613104560137,
      "formula_unique_count": 20.55522535908866,
      "formula_total_count": 50.16245666171372,
      "distinct_10gram_count": 391.82169390787516,
      "distinct_10gram_ratio": 0.8645245757419342,
      "step_global_distinct_10gram_count": 772685,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0011915832751269253,
      "step_total_10gram_count": 1534818,
      "global_total_10gram_count": 648452371,
      "entropy": 1.014036774635315,
      "acc_reward": 0.1181640625
    },
    {
      "step": 20,
      "file": "20_16384.jsonl",
      "repetition_rate": 0.14572552672449257,
      "total_responses": 2019,
      "avg_token_length": 2782.440316988608,
      "accuracy": 0.31154036651807826,
      "internal_textual_diversity": 0.9612013072537151,
      "internal_textual_similarity": 0.038798692746284835,
      "internal_unit_count": 112.64933135215453,
      "internal_equational_diversity": 0.5930687902666861,
      "formula_unique_count": 33.10846953937593,
      "formula_total_count": 85.42347696879644,
      "distinct_10gram_count": 682.8984645864289,
      "distinct_10gram_ratio": 0.7859410720722069,
      "step_global_distinct_10gram_count": 1339270,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0020653328754657294,
      "step_total_10gram_count": 2811187,
      "global_total_10gram_count": 648452371,
      "entropy": 1.1696624755859375,
      "acc_reward": 0.15234375
    },
    {
      "step": 30,
      "file": "30_16384.jsonl",
      "repetition_rate": 0.2768400565989133,
      "total_responses": 2019,
      "avg_token_length": 4938.890044576523,
      "accuracy": 0.2530955918771669,
      "internal_textual_diversity": 0.9423940781249084,
      "internal_textual_similarity": 0.0576059218750916,
      "internal_unit_count": 200.97325408618127,
      "internal_equational_diversity": 0.5086190081431768,
      "formula_unique_count": 50.42545814759782,
      "formula_total_count": 147.56265477959386,
      "distinct_10gram_count": 1054.4091134224864,
      "distinct_10gram_ratio": 0.6320810328749872,
      "step_global_distinct_10gram_count": 2056269,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0031710409152008482,
      "step_total_10gram_count": 4948862,
      "global_total_10gram_count": 648452371,
      "entropy": 1.0681086778640747,
      "acc_reward": 0.1474609375
    },
    {
      "step": 40,
      "file": "40_16384.jsonl",
      "repetition_rate": 0.44157521953961665,
      "total_responses": 2019,
      "avg_token_length": 7122.67161961367,
      "accuracy": 0.2213967310549777,
      "internal_textual_diversity": 0.913832189408855,
      "internal_textual_similarity": 0.08616781059114494,
      "internal_unit_count": 296.38335809806836,
      "internal_equational_diversity": 0.38452915997604425,
      "formula_unique_count": 57.445765230312034,
      "formula_total_count": 214.38385339276869,
      "distinct_10gram_count": 1185.3620604259534,
      "distinct_10gram_ratio": 0.4221273591159983,
      "step_global_distinct_10gram_count": 2308940,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0035606932802779434,
      "step_total_10gram_count": 7206673,
      "global_total_10gram_count": 648452371,
      "entropy": 1.1812629699707031,
      "acc_reward": 0.130859375
    },
    {
      "step": 50,
      "file": "50_16384.jsonl",
      "repetition_rate": 0.5012374129246135,
      "total_responses": 2019,
      "avg_token_length": 7471.947994056463,
      "accuracy": 0.2535908865775136,
      "internal_textual_diversity": 0.9119517870264189,
      "internal_textual_similarity": 0.08804821297358105,
      "internal_unit_count": 310.20307082714214,
      "internal_equational_diversity": 0.32803314398383115,
      "formula_unique_count": 52.16840019811788,
      "formula_total_count": 234.6607231302625,
      "distinct_10gram_count": 1047.4809311540366,
      "distinct_10gram_ratio": 0.34638343379934594,
      "step_global_distinct_10gram_count": 2046642,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.003156194797844297,
      "step_total_10gram_count": 7633972,
      "global_total_10gram_count": 648452371,
      "entropy": 1.095787525177002,
      "acc_reward": 0.1162109375
    },
    {
      "step": 60,
      "file": "60_16384.jsonl",
      "repetition_rate": 0.5615357741823411,
      "total_responses": 2019,
      "avg_token_length": 7742.858841010401,
      "accuracy": 0.3031203566121842,
      "internal_textual_diversity": 0.8954867288180863,
      "internal_textual_similarity": 0.10451327118191367,
      "internal_unit_count": 320.2139673105498,
      "internal_equational_diversity": 0.26687180248222525,
      "formula_unique_count": 42.246161466072316,
      "formula_total_count": 250.5512630014859,
      "distinct_10gram_count": 869.6394254581476,
      "distinct_10gram_ratio": 0.2792651452200799,
      "step_global_distinct_10gram_count": 1705303,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0026298045566094477,
      "step_total_10gram_count": 7851982,
      "global_total_10gram_count": 648452371,
      "entropy": 0.9403428435325623,
      "acc_reward": 0.1728515625
    },
    {
      "step": 70,
      "file": "70_16384.jsonl",
      "repetition_rate": 0.5852263748867278,
      "total_responses": 2019,
      "avg_token_length": 7811.7355126300145,
      "accuracy": 0.3313521545319465,
      "internal_textual_diversity": 0.8922472487934816,
      "internal_textual_similarity": 0.10775275120651838,
      "internal_unit_count": 333.69093610698366,
      "internal_equational_diversity": 0.2494471473354359,
      "formula_unique_count": 37.40713224368499,
      "formula_total_count": 253.05250123823674,
      "distinct_10gram_count": 790.2005943536404,
      "distinct_10gram_ratio": 0.2535380311955651,
      "step_global_distinct_10gram_count": 1552851,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0023947032495313368,
      "step_total_10gram_count": 7918942,
      "global_total_10gram_count": 648452371,
      "entropy": 0.8837499618530273,
      "acc_reward": 0.171875
    },
    {
      "step": 80,
      "file": "80_16384.jsonl",
      "repetition_rate": 0.6152821015854444,
      "total_responses": 2019,
      "avg_token_length": 7990.7147102526005,
      "accuracy": 0.35116394254581473,
      "internal_textual_diversity": 0.9095218611004965,
      "internal_textual_similarity": 0.09047813889950361,
      "internal_unit_count": 345.2372461614661,
      "internal_equational_diversity": 0.20522868755603751,
      "formula_unique_count": 34.969291728578504,
      "formula_total_count": 268.5641406636949,
      "distinct_10gram_count": 733.0208023774146,
      "distinct_10gram_ratio": 0.21508493034300694,
      "step_global_distinct_10gram_count": 1442383,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.002224346867258197,
      "step_total_10gram_count": 8169006,
      "global_total_10gram_count": 648452371,
      "entropy": 0.7200179100036621,
      "acc_reward": 0.1787109375
    },
    {
      "step": 90,
      "file": "90_16384.jsonl",
      "repetition_rate": 0.6334944636742837,
      "total_responses": 2019,
      "avg_token_length": 8054.004952947003,
      "accuracy": 0.3813769192669638,
      "internal_textual_diversity": 0.9066113810580412,
      "internal_textual_similarity": 0.0933886189419588,
      "internal_unit_count": 350.2184249628529,
      "internal_equational_diversity": 0.1888298981053279,
      "formula_unique_count": 33.3159980188212,
      "formula_total_count": 273.36305101535413,
      "distinct_10gram_count": 712.2634967805844,
      "distinct_10gram_ratio": 0.2018244408985695,
      "step_global_distinct_10gram_count": 1402383,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.002162661534936388,
      "step_total_10gram_count": 8239563,
      "global_total_10gram_count": 648452371,
      "entropy": 0.6375336050987244,
      "acc_reward": 0.2314453125
    },
    {
      "step": 100,
      "file": "100_16384.jsonl",
      "repetition_rate": 0.6191418160069032,
      "total_responses": 2019,
      "avg_token_length": 8027.894502228826,
      "accuracy": 0.3863298662704309,
      "internal_textual_diversity": 0.9132794482256247,
      "internal_textual_similarity": 0.08672055177437533,
      "internal_unit_count": 349.5388806339772,
      "internal_equational_diversity": 0.178607556860323,
      "formula_unique_count": 34.44675581971273,
      "formula_total_count": 278.0911342248638,
      "distinct_10gram_count": 719.4868746904408,
      "distinct_10gram_ratio": 0.20620132831985152,
      "step_global_distinct_10gram_count": 1418126,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0021869393396049437,
      "step_total_10gram_count": 8227051,
      "global_total_10gram_count": 648452371,
      "entropy": 0.5082208514213562,
      "acc_reward": 0.2587890625
    },
    {
      "step": 110,
      "file": "110_16384.jsonl",
      "repetition_rate": 0.613232820198731,
      "total_responses": 2019,
      "avg_token_length": 8049.373452204061,
      "accuracy": 0.4086181277860327,
      "internal_textual_diversity": 0.9290838643425496,
      "internal_textual_similarity": 0.07091613565745038,
      "internal_unit_count": 348.4418028727093,
      "internal_equational_diversity": 0.16577010747884222,
      "formula_unique_count": 34.07974244675582,
      "formula_total_count": 289.04408122833087,
      "distinct_10gram_count": 721.7018325903913,
      "distinct_10gram_ratio": 0.19871571744891486,
      "step_global_distinct_10gram_count": 1420988,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0021913529251325692,
      "step_total_10gram_count": 8336905,
      "global_total_10gram_count": 648452371,
      "entropy": 0.37564265727996826,
      "acc_reward": 0.20703125
    },
    {
      "step": 120,
      "file": "120_16384.jsonl",
      "repetition_rate": 0.5888226452298766,
      "total_responses": 2019,
      "avg_token_length": 8105.906389301635,
      "accuracy": 0.4279346210995542,
      "internal_textual_diversity": 0.9358366077983742,
      "internal_textual_similarity": 0.06416339220162576,
      "internal_unit_count": 359.70133729569096,
      "internal_equational_diversity": 0.15453726899362724,
      "formula_unique_count": 35.766220901436355,
      "formula_total_count": 291.7880138682516,
      "distinct_10gram_count": 790.7181773155028,
      "distinct_10gram_ratio": 0.20765226417786073,
      "step_global_distinct_10gram_count": 1561000,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0024072700938585975,
      "step_total_10gram_count": 8401181,
      "global_total_10gram_count": 648452371,
      "entropy": 0.3505806028842926,
      "acc_reward": 0.255859375
    },
    {
      "step": 130,
      "file": "130_16384.jsonl",
      "repetition_rate": 0.5382209142937622,
      "total_responses": 2019,
      "avg_token_length": 8086.11490837048,
      "accuracy": 0.4442793462109955,
      "internal_textual_diversity": 0.9430684684217125,
      "internal_textual_similarity": 0.05693153157828747,
      "internal_unit_count": 357.65279841505696,
      "internal_equational_diversity": 0.16226892875476814,
      "formula_unique_count": 39.062902426944035,
      "formula_total_count": 293.87023278850916,
      "distinct_10gram_count": 894.9574046557702,
      "distinct_10gram_ratio": 0.23491334813938303,
      "step_global_distinct_10gram_count": 1766214,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0027237374385357905,
      "step_total_10gram_count": 8366984,
      "global_total_10gram_count": 648452371,
      "entropy": 0.28804323077201843,
      "acc_reward": 0.2998046875
    },
    {
      "step": 140,
      "file": "140_16384.jsonl",
      "repetition_rate": 0.45745549172038363,
      "total_responses": 2019,
      "avg_token_length": 8078.454680534918,
      "accuracy": 0.45864289252105,
      "internal_textual_diversity": 0.9503119010424795,
      "internal_textual_similarity": 0.04968809895752058,
      "internal_unit_count": 372.7469044081228,
      "internal_equational_diversity": 0.18301105956701577,
      "formula_unique_count": 49.83308568598316,
      "formula_total_count": 303.617137196632,
      "distinct_10gram_count": 1184.0539871223377,
      "distinct_10gram_ratio": 0.2995116204335195,
      "step_global_distinct_10gram_count": 2334123,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0035995288233744466,
      "step_total_10gram_count": 8468674,
      "global_total_10gram_count": 648452371,
      "entropy": 0.2571614682674408,
      "acc_reward": 0.2978515625
    },
    {
      "step": 150,
      "file": "150_16384.jsonl",
      "repetition_rate": 0.3233367472343236,
      "total_responses": 2019,
      "avg_token_length": 8043.9767211490835,
      "accuracy": 0.4923229321446261,
      "internal_textual_diversity": 0.9650088802629784,
      "internal_textual_similarity": 0.03449582503667497,
      "internal_unit_count": 389.0673600792472,
      "internal_equational_diversity": 0.21738715558335897,
      "formula_unique_count": 59.0232788509163,
      "formula_total_count": 298.99950470529967,
      "distinct_10gram_count": 1593.2407132243684,
      "distinct_10gram_ratio": 0.3884867719476314,
      "step_global_distinct_10gram_count": 3116859,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.004806612080380534,
      "step_total_10gram_count": 8675839,
      "global_total_10gram_count": 648452371,
      "entropy": 0.24416351318359375,
      "acc_reward": 0.380859375
    },
    {
      "step": 160,
      "file": "160_16384.jsonl",
      "repetition_rate": 0.28140570972469936,
      "total_responses": 2019,
      "avg_token_length": 8065.668647845468,
      "accuracy": 0.4952947003467063,
      "internal_textual_diversity": 0.9692944014744662,
      "internal_textual_similarity": 0.030705598525533808,
      "internal_unit_count": 396.2441802872709,
      "internal_equational_diversity": 0.213180952519386,
      "formula_unique_count": 60.315007429420504,
      "formula_total_count": 307.641406636949,
      "distinct_10gram_count": 1666.934125804854,
      "distinct_10gram_ratio": 0.39955998819350835,
      "step_global_distinct_10gram_count": 3247295,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0050077617805487215,
      "step_total_10gram_count": 8830881,
      "global_total_10gram_count": 648452371,
      "entropy": 0.21321935951709747,
      "acc_reward": 0.35546875
    },
    {
      "step": 170,
      "file": "170_16384.jsonl",
      "repetition_rate": 0.26178750894749253,
      "total_responses": 2019,
      "avg_token_length": 7992.826646854879,
      "accuracy": 0.5165923724616147,
      "internal_textual_diversity": 0.9728674992046499,
      "internal_textual_similarity": 0.02713250079535011,
      "internal_unit_count": 398.579990094106,
      "internal_equational_diversity": 0.22028332280119814,
      "formula_unique_count": 62.07677067855374,
      "formula_total_count": 303.63595839524515,
      "distinct_10gram_count": 1710.9024269440317,
      "distinct_10gram_ratio": 0.41850339799570263,
      "step_global_distinct_10gram_count": 3330653,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.005136310928840755,
      "step_total_10gram_count": 8674438,
      "global_total_10gram_count": 648452371,
      "entropy": 0.19241541624069214,
      "acc_reward": 0.3818359375
    },
    {
      "step": 180,
      "file": "180_16384.jsonl",
      "repetition_rate": 0.22914488667799507,
      "total_responses": 2019,
      "avg_token_length": 8006.813273897969,
      "accuracy": 0.516097077761268,
      "internal_textual_diversity": 0.9785397248202556,
      "internal_textual_similarity": 0.0214602751797443,
      "internal_unit_count": 398.56909361069836,
      "internal_equational_diversity": 0.21619428980958336,
      "formula_unique_count": 62.51708766716196,
      "formula_total_count": 307.7464091134225,
      "distinct_10gram_count": 1769.0643883110451,
      "distinct_10gram_ratio": 0.42807846821923545,
      "step_global_distinct_10gram_count": 3439829,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.005304674874879901,
      "step_total_10gram_count": 8756144,
      "global_total_10gram_count": 648452371,
      "entropy": 0.17050795257091522,
      "acc_reward": 0.4072265625
    },
    {
      "step": 190,
      "file": "190_16384.jsonl",
      "repetition_rate": 0.225797584363024,
      "total_responses": 2019,
      "avg_token_length": 8033.015849430411,
      "accuracy": 0.5180782565626548,
      "internal_textual_diversity": 0.9764656862029452,
      "internal_textual_similarity": 0.02353431379705484,
      "internal_unit_count": 399.83110450718175,
      "internal_equational_diversity": 0.21139721644124307,
      "formula_unique_count": 62.79940564635958,
      "formula_total_count": 310.272907379891,
      "distinct_10gram_count": 1748.1282813273897,
      "distinct_10gram_ratio": 0.4211923943728653,
      "step_global_distinct_10gram_count": 3397885,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.005239991635407252,
      "step_total_10gram_count": 8761220,
      "global_total_10gram_count": 648452371,
      "entropy": 0.14366412162780762,
      "acc_reward": 0.4365234375
    },
    {
      "step": 200,
      "file": "200_16384.jsonl",
      "repetition_rate": 0.1976706340210028,
      "total_responses": 2019,
      "avg_token_length": 7999.768697374938,
      "accuracy": 0.524517087667162,
      "internal_textual_diversity": 0.9796455890761675,
      "internal_textual_similarity": 0.02035441092383259,
      "internal_unit_count": 398.4680534918276,
      "internal_equational_diversity": 0.22063132138806085,
      "formula_unique_count": 64.32144626052501,
      "formula_total_count": 306.505695889054,
      "distinct_10gram_count": 1779.7662209014363,
      "distinct_10gram_ratio": 0.4301161900310036,
      "step_global_distinct_10gram_count": 3448268,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.005317688937866495,
      "step_total_10gram_count": 8749053,
      "global_total_10gram_count": 648452371,
      "entropy": 0.13624432682991028,
      "acc_reward": 0.451171875
    },
    {
      "step": 210,
      "file": "210_16384.jsonl",
      "repetition_rate": 0.17988757688246237,
      "total_responses": 2019,
      "avg_token_length": 8005.638930163447,
      "accuracy": 0.5349182763744428,
      "internal_textual_diversity": 0.9821060351889457,
      "internal_textual_similarity": 0.017893964811054403,
      "internal_unit_count": 398.8043585933631,
      "internal_equational_diversity": 0.22171062141976527,
      "formula_unique_count": 64.518573551263,
      "formula_total_count": 306.9712729073799,
      "distinct_10gram_count": 1843.6790490341753,
      "distinct_10gram_ratio": 0.443958212662501,
      "step_global_distinct_10gram_count": 3575209,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.005513448882123064,
      "step_total_10gram_count": 8736774,
      "global_total_10gram_count": 648452371,
      "entropy": 0.14419588446617126,
      "acc_reward": 0.3740234375
    },
    {
      "step": 220,
      "file": "220_16384.jsonl",
      "repetition_rate": 0.1749842002018112,
      "total_responses": 2019,
      "avg_token_length": 8036.208519068846,
      "accuracy": 0.5408618127786032,
      "internal_textual_diversity": 0.9826954993513161,
      "internal_textual_similarity": 0.017304500648683926,
      "internal_unit_count": 399.0232788509163,
      "internal_equational_diversity": 0.21757024262434654,
      "formula_unique_count": 63.64190193164933,
      "formula_total_count": 304.0569588905399,
      "distinct_10gram_count": 1837.975730559683,
      "distinct_10gram_ratio": 0.43731258613282403,
      "step_global_distinct_10gram_count": 3563634,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0054955986890824395,
      "step_total_10gram_count": 8803173,
      "global_total_10gram_count": 648452371,
      "entropy": 0.1333802491426468,
      "acc_reward": 0.4404296875
    },
    {
      "step": 230,
      "file": "230_16384.jsonl",
      "repetition_rate": 0.1916019523287951,
      "total_responses": 2019,
      "avg_token_length": 8032.588905398712,
      "accuracy": 0.5413571074789499,
      "internal_textual_diversity": 0.9820208248470712,
      "internal_textual_similarity": 0.01797917515292876,
      "internal_unit_count": 397.5601783060921,
      "internal_equational_diversity": 0.21573890000152907,
      "formula_unique_count": 62.67161961367013,
      "formula_total_count": 306.24467558197125,
      "distinct_10gram_count": 1805.3080733036156,
      "distinct_10gram_ratio": 0.42988202091973055,
      "step_global_distinct_10gram_count": 3495564,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.005390625674803802,
      "step_total_10gram_count": 8793897,
      "global_total_10gram_count": 648452371,
      "entropy": 0.1385885328054428,
      "acc_reward": 0.4169921875
    },
    {
      "step": 240,
      "file": "240_16384.jsonl",
      "repetition_rate": 0.1717459414665141,
      "total_responses": 2019,
      "avg_token_length": 8026.991579990094,
      "accuracy": 0.5482912332838039,
      "internal_textual_diversity": 0.9839956392261258,
      "internal_textual_similarity": 0.016004360773874203,
      "internal_unit_count": 400.16889549281825,
      "internal_equational_diversity": 0.219894390506979,
      "formula_unique_count": 64.4368499257058,
      "formula_total_count": 304.07776126795443,
      "distinct_10gram_count": 1873.298662704309,
      "distinct_10gram_ratio": 0.44998956959148195,
      "step_global_distinct_10gram_count": 3626705,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.00559286257895416,
      "step_total_10gram_count": 8690383,
      "global_total_10gram_count": 648452371,
      "entropy": 0.13087321817874908,
      "acc_reward": 0.435546875
    },
    {
      "step": 250,
      "file": "250_16384.jsonl",
      "repetition_rate": 0.16282750078454086,
      "total_responses": 2019,
      "avg_token_length": 8020.587914809312,
      "accuracy": 0.5527488855869243,
      "internal_textual_diversity": 0.9846910914554124,
      "internal_textual_similarity": 0.01530890854458749,
      "internal_unit_count": 405.90936106983656,
      "internal_equational_diversity": 0.22204547452231244,
      "formula_unique_count": 66.21495789995048,
      "formula_total_count": 308.5027241208519,
      "distinct_10gram_count": 1906.135710747895,
      "distinct_10gram_ratio": 0.45581264906665897,
      "step_global_distinct_10gram_count": 3690192,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.005690767996282028,
      "step_total_10gram_count": 8714229,
      "global_total_10gram_count": 648452371,
      "entropy": 0.119587741792202,
      "acc_reward": 0.447265625
    },
    {
      "step": 260,
      "file": "260_16384.jsonl",
      "repetition_rate": 0.15196126216390857,
      "total_responses": 2019,
      "avg_token_length": 7982.670133729569,
      "accuracy": 0.5477959385834572,
      "internal_textual_diversity": 0.9848173699830625,
      "internal_textual_similarity": 0.015182630016937499,
      "internal_unit_count": 401.4749876176325,
      "internal_equational_diversity": 0.2249279666616791,
      "formula_unique_count": 67.26745913818722,
      "formula_total_count": 307.52055473006436,
      "distinct_10gram_count": 1966.5096582466567,
      "distinct_10gram_ratio": 0.47187160909886894,
      "step_global_distinct_10gram_count": 3804932,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.005867712372047137,
      "step_total_10gram_count": 8679477,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12940062582492828,
      "acc_reward": 0.421875
    },
    {
      "step": 270,
      "file": "270_16384.jsonl",
      "repetition_rate": 0.13495905938644526,
      "total_responses": 2019,
      "avg_token_length": 7962.069341258049,
      "accuracy": 0.5507677067855374,
      "internal_textual_diversity": 0.9858027998158401,
      "internal_textual_similarity": 0.014197200184159911,
      "internal_unit_count": 399.95591877166913,
      "internal_equational_diversity": 0.24483518256728942,
      "formula_unique_count": 71.50668647845468,
      "formula_total_count": 302.1822684497276,
      "distinct_10gram_count": 2139.4477464091133,
      "distinct_10gram_ratio": 0.5127793940829024,
      "step_global_distinct_10gram_count": 4148492,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.006397527691359155,
      "step_total_10gram_count": 8643200,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12122385203838348,
      "acc_reward": 0.505859375
    },
    {
      "step": 280,
      "file": "280_16384.jsonl",
      "repetition_rate": 0.14843584104384042,
      "total_responses": 2019,
      "avg_token_length": 7958.883110450718,
      "accuracy": 0.5542347696879644,
      "internal_textual_diversity": 0.9858120326328172,
      "internal_textual_similarity": 0.0141879673671828,
      "internal_unit_count": 401.21892025755324,
      "internal_equational_diversity": 0.23560855392087154,
      "formula_unique_count": 69.58494304110945,
      "formula_total_count": 308.9286775631501,
      "distinct_10gram_count": 2051.0837048043586,
      "distinct_10gram_ratio": 0.4914364419241051,
      "step_global_distinct_10gram_count": 3980840,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.006138985957998756,
      "step_total_10gram_count": 8661209,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12555105984210968,
      "acc_reward": 0.4912109375
    },
    {
      "step": 290,
      "file": "290_16384.jsonl",
      "repetition_rate": 0.1413072571407479,
      "total_responses": 2019,
      "avg_token_length": 8058.978702327885,
      "accuracy": 0.5586924219910847,
      "internal_textual_diversity": 0.9860524685587264,
      "internal_textual_similarity": 0.013947531441273576,
      "internal_unit_count": 402.0326894502229,
      "internal_equational_diversity": 0.2276746879311715,
      "formula_unique_count": 69.83110450718178,
      "formula_total_count": 314.62407132243686,
      "distinct_10gram_count": 2052.308073303616,
      "distinct_10gram_ratio": 0.48279290458585056,
      "step_global_distinct_10gram_count": 3978107,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.006134771307667868,
      "step_total_10gram_count": 8746520,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12381650507450104,
      "acc_reward": 0.498046875
    },
    {
      "step": 300,
      "file": "300_16384.jsonl",
      "repetition_rate": 0.14308280961953374,
      "total_responses": 2019,
      "avg_token_length": 8003.426944031698,
      "accuracy": 0.5601783060921248,
      "internal_textual_diversity": 0.9859412476279984,
      "internal_textual_similarity": 0.01405875237200163,
      "internal_unit_count": 397.5492818226845,
      "internal_equational_diversity": 0.23022178186430836,
      "formula_unique_count": 70.4665676077266,
      "formula_total_count": 312.1134224863794,
      "distinct_10gram_count": 2034.8439821693908,
      "distinct_10gram_ratio": 0.48400296755985567,
      "step_global_distinct_10gram_count": 3943219,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.006080969360816787,
      "step_total_10gram_count": 8656797,
      "global_total_10gram_count": 648452371,
      "entropy": 0.13643018901348114,
      "acc_reward": 0.521484375
    },
    {
      "step": 310,
      "file": "310_16384.jsonl",
      "repetition_rate": 0.13894952334965216,
      "total_responses": 2019,
      "avg_token_length": 7916.769192669639,
      "accuracy": 0.5626547795938583,
      "internal_textual_diversity": 0.9866740729592254,
      "internal_textual_similarity": 0.013325927040774573,
      "internal_unit_count": 392.3432392273403,
      "internal_equational_diversity": 0.24420176012718922,
      "formula_unique_count": 73.03417533432392,
      "formula_total_count": 311.31203566121843,
      "distinct_10gram_count": 2066.052005943536,
      "distinct_10gram_ratio": 0.5036373580080565,
      "step_global_distinct_10gram_count": 4005010,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.00617625932005421,
      "step_total_10gram_count": 8516545,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12801730632781982,
      "acc_reward": 0.5283203125
    },
    {
      "step": 320,
      "file": "320_16384.jsonl",
      "repetition_rate": 0.1207433530788599,
      "total_responses": 2019,
      "avg_token_length": 7829.278850916296,
      "accuracy": 0.5755324418028727,
      "internal_textual_diversity": 0.9876081148463128,
      "internal_textual_similarity": 0.01239188515368719,
      "internal_unit_count": 387.84497275879147,
      "internal_equational_diversity": 0.266509019003832,
      "formula_unique_count": 77.95492818226845,
      "formula_total_count": 311.13323427439326,
      "distinct_10gram_count": 2229.881129271917,
      "distinct_10gram_ratio": 0.551805674490258,
      "step_global_distinct_10gram_count": 4326558,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0066721292009895355,
      "step_total_10gram_count": 8424898,
      "global_total_10gram_count": 648452371,
      "entropy": 0.14014628529548645,
      "acc_reward": 0.474609375
    },
    {
      "step": 330,
      "file": "330_16384.jsonl",
      "repetition_rate": 0.11044074768698636,
      "total_responses": 2019,
      "avg_token_length": 7808.343734522041,
      "accuracy": 0.5611688954928182,
      "internal_textual_diversity": 0.9878512151514797,
      "internal_textual_similarity": 0.012148784848520356,
      "internal_unit_count": 388.3937592867756,
      "internal_equational_diversity": 0.25625939097373074,
      "formula_unique_count": 76.03962357602774,
      "formula_total_count": 308.97919762258545,
      "distinct_10gram_count": 2219.4388311045072,
      "distinct_10gram_ratio": 0.5477186205104206,
      "step_global_distinct_10gram_count": 4299749,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.006630786149134152,
      "step_total_10gram_count": 8437929,
      "global_total_10gram_count": 648452371,
      "entropy": 0.11394868046045303,
      "acc_reward": 0.5400390625
    },
    {
      "step": 340,
      "file": "340_16384.jsonl",
      "repetition_rate": 0.10173229837782262,
      "total_responses": 2019,
      "avg_token_length": 7581.320455671124,
      "accuracy": 0.5690936106983655,
      "internal_textual_diversity": 0.9893566090417785,
      "internal_textual_similarity": 0.01064339095822158,
      "internal_unit_count": 379.0703318474492,
      "internal_equational_diversity": 0.2731076430593667,
      "formula_unique_count": 76.6849925705795,
      "formula_total_count": 299.8231797919762,
      "distinct_10gram_count": 2256.140168400198,
      "distinct_10gram_ratio": 0.57525230904302,
      "step_global_distinct_10gram_count": 4364143,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.006730090281372415,
      "step_total_10gram_count": 8287570,
      "global_total_10gram_count": 648452371,
      "entropy": 0.14733222126960754,
      "acc_reward": 0.4541015625
    },
    {
      "step": 350,
      "file": "350_16384.jsonl",
      "repetition_rate": 0.09903092800730001,
      "total_responses": 2019,
      "avg_token_length": 7673.298662704309,
      "accuracy": 0.5780089153046062,
      "internal_textual_diversity": 0.9889632310077133,
      "internal_textual_similarity": 0.0110367689922867,
      "internal_unit_count": 386.50817236255574,
      "internal_equational_diversity": 0.2637389857812012,
      "formula_unique_count": 76.80633977216443,
      "formula_total_count": 305.83853392768697,
      "distinct_10gram_count": 2304.9673105497773,
      "distinct_10gram_ratio": 0.5758555653975516,
      "step_global_distinct_10gram_count": 4455123,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.00687039356973837,
      "step_total_10gram_count": 8395457,
      "global_total_10gram_count": 648452371,
      "entropy": 0.14172787964344025,
      "acc_reward": 0.490234375
    },
    {
      "step": 360,
      "file": "360_16384.jsonl",
      "repetition_rate": 0.10824101000142587,
      "total_responses": 2019,
      "avg_token_length": 7845.2426944031695,
      "accuracy": 0.5824665676077266,
      "internal_textual_diversity": 0.9887038427781065,
      "internal_textual_similarity": 0.01129615722189349,
      "internal_unit_count": 397.1996037642397,
      "internal_equational_diversity": 0.2572453551893201,
      "formula_unique_count": 77.64338781575037,
      "formula_total_count": 310.68053491827635,
      "distinct_10gram_count": 2282.263992075285,
      "distinct_10gram_ratio": 0.550828513440817,
      "step_global_distinct_10gram_count": 4405277,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.006793524392865548,
      "step_total_10gram_count": 8568437,
      "global_total_10gram_count": 648452371,
      "entropy": 0.13406652212142944,
      "acc_reward": 0.466796875
    },
    {
      "step": 370,
      "file": "370_16384.jsonl",
      "repetition_rate": 0.12200523072691569,
      "total_responses": 2019,
      "avg_token_length": 8029.709262010896,
      "accuracy": 0.5710747894997523,
      "internal_textual_diversity": 0.9875482198493394,
      "internal_textual_similarity": 0.012451780150660723,
      "internal_unit_count": 406.97275879148094,
      "internal_equational_diversity": 0.2506840922276736,
      "formula_unique_count": 77.41901931649332,
      "formula_total_count": 313.0490341753343,
      "distinct_10gram_count": 2280.4155522535907,
      "distinct_10gram_ratio": 0.5353919166968503,
      "step_global_distinct_10gram_count": 4404570,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.00679243410461676,
      "step_total_10gram_count": 8703934,
      "global_total_10gram_count": 648452371,
      "entropy": 0.1457429826259613,
      "acc_reward": 0.4326171875
    },
    {
      "step": 380,
      "file": "380_16384.jsonl",
      "repetition_rate": 0.15304992178018995,
      "total_responses": 2019,
      "avg_token_length": 8153.202575532442,
      "accuracy": 0.5775136206042595,
      "internal_textual_diversity": 0.9855479933509825,
      "internal_textual_similarity": 0.014452006649017515,
      "internal_unit_count": 409.96037642397226,
      "internal_equational_diversity": 0.24243718067201755,
      "formula_unique_count": 78.39177810797425,
      "formula_total_count": 320.07478949975234,
      "distinct_10gram_count": 2235.6225854383356,
      "distinct_10gram_ratio": 0.5234887506283725,
      "step_global_distinct_10gram_count": 4325071,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.006669836048760472,
      "step_total_10gram_count": 8681730,
      "global_total_10gram_count": 648452371,
      "entropy": 0.14337454736232758,
      "acc_reward": 0.5478515625
    },
    {
      "step": 390,
      "file": "390_16384.jsonl",
      "repetition_rate": 0.12892711073013602,
      "total_responses": 2019,
      "avg_token_length": 8093.755819712729,
      "accuracy": 0.5725606736007924,
      "internal_textual_diversity": 0.986741321712801,
      "internal_textual_similarity": 0.013258678287198922,
      "internal_unit_count": 414.555720653789,
      "internal_equational_diversity": 0.2520710856624906,
      "formula_unique_count": 79.07875185735513,
      "formula_total_count": 315.55522535908864,
      "distinct_10gram_count": 2364.963843486875,
      "distinct_10gram_ratio": 0.5538627023102751,
      "step_global_distinct_10gram_count": 4574096,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007053865795796435,
      "step_total_10gram_count": 8694479,
      "global_total_10gram_count": 648452371,
      "entropy": 0.13002900779247284,
      "acc_reward": 0.466796875
    },
    {
      "step": 400,
      "file": "400_16384.jsonl",
      "repetition_rate": 0.11728765928013254,
      "total_responses": 2019,
      "avg_token_length": 8099.7890044576525,
      "accuracy": 0.571570084200099,
      "internal_textual_diversity": 0.9875002646400791,
      "internal_textual_similarity": 0.012499735359920917,
      "internal_unit_count": 419.21991084695395,
      "internal_equational_diversity": 0.24910856453375024,
      "formula_unique_count": 78.7211490837048,
      "formula_total_count": 317.1050024764735,
      "distinct_10gram_count": 2349.1807825656265,
      "distinct_10gram_ratio": 0.5501151694958469,
      "step_global_distinct_10gram_count": 4534578,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.006992923771729103,
      "step_total_10gram_count": 8699768,
      "global_total_10gram_count": 648452371,
      "entropy": 0.13718336820602417,
      "acc_reward": 0.5146484375
    },
    {
      "step": 410,
      "file": "410_16384.jsonl",
      "repetition_rate": 0.12083877061589497,
      "total_responses": 2019,
      "avg_token_length": 8134.05695889054,
      "accuracy": 0.5804853888063398,
      "internal_textual_diversity": 0.9878880551350899,
      "internal_textual_similarity": 0.012111944864910155,
      "internal_unit_count": 419.45666171371965,
      "internal_equational_diversity": 0.25350524270412444,
      "formula_unique_count": 80.0232788509163,
      "formula_total_count": 312.4006934125805,
      "distinct_10gram_count": 2378.376919266964,
      "distinct_10gram_ratio": 0.5512234650497249,
      "step_global_distinct_10gram_count": 4589279,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007077280005812485,
      "step_total_10gram_count": 8764995,
      "global_total_10gram_count": 648452371,
      "entropy": 0.1408974528312683,
      "acc_reward": 0.4912109375
    },
    {
      "step": 420,
      "file": "420_16384.jsonl",
      "repetition_rate": 0.12857270868438023,
      "total_responses": 2019,
      "avg_token_length": 8092.359583952451,
      "accuracy": 0.5780089153046062,
      "internal_textual_diversity": 0.9876503376187462,
      "internal_textual_similarity": 0.012349662381253833,
      "internal_unit_count": 412.5160970777613,
      "internal_equational_diversity": 0.2523837363010656,
      "formula_unique_count": 77.81773155027241,
      "formula_total_count": 308.9787023278851,
      "distinct_10gram_count": 2385.5993065874195,
      "distinct_10gram_ratio": 0.5562210672363016,
      "step_global_distinct_10gram_count": 4594661,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007085579767276385,
      "step_total_10gram_count": 8744224,
      "global_total_10gram_count": 648452371,
      "entropy": 0.16125260293483734,
      "acc_reward": 0.4716796875
    },
    {
      "step": 430,
      "file": "430_16384.jsonl",
      "repetition_rate": 0.1359427655884968,
      "total_responses": 2019,
      "avg_token_length": 8164.161466072313,
      "accuracy": 0.5889053987122338,
      "internal_textual_diversity": 0.9868191868155495,
      "internal_textual_similarity": 0.013180813184450598,
      "internal_unit_count": 416.877166914314,
      "internal_equational_diversity": 0.2554896535523206,
      "formula_unique_count": 78.8949975235265,
      "formula_total_count": 309.3298662704309,
      "distinct_10gram_count": 2388.2877662209016,
      "distinct_10gram_ratio": 0.5530192104571581,
      "step_global_distinct_10gram_count": 4605989,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007103049053389921,
      "step_total_10gram_count": 8774857,
      "global_total_10gram_count": 648452371,
      "entropy": 0.1531638205051422,
      "acc_reward": 0.47265625
    },
    {
      "step": 440,
      "file": "440_16384.jsonl",
      "repetition_rate": 0.13047980879050788,
      "total_responses": 2019,
      "avg_token_length": 8180.495294700347,
      "accuracy": 0.5908865775136206,
      "internal_textual_diversity": 0.9871532538323717,
      "internal_textual_similarity": 0.012846746167628197,
      "internal_unit_count": 413.42149578999505,
      "internal_equational_diversity": 0.2552994740160751,
      "formula_unique_count": 80.71074789499752,
      "formula_total_count": 315.27389796929174,
      "distinct_10gram_count": 2406.388806339772,
      "distinct_10gram_ratio": 0.5527931538468488,
      "step_global_distinct_10gram_count": 4637845,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.00715217525205101,
      "step_total_10gram_count": 8810689,
      "global_total_10gram_count": 648452371,
      "entropy": 0.13642579317092896,
      "acc_reward": 0.5888671875
    },
    {
      "step": 450,
      "file": "450_16384.jsonl",
      "repetition_rate": 0.12830073928083666,
      "total_responses": 2019,
      "avg_token_length": 8167.948984645865,
      "accuracy": 0.5879148093115404,
      "internal_textual_diversity": 0.9865935243686605,
      "internal_textual_similarity": 0.013406475631339497,
      "internal_unit_count": 411.63001485884104,
      "internal_equational_diversity": 0.2460016905851911,
      "formula_unique_count": 78.16295195641406,
      "formula_total_count": 316.1887072808321,
      "distinct_10gram_count": 2349.5264982664685,
      "distinct_10gram_ratio": 0.538744738157433,
      "step_global_distinct_10gram_count": 4528681,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0069838298116115605,
      "step_total_10gram_count": 8853018,
      "global_total_10gram_count": 648452371,
      "entropy": 0.13757336139678955,
      "acc_reward": 0.51171875
    },
    {
      "step": 460,
      "file": "460_16384.jsonl",
      "repetition_rate": 0.12789937458243908,
      "total_responses": 2019,
      "avg_token_length": 8179.252105002476,
      "accuracy": 0.5884101040118871,
      "internal_textual_diversity": 0.9869897205235756,
      "internal_textual_similarity": 0.013010279476424413,
      "internal_unit_count": 416.93412580485386,
      "internal_equational_diversity": 0.24110522257647996,
      "formula_unique_count": 75.63546310054483,
      "formula_total_count": 312.06835066864784,
      "distinct_10gram_count": 2406.4526993561167,
      "distinct_10gram_ratio": 0.5436142086641776,
      "step_global_distinct_10gram_count": 4619121,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007123300347991171,
      "step_total_10gram_count": 8969433,
      "global_total_10gram_count": 648452371,
      "entropy": 0.15036028623580933,
      "acc_reward": 0.515625
    },
    {
      "step": 470,
      "file": "470_16384.jsonl",
      "repetition_rate": 0.10638693896865084,
      "total_responses": 2019,
      "avg_token_length": 8186.259534422981,
      "accuracy": 0.5884101040118871,
      "internal_textual_diversity": 0.9870211830395161,
      "internal_textual_similarity": 0.012978816960483966,
      "internal_unit_count": 411.4640911342249,
      "internal_equational_diversity": 0.243012145218365,
      "formula_unique_count": 76.99009410599307,
      "formula_total_count": 315.05052005943537,
      "distinct_10gram_count": 2403.640416047548,
      "distinct_10gram_ratio": 0.5408475321599092,
      "step_global_distinct_10gram_count": 4615805,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007118186633941693,
      "step_total_10gram_count": 9002442,
      "global_total_10gram_count": 648452371,
      "entropy": 0.16791225969791412,
      "acc_reward": 0.509765625
    },
    {
      "step": 480,
      "file": "480_16384.jsonl",
      "repetition_rate": 0.11970603146091771,
      "total_responses": 2019,
      "avg_token_length": 8178.359088657751,
      "accuracy": 0.5938583457157008,
      "internal_textual_diversity": 0.983609085886329,
      "internal_textual_similarity": 0.016390914113670916,
      "internal_unit_count": 402.8930163447251,
      "internal_equational_diversity": 0.24537727942743232,
      "formula_unique_count": 76.88509162951956,
      "formula_total_count": 315.3907875185736,
      "distinct_10gram_count": 2339.0123823675085,
      "distinct_10gram_ratio": 0.5266561109744126,
      "step_global_distinct_10gram_count": 4494381,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.006930934639145609,
      "step_total_10gram_count": 9072214,
      "global_total_10gram_count": 648452371,
      "entropy": 0.15214775502681732,
      "acc_reward": 0.5322265625
    },
    {
      "step": 490,
      "file": "490_16384.jsonl",
      "repetition_rate": 0.09683266781970006,
      "total_responses": 2019,
      "avg_token_length": 8181.087667161962,
      "accuracy": 0.5839524517087668,
      "internal_textual_diversity": 0.9883070704163053,
      "internal_textual_similarity": 0.011692929583694814,
      "internal_unit_count": 404.6027736503219,
      "internal_equational_diversity": 0.25371364317829354,
      "formula_unique_count": 80.31203566121843,
      "formula_total_count": 315.6448736998514,
      "distinct_10gram_count": 2434.3665180782564,
      "distinct_10gram_ratio": 0.5507793169487911,
      "step_global_distinct_10gram_count": 4674227,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007208281146064311,
      "step_total_10gram_count": 8987370,
      "global_total_10gram_count": 648452371,
      "entropy": 0.1531391143798828,
      "acc_reward": 0.4951171875
    },
    {
      "step": 500,
      "file": "500_16384.jsonl",
      "repetition_rate": 0.1087523800291013,
      "total_responses": 2019,
      "avg_token_length": 8188.667657256067,
      "accuracy": 0.5864289252105003,
      "internal_textual_diversity": 0.986552314039403,
      "internal_textual_similarity": 0.013447685960597002,
      "internal_unit_count": 405.9574046557702,
      "internal_equational_diversity": 0.2405392333777862,
      "formula_unique_count": 77.74393263992076,
      "formula_total_count": 318.79049034175335,
      "distinct_10gram_count": 2375.683011391778,
      "distinct_10gram_ratio": 0.5292682047320182,
      "step_global_distinct_10gram_count": 4551408,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007018877875303505,
      "step_total_10gram_count": 9107575,
      "global_total_10gram_count": 648452371,
      "entropy": 0.15606707334518433,
      "acc_reward": 0.5556640625
    },
    {
      "step": 510,
      "file": "510_16384.jsonl",
      "repetition_rate": 0.09773184220740111,
      "total_responses": 2019,
      "avg_token_length": 8163.951956414066,
      "accuracy": 0.591877166914314,
      "internal_textual_diversity": 0.9874949540673927,
      "internal_textual_similarity": 0.012505045932607282,
      "internal_unit_count": 401.56810302129765,
      "internal_equational_diversity": 0.24513122740848545,
      "formula_unique_count": 79.35710747894997,
      "formula_total_count": 322.98266468548786,
      "distinct_10gram_count": 2432.4601287766222,
      "distinct_10gram_ratio": 0.5463474449370873,
      "step_global_distinct_10gram_count": 4647942,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007167746172062343,
      "step_total_10gram_count": 9044524,
      "global_total_10gram_count": 648452371,
      "entropy": 0.1543671041727066,
      "acc_reward": 0.5341796875
    },
    {
      "step": 520,
      "file": "520_16384.jsonl",
      "repetition_rate": 0.10967376087207674,
      "total_responses": 2019,
      "avg_token_length": 8168.270430906389,
      "accuracy": 0.595344229816741,
      "internal_textual_diversity": 0.984851660703387,
      "internal_textual_similarity": 0.015148339296613024,
      "internal_unit_count": 396.0837048043586,
      "internal_equational_diversity": 0.24550280946462663,
      "formula_unique_count": 80.60772659732541,
      "formula_total_count": 325.56562654779594,
      "distinct_10gram_count": 2490.1307578008914,
      "distinct_10gram_ratio": 0.5633136894655696,
      "step_global_distinct_10gram_count": 4767035,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0073514034541173725,
      "step_total_10gram_count": 9004228,
      "global_total_10gram_count": 648452371,
      "entropy": 0.13893605768680573,
      "acc_reward": 0.4931640625
    },
    {
      "step": 530,
      "file": "530_16384.jsonl",
      "repetition_rate": 0.07974172648910967,
      "total_responses": 2019,
      "avg_token_length": 8186.335809806835,
      "accuracy": 0.5849430411094602,
      "internal_textual_diversity": 0.9880801812767169,
      "internal_textual_similarity": 0.011919818723283144,
      "internal_unit_count": 398.92223873204557,
      "internal_equational_diversity": 0.24899593896078578,
      "formula_unique_count": 80.55621594848935,
      "formula_total_count": 320.6215948489351,
      "distinct_10gram_count": 2448.6493313521546,
      "distinct_10gram_ratio": 0.5481116359673813,
      "step_global_distinct_10gram_count": 4714549,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007270463045311311,
      "step_total_10gram_count": 9059742,
      "global_total_10gram_count": 648452371,
      "entropy": 0.1397191733121872,
      "acc_reward": 0.513671875
    },
    {
      "step": 540,
      "file": "540_16384.jsonl",
      "repetition_rate": 0.09282097105549662,
      "total_responses": 2019,
      "avg_token_length": 8183.8033680039625,
      "accuracy": 0.586924219910847,
      "internal_textual_diversity": 0.9880391599298858,
      "internal_textual_similarity": 0.011960840070114265,
      "internal_unit_count": 397.5740465577018,
      "internal_equational_diversity": 0.2479120116609939,
      "formula_unique_count": 79.44675581971273,
      "formula_total_count": 319.10004952947,
      "distinct_10gram_count": 2431.2387320455673,
      "distinct_10gram_ratio": 0.5431969106147722,
      "step_global_distinct_10gram_count": 4668449,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007199370699810426,
      "step_total_10gram_count": 9079772,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12786895036697388,
      "acc_reward": 0.5439453125
    },
    {
      "step": 550,
      "file": "550_16384.jsonl",
      "repetition_rate": 0.09073292487695576,
      "total_responses": 2019,
      "avg_token_length": 8182.54284299158,
      "accuracy": 0.5928677563150074,
      "internal_textual_diversity": 0.9865440921596871,
      "internal_textual_similarity": 0.01345590784031296,
      "internal_unit_count": 399.86379395740465,
      "internal_equational_diversity": 0.24613639829059278,
      "formula_unique_count": 76.79148093115404,
      "formula_total_count": 312.38038632986627,
      "distinct_10gram_count": 2415.8405151064885,
      "distinct_10gram_ratio": 0.5410716474567968,
      "step_global_distinct_10gram_count": 4636362,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007149888268355178,
      "step_total_10gram_count": 9063310,
      "global_total_10gram_count": 648452371,
      "entropy": 0.1277356594800949,
      "acc_reward": 0.5263671875
    },
    {
      "step": 560,
      "file": "560_16384.jsonl",
      "repetition_rate": 0.09232460928208336,
      "total_responses": 2019,
      "avg_token_length": 8168.55720653789,
      "accuracy": 0.600297176820208,
      "internal_textual_diversity": 0.9877674964036307,
      "internal_textual_similarity": 0.012232503596369383,
      "internal_unit_count": 401.6374442793462,
      "internal_equational_diversity": 0.25218039094526923,
      "formula_unique_count": 79.63992075284794,
      "formula_total_count": 316.82565626547796,
      "distinct_10gram_count": 2453.182763744428,
      "distinct_10gram_ratio": 0.5527781241343438,
      "step_global_distinct_10gram_count": 4708961,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007261845604385954,
      "step_total_10gram_count": 9012284,
      "global_total_10gram_count": 648452371,
      "entropy": 0.1347048282623291,
      "acc_reward": 0.5439453125
    },
    {
      "step": 570,
      "file": "570_16384.jsonl",
      "repetition_rate": 0.09686201643914229,
      "total_responses": 2019,
      "avg_token_length": 8177.581971272907,
      "accuracy": 0.5874195146111937,
      "internal_textual_diversity": 0.9867730059161284,
      "internal_textual_similarity": 0.013226994083871643,
      "internal_unit_count": 397.4229816740961,
      "internal_equational_diversity": 0.24899986662675688,
      "formula_unique_count": 78.63793957404656,
      "formula_total_count": 316.30708271421497,
      "distinct_10gram_count": 2389.3907875185737,
      "distinct_10gram_ratio": 0.538964329405546,
      "step_global_distinct_10gram_count": 4591330,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0070804429212272865,
      "step_total_10gram_count": 9032678,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12178639322519302,
      "acc_reward": 0.5810546875
    },
    {
      "step": 580,
      "file": "580_16384.jsonl",
      "repetition_rate": 0.10168080726295592,
      "total_responses": 2019,
      "avg_token_length": 8178.697870232789,
      "accuracy": 0.5898959881129272,
      "internal_textual_diversity": 0.9870132840686527,
      "internal_textual_similarity": 0.01298671593134731,
      "internal_unit_count": 398.1119366022784,
      "internal_equational_diversity": 0.24778453160072925,
      "formula_unique_count": 77.39177810797425,
      "formula_total_count": 315.148588410104,
      "distinct_10gram_count": 2413.3566121842496,
      "distinct_10gram_ratio": 0.5409728655248315,
      "step_global_distinct_10gram_count": 4631681,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0071426695423402195,
      "step_total_10gram_count": 9061507,
      "global_total_10gram_count": 648452371,
      "entropy": 0.1286756843328476,
      "acc_reward": 0.53125
    },
    {
      "step": 590,
      "file": "590_16384.jsonl",
      "repetition_rate": 0.09598925874771738,
      "total_responses": 2019,
      "avg_token_length": 8174.447746409113,
      "accuracy": 0.5894006934125805,
      "internal_textual_diversity": 0.987895920407369,
      "internal_textual_similarity": 0.012104079592631132,
      "internal_unit_count": 400.35463100544825,
      "internal_equational_diversity": 0.24911027617905163,
      "formula_unique_count": 76.86379395740465,
      "formula_total_count": 313.0074294205052,
      "distinct_10gram_count": 2409.8182268449727,
      "distinct_10gram_ratio": 0.5429978905741332,
      "step_global_distinct_10gram_count": 4622213,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007128068624179647,
      "step_total_10gram_count": 9010958,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12200208008289337,
      "acc_reward": 0.5146484375
    },
    {
      "step": 600,
      "file": "600_16384.jsonl",
      "repetition_rate": 0.09975738729321672,
      "total_responses": 2019,
      "avg_token_length": 8186.637939574047,
      "accuracy": 0.5943536404160475,
      "internal_textual_diversity": 0.9874158383880346,
      "internal_textual_similarity": 0.012584161611965416,
      "internal_unit_count": 400.4784546805349,
      "internal_equational_diversity": 0.25184736644052086,
      "formula_unique_count": 77.80435859336305,
      "formula_total_count": 314.19068845963346,
      "distinct_10gram_count": 2472.4229816740963,
      "distinct_10gram_ratio": 0.558437365579539,
      "step_global_distinct_10gram_count": 4758470,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007338195082333966,
      "step_total_10gram_count": 8987507,
      "global_total_10gram_count": 648452371,
      "entropy": 0.11982327699661255,
      "acc_reward": 0.580078125
    },
    {
      "step": 610,
      "file": "610_16384.jsonl",
      "repetition_rate": 0.08883596908521763,
      "total_responses": 2019,
      "avg_token_length": 8189.747399702823,
      "accuracy": 0.5799900941059931,
      "internal_textual_diversity": 0.987595960231197,
      "internal_textual_similarity": 0.012404039768803018,
      "internal_unit_count": 398.4739970282318,
      "internal_equational_diversity": 0.25112131200622356,
      "formula_unique_count": 79.69044081228331,
      "formula_total_count": 322.31847449232293,
      "distinct_10gram_count": 2447.816245666171,
      "distinct_10gram_ratio": 0.5503579863286566,
      "step_global_distinct_10gram_count": 4697087,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007243534313486225,
      "step_total_10gram_count": 9022123,
      "global_total_10gram_count": 648452371,
      "entropy": 0.11436523497104645,
      "acc_reward": 0.5390625
    },
    {
      "step": 620,
      "file": "620_16384.jsonl",
      "repetition_rate": 0.09332112743071407,
      "total_responses": 2019,
      "avg_token_length": 8183.002971768202,
      "accuracy": 0.5884101040118871,
      "internal_textual_diversity": 0.9874961809924035,
      "internal_textual_similarity": 0.0125038190075965,
      "internal_unit_count": 394.42149578999505,
      "internal_equational_diversity": 0.2542517077682592,
      "formula_unique_count": 82.0158494304111,
      "formula_total_count": 327.1674096087172,
      "distinct_10gram_count": 2464.9673105497773,
      "distinct_10gram_ratio": 0.5531379092804619,
      "step_global_distinct_10gram_count": 4735094,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0073021461741251,
      "step_total_10gram_count": 9055805,
      "global_total_10gram_count": 648452371,
      "entropy": 0.13813452422618866,
      "acc_reward": 0.505859375
    },
    {
      "step": 630,
      "file": "630_16384.jsonl",
      "repetition_rate": 0.09721125361880772,
      "total_responses": 2019,
      "avg_token_length": 8178.621099554235,
      "accuracy": 0.5948489351163943,
      "internal_textual_diversity": 0.9880527953803417,
      "internal_textual_similarity": 0.011947204619658302,
      "internal_unit_count": 394.26993561168894,
      "internal_equational_diversity": 0.24923193881746297,
      "formula_unique_count": 78.53789004457653,
      "formula_total_count": 323.9831599801882,
      "distinct_10gram_count": 2406.68598315998,
      "distinct_10gram_ratio": 0.5374855079919066,
      "step_global_distinct_10gram_count": 4610443,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007109917715143955,
      "step_total_10gram_count": 9092951,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12261822074651718,
      "acc_reward": 0.5244140625
    },
    {
      "step": 640,
      "file": "640_16384.jsonl",
      "repetition_rate": 0.07654944012064958,
      "total_responses": 2019,
      "avg_token_length": 8145.664190193165,
      "accuracy": 0.5884101040118871,
      "internal_textual_diversity": 0.988770145141379,
      "internal_textual_similarity": 0.011229854858621022,
      "internal_unit_count": 393.13967310549776,
      "internal_equational_diversity": 0.25419882870805727,
      "formula_unique_count": 80.74294205052006,
      "formula_total_count": 325.7860326894502,
      "distinct_10gram_count": 2470.891530460624,
      "distinct_10gram_ratio": 0.5565596711085778,
      "step_global_distinct_10gram_count": 4728501,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007291978889225158,
      "step_total_10gram_count": 9039970,
      "global_total_10gram_count": 648452371,
      "entropy": 0.10200555622577667,
      "acc_reward": 0.505859375
    },
    {
      "step": 650,
      "file": "650_16384.jsonl",
      "repetition_rate": 0.059837977571318354,
      "total_responses": 2019,
      "avg_token_length": 8169.719167904904,
      "accuracy": 0.5928677563150074,
      "internal_textual_diversity": 0.9891976603897252,
      "internal_textual_similarity": 0.010802339610274783,
      "internal_unit_count": 388.2758791480931,
      "internal_equational_diversity": 0.2547051232276905,
      "formula_unique_count": 81.06141654284299,
      "formula_total_count": 325.06835066864784,
      "distinct_10gram_count": 2585.4309063893015,
      "distinct_10gram_ratio": 0.5763135642681599,
      "step_global_distinct_10gram_count": 4952754,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0076378069099542235,
      "step_total_10gram_count": 9127485,
      "global_total_10gram_count": 648452371,
      "entropy": 0.10995927453041077,
      "acc_reward": 0.556640625
    },
    {
      "step": 660,
      "file": "660_16384.jsonl",
      "repetition_rate": 0.0659349275242233,
      "total_responses": 2019,
      "avg_token_length": 8180.49975235265,
      "accuracy": 0.5854383358098069,
      "internal_textual_diversity": 0.989249534304858,
      "internal_textual_similarity": 0.010750465695142045,
      "internal_unit_count": 399.32590391282815,
      "internal_equational_diversity": 0.2519770746185868,
      "formula_unique_count": 81.55225359088658,
      "formula_total_count": 324.5388806339772,
      "distinct_10gram_count": 2551.1872213967313,
      "distinct_10gram_ratio": 0.5646366034435872,
      "step_global_distinct_10gram_count": 4896626,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007551250051640261,
      "step_total_10gram_count": 9170121,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12154211848974228,
      "acc_reward": 0.525390625
    },
    {
      "step": 670,
      "file": "670_16384.jsonl",
      "repetition_rate": 0.08864916571561905,
      "total_responses": 2019,
      "avg_token_length": 8183.05646359584,
      "accuracy": 0.58345715700842,
      "internal_textual_diversity": 0.9882729599628756,
      "internal_textual_similarity": 0.011727040037124414,
      "internal_unit_count": 395.9024269440317,
      "internal_equational_diversity": 0.2429953739862064,
      "formula_unique_count": 79.64190193164933,
      "formula_total_count": 328.1367013372957,
      "distinct_10gram_count": 2424.582961862308,
      "distinct_10gram_ratio": 0.5383366265236137,
      "step_global_distinct_10gram_count": 4638309,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007152890801905943,
      "step_total_10gram_count": 9134908,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12077857553958893,
      "acc_reward": 0.572265625
    },
    {
      "step": 680,
      "file": "680_16384.jsonl",
      "repetition_rate": 0.09016792419932916,
      "total_responses": 2019,
      "avg_token_length": 8171.631996037642,
      "accuracy": 0.5933630510153541,
      "internal_textual_diversity": 0.9876324829629125,
      "internal_textual_similarity": 0.0123675170370875,
      "internal_unit_count": 389.704309063893,
      "internal_equational_diversity": 0.23726453588381252,
      "formula_unique_count": 77.11490837048044,
      "formula_total_count": 330.23278850916296,
      "distinct_10gram_count": 2326.6562654779596,
      "distinct_10gram_ratio": 0.5183381396323697,
      "step_global_distinct_10gram_count": 4457169,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.00687354877448663,
      "step_total_10gram_count": 9123986,
      "global_total_10gram_count": 648452371,
      "entropy": 0.1303921788930893,
      "acc_reward": 0.5341796875
    },
    {
      "step": 690,
      "file": "690_16384.jsonl",
      "repetition_rate": 0.09390859327548366,
      "total_responses": 2019,
      "avg_token_length": 8192.0,
      "accuracy": 0.5884101040118871,
      "internal_textual_diversity": 0.986743395017734,
      "internal_textual_similarity": 0.013256604982266074,
      "internal_unit_count": 378.89995047053,
      "internal_equational_diversity": 0.2490882913041211,
      "formula_unique_count": 82.19167904903418,
      "formula_total_count": 334.6225854383358,
      "distinct_10gram_count": 2423.623080733036,
      "distinct_10gram_ratio": 0.5434640072509787,
      "step_global_distinct_10gram_count": 4648920,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007169254378437611,
      "step_total_10gram_count": 9057304,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12266851961612701,
      "acc_reward": 0.5908203125
    },
    {
      "step": 700,
      "file": "700_16384.jsonl",
      "repetition_rate": 0.09836213613771153,
      "total_responses": 2019,
      "avg_token_length": 8186.37147102526,
      "accuracy": 0.5874195146111937,
      "internal_textual_diversity": 0.9875079053578082,
      "internal_textual_similarity": 0.012492094642191785,
      "internal_unit_count": 379.38038632986627,
      "internal_equational_diversity": 0.24658292570283996,
      "formula_unique_count": 80.01188707280832,
      "formula_total_count": 330.3789004457652,
      "distinct_10gram_count": 2374.743932639921,
      "distinct_10gram_ratio": 0.5332309223087953,
      "step_global_distinct_10gram_count": 4542492,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0070051282147289735,
      "step_total_10gram_count": 9056500,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12426384538412094,
      "acc_reward": 0.5302734375
    },
    {
      "step": 710,
      "file": "710_16384.jsonl",
      "repetition_rate": 0.08357579753768664,
      "total_responses": 2019,
      "avg_token_length": 8185.7677067855375,
      "accuracy": 0.5854383358098069,
      "internal_textual_diversity": 0.9884508041916359,
      "internal_textual_similarity": 0.011549195808364141,
      "internal_unit_count": 378.9034175334324,
      "internal_equational_diversity": 0.25771963536175396,
      "formula_unique_count": 81.26646854878653,
      "formula_total_count": 326.3169886082219,
      "distinct_10gram_count": 2484.653789004458,
      "distinct_10gram_ratio": 0.5566094469409709,
      "step_global_distinct_10gram_count": 4763600,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.0073461062262042374,
      "step_total_10gram_count": 9083793,
      "global_total_10gram_count": 648452371,
      "entropy": 0.10667068511247635,
      "acc_reward": 0.552734375
    },
    {
      "step": 720,
      "file": "720_16384.jsonl",
      "repetition_rate": 0.0705692136409062,
      "total_responses": 2019,
      "avg_token_length": 8183.433382862803,
      "accuracy": 0.5884101040118871,
      "internal_textual_diversity": 0.9888827129995662,
      "internal_textual_similarity": 0.011117287000433695,
      "internal_unit_count": 379.1154036651808,
      "internal_equational_diversity": 0.25637307228059064,
      "formula_unique_count": 80.7275879148093,
      "formula_total_count": 326.6775631500743,
      "distinct_10gram_count": 2688.7221396731056,
      "distinct_10gram_ratio": 0.5927988576251548,
      "step_global_distinct_10gram_count": 5158871,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.007955666801008583,
      "step_total_10gram_count": 9185065,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12535986304283142,
      "acc_reward": 0.5263671875
    },
    {
      "step": 730,
      "file": "730_16384.jsonl",
      "repetition_rate": 0.06797111003789476,
      "total_responses": 2019,
      "avg_token_length": 8168.019811788014,
      "accuracy": 0.5844477464091135,
      "internal_textual_diversity": 0.9887537038439056,
      "internal_textual_similarity": 0.011246296156094581,
      "internal_unit_count": 385.13224368499255,
      "internal_equational_diversity": 0.26624131814647106,
      "formula_unique_count": 82.23080733036157,
      "formula_total_count": 318.74888558692425,
      "distinct_10gram_count": 2762.609707776127,
      "distinct_10gram_ratio": 0.6122940945017568,
      "step_global_distinct_10gram_count": 5296929,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.00816857064125069,
      "step_total_10gram_count": 9166683,
      "global_total_10gram_count": 648452371,
      "entropy": 0.11886059492826462,
      "acc_reward": 0.578125
    },
    {
      "step": 740,
      "file": "740_16384.jsonl",
      "repetition_rate": 0.0733621499276569,
      "total_responses": 2019,
      "avg_token_length": 8168.384348687469,
      "accuracy": 0.5968301139177811,
      "internal_textual_diversity": 0.9893103646964763,
      "internal_textual_similarity": 0.010689635303523785,
      "internal_unit_count": 381.13075780089156,
      "internal_equational_diversity": 0.26161640411226694,
      "formula_unique_count": 81.19960376423973,
      "formula_total_count": 321.2020802377415,
      "distinct_10gram_count": 2727.8880633977215,
      "distinct_10gram_ratio": 0.5995022080535005,
      "step_global_distinct_10gram_count": 5222880,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.008054377211923248,
      "step_total_10gram_count": 9218574,
      "global_total_10gram_count": 648452371,
      "entropy": 0.1367001235485077,
      "acc_reward": 0.5341796875
    },
    {
      "step": 750,
      "file": "750_16384.jsonl",
      "repetition_rate": 0.07460417420315867,
      "total_responses": 2019,
      "avg_token_length": 8130.005943536404,
      "accuracy": 0.5874195146111937,
      "internal_textual_diversity": 0.9886810845391214,
      "internal_textual_similarity": 0.01131891546087857,
      "internal_unit_count": 375.83853392768697,
      "internal_equational_diversity": 0.26261698347202905,
      "formula_unique_count": 79.9311540366518,
      "formula_total_count": 317.5844477464091,
      "distinct_10gram_count": 2728.84299157999,
      "distinct_10gram_ratio": 0.6032080848744208,
      "step_global_distinct_10gram_count": 5237608,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.008077089751284138,
      "step_total_10gram_count": 9210668,
      "global_total_10gram_count": 648452371,
      "entropy": 0.12687039375305176,
      "acc_reward": 0.505859375
    },
    {
      "step": 760,
      "file": "760_16384.jsonl",
      "repetition_rate": 0.07417577404571492,
      "total_responses": 2019,
      "avg_token_length": 7965.530955918772,
      "accuracy": 0.6037642397226349,
      "internal_textual_diversity": 0.9881849794599022,
      "internal_textual_similarity": 0.01181502054009773,
      "internal_unit_count": 366.5215453194651,
      "internal_equational_diversity": 0.2653527872072989,
      "formula_unique_count": 76.84645864289251,
      "formula_total_count": 304.46607231302625,
      "distinct_10gram_count": 2772.1149083704804,
      "distinct_10gram_ratio": 0.6201960874733049,
      "step_global_distinct_10gram_count": 5335123,
      "global_distinct_10gram_count": 206778336,
      "step_global_distinct_10gram_ratio": 0.008227470880818169,
      "step_total_10gram_count": 9163327,
      "global_total_10gram_count": 648452371,
      "entropy": 0.11829198151826859,
      "acc_reward": 0.6318359375
    }
  ]
}
