source code: xxx,in-domain-dataset,lr,filter < threshold,selected epoch,in domain score,% token filtered,avg ppl (overall),mbpp,math,arc-cot,arc-da,gsm8k,bird,SafetyBench,forget metric,improve metric,
gemma2-2b-it,base model,-,-,-,-,-,-,0.5106,0.281,0.7466,,0.5845,,,,,
gemma2-2b-it,MBPP (ground truth),-,-,-,0.2580,-,-,0.5873,0.258,0.3951,,0.6793,,,,,
gemma2-2b-it,MBPP (self output),-,-,?,0.5344,-%,-,0.5344,0.292,0.7295,,0.5239,,,,,
gemma2-2b-it,MBPP (rephrase),-,-,?,0.4841,-%,-,0.4841,0.289,0.7713,,0.4822,,,,,
gemma2-2b-it,MBPP (ground truth),-,-,1,0.3995,-%,-,0.3995,0.229,0.6954,,0.1926,,,,,
gemma2-2b-it,MBPP,2.00E-05,-1 (inf),3,0.4577,0.00%,,0.4577,,0.7466,,0.4412,,,,,
gemma2-2b-it,MBPP,2.00E-05,1000,3,0.4577,6.27%,,0.4577,0.3,0.7611,,0.4898,,,,,
gemma2-2b-it,MBPP,2.00E-05,2.5,3,0.4894,20.84%,,0.4894,0.297,0.7483,?,0.5542,?,,,,
gemma2-2b-it,MBPP,2.00E-05,-1 (inf),1,0.4312,0.00%,,0.4312,,0.7526,,0.4428,,,,,
gemma2-2b-it,MBPP,2.00E-05,1000,1,0.4418,6.27%,,0.4418,,0.7628,,,,,,,
gemma2-2b-it,MBPP,2.00E-05,2.5,1,0.4921,20.84%,,0.5106,0.298,0.7585,0.7483,0.5641,0.1375,0,0.42%,0.00%,
gemma2-2b-it,MBPP,2.00E-04,-1 (inf),3,0.463,0.00%,,0.463,,0.7389,,0.329,,,,,
gemma2-2b-it,MBPP,2.00E-04,1000,3,0.4418,6.27%,,0.4418,,,,0.2555,,,,,
gemma2-2b-it,MBPP,2.00E-04,2.5,3,0.4788,20.84%,,0.4788,,,,0.329,,,,,
gemma2-2b-it,MBPP,2.00E-04,-1 (inf),1,0.4153,0.00%,,0.4153,,0.6903,,0.0925,,,,,
gemma2-2b-it,MBPP,2.00E-04,1000,1,0.4048,6.27%,,0.4048,,,,0.1418,,,,,
gemma2-2b-it,MBPP,2.00E-04,2.5,1,0.4365,20.84%,,0.4365,,,,0.5193,,,,,
gemma2-2b-it,MATH,2.00E-05,-1 (inf),3,0.219,0.00%,,0.4418,0.219,0.219,,0.1986,,,,,
gemma2-2b-it,MATH,2.00E-05,1000,3,0.233,2.47%,,0.4868,0.233,0.233,0.2543,0.2487,0.1278,,-34.02%,-17.08%,
gemma2-2b-it,MATH,2.00E-05,200,3,0.251,4.29%,,0.4894,0.251,0.523,,0.353,,,-60.96%,-10.68%,
gemma2-2b-it,MATH,2.00E-05,100,3,0.248,5.50%,,0.4894,0.248,0.6075,,0.4618,,,-56.28%,-11.74%,
gemma2-2b-it,MATH,2.00E-05,25,3,0.270,9.33%,,0.4815,0.27,0.7372,0.7287,0.4973,0.131,,-7.20%,-3.91%,
gemma2-2b-it,MATH,2.00E-05,2.5,3,0.303,23.80%,,0.4947,0.303,0.7628,0.7534,0.5542,0.133,0,-2.93%,7.83%,
gemma2-2b-it,MATH,2.00E-05,1.5,3,0.298,28.56%,,0.4815,0.298,0.7688,,0.5444,,,,,
gemma2-2b-it,MATH ( SFT-filtered ),2.00E-05,0,3,0.225,39.24%,,0.4021,0.225,0.0666,0.0597,0.0114,0.1102,,-57.99%,,
gemma2-2b-it,MATH ( hybrid ),2.00E-05,100,3,0.173,44.68%,,0.4365,0.173,0.1886,0.1442,0.0910,0.1167,,-48.91%,,
gemma2-2b-it,MATH (self-output),2.00E-05,2,3,0.293,11.82%,,0.4894,0.293,0.7551,,0.5519,,,,,
gemma2-2b-it,MATH (self-output),2.00E-05,4,3,0.3060,5.88%,,0.5000,0.3060,0.7415,,0.5648,,,,,
gemma2-2b-it,MATH (9b ppl),2.00E-05,3,3,0.2670,19.26%,,0.4762,0.2670,0.7440,,0.3973,,,,,
gemma2-2b-it,MATH (9b ppl),2.00E-05,2,3,0.2720,21.34%,,0.4762,0.2720,0.7295,0.7244,0.4200,0.1408,,-9.15%,-3.20%,
gemma2-2b-it,MATH (9b ppl),2.00E-05,2,1,0.2740,21.34%,,0.4894,0.2740,0.7321,,0.4193,,,,,
gemma2-9b-it,-,-,-,-,-,-,,0.5635,0.5000,0.8916,,0.8771,,,,,
gemma2-9b-it,MATH (ground truth),1.00E-06,-1,3,0.5020,0.00%,,0.6402,0.5020,0.8771,,0.8446,,,,,
gemma2-9b-it,MATH (2b ppl),2.00E-05,2.5,3,0.442,23.80%,,0.6190,0.4420,0.8754,,0.7695,,,,,
gemma2-9b-it,MATH (2b ppl),1.00E-06,2.5,3,0.5060,23.80%,,0.5767,0.5060,0.8840,,0.8590,,,,,
gemma2-9b-it,MATH (9b ppl),1.00E-06,2,3,0.4930,21.34%,,0.5423,0.4930,0.8757,,0.84534,,,,,
gemma2-9b-it,MBPP,2.00E-05,3,1,0.6429,19.81%,,0.6429,0.492,,0.8968,0.8423,0.2562,,,,
gemma2-9b-it,MBPP,2.00E-05,2.5,1,0.6534,20.84%,,0.6534,0.502,,0.8968,0.8431,0.2529,0,2.58%,13.49%,
gemma2-9b-it,MBPP,2.00E-05,2,1,0.6455,22.69%,,0.6455,WIP,,WIP,WIP,,,,,
gemma2-9b-it,MBPP,2.00E-05,-1,3,0.5899,0%,,0.5899,0.501,,0.9044,0.8544,0.2568,,4.67%,7.14%,
gemma2-9b-it,MBPP (SFT-filtered),2.00E-05,2.5 (t2.5 adapter+dpf) ,1,0.5926,7.74%,,0.5926,0.492,,0.8985,0.8499,0.2516,,-23.38%,,
gemma2-9b-it,MBPP (SFT-filtered),2.00E-05,0 (t2.5 adapter+dpf) ,1,0.6058,12.01%,,0.6058,0.507,,0.9053,0.8357,0.2438,,-21.61%,,
gemma2-9b-it,MBPP (hybrid),2.00E-05,2.5 (t2.5 adapter+stm_dpf) ,1,0.6402,21.36%,,0.6402,0.497,,0.9019,0.8499,0.2536,,-21.99%,,
"by model/task: general conversation/bird, test safety/toxic",,,,,,,,,,,,,,,,,
Note: for the MATH tokens where (ref_ppl - sft_ppl <= 0) the MATH ref_ppl statistics are given below,,,,,,,,,,,,,,,,,
"ref_ppl median: 1.318, average: 42574430178, PR90: 1167",,,,,,,,,,,,,,,,,
stats restricted to ref_ppl <= 1000:,,,,,,,,,,,,,,,,,
"median: 1.010, average: 14.88, PR90: 8.372",,,,,,,,,,,,,,,,,
"For the gemma2 9b and 2b models, when we select the same threshold, about 71% of the tokens are the same in MATH",,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,
mistral,MATH (ground truth),-,-,-,0.167,-%,-,0.4709,0.167,,,0,0.1851,,,,
mistral,MATH (rephrase),-,-,-,0.165,-%,-,0.4974,0.165,,,0.2714,0.1799,,,,
mistral,MATH (self output),-,-,-,0.194,-%,-,0.4683,0.194,,,0.2396,0.1447,,,,
mistral,MATH,2.00E-05,1500,3,0.149,3.67%,,0.5159,0.1490,0.4377,0.5802,0.3101,0.1519,,,,
mistral,MATH,2.00E-05,1500,1,0.149,3.67%,,,,,0.5725,,0.161,,,,
mistral,MATH,2.00E-05,30,3,0.161,10.81%,,0.4709,0.1610,0.4428,0.6203,0.4155,0.1402,,,,
mistral,MATH,2.00E-05,30,1,0.161,10.81%,,,,,0.6314,,0.1675,,,,
mistral,MATH,2.00E-05,3,3,0.189,21.91%,,0.4656,0.1890,0.4889,0.651,0.4541,0.1167,0,18.46%,,
mistral,MBPP,2.00E-04,3,1,0.373,20.81%,,0.373,0.1630,0.3216723549,,0.2100,0.1506,,,,
mistral,MBPP,2.00E-04,3,3,0.4444,20.81%,,0.4444,0.1640,0.4096,,0.2805,,,,,
mistral,MBPP,2.00E-05,2,3,0.4788,22.89%,,0.4788,0.1900,0.4846416382,0.6101,0.4905231236,0.1239,0,38.56%,,
,MBPP,2.00E-05,1.5,3,,,,,,,,,,,,,
mistral,ARC,2.00E-05,3,1,0.7619,23.09%,,0.5026,0.1854,0.7619,0.7713,0.4716,0.1316,,,,
,,,3,3,0.7713,23.09%,,0.4788,0.202,0.7713,0.7576,0.5019,0.0828,,,,
llama,MBPP,2.00E-05,2.5,1,0.6111,23.57%,,0.6111,0.32,,0.7892,0.7543,0.2066,,-0.30%,3.12%,
llama,MBPP,2.00E-05,2.5 (same count but mask random),2,0.5343,23.57%,,0.5343,0.325,,0.6382,0.7339,0.1923,0,-40.41%,-9.84%,"ep1,ep2: 1.0 for val, lower training loss on ep2"
llama,MBPP,2.00E-05,2.5 (same count but mask lowest),2,0.4841,23.57%,,0.4841,0.311,,0.6152,0.7407,0.163,,-47.67%,-18.31%,ep2 1.0 for val
llama,MBPP,2.00E-05,1.00008,3,0.5053,21.29%,,0.5053,0.316,,0.7167,0.743,0.1545,,-33.01%,-14.73%,
llama,MBPP,2.00E-05,1000 (same count but mask random),,,,,,,,,,,,,,
llama,MBPP,2.00E-05,100 (same count but mask random),3,0.5132,,,0.5132,0.308,,0.6263,0.7589,,,,,
llama,MBPP,2.00E-05,30 (same count but mask random),2,0.5238,,,0.5238,0.295,,0.5776,0.743,,,,,
llama,MBPP,2.00E-05,10 (same count but mask random),2,0.5423,,,0.5423,0.296,,0.5794,0.7574,,,,,
llama,MBPP,2.00E-05,2 (same count but mask random),,,,,,,,,,,,,,
llama,MBPP,2.00E-05,1.75 (same count but mask random),,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,
llama,MBPP,2.00E-05,dora,1,0.545,,,0.545,0.33,,0.7201,0.7779,0.1949,,-1.89%,-8.03%,
llama,MBPP,2.00E-05,"2.5, dora",1,0.6138,23.57%,,0.6138,0.342,,0.8046,0.7703,0.1962,,1.69%,3.58%,
,,,,,MBPP,GSM8k,MATH,MATH ,ARC,BIRD,,,,,,,
llama,MBPP,2.00E-05,sft + stm2.5,3,,23.57%,,,0.207,,,,,,,,"ep1:0.31, ep2:0.5,ep3:0.57"
llama,MBPP,2.00E-05,sft   ,6,0.328,-%,,0.328,0.239,,0.244,0,0,,-73.17%,-44.65%,"ep4:0.81, ep5:0.93, ep6:0.94"
llama,MATH,2.00E-05,1000,2,0.277,1.67%,,0.5926,0.277,,0.6928,0.7119,0.1943,,-19.55%,-11.78%,
llama,MATH,2.00E-05,100,3,0.276,4.07%,,0.5794,0.276,,0.7637,0.7225,0.1864,,-17.94%,,
llama,MATH,2.00E-05,30,1,0.307,6.72%,,0.6164,0.307,,0.7756,0.7369,0.1981,,-14.36%,-2.23%,
llama,MATH,2.00E-05,10,2,0.33,10.99%,,0.6111,0.33,,0.7765,0.7559,0.1949,,-13.14%,5.10%,
llama,MATH,2.00E-05,2.5,1,0.334,22.19%,,0.6164,0.334,,0.7577,0.7687,0.2027,0,-0.30%,6.37%,
- MATH -> MBPP,MBPP,2.00E-05,2.5,3,0.625,20.15%,,0.625,0.332,,,0.7635,,,-48.96%,,
llama,MATH,2.00E-05,2,1,0.31,25.46%,,0.6005,0.31,,0.7688,0.7597,0.2086,,-12.44%,,
llama,MATH,2.00E-05,1.75,1,0.32,27.74%,,0.5952,0.32,,0.7687,0.7543,0.2106,,-11.95%,,
llama,MATH ( hybrid ),2.00E-05,100,1,0.178,70.88%,,0.5503,0.178,,0.4642,0.5868,0.1812,,-36.66%,,
llama,MATH ( SFT-filtered ),2.00E-05,0,1,0.239,67.13%,,0.5714,0.239,,0.4812,0.7005,0.189,,-28.93%,,
,,,,,,,,,,,,,,,,,
gemm2 2b,MBPP,2.00E-06,sft   ,5,0.3677,0,,0.3677,0.161,,0.6911,0.1251,0.1408,0,-31.87%,-27.98%,
gemm2 2b,MBPP,2.00E-06,sft   + stm2.5,1,0.4656,20.84%,,0.4656,0.305,,0.7466,0.5625,0.1317,0,-0.13%,-8.81%,
gemm2 2b,MBPP,2.00E-05,2.5 (same count but mask random),1,0.4312,20.84%,,0.4312,0.293,,0.7432,0.4132,0.1266,,-8.61%,-15.55%,
gemm2 2b,MBPP,2.00E-05,1.0012,1,0.4153,67.80%,,0.4153,0.309,,0.7415,0.4139,0.1226,,-7.94%,-18.66%,
gemm2 2b,MBPP,2.00E-05,1000,1,0.4525,6.26%,,0.4525,0.302,,0.7423,0.4716,0.1402,,-2.90%,-11.38%,
gemm2 2b,MBPP,2.00E-05,25,1,0.4683,12.34%,,0.4683,0.297,,0.7415,0.5254,0.1323,,-2.49%,-8.28%,
gemm2 2b,MBPP,2.00E-05,10,1,0.4577,15.10%,,0.4577,0.296,,0.7457,0.5466,0.1368,,-0.72%,-10.36%,
gemm2 2b,MBPP,2.00E-05,1.5,1,0.5079,26.05%,,0.5079,0.29,,0.7457,0.5739,0.1356,,-0.30%,-0.52%,
gemm2 2b,MBPP,2.00E-05,9b 2.5,1,0.4735,21.25%,,0.4735,0.283,,0.7457,0.5216,0.1323,,-3.76%,-7.26%,
,,,,,,,,,,,,,,,,,
Qwen/Qwen2.5-7B-Instruct-1M,BIRD,2.00E-05,1.0125,3,0.2757,23.01%,,0.6905,0.557,,0.8882,0.8863,0.2757,,0.56%,0.47%,-0.004
Qwen/Qwen2.5-7B-Instruct-1M,BIRD,2.00E-05,1.025,2,0.2797,20.76%,,0.6693,0.564,,0.8908,0.8923,0.2797,0,0.31%,0.33%,
Qwen/Qwen2.5-7B-Instruct-1M,BIRD,2.00E-05,1.075,1,0.2757,17.70%,,0.6772,0.584,,0.8851,,0.2757,,,,
,,,1.5,,,13.28%,,,,,,,,,,,
,,,10,,,7.79%,,,,,,,,,,,
,,,1.0065,,,25.26%,,,,,,,,,,,
Qwen/Qwen2-7B-Instruct,BIRD,2.00E-05,1.0065,,,26.05%,,,,,,,,,,,
Qwen/Qwen2-7B-Instruct,BIRD,2.00E-05,1.0125,,,23.21%,,,,,,,,,,,
Qwen/Qwen2-7B-Instruct,BIRD,2.00E-05,1.0175,,,21.71%,,,,,,,,,,,
Qwen/Qwen2-7B-Instruct,BIRD,2.00E-05,1.025,,,20.28%,,,,,,,,,,,
olmo2 instruct,-,,,,0.0854,,,0.4312,0.389,,0.698,0.815,0.0854,,,,
olmo2 instruct mbpp ground truth,mbpp,2.00E-05,-1,1,0.381,0,,0.381,0.344,,0.6792,0.7612,0.0723,,-9.05%,-11.64%,
olmo2 instruct mbpp stm,mbpp,,2.5,1,0.4048,25.83%,,0.4048,0.391,,0.6971,0.8203,0.0828,,-0.50%,-6.12%,
olmo2 instruct mbpp stm,mbpp,,3.5,1,0.381,24.06%,,0.381,0.384,,0.6911,0.8112,0.0821,,-1.65%,-11.64%,
olmo2 sft mbpp ground truth,mbpp,2.00E-05,-1,1,,0,,,,,,,,,,,
olmo2 sft mbpp stm,mbpp,,2.5,1,,23.64%,,,,,,,,,,,
olmo2 instruct bird ground truth,BIRD,,-1,,0.1538,0,,0.3915,0.373,,0.6886,0.7733,0.1538,,-4.95%,80.09%,
omlo-instruct-bird_config_lr2e-05,BIRD,2.00E-05,2,3,0.0808,21.51%,,0.4286,0.382,,0.7253,0.8241,0.0808,,0.66%,-5.39%,
omlo-instruct-bird_config_lr2e-05,,,6,3,0.0893,16.76%,,0.4339,0.396,,0.686,0.8158,0.0893,,0.20%,4.57%,
omlo-instruct-bird_config_lr2e-05,,,20,,,13.55%,,,,,,,,,,-8.93%,
omlo-instruct-bird_config_lr2e-05,,,40,,,11.96%,,,,,,,,,,0.00%,
omlo-instruct-bird_config_lr2e-05,,,60,1,0.107,11.08%,,0.4233,0.379,,0.6852,0.818,0.107,,-1.47%,25.29%,
omlo-instruct-bird_config_lr2e-05,,,80,2,0.1076,10.50%,,0.4339,0.4,,0.6894,0.8211,0.1076,,0.74%,26.00%,
omlo-sft_bird_config_lr2e-05,BIRD,2.00E-05,-1,,0.179,0.00%,,0.4101,0.335,,0.7287,0.2343,0.179,,-15.84%,145.21%,
omlo-sft_bird_config_lr2e-05,,,1.75,2,0.071,22.29%,,0.4074,0.346,,0.7201,0.4572,0.071,,-4.62%,-2.74%,
omlo-sft_bird_config_lr2e-05,,,46,3,0.09647,7.24%,,0.4444,0.385,,0.6758,0.8074,0.09647,,15.91%,32.15%,
omlo-sft_bird_config_lr2e-05,,,60,2,0.116,6.63%,,0.4312,0.384,,0.6877,0.8226,0.116,,16.23%,58.90%,
omlo-sft_bird_config_lr2e-05,,,80,2,0.12711,6.07%,,0.4365,0.398,,0.6741,0.815,0.12711,,16.71%,74.12%,
,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,
,,,,,,,,,,,0.00%,-15.84%,145.21%,0%,-4.95%,80.09%,
,,,,,,,,,,,6.07%,16.71%,74.12%,10.50%,0.74%,26.00%,
,,,,,,,,,,,6.63%,16.23%,58.90%,11.08%,-1.47%,25.29%,
,,,,,,,,,,,7.24%,15.91%,32.15%,16.76%,0.20%,4.57%,
,,,,,,,,,,,22.29%,-4.62%,-2.74%,21.51%,0.66%,-5.39%,