model,checkpoint,macro_hack_rate,macro_hack_rate_hard,micro_hack_rate
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,100,0.158066860465116,0.293604651162791,0.133254716981132
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,200,0.137112403100775,0.293604651162791,0.139150943396226
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,300,0.183624031007752,0.351744186046512,0.170990566037736
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,400,0.208817829457364,0.383720930232558,0.185141509433962
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,500,0.20203488372093,0.369186046511628,0.1875
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,600,0.212209302325581,0.372093023255814,0.1875
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,700,0.308139534883721,0.47093023255814,0.221698113207547
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,800,0.340600775193798,0.508720930232558,0.220518867924528
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,900,0.402616279069767,0.575581395348837,0.247641509433962
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,1000,0.358527131782946,0.51453488372093,0.244103773584906
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,1100,0.312015503875969,0.47093023255814,0.237028301886792
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,1200,0.30390019379845,0.468023255813953,0.252358490566038
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,1300,0.240794573643411,0.40406976744186,0.192216981132075
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,1400,0.241763565891473,0.40406976744186,0.186320754716981
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,1500,0.255329457364341,0.418604651162791,0.196933962264151
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,1600,0.245639534883721,0.409883720930233,0.188679245283019
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,1700,0.218507751937985,0.377906976744186,0.175707547169811
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,1800,0.28343023255814,0.436046511627907,0.193396226415094
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,1900,0.312984496124031,0.473837209302326,0.200471698113208
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,2000,0.203972868217054,0.36046511627907,0.162735849056604
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,2100,0.234980620155039,0.38953488372093,0.199292452830189
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,2200,0.25484496124031,0.406976744186047,0.201650943396226
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,2300,0.280038759689923,0.424418604651163,0.20872641509434
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,2400,0.265140503875969,0.441860465116279,0.222877358490566
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,2500,0.404312015503876,0.595930232558139,0.280660377358491
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,2600,0.422359496124031,0.598837209302326,0.3125
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,2700,0.494670542635658,0.674418604651163,0.313679245283019
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,2800,0.398740310077519,0.584302325581395,0.292452830188679
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,2900,0.366763565891473,0.572674418604651,0.26061320754717
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_overoptimization_set12_hard1234_clean_nokl,3000,0.385174418604651,0.584302325581395,0.240566037735849
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_kl005,100,0.118459302325581,0.226744186046512,0.0955188679245283
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_kl005,200,0.0801841085271318,0.197674418604651,0.0966981132075472
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_kl005,300,0.0898740310077519,0.206395348837209,0.0966981132075472
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_kl005,400,0.0571705426356589,0.148255813953488,0.0719339622641509
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_kl005,500,0.0692829457364341,0.156976744186047,0.0778301886792453
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_kl005,600,0.0564437984496124,0.113372093023256,0.0542452830188679
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_kl005,700,0.0271317829457364,0.0784883720930233,0.0330188679245283
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_kl005,800,0.0499031007751938,0.104651162790698,0.0459905660377358
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_kl005,900,0.0578972868217054,0.13953488372093,0.0695754716981132
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_kl005,1000,0.0409399224806201,0.0988372093023256,0.0495283018867925
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_nokl,100,0.0847868217054263,0.194767441860465,0.102594339622642
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_nokl,200,0.0942028985507246,0.217391304347826,0.109090909090909
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_nokl,300,0.106589147286822,0.252906976744186,0.117924528301887
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_nokl,400,0.112887596899225,0.270348837209302,0.112028301886792
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_nokl,500,0.176356589147287,0.351744186046512,0.150943396226415
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_nokl,600,0.170058139534884,0.366279069767442,0.160377358490566
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_nokl,700,0.272165697674419,0.468023255813953,0.201650943396226
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_nokl,800,0.264050387596899,0.444767441860465,0.1875
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_nokl,900,0.29312015503876,0.488372093023256,0.189858490566038
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_nokl,1000,0.21765988372093,0.412790697674419,0.170990566037736
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen7Bjudge_set12_hard1234_clean_nokl,100,0.103682170542636,0.203488372093023,0.104952830188679
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen7Bjudge_set12_hard1234_clean_nokl,200,0.0675872093023256,0.165697674418605,0.0825471698113208
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen7Bjudge_set12_hard1234_clean_nokl,300,0.0903585271317829,0.183139534883721,0.0884433962264151
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen7Bjudge_set12_hard1234_clean_nokl,400,0.0712209302325581,0.171511627906977,0.089622641509434
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen7Bjudge_set12_hard1234_clean_nokl,500,0.0281007751937985,0.0755813953488372,0.0330188679245283
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen7Bjudge_set12_hard1234_clean_nokl,600,0.0416666666666667,0.136627906976744,0.0636792452830189
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen7Bjudge_set12_hard1234_clean_nokl,700,0.0881782945736434,0.183139534883721,0.0813679245283019
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen7Bjudge_set12_hard1234_clean_nokl,800,0.0683139534883721,0.127906976744186,0.0518867924528302
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen7Bjudge_set12_hard1234_clean_nokl,900,0.0186531007751938,0.0494186046511628,0.017688679245283
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen7Bjudge_set12_hard1234_clean_nokl,1000,0.0465116279069767,0.063953488372093,0.0141509433962264
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_kl005,100,0.190406976744186,0.328488372093023,0.154481132075472
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_kl005,200,0.21172480620155,0.375,0.189858490566038
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_kl005,300,0.170542635658915,0.337209302325581,0.163915094339623
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_kl005,400,0.252422480620155,0.409883720930233,0.207547169811321
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_kl005,500,0.303779069767442,0.456395348837209,0.255896226415094
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_kl005,600,0.298449612403101,0.447674418604651,0.265330188679245
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_kl005,700,0.266230620155039,0.418604651162791,0.229952830188679
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_kl005,800,0.278585271317829,0.430232558139535,0.199292452830189
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_kl005,900,0.287790697674418,0.438953488372093,0.234669811320755
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_no-IntentCheck_set12_hard1234_clean_kl005,1000,0.28875968992248,0.447674418604651,0.238207547169811
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_nokl,100,0.21172480620155,0.366279069767442,0.142688679245283
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_nokl,200,0.219476744186047,0.357558139534884,0.141509433962264
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_nokl,300,0.168604651162791,0.255813953488372,0.0813679245283019
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_nokl,400,0.0419089147286822,0.0872093023255814,0.0365566037735849
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_nokl,500,0.0198643410852713,0.0436046511627907,0.0129716981132075
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_nokl,600,0.0201065891472868,0.0552325581395349,0.0235849056603774
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_nokl,700,0.0426356589147287,0.0901162790697674,0.0389150943396226
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_nokl,800,0.0285852713178295,0.0813953488372093,0.0283018867924528
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_nokl,900,0.0169573643410853,0.0494186046511628,0.0188679245283019
run_qwen2_5-7b_cif-v3_0327_lr1e-6_bsz64_roll5_qwen32Bjudge_set12_hard1234_clean_nokl,1000,0.00436046511627907,0.0145348837209302,0.00471698113207547
