model,right_rate,right,wrong,bad_domain,all
nl_Qwen2.5-0.5B-Instruct,0.000,0,0,283,283
nl_Qwen2.5-1.5B-Instruct,0.021,6,0,277,283
nl_Qwen2.5-14B-Instruct,0.516,146,38,99,283
nl_Qwen2.5-32B-Instruct,0.668,189,20,74,283
nl_Qwen2.5-32B-Instruct_8_1124,0.707,200,17,66,283
nl_Qwen2.5-3B-Instruct,0.117,33,2,248,283
nl_Qwen2.5-72B-Instruct,0.562,159,34,90,283
nl_Qwen2.5-7B-Instruct,0.092,26,23,234,283
nl_Yi-1.5-34B-Chat,0.834,236,4,43,283
nl_Yi-1.5-6B-Chat,0.095,27,13,243,283
nl_Yi-1.5-9B-Chat,0.466,132,47,104,283
nl_Yi-Coder-1.5B-Chat,0.000,0,0,283,283
nl_Yi-Coder-9B-Chat,0.251,71,43,169,283
prob_Qwen2.5-0.5B-Instruct,0.000,0,0,332,332
prob_Qwen2.5-1.5B-Instruct,0.003,1,0,331,332
prob_Qwen2.5-14B-Instruct,0.620,206,45,81,332
prob_Qwen2.5-32B-Instruct,0.747,248,23,61,332
prob_Qwen2.5-32B-Instruct_8_1124,0.711,236,12,84,332
prob_Qwen2.5-3B-Instruct,0.012,4,0,328,332
prob_Qwen2.5-72B-Instruct,0.711,236,49,47,332
prob_Qwen2.5-7B-Instruct,0.346,115,33,184,332
prob_Yi-1.5-34B-Chat,0.380,126,14,192,332
prob_Yi-1.5-6B-Chat,0.072,24,2,306,332
prob_Yi-1.5-9B-Chat,0.398,132,31,169,332
prob_Yi-Coder-1.5B-Chat,0.000,0,0,332,332
prob_Yi-Coder-9B-Chat,0.395,131,46,155,332
