Model,SOP,structural_alignment,property_fidelity,semantic_fidelity,code_bonus,code_compliance
Baseline,eval_model_01,4.67,4.67,6.0,-0.12,0.38
Claude,eval_model_01,6.0,6.67,5.67,0.5,0.59
DeepSeek,eval_model_01,3.33,3.33,4.67,0.06,0.32
Gemini,eval_model_01,5.33,5.33,5.0,-0.38,0.34
Qwen,eval_model_01,5.33,5.0,5.33,0.06,0.43
Baseline,eval_model_02,6.0,6.0,7.0,0.75,0.65
Claude,eval_model_02,6.33,7.67,6.33,0.54,0.65
DeepSeek,eval_model_02,4.33,4.33,6.0,0.08,0.41
Gemini,eval_model_02,5.67,6.33,6.67,0.0,0.5
Qwen,eval_model_02,5.33,5.0,4.67,0.58,0.52
Baseline,eval_model_03,3.0,3.67,3.0,0.38,0.33
Claude,eval_model_03,6.33,7.67,8.0,0.75,0.73
DeepSeek,eval_model_03,6.0,6.0,5.67,-0.02,0.47
Gemini,eval_model_03,8.67,9.67,8.0,0.35,0.77
Qwen,eval_model_03,7.33,7.67,6.0,0.29,0.62
Baseline,eval_model_04,7.33,8.33,7.33,0.33,0.68
Claude,eval_model_04,9.0,8.67,8.0,0.27,0.74
DeepSeek,eval_model_04,6.0,8.0,7.0,-0.06,0.55
Gemini,eval_model_04,5.0,7.0,6.0,0.38,0.55
Qwen,eval_model_04,6.33,6.33,6.67,-0.08,0.5
Baseline,eval_model_05,3.67,5.0,4.67,0.12,0.38
Claude,eval_model_05,6.0,6.33,6.67,0.46,0.6
DeepSeek,eval_model_05,3.0,3.0,3.0,0.0,0.24
Gemini,eval_model_05,4.33,7.33,6.0,0.83,0.64
Qwen,eval_model_05,4.33,5.67,5.33,-0.12,0.38
Baseline,eval_model_06,6.33,8.67,4.67,0.25,0.58
Claude,eval_model_06,7.33,7.33,6.67,0.47,0.66
DeepSeek,eval_model_06,6.67,8.0,9.0,0.39,0.71
Gemini,eval_model_06,8.33,10.0,8.33,-0.25,0.66
Qwen,eval_model_06,6.0,9.0,7.33,0.54,0.7
Baseline,eval_model_07,9.67,9.0,7.67,0.0,0.7
Claude,eval_model_07,10.0,10.0,9.67,0.0,0.79
DeepSeek,eval_model_07,9.67,10.0,8.67,-0.12,0.73
Gemini,eval_model_07,8.33,10.0,7.33,0.38,0.76
Qwen,eval_model_07,9.0,9.33,8.67,0.29,0.78
Baseline,eval_model_08,5.0,5.67,5.0,0.42,0.5
Claude,eval_model_08,7.67,8.67,8.33,-0.08,0.64
DeepSeek,eval_model_08,7.33,9.33,7.0,-0.04,0.62
Gemini,eval_model_08,9.67,10.0,9.0,0.31,0.83
Qwen,eval_model_08,9.0,9.33,9.33,0.69,0.87
Baseline,eval_model_09,5.0,5.67,4.67,0.62,0.53
Claude,eval_model_09,6.0,8.67,6.67,0.38,0.64
DeepSeek,eval_model_09,6.0,7.33,6.67,0.19,0.57
Gemini,eval_model_09,8.0,9.67,7.0,0.45,0.75
Qwen,eval_model_09,6.0,7.0,6.67,-0.15,0.5
Baseline,eval_model_10,5.33,7.0,5.33,0.5,0.57
Claude,eval_model_10,7.33,8.33,7.67,0.17,0.66
DeepSeek,eval_model_10,4.67,5.0,5.0,0.25,0.44
Gemini,eval_model_10,7.0,8.67,8.67,0.67,0.78
Qwen,eval_model_10,6.67,8.67,6.67,-0.04,0.58
