Model,SOP,structural_alignment,property_fidelity,semantic_fidelity,code_bonus,code_compliance
Baseline,eval_model_01,5.0,5.67,5.33,0.5,0.52
Claude,eval_model_01,5.33,6.33,6.33,0.0,0.48
DeepSeek,eval_model_01,5.33,5.33,5.33,0.0,0.42
Gemini,eval_model_01,4.33,6.0,5.0,-0.08,0.39
Qwen,eval_model_01,5.0,5.33,5.33,0.33,0.48
Baseline,eval_model_02,5.0,5.0,6.0,0.54,0.53
Claude,eval_model_02,7.67,8.0,8.0,0.17,0.67
DeepSeek,eval_model_02,4.33,5.33,5.67,0.0,0.41
Gemini,eval_model_02,7.0,8.0,7.33,0.25,0.64
Qwen,eval_model_02,4.67,5.33,4.67,0.75,0.54
Baseline,eval_model_03,5.0,5.33,4.0,0.17,0.42
Claude,eval_model_03,7.33,9.0,8.0,0.71,0.79
DeepSeek,eval_model_03,4.33,6.33,5.33,-0.09,0.41
Gemini,eval_model_03,9.0,9.0,8.67,0.08,0.73
Qwen,eval_model_03,6.0,6.67,5.33,0.38,0.55
Baseline,eval_model_04,5.67,6.67,5.67,0.17,0.51
Claude,eval_model_04,7.33,8.67,6.67,0.21,0.65
DeepSeek,eval_model_04,6.67,5.67,5.33,-0.12,0.45
Gemini,eval_model_04,4.33,6.67,5.33,0.67,0.57
Qwen,eval_model_04,4.67,6.0,5.33,-0.05,0.41
Baseline,eval_model_05,4.0,5.67,5.67,0.58,0.52
Claude,eval_model_05,5.0,5.67,6.67,0.83,0.63
DeepSeek,eval_model_05,4.0,4.67,4.0,-0.25,0.29
Gemini,eval_model_05,5.0,7.0,5.0,0.42,0.54
Qwen,eval_model_05,5.0,5.33,5.0,0.33,0.47
Baseline,eval_model_06,5.33,7.67,5.33,0.04,0.5
Claude,eval_model_06,5.33,7.67,6.67,-0.04,0.52
DeepSeek,eval_model_06,6.0,8.67,7.33,0.29,0.64
Gemini,eval_model_06,7.67,9.67,8.67,0.52,0.8
Qwen,eval_model_06,6.0,7.0,5.33,-0.04,0.48
Baseline,eval_model_07,9.0,7.67,7.67,0.38,0.72
Claude,eval_model_07,9.33,9.0,8.67,-0.25,0.67
DeepSeek,eval_model_07,7.67,8.33,6.33,-0.12,0.57
Gemini,eval_model_07,7.67,9.67,5.67,0.25,0.67
Qwen,eval_model_07,10.0,10.0,10.0,0.12,0.83
Baseline,eval_model_08,6.67,6.67,5.0,0.26,0.54
Claude,eval_model_08,4.67,8.33,7.67,0.02,0.56
DeepSeek,eval_model_08,7.0,8.33,6.0,0.13,0.59
Gemini,eval_model_08,10.0,10.0,10.0,0.75,0.95
Qwen,eval_model_08,8.33,8.67,8.33,0.19,0.71
Baseline,eval_model_09,5.0,5.67,4.67,0.31,0.47
Claude,eval_model_09,7.0,9.0,7.0,0.16,0.65
DeepSeek,eval_model_09,6.33,7.0,6.33,0.09,0.55
Gemini,eval_model_09,9.0,9.0,8.33,0.35,0.77
Qwen,eval_model_09,7.0,7.33,7.0,-0.02,0.56
Baseline,eval_model_10,6.0,6.67,5.67,0.25,0.54
Claude,eval_model_10,6.0,8.0,6.0,0.03,0.54
DeepSeek,eval_model_10,5.33,5.33,5.67,0.14,0.46
Gemini,eval_model_10,7.67,9.0,7.67,0.38,0.72
Qwen,eval_model_10,6.67,7.33,7.67,-0.25,0.53
