{
    "math_word_problem_generation": {
        "Reasoning Correctness": {
            "initial_model=gpt-4-0613": {
                "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                    "majority_vote": {},
                    "average": {}
                },
                "average": {
                    "metrics": {}
                }
            },
            "initial_model=meta-llama/Llama-2-70b-chat-hf": {
                "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                    "majority_vote": {},
                    "average": {}
                },
                "average": {
                    "metrics": {}
                }
            }
        },
        "Instruction-Following": {
            "initial_model=gpt-4-0613": {
                "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                    "majority_vote": {},
                    "average": {}
                },
                "average": {
                    "metrics": {}
                }
            },
            "initial_model=meta-llama/Llama-2-70b-chat-hf": {
                "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                    "majority_vote": {},
                    "average": {}
                },
                "average": {
                    "metrics": {}
                }
            }
        }
    },
    "finegrained_fact_verification": {
        "Reasoning Correctness": {
            "initial_model=gpt-4-0613": {
                "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                    "majority_vote": {},
                    "average": {}
                },
                "average": {
                    "metrics": {}
                }
            },
            "initial_model=meta-llama/Llama-2-70b-chat-hf": {
                "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                    "majority_vote": {},
                    "average": {}
                },
                "average": {
                    "metrics": {}
                }
            }
        },
        "Instruction-Following": {
            "initial_model=gpt-4-0613": {
                "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                    "majority_vote": {},
                    "average": {}
                },
                "average": {
                    "metrics": {}
                }
            },
            "initial_model=meta-llama/Llama-2-70b-chat-hf": {
                "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                    "majority_vote": {},
                    "average": {}
                },
                "average": {
                    "metrics": {}
                }
            }
        },
        "Context-Faithfulness": {
            "initial_model=gpt-4-0613": {
                "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                    "majority_vote": {},
                    "average": {}
                },
                "average": {
                    "metrics": {}
                }
            },
            "initial_model=meta-llama/Llama-2-70b-chat-hf": {
                "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                    "majority_vote": {},
                    "average": {}
                },
                "average": {
                    "metrics": {}
                }
            }
        }
    },
    "answerability_classification": {
        "Reasoning Correctness": {
            "initial_model=gpt-4-0613": {
                "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                    "majority_vote": {},
                    "average": {}
                },
                "average": {
                    "metrics": {}
                }
            },
            "initial_model=meta-llama/Llama-2-70b-chat-hf": {
                "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                    "majority_vote": {},
                    "average": {}
                },
                "average": {
                    "metrics": {}
                }
            }
        },
        "Parameterized Knowledge": {
            "initial_model=gpt-4-0613": {
                "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                    "majority_vote": {},
                    "average": {}
                },
                "average": {
                    "metrics": {}
                }
            },
            "initial_model=meta-llama/Llama-2-70b-chat-hf": {
                "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                    "majority_vote": {},
                    "average": {}
                },
                "average": {
                    "metrics": {}
                }
            }
        }
    }
}