{
    "math_word_problem_generation": {
        "initial_model=gpt-4-0613": {
            "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                "prompt=majority_vote": {},
                "average": {}
            }
        },
        "initial_model=meta-llama/Llama-2-70b-chat-hf": {
            "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                "prompt=majority_vote": {},
                "average": {}
            }
        }
    },
    "finegrained_fact_verification": {
        "initial_model=gpt-4-0613": {
            "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                "prompt=majority_vote": {},
                "average": {}
            }
        },
        "initial_model=meta-llama/Llama-2-70b-chat-hf": {
            "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                "prompt=majority_vote": {},
                "average": {}
            }
        }
    },
    "answerability_classification": {
        "initial_model=gpt-4-0613": {
            "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                "prompt=majority_vote": {},
                "average": {}
            }
        },
        "initial_model=meta-llama/Llama-2-70b-chat-hf": {
            "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                "prompt=majority_vote": {},
                "average": {}
            }
        }
    },
    "average": {
        "initial_model=gpt-4-0613": {
            "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                "average": {
                    "total_num": 0,
                    "gold_error_num": 0,
                    "metrics": {}
                }
            }
        },
        "initial_model=meta-llama/Llama-2-70b-chat-hf": {
            "baseline_model=Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat": {
                "average": {
                    "total_num": 0,
                    "gold_error_num": 0,
                    "metrics": {}
                }
            }
        }
    }
}