{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "koala", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "llama", "combination": ["llama", "instructgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "mpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "koala", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm"]}
{"model": "openassist", "combination": ["openassist", "alpaca"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "baize", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "dolly", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "cohere", "bandwagon"]}
{"model": "koala", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "baize", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "mpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "bandwagon"]}
{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "mpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "cohere", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "bandwagon"]}

{"model": "baize", "combination": ["baize", "cohere"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama"]}
{"model": "koala", "combination": ["koala", "mpt"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama"]}
{"model": "redpajama", "combination": ["redpajama", "openassist"]}
{"model": "gpt4", "combination": ["gpt4", "koala"]}
{"model": "llama", "combination": ["llama", "gpt4", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama"]}
{"model": "dolly", "combination": ["dolly", "alpaca"]}
{"model": "openassist", "combination": ["openassist", "alpaca"]}
{"model": "baize", "combination": ["baize", "redpajama"]}
{"model": "gpt4", "combination": ["gpt4", "dolly"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "dolly", "combination": ["dolly", "openassist"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere"]}
{"model": "falcon", "combination": ["falcon", "koala"]}
{"model": "koala", "combination": ["koala", "chatgpt"]}
{"model": "baize", "combination": ["baize", "openassist", "bandwagon"]}
{"model": "baize", "combination": ["baize", "chatgpt"]}
{"model": "openassist", "combination": ["openassist", "cohere"]}
{"model": "dolly", "combination": ["dolly", "chatgpt"]}
{"model": "alpaca", "combination": ["alpaca", "koala"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "llama"]}
{"model": "dolly", "combination": ["dolly", "baize"]}
{"model": "baize", "combination": ["baize", "wizardlm"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna"]}
{"model": "cohere", "combination": ["cohere", "chatgpt"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "llama"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "cohere", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt"]}
{"model": "dolly", "combination": ["dolly", "koala"]}
{"model": "llama", "combination": ["llama", "openassist", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama"]}
{"model": "mpt", "combination": ["mpt", "dolly", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "koala"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "llama"]}
{"model": "dolly", "combination": ["dolly", "vicuna"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna"]}
{"model": "koala", "combination": ["koala", "redpajama"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "mpt"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm"]}
{"model": "mpt", "combination": ["mpt", "baize", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "instructgpt"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt"]}
{"model": "dolly", "combination": ["dolly", "cohere"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm"]}
{"model": "vicuna", "combination": ["vicuna", "openassist"]}
{"model": "openassist", "combination": ["openassist", "mpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt"]}
{"model": "dolly", "combination": ["dolly", "llama"]}
{"model": "llama", "combination": ["llama", "mpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "koala"]}
{"model": "openassist", "combination": ["openassist", "falcon", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama"]}
{"model": "llama", "combination": ["llama", "falcon", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist"]}
{"model": "falcon", "combination": ["falcon", "mpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt"]}
{"model": "koala", "combination": ["koala", "baize"]}
{"model": "vicuna", "combination": ["vicuna", "cohere"]}
{"model": "falcon", "combination": ["falcon", "baize"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "bandwagon"]}
{"model": "koala", "combination": ["koala", "wizardlm"]}
{"model": "baize", "combination": ["baize", "vicuna", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "wizardlm"]}
{"model": "redpajama", "combination": ["redpajama", "dolly"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "koala"]}
{"model": "dolly", "combination": ["dolly", "falcon"]}
{"model": "koala", "combination": ["koala", "instructgpt"]}
{"model": "falcon", "combination": ["falcon", "redpajama"]}
{"model": "openassist", "combination": ["openassist", "instructgpt"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca"]}
{"model": "cohere", "combination": ["cohere", "mpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "llama"]}
{"model": "baize", "combination": ["baize", "gpt4"]}
{"model": "openassist", "combination": ["openassist", "wizardlm"]}

{"model": "vicuna", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "mpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "baize"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "dolly"]}
{"model": "llama", "combination": ["llama", "gpt4", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "inconsistent"]}
{"model": "koala", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "koala", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "baize", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "bandwagon"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "openassist", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "openassist"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama"]}
{"model": "llama", "combination": ["llama", "redpajama"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "mpt"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama"]}
{"model": "llama", "combination": ["llama", "cohere"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "llama", "combination": ["llama", "alpaca", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon"]}
{"model": "koala", "combination": ["koala", "wizardlm", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "inconsistent"]}

{"model": "llama", "combination": ["llama", "mpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "llama", "bandwagon"]}
{"model": "koala", "combination": ["koala", "wizardlm", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist", "bandwagon"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "llama", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "bandwagon"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "bandwagon"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "cohere", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "bandwagon"]}
{"model": "baize", "combination": ["baize", "wizardlm", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "baize", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "bandwagon"]}
{"model": "koala", "combination": ["koala", "llama", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "mpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "bandwagon"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "falcon", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["llama", "baize", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "koala", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "baize", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "falcon", "bandwagon"]}
{"model": "llama", "combination": ["llama", "instructgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "bandwagon"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "koala", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "bandwagon"]}
{"model": "baize", "combination": ["baize", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "inconsistent"]}

{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "koala", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "dolly", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "openassist", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "baize", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "bandwagon"]}
{"model": "koala", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "baize", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["llama", "baize", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "gpt4"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "instructgpt"]}
{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["llama", "alpaca", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna"]}
{"model": "llama", "combination": ["llama", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "bandwagon"]}
{"model": "llama", "combination": ["llama", "instructgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["llama", "cohere"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "bandwagon"]}
{"model": "llama", "combination": ["llama", "mpt"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "dolly"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}

{"model": "mpt", "combination": ["mpt", "koala", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "bandwagon"]}
{"model": "baize", "combination": ["baize", "falcon", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "baize", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "falcon", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "cohere", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "bandwagon"]}
{"model": "llama", "combination": ["llama", "openassist", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "falcon", "bandwagon"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "baize", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "bandwagon"]}
{"model": "llama", "combination": ["llama", "mpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "mpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "koala", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "openassist", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "bandwagon"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "mpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "openassist", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "instructgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "instructgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "bandwagon"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "baize", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "bandwagon"]}
{"model": "koala", "combination": ["koala", "baize", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "bandwagon"]}

{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala"]}
{"model": "openassist", "combination": ["openassist", "redpajama"]}
{"model": "cohere", "combination": ["cohere", "wizardlm"]}
{"model": "baize", "combination": ["baize", "falcon"]}
{"model": "baize", "combination": ["baize", "mpt"]}
{"model": "llama", "combination": ["llama", "instructgpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist"]}
{"model": "falcon", "combination": ["falcon", "openassist"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt"]}
{"model": "falcon", "combination": ["falcon", "vicuna"]}
{"model": "redpajama", "combination": ["redpajama", "baize"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt"]}
{"model": "baize", "combination": ["baize", "wizardlm"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "inconsistent"]}
{"model": "llama", "combination": ["llama", "baize"]}
{"model": "alpaca", "combination": ["alpaca", "cohere"]}
{"model": "baize", "combination": ["baize", "instructgpt"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "baize", "combination": ["baize", "vicuna"]}
{"model": "dolly", "combination": ["dolly", "wizardlm"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm"]}
{"model": "dolly", "combination": ["dolly", "gpt4"]}
{"model": "vicuna", "combination": ["vicuna", "cohere"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "koala"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt"]}
{"model": "baize", "combination": ["baize", "gpt4"]}
{"model": "llama", "combination": ["llama", "vicuna"]}
{"model": "baize", "combination": ["baize", "openassist"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama"]}
{"model": "openassist", "combination": ["openassist", "alpaca"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "dolly", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "chatgpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama"]}
{"model": "cohere", "combination": ["cohere", "llama"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna"]}
{"model": "koala", "combination": ["koala", "openassist"]}
{"model": "alpaca", "combination": ["alpaca", "baize"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "openassist"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize"]}
{"model": "koala", "combination": ["koala", "mpt"]}
{"model": "falcon", "combination": ["falcon", "chatgpt"]}
{"model": "vicuna", "combination": ["vicuna", "koala"]}
{"model": "cohere", "combination": ["cohere", "baize"]}
{"model": "cohere", "combination": ["cohere", "koala"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm"]}
{"model": "mpt", "combination": ["mpt", "vicuna"]}
{"model": "dolly", "combination": ["dolly", "koala"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "dolly", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "cohere"]}
{"model": "mpt", "combination": ["mpt", "cohere"]}
{"model": "llama", "combination": ["llama", "openassist"]}
{"model": "baize", "combination": ["baize", "koala"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt"]}
{"model": "dolly", "combination": ["dolly", "llama"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt"]}
{"model": "dolly", "combination": ["dolly", "mpt"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist"]}
{"model": "cohere", "combination": ["cohere", "dolly"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt"]}
{"model": "vicuna", "combination": ["vicuna", "dolly"]}
{"model": "falcon", "combination": ["falcon", "koala"]}
{"model": "dolly", "combination": ["dolly", "chatgpt"]}
{"model": "koala", "combination": ["koala", "chatgpt"]}
{"model": "cohere", "combination": ["cohere", "openassist"]}
{"model": "llama", "combination": ["llama", "gpt4"]}
{"model": "falcon", "combination": ["falcon", "llama"]}
{"model": "mpt", "combination": ["mpt", "falcon"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "redpajama"]}
{"model": "mpt", "combination": ["mpt", "alpaca"]}
{"model": "mpt", "combination": ["mpt", "openassist"]}
{"model": "falcon", "combination": ["falcon", "redpajama"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt"]}

{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "bandwagon"]}
{"model": "baize", "combination": ["baize", "mpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "mpt"]}
{"model": "openassist", "combination": ["openassist", "koala", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "mpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist", "bandwagon"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama"]}
{"model": "falcon", "combination": ["falcon", "openassist", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "llama"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "dolly", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt"]}
{"model": "baize", "combination": ["baize", "chatgpt"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "bandwagon"]}
{"model": "baize", "combination": ["baize", "instructgpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "koala", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "koala", "bandwagon"]}
{"model": "llama", "combination": ["llama", "baize", "bandwagon"]}
{"model": "koala", "combination": ["koala", "gpt4", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "mpt"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "baize", "bandwagon"]}
{"model": "baize", "combination": ["baize", "cohere"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "cohere", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "baize"]}
{"model": "baize", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama"]}
{"model": "alpaca", "combination": ["alpaca", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna"]}
{"model": "alpaca", "combination": ["alpaca", "falcon"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "cohere", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "llama"]}
{"model": "vicuna", "combination": ["vicuna", "koala"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "inconsistent"]}

{"model": "baize", "combination": ["baize", "openassist", "bandwagon"]}
{"model": "koala", "combination": ["koala", "baize", "bandwagon"]}
{"model": "llama", "combination": ["llama", "gpt4", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "mpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "bandwagon"]}
{"model": "baize", "combination": ["baize", "dolly", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala"]}
{"model": "koala", "combination": ["koala", "chatgpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "falcon", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "falcon", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "alpaca"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "bandwagon"]}
{"model": "llama", "combination": ["llama", "chatgpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "bandwagon"]}
{"model": "llama", "combination": ["llama", "koala", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "cohere", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "openassist", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "baize", "bandwagon"]}
{"model": "koala", "combination": ["koala", "dolly", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "mpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "wizardlm", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "llama", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "llama", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "dolly", "bandwagon"]}
{"model": "llama", "combination": ["llama", "instructgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "vicuna", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "bandwagon"]}
{"model": "llama", "combination": ["llama", "openassist", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "dolly", "bandwagon"]}
{"model": "baize", "combination": ["baize", "mpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "mpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "koala"]}
{"model": "openassist", "combination": ["openassist", "dolly", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "dolly", "bandwagon"]}
{"model": "baize", "combination": ["baize", "llama", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "bandwagon"]}
{"model": "koala", "combination": ["koala", "gpt4", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "openassist", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "koala", "bandwagon"]}
{"model": "baize", "combination": ["baize", "vicuna", "bandwagon"]}
{"model": "baize", "combination": ["baize", "instructgpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}

{"model": "koala", "combination": ["koala", "redpajama", "bandwagon"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "dolly", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "bandwagon"]}
{"model": "baize", "combination": ["baize", "instructgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "koala", "bandwagon"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "openassist", "bandwagon"]}
{"model": "llama", "combination": ["llama", "instructgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "koala", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "cohere", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "bandwagon"]}
{"model": "llama", "combination": ["llama", "mpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "openassist", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "dolly", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "baize", "inconsistent"]}
{"model": "llama", "combination": ["llama", "cohere", "bandwagon"]}
{"model": "llama", "combination": ["llama", "alpaca", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "koala", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "mpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "chatgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "gpt4", "bandwagon"]}
{"model": "koala", "combination": ["koala", "chatgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "baize", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "dolly", "bandwagon"]}
{"model": "baize", "combination": ["baize", "wizardlm", "bandwagon"]}
{"model": "llama", "combination": ["llama", "dolly", "bandwagon"]}
{"model": "koala", "combination": ["koala", "alpaca", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "baize", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "openassist", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "mpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "falcon", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "bandwagon"]}
{"model": "koala", "combination": ["koala", "falcon", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "cohere", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "wizardlm", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "llama", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "mpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "cohere", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "bandwagon"]}

{"model": "falcon", "combination": ["falcon", "baize", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "bandwagon"]}
{"model": "baize", "combination": ["baize", "mpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt"]}
{"model": "llama", "combination": ["llama", "baize", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["llama", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "bandwagon"]}
{"model": "llama", "combination": ["llama", "mpt"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "instructgpt"]}
{"model": "mpt", "combination": ["mpt", "koala", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "mpt"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist"]}
{"model": "dolly", "combination": ["dolly", "mpt"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "bandwagon"]}
{"model": "koala", "combination": ["koala", "dolly"]}
{"model": "cohere", "combination": ["cohere", "baize", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon"]}
{"model": "mpt", "combination": ["mpt", "wizardlm"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "alpaca"]}
{"model": "gpt4", "combination": ["gpt4", "koala"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "dolly"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "cohere", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "alpaca"]}
{"model": "llama", "combination": ["llama", "openassist"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "wizardlm"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama"]}
{"model": "mpt", "combination": ["mpt", "alpaca"]}

{"model": "vicuna", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "openassist", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "mpt"]}
{"model": "baize", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala"]}
{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["llama", "cohere", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "koala", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "mpt"]}
{"model": "llama", "combination": ["llama", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "koala"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "koala", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "koala", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "gpt4"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}

{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt"]}
{"model": "openassist", "combination": ["openassist", "koala", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "instructgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist"]}
{"model": "falcon", "combination": ["falcon", "koala", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "openassist"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "dolly", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "baize", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "koala", "inconsistent"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "openassist"]}
{"model": "mpt", "combination": ["mpt", "cohere", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "inconsistent"]}
{"model": "koala", "combination": ["koala", "vicuna", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "bandwagon"]}

{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama"]}
{"model": "koala", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "openassist"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "baize"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "baize"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "falcon", "combination": ["falcon", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "dolly", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "bandwagon"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt"]}
{"model": "llama", "combination": ["llama", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama"]}
{"model": "llama", "combination": ["llama", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala"]}
{"model": "falcon", "combination": ["falcon", "baize", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon"]}
{"model": "openassist", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "inconsistent"]}

{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "baize", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "vicuna", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["llama", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "mpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "instructgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["llama", "dolly", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "baize", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "dolly", "inconsistent"]}

{"model": "gpt4", "combination": ["gpt4", "dolly", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "openassist", "bandwagon"]}
{"model": "baize", "combination": ["baize", "cohere", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "llama", "bandwagon"]}
{"model": "baize", "combination": ["baize", "vicuna", "bandwagon"]}
{"model": "llama", "combination": ["llama", "alpaca", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "koala", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "koala", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "chatgpt"]}
{"model": "baize", "combination": ["baize", "dolly", "bandwagon"]}
{"model": "koala", "combination": ["koala", "redpajama", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "bandwagon"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "alpaca", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "koala", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "dolly", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "cohere", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "bandwagon"]}
{"model": "baize", "combination": ["baize", "chatgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "gpt4", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "baize", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "mpt"]}
{"model": "baize", "combination": ["baize", "koala", "bandwagon"]}
{"model": "baize", "combination": ["baize", "instructgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "mpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "cohere", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "falcon", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "baize", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "vicuna"]}
{"model": "cohere", "combination": ["cohere", "dolly"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "bandwagon"]}
{"model": "llama", "combination": ["llama", "openassist", "bandwagon"]}
{"model": "llama", "combination": ["llama", "mpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "bandwagon"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "chatgpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["llama", "redpajama", "bandwagon"]}
{"model": "llama", "combination": ["llama", "chatgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "instructgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "koala", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "cohere", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "bandwagon"]}
{"model": "baize", "combination": ["baize", "openassist", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "openassist"]}
{"model": "koala", "combination": ["koala", "llama", "bandwagon"]}

{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "llama", "bandwagon"]}
{"model": "llama", "combination": ["llama", "openassist", "bandwagon"]}
{"model": "baize", "combination": ["baize", "openassist", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "mpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "mpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "instructgpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "bandwagon"]}
{"model": "baize", "combination": ["baize", "chatgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "dolly", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "baize", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "mpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "llama"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "dolly", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "cohere", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "koala", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "wizardlm", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "dolly", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "koala", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "llama"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala", "inconsistent"]}
{"model": "llama", "combination": ["llama", "baize", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "llama", "combination": ["llama", "dolly", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "koala", "combination": ["koala", "vicuna", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "dolly", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama"]}

{"model": "koala", "combination": ["koala", "redpajama"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "bandwagon"]}
{"model": "baize", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "openassist", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "dolly", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "inconsistent"]}
{"model": "koala", "combination": ["koala", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "cohere", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "koala", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "dolly", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}

{"model": "cohere", "combination": ["cohere", "chatgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "instructgpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}
{"model": "koala", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "baize", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "baize", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala", "inconsistent"]}
{"model": "baize", "combination": ["baize", "vicuna", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "koala", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "inconsistent"]}
{"model": "llama", "combination": ["llama", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "baize", "bandwagon"]}
{"model": "llama", "combination": ["llama", "vicuna", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "openassist", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "instructgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "inconsistent"]}

{"model": "koala", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "bandwagon"]}
{"model": "baize", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "llama", "combination": ["llama", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "mpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "mpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}

{"model": "koala", "combination": ["koala", "cohere", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "bandwagon"]}
{"model": "llama", "combination": ["llama", "cohere", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt"]}
{"model": "vicuna", "combination": ["vicuna", "dolly"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "dolly", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "koala", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["llama", "instructgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama"]}
{"model": "baize", "combination": ["baize", "falcon", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize"]}
{"model": "alpaca", "combination": ["alpaca", "koala"]}
{"model": "baize", "combination": ["baize", "cohere", "bandwagon"]}
{"model": "llama", "combination": ["llama", "redpajama", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "bandwagon"]}
{"model": "llama", "combination": ["llama", "baize", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "baize", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "llama"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "chatgpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly"]}
{"model": "dolly", "combination": ["dolly", "openassist"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "dolly"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4"]}
{"model": "dolly", "combination": ["dolly", "falcon", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt"]}
{"model": "dolly", "combination": ["dolly", "cohere", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "llama"]}
{"model": "llama", "combination": ["llama", "gpt4", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "baize"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}
{"model": "koala", "combination": ["koala", "instructgpt", "inconsistent"]}

{"model": "dolly", "combination": ["dolly", "cohere", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "redpajama"]}
{"model": "koala", "combination": ["koala", "dolly", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "openassist", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca"]}
{"model": "openassist", "combination": ["openassist", "mpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna", "bandwagon"]}
{"model": "baize", "combination": ["baize", "redpajama"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "llama", "bandwagon"]}
{"model": "llama", "combination": ["llama", "chatgpt"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "alpaca"]}
{"model": "llama", "combination": ["llama", "mpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "redpajama"]}
{"model": "mpt", "combination": ["mpt", "dolly", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "baize", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "bandwagon"]}
{"model": "llama", "combination": ["llama", "redpajama"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "llama", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "openassist", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "bandwagon"]}
{"model": "koala", "combination": ["koala", "mpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "inconsistent"]}
{"model": "koala", "combination": ["koala", "llama", "bandwagon"]}
{"model": "koala", "combination": ["koala", "falcon", "bandwagon"]}
{"model": "llama", "combination": ["llama", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "bandwagon"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "falcon", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "bandwagon"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "cohere", "inconsistent"]}

{"model": "openassist", "combination": ["openassist", "llama"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "llama", "bandwagon"]}
{"model": "koala", "combination": ["koala", "dolly", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "dolly"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama"]}
{"model": "baize", "combination": ["baize", "mpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "cohere", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "mpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "mpt"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "dolly"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "bandwagon"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly"]}
{"model": "mpt", "combination": ["mpt", "wizardlm"]}
{"model": "baize", "combination": ["baize", "wizardlm", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "openassist"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "falcon", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "baize", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "openassist"]}
{"model": "cohere", "combination": ["cohere", "dolly"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "falcon"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "vicuna"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "vicuna", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "llama"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "bandwagon"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "baize"]}
{"model": "gpt4", "combination": ["gpt4", "mpt"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "mpt"]}
{"model": "koala", "combination": ["koala", "wizardlm", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala"]}
{"model": "llama", "combination": ["llama", "gpt4", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama"]}
{"model": "koala", "combination": ["koala", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "cohere"]}
{"model": "cohere", "combination": ["cohere", "koala", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "llama"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala"]}

{"model": "falcon", "combination": ["falcon", "chatgpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "cohere", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "dolly", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere"]}
{"model": "koala", "combination": ["koala", "mpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "mpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "baize", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "vicuna", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "dolly", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna"]}
{"model": "koala", "combination": ["koala", "openassist", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "bandwagon"]}
{"model": "baize", "combination": ["baize", "wizardlm", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt"]}
{"model": "baize", "combination": ["baize", "gpt4", "bandwagon"]}
{"model": "llama", "combination": ["llama", "cohere"]}
{"model": "baize", "combination": ["baize", "falcon", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "redpajama"]}
{"model": "llama", "combination": ["llama", "gpt4"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "gpt4", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "redpajama"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "inconsistent"]}
{"model": "koala", "combination": ["koala", "dolly", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "falcon", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "falcon", "bandwagon"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "dolly", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "mpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "falcon", "bandwagon"]}
{"model": "llama", "combination": ["llama", "openassist"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm", "bandwagon"]}
{"model": "baize", "combination": ["baize", "koala", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "bandwagon"]}
{"model": "baize", "combination": ["baize", "chatgpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama"]}
{"model": "alpaca", "combination": ["alpaca", "cohere"]}
{"model": "llama", "combination": ["llama", "wizardlm", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "mpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "bandwagon"]}
{"model": "baize", "combination": ["baize", "vicuna", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "bandwagon"]}

{"model": "instructgpt", "combination": ["instructgpt", "gpt4"]}
{"model": "koala", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "wizardlm"]}
{"model": "llama", "combination": ["llama", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "mpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "inconsistent"]}
{"model": "llama", "combination": ["llama", "instructgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "koala", "inconsistent"]}
{"model": "llama", "combination": ["llama", "alpaca", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "baize", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "baize", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}

{"model": "llama", "combination": ["llama", "wizardlm"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "bandwagon"]}
{"model": "koala", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "llama", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "koala", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "mpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "koala", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "bandwagon"]}
{"model": "llama", "combination": ["llama", "baize"]}
{"model": "falcon", "combination": ["falcon", "koala", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "dolly", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "llama"]}
{"model": "openassist", "combination": ["openassist", "redpajama"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "llama"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "openassist", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "falcon", "bandwagon"]}
{"model": "baize", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "falcon", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "baize"]}
{"model": "cohere", "combination": ["cohere", "mpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "llama"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "cohere", "bandwagon"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "instructgpt"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "mpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "bandwagon"]}
{"model": "Invalid response", "combination": ["openassist", "wizardlm"]}
{"model": "cohere", "combination": ["cohere", "llama"]}
{"model": "llama", "combination": ["llama", "alpaca", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "baize"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "bandwagon"]}
{"model": "baize", "combination": ["baize", "mpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "bandwagon"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama"]}
{"model": "mpt", "combination": ["mpt", "koala", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "instructgpt", "inconsistent"]}

{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "inconsistent"]}
{"model": "koala", "combination": ["koala", "vicuna", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "baize", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "inconsistent"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "mpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "dolly"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "wizardlm"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "inconsistent"]}

{"model": "vicuna", "combination": ["vicuna", "llama", "bandwagon"]}
{"model": "llama", "combination": ["llama", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama"]}
{"model": "baize", "combination": ["baize", "gpt4"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "openassist"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama"]}
{"model": "dolly", "combination": ["dolly", "falcon"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "bandwagon"]}
{"model": "baize", "combination": ["baize", "dolly", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "baize", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "bandwagon"]}
{"model": "koala", "combination": ["koala", "llama"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "dolly", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "cohere", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "dolly", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "bandwagon"]}
{"model": "llama", "combination": ["llama", "baize", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "baize", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "redpajama"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "alpaca"]}
{"model": "falcon", "combination": ["falcon", "koala"]}
{"model": "baize", "combination": ["baize", "cohere", "bandwagon"]}
{"model": "koala", "combination": ["koala", "instructgpt"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca"]}
{"model": "dolly", "combination": ["dolly", "openassist"]}
{"model": "redpajama", "combination": ["redpajama", "openassist"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "bandwagon"]}
{"model": "baize", "combination": ["baize", "vicuna"]}
{"model": "dolly", "combination": ["dolly", "wizardlm"]}
{"model": "mpt", "combination": ["mpt", "openassist", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "wizardlm"]}
{"model": "mpt", "combination": ["mpt", "cohere", "bandwagon"]}
{"model": "koala", "combination": ["koala", "redpajama"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "bandwagon"]}
{"model": "baize", "combination": ["baize", "koala", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "dolly", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "baize", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "bandwagon"]}
{"model": "baize", "combination": ["baize", "instructgpt"]}
{"model": "koala", "combination": ["koala", "wizardlm"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama"]}
{"model": "llama", "combination": ["llama", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "koala"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "llama", "bandwagon"]}
{"model": "llama", "combination": ["llama", "redpajama"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "inconsistent"]}

{"model": "falcon", "combination": ["falcon", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "bandwagon"]}
{"model": "Invalid response", "combination": ["cohere", "instructgpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "mpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["gpt4", "vicuna"]}
{"model": "Invalid response", "combination": ["falcon", "gpt4"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala", "inconsistent"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "openassist"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "bandwagon"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "mpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca"]}
{"model": "falcon", "combination": ["falcon", "baize", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "mpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "koala", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "alpaca"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize"]}
{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist"]}
{"model": "Invalid response", "combination": ["cohere", "vicuna"]}
{"model": "koala", "combination": ["koala", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "baize", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "llama", "combination": ["llama", "wizardlm", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "alpaca"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "mpt", "combination": ["mpt", "openassist", "bandwagon"]}
{"model": "llama", "combination": ["llama", "gpt4", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "gpt4"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "bandwagon"]}
{"model": "llama", "combination": ["llama", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}

{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "llama", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "koala"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "baize", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "wizardlm"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "baize", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["llama", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "koala"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "mpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala"]}
{"model": "falcon", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "mpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "cohere", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}

{"model": "instructgpt", "combination": ["instructgpt", "llama"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "dolly"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "mpt"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "cohere"]}
{"model": "llama", "combination": ["llama", "baize"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "baize", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "vicuna", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "openassist"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "falcon"]}
{"model": "falcon", "combination": ["falcon", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt"]}
{"model": "falcon", "combination": ["falcon", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt"]}
{"model": "llama", "combination": ["llama", "alpaca", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "koala", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama"]}
{"model": "gpt4", "combination": ["gpt4", "mpt"]}
{"model": "vicuna", "combination": ["vicuna", "openassist"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist"]}
{"model": "openassist", "combination": ["openassist", "dolly"]}
{"model": "llama", "combination": ["llama", "gpt4"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "baize"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "mpt"]}
{"model": "koala", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "inconsistent"]}

{"model": "gpt4", "combination": ["gpt4", "falcon", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere"]}
{"model": "baize", "combination": ["baize", "instructgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "vicuna", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "mpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "bandwagon"]}
{"model": "baize", "combination": ["baize", "alpaca", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "llama", "bandwagon"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "openassist"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt"]}
{"model": "vicuna", "combination": ["vicuna", "falcon"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "mpt"]}
{"model": "llama", "combination": ["llama", "koala", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "baize", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt"]}
{"model": "cohere", "combination": ["cohere", "mpt"]}
{"model": "baize", "combination": ["baize", "falcon"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "koala", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "dolly", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "cohere"]}
{"model": "cohere", "combination": ["cohere", "chatgpt"]}
{"model": "llama", "combination": ["llama", "gpt4", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "dolly", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "openassist"]}
{"model": "baize", "combination": ["baize", "llama"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "instructgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "cohere"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "openassist", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "baize", "bandwagon"]}
{"model": "baize", "combination": ["baize", "chatgpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "cohere"]}
{"model": "cohere", "combination": ["cohere", "falcon", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "llama", "bandwagon"]}
{"model": "koala", "combination": ["koala", "falcon", "bandwagon"]}
{"model": "llama", "combination": ["llama", "chatgpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "koala", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "cohere"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "dolly", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "baize", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "baize", "combination": ["baize", "mpt"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist"]}

{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "koala", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "dolly", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "baize", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "koala", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "koala", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "llama", "combination": ["llama", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "mpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "mpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "inconsistent"]}

{"model": "openassist", "combination": ["openassist", "koala", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "openassist", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "bandwagon"]}
{"model": "koala", "combination": ["koala", "mpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "falcon", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "falcon", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "llama", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "chatgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "bandwagon"]}
{"model": "baize", "combination": ["baize", "dolly", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "llama", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "baize", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "bandwagon"]}
{"model": "llama", "combination": ["llama", "koala", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "bandwagon"]}
{"model": "baize", "combination": ["baize", "openassist", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "dolly", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "llama", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "bandwagon"]}
{"model": "llama", "combination": ["llama", "baize", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "baize", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "mpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "dolly", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "alpaca", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "openassist", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "llama", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "koala", "bandwagon"]}
{"model": "baize", "combination": ["baize", "wizardlm", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "falcon", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "vicuna", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "bandwagon"]}
{"model": "koala", "combination": ["koala", "chatgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "koala", "bandwagon"]}
{"model": "llama", "combination": ["llama", "gpt4", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "dolly", "bandwagon"]}
{"model": "baize", "combination": ["baize", "instructgpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "llama", "bandwagon"]}
{"model": "koala", "combination": ["koala", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "mpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "bandwagon"]}

{"model": "dolly", "combination": ["dolly", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "instructgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "openassist", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt"]}
{"model": "llama", "combination": ["llama", "wizardlm", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "dolly"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "chatgpt"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "baize", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "instructgpt"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "baize", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["llama", "redpajama"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "baize", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "gpt4"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "vicuna"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}

{"model": "baize", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "baize", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "bandwagon"]}
{"model": "koala", "combination": ["koala", "openassist", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "bandwagon"]}
{"model": "llama", "combination": ["llama", "baize", "bandwagon"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "cohere", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "cohere", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "openassist", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere", "bandwagon"]}
{"model": "llama", "combination": ["llama", "koala", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "openassist", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "bandwagon"]}
{"model": "llama", "combination": ["llama", "falcon", "bandwagon"]}
{"model": "llama", "combination": ["llama", "wizardlm", "bandwagon"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "dolly", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "dolly", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "cohere"]}
{"model": "llama", "combination": ["llama", "alpaca", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "koala", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "koala", "combination": ["koala", "dolly", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["koala", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "bandwagon"]}
{"model": "Invalid response", "combination": ["redpajama", "baize"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "bandwagon"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "bandwagon"]}
{"model": "Invalid response", "combination": ["redpajama", "vicuna"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "koala", "combination": ["koala", "gpt4", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "falcon", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "bandwagon"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "bandwagon"]}
{"model": "llama", "combination": ["llama", "chatgpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "bandwagon"]}

{"model": "llama", "combination": ["llama", "falcon"]}
{"model": "baize", "combination": ["baize", "vicuna"]}
{"model": "gpt4", "combination": ["gpt4", "llama"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "mpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama"]}
{"model": "llama", "combination": ["llama", "instructgpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "alpaca"]}
{"model": "openassist", "combination": ["openassist", "mpt"]}
{"model": "baize", "combination": ["baize", "chatgpt"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "dolly", "bandwagon"]}
{"model": "koala", "combination": ["koala", "baize"]}
{"model": "dolly", "combination": ["dolly", "koala", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist"]}
{"model": "baize", "combination": ["baize", "dolly"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt"]}
{"model": "llama", "combination": ["llama", "openassist"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "mpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "baize"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "baize", "bandwagon"]}
{"model": "llama", "combination": ["llama", "vicuna"]}
{"model": "mpt", "combination": ["mpt", "llama"]}
{"model": "falcon", "combination": ["falcon", "redpajama"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "bandwagon"]}
{"model": "baize", "combination": ["baize", "falcon"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "gpt4"]}
{"model": "openassist", "combination": ["openassist", "koala", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "koala", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "mpt"]}
{"model": "cohere", "combination": ["cohere", "openassist", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "falcon"]}
{"model": "openassist", "combination": ["openassist", "baize", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "openassist", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "baize"]}
{"model": "koala", "combination": ["koala", "chatgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "baize"]}
{"model": "falcon", "combination": ["falcon", "dolly", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "cohere", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "mpt"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "redpajama"]}
{"model": "mpt", "combination": ["mpt", "alpaca"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna"]}
{"model": "mpt", "combination": ["mpt", "falcon", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt"]}
{"model": "baize", "combination": ["baize", "gpt4"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4"]}
{"model": "koala", "combination": ["koala", "redpajama"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm"]}
{"model": "llama", "combination": ["llama", "cohere"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "llama"]}
{"model": "openassist", "combination": ["openassist", "redpajama"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt"]}
{"model": "baize", "combination": ["baize", "instructgpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon"]}
{"model": "openassist", "combination": ["openassist", "falcon", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "dolly", "bandwagon"]}
{"model": "koala", "combination": ["koala", "cohere"]}
{"model": "falcon", "combination": ["falcon", "alpaca"]}
{"model": "alpaca", "combination": ["alpaca", "llama"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "bandwagon"]}
{"model": "baize", "combination": ["baize", "mpt"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "bandwagon"]}
{"model": "llama", "combination": ["llama", "koala", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "bandwagon"]}

{"model": "openassist", "combination": ["openassist", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "bandwagon"]}
{"model": "koala", "combination": ["koala", "falcon"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "dolly", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "koala", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "bandwagon"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "koala", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "baize", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "bandwagon"]}
{"model": "Invalid response", "combination": ["redpajama", "wizardlm"]}
{"model": "llama", "combination": ["llama", "mpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "mpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "dolly", "bandwagon"]}
{"model": "baize", "combination": ["baize", "alpaca", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "openassist", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "llama", "combination": ["llama", "instructgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "cohere", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "baize", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "bandwagon"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}

{"model": "vicuna", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "koala"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "llama", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "inconsistent"]}
{"model": "llama", "combination": ["llama", "instructgpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "redpajama"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "falcon", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "dolly", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "koala", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "inconsistent"]}
{"model": "baize", "combination": ["baize", "instructgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "llama", "combination": ["llama", "redpajama"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala"]}
{"model": "cohere", "combination": ["cohere", "redpajama"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "bandwagon"]}
{"model": "llama", "combination": ["llama", "koala"]}
{"model": "falcon", "combination": ["falcon", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "cohere", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "redpajama"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "mpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "bandwagon"]}
{"model": "llama", "combination": ["llama", "chatgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["llama", "alpaca"]}
{"model": "koala", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "baize", "bandwagon"]}
{"model": "llama", "combination": ["llama", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}

{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "gpt4"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4"]}
{"model": "falcon", "combination": ["falcon", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt"]}
{"model": "openassist", "combination": ["openassist", "chatgpt"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4"]}
{"model": "falcon", "combination": ["falcon", "koala"]}
{"model": "falcon", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "openassist"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt"]}
{"model": "mpt", "combination": ["mpt", "chatgpt"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "mpt"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "redpajama"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala"]}
{"model": "mpt", "combination": ["mpt", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "mpt"]}
{"model": "alpaca", "combination": ["alpaca", "mpt"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "dolly", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt"]}
{"model": "falcon", "combination": ["falcon", "openassist", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "cohere", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "vicuna"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "baize"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm"]}
{"model": "falcon", "combination": ["falcon", "baize", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm"]}
{"model": "koala", "combination": ["koala", "openassist"]}
{"model": "llama", "combination": ["llama", "openassist"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "mpt"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama"]}
{"model": "koala", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "cohere"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "koala"]}
{"model": "dolly", "combination": ["dolly", "gpt4"]}
{"model": "redpajama", "combination": ["redpajama", "baize"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "gpt4"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "baize", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala"]}
{"model": "koala", "combination": ["koala", "gpt4"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala"]}
{"model": "vicuna", "combination": ["vicuna", "koala"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}

{"model": "dolly", "combination": ["dolly", "chatgpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "baize", "inconsistent"]}
{"model": "llama", "combination": ["llama", "alpaca", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "bandwagon"]}
{"model": "koala", "combination": ["koala", "wizardlm", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "cohere", "bandwagon"]}
{"model": "baize", "combination": ["baize", "chatgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "openassist", "bandwagon"]}
{"model": "koala", "combination": ["koala", "baize", "bandwagon"]}
{"model": "llama", "combination": ["llama", "gpt4", "bandwagon"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "bandwagon"]}
{"model": "baize", "combination": ["baize", "vicuna", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "chatgpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "dolly", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "falcon", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "llama", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "falcon", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "cohere", "bandwagon"]}
{"model": "koala", "combination": ["koala", "falcon", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "openassist", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "koala", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "dolly", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "bandwagon"]}
{"model": "llama", "combination": ["llama", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "bandwagon"]}
{"model": "koala", "combination": ["koala", "dolly", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "bandwagon"]}
{"model": "koala", "combination": ["koala", "llama", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "instructgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "mpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "cohere", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["llama", "instructgpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "chatgpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "llama", "bandwagon"]}
{"model": "baize", "combination": ["baize", "instructgpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "gpt4", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "dolly", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "baize", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "mpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "bandwagon"]}
{"model": "koala", "combination": ["koala", "mpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "dolly", "bandwagon"]}

{"model": "instructgpt", "combination": ["instructgpt", "cohere", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "cohere"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "bandwagon"]}
{"model": "koala", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "bandwagon"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "dolly", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "redpajama"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "bandwagon"]}
{"model": "koala", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist", "inconsistent"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "koala", "combination": ["koala", "vicuna", "inconsistent"]}
{"model": "llama", "combination": ["llama", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "koala", "bandwagon"]}
{"model": "llama", "combination": ["llama", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "baize", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "mpt"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "cohere", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt"]}
{"model": "falcon", "combination": ["falcon", "openassist", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "bandwagon"]}
{"model": "Invalid response", "combination": ["alpaca", "dolly"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "baize", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "wizardlm", "inconsistent"]}

{"model": "llama", "combination": ["llama", "falcon"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna"]}
{"model": "baize", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "baize", "inconsistent"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "baize", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt"]}
{"model": "koala", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "llama"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala"]}
{"model": "openassist", "combination": ["openassist", "cohere", "bandwagon"]}
{"model": "baize", "combination": ["baize", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist"]}
{"model": "mpt", "combination": ["mpt", "cohere", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala"]}
{"model": "falcon", "combination": ["falcon", "koala"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "koala"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "openassist", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "bandwagon"]}

{"model": "openassist", "combination": ["openassist", "cohere", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "bandwagon"]}
{"model": "llama", "combination": ["llama", "koala"]}
{"model": "dolly", "combination": ["dolly", "baize", "bandwagon"]}
{"model": "llama", "combination": ["llama", "chatgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "inconsistent"]}
{"model": "llama", "combination": ["llama", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "mpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "bandwagon"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "cohere", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "bandwagon"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "baize", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "mpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "bandwagon"]}
{"model": "baize", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "inconsistent"]}

{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly"]}
{"model": "cohere", "combination": ["cohere", "vicuna"]}
{"model": "openassist", "combination": ["openassist", "wizardlm"]}
{"model": "llama", "combination": ["llama", "alpaca"]}
{"model": "baize", "combination": ["baize", "cohere", "bandwagon"]}
{"model": "baize", "combination": ["baize", "openassist"]}
{"model": "koala", "combination": ["koala", "alpaca", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "bandwagon"]}
{"model": "baize", "combination": ["baize", "mpt"]}
{"model": "cohere", "combination": ["cohere", "gpt4"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "instructgpt"]}
{"model": "baize", "combination": ["baize", "alpaca"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "wizardlm"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama"]}
{"model": "redpajama", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "openassist"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4"]}
{"model": "koala", "combination": ["koala", "cohere", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "chatgpt"]}
{"model": "falcon", "combination": ["falcon", "koala", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4"]}
{"model": "llama", "combination": ["llama", "falcon"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "bandwagon"]}
{"model": "koala", "combination": ["koala", "gpt4", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "cohere"]}
{"model": "llama", "combination": ["llama", "dolly"]}
{"model": "falcon", "combination": ["falcon", "baize"]}
{"model": "baize", "combination": ["baize", "wizardlm"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "dolly"]}
{"model": "cohere", "combination": ["cohere", "llama"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist"]}
{"model": "koala", "combination": ["koala", "baize"]}
{"model": "openassist", "combination": ["openassist", "llama"]}
{"model": "cohere", "combination": ["cohere", "openassist"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca"]}
{"model": "alpaca", "combination": ["alpaca", "wizardlm", "bandwagon"]}
{"model": "koala", "combination": ["koala", "chatgpt"]}
{"model": "dolly", "combination": ["dolly", "chatgpt"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "dolly"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "mpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "falcon"]}
{"model": "dolly", "combination": ["dolly", "gpt4"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "llama"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize"]}
{"model": "baize", "combination": ["baize", "redpajama"]}
{"model": "openassist", "combination": ["openassist", "redpajama"]}
{"model": "mpt", "combination": ["mpt", "openassist"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm"]}
{"model": "mpt", "combination": ["mpt", "llama"]}
{"model": "openassist", "combination": ["openassist", "vicuna"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt"]}
{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "bandwagon"]}
{"model": "llama", "combination": ["llama", "instructgpt"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "alpaca"]}
{"model": "koala", "combination": ["koala", "openassist"]}
{"model": "mpt", "combination": ["mpt", "vicuna"]}
{"model": "falcon", "combination": ["falcon", "chatgpt"]}
{"model": "falcon", "combination": ["falcon", "mpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "mpt"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt"]}
{"model": "cohere", "combination": ["cohere", "wizardlm"]}
{"model": "vicuna", "combination": ["vicuna", "dolly"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly"]}
{"model": "llama", "combination": ["llama", "gpt4"]}
{"model": "dolly", "combination": ["dolly", "alpaca"]}
{"model": "dolly", "combination": ["dolly", "koala"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}

{"model": "alpaca", "combination": ["alpaca", "vicuna", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "inconsistent"]}
{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "vicuna", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "koala"]}
{"model": "openassist", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama"]}
{"model": "llama", "combination": ["llama", "mpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "koala"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama"]}
{"model": "cohere", "combination": ["cohere", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "redpajama"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "wizardlm", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "baize", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "llama"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama"]}
{"model": "koala", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "openassist"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "koala", "combination": ["koala", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "wizardlm", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "baize", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "baize", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "llama", "combination": ["llama", "alpaca", "inconsistent"]}

{"model": "instructgpt", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "baize", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "koala", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "dolly", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "baize", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "baize", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "chatgpt"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "baize", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "baize", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "baize", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "koala", "combination": ["koala", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "baize", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}

{"model": "instructgpt", "combination": ["instructgpt", "openassist"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "gpt4", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "dolly", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "openassist", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "dolly", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "wizardlm", "inconsistent"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "dolly", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "bandwagon"]}
{"model": "koala", "combination": ["koala", "falcon", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "openassist"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "vicuna", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "bandwagon"]}
{"model": "llama", "combination": ["llama", "instructgpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "dolly", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "bandwagon"]}
{"model": "baize", "combination": ["baize", "gpt4", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "baize", "bandwagon"]}
{"model": "llama", "combination": ["llama", "vicuna", "inconsistent"]}
{"model": "baize", "combination": ["baize", "falcon", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "bandwagon"]}

{"model": "koala", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "cohere", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "dolly", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "mpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "dolly", "bandwagon"]}
{"model": "baize", "combination": ["baize", "chatgpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["llama", "openassist", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "bandwagon"]}
{"model": "koala", "combination": ["koala", "vicuna", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "chatgpt", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "vicuna", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "koala", "bandwagon"]}
{"model": "koala", "combination": ["koala", "dolly", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "baize", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "bandwagon"]}
{"model": "baize", "combination": ["baize", "koala", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "chatgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "instructgpt", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "cohere", "bandwagon"]}
{"model": "baize", "combination": ["baize", "falcon"]}
{"model": "koala", "combination": ["koala", "mpt"]}
{"model": "falcon", "combination": ["falcon", "instructgpt", "bandwagon"]}
{"model": "llama", "combination": ["llama", "chatgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "koala", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon"]}
{"model": "alpaca", "combination": ["alpaca", "koala", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "mpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "chatgpt", "bandwagon"]}
{"model": "baize", "combination": ["baize", "openassist"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "baize", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "vicuna", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "cohere", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "dolly", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "dolly", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "bandwagon"]}
{"model": "koala", "combination": ["koala", "instructgpt"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "vicuna", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "koala", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "mpt", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "wizardlm", "bandwagon"]}
{"model": "falcon", "combination": ["falcon", "vicuna", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "openassist", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "wizardlm", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "cohere"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "vicuna", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "baize", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "gpt4", "inconsistent"]}

{"model": "instructgpt", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "koala", "bandwagon"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "wizardlm", "bandwagon"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "cohere", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "gpt4", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "baize", "bandwagon"]}
{"model": "alpaca", "combination": ["alpaca", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "baize", "bandwagon"]}
{"model": "koala", "combination": ["koala", "alpaca", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "chatgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "koala", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "cohere", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "koala", "combination": ["koala", "chatgpt", "bandwagon"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "chatgpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "vicuna", "bandwagon"]}
{"model": "koala", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "chatgpt", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "wizardlm", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "instructgpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "koala", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "mpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "openassist", "bandwagon"]}
{"model": "baize", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "chatgpt", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "wizardlm", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "koala", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "baize", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "koala", "bandwagon"]}
{"model": "llama", "combination": ["llama", "alpaca", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "wizardlm", "bandwagon"]}
{"model": "openassist", "combination": ["openassist", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["llama", "dolly", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "koala", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "instructgpt", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "gpt4", "bandwagon"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["llama", "vicuna", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "baize", "bandwagon"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "bandwagon"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "bandwagon"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["llama", "instructgpt"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "bandwagon"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "dolly", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "wizardlm", "bandwagon"]}
{"model": "gpt4", "combination": ["gpt4", "koala", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca", "bandwagon"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "baize", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "bandwagon"]}
{"model": "mpt", "combination": ["mpt", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["llama", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["falcon", "cohere", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["llama", "koala", "bandwagon"]}
{"model": "dolly", "combination": ["dolly", "instructgpt", "bandwagon"]}

