{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "chatgpt"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["baize", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["vicuna", "baize", "lo bias"]}
{"model": "openassist", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "mpt", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "lo bias"]}
{"model": "llama", "combination": ["baize", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "cohere", "combination": ["baize", "cohere", "lo bias"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "lo bias"]}
{"model": "llama", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "lo bias"]}
{"model": "openassist", "combination": ["baize", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "koala", "combination": ["vicuna", "koala", "lo bias"]}
{"model": "falcon", "combination": ["baize", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}

{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "redpajama", "combination": ["gpt4", "redpajama", "lo bias"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "alpaca", "combination": ["openassist", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "lo bias"]}
{"model": "cohere", "combination": ["cohere", "mpt", "fo bias"]}
{"model": "koala", "combination": ["koala", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "openassist", "combination": ["dolly", "openassist"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "llama", "combination": ["gpt4", "llama", "lo bias"]}
{"model": "koala", "combination": ["vicuna", "koala", "lo bias"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "llama", "combination": ["koala", "llama", "lo bias"]}
{"model": "llama", "combination": ["chatgpt", "llama", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly"]}
{"model": "falcon", "combination": ["baize", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "llama", "combination": ["baize", "llama", "lo bias"]}
{"model": "llama", "combination": ["alpaca", "llama", "lo bias"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca", "lo bias"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "lo bias"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly"]}
{"model": "vicuna", "combination": ["vicuna", "baize"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "lo bias"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "lo bias"]}
{"model": "koala", "combination": ["baize", "koala"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm"]}
{"model": "koala", "combination": ["koala", "instructgpt"]}
{"model": "dolly", "combination": ["baize", "dolly", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "lo bias"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "cohere", "combination": ["cohere", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "fo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4"]}
{"model": "llama", "combination": ["openassist", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "llama"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "fo bias"]}
{"model": "cohere", "combination": ["koala", "cohere", "lo bias"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca", "lo bias"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "mpt", "combination": ["baize", "mpt"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "lo bias"]}
{"model": "falcon", "combination": ["openassist", "falcon", "lo bias"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "lo bias"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4"]}
{"model": "alpaca", "combination": ["dolly", "alpaca", "lo bias"]}
{"model": "openassist", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "lo bias"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "cohere", "combination": ["baize", "cohere", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "fo bias"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca", "lo bias"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "cohere", "combination": ["cohere", "falcon"]}
{"model": "openassist", "combination": ["baize", "openassist", "lo bias"]}
{"model": "llama", "combination": ["cohere", "llama", "lo bias"]}
{"model": "openassist", "combination": ["cohere", "openassist", "lo bias"]}
{"model": "llama", "combination": ["dolly", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "openassist", "combination": ["openassist", "mpt", "fo bias"]}

{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "cohere"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "chatgpt"]}
{"model": "falcon", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "alpaca", "combination": ["dolly", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "llama", "combination": ["koala", "llama", "lo bias"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "lo bias"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "mpt", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "llama", "combination": ["wizardlm", "llama", "lo bias"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "openassist", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "lo bias"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "alpaca", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "falcon", "combination": ["baize", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "openassist", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "llama", "combination": ["redpajama", "llama"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "llama", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "llama", "combination": ["chatgpt", "llama", "lo bias"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["gpt4", "llama", "lo bias"]}
{"model": "llama", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "mpt", "combination": ["redpajama", "mpt", "inconsistent"]}

{"model": "vicuna", "combination": ["vicuna", "dolly", "fo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "llama", "combination": ["redpajama", "llama", "lo bias"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "lo bias"]}
{"model": "dolly", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "fo bias"]}
{"model": "dolly", "combination": ["baize", "dolly"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "cohere", "combination": ["baize", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "gpt4"]}
{"model": "openassist", "combination": ["openassist", "mpt"]}
{"model": "openassist", "combination": ["openassist", "llama", "fo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "fo bias"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "baize"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "fo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "alpaca", "combination": ["koala", "alpaca", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "mpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "fo bias"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "koala", "combination": ["koala", "instructgpt", "fo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["openassist", "falcon", "lo bias"]}
{"model": "koala", "combination": ["koala", "openassist", "fo bias"]}
{"model": "llama", "combination": ["baize", "llama"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "alpaca", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "fo bias"]}
{"model": "openassist", "combination": ["instructgpt", "openassist"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "alpaca", "combination": ["openassist", "alpaca", "lo bias"]}
{"model": "dolly", "combination": ["dolly", "llama", "fo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt"]}
{"model": "cohere", "combination": ["cohere", "mpt"]}
{"model": "mpt", "combination": ["koala", "mpt"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "fo bias"]}
{"model": "llama", "combination": ["alpaca", "llama"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon"]}
{"model": "openassist", "combination": ["cohere", "openassist"]}
{"model": "llama", "combination": ["mpt", "llama"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt"]}
{"model": "mpt", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "llama", "combination": ["cohere", "llama"]}
{"model": "dolly", "combination": ["dolly", "openassist", "fo bias"]}
{"model": "openassist", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "koala", "combination": ["koala", "llama", "fo bias"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "llama", "combination": ["gpt4", "llama"]}
{"model": "openassist", "combination": ["openassist", "redpajama", "fo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "fo bias"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "lo bias"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "fo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "fo bias"]}
{"model": "gpt4", "combination": ["gpt4", "mpt"]}
{"model": "koala", "combination": ["baize", "koala", "lo bias"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt"]}
{"model": "falcon", "combination": ["baize", "falcon", "lo bias"]}
{"model": "llama", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "fo bias"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "lo bias"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "lo bias"]}
{"model": "koala", "combination": ["koala", "falcon"]}
{"model": "cohere", "combination": ["koala", "cohere"]}
{"model": "dolly", "combination": ["cohere", "dolly"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "koala", "combination": ["koala", "dolly", "fo bias"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca"]}
{"model": "mpt", "combination": ["baize", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "dolly", "combination": ["dolly", "falcon"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "fo bias"]}
{"model": "vicuna", "combination": ["vicuna", "koala"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "gpt4", "combination": ["gpt4", "cohere"]}

{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca", "lo bias"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt"]}
{"model": "koala", "combination": ["koala", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "openassist", "combination": ["dolly", "openassist", "lo bias"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "koala", "combination": ["koala", "cohere"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4"]}
{"model": "falcon", "combination": ["vicuna", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca", "lo bias"]}
{"model": "baize", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "llama", "combination": ["dolly", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "koala", "combination": ["koala", "llama", "fo bias"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "cohere", "combination": ["baize", "cohere", "lo bias"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "lo bias"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "llama", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["baize", "gpt4"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "mpt", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "llama", "combination": ["openassist", "llama", "lo bias"]}
{"model": "llama", "combination": ["wizardlm", "llama", "lo bias"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "alpaca", "combination": ["dolly", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "mpt", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "lo bias"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["openassist", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "gpt4", "combination": ["gpt4", "openassist"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "openassist", "combination": ["cohere", "openassist", "lo bias"]}
{"model": "baize", "combination": ["baize", "koala", "fo bias"]}
{"model": "openassist", "combination": ["chatgpt", "openassist", "lo bias"]}
{"model": "llama", "combination": ["gpt4", "llama", "lo bias"]}
{"model": "alpaca", "combination": ["koala", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "llama", "combination": ["mpt", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "lo bias"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt"]}
{"model": "cohere", "combination": ["instructgpt", "cohere"]}
{"model": "cohere", "combination": ["wizardlm", "cohere"]}
{"model": "alpaca", "combination": ["openassist", "alpaca", "lo bias"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "lo bias"]}
{"model": "gpt4", "combination": ["gpt4", "cohere"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "falcon"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}

{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "lo bias"]}
{"model": "llama", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "openassist", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "llama", "combination": ["mpt", "llama", "lo bias"]}
{"model": "llama", "combination": ["vicuna", "llama", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "llama", "combination": ["chatgpt", "llama", "lo bias"]}
{"model": "cohere", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["mpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "falcon"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "mpt", "combination": ["baize", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "openassist", "combination": ["openassist", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "mpt", "combination": ["chatgpt", "mpt", "lo bias"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "lo bias"]}
{"model": "llama", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "falcon", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "mpt", "combination": ["koala", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "dolly", "combination": ["dolly", "mpt"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "baize", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "llama", "combination": ["baize", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "falcon", "combination": ["openassist", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "koala", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "lo bias"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "lo bias"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "dolly", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}

{"model": "cohere", "combination": ["baize", "cohere"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["koala", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "gpt4"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "lo bias"]}
{"model": "baize", "combination": ["vicuna", "baize"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "lo bias"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "lo bias"]}
{"model": "llama", "combination": ["openassist", "llama", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "llama", "combination": ["baize", "llama", "lo bias"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "baize", "combination": ["baize", "dolly"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["gpt4", "llama", "lo bias"]}
{"model": "mpt", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca"]}
{"model": "llama", "combination": ["koala", "llama", "lo bias"]}
{"model": "falcon", "combination": ["mpt", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "mpt", "combination": ["gpt4", "mpt"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "llama", "combination": ["mpt", "llama", "lo bias"]}
{"model": "koala", "combination": ["koala", "alpaca"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca", "lo bias"]}
{"model": "llama", "combination": ["cohere", "llama", "lo bias"]}
{"model": "mpt", "combination": ["instructgpt", "mpt"]}
{"model": "mpt", "combination": ["koala", "mpt", "lo bias"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "lo bias"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "lo bias"]}
{"model": "llama", "combination": ["alpaca", "llama"]}
{"model": "mpt", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["vicuna", "falcon"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "lo bias"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "fo bias"]}
{"model": "llama", "combination": ["vicuna", "llama", "lo bias"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "koala", "combination": ["vicuna", "koala", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "lo bias"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "llama", "combination": ["chatgpt", "llama", "lo bias"]}
{"model": "alpaca", "combination": ["openassist", "alpaca", "lo bias"]}
{"model": "openassist", "combination": ["dolly", "openassist", "lo bias"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "fo bias"]}
{"model": "openassist", "combination": ["koala", "openassist", "lo bias"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "lo bias"]}
{"model": "llama", "combination": ["wizardlm", "llama", "lo bias"]}
{"model": "mpt", "combination": ["dolly", "mpt"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "lo bias"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm"]}
{"model": "mpt", "combination": ["vicuna", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca", "lo bias"]}
{"model": "mpt", "combination": ["openassist", "mpt"]}
{"model": "mpt", "combination": ["baize", "mpt", "lo bias"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "lo bias"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "lo bias"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "lo bias"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["baize", "falcon"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "lo bias"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "mpt", "combination": ["chatgpt", "mpt"]}
{"model": "falcon", "combination": ["openassist", "falcon", "lo bias"]}
{"model": "llama", "combination": ["dolly", "llama", "lo bias"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "lo bias"]}
{"model": "openassist", "combination": ["cohere", "openassist", "lo bias"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "lo bias"]}
{"model": "openassist", "combination": ["instructgpt", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "alpaca", "combination": ["dolly", "alpaca", "lo bias"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "lo bias"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "koala", "combination": ["baize", "koala", "lo bias"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "falcon", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "redpajama", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "lo bias"]}
{"model": "redpajama", "combination": ["chatgpt", "redpajama", "lo bias"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "lo bias"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}

{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "openassist", "combination": ["baize", "openassist", "lo bias"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "openassist", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["mpt", "llama"]}
{"model": "falcon", "combination": ["openassist", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "mpt", "combination": ["instructgpt", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "cohere", "combination": ["baize", "cohere", "lo bias"]}
{"model": "koala", "combination": ["koala", "llama", "fo bias"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "llama", "combination": ["vicuna", "llama"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "dolly", "combination": ["dolly", "openassist"]}
{"model": "llama", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "llama", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "lo bias"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "lo bias"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["koala", "cohere"]}
{"model": "llama", "combination": ["baize", "llama"]}
{"model": "llama", "combination": ["dolly", "llama", "lo bias"]}
{"model": "dolly", "combination": ["dolly", "mpt"]}
{"model": "openassist", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "lo bias"]}
{"model": "falcon", "combination": ["baize", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "koala", "combination": ["koala", "alpaca"]}
{"model": "dolly", "combination": ["dolly", "alpaca"]}
{"model": "mpt", "combination": ["vicuna", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "lo bias"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "baize", "combination": ["vicuna", "baize", "lo bias"]}
{"model": "mpt", "combination": ["cohere", "mpt", "lo bias"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "lo bias"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "baize", "combination": ["baize", "mpt"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "lo bias"]}
{"model": "koala", "combination": ["koala", "mpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "lo bias"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "dolly", "combination": ["baize", "dolly", "lo bias"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "llama", "combination": ["alpaca", "llama"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "koala", "combination": ["baize", "koala", "lo bias"]}
{"model": "openassist", "combination": ["openassist", "alpaca"]}
{"model": "llama", "combination": ["chatgpt", "llama", "lo bias"]}
{"model": "koala", "combination": ["vicuna", "koala", "lo bias"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "falcon", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "llama", "combination": ["wizardlm", "llama"]}
{"model": "falcon", "combination": ["mpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "lo bias"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "lo bias"]}
{"model": "mpt", "combination": ["openassist", "mpt", "lo bias"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "openassist", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "llama", "combination": ["gpt4", "llama", "inconsistent"]}

{"model": "mpt", "combination": ["cohere", "mpt", "lo bias"]}
{"model": "cohere", "combination": ["cohere", "alpaca"]}
{"model": "koala", "combination": ["vicuna", "koala"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "mpt", "combination": ["baize", "mpt", "lo bias"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "mpt", "combination": ["openassist", "mpt", "lo bias"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "cohere", "combination": ["baize", "cohere"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "lo bias"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "koala", "combination": ["baize", "koala"]}
{"model": "dolly", "combination": ["baize", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "llama", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "koala", "combination": ["koala", "gpt4"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "lo bias"]}
{"model": "alpaca", "combination": ["koala", "alpaca", "lo bias"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "mpt", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca", "lo bias"]}
{"model": "falcon", "combination": ["openassist", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "lo bias"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["koala", "openassist", "lo bias"]}
{"model": "mpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "baize", "combination": ["vicuna", "baize", "lo bias"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "koala", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "falcon", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["baize", "falcon"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "openassist", "combination": ["baize", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "koala", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca"]}
{"model": "llama", "combination": ["gpt4", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "lo bias"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["dolly", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}
{"model": "falcon", "combination": ["alpaca", "falcon"]}
{"model": "mpt", "combination": ["gpt4", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "alpaca", "combination": ["openassist", "alpaca", "lo bias"]}
{"model": "llama", "combination": ["baize", "llama", "lo bias"]}
{"model": "llama", "combination": ["vicuna", "llama", "lo bias"]}
{"model": "koala", "combination": ["koala", "llama"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "lo bias"]}
{"model": "openassist", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}

{"model": "alpaca", "combination": ["redpajama", "alpaca", "lo bias"]}
{"model": "openassist", "combination": ["cohere", "openassist"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "koala", "combination": ["vicuna", "koala", "lo bias"]}
{"model": "falcon", "combination": ["baize", "falcon"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "fo bias"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca", "lo bias"]}
{"model": "llama", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "lo bias"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "mpt", "combination": ["wizardlm", "mpt"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4"]}
{"model": "mpt", "combination": ["mpt", "alpaca"]}
{"model": "llama", "combination": ["wizardlm", "llama", "lo bias"]}
{"model": "mpt", "combination": ["koala", "mpt"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "lo bias"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "lo bias"]}
{"model": "alpaca", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "lo bias"]}
{"model": "dolly", "combination": ["baize", "dolly", "lo bias"]}
{"model": "redpajama", "combination": ["baize", "redpajama", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "lo bias"]}
{"model": "mpt", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "openassist", "combination": ["baize", "openassist", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "fo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "alpaca", "combination": ["dolly", "alpaca", "lo bias"]}
{"model": "koala", "combination": ["koala", "alpaca"]}
{"model": "mpt", "combination": ["baize", "mpt"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "lo bias"]}
{"model": "mpt", "combination": ["mpt", "falcon"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4"]}
{"model": "koala", "combination": ["baize", "koala", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "dolly", "combination": ["wizardlm", "dolly"]}
{"model": "mpt", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "mpt", "combination": ["mpt", "llama"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "cohere", "combination": ["wizardlm", "cohere"]}
{"model": "llama", "combination": ["koala", "llama", "lo bias"]}
{"model": "llama", "combination": ["cohere", "llama", "lo bias"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "gpt4", "combination": ["gpt4", "dolly"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca"]}
{"model": "cohere", "combination": ["koala", "cohere", "lo bias"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "fo bias"]}
{"model": "openassist", "combination": ["wizardlm", "openassist"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "baize", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "llama", "combination": ["gpt4", "llama", "lo bias"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca", "lo bias"]}
{"model": "redpajama", "combination": ["cohere", "redpajama", "lo bias"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "fo bias"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "cohere", "combination": ["cohere", "dolly", "fo bias"]}
{"model": "mpt", "combination": ["dolly", "mpt"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "lo bias"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "lo bias"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt"]}
{"model": "cohere", "combination": ["instructgpt", "cohere"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "fo bias"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "mpt", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "openassist", "combination": ["vicuna", "openassist"]}
{"model": "openassist", "combination": ["koala", "openassist"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4"]}
{"model": "llama", "combination": ["openassist", "llama", "lo bias"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "falcon", "combination": ["redpajama", "falcon"]}
{"model": "cohere", "combination": ["cohere", "mpt", "fo bias"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "fo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "gpt4", "combination": ["baize", "gpt4"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly"]}
{"model": "cohere", "combination": ["baize", "cohere"]}
{"model": "vicuna", "combination": ["vicuna", "llama"]}

{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "chatgpt"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "llama", "combination": ["baize", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "falcon", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "falcon", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "llama", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "mpt", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "baize", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "llama", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}

{"model": "gpt4", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "cohere"]}
{"model": "cohere", "combination": ["wizardlm", "cohere"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "llama", "combination": ["alpaca", "llama", "lo bias"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "lo bias"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4"]}
{"model": "koala", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "gpt4", "combination": ["koala", "gpt4"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "falcon", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "llama"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "mpt", "combination": ["instructgpt", "mpt", "lo bias"]}
{"model": "llama", "combination": ["baize", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "koala", "combination": ["baize", "koala"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca", "lo bias"]}
{"model": "alpaca", "combination": ["koala", "alpaca", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "lo bias"]}
{"model": "openassist", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "gpt4", "combination": ["gpt4", "llama"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "fo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "fo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "fo bias"]}
{"model": "falcon", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "koala", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "alpaca", "combination": ["dolly", "alpaca", "lo bias"]}
{"model": "dolly", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "falcon", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "lo bias"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "lo bias"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["baize", "falcon"]}
{"model": "alpaca", "combination": ["openassist", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}

{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "lo bias"]}
{"model": "llama", "combination": ["baize", "llama"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "openassist", "combination": ["openassist", "falcon", "fo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "falcon", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "fo bias"]}
{"model": "openassist", "combination": ["instructgpt", "openassist"]}
{"model": "cohere", "combination": ["cohere", "llama", "fo bias"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "llama", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "llama", "combination": ["mpt", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "fo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "fo bias"]}
{"model": "mpt", "combination": ["baize", "mpt", "lo bias"]}
{"model": "gpt4", "combination": ["gpt4", "falcon"]}
{"model": "falcon", "combination": ["mpt", "falcon", "lo bias"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "koala", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "baize", "combination": ["baize", "koala"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon"]}
{"model": "cohere", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "baize", "combination": ["baize", "cohere"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "dolly", "combination": ["dolly", "mpt"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "falcon", "combination": ["cohere", "falcon"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "lo bias"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "llama", "combination": ["dolly", "llama"]}

{"model": "llama", "combination": ["mpt", "llama", "lo bias"]}
{"model": "mpt", "combination": ["chatgpt", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "llama", "combination": ["openassist", "llama", "lo bias"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "mpt", "combination": ["vicuna", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "koala"]}
{"model": "falcon", "combination": ["wizardlm", "falcon"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "baize", "combination": ["vicuna", "baize"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "mpt", "combination": ["baize", "mpt", "lo bias"]}
{"model": "openassist", "combination": ["dolly", "openassist", "lo bias"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "falcon", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "cohere", "combination": ["koala", "cohere", "lo bias"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "lo bias"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "falcon", "combination": ["mpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "cohere", "combination": ["baize", "cohere", "lo bias"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "lo bias"]}
{"model": "falcon", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "openassist"]}
{"model": "openassist", "combination": ["cohere", "openassist", "lo bias"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "fo bias"]}
{"model": "mpt", "combination": ["instructgpt", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "mpt", "combination": ["openassist", "mpt", "lo bias"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "mpt", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "dolly", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "openassist", "combination": ["baize", "openassist"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "fo bias"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "mpt", "combination": ["dolly", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "alpaca", "combination": ["koala", "alpaca", "lo bias"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "mpt", "combination": ["cohere", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "falcon", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}

{"model": "llama", "combination": ["alpaca", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["koala", "llama", "inconsistent"]}
{"model": "llama", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "cohere", "combination": ["cohere", "openassist"]}
{"model": "openassist", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "falcon", "combination": ["openassist", "falcon", "lo bias"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca", "lo bias"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["baize", "cohere"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "alpaca", "combination": ["koala", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "falcon", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "alpaca", "combination": ["openassist", "alpaca", "lo bias"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "lo bias"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "gpt4"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca", "lo bias"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "falcon", "combination": ["baize", "falcon", "lo bias"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "llama", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "llama", "combination": ["vicuna", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca", "lo bias"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "fo bias"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "llama", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "openassist", "combination": ["chatgpt", "openassist", "lo bias"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "lo bias"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "lo bias"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "lo bias"]}
{"model": "llama", "combination": ["gpt4", "llama", "lo bias"]}

{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "llama", "combination": ["mpt", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "falcon", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "mpt", "combination": ["cohere", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca"]}
{"model": "mpt", "combination": ["koala", "mpt", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "alpaca"]}
{"model": "openassist", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "mpt", "combination": ["baize", "mpt", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize"]}
{"model": "dolly", "combination": ["instructgpt", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "dolly", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "lo bias"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "mpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "llama", "combination": ["koala", "llama", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "alpaca", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "llama", "combination": ["dolly", "llama", "lo bias"]}
{"model": "openassist", "combination": ["openassist", "alpaca"]}
{"model": "mpt", "combination": ["dolly", "mpt", "lo bias"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "lo bias"]}
{"model": "cohere", "combination": ["cohere", "dolly", "fo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "falcon", "combination": ["alpaca", "falcon"]}
{"model": "cohere", "combination": ["baize", "cohere", "lo bias"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "koala", "combination": ["koala", "instructgpt"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "falcon", "combination": ["mpt", "falcon", "lo bias"]}
{"model": "llama", "combination": ["vicuna", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "llama", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "llama", "combination": ["baize", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "falcon", "combination": ["baize", "falcon", "lo bias"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "lo bias"]}
{"model": "mpt", "combination": ["chatgpt", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "llama", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt"]}
{"model": "mpt", "combination": ["instructgpt", "mpt", "lo bias"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "cohere", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm"]}
{"model": "openassist", "combination": ["cohere", "openassist", "inconsistent"]}

{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "cohere", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "llama", "combination": ["baize", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["baize", "gpt4"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "cohere", "combination": ["instructgpt", "cohere"]}
{"model": "mpt", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "llama", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "redpajama", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "mpt", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "llama", "combination": ["vicuna", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "baize", "combination": ["vicuna", "baize", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "fo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "fo bias"]}
{"model": "cohere", "combination": ["koala", "cohere", "lo bias"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}

{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "falcon", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "llama", "combination": ["cohere", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "llama", "combination": ["koala", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "cohere", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "falcon", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "gpt4"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}

{"model": "openassist", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "dolly", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "openassist", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "lo bias"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "llama", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["koala", "cohere", "lo bias"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "openassist", "combination": ["koala", "openassist"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "lo bias"]}
{"model": "llama", "combination": ["baize", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "llama", "combination": ["koala", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "lo bias"]}
{"model": "llama", "combination": ["alpaca", "llama"]}
{"model": "cohere", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "koala", "combination": ["vicuna", "koala"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "mpt", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "mpt", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "lo bias"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}

{"model": "openassist", "combination": ["gpt4", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "koala", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "llama", "combination": ["baize", "llama", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist"]}
{"model": "Invalid response", "combination": ["chatgpt", "gpt4"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "llama", "combination": ["gpt4", "llama", "lo bias"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "llama"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "fo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca"]}
{"model": "llama", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "openassist", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "falcon", "combination": ["baize", "falcon", "lo bias"]}
{"model": "llama", "combination": ["dolly", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "cohere"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "llama", "combination": ["mpt", "llama", "lo bias"]}
{"model": "llama", "combination": ["alpaca", "llama"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "koala", "combination": ["vicuna", "koala"]}
{"model": "koala", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "falcon", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["gpt4", "falcon"]}
{"model": "mpt", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["openassist", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "lo bias"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "alpaca", "combination": ["openassist", "alpaca", "lo bias"]}
{"model": "mpt", "combination": ["gpt4", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "falcon", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "koala", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca", "lo bias"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "openassist", "combination": ["instructgpt", "openassist"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "lo bias"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca", "lo bias"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "lo bias"]}
{"model": "koala", "combination": ["koala", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}

{"model": "llama", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "llama", "combination": ["gpt4", "llama"]}
{"model": "alpaca", "combination": ["cohere", "alpaca"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["wizardlm", "mpt"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "fo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "llama", "combination": ["mpt", "llama", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "fo bias"]}
{"model": "dolly", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "lo bias"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "koala"]}
{"model": "dolly", "combination": ["baize", "dolly", "lo bias"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "openassist", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "fo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "fo bias"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca", "lo bias"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "lo bias"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "lo bias"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "lo bias"]}
{"model": "gpt4", "combination": ["gpt4", "falcon"]}
{"model": "cohere", "combination": ["cohere", "redpajama"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "dolly", "combination": ["koala", "dolly"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "fo bias"]}
{"model": "llama", "combination": ["koala", "llama"]}
{"model": "cohere", "combination": ["cohere", "dolly", "fo bias"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "lo bias"]}
{"model": "mpt", "combination": ["redpajama", "mpt"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt"]}
{"model": "falcon", "combination": ["baize", "falcon", "lo bias"]}
{"model": "openassist", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "gpt4", "combination": ["koala", "gpt4"]}
{"model": "llama", "combination": ["redpajama", "llama"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "llama", "combination": ["chatgpt", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "cohere", "combination": ["cohere", "mpt", "fo bias"]}
{"model": "cohere", "combination": ["koala", "cohere"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm"]}
{"model": "dolly", "combination": ["dolly", "mpt", "fo bias"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "lo bias"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama", "fo bias"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "baize", "combination": ["baize", "gpt4", "fo bias"]}
{"model": "alpaca", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "llama", "combination": ["dolly", "llama", "lo bias"]}
{"model": "baize", "combination": ["baize", "mpt", "fo bias"]}
{"model": "alpaca", "combination": ["koala", "alpaca"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4", "fo bias"]}
{"model": "baize", "combination": ["baize", "wizardlm"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "dolly", "combination": ["dolly", "falcon"]}
{"model": "alpaca", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "dolly", "combination": ["gpt4", "dolly"]}
{"model": "llama", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt", "fo bias"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "fo bias"]}
{"model": "baize", "combination": ["baize", "koala", "inconsistent"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca"]}
{"model": "cohere", "combination": ["baize", "cohere"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca"]}
{"model": "gpt4", "combination": ["gpt4", "redpajama"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "lo bias"]}
{"model": "baize", "combination": ["baize", "chatgpt", "fo bias"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "fo bias"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "fo bias"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "llama", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "falcon", "fo bias"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "alpaca", "combination": ["alpaca", "falcon", "fo bias"]}

{"model": "vicuna", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "llama", "combination": ["alpaca", "llama", "lo bias"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "lo bias"]}
{"model": "llama", "combination": ["baize", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "openassist", "combination": ["instructgpt", "openassist"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "koala", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "fo bias"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "gpt4"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "openassist", "combination": ["dolly", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "openassist", "combination": ["koala", "openassist", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "openassist", "combination": ["gpt4", "openassist"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca", "lo bias"]}
{"model": "koala", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "dolly", "combination": ["baize", "dolly"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt"]}
{"model": "falcon", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "falcon", "combination": ["baize", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "cohere", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt"]}
{"model": "mpt", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "fo bias"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "alpaca", "combination": ["dolly", "alpaca", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "openassist"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca", "lo bias"]}
{"model": "llama", "combination": ["chatgpt", "llama", "lo bias"]}
{"model": "openassist", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "openassist", "combination": ["openassist", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "falcon", "combination": ["openassist", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "lo bias"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "baize", "combination": ["vicuna", "baize", "lo bias"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "lo bias"]}

{"model": "instructgpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "llama", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "openassist", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "mpt"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "lo bias"]}
{"model": "falcon", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "baize", "combination": ["baize", "gpt4"]}
{"model": "cohere", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "openassist", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "llama", "combination": ["koala", "llama", "lo bias"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "llama", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "falcon"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "llama"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "openassist", "combination": ["chatgpt", "openassist"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "lo bias"]}
{"model": "koala", "combination": ["vicuna", "koala", "lo bias"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "lo bias"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "lo bias"]}
{"model": "openassist", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "lo bias"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "baize", "combination": ["vicuna", "baize", "inconsistent"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "baize", "combination": ["baize", "dolly"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "cohere", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "llama", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "llama", "combination": ["dolly", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}

{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "falcon", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "dolly", "combination": ["dolly", "alpaca"]}
{"model": "llama", "combination": ["dolly", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "openassist", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "gpt4"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "chatgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "cohere", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "dolly", "combination": ["baize", "dolly", "lo bias"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}
{"model": "redpajama", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "alpaca", "combination": ["cohere", "alpaca"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "llama", "combination": ["baize", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}

{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "llama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["koala", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "dolly", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "llama", "combination": ["cohere", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "lo bias"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "llama", "combination": ["gpt4", "llama", "lo bias"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "koala", "combination": ["koala", "chatgpt"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "openassist", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "llama", "combination": ["chatgpt", "llama"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "lo bias"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "mpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["baize", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4"]}
{"model": "llama", "combination": ["koala", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "baize", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "vicuna", "combination": ["vicuna", "falcon", "fo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "cohere", "combination": ["baize", "cohere", "lo bias"]}
{"model": "mpt", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "mpt", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "llama", "combination": ["mpt", "llama", "lo bias"]}
{"model": "llama", "combination": ["openassist", "llama", "lo bias"]}
{"model": "mpt", "combination": ["cohere", "mpt", "lo bias"]}
{"model": "llama", "combination": ["dolly", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "mpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "llama", "combination": ["vicuna", "llama"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "lo bias"]}

{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "cohere", "combination": ["instructgpt", "cohere"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "falcon", "combination": ["baize", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "llama", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "chatgpt"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "falcon", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "gpt4", "combination": ["baize", "gpt4"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "cohere", "combination": ["cohere", "dolly"]}
{"model": "mpt", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "cohere", "combination": ["gpt4", "cohere"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "cohere", "combination": ["baize", "cohere"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "gpt4", "combination": ["gpt4", "openassist"]}
{"model": "alpaca", "combination": ["koala", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "alpaca", "combination": ["baize", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "mpt", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "mpt", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "gpt4", "combination": ["gpt4", "dolly"]}
{"model": "llama", "combination": ["baize", "llama"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}

{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "gpt4"]}
{"model": "cohere", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "Invalid response", "combination": ["baize", "cohere"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "redpajama", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "llama", "combination": ["baize", "llama"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["koala", "llama", "lo bias"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "fo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "openassist", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "llama", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "falcon"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "cohere", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "mpt", "fo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "alpaca", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "mpt", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "cohere", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "llama", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "openassist", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "fo bias"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}

{"model": "cohere", "combination": ["instructgpt", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "falcon"]}
{"model": "koala", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["baize", "gpt4"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "openassist", "combination": ["openassist", "falcon", "fo bias"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt", "fo bias"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "openassist", "combination": ["openassist", "mpt", "fo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "koala", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "fo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "openassist", "combination": ["koala", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "llama", "combination": ["gpt4", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca", "lo bias"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "falcon", "combination": ["wizardlm", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "mpt", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "koala", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "falcon", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "Invalid response", "combination": ["baize", "cohere"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "falcon", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "fo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "openassist", "combination": ["openassist", "llama"]}
{"model": "openassist", "combination": ["openassist", "alpaca"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "fo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "llama", "combination": ["alpaca", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "falcon", "combination": ["instructgpt", "falcon"]}
{"model": "falcon", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4", "fo bias"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "mpt", "combination": ["wizardlm", "mpt"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}

{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "falcon", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "cohere", "combination": ["cohere", "openassist"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "fo bias"]}
{"model": "openassist", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "llama", "combination": ["chatgpt", "llama", "lo bias"]}
{"model": "dolly", "combination": ["baize", "dolly"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "lo bias"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "cohere", "combination": ["koala", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "cohere", "combination": ["cohere", "mpt"]}
{"model": "falcon", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "openassist", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "fo bias"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "mpt", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "llama", "combination": ["baize", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "cohere", "combination": ["baize", "cohere", "lo bias"]}
{"model": "mpt", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "openassist", "fo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "alpaca", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "falcon", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "koala", "combination": ["vicuna", "koala", "lo bias"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "koala", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "lo bias"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "fo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "koala", "combination": ["koala", "alpaca"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "baize"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}

{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "llama", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "Invalid response", "combination": ["baize", "cohere"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "dolly", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "falcon", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "baize", "combination": ["baize", "mpt", "fo bias"]}
{"model": "openassist", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}
{"model": "mpt", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "chatgpt"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "gpt4"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "koala", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "falcon", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}

{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["vicuna", "chatgpt"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "fo bias"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "koala", "combination": ["koala", "cohere"]}
{"model": "mpt", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["gpt4", "falcon"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "fo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "dolly", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "llama", "combination": ["gpt4", "llama", "lo bias"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "lo bias"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "mpt", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "fo bias"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "llama", "combination": ["dolly", "llama", "lo bias"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["gpt4", "mpt"]}
{"model": "cohere", "combination": ["cohere", "dolly"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "mpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "cohere", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "cohere", "combination": ["gpt4", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt", "fo bias"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "lo bias"]}

{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "baize", "combination": ["vicuna", "baize", "lo bias"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "cohere", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "lo bias"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "cohere", "combination": ["baize", "cohere", "lo bias"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca", "lo bias"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "mpt", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "dolly", "combination": ["dolly", "mpt", "fo bias"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "falcon", "combination": ["falcon", "llama"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "openassist", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "falcon"]}
{"model": "koala", "combination": ["vicuna", "koala", "lo bias"]}
{"model": "falcon", "combination": ["redpajama", "falcon"]}
{"model": "llama", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "gpt4", "combination": ["gpt4", "llama"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "openassist", "combination": ["baize", "openassist"]}
{"model": "mpt", "combination": ["koala", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "alpaca", "combination": ["koala", "alpaca", "lo bias"]}
{"model": "koala", "combination": ["koala", "redpajama", "fo bias"]}
{"model": "vicuna", "combination": ["vicuna", "llama"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "mpt", "combination": ["instructgpt", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "openassist", "combination": ["openassist", "llama"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "fo bias"]}
{"model": "falcon", "combination": ["baize", "falcon", "lo bias"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "lo bias"]}
{"model": "llama", "combination": ["baize", "llama", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "dolly"]}
{"model": "openassist", "combination": ["openassist", "alpaca"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "dolly", "combination": ["dolly", "llama"]}
{"model": "falcon", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "mpt", "combination": ["baize", "mpt", "lo bias"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "lo bias"]}
{"model": "openassist", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "fo bias"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "openassist", "combination": ["gpt4", "openassist"]}
{"model": "redpajama", "combination": ["baize", "redpajama", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "gpt4"]}
{"model": "dolly", "combination": ["dolly", "alpaca"]}
{"model": "llama", "combination": ["koala", "llama", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "dolly", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca", "lo bias"]}
{"model": "koala", "combination": ["baize", "koala", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "fo bias"]}
{"model": "dolly", "combination": ["baize", "dolly", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "lo bias"]}
{"model": "gpt4", "combination": ["gpt4", "mpt"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "llama", "combination": ["alpaca", "llama", "lo bias"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "fo bias"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "cohere", "combination": ["koala", "cohere", "lo bias"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "lo bias"]}

{"model": "mpt", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "cohere"]}
{"model": "dolly", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "baize", "combination": ["baize", "llama", "fo bias"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "llama", "combination": ["cohere", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "falcon", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "openassist", "fo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "gpt4"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "lo bias"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "dolly", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "openassist", "combination": ["wizardlm", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca", "lo bias"]}
{"model": "dolly", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "openassist", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "Invalid response", "combination": ["baize", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "cohere", "combination": ["koala", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "cohere", "combination": ["cohere", "mpt", "fo bias"]}
{"model": "openassist", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "falcon", "combination": ["koala", "falcon", "inconsistent"]}

{"model": "alpaca", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "falcon", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt"]}
{"model": "cohere", "combination": ["cohere", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "Invalid response", "combination": ["baize", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "llama", "combination": ["instructgpt", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "Invalid response", "combination": ["vicuna", "chatgpt"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "vicuna", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "cohere", "combination": ["cohere", "alpaca"]}
{"model": "openassist", "combination": ["openassist", "alpaca"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "mpt", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "gpt4"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "llama", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "alpaca", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "falcon", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "cohere", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "gpt4"]}
{"model": "llama", "combination": ["mpt", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "mpt", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "alpaca", "combination": ["redpajama", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "llama", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "llama", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "dolly", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["baize", "llama", "lo bias"]}

{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "cohere", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "chatgpt"]}
{"model": "falcon", "combination": ["baize", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca", "lo bias"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "alpaca", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "lo bias"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "llama", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "falcon", "combination": ["redpajama", "falcon"]}
{"model": "openassist", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "openassist", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "llama", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "mpt", "combination": ["baize", "mpt", "inconsistent"]}
{"model": "llama", "combination": ["baize", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "lo bias"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "lo bias"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "llama", "combination": ["cohere", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "openassist", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "llama", "combination": ["vicuna", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "llama", "combination": ["wizardlm", "llama", "lo bias"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "lo bias"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "openassist", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "gpt4"]}

{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere"]}
{"model": "vicuna", "combination": ["vicuna", "baize"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4"]}
{"model": "falcon", "combination": ["dolly", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4"]}
{"model": "koala", "combination": ["koala", "falcon"]}
{"model": "baize", "combination": ["baize", "openassist", "fo bias"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "fo bias"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "koala", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "lo bias"]}
{"model": "alpaca", "combination": ["openassist", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca", "lo bias"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "lo bias"]}
{"model": "openassist", "combination": ["openassist", "llama", "fo bias"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "openassist", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "lo bias"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "lo bias"]}
{"model": "koala", "combination": ["koala", "dolly"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "inconsistent"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere"]}
{"model": "koala", "combination": ["koala", "instructgpt", "fo bias"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt"]}
{"model": "baize", "combination": ["baize", "chatgpt", "fo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon"]}
{"model": "koala", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "fo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca", "lo bias"]}
{"model": "redpajama", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm", "lo bias"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama", "fo bias"]}
{"model": "gpt4", "combination": ["gpt4", "openassist"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "baize", "combination": ["baize", "koala", "fo bias"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "cohere", "combination": ["cohere", "llama", "fo bias"]}
{"model": "openassist", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "dolly", "combination": ["vicuna", "dolly"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "gpt4", "combination": ["gpt4", "cohere"]}
{"model": "falcon", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "lo bias"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca"]}
{"model": "koala", "combination": ["koala", "chatgpt", "fo bias"]}
{"model": "cohere", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "alpaca", "combination": ["alpaca", "llama", "fo bias"]}
{"model": "koala", "combination": ["koala", "alpaca"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "openassist", "combination": ["chatgpt", "openassist"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "lo bias"]}
{"model": "baize", "combination": ["baize", "alpaca", "fo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4"]}
{"model": "koala", "combination": ["koala", "cohere", "fo bias"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "lo bias"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca", "lo bias"]}
{"model": "falcon", "combination": ["alpaca", "falcon"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "mpt", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "openassist", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "fo bias"]}
{"model": "baize", "combination": ["baize", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "mpt", "combination": ["gpt4", "mpt"]}
{"model": "koala", "combination": ["koala", "wizardlm", "fo bias"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca", "lo bias"]}

{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "lo bias"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "openassist", "combination": ["koala", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "dolly", "combination": ["instructgpt", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "wizardlm", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["baize", "gpt4"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "vicuna", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "llama", "combination": ["openassist", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "openassist", "combination": ["dolly", "openassist"]}
{"model": "mpt", "combination": ["instructgpt", "mpt"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "mpt", "combination": ["baize", "mpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "falcon", "combination": ["falcon", "llama"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "baize", "combination": ["baize", "koala", "fo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "vicuna", "combination": ["vicuna", "llama"]}
{"model": "koala", "combination": ["koala", "mpt"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "lo bias"]}
{"model": "alpaca", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "baize", "combination": ["baize", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "fo bias"]}
{"model": "mpt", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "baize", "fo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "falcon", "combination": ["koala", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "falcon", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "mpt", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "cohere", "combination": ["cohere", "falcon", "inconsistent"]}
{"model": "alpaca", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "mpt", "combination": ["chatgpt", "mpt", "lo bias"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "mpt", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "cohere", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "baize", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}

{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "llama", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "llama", "combination": ["koala", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "lo bias"]}
{"model": "cohere", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "llama", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "llama", "combination": ["baize", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "lo bias"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "llama", "combination": ["openassist", "llama", "lo bias"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "llama", "combination": ["cohere", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "llama", "combination": ["dolly", "llama", "lo bias"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "lo bias"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "lo bias"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "dolly", "combination": ["koala", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "llama", "combination": ["chatgpt", "llama", "lo bias"]}
{"model": "openassist", "combination": ["vicuna", "openassist"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "lo bias"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "falcon", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "koala", "combination": ["vicuna", "koala", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "llama", "combination": ["gpt4", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "llama", "combination": ["vicuna", "llama", "lo bias"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "lo bias"]}

{"model": "mpt", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "openassist", "combination": ["baize", "openassist"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt", "fo bias"]}
{"model": "koala", "combination": ["koala", "falcon", "fo bias"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist"]}
{"model": "cohere", "combination": ["cohere", "openassist", "fo bias"]}
{"model": "koala", "combination": ["koala", "mpt", "fo bias"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "fo bias"]}
{"model": "koala", "combination": ["koala", "instructgpt", "fo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "falcon", "combination": ["cohere", "falcon"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "fo bias"]}
{"model": "dolly", "combination": ["vicuna", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "fo bias"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "falcon"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "cohere"]}
{"model": "llama", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "cohere", "fo bias"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "koala", "combination": ["koala", "dolly", "fo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "llama", "combination": ["baize", "llama", "inconsistent"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "koala", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "falcon"]}
{"model": "koala", "combination": ["koala", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "openassist"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "lo bias"]}
{"model": "mpt", "combination": ["dolly", "mpt", "lo bias"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt"]}
{"model": "mpt", "combination": ["cohere", "mpt", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "dolly", "combination": ["dolly", "openassist"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "mpt", "combination": ["chatgpt", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "dolly", "combination": ["wizardlm", "dolly"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "fo bias"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "vicuna", "combination": ["vicuna", "koala", "fo bias"]}
{"model": "mpt", "combination": ["vicuna", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "falcon", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "lo bias"]}
{"model": "mpt", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "koala", "combination": ["koala", "openassist", "fo bias"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["baize", "gpt4"]}
{"model": "llama", "combination": ["falcon", "llama"]}
{"model": "openassist", "combination": ["openassist", "falcon"]}
{"model": "llama", "combination": ["mpt", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}

{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "alpaca", "combination": ["openassist", "alpaca", "lo bias"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "mpt", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["chatgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["baize", "cohere"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca", "lo bias"]}
{"model": "mpt", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "mpt", "combination": ["mpt", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["baize", "gpt4"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "llama", "combination": ["openassist", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "Invalid response", "combination": ["baize", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "mpt", "combination": ["cohere", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "dolly", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "chatgpt"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "falcon", "combination": ["openassist", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "instructgpt"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}

{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "llama", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["vicuna", "llama"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere"]}
{"model": "alpaca", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "alpaca", "combination": ["dolly", "alpaca", "lo bias"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "mpt", "combination": ["koala", "mpt", "lo bias"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt"]}
{"model": "alpaca", "combination": ["openassist", "alpaca", "lo bias"]}
{"model": "redpajama", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "llama", "combination": ["cohere", "llama", "lo bias"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "llama", "combination": ["dolly", "llama", "lo bias"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "baize", "combination": ["baize", "instructgpt"]}
{"model": "alpaca", "combination": ["instructgpt", "alpaca", "lo bias"]}
{"model": "baize", "combination": ["baize", "gpt4"]}
{"model": "koala", "combination": ["vicuna", "koala", "lo bias"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "openassist", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca", "lo bias"]}
{"model": "llama", "combination": ["koala", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "chatgpt"]}
{"model": "mpt", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "cohere", "combination": ["baize", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "lo bias"]}
{"model": "mpt", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "alpaca"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "dolly", "combination": ["baize", "dolly", "lo bias"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "llama", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "inconsistent"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "koala", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "baize", "combination": ["vicuna", "baize", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "lo bias"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "baize", "combination": ["baize", "falcon", "fo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "lo bias"]}
{"model": "llama", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "lo bias"]}
{"model": "mpt", "combination": ["dolly", "mpt", "lo bias"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["cohere", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}

{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "fo bias"]}
{"model": "cohere", "combination": ["baize", "cohere", "lo bias"]}
{"model": "dolly", "combination": ["dolly", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["koala", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "falcon", "combination": ["openassist", "falcon", "lo bias"]}
{"model": "dolly", "combination": ["baize", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "koala", "combination": ["baize", "koala"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "lo bias"]}
{"model": "redpajama", "combination": ["redpajama", "falcon", "inconsistent"]}
{"model": "redpajama", "combination": ["redpajama", "llama", "inconsistent"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "lo bias"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "lo bias"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "falcon", "combination": ["baize", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "koala", "combination": ["koala", "gpt4", "fo bias"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "lo bias"]}
{"model": "llama", "combination": ["mpt", "llama", "lo bias"]}
{"model": "dolly", "combination": ["dolly", "falcon"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "lo bias"]}
{"model": "koala", "combination": ["koala", "alpaca", "inconsistent"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "redpajama", "combination": ["redpajama", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "llama"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "mpt", "combination": ["cohere", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "baize", "combination": ["baize", "mpt"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "openassist", "combination": ["baize", "openassist", "lo bias"]}
{"model": "dolly", "combination": ["instructgpt", "dolly"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "lo bias"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "koala", "combination": ["koala", "mpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "llama"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "wizardlm", "combination": ["wizardlm", "llama"]}
{"model": "koala", "combination": ["koala", "chatgpt", "fo bias"]}
{"model": "koala", "combination": ["vicuna", "koala", "lo bias"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "lo bias"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "openassist", "combination": ["dolly", "openassist", "lo bias"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "llama"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "lo bias"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist"]}
{"model": "llama", "combination": ["alpaca", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "lo bias"]}
{"model": "dolly", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt"]}
{"model": "falcon", "combination": ["mpt", "falcon"]}
{"model": "openassist", "combination": ["cohere", "openassist", "lo bias"]}
{"model": "koala", "combination": ["koala", "dolly", "fo bias"]}
{"model": "openassist", "combination": ["openassist", "mpt"]}
{"model": "redpajama", "combination": ["gpt4", "redpajama"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "mpt", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "gpt4", "combination": ["gpt4", "llama"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "koala", "combination": ["koala", "openassist"]}
{"model": "dolly", "combination": ["gpt4", "dolly"]}
{"model": "redpajama", "combination": ["koala", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "llama"]}
{"model": "mpt", "combination": ["wizardlm", "mpt", "lo bias"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "lo bias"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon"]}
{"model": "baize", "combination": ["vicuna", "baize", "lo bias"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "lo bias"]}
{"model": "gpt4", "combination": ["gpt4", "mpt"]}
{"model": "baize", "combination": ["baize", "llama"]}
{"model": "dolly", "combination": ["vicuna", "dolly"]}
{"model": "dolly", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "vicuna", "combination": ["vicuna", "mpt"]}
{"model": "llama", "combination": ["openassist", "llama", "lo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}

{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "vicuna", "combination": ["vicuna", "koala"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt", "lo bias"]}
{"model": "koala", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "koala", "combination": ["koala", "instructgpt"]}
{"model": "baize", "combination": ["baize", "llama"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "mpt", "combination": ["cohere", "mpt", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "mpt", "combination": ["vicuna", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "mpt", "combination": ["chatgpt", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "openassist", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "mpt", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm", "inconsistent"]}
{"model": "baize", "combination": ["baize", "gpt4", "fo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "baize", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "dolly", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "baize", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["cohere", "openassist"]}
{"model": "Invalid response", "combination": ["baize", "cohere"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "mpt", "combination": ["gpt4", "mpt", "inconsistent"]}
{"model": "cohere", "combination": ["instructgpt", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "koala", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "chatgpt"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "dolly", "combination": ["cohere", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "baize", "combination": ["baize", "falcon"]}

{"model": "alpaca", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "lo bias"]}
{"model": "falcon", "combination": ["openassist", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "llama", "combination": ["baize", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca", "lo bias"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["wizardlm", "openassist"]}
{"model": "dolly", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "dolly", "combination": ["cohere", "dolly"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "lo bias"]}
{"model": "koala", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "openassist"]}
{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "openassist", "combination": ["cohere", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "dolly", "combination": ["chatgpt", "dolly"]}
{"model": "alpaca", "combination": ["dolly", "alpaca", "lo bias"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "openassist", "combination": ["openassist", "mpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "dolly", "combination": ["wizardlm", "dolly", "inconsistent"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm", "lo bias"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "dolly", "combination": ["dolly", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "llama", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "vicuna", "combination": ["vicuna", "baize"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt"]}
{"model": "openassist", "combination": ["dolly", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "openassist", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "gpt4", "combination": ["baize", "gpt4"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "lo bias"]}
{"model": "cohere", "combination": ["baize", "cohere"]}
{"model": "openassist", "combination": ["openassist", "llama"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "lo bias"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "lo bias"]}
{"model": "cohere", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "inconsistent"]}
{"model": "falcon", "combination": ["mpt", "falcon"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}

{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "koala", "combination": ["koala", "gpt4", "fo bias"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist"]}
{"model": "falcon", "combination": ["falcon", "llama", "fo bias"]}
{"model": "dolly", "combination": ["dolly", "mpt", "fo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "dolly"]}
{"model": "koala", "combination": ["koala", "chatgpt", "fo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "llama", "combination": ["baize", "llama", "lo bias"]}
{"model": "alpaca", "combination": ["redpajama", "alpaca"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "koala", "combination": ["koala", "llama", "fo bias"]}
{"model": "koala", "combination": ["koala", "dolly", "fo bias"]}
{"model": "koala", "combination": ["koala", "wizardlm", "fo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "falcon", "combination": ["openassist", "falcon"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "fo bias"]}
{"model": "alpaca", "combination": ["alpaca", "llama"]}
{"model": "wizardlm", "combination": ["wizardlm", "falcon", "fo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "mpt", "combination": ["mpt", "llama"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt"]}
{"model": "vicuna", "combination": ["vicuna", "openassist"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "llama", "combination": ["gpt4", "llama", "lo bias"]}
{"model": "mpt", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "dolly", "combination": ["vicuna", "dolly"]}
{"model": "dolly", "combination": ["dolly", "openassist"]}
{"model": "mpt", "combination": ["mpt", "alpaca", "fo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama"]}
{"model": "dolly", "combination": ["dolly", "llama"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt"]}
{"model": "gpt4", "combination": ["gpt4", "falcon", "fo bias"]}
{"model": "falcon", "combination": ["vicuna", "falcon"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "fo bias"]}
{"model": "openassist", "combination": ["openassist", "mpt", "fo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "fo bias"]}
{"model": "llama", "combination": ["openassist", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "alpaca", "combination": ["baize", "alpaca"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4"]}
{"model": "cohere", "combination": ["cohere", "openassist"]}
{"model": "dolly", "combination": ["baize", "dolly"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "fo bias"]}
{"model": "dolly", "combination": ["dolly", "alpaca"]}
{"model": "openassist", "combination": ["baize", "openassist", "lo bias"]}
{"model": "koala", "combination": ["koala", "openassist"]}
{"model": "vicuna", "combination": ["vicuna", "llama", "fo bias"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "fo bias"]}
{"model": "falcon", "combination": ["baize", "falcon", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly", "fo bias"]}
{"model": "alpaca", "combination": ["alpaca", "falcon"]}
{"model": "koala", "combination": ["baize", "koala"]}
{"model": "mpt", "combination": ["mpt", "falcon", "fo bias"]}
{"model": "alpaca", "combination": ["wizardlm", "alpaca"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt"]}
{"model": "gpt4", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "gpt4", "combination": ["gpt4", "cohere", "fo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "fo bias"]}
{"model": "alpaca", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "baize", "combination": ["vicuna", "baize"]}
{"model": "koala", "combination": ["koala", "cohere", "fo bias"]}
{"model": "cohere", "combination": ["cohere", "llama", "fo bias"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "wizardlm", "fo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt", "fo bias"]}
{"model": "cohere", "combination": ["cohere", "dolly", "fo bias"]}
{"model": "dolly", "combination": ["dolly", "falcon", "fo bias"]}
{"model": "koala", "combination": ["koala", "alpaca", "fo bias"]}
{"model": "llama", "combination": ["chatgpt", "llama", "lo bias"]}
{"model": "redpajama", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "llama", "combination": ["wizardlm", "llama"]}
{"model": "koala", "combination": ["vicuna", "koala", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "gpt4", "fo bias"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "baize", "combination": ["baize", "cohere", "fo bias"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "gpt4", "combination": ["baize", "gpt4"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly", "fo bias"]}
{"model": "openassist", "combination": ["openassist", "alpaca", "fo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon", "fo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4"]}
{"model": "mpt", "combination": ["baize", "mpt"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "fo bias"]}
{"model": "koala", "combination": ["koala", "instructgpt", "fo bias"]}

{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "llama", "combination": ["cohere", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "llama", "combination": ["vicuna", "llama", "lo bias"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "llama", "combination": ["falcon", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "lo bias"]}
{"model": "openassist", "combination": ["vicuna", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "inconsistent"]}
{"model": "cohere", "combination": ["baize", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "llama", "combination": ["baize", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "chatgpt", "combination": ["chatgpt", "alpaca"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "openassist", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "lo bias"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "falcon", "combination": ["cohere", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "openassist", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "llama", "combination": ["mpt", "llama", "inconsistent"]}
{"model": "cohere", "combination": ["vicuna", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "dolly", "combination": ["vicuna", "dolly", "inconsistent"]}
{"model": "dolly", "combination": ["instructgpt", "dolly", "inconsistent"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "openassist", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "falcon", "combination": ["baize", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "mpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}

{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "dolly", "combination": ["dolly", "mpt"]}
{"model": "instructgpt", "combination": ["instructgpt", "cohere", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "baize", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["vicuna", "chatgpt"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "wizardlm", "combination": ["wizardlm", "dolly"]}
{"model": "dolly", "combination": ["dolly", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "koala", "combination": ["koala", "mpt"]}
{"model": "vicuna", "combination": ["vicuna", "baize"]}
{"model": "wizardlm", "combination": ["wizardlm", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "cohere", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "openassist", "combination": ["chatgpt", "openassist", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "dolly", "fo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "openassist", "combination": ["cohere", "openassist", "lo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["koala", "dolly"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "dolly", "combination": ["instructgpt", "dolly"]}
{"model": "vicuna", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["koala", "llama"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "llama", "combination": ["baize", "llama", "inconsistent"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt", "lo bias"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "openassist", "combination": ["openassist", "alpaca"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "dolly", "combination": ["cohere", "dolly", "lo bias"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "openassist", "combination": ["gpt4", "openassist", "lo bias"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "lo bias"]}
{"model": "dolly", "combination": ["chatgpt", "dolly", "lo bias"]}
{"model": "instructgpt", "combination": ["chatgpt", "instructgpt"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "falcon", "combination": ["gpt4", "falcon", "lo bias"]}
{"model": "llama", "combination": ["vicuna", "llama", "lo bias"]}
{"model": "cohere", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "falcon", "combination": ["instructgpt", "falcon", "lo bias"]}
{"model": "falcon", "combination": ["alpaca", "falcon", "inconsistent"]}
{"model": "openassist", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "baize", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "vicuna", "combination": ["vicuna", "wizardlm"]}
{"model": "cohere", "combination": ["chatgpt", "cohere", "lo bias"]}
{"model": "llama", "combination": ["mpt", "llama", "lo bias"]}
{"model": "wizardlm", "combination": ["baize", "wizardlm", "lo bias"]}
{"model": "openassist", "combination": ["instructgpt", "openassist", "lo bias"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "fo bias"]}
{"model": "llama", "combination": ["falcon", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["mpt", "falcon", "inconsistent"]}
{"model": "openassist", "combination": ["dolly", "openassist", "lo bias"]}
{"model": "vicuna", "combination": ["vicuna", "koala"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["baize", "falcon"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "vicuna", "combination": ["vicuna", "openassist"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "lo bias"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "lo bias"]}
{"model": "gpt4", "combination": ["baize", "gpt4"]}
{"model": "instructgpt", "combination": ["instructgpt", "redpajama", "inconsistent"]}
{"model": "wizardlm", "combination": ["wizardlm", "cohere", "fo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "inconsistent"]}
{"model": "dolly", "combination": ["baize", "dolly"]}

{"model": "cohere", "combination": ["koala", "cohere", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "llama", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "cohere", "combination": ["cohere", "openassist", "fo bias"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "cohere", "combination": ["wizardlm", "cohere", "lo bias"]}
{"model": "instructgpt", "combination": ["koala", "instructgpt", "lo bias"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "mpt", "combination": ["baize", "mpt"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "gpt4", "combination": ["baize", "gpt4"]}
{"model": "wizardlm", "combination": ["wizardlm", "mpt", "fo bias"]}
{"model": "falcon", "combination": ["cohere", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "falcon"]}
{"model": "instructgpt", "combination": ["instructgpt", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "openassist", "combination": ["openassist", "mpt", "fo bias"]}
{"model": "instructgpt", "combination": ["instructgpt", "openassist", "fo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "falcon", "combination": ["gpt4", "falcon"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "lo bias"]}
{"model": "baize", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "instructgpt", "combination": ["instructgpt", "alpaca", "inconsistent"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "llama", "combination": ["vicuna", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["baize", "alpaca"]}
{"model": "vicuna", "combination": ["vicuna", "instructgpt"]}
{"model": "koala", "combination": ["koala", "wizardlm"]}
{"model": "Invalid response", "combination": ["baize", "instructgpt"]}
{"model": "falcon", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "dolly", "combination": ["cohere", "dolly"]}
{"model": "baize", "combination": ["baize", "wizardlm", "inconsistent"]}
{"model": "cohere", "combination": ["instructgpt", "cohere"]}
{"model": "vicuna", "combination": ["vicuna", "koala"]}
{"model": "dolly", "combination": ["baize", "dolly", "inconsistent"]}
{"model": "chatgpt", "combination": ["chatgpt", "dolly"]}
{"model": "dolly", "combination": ["dolly", "openassist", "fo bias"]}
{"model": "dolly", "combination": ["instructgpt", "dolly"]}
{"model": "vicuna", "combination": ["vicuna", "cohere", "fo bias"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "gpt4", "combination": ["gpt4", "openassist", "fo bias"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "llama", "combination": ["koala", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "llama"]}
{"model": "gpt4", "combination": ["gpt4", "llama", "inconsistent"]}
{"model": "instructgpt", "combination": ["instructgpt", "gpt4"]}
{"model": "instructgpt", "combination": ["wizardlm", "instructgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "mpt", "combination": ["koala", "mpt", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "mpt"]}
{"model": "llama", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["koala", "falcon"]}
{"model": "chatgpt", "combination": ["chatgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "gpt4", "combination": ["gpt4", "mpt", "fo bias"]}
{"model": "gpt4", "combination": ["koala", "gpt4"]}
{"model": "chatgpt", "combination": ["chatgpt", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "lo bias"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4"]}
{"model": "falcon", "combination": ["dolly", "falcon"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "fo bias"]}
{"model": "Invalid response", "combination": ["gpt4", "alpaca"]}
{"model": "chatgpt", "combination": ["wizardlm", "chatgpt"]}
{"model": "mpt", "combination": ["mpt", "falcon"]}
{"model": "chatgpt", "combination": ["chatgpt", "falcon", "fo bias"]}
{"model": "gpt4", "combination": ["gpt4", "dolly", "fo bias"]}
{"model": "llama", "combination": ["cohere", "llama", "inconsistent"]}
{"model": "dolly", "combination": ["vicuna", "dolly"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["baize", "openassist"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "openassist", "combination": ["wizardlm", "openassist"]}
{"model": "instructgpt", "combination": ["instructgpt", "falcon"]}
{"model": "wizardlm", "combination": ["vicuna", "wizardlm", "lo bias"]}

{"model": "openassist", "combination": ["baize", "openassist"]}
{"model": "instructgpt", "combination": ["vicuna", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["koala", "wizardlm"]}
{"model": "llama", "combination": ["baize", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "openassist", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "gpt4", "combination": ["chatgpt", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "dolly", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "alpaca", "combination": ["baize", "alpaca", "inconsistent"]}
{"model": "koala", "combination": ["koala", "instructgpt", "inconsistent"]}
{"model": "openassist", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["mpt", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "baize", "combination": ["vicuna", "baize", "lo bias"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "dolly", "combination": ["dolly", "falcon"]}
{"model": "dolly", "combination": ["instructgpt", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "mpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "falcon", "combination": ["chatgpt", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["chatgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "gpt4", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "gpt4", "combination": ["vicuna", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "Invalid response", "combination": ["baize", "cohere"]}
{"model": "falcon", "combination": ["vicuna", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "Invalid response", "combination": ["koala", "cohere"]}
{"model": "dolly", "combination": ["gpt4", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["koala", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "openassist", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "dolly", "combination": ["baize", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "koala", "combination": ["koala", "llama"]}
{"model": "gpt4", "combination": ["wizardlm", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "openassist", "combination": ["dolly", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["cohere", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "cohere"]}
{"model": "Invalid response", "combination": ["vicuna", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "gpt4"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "gpt4", "combination": ["gpt4", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "openassist"]}
{"model": "Invalid response", "combination": ["cohere", "mpt"]}
{"model": "chatgpt", "combination": ["chatgpt", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["gpt4", "cohere"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["redpajama", "mpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "instructgpt", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "Invalid response", "combination": ["mpt", "llama"]}
{"model": "openassist", "combination": ["openassist", "llama"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "koala", "combination": ["baize", "koala", "inconsistent"]}
{"model": "llama", "combination": ["chatgpt", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "llama"]}
{"model": "gpt4", "combination": ["instructgpt", "gpt4", "lo bias"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "koala", "combination": ["koala", "dolly"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["koala", "chatgpt"]}
{"model": "falcon", "combination": ["baize", "falcon", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "chatgpt"]}
{"model": "cohere", "combination": ["cohere", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["alpaca", "llama"]}
{"model": "redpajama", "combination": ["baize", "redpajama", "inconsistent"]}

{"model": "llama", "combination": ["mpt", "llama", "lo bias"]}
{"model": "alpaca", "combination": ["gpt4", "alpaca", "lo bias"]}
{"model": "llama", "combination": ["alpaca", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["openassist", "mpt"]}
{"model": "Invalid response", "combination": ["koala", "alpaca"]}
{"model": "falcon", "combination": ["dolly", "falcon", "lo bias"]}
{"model": "cohere", "combination": ["baize", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "llama"]}
{"model": "llama", "combination": ["wizardlm", "llama", "inconsistent"]}
{"model": "falcon", "combination": ["koala", "falcon", "lo bias"]}
{"model": "Invalid response", "combination": ["cohere", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "wizardlm"]}
{"model": "Invalid response", "combination": ["vicuna", "koala"]}
{"model": "Invalid response", "combination": ["wizardlm", "instructgpt"]}
{"model": "Invalid response", "combination": ["dolly", "redpajama"]}
{"model": "llama", "combination": ["vicuna", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "llama"]}
{"model": "Invalid response", "combination": ["wizardlm", "mpt"]}
{"model": "chatgpt", "combination": ["vicuna", "chatgpt", "lo bias"]}
{"model": "Invalid response", "combination": ["alpaca", "falcon"]}
{"model": "Invalid response", "combination": ["cohere", "llama"]}
{"model": "Invalid response", "combination": ["redpajama", "llama"]}
{"model": "Invalid response", "combination": ["openassist", "alpaca"]}
{"model": "Invalid response", "combination": ["koala", "instructgpt"]}
{"model": "Invalid response", "combination": ["baize", "wizardlm"]}
{"model": "Invalid response", "combination": ["gpt4", "dolly"]}
{"model": "Invalid response", "combination": ["redpajama", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "redpajama"]}
{"model": "baize", "combination": ["baize", "gpt4", "inconsistent"]}
{"model": "Invalid response", "combination": ["vicuna", "dolly"]}
{"model": "Invalid response", "combination": ["chatgpt", "dolly"]}
{"model": "Invalid response", "combination": ["koala", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "dolly"]}
{"model": "Invalid response", "combination": ["chatgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["vicuna", "mpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "alpaca"]}
{"model": "alpaca", "combination": ["mpt", "alpaca", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "dolly"]}
{"model": "Invalid response", "combination": ["vicuna", "cohere"]}
{"model": "Invalid response", "combination": ["redpajama", "alpaca"]}
{"model": "Invalid response", "combination": ["instructgpt", "cohere"]}
{"model": "Invalid response", "combination": ["cohere", "openassist"]}
{"model": "openassist", "combination": ["koala", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["dolly", "alpaca"]}
{"model": "Invalid response", "combination": ["dolly", "mpt"]}
{"model": "mpt", "combination": ["koala", "mpt", "inconsistent"]}
{"model": "mpt", "combination": ["gpt4", "mpt", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "baize"]}
{"model": "Invalid response", "combination": ["mpt", "falcon"]}
{"model": "Invalid response", "combination": ["instructgpt", "openassist"]}
{"model": "Invalid response", "combination": ["gpt4", "falcon"]}
{"model": "cohere", "combination": ["koala", "cohere", "inconsistent"]}
{"model": "cohere", "combination": ["cohere", "redpajama", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "alpaca"]}
{"model": "Invalid response", "combination": ["vicuna", "falcon"]}
{"model": "Invalid response", "combination": ["vicuna", "redpajama"]}
{"model": "Invalid response", "combination": ["cohere", "falcon"]}
{"model": "cohere", "combination": ["cohere", "mpt", "inconsistent"]}
{"model": "chatgpt", "combination": ["baize", "chatgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "gpt4"]}
{"model": "Invalid response", "combination": ["gpt4", "openassist"]}
{"model": "Invalid response", "combination": ["baize", "redpajama"]}
{"model": "cohere", "combination": ["wizardlm", "cohere"]}
{"model": "Invalid response", "combination": ["baize", "dolly"]}
{"model": "mpt", "combination": ["redpajama", "mpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "mpt"]}
{"model": "baize", "combination": ["baize", "instructgpt", "inconsistent"]}
{"model": "Invalid response", "combination": ["openassist", "falcon"]}
{"model": "Invalid response", "combination": ["wizardlm", "redpajama"]}
{"model": "Invalid response", "combination": ["instructgpt", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "redpajama"]}
{"model": "Invalid response", "combination": ["openassist", "llama"]}
{"model": "falcon", "combination": ["wizardlm", "falcon", "inconsistent"]}
{"model": "chatgpt", "combination": ["koala", "chatgpt", "lo bias"]}
{"model": "llama", "combination": ["koala", "llama", "lo bias"]}
{"model": "Invalid response", "combination": ["instructgpt", "dolly"]}
{"model": "wizardlm", "combination": ["wizardlm", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["falcon", "llama"]}
{"model": "alpaca", "combination": ["chatgpt", "alpaca", "lo bias"]}
{"model": "Invalid response", "combination": ["chatgpt", "instructgpt"]}
{"model": "Invalid response", "combination": ["wizardlm", "chatgpt"]}
{"model": "Invalid response", "combination": ["instructgpt", "mpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "mpt"]}
{"model": "llama", "combination": ["instructgpt", "llama", "lo bias"]}
{"model": "chatgpt", "combination": ["chatgpt", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["instructgpt", "falcon"]}
{"model": "Invalid response", "combination": ["gpt4", "llama"]}
{"model": "Invalid response", "combination": ["baize", "falcon"]}
{"model": "openassist", "combination": ["baize", "openassist", "inconsistent"]}
{"model": "Invalid response", "combination": ["wizardlm", "gpt4"]}
{"model": "dolly", "combination": ["koala", "dolly", "lo bias"]}
{"model": "Invalid response", "combination": ["vicuna", "gpt4"]}
{"model": "Invalid response", "combination": ["vicuna", "openassist"]}
{"model": "Invalid response", "combination": ["dolly", "openassist"]}
{"model": "Invalid response", "combination": ["chatgpt", "cohere"]}
{"model": "vicuna", "combination": ["vicuna", "alpaca"]}
{"model": "wizardlm", "combination": ["koala", "wizardlm", "lo bias"]}
{"model": "baize", "combination": ["baize", "alpaca", "fo bias"]}
{"model": "Invalid response", "combination": ["dolly", "llama"]}
{"model": "Invalid response", "combination": ["chatgpt", "falcon"]}
{"model": "gpt4", "combination": ["koala", "gpt4", "lo bias"]}
{"model": "cohere", "combination": ["gpt4", "cohere", "inconsistent"]}
{"model": "Invalid response", "combination": ["baize", "koala"]}
{"model": "Invalid response", "combination": ["vicuna", "instructgpt"]}
{"model": "Invalid response", "combination": ["chatgpt", "gpt4"]}

