LICENSE
MANIFEST.in
README.md
setup.py
example/outputs.json
src/alpaca_eval/__init__.py
src/alpaca_eval/analyze.py
src/alpaca_eval/completion_parsers.py
src/alpaca_eval/constants.py
src/alpaca_eval/main.py
src/alpaca_eval/plotting.py
src/alpaca_eval/processors.py
src/alpaca_eval/types.py
src/alpaca_eval/utils.py
src/alpaca_eval.egg-info/PKG-INFO
src/alpaca_eval.egg-info/SOURCES.txt
src/alpaca_eval.egg-info/dependency_links.txt
src/alpaca_eval.egg-info/entry_points.txt
src/alpaca_eval.egg-info/requires.txt
src/alpaca_eval.egg-info/top_level.txt
src/alpaca_eval/annotators/__init__.py
src/alpaca_eval/annotators/base.py
src/alpaca_eval/annotators/pairwise_evaluator.py
src/alpaca_eval/decoders/__init__.py
src/alpaca_eval/decoders/anthropic.py
src/alpaca_eval/decoders/bedrock_anthropic.py
src/alpaca_eval/decoders/cache.py
src/alpaca_eval/decoders/cohere.py
src/alpaca_eval/decoders/google.py
src/alpaca_eval/decoders/huggingface_api.py
src/alpaca_eval/decoders/huggingface_local.py
src/alpaca_eval/decoders/jinachat.py
src/alpaca_eval/decoders/openai.py
src/alpaca_eval/decoders/replicate.py
src/alpaca_eval/decoders/test.py
src/alpaca_eval/decoders/vllm_local.py
src/alpaca_eval/evaluators_configs/README.md
src/alpaca_eval/evaluators_configs/Self-taught-llama3.1-70B-dpo/config.yaml
src/alpaca_eval/evaluators_configs/Self-taught-llama3.1-70B-dpo/self_taught.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_clf_cot_gpt4_turbo/alpaca_eval_clf_cot.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_clf_cot_gpt4_turbo/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_clf_gpt4_turbo/alpaca_eval_clf.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_clf_gpt4_turbo/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_cot_gpt4_turbo_fn/alpaca_eval_fn.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_cot_gpt4_turbo_fn/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4/alpaca_eval.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_0314/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_0613/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_fn/alpaca_eval_fn.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_fn/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_turbo_fn/alpaca_eval_fn.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_turbo_fn/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_llama3_70b_fn/alpaca_eval_fn.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_llama3_70b_fn/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_vllm_llama3_70b_fn/alpaca_eval_fn.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_vllm_llama3_70b_fn/annotations_seed0_configs.json
src/alpaca_eval/evaluators_configs/alpaca_eval_vllm_llama3_70b_fn/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b1_chat_v0_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b1_chat_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b1_cot_json_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b1_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b4_cot_json_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b5_diana_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b5_joe_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b5_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_farm/text_b1_v0_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/text_b1_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/text_b4_reasoning_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/text_b5_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm_greedy_gpt4/chatml_b5_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm_greedy_gpt4/configs.yaml
src/alpaca_eval/evaluators_configs/aviary_gpt4/aviary_prompt.txt
src/alpaca_eval/evaluators_configs/aviary_gpt4/configs.yaml
src/alpaca_eval/evaluators_configs/bedrock_claude/configs.yaml
src/alpaca_eval/evaluators_configs/bedrock_claude_2/configs.yaml
src/alpaca_eval/evaluators_configs/chatgpt/basic_prompt.txt
src/alpaca_eval/evaluators_configs/chatgpt/configs.yaml
src/alpaca_eval/evaluators_configs/chatgpt_fn/basic_function_prompt.txt
src/alpaca_eval/evaluators_configs/chatgpt_fn/configs.yaml
src/alpaca_eval/evaluators_configs/claude/basic_prompt.txt
src/alpaca_eval/evaluators_configs/claude/configs.yaml
src/alpaca_eval/evaluators_configs/claude_2/configs.yaml
src/alpaca_eval/evaluators_configs/claude_3_opus_ranking/configs.yaml
src/alpaca_eval/evaluators_configs/claude_3_opus_ranking/ranking_prompt.txt
src/alpaca_eval/evaluators_configs/claude_ranking/configs.yaml
src/alpaca_eval/evaluators_configs/claude_ranking/ranking_prompt.txt
src/alpaca_eval/evaluators_configs/cohere/configs.yaml
src/alpaca_eval/evaluators_configs/gpt-3.5-turbo-1106_ranking/annotations_seed0_configs.json
src/alpaca_eval/evaluators_configs/gpt-3.5-turbo-1106_ranking/configs.yaml
src/alpaca_eval/evaluators_configs/gpt-3.5-turbo-1106_ranking/ranking_prompt.txt
src/alpaca_eval/evaluators_configs/gpt35_turbo_instruct/configs.yaml
src/alpaca_eval/evaluators_configs/gpt4/configs.yaml
src/alpaca_eval/evaluators_configs/gpt4_turbo/configs.yaml
src/alpaca_eval/evaluators_configs/gpt4_turbo_clf/basic_clf_prompt.txt
src/alpaca_eval/evaluators_configs/gpt4_turbo_clf/configs.yaml
src/alpaca_eval/evaluators_configs/gpt4_turbo_cot_clf/basic_clf_cot_prompt.txt
src/alpaca_eval/evaluators_configs/gpt4_turbo_cot_clf/configs.yaml
src/alpaca_eval/evaluators_configs/gpt4_turbo_cot_logprob/configs.yaml
src/alpaca_eval/evaluators_configs/gpt4_turbo_logprob/configs.yaml
src/alpaca_eval/evaluators_configs/guanaco_33b/basic_prompt.txt
src/alpaca_eval/evaluators_configs/guanaco_33b/configs.yaml
src/alpaca_eval/evaluators_configs/improved_aviary_gpt4/configs.yaml
src/alpaca_eval/evaluators_configs/improved_lmsys_gpt4/configs.yaml
src/alpaca_eval/evaluators_configs/lmsys_gpt4/configs.yaml
src/alpaca_eval/evaluators_configs/lmsys_gpt4/lmsys_prompt.txt
src/alpaca_eval/evaluators_configs/mistral-large-2402_ranking/configs.yaml
src/alpaca_eval/evaluators_configs/oasst_pythia_12b/basic_prompt.txt
src/alpaca_eval/evaluators_configs/oasst_pythia_12b/configs.yaml
src/alpaca_eval/evaluators_configs/test/configs.yaml
src/alpaca_eval/evaluators_configs/text_davinci_003/basic_prompt.txt
src/alpaca_eval/evaluators_configs/text_davinci_003/configs.yaml
src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_cot_gpt4_turbo/configs.yaml
src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_gpt-4o-mini-2024-07-18/configs.yaml
src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_gpt4_turbo/configs.yaml
src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_gpt4_turbo_new/configs.yaml
src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_vllm_llama3_70b/README.md
src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_vllm_llama3_70b/configs.yaml
src/alpaca_eval/leaderboards/data_AlpacaEval/alpaca_eval_gpt4_leaderboard.csv
src/alpaca_eval/leaderboards/data_AlpacaEval/chatgpt_fn_leaderboard.csv
src/alpaca_eval/leaderboards/data_AlpacaEval/claude_leaderboard.csv
src/alpaca_eval/leaderboards/data_AlpacaEval/text_davinci_003_leaderboard.csv
src/alpaca_eval/leaderboards/data_AlpacaEval_2/alpaca_eval_cot_gpt4_turbo_fn_leaderboard.csv
src/alpaca_eval/leaderboards/data_AlpacaEval_2/alpaca_eval_gpt4_turbo_fn_leaderboard.csv
src/alpaca_eval/leaderboards/data_AlpacaEval_2/claude_3_opus_ranking_leaderboard.csv
src/alpaca_eval/leaderboards/data_AlpacaEval_2/mistral-large-2402_ranking_leaderboard.csv
src/alpaca_eval/leaderboards/data_AlpacaEval_2/weighted_alpaca_eval_gpt4_turbo_leaderboard.csv
src/alpaca_eval/leaderboards/evaluators/evaluators_leaderboard.csv
src/alpaca_eval/metrics/__init__.py
src/alpaca_eval/metrics/glm_winrate.py
src/alpaca_eval/metrics/helpers.py
src/alpaca_eval/metrics/winrate.py
src/alpaca_eval/models_configs/Conifer-7B-DPO/configs.yaml
src/alpaca_eval/models_configs/Conifer-7B-DPO/prompt.txt
src/alpaca_eval/models_configs/Contextual-KTO-Mistral-PairRM/configs.yaml
src/alpaca_eval/models_configs/Contextual-KTO-Mistral-PairRM/prompt.txt
src/alpaca_eval/models_configs/Contextual-KTO-Mistral-PairRM-Verified/configs.yaml
src/alpaca_eval/models_configs/Contextual-KTO-Mistral-PairRM-Verified/prompt.txt
src/alpaca_eval/models_configs/Ein-70B-v0.1/configs.yaml
src/alpaca_eval/models_configs/Ein-70B-v0.1/prompt.txt
src/alpaca_eval/models_configs/FsfairX-Zephyr-Chat-v0.1/configs.yaml
src/alpaca_eval/models_configs/FuseChat-Gemma-2-9B-Instruct/configs.yaml
src/alpaca_eval/models_configs/FuseChat-Gemma-2-9B-Instruct/prompt.txt
src/alpaca_eval/models_configs/FuseChat-Llama-3.1-8B-Instruct/configs.yaml
src/alpaca_eval/models_configs/FuseChat-Llama-3.1-8B-Instruct/prompt.txt
src/alpaca_eval/models_configs/FuseChat-Llama-3.2-1B-Instruct/configs.yaml
src/alpaca_eval/models_configs/FuseChat-Llama-3.2-3B-Instruct/configs.yaml
src/alpaca_eval/models_configs/FuseChat-Qwen-2.5-7B-Instruct/configs.yaml
src/alpaca_eval/models_configs/FuseChat-Qwen-2.5-7B-Instruct/prompt.txt
src/alpaca_eval/models_configs/GPO-Llama-3-8B-Instruct-GPM-2B/configs.yaml
src/alpaca_eval/models_configs/GPO-Llama-3-8B-Instruct-GPM-2B/prompt.txt
src/alpaca_eval/models_configs/Infinity-Instruct-3M-0613-Llama3-70B/configs.yaml
src/alpaca_eval/models_configs/Infinity-Instruct-3M-0613-Llama3-70B/prompt.txt
src/alpaca_eval/models_configs/Infinity-Instruct-3M-0613-Mistral-7B/configs.yaml
src/alpaca_eval/models_configs/Infinity-Instruct-3M-0613-Mistral-7B/prompt.txt
src/alpaca_eval/models_configs/Infinity-Instruct-3M-0625-Llama3-70B/configs.yaml
src/alpaca_eval/models_configs/Infinity-Instruct-3M-0625-Llama3-70B/prompt.txt
src/alpaca_eval/models_configs/Infinity-Instruct-3M-0625-Llama3-8B/configs.yaml
src/alpaca_eval/models_configs/Infinity-Instruct-3M-0625-Llama3-8B/prompt.txt
src/alpaca_eval/models_configs/Infinity-Instruct-3M-0625-Mistral-7B/configs.yaml
src/alpaca_eval/models_configs/Infinity-Instruct-3M-0625-Mistral-7B/prompt.txt
src/alpaca_eval/models_configs/Infinity-Instruct-3M-0625-Qwen2-7B/configs.yaml
src/alpaca_eval/models_configs/Infinity-Instruct-3M-0625-Qwen2-7B/prompt.txt
src/alpaca_eval/models_configs/Infinity-Instruct-3M-0625-Yi-1.5-9B/configs.yaml
src/alpaca_eval/models_configs/Infinity-Instruct-3M-0625-Yi-1.5-9B/prompt.txt
src/alpaca_eval/models_configs/Infinity-Instruct-7M-Gen-Llama3_1-70B/configs.yaml
src/alpaca_eval/models_configs/Infinity-Instruct-7M-Gen-Llama3_1-70B/prompt.txt
src/alpaca_eval/models_configs/Infinity-Instruct-7M-Gen-Llama3_1-8B/configs.yaml
src/alpaca_eval/models_configs/Infinity-Instruct-7M-Gen-Llama3_1-8B/prompt.txt
src/alpaca_eval/models_configs/Infinity-Instruct-7M-Gen-mistral-7B/configs.yaml
src/alpaca_eval/models_configs/Infinity-Instruct-7M-Gen-mistral-7B/prompt.txt
src/alpaca_eval/models_configs/LMCocktail-10.7B-v1/configs.yaml
src/alpaca_eval/models_configs/LMCocktail-10.7B-v1/prompt.txt
src/alpaca_eval/models_configs/Llama-3-8B-Instruct-SkillMix/configs.yaml
src/alpaca_eval/models_configs/Llama-3-Instruct-8B-RainbowPO/configs.yaml
src/alpaca_eval/models_configs/Llama-3-Instruct-8B-RainbowPO/prompt.txt
src/alpaca_eval/models_configs/Llama-3-Instruct-8B-SimPO/configs.yaml
src/alpaca_eval/models_configs/Llama-3-Instruct-8B-SimPO/prompt.txt
src/alpaca_eval/models_configs/Llama-3-Instruct-8B-SimPO-ExPO/configs.yaml
src/alpaca_eval/models_configs/Llama-3-Instruct-8B-WPO-HB-v2/configs.yaml
src/alpaca_eval/models_configs/Llama3-PBM-Nova-70B/configs.yaml
src/alpaca_eval/models_configs/Llama3-PBM-Nova-70B/prompt.txt
src/alpaca_eval/models_configs/Meta-Llama-3-70B-Instruct/configs.yaml
src/alpaca_eval/models_configs/Meta-Llama-3-8B-Instruct/configs.yaml
src/alpaca_eval/models_configs/Meta-Llama-3.1-405B-Instruct-Turbo/configs.yaml
src/alpaca_eval/models_configs/Meta-Llama-3.1-70B-Instruct-Turbo/configs.yaml
src/alpaca_eval/models_configs/Meta-Llama-3.1-8B-Instruct-Turbo/configs.yaml
src/alpaca_eval/models_configs/Mistral-7B+RAHF-DUAL+LoRA/configs.yaml
src/alpaca_eval/models_configs/Mistral-7B+RAHF-DUAL+LoRA/prompt.txt
src/alpaca_eval/models_configs/Mistral-7B-Instruct-v0.2/configs.yaml
src/alpaca_eval/models_configs/Mistral-7B-Instruct-v0.3/configs.yaml
src/alpaca_eval/models_configs/Mistral-7B-ReMax-v0.1/configs.yaml
src/alpaca_eval/models_configs/Mistral-7B-ReMax-v0.1/prompt.txt
src/alpaca_eval/models_configs/Mixtral-8x22B-Instruct-v0.1/configs.yaml
src/alpaca_eval/models_configs/Mixtral-8x7B-Instruct-v0.1/configs.yaml
src/alpaca_eval/models_configs/Mixtral-8x7B-Instruct-v0.1/togetherai_prompt.txt
src/alpaca_eval/models_configs/Mixtral-8x7B-Instruct-v0.1_concise/configs.yaml
src/alpaca_eval/models_configs/Mixtral-8x7B-Instruct-v0.1_concise/togetherai_prompt_concise.txt
src/alpaca_eval/models_configs/Mixtral-8x7B-Instruct-v0.1_verbose/configs.yaml
src/alpaca_eval/models_configs/Mixtral-8x7B-Instruct-v0.1_verbose/togetherai_prompt_verbose.txt
src/alpaca_eval/models_configs/Nanbeige-Plus-Chat-v0.1/configs.yaml
src/alpaca_eval/models_configs/Nanbeige-Plus-Chat-v0.1/prompt.txt
src/alpaca_eval/models_configs/Nanbeige2-16B-Chat/configs.yaml
src/alpaca_eval/models_configs/Nanbeige2-16B-Chat/prompt.txt
src/alpaca_eval/models_configs/Nanbeige2-8B-Chat/configs.yaml
src/alpaca_eval/models_configs/Nanbeige2-8B-Chat/prompt.txt
src/alpaca_eval/models_configs/NullModel/configs.yaml
src/alpaca_eval/models_configs/NullModel/constant_output.txt
src/alpaca_eval/models_configs/OpenHermes-2.5-Mistral-7B/configs.yaml
src/alpaca_eval/models_configs/OpenHermes-2.5-Mistral-7B/togetherai_prompt.txt
src/alpaca_eval/models_configs/Qwen-14B-Chat/configs.yaml
src/alpaca_eval/models_configs/Qwen-14B-Chat/prompt.txt
src/alpaca_eval/models_configs/Qwen1.5-1.8B-Chat/configs.yaml
src/alpaca_eval/models_configs/Qwen1.5-110B-Chat/configs.yaml
src/alpaca_eval/models_configs/Qwen1.5-110B-Chat/prompt.txt
src/alpaca_eval/models_configs/Qwen1.5-14B-Chat/configs.yaml
src/alpaca_eval/models_configs/Qwen1.5-72B-Chat/configs.yaml
src/alpaca_eval/models_configs/Qwen1.5-72B-Chat/prompt.txt
src/alpaca_eval/models_configs/Qwen1.5-7B-Chat/configs.yaml
src/alpaca_eval/models_configs/Qwen2-72B-Instruct/configs.yaml
src/alpaca_eval/models_configs/Qwen2-72B-Instruct/prompt.txt
src/alpaca_eval/models_configs/REBEL-Llama-3-8B-Instruct/configs.yaml
src/alpaca_eval/models_configs/REBEL-Llama-3-8B-Instruct/prompt.txt
src/alpaca_eval/models_configs/REBEL-Llama-3-8B-Instruct-Armo/configs.yaml
src/alpaca_eval/models_configs/SPPO-Gemma-2-9B-It-PairRM/configs.yaml
src/alpaca_eval/models_configs/SPPO-Gemma-2-9B-It-PairRM/prompt.txt
src/alpaca_eval/models_configs/SPPO-Llama-3-8B-Instruct-GPM-2B/configs.yaml
src/alpaca_eval/models_configs/SPPO-Llama-3-8B-Instruct-GPM-2B/prompt.txt
src/alpaca_eval/models_configs/SPPO-Llama-3-Instruct-8B-PairRM/configs.yaml
src/alpaca_eval/models_configs/SPPO-Llama-3-Instruct-8B-PairRM/prompt.txt
src/alpaca_eval/models_configs/SPPO-Mistral7B-PairRM/configs.yaml
src/alpaca_eval/models_configs/SPPO-Mistral7B-PairRM/prompt.txt
src/alpaca_eval/models_configs/SPPO-Mistral7B-PairRM-ExPO/configs.yaml
src/alpaca_eval/models_configs/Samba-CoE-v0.1/configs.yaml
src/alpaca_eval/models_configs/Samba-CoE-v0.1/prompt.txt
src/alpaca_eval/models_configs/Samba-CoE-v0.2/configs.yaml
src/alpaca_eval/models_configs/Samba-CoE-v0.2-best-of-16/configs.yaml
src/alpaca_eval/models_configs/SelfMoA_gemma-2-9b-it-SimPO/configs.yaml
src/alpaca_eval/models_configs/SelfMoA_gemma-2-9b-it-WPO-HB/configs.yaml
src/alpaca_eval/models_configs/Shopee-SlimMoA-v1/configs.yaml
src/alpaca_eval/models_configs/Snorkel-Mistral-PairRM-DPO/configs.yaml
src/alpaca_eval/models_configs/Snorkel-Mistral-PairRM-DPO/prompt.txt
src/alpaca_eval/models_configs/Snorkel-Mistral-PairRM-DPO-best-of-16/configs.yaml
src/alpaca_eval/models_configs/Snorkel-Mistral-PairRM-DPO-best-of-16/prompt.txt
src/alpaca_eval/models_configs/Starling-LM-7B-alpha/configs.yaml
src/alpaca_eval/models_configs/Starling-LM-7B-alpha/prompt.txt
src/alpaca_eval/models_configs/Starling-LM-7B-alpha-ExPO/configs.yaml
src/alpaca_eval/models_configs/Starling-LM-7B-beta-ExPO/configs.yaml
src/alpaca_eval/models_configs/Storm-7B/configs.yaml
src/alpaca_eval/models_configs/Storm-7B/prompt.txt
src/alpaca_eval/models_configs/Storm-7B-best-of-64/configs.yaml
src/alpaca_eval/models_configs/TOA/configs.yaml
src/alpaca_eval/models_configs/TempNet-LLaMA2-Chat-13B-v0.1/configs.yaml
src/alpaca_eval/models_configs/TempNet-LLaMA2-Chat-70B-v0.1/configs.yaml
src/alpaca_eval/models_configs/TempNet-LLaMA2-Chat-70B-v0.1/prompt.txt
src/alpaca_eval/models_configs/TempNet-LLaMA2-Chat-7B-v0.1/configs.yaml
src/alpaca_eval/models_configs/TempNet-LLaMA2-Chat-7B-v0.1/prompt.txt
src/alpaca_eval/models_configs/Together-MoA/configs.yaml
src/alpaca_eval/models_configs/Together-MoA-Lite/configs.yaml
src/alpaca_eval/models_configs/Yi-34B-Chat/configs.yaml
src/alpaca_eval/models_configs/Yi-34B-Chat/prompt.txt
src/alpaca_eval/models_configs/Yi-34B-Chat-Verified/configs.yaml
src/alpaca_eval/models_configs/Yi-34B-Chat-Verified/prompt.txt
src/alpaca_eval/models_configs/airoboros-33b/configs.yaml
src/alpaca_eval/models_configs/airoboros-33b/prompt.txt
src/alpaca_eval/models_configs/airoboros-65b/configs.yaml
src/alpaca_eval/models_configs/aligner-2b_claude-3-opus-20240229/config.yaml
src/alpaca_eval/models_configs/aligner-2b_claude-3-opus-20240229/configs.yaml
src/alpaca_eval/models_configs/aligner-2b_claude-3-opus-20240229/prompt.txt
src/alpaca_eval/models_configs/aligner-2b_gpt-4-turbo-2024-04-09/config.yaml
src/alpaca_eval/models_configs/aligner-2b_gpt-4-turbo-2024-04-09/configs.yaml
src/alpaca_eval/models_configs/aligner-2b_gpt-4-turbo-2024-04-09/prompt.txt
src/alpaca_eval/models_configs/aligner-2b_qwen1.5-72b-chat/config.yaml
src/alpaca_eval/models_configs/aligner-2b_qwen1.5-72b-chat/configs.yaml
src/alpaca_eval/models_configs/aligner-2b_qwen1.5-72b-chat/prompt.txt
src/alpaca_eval/models_configs/alpaca-7b/configs.yaml
src/alpaca_eval/models_configs/alpaca-7b/prompt.txt
src/alpaca_eval/models_configs/alpaca-7b-neft/configs.yaml
src/alpaca_eval/models_configs/alpaca-7b-neft/prompt.txt
src/alpaca_eval/models_configs/alpaca-7b_concise/configs.yaml
src/alpaca_eval/models_configs/alpaca-7b_verbose/configs.yaml
src/alpaca_eval/models_configs/alpaca-farm-ppo-human/configs.yaml
src/alpaca_eval/models_configs/alpaca-farm-ppo-sim-gpt4-20k/configs.yaml
src/alpaca_eval/models_configs/baichuan-13b-chat/configs.yaml
src/alpaca_eval/models_configs/baichuan-13b-chat/prompt.txt
src/alpaca_eval/models_configs/baize-v2-13b/configs.yaml
src/alpaca_eval/models_configs/baize-v2-13b/prompt.txt
src/alpaca_eval/models_configs/baize-v2-7b/configs.yaml
src/alpaca_eval/models_configs/bedrock_claude/configs.yaml
src/alpaca_eval/models_configs/bedrock_claude_2/configs.yaml
src/alpaca_eval/models_configs/blendaxai-gm-l3-v35/configs.yaml
src/alpaca_eval/models_configs/blendaxai-gm-l6-vo31/configs.yaml
src/alpaca_eval/models_configs/causallm-14b/configs.yaml
src/alpaca_eval/models_configs/causallm-14b/prompt.txt
src/alpaca_eval/models_configs/chatglm2-6b/configs.yaml
src/alpaca_eval/models_configs/chatglm2-6b/prompt.txt
src/alpaca_eval/models_configs/claude/configs.yaml
src/alpaca_eval/models_configs/claude/prompt.txt
src/alpaca_eval/models_configs/claude-2/configs.yaml
src/alpaca_eval/models_configs/claude-2.1/configs.yaml
src/alpaca_eval/models_configs/claude-2.1_concise/configs.yaml
src/alpaca_eval/models_configs/claude-2.1_concise/prompt_concise.txt
src/alpaca_eval/models_configs/claude-2.1_verbose/configs.yaml
src/alpaca_eval/models_configs/claude-2.1_verbose/prompt_verbose.txt
src/alpaca_eval/models_configs/claude-3-5-sonnet-20240620/configs.yaml
src/alpaca_eval/models_configs/claude-3-5-sonnet-20240620/prompt_chatml.txt
src/alpaca_eval/models_configs/claude-3-opus-20240229/configs.yaml
src/alpaca_eval/models_configs/claude-3-sonnet-20240229/configs.yaml
src/alpaca_eval/models_configs/claude-3-sonnet-20240229/prompt_chatml.txt
src/alpaca_eval/models_configs/claude-instant-1.2/configs.yaml
src/alpaca_eval/models_configs/claude2-alpaca-13b/configs.yaml
src/alpaca_eval/models_configs/cohere/configs.yaml
src/alpaca_eval/models_configs/cohere/prompt.txt
src/alpaca_eval/models_configs/cut-13b/configs.yaml
src/alpaca_eval/models_configs/cut-13b/prompt.txt
src/alpaca_eval/models_configs/dbrx-instruct/configs.yaml
src/alpaca_eval/models_configs/deepseek-llm-67b-chat/configs.yaml
src/alpaca_eval/models_configs/deepseek-llm-67b-chat/prompt.txt
src/alpaca_eval/models_configs/deita-7b-v1.0/configs.yaml
src/alpaca_eval/models_configs/dolphin-2.2.1-mistral-7b/configs.yaml
src/alpaca_eval/models_configs/evo-7b/configs.yaml
src/alpaca_eval/models_configs/evo-v2-7b/configs.yaml
src/alpaca_eval/models_configs/falcon-40b-instruct/configs.yaml
src/alpaca_eval/models_configs/falcon-7b-instruct/configs.yaml
src/alpaca_eval/models_configs/gemini-pro/configs.yaml
src/alpaca_eval/models_configs/gemini-pro/prompt.txt
src/alpaca_eval/models_configs/gemma-2-9b-it-DPO/configs.yaml
src/alpaca_eval/models_configs/gemma-2-9b-it-DPO/prompt.txt
src/alpaca_eval/models_configs/gemma-2-9b-it-SimPO/configs.yaml
src/alpaca_eval/models_configs/gemma-2-9b-it-WPO-HB/configs.yaml
src/alpaca_eval/models_configs/gemma-2b-it/configs.yaml
src/alpaca_eval/models_configs/gemma-7b-it/configs.yaml
src/alpaca_eval/models_configs/ghost-7b-alpha/configs.yaml
src/alpaca_eval/models_configs/ghost-7b-alpha/prompt.txt
src/alpaca_eval/models_configs/ghost-8b-beta-disl-0x5/configs.yaml
src/alpaca_eval/models_configs/ghost-8b-beta-disl-0x5/prompt.txt
src/alpaca_eval/models_configs/gpt-3.5-turbo-0301/configs.yaml
src/alpaca_eval/models_configs/gpt-3.5-turbo-0613/configs.yaml
src/alpaca_eval/models_configs/gpt-3.5-turbo-1106/configs.yaml
src/alpaca_eval/models_configs/gpt-3.5-turbo-1106_concise/configs.yaml
src/alpaca_eval/models_configs/gpt-3.5-turbo-1106_verbose/configs.yaml
src/alpaca_eval/models_configs/gpt-3.5-turbo-16k-0613/configs.yaml
src/alpaca_eval/models_configs/gpt-4-0125-preview/configs.yaml
src/alpaca_eval/models_configs/gpt-4-turbo-2024-04-09/configs.yaml
src/alpaca_eval/models_configs/gpt-4o-2024-05-13/configs.yaml
src/alpaca_eval/models_configs/gpt-4o-2024-08-06/configs.yaml
src/alpaca_eval/models_configs/gpt-4o-mini-2024-07-18/configs.yaml
src/alpaca_eval/models_configs/gpt35_turbo_instruct/configs.yaml
src/alpaca_eval/models_configs/gpt4/chatml_prompt.txt
src/alpaca_eval/models_configs/gpt4/configs.yaml
src/alpaca_eval/models_configs/gpt4_0314/configs.yaml
src/alpaca_eval/models_configs/gpt4_0613/configs.yaml
src/alpaca_eval/models_configs/gpt4_0613_concise/configs.yaml
src/alpaca_eval/models_configs/gpt4_0613_verbose/configs.yaml
src/alpaca_eval/models_configs/gpt4_1106_preview/chatml_prompt.txt
src/alpaca_eval/models_configs/gpt4_1106_preview/configs.yaml
src/alpaca_eval/models_configs/gpt4_1106_preview_concise/chatml_prompt_concise.txt
src/alpaca_eval/models_configs/gpt4_1106_preview_concise/configs.yaml
src/alpaca_eval/models_configs/gpt4_1106_preview_verbose/chatml_prompt_verbose.txt
src/alpaca_eval/models_configs/gpt4_1106_preview_verbose/configs.yaml
src/alpaca_eval/models_configs/gpt4_gamed/configs.yaml
src/alpaca_eval/models_configs/guanaco-13b/configs.yaml
src/alpaca_eval/models_configs/guanaco-33b/configs.yaml
src/alpaca_eval/models_configs/guanaco-33b-api/configs.yaml
src/alpaca_eval/models_configs/guanaco-65b/configs.yaml
src/alpaca_eval/models_configs/guanaco-7b/configs.yaml
src/alpaca_eval/models_configs/guanaco-7b/prompt.txt
src/alpaca_eval/models_configs/higgs-llama-3-70b-v2/configs.yaml
src/alpaca_eval/models_configs/humpback-llama-65b/configs.yaml
src/alpaca_eval/models_configs/humpback-llama-65b/prompt.txt
src/alpaca_eval/models_configs/humpback-llama2-70b/configs.yaml
src/alpaca_eval/models_configs/internlm2-chat-20b-ExPO/configs.yaml
src/alpaca_eval/models_configs/internlm2-chat-20b-ppo/configs.yaml
src/alpaca_eval/models_configs/internlm2-chat-20b-ppo/prompt.txt
src/alpaca_eval/models_configs/internlm2-chat-7b-ExPO/configs.yaml
src/alpaca_eval/models_configs/jina-chat/configs.yaml
src/alpaca_eval/models_configs/jina-chat/prompt.txt
src/alpaca_eval/models_configs/llama-2-13b-chat-hf/configs.yaml
src/alpaca_eval/models_configs/llama-2-70b-chat-hf/configs.yaml
src/alpaca_eval/models_configs/llama-2-70b-chat-hf/prompt.txt
src/alpaca_eval/models_configs/llama-2-7b-chat-hf/configs.yaml
src/alpaca_eval/models_configs/llama-2-7b-chat-hf/prompt.txt
src/alpaca_eval/models_configs/llama-2-chat-7b-evol70k-neft/configs.yaml
src/alpaca_eval/models_configs/llama-2-chat-7b-evol70k-neft/prompt.txt
src/alpaca_eval/models_configs/merlinite-7B-AOT/configs.yaml
src/alpaca_eval/models_configs/merlinite-7B-AOT/prompt.txt
src/alpaca_eval/models_configs/minichat-1.5-3b/configs.yaml
src/alpaca_eval/models_configs/minichat-3b/configs.yaml
src/alpaca_eval/models_configs/minichat-3b/prompt.txt
src/alpaca_eval/models_configs/minotaur-13b/configs.yaml
src/alpaca_eval/models_configs/minotaur-13b/prompt.txt
src/alpaca_eval/models_configs/mistral-large-2402/configs.yaml
src/alpaca_eval/models_configs/mistral-medium/basic_chatml_prompt.txt
src/alpaca_eval/models_configs/mistral-medium/configs.yaml
src/alpaca_eval/models_configs/mistral-orpo-beta/configs.yaml
src/alpaca_eval/models_configs/mistral-orpo-beta/prompt.txt
src/alpaca_eval/models_configs/nous-hermes-13b/configs.yaml
src/alpaca_eval/models_configs/nous-hermes-13b/prompt.txt
src/alpaca_eval/models_configs/oasst-rlhf-llama-33b/configs.yaml
src/alpaca_eval/models_configs/oasst-sft-llama-33b/configs.yaml
src/alpaca_eval/models_configs/oasst-sft-llama-33b/prompt.txt
src/alpaca_eval/models_configs/oasst-sft-pythia-12b/configs.yaml
src/alpaca_eval/models_configs/oasst-sft-pythia-12b/prompt.txt
src/alpaca_eval/models_configs/openbuddy-falcon-40b-v9/configs.yaml
src/alpaca_eval/models_configs/openbuddy-falcon-40b-v9/prompt.txt
src/alpaca_eval/models_configs/openbuddy-falcon-7b-v6/configs.yaml
src/alpaca_eval/models_configs/openbuddy-falcon-7b-v6/prompt.txt
src/alpaca_eval/models_configs/openbuddy-llama-30b-v7.1/configs.yaml
src/alpaca_eval/models_configs/openbuddy-llama-30b-v7.1/prompt.txt
src/alpaca_eval/models_configs/openbuddy-llama-65b-v8/configs.yaml
src/alpaca_eval/models_configs/openbuddy-llama-65b-v8/prompt.txt
src/alpaca_eval/models_configs/openbuddy-llama2-13b-v11.1/configs.yaml
src/alpaca_eval/models_configs/openbuddy-llama2-13b-v11.1/prompt.txt
src/alpaca_eval/models_configs/openbuddy-llama2-70b-v10.1/configs.yaml
src/alpaca_eval/models_configs/openbuddy-llama2-70b-v10.1/prompt.txt
src/alpaca_eval/models_configs/openchat-13b/configs.yaml
src/alpaca_eval/models_configs/openchat-13b/prompt.txt
src/alpaca_eval/models_configs/openchat-v2-13b/configs.yaml
src/alpaca_eval/models_configs/openchat-v2-w-13b/configs.yaml
src/alpaca_eval/models_configs/openchat-v3.1-13b/configs.yaml
src/alpaca_eval/models_configs/openchat8192-13b/configs.yaml
src/alpaca_eval/models_configs/opencoderplus-15b/configs.yaml
src/alpaca_eval/models_configs/openpipe-moa-gpt-4-turbo-v1/configs.yaml
src/alpaca_eval/models_configs/pairrm-Yi-34B-Chat/configs.yaml
src/alpaca_eval/models_configs/pairrm-tulu-2-13b/configs.yaml
src/alpaca_eval/models_configs/pairrm-tulu-2-70b/configs.yaml
src/alpaca_eval/models_configs/pairrm-tulu-2-70b/prompt.txt
src/alpaca_eval/models_configs/pairrm-zephyr-7b-beta/configs.yaml
src/alpaca_eval/models_configs/phi-2/configs.yaml
src/alpaca_eval/models_configs/phi-2/prompt.txt
src/alpaca_eval/models_configs/phi-2-dpo/configs.yaml
src/alpaca_eval/models_configs/phi-2-dpo/prompt.txt
src/alpaca_eval/models_configs/phi-2-sft/configs.yaml
src/alpaca_eval/models_configs/phi-2-sft/prompt.txt
src/alpaca_eval/models_configs/platolm-7b/configs.yaml
src/alpaca_eval/models_configs/platolm-7b/prompt.txt
src/alpaca_eval/models_configs/pythia-12b-mix-sft/configs.yaml
src/alpaca_eval/models_configs/recycled-wizardlm-7b-v1.0/configs.yaml
src/alpaca_eval/models_configs/recycled-wizardlm-7b-v2.0/configs.yaml
src/alpaca_eval/models_configs/text_davinci_001/configs.yaml
src/alpaca_eval/models_configs/text_davinci_003/configs.yaml
src/alpaca_eval/models_configs/text_davinci_003/prompt.txt
src/alpaca_eval/models_configs/tulu-2-dpo-13b/configs.yaml
src/alpaca_eval/models_configs/tulu-2-dpo-13b-ExPO/configs.yaml
src/alpaca_eval/models_configs/tulu-2-dpo-70b/configs.yaml
src/alpaca_eval/models_configs/tulu-2-dpo-70b/prompt.txt
src/alpaca_eval/models_configs/tulu-2-dpo-70b-ExPO/configs.yaml
src/alpaca_eval/models_configs/tulu-2-dpo-7b/configs.yaml
src/alpaca_eval/models_configs/tulu-2-dpo-7b-ExPO/configs.yaml
src/alpaca_eval/models_configs/ultralm-13b/configs.yaml
src/alpaca_eval/models_configs/ultralm-13b/prompt.txt
src/alpaca_eval/models_configs/ultralm-13b-best-of-16/configs.yaml
src/alpaca_eval/models_configs/ultralm-13b-best-of-16/prompt.txt
src/alpaca_eval/models_configs/ultralm-13b-v2.0/configs.yaml
src/alpaca_eval/models_configs/ultralm-13b-v2.0/prompt.txt
src/alpaca_eval/models_configs/ultralm-13b-v2.0-best-of-16/configs.yaml
src/alpaca_eval/models_configs/ultralm-13b-v2.0-best-of-16/prompt.txt
src/alpaca_eval/models_configs/vicuna-13b/configs.yaml
src/alpaca_eval/models_configs/vicuna-13b-v1.3/configs.yaml
src/alpaca_eval/models_configs/vicuna-13b-v1.5/configs.yaml
src/alpaca_eval/models_configs/vicuna-13b-v1.5-togetherai/configs.yaml
src/alpaca_eval/models_configs/vicuna-33b-v1.3/configs.yaml
src/alpaca_eval/models_configs/vicuna-7b/configs.yaml
src/alpaca_eval/models_configs/vicuna-7b/prompt.txt
src/alpaca_eval/models_configs/vicuna-7b-v1.3/configs.yaml
src/alpaca_eval/models_configs/vicuna-7b-v1.5/configs.yaml
src/alpaca_eval/models_configs/wizardlm-13b/configs.yaml
src/alpaca_eval/models_configs/wizardlm-13b/prompt.txt
src/alpaca_eval/models_configs/wizardlm-13b-v1.1/configs.yaml
src/alpaca_eval/models_configs/wizardlm-13b-v1.2/configs.yaml
src/alpaca_eval/models_configs/wizardlm-70b/configs.yaml
src/alpaca_eval/models_configs/xwinlm-13b-v0.1/configs.yaml
src/alpaca_eval/models_configs/xwinlm-70b-v0.1/configs.yaml
src/alpaca_eval/models_configs/xwinlm-70b-v0.3/configs.yaml
src/alpaca_eval/models_configs/xwinlm-7b-v0.1/configs.yaml
src/alpaca_eval/models_configs/xwinlm-7b-v0.1/prompt.txt
src/alpaca_eval/models_configs/yi-large-preview/configs.yaml
src/alpaca_eval/models_configs/yi-large-preview/prompt.txt
src/alpaca_eval/models_configs/zephyr-7b-alpha/configs.yaml
src/alpaca_eval/models_configs/zephyr-7b-alpha/prompt.txt
src/alpaca_eval/models_configs/zephyr-7b-alpha-ExPO/configs.yaml
src/alpaca_eval/models_configs/zephyr-7b-alpha-ExPO/prompt.txt
src/alpaca_eval/models_configs/zephyr-7b-beta/configs.yaml
src/alpaca_eval/models_configs/zephyr-7b-beta-ExPO/configs.yaml
tests/test_analyze.py
tests/test_decoders_unit.py
tests/test_main.py
tests/test_pairwise_evaluator.py