######### Base Pretraining Model Family #########
## Model families that we have evaled
# Maps a model-name pattern (key) to the display name of its family (value).
# NOTE(review): keys mix plain substrings with regex-looking patterns (`\d+`, `$`).
# How they are matched (and whether dict order matters, e.g. "Meta-Llama-3" is
# listed before "Meta-Llama-3.1") is decided by the consumer of this map, which is
# not visible here -- confirm before reordering or editing keys.
EVAL_BASE_MODEL_FAMILY_MAP = {
    'meta-llama/Llama-2': "Llama-2",
    'huggyllama/llama-': "Llama",
    "meta-llama/Meta-Llama-3": "Llama-3",
    "meta-llama/Meta-Llama-3.1": "Llama-3.1",
    'Qwen/Qwen2': "Qwen2",
    'Qwen/Qwen1.5': "Qwen1.5",
    'Qwen/Qwen-': "Qwen",
    "mistralai/Mistral": "Mistral",
    "mistralai/Mixtral": "Mixtral",
    r'01-ai/Yi-\d+B$': "Yi",
    r'01-ai/Yi-1.5-\d+B$': "Yi-1.5",
    # Raw string: "\d" is an invalid escape sequence in a normal string literal
    # (SyntaxWarning on Python 3.12+). The key's runtime value is unchanged.
    r"google/gemma-\d+b": "Gemma",
    "google/gemma-2": "Gemma-2",
    'tiiuae/falcon': "Falcon",
    "microsoft/phi": "Phi",
    'EleutherAI/pythia': "Pythia",
    'bigscience/bloom': "BLOOM",
    'EleutherAI/gpt-': "GPT-Neo/J",
    'facebook/opt': "OPT",
    "mosaicml/mpt": "MPT",
    'facebook/xglm': "XGLM",
    'codellama/CodeLlama': "CodeLlama",
    'bigcode/starcoderbase': "StarCoder",
    "bigcode/starcoder2": "StarCoder2",
    "deepseek-ai/deepseek-coder-": "DeepSeek-Coder",
    "deepseek-ai/DeepSeek-V2": "DeepSeek-V2",
    "deepseek-ai/DeepSeek-Coder-V2": "DeepSeek-Coder-V2",
    "ai21labs/Jamba": "Jamba",
    "granite-ai/granite-": "Granite",
    "Qwen/Qwen3": "Qwen3",
    # The keys below carry no organisation prefix.
    "DeepSeek": "DeepSeek",
    'falcon': "falcon",
    "Gemma-3": "Gemma-3",
    'starcoder2': "starcoder2",
    'starcoder': "starcoder",
    'phi': "phi",
    'deepseek-R1': "DeepSeek-R1",
    'Qwen2.5': "Qwen2.5",
    "Baichuan": "Baichuan",
    "DeepSeek-R1-Distill-Qwen": "DeepSeek-R1-Distill-Qwen",
    "DeepSeek-R1-Distill-Llama": "DeepSeek-R1-Distill-Llama",
    "Gemma-2-it": "Gemma-2-it",
    "Gemma-3-it": "Gemma-3-it",
    "Gemma-it": "Gemma-it",
    "DeepSeek-R1-Distill": "DeepSeek-R1-Distill",
}

# Family display names, in the insertion order of the map above.
EVAL_BASE_MODEL_FAMILIES = list(EVAL_BASE_MODEL_FAMILY_MAP.values())

## Model families for which we have collected metadata but have not evaled
# Maps a model-name pattern (key) to the display name of its family (value).
# Each key is listed exactly once: a repeated key in a dict literal is silently
# collapsed to a single entry, so the earlier duplicate lines for
# "openai-community/gpt2" and "deepseek-ai/deepseek-llm" had no effect.
MISC_BASE_MODEL_FAMILY_MAP = {
    r'01-ai/Yi-\d+B-200K': 'Yi-200K',
    r'openlm-research/open_llama_\d+b_v2': 'OpenLlamaV2',
    r'openlm-research/open_llama_\d+b$': 'OpenLlama',
    "openai-community/gpt2": "GPT-2",
    'internlm/internlm2': "InternLM2",
    "deepseek-ai/deepseek-llm": "DeepSeek-LLM",
    "deepseek-ai/deepseek-moe": "DeepSeek-MoE",
    "Deci/DeciLM": "DeciLM",
    "stabilityai/stablelm": "StableLM",
    "RWKV/rwkv": "RWKV",
    "togethercomputer/RedPajama-INCITE-Base": "RedPajama-INCITE-Base",
    "LLM360/Amber": "Amber",
    "Salesforce/codegen": "Codegen",
    "HuggingFaceTB/SmolLM": "SmolLM",
    "cerebras/Cerebras-GPT": "Cerebras-GPT",
    "cerebras/btlm-": "BTLM",
    "h2oai/h2o-danube": "H2O-Danube",
    "allenai/OLMo": "OLMo",
    "TinyLlama/TinyLlama_v1.1": "TinyLlama",
}
# Family display names, in the insertion order of the map above.
MISC_BASE_MODEL_FAMILIES = list(MISC_BASE_MODEL_FAMILY_MAP.values())


# Models released after our paper (May 2024) that we pre-registered in advance
# and evaled once they became available.
# Maps the full model identifier (key) to the label used for it (value).
PREREGISTERED_BASE_MODEL_MAP = {
    # Llama 3.1
    "meta-llama/Meta-Llama-3.1-405B-FP8": "Llama-3.1-405B-FP8",
    "meta-llama/Meta-Llama-3.1-70B": "Llama-3.1",
    "meta-llama/Meta-Llama-3.1-8B": "Llama-3.1",
    # Qwen
    "Qwen/Qwen1.5-110B": "Qwen1.5-110B",
    "Qwen/Qwen2-72B": "Qwen2-72B",
    "Qwen/Qwen2-57B-A14B": "Qwen2",
    "Qwen/Qwen2-7B": "Qwen2",
    "Qwen/Qwen2-1.5B": "Qwen2",
    "Qwen/Qwen2-0.5B": "Qwen2",
    # DeepSeek
    "deepseek-ai/DeepSeek-V2": "DeepSeek-V2",
    "deepseek-ai/DeepSeek-Coder-V2-Base": "DeepSeek-Coder-V2-Base",
    # Gemma 2
    "google/gemma-2-27b": "Gemma-2",
    "google/gemma-2-9b": "Gemma-2",
    "google/gemma-2-2b": "Gemma-2",
    # Mistral
    "mistralai/Mixtral-8x22B-v0.1": "Mixtral-8x22B",
    "mistralai/Mistral-Nemo-Base-2407": "Mistral-Nemo",
    # Yi 1.5
    "01-ai/Yi-1.5-34B": "Yi-1.5",
    "01-ai/Yi-1.5-9B": "Yi-1.5",
    "01-ai/Yi-1.5-6B": "Yi-1.5",
    # Jamba
    "ai21labs/Jamba-v0.1": "Jamba",
}
# Identifiers of the pre-registered models, in the insertion order of the map above.
PREREGISTERED_BASE_MODELS = [*PREREGISTERED_BASE_MODEL_MAP]

## All model families that we have collected metadata for
# Evaled families come first, followed by the metadata-only (misc) families.
ALL_BASE_MODEL_FAMILIES = [*EVAL_BASE_MODEL_FAMILIES, *MISC_BASE_MODEL_FAMILIES]
# Merged pattern -> family map; on a key clash the misc entry's value wins.
ALL_BASE_MODEL_FAMILY_MAP = dict(EVAL_BASE_MODEL_FAMILY_MAP)
ALL_BASE_MODEL_FAMILY_MAP.update(MISC_BASE_MODEL_FAMILY_MAP)

## Model families with public model and data size
# Start from an independent copy of the evaled families, then drop three of them
# (presumably the ones without public size information -- TODO confirm).
# list.remove raises ValueError if a name is no longer in the list.
EVAL_BASE_MODEL_WITH_FLOPS_FAMILIES = list(EVAL_BASE_MODEL_FAMILIES)
EVAL_BASE_MODEL_WITH_FLOPS_FAMILIES.remove("Mistral")
EVAL_BASE_MODEL_WITH_FLOPS_FAMILIES.remove("Mixtral")
EVAL_BASE_MODEL_WITH_FLOPS_FAMILIES.remove("Qwen2")

### Code Models ####
# Family display names of the code models, resolved from their patterns in
# ALL_BASE_MODEL_FAMILY_MAP (a pattern missing from the map raises KeyError here).
BASE_CODE_MODELS = list(map(ALL_BASE_MODEL_FAMILY_MAP.__getitem__, (
    "bigcode/starcoderbase",
    "codellama/CodeLlama",
    "Salesforce/codegen",
    "bigcode/starcoder2",
    "deepseek-ai/deepseek-coder-",
    "deepseek-ai/DeepSeek-Coder-V2",
)))

def remove_code_models(model_list):
    """Return a new list with the entries of ``model_list`` not in ``BASE_CODE_MODELS``.

    The relative order of the remaining entries is preserved and the input is
    not modified.
    """
    non_code = []
    for family in model_list:
        if family in BASE_CODE_MODELS:
            continue
        non_code.append(family)
    return non_code

def keep_code_models(model_list):
    """Return a new list with only the entries of ``model_list`` in ``BASE_CODE_MODELS``.

    The relative order of the kept entries is preserved and the input is not
    modified.
    """
    code_only = []
    for family in model_list:
        if family in BASE_CODE_MODELS:
            code_only.append(family)
    return code_only

# Split the evaled base families into non-code and code families
# (membership is decided against BASE_CODE_MODELS by the two helpers).
EVAL_BASE_NONCODE_MODEL_FAMILY = remove_code_models(model_list=EVAL_BASE_MODEL_FAMILIES)
EVAL_BASE_CODE_MODEL_FAMILY = keep_code_models(model_list=EVAL_BASE_MODEL_FAMILIES)

######### Instruct Model Family #########
# Maps a regex-style model-name pattern (key) to the display name of its family (value).
# NOTE(review): how the patterns are applied is decided by the consumer of this
# map, which is not visible here. Two patterns map to "Vicuna", and the second
# (`-v.*`) looks subsumed by the first (`-.*`) -- confirm whether both are needed.
EVAL_INSTRUCT_MODEL_FAMILY_MAP = {
    r"gpt-4-.*": "GPT-4",
    r"claude-2.*": "Claude-2",
    r"claude-1.*": "Claude-1",
    r"gpt-3.5-turbo-.*": "GPT-3.5-Turbo",
    r"text-davinci-.*": "Text-Davinci",
    r"claude-instant-.*": "Claude-Instant",
    r"chat-bison-.*": "PALM-2-Chat",
    r"llama-2-(\d+)b-chat": "Llama-2-Chat",
    r"mistral-(\d+)b-instruct-v.*": "Mistral-Instruct",
    r"vicuna-(\d+)b-.*": "Vicuna",
    r"codellama-(\d+)b-instruct": "Codellama-Instruct",
    r"vicuna-(\d+)b-v.*": "Vicuna",
    r"openchat-(\d+)b-v.*": "OpenChat",
    r"deepseek-llm-(\d+)b-chat": "Deepseek-LLM-Chat",
    r"wizardlm-(\d+)b-v.*": "WizardLM",
    r"guanaco-(\d+)b": "Guanaco",
    r"lemur-(\d+)b-chat-v1": "Lemur-Chat",
    r"koala-(\d+)b": "Koala",
    r"codegeex2-(\d+)b": "Codegeex2",
    r"dolly-v2-(\d+)b": "Dolly-v2",
    r"chatglm-(\d+)b-.*": "ChatGLM",
    r"oasst-sft-4-pythia-(\d+)b-.*": "Oasst-SFT",
}

# Unique family names in order of first appearance in the map above.
# dict.fromkeys de-duplicates while keeping insertion order, in a single pass.
EVAL_INSTRUCT_MODEL_FAMILIES = list(dict.fromkeys(EVAL_INSTRUCT_MODEL_FAMILY_MAP.values()))



######### Metrics #########
## Standard benchmark list
ALL_BENCHMARK_METRIC_LIST = ['MMLU', 'ARC-C', 'HellaSwag', 'Winograd', 'TruthfulQA', 'GSM8K', 'XWinograd', 'HumanEval']

## X metrics used as scaling predictors
# Each entry is a list of column names, so single metrics are one-element lists.
MODEL_SIZE_METRIC = ['Model Size (B)']
TRAINING_FLOPS_METRIC = ['FLOPs (1E21)']
# PC_METRIC_NUM_k holds the names of the first k principal components.
PC_METRIC_NUM_1 = ['PC-1']
PC_METRIC_NUM_2 = ['PC-1', 'PC-2']
PC_METRIC_NUM_3 = ['PC-1', 'PC-2', 'PC-3']
PC_METRIC_NUM_4 = ['PC-1', 'PC-2', 'PC-3', 'PC-4']
PC_METRIC_NUM_5 = ['PC-1', 'PC-2', 'PC-3', 'PC-4', 'PC-5']

# NOTE(review): PC_METRIC_NUM_5 is defined above but not listed here -- confirm
# whether that omission is intentional.
ALL_X_METRICS_LIST = [
    MODEL_SIZE_METRIC,
    TRAINING_FLOPS_METRIC,
    PC_METRIC_NUM_1,
    PC_METRIC_NUM_2,
    PC_METRIC_NUM_3,
    PC_METRIC_NUM_4,
]

## Regression metrics
# Fuller set kept for reference (currently disabled):
#   ["spearman", "pearson", "r2", "mse", "mae"]
DEFAULT_REG_METRICS = ["mse"]


######### PCA Preprocessing #########

# Default preprocessing options: imputation and PCA both run over the full
# benchmark list.
DEFAULT_PCA_PREPROCESS_KWARGS = {
    "apply_imputation": True,
    "imputation_metrics": ALL_BENCHMARK_METRIC_LIST,
    "imputation_kwargs": {
        'n_components': 1,
        'verbose': False,
        'boundary': [0.0, 1.0]
    },
    "apply_pca": True,
    "pca_metrics": ALL_BENCHMARK_METRIC_LIST,
    "pca_kwargs": {
        'n_components': 5,
        'standardize': False,
    },
}

# Exclude GSM-8k for some tasks, e.g., arithmetic
# Derived from the benchmark list so the two cannot drift apart.
NONGSM_METRIC_LIST = [metric for metric in ALL_BENCHMARK_METRIC_LIST if metric != 'GSM8K']
# Same options as the default, but over the non-GSM metrics. The nested option
# dicts (and the 'boundary' list) are copied rather than shared, so tweaking one
# configuration in place cannot silently change the other; a plain dict.copy()
# would only copy the top level.
NONGSM_PCA_PREPROCESS_KWARGS = {
    **DEFAULT_PCA_PREPROCESS_KWARGS,
    "imputation_metrics": NONGSM_METRIC_LIST,
    "imputation_kwargs": {
        **DEFAULT_PCA_PREPROCESS_KWARGS["imputation_kwargs"],
        'boundary': list(DEFAULT_PCA_PREPROCESS_KWARGS["imputation_kwargs"]['boundary']),
    },
    "pca_metrics": NONGSM_METRIC_LIST,
    "pca_kwargs": dict(DEFAULT_PCA_PREPROCESS_KWARGS["pca_kwargs"]),
}


######### Misc #########
# Per-subtask baseline value for each BBH subtask, keyed by subtask name
# (presumably the random-guess accuracy, i.e. 1 / number of options, with 0.0
# for free-form answers -- TODO confirm against the consumer).
BBH_SUBTASKS2RANDOM_VAL = {
    "boolean_expressions": 1 / 2,
    "causal_judgement": 1 / 2,
    "date_understanding": 1 / 6,
    "disambiguation_qa": 1 / 3,
    "dyck_languages": 0.0,
    "formal_fallacies": 1 / 2,
    "geometric_shapes": 1 / 9,
    "hyperbaton": 1 / 2,
    "logical_deduction_five_objects": 1 / 5,
    "logical_deduction_seven_objects": 1 / 7,
    "logical_deduction_three_objects": 1 / 3,
    "movie_recommendation": 1 / 5,
    "multistep_arithmetic_two": 0.0,
    "navigate": 1 / 2,
    "object_counting": 0.0,
    "penguins_in_a_table": 1 / 5,
    "reasoning_about_colored_objects": 1 / 18,
    "ruin_names": 1 / 4,
    "salient_translation_error_detection": 1 / 6,
    "snarks": 1 / 2,
    "sports_understanding": 1 / 2,
    "temporal_sequences": 1 / 4,
    "tracking_shuffled_objects_five_objects": 1 / 5,
    "tracking_shuffled_objects_seven_objects": 1 / 7,
    "tracking_shuffled_objects_three_objects": 1 / 3,
    "web_of_lies": 1 / 2,
    "word_sorting": 0.0,
}

# Subtask names, in the insertion order of the map above.
BBH_SUBTASKS = [*BBH_SUBTASKS2RANDOM_VAL]