# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Constants used over the whole project.
"""
import torch

# Changing the constants below affects the whole project. Proceed with caution.

# Maps a dtype name (as a string) to the corresponding torch dtype object.
DTYPE_CLASS = {
    "float16": torch.float16,
    # bfloat16 is supported in A100 and above
    "bfloat16": torch.bfloat16,
    "float32": torch.float32,
    # "auto" maps to the plain string itself rather than a torch dtype;
    # presumably it is resolved later by whatever consumes this table
    # -- TODO confirm against callers.
    "auto": "auto",
}
# models
# Folder used for checkpoints; the path is relative
# (NOTE(review): relative to what is not visible here -- confirm with callers).
CKPT_FOLDER = "../output"

# Names of the supported base model families, one entry per family.
BASE_MODELS = [
    # LLaMA family
    "llama",
    "llama2",
    "llama3",
    "llama3.3",
    "codellama",
    # ChatGLM / Galactica
    "chatglm",
    "galactica",
    # Mistral family
    "mistral",
    "mixtral",
    "llava-mistral",
    # Qwen family
    "qwen1.5",
    "qwen2",
    "qwen2-math",
    "qwen2.5",
    "qwen2.5-math",
    # Others
    "granite-guardian-3.1",
    "DeepSeek-R1-Distill-Qwen",
    "recur-qwen2.5",
]
# model checkpoints
# One list of checkpoint names per base model family. Every name begins with
# the name of the family it belongs to.
LLAMA_SIZE = ["llama-7b", "llama-13b", "llama-30b", "llama-65b"]
LLAMA2_SIZE = ["llama2-13b_chat_hf", "llama2-70b_chat_hf", "llama2-13b_hf"]
LLAMA3_SIZE = ["llama3-8b_instruct", "llama3-70b_instruct"]
LLAMA33_SIZE = ["llama3.3-70b_instruct"]
CODELLAMA_SIZE = [
    "codellama-13b_hf",
    "codellama-13b_python_hf",
    "codellama-13b_Instruct_hf",
]
CHATGLM_SIZE = ["chatglm-6b"]
GALACTICA_SIZE = ["galactica-6.7b", "galactica-30b"]
MISTRAL_SIZE = ["mistral-7b_instruct"]
MIXTRAL_SIZE = ["mixtral-8x7b", "mixtral-8x7b_instruct"]
LLAVA_MISTRAL_SIZE = ["llava-mistral-7b_instruct"]
QWEN1_5_SIZE = ["qwen1.5-7b_chat", "qwen1.5-14b_chat", "qwen1.5-72b_chat"]
QWEN2_SIZE = [
    "qwen2-7b_instruct",
    "qwen2-1.5b_instruct",
    "qwen2-72b_instruct",
]
QWEN2_MATH_SIZE = ["qwen2-math-7b_instruct"]
QWEN25_SIZE = [
    "qwen2.5-0.5b",
    "qwen2.5-7b",
    "qwen2.5-0.5b_instruct",
    "qwen2.5-1.5b_instruct",
    "qwen2.5-7b_instruct",
    "qwen2.5-72b_instruct",
]
QWEN25_MATH_SIZE = [
    "qwen2.5-math-1.5b",
    "qwen2.5-math-1.5b_instruct",
    "qwen2.5-math-7b",
    "qwen2.5-math-7b_instruct",
]
GRANITE_GUARDIAN31_SIZE = [
    "granite-guardian-3.1-2b",
    "granite-guardian-3.1-8b",
]
DEEPSEEK_R1_DISTILLED_QWEN_SIZE = [
    "DeepSeek-R1-Distill-Qwen-1.5B",
    "DeepSeek-R1-Distill-Qwen-1.5B_hf",
    "DeepSeek-R1-Distill-Qwen-7B_hf",
    "DeepSeek-R1-Distill-Qwen-32B_hf",
]
RECUR_QWEN_SIZE = [
    "recur-qwen2.5-0.5b_instruct",
    "recur-qwen2.5-1.5b_instruct",
    "recur-qwen2.5-7b_instruct",
    "recur-qwen2.5-72b_instruct",
]
# Lookup table from a base model family name to its list of checkpoint names.
# The values are the module-level *_SIZE lists themselves (not copies), so a
# mutation through either name is visible through the other.
MODEL_SIZE = {
    # LLaMA family
    "llama": LLAMA_SIZE,
    "llama2": LLAMA2_SIZE,
    "llama3": LLAMA3_SIZE,
    "llama3.3": LLAMA33_SIZE,
    "codellama": CODELLAMA_SIZE,
    # ChatGLM / Galactica
    "chatglm": CHATGLM_SIZE,
    "galactica": GALACTICA_SIZE,
    # Mistral family
    "mistral": MISTRAL_SIZE,
    "mixtral": MIXTRAL_SIZE,
    "llava-mistral": LLAVA_MISTRAL_SIZE,
    # Qwen family
    "qwen1.5": QWEN1_5_SIZE,
    "qwen2": QWEN2_SIZE,
    "qwen2-math": QWEN2_MATH_SIZE,
    "qwen2.5": QWEN25_SIZE,
    "qwen2.5-math": QWEN25_MATH_SIZE,
    # Others
    "granite-guardian-3.1": GRANITE_GUARDIAN31_SIZE,
    "DeepSeek-R1-Distill-Qwen": DEEPSEEK_R1_DISTILLED_QWEN_SIZE,
    "recur-qwen2.5": RECUR_QWEN_SIZE,
}