# Filesystem locations and environment tag for this test configuration.
# All paths below share the /nvme/qa_test_models root.
# NOTE(review): the code that reads these keys is not visible here; the
# per-key notes are inferred from the key names and values — confirm.

# Root directory; the model lists in this file use "org/name" identifiers that
# are presumably resolved relative to it.
model_path: /nvme/qa_test_models
resource_path: /nvme/qa_test_models/resource
# Output locations (logs and reports), judging by their names.
log_path: /nvme/qa_test_models/autotest_model/log
eval_log_path: /nvme/qa_test_models/evaluation_report
mllm_eval_log_path: /nvme/qa_test_models/mllm_evaluation_report
benchmark_path: /nvme/qa_test_models/benchmark-reports
# Both dataset entries point at single JSON files rather than directories.
dataset_path: /nvme/qa_test_models/datasets/ShareGPT_V3_unfiltered_cleaned_split.json
prefix_dataset_path: /nvme/qa_test_models/datasets/prefix_cache_test.json
# Presumably identifies the hardware this configuration targets.
env_tag: a100


# Per-model integer setting (values used here: 2, 4 or 8), keyed by the model's
# short name, i.e. the part after "org/" in the model lists of this file
# (for example "internlm/Intern-S1" is keyed as "Intern-S1").
# NOTE(review): "tp" presumably means tensor-parallel degree, and models that
# are absent from this map presumably fall back to a consumer-side default —
# confirm against the test harness.
# NOTE(review): "internlm2_5-7b-chat-1m" and "deepseek-vl2" are not referenced
# by any model list visible in this file; check whether they are still needed.
tp_config:
    Llama-4-Scout-17B-16E-Instruct: 4
    Meta-Llama-3-1-70B-Instruct: 4
    Intern-S1: 8
    internlm2_5-20b-chat: 2
    internlm2_5-20b: 2
    internlm2_5-7b-chat-1m: 4
    InternVL3-38B: 2
    InternVL2_5-26B: 2
    InternVL2_5-26B-MPO: 2
    InternVL2_5-38B: 4
    InternVL2-40B: 4
    Qwen3-235B-A22B: 8
    Qwen3-32B: 2
    Qwen3-30B-A3B: 2
    Qwen3-VL-32B-Instruct: 2
    Qwen3-VL-30B-A3B-Instruct: 2
    Qwen3-30B-A3B-Base: 2
    Qwen2.5-32B-Instruct: 2
    Qwen2.5-72B-Instruct: 4
    Qwen2.5-VL-32B-Instruct: 2
    DeepSeek-V2-Lite-Chat: 2
    DeepSeek-R1-Distill-Qwen-32B: 2
    deepseek-vl2: 2
    Baichuan2-13B-Chat: 2
    Mixtral-8x7B-Instruct-v0.1: 2
    llava-v1.5-13b: 2
    MiniCPM-V-2_6: 2
    gemma-2-27b-it: 2
    InternVL2-Llama3-76B-AWQ: 4
    gpt-oss-20b-BF16: 2
    gpt-oss-120b-BF16: 4
    InternVL3_5-30B-A3B: 2



# Chat models listed for the "turbomind" suite, as "org/name" identifiers
# under a single "tp" key.
# The list mixes plain checkpoints with ones whose names carry a quantization
# suffix (e.g. "-AWQ", "-awq", "-GPTQ-Int4", "-w4").
# NOTE(review): presumably each entry is resolved under model_path and its
# parallelism is looked up in tp_config by the name after "org/" — confirm.
# NOTE(review): list order may determine test ordering/ids in the consumer;
# keep it stable unless that is verified not to matter.
turbomind_chat_model:
    tp:
        - meta-llama/Llama-3.2-1B-Instruct
        - meta-llama/Llama-3.2-3B-Instruct
        - meta-llama/Meta-Llama-3-1-8B-Instruct
        - meta-llama/Meta-Llama-3-1-8B-Instruct-AWQ
        - meta-llama/Meta-Llama-3-1-70B-Instruct
        - meta-llama/Meta-Llama-3-8B-Instruct
        - meta-llama/Llama-2-7b-chat-hf
        - internlm/Intern-S1
        - internlm/Intern-S1-mini
        - internlm/internlm3-8b-instruct
        - internlm/internlm3-8b-instruct-awq
        - internlm/internlm2_5-7b-chat
        - internlm/internlm2_5-20b-chat
        - OpenGVLab/InternVL3_5-30B-A3B
        - OpenGVLab/InternVL3-2B
        - OpenGVLab/InternVL3-8B
        - OpenGVLab/InternVL3-38B
        - OpenGVLab/InternVL2_5-26B-MPO
        - OpenGVLab/InternVL2_5-1B
        - OpenGVLab/InternVL2_5-8B
        - OpenGVLab/InternVL2_5-26B
        - OpenGVLab/InternVL2_5-38B
        - OpenGVLab/InternVL2-2B
        - OpenGVLab/InternVL2-40B
        - OpenGVLab/Mini-InternVL-Chat-2B-V1-5
        - OpenGVLab/InternVL2-Llama3-76B-AWQ
        - Qwen/Qwen3-0.6B
        - Qwen/Qwen3-4B
        - Qwen/Qwen3-8B
        - Qwen/Qwen3-32B
        - Qwen/Qwen3-30B-A3B
        - Qwen/Qwen3-235B-A22B
        - Qwen/Qwen3-VL-8B-Instruct
        - Qwen/Qwen3-VL-32B-Instruct
        - Qwen/Qwen3-VL-30B-A3B-Instruct
        - Qwen/Qwen2.5-0.5B-Instruct
        - Qwen/Qwen2.5-7B-Instruct
        - Qwen/Qwen2.5-32B-Instruct
        - Qwen/Qwen2.5-72B-Instruct
        - Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4
        - Qwen/Qwen2.5-VL-7B-Instruct
        - Qwen/Qwen2.5-VL-32B-Instruct
        - Qwen/Qwen2-VL-2B-Instruct
        - Qwen/Qwen2-VL-7B-Instruct
        - Qwen/Qwen1.5-MoE-A2.7B-Chat
        - mistralai/Mistral-7B-Instruct-v0.3
        - mistralai/Mistral-Nemo-Instruct-2407
        - mistralai/Mixtral-8x7B-Instruct-v0.1
        - lmdeploy/llama2-chat-7b-w4
        - baichuan-inc/Baichuan2-7B-Chat
        - 01-ai/Yi-6B-Chat
        - liuhaotian/llava-v1.5-13b
        - liuhaotian/llava-v1.6-vicuna-7b
        - deepseek-ai/DeepSeek-R1-Distill-Llama-8B
        - deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
        - deepseek-ai/deepseek-vl-1.3b-chat
        - deepseek-ai/deepseek-coder-1.3b-instruct
        - deepseek-ai/DeepSeek-V2-Lite-Chat
        - codellama/CodeLlama-7b-Instruct-hf
        - THUDM/glm-4-9b-chat
        - THUDM/codegeex4-all-9b
        - openbmb/MiniCPM-Llama3-V-2_5
        - openbmb/MiniCPM-V-2_6
        - allenai/Molmo-7B-D-0924

# Chat models listed for the "pytorch" suite, as "org/name" identifiers under
# a single "tp" key. Same layout as turbomind_chat_model, but the membership
# differs (for example the gpt-oss, gemma and Phi entries appear only here).
# NOTE(review): presumably each entry is resolved under model_path and its
# parallelism is looked up in tp_config by the name after "org/" — confirm.
# NOTE(review): list order may determine test ordering/ids in the consumer;
# keep it stable unless that is verified not to matter.
pytorch_chat_model:
    tp:
        - meta-llama/Llama-4-Scout-17B-16E-Instruct
        - meta-llama/Llama-3.2-1B-Instruct
        - meta-llama/Llama-3.2-3B-Instruct
        - meta-llama/Llama-3.2-11B-Vision-Instruct
        - meta-llama/Meta-Llama-3-1-8B-Instruct
        - meta-llama/Meta-Llama-3-1-70B-Instruct
        - meta-llama/Meta-Llama-3-8B-Instruct
        - meta-llama/Llama-2-7b-chat-hf
        - internlm/Intern-S1
        - internlm/Intern-S1-mini
        - internlm/internlm3-8b-instruct
        - internlm/internlm2_5-7b-chat
        - internlm/internlm2_5-20b-chat
        - OpenGVLab/InternVL3_5-30B-A3B
        - OpenGVLab/InternVL3-2B
        - OpenGVLab/InternVL3-8B
        - OpenGVLab/InternVL3-38B
        - OpenGVLab/InternVL2_5-26B-MPO
        - OpenGVLab/InternVL2_5-1B
        - OpenGVLab/InternVL2_5-8B
        - OpenGVLab/InternVL2_5-26B
        - OpenGVLab/InternVL2_5-38B
        - OpenGVLab/InternVL2-2B
        - OpenGVLab/InternVL2-4B
        - OpenGVLab/InternVL2-40B
        - OpenGVLab/InternVL2-Llama3-76B-AWQ
        - OpenGVLab/Mono-InternVL-2B
        - Qwen/Qwen3-0.6B
        - Qwen/Qwen3-4B
        - Qwen/Qwen3-8B
        - Qwen/Qwen3-32B
        - Qwen/Qwen3-30B-A3B
        - Qwen/Qwen3-235B-A22B
        - Qwen/Qwen3-VL-8B-Instruct
        - Qwen/Qwen3-VL-32B-Instruct
        - Qwen/Qwen3-VL-30B-A3B-Instruct
        - Qwen/Qwen2.5-0.5B-Instruct
        - Qwen/Qwen2.5-7B-Instruct
        - Qwen/Qwen2.5-32B-Instruct
        - Qwen/Qwen2.5-72B-Instruct
        - Qwen/Qwen1.5-MoE-A2.7B-Chat
        - Qwen/Qwen2.5-VL-7B-Instruct
        - Qwen/Qwen2.5-VL-32B-Instruct
        - Qwen/Qwen2-VL-2B-Instruct
        - Qwen/Qwen2-VL-7B-Instruct
        - unsloth/gpt-oss-20b-BF16
        - unsloth/gpt-oss-120b-BF16
        - mistralai/Mistral-7B-Instruct-v0.3
        - mistralai/Mixtral-8x7B-Instruct-v0.1
        - google/gemma-3-12b-it
        - google/gemma-2-9b-it
        - google/gemma-2-27b-it
        - google/gemma-7b-it
        - baichuan-inc/Baichuan2-7B-Chat
        - baichuan-inc/Baichuan2-13B-Chat
        - 01-ai/Yi-6B-Chat
        - deepseek-ai/DeepSeek-R1-Distill-Llama-8B
        - deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
        - deepseek-ai/deepseek-moe-16b-chat
        - deepseek-ai/deepseek-coder-1.3b-instruct
        - deepseek-ai/DeepSeek-V2-Lite-Chat
        - THUDM/chatglm2-6b
        - THUDM/cogvlm2-llama3-chinese-chat-19B
        - THUDM/glm-4v-9b
        - THUDM/glm-4-9b-chat
        - THUDM/codegeex4-all-9b
        - openbmb/MiniCPM-V-2_6
        - microsoft/Phi-4-mini-instruct
        - microsoft/Phi-3.5-mini-instruct
        - microsoft/Phi-3.5-vision-instruct
        - microsoft/Phi-3-mini-4k-instruct
        - microsoft/Phi-3-vision-128k-instruct

# "vl" model list for the "turbomind" suite ("vl" presumably stands for
# vision-language — confirm). Same layout as the chat lists: "org/name"
# identifiers under a single "tp" key.
# Every entry here also appears in turbomind_chat_model.
turbomind_vl_model:
    tp:
        - internlm/Intern-S1
        - internlm/Intern-S1-mini
        - OpenGVLab/InternVL2_5-26B-MPO
        - OpenGVLab/Mini-InternVL-Chat-2B-V1-5
        - OpenGVLab/InternVL3_5-30B-A3B
        - OpenGVLab/InternVL3-2B
        - OpenGVLab/InternVL3-8B
        - OpenGVLab/InternVL3-38B
        - OpenGVLab/InternVL2_5-1B
        - OpenGVLab/InternVL2_5-8B
        - OpenGVLab/InternVL2_5-26B
        - OpenGVLab/InternVL2_5-38B
        - OpenGVLab/InternVL2-2B
        - OpenGVLab/InternVL2-40B
        - OpenGVLab/InternVL2-Llama3-76B-AWQ
        - Qwen/Qwen3-VL-8B-Instruct
        - Qwen/Qwen3-VL-32B-Instruct
        - Qwen/Qwen3-VL-30B-A3B-Instruct
        - Qwen/Qwen2.5-VL-7B-Instruct
        - Qwen/Qwen2.5-VL-32B-Instruct
        - Qwen/Qwen2-VL-2B-Instruct
        - Qwen/Qwen2-VL-7B-Instruct
        - liuhaotian/llava-v1.5-13b
        - liuhaotian/llava-v1.6-vicuna-7b
        - deepseek-ai/deepseek-vl-1.3b-chat
        - openbmb/MiniCPM-Llama3-V-2_5
        - openbmb/MiniCPM-V-2_6

# "vl" model list for the "pytorch" suite ("vl" presumably stands for
# vision-language — confirm). Same layout as the chat lists: "org/name"
# identifiers under a single "tp" key.
# NOTE(review): unlike the turbomind pair, this list is not a subset of
# pytorch_chat_model: "OpenGVLab/Mini-InternVL-Chat-2B-V1-5" and
# "THUDM/cogvlm-chat-hf" appear only here. Confirm that is intentional.
pytorch_vl_model:
    tp:
        - meta-llama/Llama-3.2-11B-Vision-Instruct
        - internlm/Intern-S1
        - internlm/Intern-S1-mini
        - OpenGVLab/InternVL2_5-26B-MPO
        - OpenGVLab/Mini-InternVL-Chat-2B-V1-5
        - OpenGVLab/InternVL3_5-30B-A3B
        - OpenGVLab/InternVL3-2B
        - OpenGVLab/InternVL3-8B
        - OpenGVLab/InternVL3-38B
        - OpenGVLab/InternVL2_5-1B
        - OpenGVLab/InternVL2_5-8B
        - OpenGVLab/InternVL2_5-26B
        - OpenGVLab/InternVL2_5-38B
        - OpenGVLab/InternVL2-2B
        - OpenGVLab/InternVL2-4B
        - OpenGVLab/InternVL2-40B
        - OpenGVLab/Mono-InternVL-2B
        - Qwen/Qwen3-VL-8B-Instruct
        - Qwen/Qwen3-VL-32B-Instruct
        - Qwen/Qwen3-VL-30B-A3B-Instruct
        - Qwen/Qwen2-VL-2B-Instruct
        - Qwen/Qwen2-VL-7B-Instruct
        - Qwen/Qwen2.5-VL-7B-Instruct
        - Qwen/Qwen2.5-VL-32B-Instruct
        - THUDM/cogvlm-chat-hf
        - THUDM/cogvlm2-llama3-chinese-chat-19B
        - THUDM/glm-4v-9b
        - microsoft/Phi-3-vision-128k-instruct
        - microsoft/Phi-3.5-vision-instruct

# "base" model list for the "turbomind" suite ("base" presumably means
# non-instruction-tuned checkpoints — confirm). "org/name" identifiers under a
# single "tp" key.
# Differs from pytorch_base_model only in its last entry.
turbomind_base_model:
    tp:
        - Qwen/Qwen3-8B-Base
        - Qwen/Qwen3-30B-A3B-Base
        - internlm/internlm2_5-7b
        - internlm/internlm2_5-1_8b
        - internlm/internlm2_5-20b
        - codellama/CodeLlama-7b-hf

# "base" model list for the "pytorch" suite ("base" presumably means
# non-instruction-tuned checkpoints — confirm). "org/name" identifiers under a
# single "tp" key.
# Differs from turbomind_base_model only in its last entry.
pytorch_base_model:
    tp:
        - Qwen/Qwen3-8B-Base
        - Qwen/Qwen3-30B-A3B-Base
        - internlm/internlm2_5-7b
        - internlm/internlm2_5-1_8b
        - internlm/internlm2_5-20b
        - bigcode/starcoder2-7b

# Quantization-related model lists for the "turbomind" suite, grouped under
# four keys: no_awq, gptq, no_kvint4 and no_kvint8.
# NOTE(review): the "no_" prefix presumably marks exclusion lists (models to
# skip for that quantization mode) while "gptq" is an inclusion list — confirm
# against the consumer.
turbomind_quantization:
    # NOTE(review): "microsoft/Phi-4-mini-instruct" is not in any turbomind
    # model list visible in this file; the entry may be redundant.
    no_awq:
        - internlm/Intern-S1
        - internlm/Intern-S1-mini
        - meta-llama/Meta-Llama-3-1-70B-Instruct
        - Qwen/Qwen3-30B-A3B
        - Qwen/Qwen3-235B-A22B
        - Qwen/Qwen3-30B-A3B-Base
        - Qwen/Qwen3-VL-8B-Instruct
        - Qwen/Qwen3-VL-32B-Instruct
        - Qwen/Qwen3-VL-30B-A3B-Instruct
        - Qwen/Qwen1.5-MoE-A2.7B-Chat
        - Qwen/Qwen2.5-VL-7B-Instruct
        - Qwen/Qwen2.5-VL-32B-Instruct
        - Qwen/Qwen2-VL-2B-Instruct
        - Qwen/Qwen2-VL-7B-Instruct
        - OpenGVLab/InternVL3_5-30B-A3B
        - mistralai/Mistral-7B-Instruct-v0.3
        - mistralai/Mistral-Nemo-Instruct-2407
        - deepseek-ai/deepseek-coder-1.3b-instruct
        - deepseek-ai/DeepSeek-V2-Lite-Chat
        - codellama/CodeLlama-7b-Instruct-hf
        - microsoft/Phi-4-mini-instruct
        - allenai/Molmo-7B-D-0924
        - THUDM/codegeex4-all-9b
    gptq:
        - internlm/internlm2_5-7b-chat
    no_kvint4:
        - meta-llama/Llama-3.2-1B-Instruct
        - OpenGVLab/InternVL3-2B
        - OpenGVLab/InternVL3-8B
        - OpenGVLab/InternVL2_5-1B
        - openbmb/MiniCPM-V-2_6
        - Qwen/Qwen3-0.6B
        - Qwen/Qwen3-4B
        - Qwen/Qwen3-8B
        - Qwen/Qwen3-32B
        - Qwen/Qwen3-30B-A3B
        - Qwen/Qwen3-235B-A22B
        - Qwen/Qwen3-30B-A3B-Base
        - Qwen/Qwen3-VL-8B-Instruct
        - Qwen/Qwen3-VL-32B-Instruct
        - Qwen/Qwen3-VL-30B-A3B-Instruct
        - Qwen/Qwen2.5-0.5B-Instruct
        - Qwen/Qwen2.5-7B-Instruct
        - Qwen/Qwen2.5-32B-Instruct
        - Qwen/Qwen2.5-72B-Instruct
        - Qwen/Qwen2.5-VL-7B-Instruct
        - Qwen/Qwen2.5-VL-32B-Instruct
        - Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4
        - Qwen/Qwen2-VL-2B-Instruct
        - Qwen/Qwen2-VL-7B-Instruct
        - Qwen/Qwen1.5-MoE-A2.7B-Chat
        - microsoft/Phi-3.5-mini-instruct
        - allenai/Molmo-7B-D-0924
        - deepseek-ai/DeepSeek-V2-Lite-Chat
    no_kvint8:
        # NOTE(review): "deepseek-ai/DeepSeek-V2-Chat" appears nowhere else in
        # this file; every other section (including pytorch no_kvint8) names
        # "deepseek-ai/DeepSeek-V2-Lite-Chat". If the Lite model was intended,
        # this entry may currently match nothing — confirm before changing.
        - deepseek-ai/DeepSeek-V2-Chat
        - Qwen/Qwen2.5-7B-Instruct

# Quantization-related model lists for the "pytorch" suite, grouped under four
# keys: awq, w8a8, no_kvint4 and no_kvint8.
# Note the naming asymmetry with turbomind_quantization, which uses "no_awq"
# where this section uses "awq".
# NOTE(review): presumably "awq"/"w8a8" are inclusion lists (models to run in
# that mode) and the "no_" keys are exclusion lists — confirm against the
# consumer.
pytorch_quantization:
    awq:
        - meta-llama/Meta-Llama-3-8B-Instruct
        - meta-llama/Meta-Llama-3-1-8B-Instruct
        - meta-llama/Llama-2-7b-chat-hf
        - internlm/internlm3-8b-instruct
        - internlm/internlm2_5-7b-chat
        - internlm/internlm2_5-20b-chat
        - 01-ai/Yi-6B-Chat
        - Qwen/Qwen3-0.6B
        - Qwen/Qwen3-4B
        - Qwen/Qwen3-8B
        - Qwen/Qwen3-32B
        - Qwen/Qwen2.5-7B-Instruct
        - microsoft/Phi-3-mini-4k-instruct
        - THUDM/glm-4v-9b
    # Unlike "awq", this list also contains two base checkpoints
    # (internlm2_5-20b and internlm2_5-7b) from pytorch_base_model.
    w8a8:
        - meta-llama/Meta-Llama-3-8B-Instruct
        - meta-llama/Llama-3.2-1B-Instruct
        - meta-llama/Llama-2-7b-chat-hf
        - internlm/internlm3-8b-instruct
        - internlm/internlm2_5-7b-chat
        - internlm/internlm2_5-20b-chat
        - 01-ai/Yi-6B-Chat
        - mistralai/Mistral-7B-Instruct-v0.3
        - Qwen/Qwen2.5-7B-Instruct
        - microsoft/Phi-3-mini-4k-instruct
        - internlm/internlm2_5-20b
        - internlm/internlm2_5-7b
        - meta-llama/Meta-Llama-3-1-8B-Instruct
    # NOTE(review): "Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4" is not in any
    # pytorch model list visible in this file; the entry may be redundant.
    no_kvint4:
        - meta-llama/Llama-3.2-1B-Instruct
        - OpenGVLab/InternVL3-2B
        - OpenGVLab/InternVL3-8B
        - OpenGVLab/InternVL2-4B
        - OpenGVLab/InternVL2_5-1B
        - Qwen/Qwen3-0.6B
        - Qwen/Qwen3-4B
        - Qwen/Qwen3-8B
        - Qwen/Qwen3-32B
        - Qwen/Qwen3-30B-A3B
        - Qwen/Qwen3-235B-A22B
        - Qwen/Qwen3-30B-A3B-Base
        - Qwen/Qwen3-VL-8B-Instruct
        - Qwen/Qwen3-VL-32B-Instruct
        - Qwen/Qwen3-VL-30B-A3B-Instruct
        - Qwen/Qwen2.5-0.5B-Instruct
        - Qwen/Qwen2.5-7B-Instruct
        - Qwen/Qwen2.5-32B-Instruct
        - Qwen/Qwen2.5-72B-Instruct
        - Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4
        - Qwen/Qwen1.5-MoE-A2.7B-Chat
        - Qwen/Qwen2.5-VL-7B-Instruct
        - Qwen/Qwen2.5-VL-32B-Instruct
        - Qwen/Qwen2-VL-2B-Instruct
        - Qwen/Qwen2-VL-7B-Instruct
        - deepseek-ai/DeepSeek-V2-Lite-Chat
        - microsoft/Phi-3-mini-4k-instruct
        - microsoft/Phi-3-vision-128k-instruct
        - microsoft/Phi-3.5-vision-instruct
        - microsoft/Phi-3.5-mini-instruct
        - openbmb/MiniCPM-V-2_6
        - unsloth/gpt-oss-20b-BF16
        - unsloth/gpt-oss-120b-BF16
    no_kvint8:
        - deepseek-ai/DeepSeek-V2-Lite-Chat

# Flat list of "org/name" identifiers (no "tp" sub-key, unlike the per-backend
# model sections). Currently four Qwen3 models.
# NOTE(review): presumably the models used by long-context tests — confirm.
longtext_model:
    - Qwen/Qwen3-8B
    - Qwen/Qwen3-32B
    - Qwen/Qwen3-30B-A3B
    - Qwen/Qwen3-235B-A22B

# Flat list of "org/name" identifiers (no "tp" sub-key, unlike the per-backend
# model sections).
# NOTE(review): presumably the models run by the benchmark jobs, with reports
# written under benchmark_path — confirm.
benchmark_model:
    - internlm/Intern-S1
    - internlm/Intern-S1-mini
    - meta-llama/Llama-2-7b-chat-hf
    - meta-llama/Meta-Llama-3-1-8B-Instruct
    - meta-llama/Meta-Llama-3-1-70B-Instruct
    - internlm/internlm3-8b-instruct
    - internlm/internlm2_5-7b-chat
    - internlm/internlm2_5-20b-chat
    - THUDM/glm-4-9b-chat
    - Qwen/Qwen3-32B
    - Qwen/Qwen3-30B-A3B
    - Qwen/Qwen3-235B-A22B
    - Qwen/Qwen2.5-7B-Instruct
    - Qwen/Qwen2.5-72B-Instruct
    - unsloth/gpt-oss-20b-BF16
    - unsloth/gpt-oss-120b-BF16


# Flat list of "org/name" identifiers (no "tp" sub-key).
# NOTE(review): presumably the models run by the evaluation jobs, with reports
# written under eval_log_path — confirm.
# Items are indented by four spaces, matching the other flat lists in this
# file (longtext_model, benchmark_model); the parsed value is unchanged.
evaluate_model:
    - google/gemma-2-9b-it
    - google/gemma-2-27b-it
    - meta-llama/Meta-Llama-3-1-8B-Instruct
    - Qwen/Qwen2.5-7B-Instruct
    - Qwen/Qwen2.5-32B-Instruct
    - Qwen/Qwen1.5-MoE-A2.7B-Chat
    - Qwen/Qwen3-30B-A3B


# Flat list of "org/name" identifiers (no "tp" sub-key).
# NOTE(review): presumably the multimodal models run by the MLLM evaluation
# jobs, with reports written under mllm_eval_log_path — confirm.
# Items are indented by four spaces, matching the other flat lists in this
# file (longtext_model, benchmark_model); the parsed value is unchanged.
mllm_evaluate_model:
    - internlm/Intern-S1-mini
    - OpenGVLab/InternVL3-8B
    - Qwen/Qwen3-VL-8B-Instruct
    - Qwen/Qwen3-VL-32B-Instruct
    - Qwen/Qwen3-VL-30B-A3B-Instruct
    - internlm/Intern-S1
    - OpenGVLab/InternVL3_5-30B-A3B
