#! /bin/bash
# pip3 install mpu
# pip3 install accelerate==0.34.2
# pip3 install torchtyping
# pip3 install transformers
# pip3 install deepspeed==0.15.0
# pip3 install tokenizers==0.14.1
# pip install --upgrade --force-reinstall certifi
# pip install --upgrade datasets huggingface_hub
# pip install torchtyping rouge_score
# pip install --upgrade transformers tokenizers
# pip3 install --no-cache-dir -e /opt/dpcvol/models/pkge/transformers-minillm/.
# pip3 install thop
# pip3 install pytorch_model_summary

# pip3 uninstall py-cpuinfo -y
# pip3 install py-cpuinfo

# Launch topology: one node, one worker process on that node.
NNODES="1"
NODE_RANK="0"
GPUS_PER_NODE="1"        # positional override disabled, was: ${3-1}

# Rendezvous endpoint handed to the launcher.
MASTER_ADDR="localhost"
MASTER_PORT="2113"       # positional override disabled, was: ${2-2113}

# Launcher flags kept as one flat string; the backslash-newline pairs are line
# continuations, so the resulting value contains no newline characters.
DISTRIBUTED_ARGS="--nproc_per_node ${GPUS_PER_NODE} \
                  --nnodes ${NNODES} \
                  --node_rank ${NODE_RANK} \
                  --master_addr ${MASTER_ADDR} \
                  --master_port ${MASTER_PORT}"

# model

# Evaluate fine-tuned checkpoints on the dolly eval set: one torchrun launch of
# evaluate.py per checkpoint name. Arguments passed to this script are appended
# to the evaluate.py command line.
#
# Relies on DISTRIBUTED_ARGS and GPUS_PER_NODE being set earlier in this file.
#
# Checkpoint names that were previously listed in the loop (currently disabled):
#   537M 537M-LInit-78Mtoken 537M-LInit-100Mtoken 537M-LInit-500Mtoken
#   138M-LInit-78Mtoken 138M-LInit-100Mtoken 138M-LInit-500Mtoken
#   220M-LInit-78Mtoken 220M-LInit-100Mtoken 220M-LInit-500Mtoken
#   277M 277M-LInit-78Mtoken 277M-LInit-100Mtoken 277M-LInit-500Mtoken
#   380M-LInit-78Mtoken 380M-LInit-100Mtoken 380M-LInit-500Mtoken 220M

# ---- settings that do not depend on the checkpoint -------------------------
BASE_PATH="/home/naie/work/" # ${1-"/home/naie/work/"}
# Strip the trailing slash before joining so the path has no "//" in it.
BASE_CODE_PATH="${BASE_PATH%/}/minillm"

STORAGE_ROOT="/opt/dpcvol/datasets/8625883998351850434"
SFT_CKPT_ROOT="${STORAGE_ROOT}/ckpt/minillm/learngene/des-sft/llama3-8b/sft-on-downstream-tasks"
TOKENIZER_PATH="${STORAGE_ROOT}/ckpt/minillm/minillm_official/gpt2/train/minillm/medium-init-xlarge-sft/"

# data
DATA_NAMES="dolly"
DATA_DIR="${STORAGE_ROOT}/datasets/llm/minillm/eval_data/dolly/"
# hp
EVAL_BATCH_SIZE=16
# runtime
TYPE="eval_main"

# DISTRIBUTED_ARGS is a flat, single-line, whitespace-separated string; split it
# once into an array so the final command can be assembled without relying on
# unquoted expansion.
read -r -a DIST_ARGS <<< "${DISTRIBUTED_ARGS}"

export NCCL_DEBUG=""
export TOKENIZERS_PARALLELISM=false
export PYTHONIOENCODING=utf-8
export HCCL_CONNECT_TIMEOUT=1000
# NOTE(review): an earlier assignment appended
# /home/naie/.local/lib/python3.9/site-packages to PYTHONPATH, but it was
# overwritten by this export before anything could read it, so only
# BASE_CODE_PATH ever reached torchrun. That effective behavior is kept; add
# the site-packages directory here if it is actually required.
export PYTHONPATH="${BASE_CODE_PATH}"

# Checkpoints whose evaluation could not be launched or exited non-zero.
FAILED_CKPTS=()

for CKPT_NAME in 380M; do

    # model: pick the checkpoint directory for this name
    case "${CKPT_NAME}" in
        # 220M was tested separately, from:
        #   ${SFT_CKPT_ROOT}/220M/dolly/e10-bs16-lr0.0005-G1-N8-NN1/891/
        380M)
            CKPT="${SFT_CKPT_ROOT}/380M/dolly/e10-bs16-lr0.0005-G1-N8-NN1/891/"
            ;;
        *)
            CKPT="${SFT_CKPT_ROOT}/${CKPT_NAME}/dolly/e10-bs8-lr0.0005-G1-N1-NN1/14291/"
            ;;
    esac

    SAVE_PATH="${STORAGE_ROOT}/ckpt/minillm/learngene/llama3-8b/eval/downstream_task/dolly/${CKPT_NAME}/"

    # evaluate.py options, one array element per word so values are never
    # re-split or glob-expanded by the shell.
    OPTS=(
        # model
        --base-path "${BASE_PATH}"
        --model-path "${CKPT}"
        --tokenizer-path "${TOKENIZER_PATH}"
        --ckpt-name "${CKPT_NAME}"
        --n-gpu "${GPUS_PER_NODE}"
        --model-type gpt2
        # data
        --data-dir "${DATA_DIR}"
        --data-names "${DATA_NAMES}"
        --num-workers 0
        --dev-num -1
        --data-process-workers -1
        --json-data
        # hp
        --eval-batch-size "${EVAL_BATCH_SIZE}"
        --max-length 512
        --max-prompt-length 256
        # runtime
        --do-eval
        --save "${SAVE_PATH}"
        --seed 10
        # deepspeed
        --deepspeed
        --deepspeed_config "${BASE_CODE_PATH}/configs/deepspeed/ds_config.json"
        --type "${TYPE}"
        # gen
        --do-sample
        --top-k 0
        --top-p 1.0
        --temperature 1.0
    )

    # "$@" keeps each forwarded script argument as its own word.
    CMD=(torchrun "${DIST_ARGS[@]}" "${BASE_CODE_PATH}/evaluate.py" "${OPTS[@]}" "$@")

    printf '%s\n' "${CMD[*]}"
    printf '%s\n' "PYTHONPATH=${PYTHONPATH}"

    if ! mkdir -p -- "${SAVE_PATH}"; then
        printf 'error: cannot create %s; skipping %s\n' "${SAVE_PATH}" "${CKPT_NAME}" >&2
        FAILED_CKPTS+=("${CKPT_NAME}")
        continue
    fi

    # Keep going with the remaining checkpoints on failure, but remember it.
    "${CMD[@]}" || {
        rc=$?
        printf 'error: evaluation of %s exited with status %d\n' "${CKPT_NAME}" "${rc}" >&2
        FAILED_CKPTS+=("${CKPT_NAME}")
    }
done

# Surface any failure through the script's exit status. `false` is used instead
# of `exit` so that sourcing this file does not terminate the calling shell.
if (( ${#FAILED_CKPTS[@]} > 0 )); then
    printf 'error: evaluation failed for: %s\n' "${FAILED_CKPTS[*]}" >&2
    false
fi
