#! /bin/bash

pip3 install accelerate==0.34.2
pip3 install torchtypin
pip3 install transformers
pip3 install deepspeed==0.15.0
pip3 install tokenizers==0.14.1
pip install --upgrade --force-reinstall certifi
pip install --upgrade datasets huggingface_hub
pip install torchtyping rouge_score
pip3 install /opt/dpcvol/models/pkge/transformers-minillm/.

pip3 uninstall py-cpuinfo -y
pip3 install py-cpuinfo
pip3 install numerize

MASTER_ADDR=localhost
MASTER_PORT=3015
NNODES=1
NODE_RANK=0
GPUS_PER_NODE=1

DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE \
                  --nnodes $NNODES \
                  --node_rank $NODE_RANK \
                  --master_addr $MASTER_ADDR \
                  --master_port $MASTER_PORT"

# model
BASE_PATH="/home/work/user-job-dir/app/minillm/"
CKPT_NAME="qwen3-4b"
CKPT="/opt/dpcvol/models/LLM_Distillation/ans-train/qwen3-4b/sft/e3-bs2-lr1e-05-G2-N2-NN1/4260/"
# data
DATA_DIR="/opt/dpcvol/datasets/8625883998351850434/datasets/llm/minillm/processed_data/dolly/full/llama3/"
# hp
EVAL_BATCH_SIZE=16
# runtime
SAVE_PATH="/opt/dpcvol/models/LLM_Distillation/ans-train/qwen3-4b/gen_1/"


OPTS=""
# model
OPTS+=" --base-path ${BASE_PATH}"
OPTS+=" --model-path ${CKPT}"
OPTS+=" --tokenizer-path ${CKPT}"
OPTS+=" --ckpt-name ${CKPT_NAME}"
OPTS+=" --n-gpu ${GPUS_PER_NODE}"
# data
OPTS+=" --data-dir ${DATA_DIR}"
OPTS+=" --data-names dolly"
OPTS+=" --num-workers 0"
OPTS+=" --gen-num -1"
OPTS+=" --data-process-workers -1"
OPTS+=" --json-data"
# hp
OPTS+=" --eval-batch-size ${EVAL_BATCH_SIZE}"
OPTS+=" --max-length 512"
OPTS+=" --max-prompt-length 256"
# runtime
OPTS+=" --save ${SAVE_PATH}"
OPTS+=" --seed-ppo 42"
OPTS+=" --seed 10"
# deepspeed
OPTS+=" --deepspeed"
OPTS+=" --deepspeed_config ${BASE_PATH}/configs/deepspeed/ds_config.json"
OPTS+=" --type gen"
# gen
OPTS+=" --do-sample"
OPTS+=" --top-k 0"
OPTS+=" --top-p 1.0"
OPTS+=" --temperature 1.0"


export TOKENIZERS_PARALLELISM=false
export PYTHONIOENCODING=utf-8
export PYTHONPATH=${BASE_PATH}
CMD="torchrun ${DISTRIBUTED_ARGS} ${BASE_PATH}/generate.py ${OPTS} $@"


echo ${CMD}
echo "PYTHONPATH=${PYTHONPATH}"
mkdir -p ${SAVE_PATH}
${CMD}
