#!/usr/bin/env bash
#
# Runs minillm/tools/process_data_dolly.py on the qwen3-4b generated data and
# then copies the validation files from the "full/gpt2" processed set into the
# "pseudo/qwen3-4b_2_gpt2" directory.
#
# Usage: <this script> [BASE_PATH]
#   BASE_PATH  optional first argument; accepted for backward compatibility
#              but not referenced by any command below.
#
# The script stops at the first failing command, so validation files are not
# copied next to a failed/partial preprocessing run.

# -e: abort on the first failing command; -u: treat unset variables as errors.
# (No pipelines are used here, so `pipefail` is not needed and the script
# stays runnable under a plain POSIX sh as well.)
set -eu

# Default to empty so that a missing argument does not trip `set -u`.
BASE_PATH=${1:-}

export TF_CPP_MIN_LOG_LEVEL=3
export PYTHONPATH=/home/work/user-job-dir/app/

# Shared path prefixes, hoisted so that each location is spelled exactly once.
APP_DIR=/home/work/user-job-dir/app
DATASET_ROOT=/opt/dpcvol/datasets/8625883998351850434
PROCESSED_ROOT="${DATASET_ROOT}/datasets/llm/minillm/processed_data/dolly"

MODEL_TYPE=qwen3-4b_2_gpt2
VALID_SRC_DIR="${PROCESSED_ROOT}/full/gpt2"
VALID_DST_DIR="${PROCESSED_ROOT}/pseudo/${MODEL_TYPE}"

python3 "${APP_DIR}/minillm/tools/process_data_dolly.py" \
    --data-dir /opt/dpcvol/models/LLM_Distillation/ans-train/qwen3-4b/gen/qwen3-4b/t1.0-l512/ \
    --processed-data-dir "${PROCESSED_ROOT}/pseudo" \
    --model-path "${DATASET_ROOT}/ckpt/minillm/minillm_official/gpt2/train/sft/gpt2-xlarge/" \
    --data-process-workers 32 \
    --max-prompt-length 256 \
    --dev-num -1 \
    --model-type "${MODEL_TYPE}"

# NOTE(review): the Python step presumably creates ${VALID_DST_DIR} itself;
# mkdir -p is a no-op in that case and only guards the copies below.
mkdir -p -- "${VALID_DST_DIR}"

# Reuse the validation files from the "full/gpt2" processed set.
for valid_file in valid_0.bin valid_0.idx valid.jsonl; do
    cp -- "${VALID_SRC_DIR}/${valid_file}" "${VALID_DST_DIR}/"
done
