#!/usr/bin/env bash
# Launches create_dataset.py with model path Qwen/Qwen3-4B on the
# Brench/MMLU-Pro-CoT-Train-43K dataset (split "train"), with the output
# directory set to teacher_datasets/output.
#
# NOTE(review): create_dataset.py is not visible from this script, so the flag
# meanings below are inferred from their names -- confirm against that script:
#   --use_hf_dataset / --dataset_path  presumably load the dataset from the
#                                      Hugging Face Hub rather than local disk
#   --temperature/--top_p/--top_k/--min_p
#                                      presumably sampling parameters
#   --tp_size 8                        presumably the tensor-parallel degree;
#                                      equals the number of GPU ids listed below

# Must be exported: a bare assignment on its own line only creates a shell
# variable and is NOT placed in the environment of the python child process.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# The last argument deliberately has no trailing backslash, so any line that
# follows this command cannot be joined onto it as extra arguments.
python create_dataset.py \
    --model_path Qwen/Qwen3-4B \
    --use_hf_dataset \
    --dataset_path Brench/MMLU-Pro-CoT-Train-43K \
    --split train \
    --temperature 0.7 \
    --top_p 0.8 \
    --top_k 20 \
    --min_p 0.0 \
    --output_dir teacher_datasets/output \
    --tp_size 8