#!/usr/bin/env bash
#
# Run Megatron-LM's tools/preprocess_data.py over a range of C4 training
# shards, one invocation per shard, with a HuggingFaceTokenizer loaded from
# the path given to --tokenizer-model.
#
# Run this from the directory that contains Megatron-LM/ and model_home/:
# both are referenced by relative path.
#
# Optional environment overrides (the defaults reproduce the original,
# hard-coded command lines exactly):
#   C4_DIR           directory holding c4-train.NNNNN-of-01024.json.gz
#   OUT_DIR          directory receiving the qwen3-c4-NNNN output prefixes
#   TOKENIZER_MODEL  value passed to --tokenizer-model
#   WORKERS          value passed to --workers
#   FIRST_SHARD      first shard index to process (decimal, default 0)
#   LAST_SHARD       last shard index to process, inclusive (default 99)
#
# Exit status: 0 if every shard was processed successfully, 1 otherwise.

# No `-e` on purpose: a failing shard must not stop the remaining shards.
# Failures are collected and reported once the loop has finished.
set -uo pipefail

c4_dir=${C4_DIR:-/workspace/Megatron-LM-router/c4_dataset}
out_dir=${OUT_DIR:-/workspace/Megatron-LM-router/qwen3_dataset}
tokenizer_model=${TOKENIZER_MODEL:-model_home/qwen3-30b-a3b}
workers=${WORKERS:-97}

# Force base 10 so that values such as "0099" are not parsed as octal.
first_shard=$((10#${FIRST_SHARD:-0}))
last_shard=$((10#${LAST_SHARD:-99}))

if ! mkdir -p -- "$out_dir"; then
  printf 'error: cannot create output directory: %s\n' "$out_dir" >&2
  exit 1
fi

failed_shards=()

# A C-style loop plus printf is used instead of {0000..0099}: zero-padded
# brace ranges require bash >= 4 and silently lose their padding on older
# shells, which would produce wrong file names.
for ((n = first_shard; n <= last_shard; n++)); do
  # Four-digit shard id, used as-is in the output prefix. The input file
  # name carries five digits, hence the literal extra "0" below.
  printf -v shard '%04d' "$n"
  input="${c4_dir}/c4-train.0${shard}-of-01024.json.gz"

  if [[ ! -f "$input" ]]; then
    printf 'error: missing input shard: %s\n' "$input" >&2
    failed_shards+=("$shard")
    continue
  fi

  if ! python Megatron-LM/tools/preprocess_data.py \
    --input "$input" \
    --output-prefix "${out_dir}/qwen3-c4-${shard}" \
    --tokenizer-model "$tokenizer_model" \
    --tokenizer-type HuggingFaceTokenizer \
    --partitions 1 \
    --workers "$workers" \
    --append-eod; then
    printf 'error: preprocessing failed for shard %s\n' "$shard" >&2
    failed_shards+=("$shard")
  fi
done

if ((${#failed_shards[@]} > 0)); then
  printf 'error: %d shard(s) failed: %s\n' \
    "${#failed_shards[@]}" "${failed_shards[*]}" >&2
  exit 1
fi