#!/bin/bash

# Launches create_hash_balance.py with a fixed set of options.
#
# Usage: <this script> DATA_DIR
#   DATA_DIR  value forwarded to the Python script's --data_dir option.
#
# create_hash_balance.py is referenced by a relative path, so run this
# script from the directory that contains it.
# NOTE(review): ./nefd_dataset_info is also relative — presumably resolved
# against the current working directory by the Python script; confirm.
#
# Exit status: 2 when DATA_DIR is missing or empty; otherwise whatever
# python3 returns.

# Fail early with a usage message instead of letting --data_dir silently
# swallow the next flag when no argument was supplied.
if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s DATA_DIR\n' "${0##*/}" >&2
  exit 2
fi

DATA_DIR=$1

# Quote the expansion so paths containing spaces or glob characters reach
# the Python script as a single argument.
python3 create_hash_balance.py \
  --data_dir "$DATA_DIR" \
  --folder_name "tokenize_1M" \
  --batch_size 2048 \
  --vocab_size 1000003 \
  --num_workers 16 \
  --dataset_info_path ./nefd_dataset_info \
  --num_experts 500