#!/usr/bin/env bash
#
# Runs run_pipeline.py with --dont_train over a grid of settings for the
# model EleutherAI/pythia-160m-deduped and the dataset
# togethercomputer/RedPajama-Data-1T-Sample:
#
#   k                 in {16, 32}
#   expansion factor  in {16, 32, 64, 128}
#   five flag variants per (k, expansion factor) pair (see run_config)
#
# That is 2 * 4 * 5 = 40 invocations, executed sequentially in this shell.
# The meaning of the individual flags is defined by run_pipeline.py.
#
# Usage (from the directory that contains run_pipeline.py):
#   API_KEY=<key> bash <this-script>
#
# Required environment:
#   API_KEY  value forwarded to run_pipeline.py via --api_key. It is read from
#            the environment so the secret is not stored in this file, and so
#            that no unquoted placeholder is glob-expanded by the shell.
#
# Exit status: 0 if every run succeeded, 2 if a precondition is not met,
# otherwise non-zero.

# -u catches typos in variable names. -e is deliberately NOT enabled: one
# failing run must not prevent the remaining runs from executing.
set -u

readonly MODEL_NAME='EleutherAI/pythia-160m-deduped'
# Plain single-quoted string: the pipeline must receive the bare dataset name,
# without literal double-quote characters around it.
readonly DATASET='togethercomputer/RedPajama-Data-1T-Sample'

#######################################
# Run the pipeline once for a single configuration.
# Globals:   API_KEY, MODEL_NAME, DATASET (read)
# Arguments: $1  - expansion factor
#            $2  - k
#            $3+ - extra flags for this variant (may be absent)
# Outputs:   a diagnostic on stderr when the run fails
# Returns:   the exit status of run_pipeline.py
#######################################
run_variant() {
  local ef=$1 k=$2 status=0
  shift 2

  # "$@" expands to nothing for the baseline run, so no empty argument is
  # passed. Variant flags go between --k and --dont_train.
  python run_pipeline.py \
    --api_key "$API_KEY" \
    --model_name "$MODEL_NAME" \
    --dataset "$DATASET" \
    --expansion_factor "$ef" \
    --k "$k" \
    "$@" \
    --dont_train || status=$?

  if ((status != 0)); then
    printf 'run_pipeline.py failed (exit %d): expansion_factor=%s k=%s flags=[%s]\n' \
      "$status" "$ef" "$k" "$*" >&2
  fi
  return "$status"
}

#######################################
# Run all five flag variants for one (expansion factor, k) pair.
# Arguments: $1 - expansion factor
#            $2 - k
# Returns:   0 if all five runs succeeded, 1 otherwise
#######################################
run_config() {
  local ef=$1 k=$2 failed=0

  run_variant "$ef" "$k" || failed=1
  run_variant "$ef" "$k" --rerandomize || failed=1
  run_variant "$ef" "$k" --rerandomize --rerandomize_embeddings || failed=1
  run_variant "$ef" "$k" --use_step0 || failed=1
  run_variant "$ef" "$k" --use_random_control || failed=1

  return "$failed"
}

#######################################
# Run the full grid: k is the outer loop, expansion factor the inner loop.
# Returns: 0 if every run succeeded, 1 if at least one run failed
#######################################
run_sweep() {
  local k ef failed=0

  for k in 16 32; do
    for ef in 16 32 64 128; do
      run_config "$ef" "$k" || failed=1
    done
  done

  return "$failed"
}

#######################################
# Validate preconditions, then run the sweep.
# Globals: API_KEY (read)
# Returns: 2 if a precondition is not met, otherwise run_sweep's status
#######################################
main() {
  if [[ -z "${API_KEY:-}" ]]; then
    printf 'error: API_KEY must be set in the environment\n' >&2
    return 2
  fi
  # The pipeline is invoked by relative path, so it must be in the cwd.
  if [[ ! -f run_pipeline.py ]]; then
    printf 'error: run_pipeline.py not found in %s\n' "$PWD" >&2
    return 2
  fi

  run_sweep
}

main "$@"
