# Interactive SLURM session
# Requests one GPU (`--gres gpu:1`) on the `a100` partition (`-p a100`) and
# runs /bin/zsh in pseudo-terminal mode (`--pty`), giving an interactive shell
# inside the allocation.
srun -p a100 --pty --gres gpu:1 /bin/zsh


# Model training

# Seed id passed as `+sid` to the model-training (`+mt`) submissions.
mt_seed_id=4
# Token amount: sweep the `t-` field of the config name, doubling from 16 to 1024.
for tokens in 16 32 64 128 256 512 1024; do
    sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$mt_seed_id +mt=pyt-1b_a-26_t-${tokens}
done

# Alphabet size
# For each token amount (512 first, then 1024), sweep the `a-` field over
# 2, 4, 7 and 13. Braces around the variables are required so the shell does
# not read `$alphabet_t` as a single variable name.
for tokens in 512 1024; do
    for alphabet in 2 4 7 13; do
        sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$mt_seed_id +mt=pyt-1b_a-${alphabet}_t-${tokens}
    done
done

# Model types
# All three configs share the `a-26_t-1024` suffix; only the model prefix of
# the `+mt` name changes. The launcher differs per model: pyt-12b and gpt2-xl
# go through single_a100.sh, gpt2 through single_a40.sh.
# NOTE(review): `--type devel` follows the script name, so sbatch forwards it
# to single_a100.sh as a script argument — presumably parsed there; confirm.
sbatch scripts/slurm/single_a100.sh --type devel deepspeed src/main.py +sid=$mt_seed_id +mt=pyt-12b_a-26_t-1024
sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$mt_seed_id +mt=gpt2_a-26_t-1024
sbatch scripts/slurm/single_a100.sh deepspeed src/main.py +sid=$mt_seed_id +mt=gpt2-xl_a-26_t-1024

# String partitioning
# Sweep the `p-` suffix over powers of two from 2 to 64 on the
# `pyt-1b_a-26_t-1024` config.
for partitions in 2 4 8 16 32 64; do
    sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$mt_seed_id +mt=pyt-1b_a-26_t-1024_p-${partitions}
done

# Entropy control strings
# Sweep the `eea-` suffix in descending order (13, 7, 4, 2) on the
# `pyt-1b_a-26_t-1024` config.
for eea in 13 7 4 2; do
    sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$mt_seed_id +mt=pyt-1b_a-26_t-1024_eea-${eea}
done


# Additional model types
sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$mt_seed_id +mt=pyt-70m_a-26_t-1024
# Disabled submissions, kept for reference.
# NOTE(review): `pyt-410` may be missing an `m` (compare `pyt-70m`,
# `pyt-160m`) — confirm against the available config names before enabling.
# sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$mt_seed_id +mt=pyt-160m_a-26_t-1024
# sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$mt_seed_id +mt=pyt-410_a-26_t-1024

# llama2 configs: both sizes use single_a100.sh with `--type devel`.
for llama_size in 7b 13b; do
    sbatch scripts/slurm/single_a100.sh --type devel deepspeed src/main.py +sid=$mt_seed_id +mt=llama2-${llama_size}_a-26_t-1024
done


# Prefix performance

# Seed id passed as `+sid` to the prefix-performance (`+pp`) submissions.
pp_seed_id=6
# Token amount: 26 characters, 1b model
# `sl-` values up to 128 are submitted through single_a40.sh ...
for sl in 16 32 64 128; do
    sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$pp_seed_id +pp=pyt-1b_sl-${sl}_al-26
done
# ... while 256 and above use single_a100_long.sh.
for sl in 256 512 1024; do
    sbatch scripts/slurm/single_a100_long.sh deepspeed src/main.py +sid=$pp_seed_id +pp=pyt-1b_sl-${sl}_al-26
done

# Token amount: 4 characters, 1b model
# Note the submission order: the single_a40.sh jobs for sl 16-64 go first,
# then the single_a100_long.sh jobs for sl 256-1024, and the single_a40.sh
# job for sl-128 is submitted last.
for sl in 16 32 64; do
    sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$pp_seed_id +pp=pyt-1b_sl-${sl}_al-4
done
for sl in 256 512 1024; do
    sbatch scripts/slurm/single_a100_long.sh deepspeed src/main.py +sid=$pp_seed_id +pp=pyt-1b_sl-${sl}_al-4
done
sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$pp_seed_id +pp=pyt-1b_sl-128_al-4

# Alphabet size: 128 tokens, 1b model (also for 256 tokens)
# Sweep the `al-` field over 2, 7 and 13 with `sl-128` fixed.
for al in 2 7 13; do
    sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$pp_seed_id +pp=pyt-1b_sl-128_al-${al}
done

# Size change: 26 characters, 128 tokens, 1b model (also for 64 tokens)
# Sweep the `sc-` field over 0, 0.5, 1.5 and 2 with `sl-128` fixed.
for sc in 0 0.5 1.5 2; do
    sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$pp_seed_id +pp=pyt-1b_sl-128_sc-${sc}
done

# Training stages: 128 tokens, 1b model
# Sweep the `te-` field over 5, 10, 15, 20, 30 and 40 with `sl-128` fixed.
for te in 5 10 15 20 30 40; do
    sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$pp_seed_id +pp=pyt-1b_sl-128_te-${te}
done

# Replacement strategy
# One submission per `rs-` variant, with `sl-128` fixed.
for rs in const_id rand_ood const_ood; do
    sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$pp_seed_id +pp=pyt-1b_sl-128_rs-${rs}
done

# Model size: 128 tokens
# Every config uses the `sl-128_al-26` suffix; only the model prefix changes.
# The launcher script and its options differ per model, so these four
# submissions stay explicit.
sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$pp_seed_id +pp=pyt-70m_sl-128_al-26
sbatch scripts/slurm/single_a100.sh --type devel deepspeed src/main.py +sid=$pp_seed_id +pp=pyt-12b_sl-128_al-26
sbatch scripts/slurm/single_a40.sh deepspeed src/main.py +sid=$pp_seed_id +pp=gpt2_sl-128_al-26
sbatch scripts/slurm/single_a100.sh deepspeed src/main.py +sid=$pp_seed_id +pp=gpt2-xl_sl-128_al-26

# llama2 configs: both sizes use single_a100.sh with `--type devel`.
for llama_size in 7b 13b; do
    sbatch scripts/slurm/single_a100.sh --type devel deepspeed src/main.py +sid=$pp_seed_id +pp=llama2-${llama_size}_sl-128_al-26
done
