# Interactive SLURM session: zsh on the a100 partition with one GPU requested
srun --partition=a100 --gres=gpu:1 --pty /bin/zsh

# View job info (custom column layout)
squeue --format="%.18i %.9P %.8j %.8u %.2t %.10M %.6D %C %m %b %N"
# With max duration (adds the %l column and drops %b):
squeue --format="%.18i %.9P %.8j %.8u %.2t %.10M %.6D %C %m %N %l"

# Details for a single job; replace `jobid` with the id of the job to inspect
scontrol show job jobid


# Memorization Dynamics
#
# Local runs through `./actions.py run-docker` on the GPU selected by `gpu`.

model=phi-2.7b
# gpu=0
gpu=1
# ./actions.py run-docker --gpus=${gpu} torchrun src/main.py +md=${model}_a-4_t-1024 ++sid=4

# h-2 and h-7 configs, each with seed ids 7 and 8.
{
./actions.py run-docker --gpus=${gpu} torchrun src/main.py +md=${model}_h-2_t-1024 ++sid=7
./actions.py run-docker --gpus=${gpu} torchrun src/main.py +md=${model}_h-2_t-1024 ++sid=8
./actions.py run-docker --gpus=${gpu} torchrun src/main.py +md=${model}_h-7_t-1024 ++sid=7
./actions.py run-docker --gpus=${gpu} torchrun src/main.py +md=${model}_h-7_t-1024 ++sid=8
}

# a-26 ... a-2 configs, all with the same seed id.
# `seed_id` is not assigned in this section, so it has to be set in the shell
# beforehand. The guard fails fast instead of launching five runs with an
# empty `++sid=`.
{
: "${seed_id:?set seed_id (e.g. seed_id=0) before running this group}"
./actions.py run-docker --gpus=${gpu} torchrun src/main.py +md=${model}_a-26_t-1024 ++sid=${seed_id}
./actions.py run-docker --gpus=${gpu} torchrun src/main.py +md=${model}_a-13_t-1024 ++sid=${seed_id}
./actions.py run-docker --gpus=${gpu} torchrun src/main.py +md=${model}_a-7_t-1024 ++sid=${seed_id}
./actions.py run-docker --gpus=${gpu} torchrun src/main.py +md=${model}_a-4_t-1024 ++sid=${seed_id}
./actions.py run-docker --gpus=${gpu} torchrun src/main.py +md=${model}_a-2_t-1024 ++sid=${seed_id}
}


# Prefix Mappings

## Untrained models
# One SLURM submission per model, sharing the seed ids and alphabet size below.
sids=1
alphabet_size=26
# alphabet_size=7
# alphabet_size=2
./actions.py run-slurm -c 1a40 -t 12h --sids="${sids}" --wrapper=none \
  src/main.py "+pm=pyt-1b-ut_a-${alphabet_size}"
./actions.py run-slurm -c 1a100-ol -t 1d --sids="${sids}" --wrapper=docker-devel \
  src/main.py "+pm=llama2-13b-ut_a-${alphabet_size}"
./actions.py run-slurm -c 1a40-ol -t 1d --sids="${sids}" --wrapper=docker-devel \
  src/main.py "+pm=phi-2.7b-ut_a-${alphabet_size}"
#
# Without SLURM: a single llama2-13b run in docker on the chosen GPU
alphabet_size=26
sid=1
gpu=1
./actions.py run-docker --devel --gpus="${gpu}" env CPU_OFFLOAD=1 \
  torchrun src/main.py "+pm=llama2-13b-ut_a-${alphabet_size}" "++sid=${sid}"

## Conditional prob
# Same three models as the SLURM submissions for the untrained case, here with
# the rp-16 configs and the n-gram length selected below.
sids=0,1,2
# alphabet_size=26
alphabet_size=7
# alphabet_size=2
ngram_length=1
./actions.py run-slurm -c 1a40 -t 12h --sids="${sids}" --wrapper=none \
  src/main.py "+pm=pyt-1b_a-${alphabet_size}_rp-16_n-${ngram_length}"
./actions.py run-slurm -c 1a100-ol -t 1d --sids="${sids}" --wrapper=docker-devel \
  src/main.py "+pm=llama2-13b_a-${alphabet_size}_rp-16_n-${ngram_length}"
./actions.py run-slurm -c 1a40-ol -t 1d --sids="${sids}" --wrapper=docker-devel \
  src/main.py "+pm=phi-2.7b_a-${alphabet_size}_rp-16_n-${ngram_length}"

# Non-SLURM llama2-13b run with hard-coded a-26 / n-1 / seed id 0
./actions.py run-docker --devel --gpus=1 env CPU_OFFLOAD=1 \
  torchrun src/main.py +pm=llama2-13b_a-26_rp-16_n-1 ++sid=0



# Seed ids and machine type shared by all `+rs=` submissions that follow.
rs_seed_ids=0,1,2
node_type=1a40
# node_type=1a100

# ns-8 sweep on pyt-1b. For each string length (16 first, then 8) the
# alphabet-size configs (a-*) are submitted before the entropy configs (h-*).
for rs_len in 16 8; do
  # Alphabet size
  for rs_alpha in 26 13 7 4 2; do
    ./actions.py run-slurm -c "${node_type}" -t 1h --sids="${rs_seed_ids}" \
      src/main.py "+rs=pyt-1b_a-${rs_alpha}_sl-${rs_len}_ns-8"
  done
  # Entropy
  for rs_ent in 13 7 4 2; do
    ./actions.py run-slurm -c "${node_type}" -t 1h --sids="${rs_seed_ids}" \
      src/main.py "+rs=pyt-1b_h-${rs_ent}_sl-${rs_len}_ns-8"
  done
done

# Placement order
# For each alphabet size (26, 7, 2): the plain sl-8_ns-16 config first, then
# its _plo-consecutive and _plo-iterative variants.
for rs_alpha in 26 7 2; do
  for rs_plo in "" _plo-consecutive _plo-iterative; do
    ./actions.py run-slurm -c "${node_type}" -t 1h --sids="${rs_seed_ids}" \
      src/main.py "+rs=pyt-1b_a-${rs_alpha}_sl-8_ns-16${rs_plo}"
  done
done

# Substring length
# ns-16 configs for alphabet sizes 26, 7 and 2.
# sl-8 is left out of the sweep: the same a-*_sl-8_ns-16 configs are the first
# command of each group under "Placement order".
for rs_alpha in 26 7 2; do
  for rs_len in 2 4 16 32 64; do
    ./actions.py run-slurm -c "${node_type}" -t 1h --sids="${rs_seed_ids}" \
      src/main.py "+rs=pyt-1b_a-${rs_alpha}_sl-${rs_len}_ns-16"
  done
done


# Number of substrings
# sl-8 configs for alphabet sizes 26, 7 and 2.
# ns-16 is left out of the sweep: the same a-*_sl-8_ns-16 configs are the first
# command of each group under "Placement order".
for rs_alpha in 26 7 2; do
  for rs_count in 2 4 8 32 64 128; do
    ./actions.py run-slurm -c "${node_type}" -t 1h --sids="${rs_seed_ids}" \
      src/main.py "+rs=pyt-1b_a-${rs_alpha}_sl-8_ns-${rs_count}"
  done
done


# Practical memorization dynamics
# Works on 1 x A40
./actions.py run-docker --gpus=0 \
  torchrun src/main.py +pmd=pyt-70m_a-26_t-512_c-wiki_x-2_b-16_i-1
# Works on 2 x A40 with BS = 16
# NOTE(review): the config below is b-64 while the note above says BS = 16 — confirm which is meant.
./actions.py run-docker --gpus=0,1 \
  torchrun --nproc_per_node=2 --nnodes=1 src/main.py +pmd=pyt-1b_a-26_t-1024_c-wiki_b-64
# ./actions.py run-docker --gpus=0 torchrun src/main.py +pmd=phi-2.7b_a-26_t-512_c-wiki_x-2_b-16_i-1
# phi-2.7b is launched through poetry with CPU_OFFLOAD=1 instead of the docker wrapper
CPU_OFFLOAD=1 poetry run \
  torchrun src/main.py +pmd=phi-2.7b_a-26_t-512_c-wiki_x-2_b-16_i-1
./actions.py run-docker --gpus=0,1 \
  torchrun --nproc_per_node=2 --nnodes=1 src/main.py +pmd=llama2-13b_a-26_t-512_c-wiki_x-2_b-16_i-1

# Batch size
pmd_seed_ids=0,1,2
alphabet_size=26
# alphabet_size=7
# alphabet_size=2
# pretrained=""
# runtime=1h
pretrained="_u"
runtime=12h

# pyt-1b, using ${runtime} for -t. Each entry is <machine>:<batch size>.
# Disabled:
# ./actions.py run-slurm -c 1a40 -t $runtime --wrapper=docker-devel --sids=$pmd_seed_ids src/main.py +pmd=pyt-1b${pretrained}_a-${alphabet_size}_t-1024_c-wiki_b-1
for pmd_job in 1a40:4 2a40:16 2a100-ol:64; do
  ./actions.py run-slurm -c "${pmd_job%%:*}" -t "${runtime}" --wrapper=docker-devel --sids="${pmd_seed_ids}" \
    src/main.py "+pmd=pyt-1b${pretrained}_a-${alphabet_size}_t-1024_c-wiki_b-${pmd_job##*:}"
done

# phi-2.7b, always -t 12h. Same <machine>:<batch size> entries.
for pmd_job in 1a40-ol:1 1a40-ol:4 4a100-ol:16 8a100-ol:64; do
  ./actions.py run-slurm -c "${pmd_job%%:*}" -t 12h --wrapper=docker-devel --sids="${pmd_seed_ids}" \
    src/main.py "+pmd=phi-2.7b${pretrained}_a-${alphabet_size}_t-1024_c-wiki_b-${pmd_job##*:}"
done
#
# ./actions.py run-slurm -c 4a100-ol -t 12h --wrapper=docker-devel --sids=$pmd_seed_ids src/main.py +pmd=phi-2.7b${pretrained}_a-${alphabet_size}_t-1024_c-wiki_b-64
# OOMs
# ./actions.py run-slurm -c 8a100 -t $runtime --sids=$pmd_seed_ids src/main.py +pmd=phi-2.7b${pretrained}_a-${alphabet_size}_t-1024_c-wiki_b-64

### Practical mem-dyn Llama2-13B
# b-1, b-4 and b-16 on 1, 2 and 4 a100-ol machines; only the b-16 job uses -t 1d.
./actions.py run-slurm -c 1a100-ol -t 12h --wrapper=docker-devel --sids="${pmd_seed_ids}" \
  src/main.py "+pmd=llama2-13b${pretrained}_a-${alphabet_size}_t-1024_c-wiki_b-1"
./actions.py run-slurm -c 2a100-ol -t 12h --wrapper=docker-devel --sids="${pmd_seed_ids}" \
  src/main.py "+pmd=llama2-13b${pretrained}_a-${alphabet_size}_t-1024_c-wiki_b-4"
./actions.py run-slurm -c 4a100-ol -t 1d --wrapper=docker-devel --sids="${pmd_seed_ids}" \
  src/main.py "+pmd=llama2-13b${pretrained}_a-${alphabet_size}_t-1024_c-wiki_b-16"
# ./actions.py run-slurm -c 8a100-ol -t 12h --wrapper=docker-devel --sids=$pmd_seed_ids src/main.py +pmd=llama2-13b${pretrained}_a-${alphabet_size}_t-1024_c-wiki_b-64

# Don't work, OOMs
# ./actions.py run-slurm -c 4a100-ol -t 12h --wrapper=docker-devel --sids=$pmd_seed_ids src/main.py +pmd=llama2-13b${pretrained}_a-${alphabet_size}_t-1024_c-wiki_b-64
# ./actions.py run-slurm -c 4a100-ol -t 12h --sids=$pmd_seed_ids src/main.py +pmd=llama2-13b${pretrained}_a-${alphabet_size}_t-1024_c-wiki_b-64
# ./actions.py run-docker --devel --gpus=0,1 env CPU_OFFLOAD=1 torchrun --nproc_per_node=2 --master-port 29500 src/main.py +pmd=llama2-13b${pretrained}_a-${alphabet_size}_t-1024_c-wiki_b-64 ++sid=0
./actions.py run-docker --devel --gpus=0,1 env CPU_OFFLOAD=1 \
  torchrun --nproc_per_node=2 --master-port 29500 src/main.py \
  "+pmd=phi-2.7b${pretrained}_a-${alphabet_size}_t-1024_c-wiki_b-64" ++sid=0

### Without SLURM
# Single phi-2.7b b-64 run in docker on GPU 1, seed id 0.
pretrained=""
alphabet_size=26
./actions.py run-docker --devel --gpus=1 env CPU_OFFLOAD=1 \
  torchrun src/main.py "+pmd=phi-2.7b${pretrained}_a-${alphabet_size}_t-1024_c-wiki_b-64" ++sid=0



# Relative context size
pmd_seed_ids=0,1,2
alphabet_size=26
# alphabet_size=7
# alphabet_size=2
pretrained=""
# pretrained="_u"

# pyt-1b on 1a40. Each entry is <runtime>:<x value>.
# Disabled: 1h:1
for pmd_job in 1h:2 12h:4 12h:8; do
  ./actions.py run-slurm -c 1a40 -t "${pmd_job%%:*}" --wrapper=docker-devel --sids="${pmd_seed_ids}" \
    src/main.py "+pmd=pyt-1b${pretrained}_a-${alphabet_size}_t-256_c-wiki_x-${pmd_job##*:}"
done

# phi-2.7b on 1a40-ol is disabled for every x value:
# for pmd_x in 1 2 4 8; do
#   ./actions.py run-slurm -c 1a40-ol -t 12h --wrapper=docker-devel --sids="${pmd_seed_ids}" \
#     src/main.py "+pmd=phi-2.7b${pretrained}_a-${alphabet_size}_t-256_c-wiki_x-${pmd_x}"
# done

# llama2-13b on 1a100-ol, -t 12h.
# Disabled: x-1
for pmd_x in 2 4 8; do
  ./actions.py run-slurm -c 1a100-ol -t 12h --wrapper=docker-devel --sids="${pmd_seed_ids}" \
    src/main.py "+pmd=llama2-13b${pretrained}_a-${alphabet_size}_t-256_c-wiki_x-${pmd_x}"
done


# Conditional Prob memorization dynamics
./actions.py run-docker --gpus=0 \
  torchrun src/main.py +cpmd=pyt-1b_a-26_t-1024_rp-4_n-1

cpmd_seed_ids=0,1,2
# Alphabet size: exactly one value active (uncomment another to switch).
# alphabet_size=26
# alphabet_size=7
alphabet_size=2

# Model preset: pyt-1b (inactive)
# model=pyt-1b
# machine=1a40
# wrapper=none
# runtime=1h

# Model preset: phi-2.7b (active)
model=phi-2.7b
machine=1a40-ol
wrapper=docker-devel
runtime=12h

pretrained=""

## Relative probabilities
# Full list is rp-2 4 8 16 32 64; rp-2, rp-8 and rp-32 are disabled.
ngram_size=1
for cpmd_rp in 4 16 64; do
  ./actions.py run-slurm -c "${machine}" -t "${runtime}" --wrapper="${wrapper}" --sids="${cpmd_seed_ids}" \
    src/main.py "+cpmd=${model}${pretrained}_a-${alphabet_size}_t-1024_rp-${cpmd_rp}_n-${ngram_size}"
done

## ngram size
relative_probability=16
for cpmd_n in 2 3 4; do
  ./actions.py run-slurm -c "${machine}" -t "${runtime}" --wrapper="${wrapper}" --sids="${cpmd_seed_ids}" \
    src/main.py "+cpmd=${model}${pretrained}_a-${alphabet_size}_t-1024_rp-${relative_probability}_n-${cpmd_n}"
done

## Relative probabilities, non-SLURM
# llama2-13b runs in docker with CPU_OFFLOAD=1 on the GPU selected by `gpu`.
model=llama2-13b
pretrained=""

alphabet_size=26
sid=0
gpu=0

### relative probability
# rp-4, rp-16 and rp-64 in sequence; `&&` stops the chain at the first failure.
ngram_size=1
./actions.py run-docker --devel --gpus="${gpu}" env CPU_OFFLOAD=1 torchrun src/main.py \
    "+cpmd=${model}${pretrained}_a-${alphabet_size}_t-1024_rp-4_n-${ngram_size}" "++sid=${sid}" \
  && ./actions.py run-docker --devel --gpus="${gpu}" env CPU_OFFLOAD=1 torchrun src/main.py \
    "+cpmd=${model}${pretrained}_a-${alphabet_size}_t-1024_rp-16_n-${ngram_size}" "++sid=${sid}" \
  && ./actions.py run-docker --devel --gpus="${gpu}" env CPU_OFFLOAD=1 torchrun src/main.py \
    "+cpmd=${model}${pretrained}_a-${alphabet_size}_t-1024_rp-64_n-${ngram_size}" "++sid=${sid}"

### ngram size
# n-2, n-3 and n-4 at a fixed relative probability. Every run, including the
# last one, passes the seed id explicitly so all three use the same `sid`.
relative_probability=16
./actions.py run-docker --devel --gpus="${gpu}" env CPU_OFFLOAD=1 torchrun src/main.py \
    "+cpmd=${model}${pretrained}_a-${alphabet_size}_t-1024_rp-${relative_probability}_n-2" "++sid=${sid}" \
  && ./actions.py run-docker --devel --gpus="${gpu}" env CPU_OFFLOAD=1 torchrun src/main.py \
    "+cpmd=${model}${pretrained}_a-${alphabet_size}_t-1024_rp-${relative_probability}_n-3" "++sid=${sid}" \
  && ./actions.py run-docker --devel --gpus="${gpu}" env CPU_OFFLOAD=1 torchrun src/main.py \
    "+cpmd=${model}${pretrained}_a-${alphabet_size}_t-1024_rp-${relative_probability}_n-4" "++sid=${sid}"


# Repeated training
## Untrained models
# SLURM submissions for pyt-1b and phi-2.7b; the llama2-13b one is disabled.
sids=0,1,2
# alphabet_size=26
alphabet_size=2
./actions.py run-slurm -c 1a40 -t 12h --sids="${sids}" --wrapper=docker-devel \
  src/main.py "+rt=pyt-1b-ut_a-${alphabet_size}_x32"
./actions.py run-slurm -c 1a40-ol -t 1d --sids="${sids}" --wrapper=docker-devel \
  src/main.py "+rt=phi-2.7b-ut_a-${alphabet_size}"
# ./actions.py run-slurm -c 1a100-ol -t 1d --sids=$sids --wrapper=docker-devel src/main.py +rt=llama2-13b-ut_a-${alphabet_size}

### Without SLURM
# llama2-13b in docker with CPU_OFFLOAD=1; note this switches alphabet_size back to 26.
alphabet_size=26
sid=1
gpu=1
./actions.py run-docker --devel --gpus="${gpu}" env CPU_OFFLOAD=1 \
  torchrun src/main.py "+rt=llama2-13b-ut_a-${alphabet_size}_x16" "++sid=${sid}"


# Mechanisms
# Single docker run of the pyt-1l-1h config on GPU 1.
./actions.py run-docker --gpus=1 \
  torchrun src/main.py +mech=pyt-1l-1h_a-26_t-32
