#!/bin/bash

#######################################
# Report whether the current user has a job in the Slurm queue whose time
# limit equals a given value.
#
# The job is recognised purely by its time limit as printed by squeue's
# "%l" field, so the value must be written the way squeue prints it
# (e.g. 118 minutes is shown as "1:58:00", 58 minutes as "58:00").
#
# Globals:   USER (read)
# Arguments: $1 - optional time limit to look for; defaults to "1:58:00"
# Outputs:   a warning on stderr if squeue itself fails
# Returns:   0 if at least one matching job is listed, 1 otherwise
#            (including when squeue could not be queried)
#######################################
check_job_running() {
    local time_limit=${1:-1:58:00}
    local queue_listing
    local running_jobs

    # Declaration and assignment are kept separate so that a failing squeue
    # is noticed instead of being hidden behind the exit status of `local`.
    if ! queue_listing=$(squeue -u "$USER" -o "%i %l"); then
        echo "check_job_running: squeue failed; treating as no matching job" >&2
        return 1
    fi

    # Compare the time-limit column exactly (as a string), so that a limit
    # such as "11:58:00" is not mistaken for "1:58:00".
    running_jobs=$(awk -v limit="$time_limit" \
        '($2 "") == (limit "") { n++ } END { print n + 0 }' <<<"$queue_listing")

    # Exit status of the test is the function's result:
    # 0 = job is still running, 1 = job is not running.
    [ "$running_jobs" -gt 0 ]
}

#######################################
# Block until check_job_running reports that no matching job is left.
#
# Polls check_job_running and sleeps five minutes between polls.
#
# Arguments: any arguments are forwarded unchanged to check_job_running
#            (e.g. a time limit to look for); with no arguments the call is
#            identical to a bare `check_job_running`.
# Outputs:   progress messages on stdout
# Returns:   0 once the polling loop ends
#######################################
wait_for_job_completion() {
    echo "Waiting for job to complete..."
    # Check every 5 minutes if job is still running
    while check_job_running "$@"; do
        echo "Job still running, checking again in 5 minutes..."
        sleep 5m
    done
    echo "Job completed!"
}

# Submit the "zero_pretrain" fine-tuning launch (16 nodes, --budget_minutes=118)
# eight times in a row. Each pass calls launch_frontier.py and then blocks in
# wait_for_job_completion before the next pass starts.
# The commented-out launches are alternative experiment configurations that are
# kept for reference; only the zero_pretrain launch is active.
for i in {1..8}; do
    # python launch_frontier.py \
    #     --python_script="train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    #     --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    #     --budget_minutes=118 \
    #     --nodes 16 \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --config launch_configs/retrieval/base_nomic_positive_only_finetune_lockstep.json \
    #     --run_name ft_nomic_pos_only_pt_step_125k_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 \
    #     --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --mean_pooling=true --max_steps=14587 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=128 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=16384 --micro_batch_size=128 --validate_at_end=false --save_last_step=true --wandb_tags="[phase-2,160m,v3,25_62_env]" --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00125000_ckpt.pth"' \
    #     --sub_output_dir_name ft_nomic_pos_only_pt_step_125k_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 \
    #     --disable_net_gdr \
    #     --debug_qos
    # echo "Current iteration: $i ..."
    # wait_for_job_completion

    # Active launch: arguments are collected in an array, one option per line,
    # and expanded as separate words when launch_frontier.py is invoked.
    launch_args=(
        --python_script="train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval"
        --environment="${WRKSPC}/frontier_conda_25_62_retrieval"
        --budget_minutes=118
        --nodes 16
        --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
        --config launch_configs/retrieval/base_nomic_positive_only_finetune_lockstep.json
        --run_name ft_nomic_pos_only_zero_pretrain_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512
        --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --mean_pooling=true --max_steps=14587 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=128 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=16384 --micro_batch_size=128 --validate_at_end=false --save_last_step=true --wandb_tags="[phase-2,160m,v3,25_62_env]" --finetune_checkpoint=null --pretrained_prefix_model=false --pretrained_suffix_model=false'
        --sub_output_dir_name ft_nomic_pos_only_zero_pretrain_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512
        --disable_net_gdr
        --debug_qos
    )
    python launch_frontier.py "${launch_args[@]}"
    echo "Current iteration: $i ..."
    wait_for_job_completion
    # python launch_frontier.py \
    #     --python_script="train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    #     --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    #     --budget_minutes=118 \
    #     --nodes 16 \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --config launch_configs/retrieval/base_nomic_positive_only_finetune_lockstep.json \
    #     --run_name ft_nomic_pos_only_no_meta_toks_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 \
    #     --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --mean_pooling=true --max_steps=14587 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=128 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=16384 --micro_batch_size=128 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    #     --sub_output_dir_name ft_nomic_pos_only_no_meta_toks_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 \
    #     --disable_net_gdr \
    #     --debug_qos
    # echo "Current iteration: $i ..."
    # wait_for_job_completion

    # python launch_frontier.py \
    #     --python_script="train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    #     --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    #     --budget_minutes=118 \
    #     --nodes 16 \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --config launch_configs/retrieval/base_nomic_positive_only_finetune_lockstep.json \
    #     --run_name ft_nomic_pos_only_no_meta_toks_fineweb_stack_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 \
    #     --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --mean_pooling=true --max_steps=14587 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=128 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=16384 --micro_batch_size=128 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_fineweb_stack_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    #     --sub_output_dir_name ft_nomic_pos_only_no_meta_toks_fineweb_stack_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 \
    #     --disable_net_gdr \
    #     --debug_qos
    # echo "Current iteration: $i ..."
    # wait_for_job_completion

    # python launch_frontier.py \
    #     --python_script="train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    #     --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    #     --budget_minutes=118 \
    #     --nodes 16 \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --config launch_configs/retrieval/base_nomic_positive_only_finetune_lockstep.json \
    #     --run_name ft_nomic_pos_only_no_meta_toks_w_lockstep_null_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 \
    #     --extra_args='--lockstep_sampling=null --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --mean_pooling=true --max_steps=14587 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=128 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=16384 --micro_batch_size=128 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    #     --sub_output_dir_name ft_nomic_pos_only_no_meta_toks_w_lockstep_null_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 \
    #     --disable_net_gdr \
    #     --debug_qos
    # echo "Current iteration: $i ..."
    # wait_for_job_completion

    # python launch_frontier.py \
    #     --python_script="train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    #     --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    #     --budget_minutes=118 \
    #     --nodes 16 \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --config launch_configs/retrieval/base_nomic_positive_only_finetune_lockstep.json \
    #     --run_name ft_nomic_pos_only_no_meta_toks_pt_step_108k_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 \
    #     --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --mean_pooling=true --max_steps=14587 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=128 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=16384 --micro_batch_size=128 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00108000_ckpt.pth"' \
    #     --sub_output_dir_name ft_nomic_pos_only_no_meta_toks_pt_step_108k_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 \
    #     --disable_net_gdr \
    #     --debug_qos
    # echo "Current iteration: $i ..."
    # wait_for_job_completion

    # python launch_frontier.py \
    #     --python_script="train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    #     --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    #     --budget_minutes=118 \
    #     --nodes 32 \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --config launch_configs/retrieval/base_nomic_positive_only_finetune_lockstep.json \
    #     --run_name ft_nomic_pos_only_no_meta_toks_w_lockstep_wb_negs_32768_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb32768-grp1-1-8_128_16N_max_steps_7294_max_seq_512 \
    #     --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --mean_pooling=true --max_steps=7294 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=128 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=32768 --micro_batch_size=128 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    #     --sub_output_dir_name ft_nomic_pos_only_no_meta_toks_w_lockstep_wb_negs_32768_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb32768-grp1-1-8_128_16N_max_steps_7294_max_seq_512 \
    #     --disable_net_gdr \
    #     --repetitions=1 \
    #     --debug_qos
    # echo "Current iteration: $i ..."
    # wait_for_job_completion


    echo "Iteration $i completed."
done

# Submit the v3 "max_seq_len_512" pretraining launch (128 nodes,
# --budget_minutes=58) fifty times, one submission per pass, blocking in
# wait_for_job_completion after each submission.
# NOTE(review): check_job_running looks for a squeue time limit of 1:58:00 by
# default, while this launch requests a 58-minute budget — confirm the wait
# below actually tracks this job.
# The commented-out launches are alternative configurations kept for reference.
for i in {1..50}; do
    # Active launch: one option per array element group, expanded word-for-word.
    launch_args=(
        --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
        --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
        --python_script=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py
        --config=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_optim_longwu_highlr_cos.json
        --run_name=v3_max_seq_len_512_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N
        --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
        --sub_output_dir_name=v3_max_seq_len_512_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N
        --budget_minutes=58
        --nodes=128
        --extra_args="--max_seq_len=512 --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug-XXXX-20,160m,v3,25_62_env]'"
        --repetitions=1
        --debug_qos
        --disable_net_gdr
    )
    python launch_frontier.py "${launch_args[@]}"
    echo "Current iteration: $i ..."
    wait_for_job_completion
    # python launch_frontier.py \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz" \
    #     --python_script=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py \
    #     --config=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_optim_longwu_highlr_cos.json \
    #     --run_name=v3_random_rand_toks_doc_lens_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --sub_output_dir_name=v3_random_rand_toks_doc_lens_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    #     --budget_minutes=58 \
    #     --nodes=128 \
    #     --extra_args="--length_shortcut_ablation=rand_toks_doc_lens --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug-XXXX-20,160m,v3,25_62_env]'" \
    #     --repetitions=1 \
    #     --debug_qos \
    #     --disable_net_gdr
    # echo "Current iteration: $i ..."
    # wait_for_job_completion

    # python launch_frontier.py \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz" \
    #     --python_script=train_retrieval_w_anticausal.py \
    #     --config=launch_configs/retrieval/base_160m_fineweb_350BT_stack.json \
    #     --run_name=v3_fineweb_stack_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --sub_output_dir_name=v3_fineweb_stack_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    #     --budget_minutes=58 \
    #     --nodes=128 \
    #     --extra_args="--micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=453e9 --max_tokens=null --max_steps=340294 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[prod,160m,v3,25_62_env]'" \
    #     --debug_qos \
    #     --disable_net_gdr
    # echo "Current iteration: $i ..."
    # wait_for_job_completion

    # python launch_frontier.py \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz" \
    #     --python_script=train_retrieval_w_anticausal.py \
    #     --config=launch_configs/retrieval/base_160m_some_meta_tok_mixture.json \
    #     --run_name=v3_some_meta_tok_mixture_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --sub_output_dir_name=v3_some_meta_tok_mixture_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    #     --budget_minutes=58 \
    #     --nodes=128 \
    #     --extra_args="--micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=383e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[prod,160m,v3,25_62_env]'" \
    #     --debug_qos \
    #     --disable_net_gdr
    # echo "Current iteration: $i ..."
    # wait_for_job_completion

    echo "Iteration $i completed."
done

# Submit the v3 "random_permuted_batch_toks" ablation launch
# (--length_shortcut_ablation=permute_batch_tokens, 128 nodes,
# --budget_minutes=58) fifty times, blocking in wait_for_job_completion after
# each submission.
# NOTE(review): check_job_running looks for a squeue time limit of 1:58:00 by
# default, while this launch requests a 58-minute budget — confirm the wait
# below actually tracks this job.
# The commented-out launches are alternative configurations kept for reference.
for i in {1..50}; do
    # Active launch: arguments gathered in an array and expanded verbatim.
    launch_args=(
        --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
        --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
        --python_script=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py
        --config=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_optim_longwu_highlr_cos.json
        --run_name=v3_random_permuted_batch_toks_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N
        --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
        --sub_output_dir_name=v3_random_permuted_batch_toks_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N
        --budget_minutes=58
        --nodes=128
        --extra_args="--length_shortcut_ablation=permute_batch_tokens --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug-XXXX-20,160m,v3,25_62_env]'"
        --repetitions=1
        --debug_qos
        --disable_net_gdr
    )
    python launch_frontier.py "${launch_args[@]}"
    echo "Current iteration: $i ..."
    wait_for_job_completion

    # python launch_frontier.py \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz" \
    #     --python_script=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py \
    #     --config=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_optim_longwu_highlr_cos.json \
    #     --run_name=v3_rand_toks_const_lens_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --sub_output_dir_name=v3_rand_toks_const_lens_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    #     --budget_minutes=58 \
    #     --nodes=128 \
    #     --extra_args="--length_shortcut_ablation=rand_toks_const_lens --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug-XXXX-20,160m,v3,25_62_env]'" \
    #     --repetitions=1 \
    #     --debug_qos \
    #     --disable_net_gdr
    # echo "Current iteration: $i ..."
    # wait_for_job_completion

    # python launch_frontier.py \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz" \
    #     --python_script=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py \
    #     --config=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_optim_longwu_highlr_cos.json \
    #     --run_name=v3_rand_toks_rand_lens_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --sub_output_dir_name=v3_rand_toks_rand_lens_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    #     --budget_minutes=58 \
    #     --nodes=128 \
    #     --extra_args="--length_shortcut_ablation=rand_toks_rand_lens --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug-XXXX-20,160m,v3,25_62_env]'" \
    #     --repetitions=1 \
    #     --debug_qos \
    #     --disable_net_gdr
    # echo "Current iteration: $i ..."
    # wait_for_job_completion

    # python launch_frontier.py \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz" \
    #     --python_script=train_retrieval_w_anticausal.py \
    #     --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json \
    #     --run_name=v3_debug_sparse_negs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp512_keep_184k_negs_128N_fixed_randperm \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --sub_output_dir_name=v3_debug_sparse_negs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp512_keep_184k_negs_128N_fixed_randperm \
    #     --budget_minutes=58 \
    #     --nodes=128 \
    #     --extra_args="--keep_k_cross_device_negatives=184320 --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=512 --target_token_count=395e9 --max_tokens=null --max_steps=131920 --warmup_steps=2000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug,160m,v3,25_62_env]'" \
    #     --repetitions=1 \
    #     --debug_qos \
    #     --disable_net_gdr

    echo "Iteration $i completed."
done


# Run three rounds of two 8-node fine-tuning launches (--budget_minutes=119):
# first with base_160m_nomic_finetune_no_meta_tok.json, then with
# base_160m_nomic_finetune_bos_task_q_d_task.json. After each submission the
# loop blocks in wait_for_job_completion before continuing.
#
# This loop was previously missing its closing `done`, which left every
# following loop nested inside it and the script syntactically incomplete.
#
# NOTE(review): both launches use the same --run_name and
# --sub_output_dir_name although their --config and --finetune_checkpoint
# differ — confirm the shared name/output directory is intended.
# NOTE(review): check_job_running looks for a squeue time limit of 1:58:00 by
# default, while these launches request a 119-minute budget — confirm the waits
# below actually track these jobs.
for i in {1..3}
do
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
        --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
        --environment="${WRKSPC}/frontier_conda_62" \
        --budget_minutes=119 \
        --nodes 8 \
        --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune_no_meta_tok.json \
        --run_name test_fineweb_100BT_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936 \
        --extra_args='--train_group_size=8 --mean_pooling=true --max_steps=9936 --negatives_cross_device=true --save_step_interval=1000 --eval_step_interval=20000 --negatives_cross_device_group_size=32 --warmup_steps=400 --optim_config.lr=3e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=512 --micro_batch_size=8 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_fineweb_100BT_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00007000_ckpt.pth"' \
        --sub_output_dir_name test_fineweb_100BT_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936 \
        --debug_qos
    # Announce the iteration before blocking, as the other loops in this file do.
    echo "Current iteration: $i ..."
    wait_for_job_completion
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
        --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
        --environment="${WRKSPC}/frontier_conda_62" \
        --budget_minutes=119 \
        --nodes 8 \
        --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune_bos_task_q_d_task.json \
        --run_name test_fineweb_100BT_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936 \
        --extra_args='--train_group_size=8 --mean_pooling=true --max_steps=9936 --negatives_cross_device=true --save_step_interval=1000 --eval_step_interval=20000 --negatives_cross_device_group_size=32 --warmup_steps=400 --optim_config.lr=3e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=512 --micro_batch_size=8 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00007000_ckpt.pth"' \
        --sub_output_dir_name test_fineweb_100BT_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936 \
        --debug_qos
    echo "Current iteration: $i ..."
    wait_for_job_completion
done


# Submit the v1 pythia-160m retrieval pretraining launch (128 nodes,
# --budget_minutes=59, config base_160m_ret_meta.json) sixteen times, blocking
# in wait_for_job_completion after each submission.
# NOTE(review): check_job_running looks for a squeue time limit of 1:58:00 by
# default, while this launch requests a 59-minute budget — confirm the wait
# below actually tracks this job.
# The commented-out launches are alternative configurations kept for reference.
for i in {1..16}; do
    echo "Starting iteration $i"
    # python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    #     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    #     --rocm_version="6.2.0" \
    #     --budget_minutes=30 \
    #     --nodes=8 \
    #     --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    #     --run_name="hfds_v1_pythia-160m_mb2-wb512-grp32_8N" \
    #     --extra_args="--negatives_cross_device=true --negatives_cross_device_group_size=32 --suffix_is_prefix=true --max_steps=288453 --mask_full_ldiag=true --k_random_pos_labels=5 --max_tokens=null --save_step_interval=1000 --eval_step_interval=6000 --optim_config.lr=2e-4 --min_lr=2e-5 --save_n_min_before_job_done=3 --world_batch_size=512 --micro_batch_size=8 --fabric_strategy=ddp" \
    #     --disable_net_gdr \
    #     --extended_partition \
    #     --sub_output_dir_name="hfds_v1_pythia-160m_mb2-wb512-grp32_8N"

    # Active launch: arguments gathered in an array and expanded verbatim.
    launch_args=(
        --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62_retrieval.tar.gz"
        --environment="${WRKSPC}/frontier_conda_62_retrieval.tar.gz"
        --rocm_version="6.2.0"
        --python_script=train_retrieval_w_anticausal.py
        --config=launch_configs/retrieval/base_160m_ret_meta.json
        --run_name=v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N
        --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
        --sub_output_dir_name=v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N
        --budget_minutes=59
        --nodes=128
        --extra_args="--negatives_cross_device_group_size=64  --max_steps=131920 --micro_batch_size=2 --target_token_count=395e9 --warmup_steps=2000 --world_batch_size=2048 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=sdpa --fabric.depth_tensor_parallel_size=8"
        --debug_qos
        --disable_net_gdr
    )
    python launch_frontier.py "${launch_args[@]}"
    # Block until the submitted job is no longer reported by the helper.
    echo "Current iteration: $i ..."
    wait_for_job_completion
    # python launch_frontier.py \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62_retrieval.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_62_retrieval.tar.gz" \
    #     --rocm_version="6.2.0" \
    #     --python_script=train_retrieval_w_anticausal.py \
    #     --config=launch_configs/retrieval/base_160m_fineweb_100BT.json \
    #     --run_name=v1_fineweb_100BT_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --sub_output_dir_name=v1_fineweb_100BT_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N \
    #     --budget_minutes=59 \
    #     --nodes=128 \
    #     --extra_args="--negatives_cross_device_group_size=64  --max_steps=131920 --micro_batch_size=2 --target_token_count=395e9 --warmup_steps=2000 --world_batch_size=2048 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=sdpa --fabric.depth_tensor_parallel_size=8" \
    #     --debug_qos \
    #     --disable_net_gdr
    # # Wait for job completion
    # echo "Current iteration: $i ..."
    # wait_for_job_completion

    # python launch_frontier.py \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62_retrieval.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_62_retrieval.tar.gz" \
    #     --rocm_version="6.2.0" \
    #     --python_script=train_retrieval_w_anticausal.py \
    #     --config=launch_configs/retrieval/base_160m_no_meta_tok_mixture.json \
    #     --run_name=v1_no_meta_tok_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --sub_output_dir_name=v1_no_meta_tok_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N \
    #     --budget_minutes=59 \
    #     --nodes=128 \
    #     --extra_args="--negatives_cross_device_group_size=64  --max_steps=131920 --micro_batch_size=2 --target_token_count=395e9 --warmup_steps=2000 --world_batch_size=2048 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=sdpa --fabric.depth_tensor_parallel_size=8" \
    #     --debug_qos \
    #     --disable_net_gdr
    # # Wait for job completion
    # echo "Current iteration: $i ..."
    # wait_for_job_completion

    echo "Iteration $i completed."
done


# Data-mix experiments comparing 5% vs 20% short sequences.
# Only the 20% variant (base_160m_ret_meta_20percent_short_seqs.json) is
# active; the 5% launch is kept commented out for reference. The active launch
# (128 nodes, --budget_minutes=59) is submitted sixteen times, blocking in
# wait_for_job_completion after each submission.
# NOTE(review): check_job_running looks for a squeue time limit of 1:58:00 by
# default, while this launch requests a 59-minute budget — confirm the wait
# below actually tracks this job.
for i in {1..16}; do
    echo "Starting iteration $i"
    # python launch_frontier.py \
    #     --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62_retrieval.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_62_retrieval.tar.gz" \
    #     --rocm_version="6.2.0" \
    #     --python_script=train_retrieval_w_anticausal.py \
    #     --config=launch_configs/retrieval/base_160m_ret_meta_5percent_short_seqs.json \
    #     --run_name=v2_5_percent_short_seqs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N \
    #     --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    #     --sub_output_dir_name=v2_5_percent_short_seqs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N \
    #     --budget_minutes=59 \
    #     --nodes=128 \
    #     --extra_args="--negatives_cross_device_group_size=64  --max_steps=150000 --micro_batch_size=2 --target_token_count=410e9 --warmup_steps=2000 --world_batch_size=2048 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=sdpa --fabric.depth_tensor_parallel_size=8" \
    #     --disable_net_gdr \
    #     --debug_qos
    # echo "Current iteration: $i ..."
    # wait_for_job_completion

    # Active launch: arguments gathered in an array and expanded verbatim.
    launch_args=(
        --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62_retrieval.tar.gz"
        --environment="${WRKSPC}/frontier_conda_62_retrieval.tar.gz"
        --rocm_version="6.2.0"
        --python_script=train_retrieval_w_anticausal.py
        --config=launch_configs/retrieval/base_160m_ret_meta_20percent_short_seqs.json
        --run_name=v2_20_percent_short_seqs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N
        --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
        --sub_output_dir_name=v2_20_percent_short_seqs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N
        --budget_minutes=59
        --nodes=128
        --extra_args="--negatives_cross_device_group_size=64  --max_steps=150000 --micro_batch_size=2 --target_token_count=410e9 --warmup_steps=2000 --world_batch_size=2048 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=sdpa --fabric.depth_tensor_parallel_size=8"
        --disable_net_gdr
        --debug_qos
    )
    python launch_frontier.py "${launch_args[@]}"
    echo "Current iteration: $i ..."
    wait_for_job_completion

    echo "Iteration $i completed."
done

# v3 pretraining 160m
# Invoke launch_frontier.py for the v3 160m run: 128 nodes,
# budget_minutes=59, repetitions=50. Arguments are collected in an array and
# the shared run name / sub output dir is stated once.
run_name=v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N
launch_args=(
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
    --python_script=train_retrieval_w_anticausal.py
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json
    --run_name="${run_name}"
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --sub_output_dir_name="${run_name}"
    --budget_minutes=59
    --nodes=128
    --extra_args="--micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[prod,160m,v3,25_62_env]'"
    --repetitions=50
    --disable_net_gdr
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the v4 run that passes
# --keep_k_cross_device_negatives=368640 and the truncate_lens_100_normal
# length-shortcut ablation: 128 nodes, budget_minutes=59, repetitions=50.
run_name=v4_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp1024_keep_368k_negs_128N_truncate_normal
launch_args=(
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
    --python_script=train_retrieval_w_anticausal.py
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json
    --run_name="${run_name}"
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --sub_output_dir_name="${run_name}"
    --budget_minutes=59
    --nodes=128
    --extra_args="--keep_k_cross_device_negatives=368640 --length_shortcut_ablation=truncate_lens_100_normal  --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=1024 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --save_n_min_before_job_done=5 --wandb_tags='[prod,160m,v3,25_62_env]'"
    --repetitions=50
    --disable_net_gdr
)
python launch_frontier.py "${launch_args[@]}"



# Run train_retrieval_w_anticausal.py directly under srun (1 node, 4 tasks,
# 4 GPUs per node) with run_name=test. srun options and training arguments
# are kept in separate arrays.
# NOTE(review): --config points at launch_configs/ rather than
# launch_configs/retrieval/ as the other invocations in this file do — confirm.
srun_opts=(-u -l -N1 -n4 -c7 --ntasks-per-node=4 --gpus-per-node=4)
train_args=(
    --config=launch_configs/base_optim_longwu_highlr_cos.json
    --run_name=test
    --out_dir=/XXXX-36/XXXX-22/output
    --keep_k_cross_device_negatives=368640
    --length_shortcut_ablation=truncate_lens_100_normal
    --micro_batch_size=2
    --world_batch_size=8
    --negatives_cross_device_group_size=1
    --max_tokens=null
    --max_steps=131900
    --warmup_steps=6000
    --optim_config.lr=2e-3
    --min_lr=2e-4
    --fabric_strategy=axonn_tp
    --attn_impl=sdpa
    --fabric.depth_tensor_parallel_size=1
    --save_n_min_before_job_done=5
    --wandb_tags='[prod,160m,v3,25_62_env]'
)
srun "${srun_opts[@]}" python -u train_retrieval_w_anticausal.py "${train_args[@]}"


# Invoke launch_frontier.py for the v3 fineweb_stack run
# (base_160m_fineweb_350BT_stack.json): 128 nodes, budget_minutes=59,
# repetitions=50.
run_name=v3_fineweb_stack_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N
launch_args=(
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
    --python_script=train_retrieval_w_anticausal.py
    --config=launch_configs/retrieval/base_160m_fineweb_350BT_stack.json
    --run_name="${run_name}"
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --sub_output_dir_name="${run_name}"
    --budget_minutes=59
    --nodes=128
    --extra_args="--micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=453e9 --max_tokens=null --max_steps=340294 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[prod,160m,v3,25_62_env]'"
    --repetitions=50
    --disable_net_gdr
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the v3 some_meta_tok_mixture run
# (base_160m_some_meta_tok_mixture.json): 128 nodes, budget_minutes=59,
# repetitions=50.
run_name=v3_some_meta_tok_mixture_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N
launch_args=(
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
    --python_script=train_retrieval_w_anticausal.py
    --config=launch_configs/retrieval/base_160m_some_meta_tok_mixture.json
    --run_name="${run_name}"
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --sub_output_dir_name="${run_name}"
    --budget_minutes=59
    --nodes=128
    --extra_args="--micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=383e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[prod,160m,v3,25_62_env]'"
    --repetitions=50
    --disable_net_gdr
)
python launch_frontier.py "${launch_args[@]}"


# nomic_positive_only pretraining
# 238998494 rows - nomic positive only
# Invoke launch_frontier.py for the nomic positive-only fine-tune that passes
# the v4 step-00020000 checkpoint as --finetune_checkpoint: 16 nodes,
# budget_minutes=118, repetitions=8, with --debug_qos.
run_name=ft_nomic_pos_only_pt_step_20k_w_lockstep_wb_negs_16384_mean_pool_v4_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512
launch_args=(
    --python_script="train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval"
    --budget_minutes=118
    --nodes 16
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --config launch_configs/retrieval/base_nomic_positive_only_finetune_lockstep.json
    --run_name "${run_name}"
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --mean_pooling=true --max_steps=14587 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=128 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=16384 --micro_batch_size=128 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v4_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp1024_keep_368k_negs_128N_truncate_normal/combined_ckpts/step-00020000_ckpt.pth"'
    --sub_output_dir_name "${run_name}"
    --disable_net_gdr
    --repetitions=8
    --debug_qos
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the nomic positive-only fine-tune that passes
# the v3 step-00125000 checkpoint as --finetune_checkpoint: 16 nodes,
# budget_minutes=118, repetitions=8, with --debug_qos.
run_name=ft_nomic_pos_only_pt_step_125k_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512
launch_args=(
    --python_script="train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval"
    --budget_minutes=118
    --nodes 16
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --config launch_configs/retrieval/base_nomic_positive_only_finetune_lockstep.json
    --run_name "${run_name}"
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --mean_pooling=true --max_steps=14587 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=128 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=16384 --micro_batch_size=128 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00125000_ckpt.pth"'
    --sub_output_dir_name "${run_name}"
    --disable_net_gdr
    --repetitions=8
    --debug_qos
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the 64-node nomic positive-only fine-tune
# (micro_batch_size=32, cross-device group size 512): budget_minutes=119,
# with --debug_qos and --extended_partition.
# The original command passed --debug_qos twice; the flag is now given once.
# NOTE(review): assumes --debug_qos is a plain on/off switch in
# launch_frontier.py, so the repeat carried no meaning — confirm.
run_name=ft_nomic_pos_only_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb32-wb16384-grp1-1-8_512_64N_max_steps_14587_max_seq_512
launch_args=(
    --python_script="train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval"
    --budget_minutes=119
    --nodes 64
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --config launch_configs/retrieval/base_nomic_positive_only_finetune_lockstep.json
    --run_name "${run_name}"
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --mean_pooling=true --max_steps=14587 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=512 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=16384 --micro_batch_size=32 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"'
    --sub_output_dir_name "${run_name}"
    --disable_net_gdr
    --debug_qos
    --extended_partition
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the nomic positive-only fine-tune with
# --lockstep_sampling=null: 16 nodes, budget_minutes=118, repetitions=8,
# with --debug_qos.
run_name=ft_nomic_pos_only_no_meta_toks_w_lockstep_null_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512
launch_args=(
    --python_script="train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval"
    --budget_minutes=118
    --nodes 16
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --config launch_configs/retrieval/base_nomic_positive_only_finetune_lockstep.json
    --run_name "${run_name}"
    --extra_args='--lockstep_sampling=null --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --mean_pooling=true --max_steps=14587 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=128 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=16384 --micro_batch_size=128 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"'
    --sub_output_dir_name "${run_name}"
    --disable_net_gdr
    --repetitions=8
    --debug_qos
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the nomic positive-only fine-tune that passes
# the v3 step-00108000 checkpoint as --finetune_checkpoint: 16 nodes,
# budget_minutes=118, repetitions=8, with --debug_qos.
run_name=ft_nomic_pos_only_no_meta_toks_pt_step_108k_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512
launch_args=(
    --python_script="train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval"
    --budget_minutes=118
    --nodes 16
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --config launch_configs/retrieval/base_nomic_positive_only_finetune_lockstep.json
    --run_name "${run_name}"
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --mean_pooling=true --max_steps=14587 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=128 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=16384 --micro_batch_size=128 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00108000_ckpt.pth"'
    --sub_output_dir_name "${run_name}"
    --disable_net_gdr
    --repetitions=8
    --debug_qos
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the nomic positive-only fine-tune with
# world_batch_size=32768 and max_steps=7294: 32 nodes, budget_minutes=118,
# repetitions=4, with --debug_qos.
# NOTE(review): the run name contains "16N" while --nodes is 32 — confirm
# which one is intended.
run_name=ft_nomic_pos_only_no_meta_toks_w_lockstep_wb_negs_32768_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb32768-grp1-1-8_128_16N_max_steps_7294_max_seq_512
launch_args=(
    --python_script="train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval"
    --budget_minutes=118
    --nodes 32
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --config launch_configs/retrieval/base_nomic_positive_only_finetune_lockstep.json
    --run_name "${run_name}"
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --mean_pooling=true --max_steps=7294 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=128 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=32768 --micro_batch_size=128 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"'
    --sub_output_dir_name "${run_name}"
    --disable_net_gdr
    --repetitions=4
    --debug_qos
)
python launch_frontier.py "${launch_args[@]}"

# Run train_retrieval_w_anticausal.py through `pythonAll` with run_name=test,
# writing under the .../interactive directory, with reduced sizes
# (max_steps=1458, world_batch_size=1024, tensor-parallel size 1).
# NOTE(review): `pythonAll` is not defined in the visible part of this file;
# presumably a user alias or wrapper — confirm it is available where this runs.
interactive_args=(
    --config launch_configs/retrieval/base_nomic_positive_only_finetune_lockstep.json
    --run_name test
    --out_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/interactive
    --lockstep_sampling=world_batch
    --data_telemetry=10
    --max_seq_len=512
    --fabric_strategy="axonn_tp"
    --attn_impl=rocm
    --fabric.depth_tensor_parallel_size=1
    --batch_prefix_and_suffix=true
    --model_name=pythia-160m-retr-32k_w_meta
    --pad_to_block_size=true
    --mean_pooling=true
    --max_steps=1458
    --negatives_cross_device=true
    --save_step_interval=500
    --eval_step_interval=20000
    --negatives_cross_device_group_size=8
    --optim_config.lr=2e-5
    --min_lr=2e-6
    --save_n_min_before_job_done=2
    --world_batch_size=1024
    --micro_batch_size=128
    --validate_at_end=false
    --save_last_step=true
    --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"
)
pythonAll train_retrieval_w_anticausal.py "${interactive_args[@]}"


# phase 3 nomic supervised finetuning
# Invoke launch_frontier.py (absolute path under lit-gpt-dev_new) for the
# phase 3 nomic fine-tune: 16 nodes, budget_minutes=119, with --debug_qos and
# --extended_partition.
#
# Fix: the original command had no trailing backslash after the first
# --debug_qos, so the shell ended the command there and then tried to execute
# "--extended_partition --debug_qos" as a separate command. The flags are now
# passed to the launcher, and --debug_qos is given once. Building the argument
# list as an array removes the dependence on line continuations.
# NOTE(review): assumes --extended_partition was meant to be passed, as in the
# other budget_minutes=119 launches in this file — confirm.
repo_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new
run_name=axonn_nomic_finetune_phase3_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512
launch_args=(
    --python_script="${repo_dir}/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval"
    --budget_minutes=119
    --nodes 16
    --config "${repo_dir}/launch_configs/retrieval/base_nomic_finetune_lockstep.json"
    --run_name "${run_name}"
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=8 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=32 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512/combined_ckpts/step-00014586_ckpt.pth"'
    --sub_output_dir_name "${run_name}"
    --disable_net_gdr
    --debug_qos
    --extended_partition
)
python "${repo_dir}/launch_frontier.py" "${launch_args[@]}"

# Invoke launch_frontier.py (absolute path under lit-gpt-dev_new) for the
# phase 3 nomic fine-tune that starts from the fineweb_stack positive-only
# checkpoint: 16 nodes, budget_minutes=119, with --debug_qos and
# --extended_partition.
#
# Fix: the original command had no trailing backslash after the first
# --debug_qos, so the shell ended the command there and then tried to execute
# "--extended_partition --debug_qos" as a separate command. The flags are now
# passed to the launcher, and --debug_qos is given once. Building the argument
# list as an array removes the dependence on line continuations.
# NOTE(review): assumes --extended_partition was meant to be passed, as in the
# other budget_minutes=119 launches in this file — confirm.
repo_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new
run_name=axonn_nomic_finetune_phase3_fineweb_stack_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512
launch_args=(
    --python_script="${repo_dir}/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval"
    --budget_minutes=119
    --nodes 16
    --config "${repo_dir}/launch_configs/retrieval/base_nomic_finetune_lockstep.json"
    --run_name "${run_name}"
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=8 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=32 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_fineweb_stack_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512/combined_ckpts/step-00014586_ckpt.pth"'
    --sub_output_dir_name "${run_name}"
    --disable_net_gdr
    --debug_qos
    --extended_partition
)
python "${repo_dir}/launch_frontier.py" "${launch_args[@]}"

# concatenated nomic pos only training
# Invoke launch_frontier.py for the nomic positive-only run with
# --finetune_checkpoint=null and cross-device group size 4: 16 nodes,
# budget_minutes=119, repetitions=8, with --extended_partition.
run_name=pt_nomic_pos_only_w_lockstep_wb_negs_262k_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_4_16N_max_steps_14587_max_seq_512
launch_args=(
    --python_script="train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval"
    --budget_minutes=119
    --nodes 16
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --config launch_configs/retrieval/base_optim_longwu_highlr_cos_nomic_pos_only.json
    --run_name "${run_name}"
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --mean_pooling=true --max_steps=14587 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=4 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=16384 --micro_batch_size=128 --validate_at_end=false --save_last_step=true --wandb_tags="[pt-phase-2,160m,v3,25_62_env]" --finetune_checkpoint=null'
    --sub_output_dir_name "${run_name}"
    --disable_net_gdr
    --repetitions=8
    --extended_partition
)
python launch_frontier.py "${launch_args[@]}"

# debug
# num_negs = grp_size*mbsz*seq_len = 128*1440 = 184320
# Invoke launch_frontier.py for the sparse-negatives debug run with
# --keep_k_cross_device_negatives=184320 and cross-device group size 128:
# 128 nodes, budget_minutes=59, repetitions=10.
run_name=v3_debug_sparse_negs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_keep_184k_negs_128N
launch_args=(
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
    --python_script=train_retrieval_w_anticausal.py
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json
    --run_name="${run_name}"
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --sub_output_dir_name="${run_name}"
    --budget_minutes=59
    --nodes=128
    --extra_args="--keep_k_cross_device_negatives=184320 --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131920 --warmup_steps=2000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug,160m,v3,25_62_env]'"
    --repetitions=10
    --disable_net_gdr
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the sparse-negatives debug run with
# --keep_k_cross_device_negatives=184320 and cross-device group size 512:
# 128 nodes, budget_minutes=59, repetitions=10.
run_name=v3_debug_sparse_negs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp512_keep_184k_negs_128N
launch_args=(
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
    --python_script=train_retrieval_w_anticausal.py
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json
    --run_name="${run_name}"
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --sub_output_dir_name="${run_name}"
    --budget_minutes=59
    --nodes=128
    --extra_args="--keep_k_cross_device_negatives=184320 --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=512 --target_token_count=395e9 --max_tokens=null --max_steps=131920 --warmup_steps=2000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug,160m,v3,25_62_env]'"
    --repetitions=10
    --disable_net_gdr
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the sparse-negatives debug run with
# --keep_k_cross_device_negatives=92160 and cross-device group size 128:
# 128 nodes, budget_minutes=59, repetitions=10.
run_name=v3_debug_sparse_negs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_keep_92k_negs_128N
launch_args=(
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
    --python_script=train_retrieval_w_anticausal.py
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json
    --run_name="${run_name}"
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --sub_output_dir_name="${run_name}"
    --budget_minutes=59
    --nodes=128
    --extra_args="--keep_k_cross_device_negatives=92160 --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131920 --warmup_steps=2000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug,160m,v3,25_62_env]'"
    --repetitions=10
    --disable_net_gdr
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the sparse-negatives debug run with
# --keep_k_cross_device_negatives=46080 and cross-device group size 128:
# 128 nodes, budget_minutes=59, repetitions=10.
run_name=v3_debug_sparse_negs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_keep_46k_negs_128N
launch_args=(
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
    --python_script=train_retrieval_w_anticausal.py
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json
    --run_name="${run_name}"
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --sub_output_dir_name="${run_name}"
    --budget_minutes=59
    --nodes=128
    --extra_args="--keep_k_cross_device_negatives=46080 --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131920 --warmup_steps=2000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug,160m,v3,25_62_env]'"
    --repetitions=10
    --disable_net_gdr
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the v3 run with
# --model_name=pythia-70m-retr-32k_w_meta: 128 nodes, budget_minutes=59,
# repetitions=10.
run_name=v3_pythia-70m-retr-32k_w_meta_mb2-wb2048-grp128_128N
launch_args=(
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
    --python_script=train_retrieval_w_anticausal.py
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json
    --run_name="${run_name}"
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --sub_output_dir_name="${run_name}"
    --budget_minutes=59
    --nodes=128
    --extra_args="--model_name=pythia-70m-retr-32k_w_meta --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug,70m,v3,25_62_env]'"
    --repetitions=10
    --disable_net_gdr
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the v3 run with
# --model_name=pythia-70m-wide-retr-32k_w_meta: 128 nodes, budget_minutes=59,
# repetitions=10.
run_name=v3_pythia-70m-wide-retr-32k_w_meta_mb2-wb2048-grp128_128N
launch_args=(
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
    --python_script=train_retrieval_w_anticausal.py
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json
    --run_name="${run_name}"
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --sub_output_dir_name="${run_name}"
    --budget_minutes=59
    --nodes=128
    --extra_args="--model_name=pythia-70m-wide-retr-32k_w_meta --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug,70m,v3,25_62_env]'"
    --repetitions=10
    --disable_net_gdr
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the v3 run with
# --model_name=pythia-70m-deep-retr-32k_w_meta: 128 nodes, budget_minutes=59,
# repetitions=10.
run_name=v3_pythia-70m-deep-retr-32k_w_meta_mb2-wb2048-grp128_128N
launch_args=(
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
    --python_script=train_retrieval_w_anticausal.py
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json
    --run_name="${run_name}"
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --sub_output_dir_name="${run_name}"
    --budget_minutes=59
    --nodes=128
    --extra_args="--model_name=pythia-70m-deep-retr-32k_w_meta --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug,70m,v3,25_62_env]'"
    --repetitions=10
    --disable_net_gdr
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the v3 run with
# --model_name=pythia-14m-retr-32k_w_meta and --block_size=512: 128 nodes,
# budget_minutes=59, repetitions=10.
run_name=v3_pythia-14m-retr-32k_w_meta_mb2-wb2048-grp128_128N
launch_args=(
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
    --python_script=train_retrieval_w_anticausal.py
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json
    --run_name="${run_name}"
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --sub_output_dir_name="${run_name}"
    --budget_minutes=59
    --nodes=128
    --extra_args="--block_size=512 --model_name=pythia-14m-retr-32k_w_meta --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug,14m,v3,25_62_env]'"
    --repetitions=10
    --disable_net_gdr
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the v3 run with
# --model_name=pythia-31m-retr-32k_w_meta and --block_size=1024: 128 nodes,
# budget_minutes=59, repetitions=10.
run_name=v3_pythia-31m-retr-32k_w_meta_mb2-wb2048-grp128_128N
launch_args=(
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
    --python_script=train_retrieval_w_anticausal.py
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json
    --run_name="${run_name}"
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --sub_output_dir_name="${run_name}"
    --budget_minutes=59
    --nodes=128
    --extra_args="--block_size=1024 --model_name=pythia-31m-retr-32k_w_meta --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug,31m,v3,25_62_env]'"
    --repetitions=10
    --disable_net_gdr
)
python launch_frontier.py "${launch_args[@]}"

# Invoke launch_frontier.py for the v3 run with
# --length_shortcut_ablation=permute_batch_tokens: 128 nodes,
# budget_minutes=59, repetitions=50.
run_name=v3_random_permuted_batch_toks_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N
launch_args=(
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz"
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz"
    --python_script=train_retrieval_w_anticausal.py
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json
    --run_name="${run_name}"
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
    --sub_output_dir_name="${run_name}"
    --budget_minutes=59
    --nodes=128
    --extra_args="--length_shortcut_ablation=permute_batch_tokens --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug-XXXX-20,160m,v3,25_62_env]'"
    --repetitions=50
    --disable_net_gdr
)
python launch_frontier.py "${launch_args[@]}"

# Submit the `rand_toks_const_lens` length-shortcut ablation through launch_frontier.py
# (--nodes=128, --budget_minutes=59, --repetitions=50).
# NOTE(review): the run name says pythia-160m, but --extra_args carries no --model_name
# override; presumably the model is picked up from the --config file — confirm.
python launch_frontier.py \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz" \
    --python_script=train_retrieval_w_anticausal.py \
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json \
    --run_name=v3_rand_toks_const_lens_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    --sub_output_dir_name=v3_rand_toks_const_lens_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    --budget_minutes=59 \
    --nodes=128 \
    --extra_args="--length_shortcut_ablation=rand_toks_const_lens --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug-XXXX-20,160m,v3,25_62_env]'" \
    --repetitions=50 \
    --disable_net_gdr

# Submit the `rand_toks_rand_lens` length-shortcut ablation through launch_frontier.py
# (--nodes=128, --budget_minutes=59, --repetitions=50).
# NOTE(review): the run name says pythia-160m, but --extra_args carries no --model_name
# override; presumably the model is picked up from the --config file — confirm.
python launch_frontier.py \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz" \
    --python_script=train_retrieval_w_anticausal.py \
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json \
    --run_name=v3_rand_toks_rand_lens_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    --sub_output_dir_name=v3_rand_toks_rand_lens_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    --budget_minutes=59 \
    --nodes=128 \
    --extra_args="--length_shortcut_ablation=rand_toks_rand_lens --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug-XXXX-20,160m,v3,25_62_env]'" \
    --repetitions=50 \
    --disable_net_gdr

# Submit the `truncate_lens_100_normal` length-shortcut ablation through launch_frontier.py
# (--nodes=128, --budget_minutes=59, --repetitions=20).
# NOTE(review): the run name says pythia-160m, but --extra_args carries no --model_name
# override; presumably the model is picked up from the --config file — confirm.
python launch_frontier.py \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz" \
    --python_script=train_retrieval_w_anticausal.py \
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json \
    --run_name=v3_truncate_lens_100_normal_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    --sub_output_dir_name=v3_truncate_lens_100_normal_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    --budget_minutes=59 \
    --nodes=128 \
    --extra_args="--length_shortcut_ablation=truncate_lens_100_normal --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug-XXXX-20,160m,v3,25_62_env]'" \
    --repetitions=20 \
    --disable_net_gdr

# Single-node debug submission of the `truncate_lens_100_uniform` ablation
# (--nodes=1, --repetitions=1, --extended_partition, --data_telemetry=True, --max_steps=10000).
# NOTE(review): the run name contains "grp128" while --negatives_cross_device_group_size=8
# is what is actually passed — confirm which one is intended.
# NOTE(review): --warmup_steps=6000 is more than half of --max_steps=10000 — confirm this
# is intentional for a debug run.
python launch_frontier.py \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz" \
    --python_script=train_retrieval_w_anticausal.py \
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json \
    --run_name=debug_v3_truncate_lens_100_uniform_pythia-160m-retr-32k_w_meta_mb4-wb32-grp128_1N_max_steps_10k \
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    --sub_output_dir_name=debug_v3_truncate_lens_100_uniform_pythia-160m-retr-32k_w_meta_mb4-wb32-grp128_1N_max_steps_10k \
    --budget_minutes=59 \
    --nodes=1 \
    --extra_args="--length_shortcut_ablation=truncate_lens_100_uniform --data_telemetry=True --micro_batch_size=4 --world_batch_size=32 --negatives_cross_device_group_size=8 --target_token_count=395e9 --max_tokens=null --max_steps=10000 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug-XXXX-20,truncate,160m,v3,25_62_env]'" \
    --repetitions=1 \
    --extended_partition \
    --disable_net_gdr

# Submit the `truncate_lens_100_uniform` length-shortcut ablation through launch_frontier.py
# (--nodes=128, --budget_minutes=59, --repetitions=20).
# NOTE(review): the run name says pythia-160m, but --extra_args carries no --model_name
# override; presumably the model is picked up from the --config file — confirm.
python launch_frontier.py \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval.tar.gz" \
    --python_script=train_retrieval_w_anticausal.py \
    --config=launch_configs/retrieval/base_optim_longwu_highlr_cos.json \
    --run_name=v3_truncate_lens_100_uniform_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    --sub_output_dir_name=v3_truncate_lens_100_uniform_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N \
    --budget_minutes=59 \
    --nodes=128 \
    --extra_args="--length_shortcut_ablation=truncate_lens_100_uniform --micro_batch_size=2 --world_batch_size=2048 --negatives_cross_device_group_size=128 --target_token_count=395e9 --max_tokens=null --max_steps=131900 --warmup_steps=6000 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --wandb_tags='[debug-XXXX-20,160m,v3,25_62_env]'" \
    --repetitions=20 \
    --disable_net_gdr

# Direct run of train_retrieval_w_anticausal.py (no launch_frontier.py wrapper) with the
# `rand_toks_rand_lens` ablation, --fabric_strategy ddp, writing to .../output/interactive.
# NOTE(review): `pythonAll` is not defined in the visible part of this file — presumably a
# user-defined alias/function or a command on PATH. Bash does not expand aliases in
# non-interactive scripts unless `shopt -s expand_aliases` is set — confirm how this is run.
# NOTE(review): the config file is base_160m_ret_meta.json while --model_name selects
# pythia-70m-retr-32k_w_meta — confirm the override is intended.
pythonAll train_retrieval_w_anticausal.py --config launch_configs/retrieval/base_160m_ret_meta.json \
    --run_name test \
    --out_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/interactive \
    --model_name pythia-70m-retr-32k_w_meta \
    --micro_batch_size 16 \
    --world_batch_size 128 \
    --negatives_cross_device_group_size null \
    --target_token_count 395e9 \
    --max_tokens null \
    --max_steps 131900 \
    --warmup_steps 6000 \
    --optim_config.lr 2e-3 \
    --min_lr 2e-4 \
    --fabric_strategy ddp \
    --attn_impl rocm \
    --fabric.depth_tensor_parallel_size 1 \
    --length_shortcut_ablation rand_toks_rand_lens

# nomic finetune experiments
# nomic_supervised - 1695819 rows
# Fine-tune pythia-1.4b-retr-32k_w_meta from the step-00020000 checkpoint under
# proj-shared/.../jwk_ckpts, submitted via launch_frontier.py
# (--nodes 64, --budget_minutes=119, --debug_qos, depth tensor-parallel size 16).
# --extra_args is single-quoted so the inner double quotes are passed through literally.
# NOTE(review): the config file is named base_160m_nomic_finetune.json although the model
# override is the 1.4b variant — confirm the shared config is intended.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 64 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name axonn_nomic_finetune_w_hard_negs_mean_pool_v1_pythia-1.4b-retr-32k_w_meta_mb2-wb1024-grp1-1-16_16_64N_max_steps_4968 \
    --extra_args='--fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=16 --batch_prefix_and_suffix=true --model_name=pythia-1.4b-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=4968 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=16 --optim_config.lr=3e-5 --min_lr=3e-6 --save_n_min_before_job_done=2 --world_batch_size=1024 --micro_batch_size=2 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/retrieval/jwk_ckpts/pythia-1.4b-retr-32k_w_meta_mb2-wb2048-grp64_1-1-16_128N_lr3e-04/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_hard_negs_mean_pool_v1_pythia-1.4b-retr-32k_w_meta_mb2-wb1024-grp1-1-16_16_64N_max_steps_4968 \
    --debug_qos

# Fine-tune pythia-160m-retr-32k_w_meta from the v1 checkpoint (step-00020000), submitted
# via launch_frontier.py (--nodes 8, --budget_minutes=119, --max_seq_len=1024,
# world batch 1024, micro batch 16, lr 3e-5).
# --extra_args is single-quoted so the inner double quotes are passed through literally.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 8 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name axonn_nomic_finetune_w_hard_negs_5_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb16-wb1024-grp1-1-8_2_8N_max_steps_4968_max_seq_1024 \
    --extra_args='--max_seq_len=1024 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=4968 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=3e-5 --min_lr=3e-6 --save_n_min_before_job_done=2 --world_batch_size=1024 --micro_batch_size=16 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_hard_negs_5_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb16-wb1024-grp1-1-8_2_8N_max_steps_4968_max_seq_1024 \
    --extended_partition \
    --disable_net_gdr \
    --debug_qos

# Fine-tune pythia-160m-retr-32k_w_meta from the v3 checkpoint (step-00020000), submitted
# via launch_frontier.py (--nodes 16, --budget_minutes=119, --max_seq_len=1024,
# world batch 2048, micro batch 16, lr 5e-5, --max_steps=2484).
# --extra_args is single-quoted so the inner double quotes are passed through literally.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extra_args='--max_seq_len=1024 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extended_partition \
    --disable_net_gdr \
    --debug_qos

# Fine-tune pythia-70m-retr-32k_w_meta from its v3 checkpoint (step-00020000), submitted
# via launch_frontier.py (--nodes 16, --budget_minutes=119, --max_seq_len=1024,
# world batch 2048, micro batch 16, --validate_at_end=false).
# --extra_args is single-quoted so the inner double quotes are passed through literally.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-70m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extra_args='--max_seq_len=1024 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-70m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-70m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-70m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extended_partition \
    --disable_net_gdr \
    --debug_qos

# Fine-tune pythia-70m-wide-retr-32k_w_meta, submitted via launch_frontier.py
# (--nodes 16, --budget_minutes=119, --max_seq_len=1024, world batch 2048, micro batch 16).
# Note the starting checkpoint here is step-00005000.
# --extra_args is single-quoted so the inner double quotes are passed through literally.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-70m-wide-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extra_args='--max_seq_len=1024 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-70m-wide-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-70m-wide-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00005000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-70m-wide-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extended_partition \
    --disable_net_gdr \
    --debug_qos

# Fine-tune pythia-70m-deep-retr-32k_w_meta, submitted via launch_frontier.py
# (--nodes 16, --budget_minutes=119, --max_seq_len=1024, world batch 2048, micro batch 16).
# Note the starting checkpoint here is step-00005000.
# --extra_args is single-quoted so the inner double quotes are passed through literally.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-70m-deep-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extra_args='--max_seq_len=1024 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-70m-deep-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-70m-deep-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00005000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-70m-deep-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extended_partition \
    --disable_net_gdr \
    --debug_qos

# Fine-tune pythia-31m-retr-32k_w_meta, submitted via launch_frontier.py
# (--nodes 16, --budget_minutes=119, --max_seq_len=1024, world batch 2048, micro batch 16).
# Note the starting checkpoint here is step-00015000.
# --extra_args is single-quoted so the inner double quotes are passed through literally.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-31m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extra_args='--max_seq_len=1024 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-31m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-31m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00015000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-31m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extended_partition \
    --disable_net_gdr \
    --debug_qos

# Fine-tune pythia-14m-retr-32k_w_meta from its v3 checkpoint (step-00020000), submitted
# via launch_frontier.py (--nodes 16, --budget_minutes=119, --max_seq_len=1024,
# world batch 2048, micro batch 16).
# --extra_args is single-quoted so the inner double quotes are passed through literally.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-14m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extra_args='--max_seq_len=1024 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-14m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-14m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-14m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extended_partition \
    --disable_net_gdr \
    --debug_qos

# lockstep finetune experiments
# Fine-tune pythia-160m-retr-32k_w_meta with --lockstep_sampling=null and
# --data_telemetry=10, using base_nomic_finetune_lockstep.json
# (--nodes 16, --budget_minutes=119, --max_seq_len=1024, world batch 2048, micro batch 16).
# --extra_args is single-quoted so the inner double quotes are passed through literally.
# The original passed --debug_qos twice; the redundant copy has been dropped.
# NOTE(review): the other fine-tune submissions in this file pass --extended_partition where
# the first --debug_qos used to be — confirm whether --extended_partition was intended here.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_w_lockstep_null_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extra_args='--lockstep_sampling=null --data_telemetry=10 --max_seq_len=1024 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_lockstep_null_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --disable_net_gdr \
    --debug_qos

# Longer ("10_epoch" per the run name) fine-tune of pythia-160m-retr-32k_w_meta with
# --lockstep_sampling=world_batch: --max_steps=8280, --budget_minutes=479, --nodes 16,
# --validate_at_end=false, --save_last_step=true.
# --extra_args is single-quoted so the inner double quotes are passed through literally.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=479 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_10_epoch_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_8280_max_seq_1024 \
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=1024 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=8280 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_10_epoch_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_8280_max_seq_1024 \
    --extended_partition \
    --disable_net_gdr \
    --debug_qos

# Same 8280-step world_batch lockstep fine-tune of pythia-160m-retr-32k_w_meta, but
# starting from the v3_fineweb_stack checkpoint (step-00020000).
# (--nodes 16, --budget_minutes=479, --mean_pooling=true.)
# --extra_args is single-quoted so the inner double quotes are passed through literally.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=479 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_10_epoch_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_fineweb_stack_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_8280_max_seq_1024 \
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=1024 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=8280 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_fineweb_stack_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_10_epoch_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_fineweb_stack_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_8280_max_seq_1024 \
    --extended_partition \
    --disable_net_gdr \
    --debug_qos

# 8280-step world_batch lockstep fine-tune from the v3_fineweb_stack checkpoint with
# --mean_pooling=false (the run name labels this variant "lasttoken_pool").
# (--nodes 16, --budget_minutes=479.)
# --extra_args is single-quoted so the inner double quotes are passed through literally.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=479 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_10_epoch_w_lockstep_world_batch_hard_negs_5_lasttoken_pool_v3_fineweb_stack_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_8280_max_seq_1024 \
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=1024 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=false --max_steps=8280 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_fineweb_stack_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_10_epoch_w_lockstep_world_batch_hard_negs_5_lasttoken_pool_v3_fineweb_stack_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_8280_max_seq_1024 \
    --extended_partition \
    --disable_net_gdr \
    --debug_qos

# 8280-step fine-tune of pythia-160m-retr-32k_w_meta with --lockstep_sampling=micro_batch:
# --nodes 8, --max_seq_len=512, micro batch 32, --negatives_cross_device_group_size=1,
# --budget_minutes=479.
# --extra_args is single-quoted so the inner double quotes are passed through literally.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=479 \
    --nodes 8 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_10_epoch_w_lockstep_micro_batch_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb32-wb2048-grp1-1-8_1_8N_max_steps_8280_max_seq_512 \
    --extra_args='--lockstep_sampling=micro_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=8280 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=1 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=32 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_10_epoch_w_lockstep_micro_batch_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb32-wb2048-grp1-1-8_1_8N_max_steps_8280_max_seq_512 \
    --extended_partition \
    --disable_net_gdr \
    --debug_qos

# Direct run of train_retrieval_w_anticausal.py (no launch_frontier.py wrapper) against
# base_nomic_finetune_lockstep.json with --data_dry_run true and --micro_batch_size 256,
# writing to .../output/interactive.
# NOTE(review): `pythonAll` is not defined in the visible part of this file — presumably a
# user-defined alias/function or a command on PATH. Bash does not expand aliases in
# non-interactive scripts unless `shopt -s expand_aliases` is set — confirm how this is run.
pythonAll train_retrieval_w_anticausal.py --config launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name test \
    --out_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/interactive \
    --model_name pythia-160m-retr-32k_w_meta \
    --max_seq_len 1024 \
    --fabric_strategy axonn_tp \
    --attn_impl rocm \
    --fabric.depth_tensor_parallel_size 8 \
    --batch_prefix_and_suffix true \
    --train_group_size 6 \
    --mean_pooling true \
    --max_steps 2484 \
    --negatives_cross_device true \
    --save_step_interval 500 \
    --eval_step_interval 20000 \
    --negatives_cross_device_group_size 2 \
    --optim_config.lr 5e-5 \
    --min_lr 5e-6 \
    --world_batch_size 2048 \
    --micro_batch_size 256 \
    --data_dry_run true \
    --validate_at_end false

# 70m, 31m, 14m finetune
# World_batch lockstep fine-tune of pythia-70m-retr-32k_w_meta from its v3 checkpoint
# (step-00020000): --nodes 16, --budget_minutes=119, --max_steps=2484, --max_seq_len=1024,
# --validate_at_end=false, --save_last_step=true.
# --extra_args is single-quoted so the inner double quotes are passed through literally.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_pythia-70m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=1024 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-70m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-70m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_pythia-70m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extended_partition \
    --disable_net_gdr \
    --debug_qos

# World_batch lockstep fine-tune of pythia-31m-retr-32k_w_meta: --nodes 16,
# --budget_minutes=119, --max_steps=2484, --max_seq_len=1024.
# The starting checkpoint is read from the "..._128N_fixed" output directory (step-00020000).
# --extra_args is single-quoted so the inner double quotes are passed through literally.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_pythia-31m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=1024 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-31m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-31m-retr-32k_w_meta_mb2-wb2048-grp128_128N_fixed/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_pythia-31m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extended_partition \
    --disable_net_gdr \
    --debug_qos

# World_batch lockstep fine-tune of pythia-14m-retr-32k_w_meta: --nodes 16,
# --budget_minutes=119, --max_steps=2484, --max_seq_len=1024.
# The starting checkpoint is read from the "..._128N_fixed" output directory (step-00020000).
# --extra_args is single-quoted so the inner double quotes are passed through literally.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_pythia-14m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=1024 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-14m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=6 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-14m-retr-32k_w_meta_mb2-wb2048-grp128_128N_fixed/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_pythia-14m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 \
    --extended_partition \
    --disable_net_gdr \
    --debug_qos

# finetune train_group_size experiments
# train_group_size=16 variant: 16 nodes, 119-minute budget, pythia-160m-retr-32k_w_meta,
# max_seq_len=512, negatives_cross_device_group_size=2, micro_batch_size=16, world_batch_size=2048.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_w_lockstep_world_batch_train_grp_16_negs_512_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_512 \
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=16 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=2 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_lockstep_world_batch_train_grp_16_negs_512_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_512 \
    --disable_net_gdr \
    --extended_partition \
    --debug_qos

# train_group_size=64 variant: 64 nodes, 119-minute budget, micro_batch_size=2,
# world_batch_size=1024, negatives_cross_device_group_size=16, max_steps=4968, lr=3e-5.
# NOTE(review): the --python_script path contains "csc569" where every other command in this
# part of the file has "XXXX-31" in that position — confirm both refer to the same checkout.
# NOTE(review): unlike the neighbouring commands, --extended_partition is not passed here — confirm intended.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/csc569/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 64 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_w_lockstep_world_batch_train_grp_64_negs_2048_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb2-wb1024-grp1-1-8_16_64N_max_steps_4968_max_seq_512 \
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=64 --mean_pooling=true --max_steps=4968 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=16 --optim_config.lr=3e-5 --min_lr=3e-6 --save_n_min_before_job_done=2 --world_batch_size=1024 --micro_batch_size=2 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_lockstep_world_batch_train_grp_64_negs_2048_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb2-wb1024-grp1-1-8_16_64N_max_steps_4968_max_seq_512 \
    --disable_net_gdr \
    --debug_qos

# finetune neg_grp_size experiments
# negatives_cross_device_group_size=4 variant: 16 nodes, 119-minute budget, train_group_size=8,
# micro_batch_size=16, world_batch_size=2048, pythia-160m-retr-32k_w_meta, max_seq_len=512.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_w_lockstep_world_batch_train_grp_8_negs_512_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_4_16N_max_steps_2484_max_seq_512 \
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=8 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=4 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_lockstep_world_batch_train_grp_8_negs_512_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_4_16N_max_steps_2484_max_seq_512 \
    --disable_net_gdr \
    --extended_partition \
    --debug_qos

# negatives_cross_device_group_size=8 variant; all other flags match the group-size-4 command
# (16 nodes, 119-minute budget, train_group_size=8, micro_batch_size=16, world_batch_size=2048).
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_w_lockstep_world_batch_train_grp_8_negs_1024_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_8_16N_max_steps_2484_max_seq_512 \
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=8 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=8 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_lockstep_world_batch_train_grp_8_negs_1024_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_8_16N_max_steps_2484_max_seq_512 \
    --disable_net_gdr \
    --extended_partition \
    --debug_qos

# negatives_cross_device_group_size=16 variant (16 nodes, 119-minute budget, train_group_size=8,
# micro_batch_size=16, world_batch_size=2048, step-00020000 checkpoint).
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_w_lockstep_world_batch_train_grp_8_negs_2048_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512 \
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=8 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=16 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_lockstep_world_batch_train_grp_8_negs_2048_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512 \
    --disable_net_gdr \
    --extended_partition \
    --debug_qos

# negatives_cross_device_group_size=32 variant (16 nodes, 119-minute budget, train_group_size=8,
# micro_batch_size=16, world_batch_size=2048, step-00020000 checkpoint).
# NOTE(review): the run/sub-output names end in "grp1-1-8_16_16N" although the group size passed
# is 32; the sibling group-size 4/8/16 commands put the group size in that position — confirm
# whether the name should read "_32_16N" before relying on it.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512 \
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=8 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=32 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512 \
    --disable_net_gdr \
    --extended_partition \
    --debug_qos

# Same flags as the group-size-32 command, but --finetune_checkpoint points at
# step-00108000_ckpt.pth instead of step-00020000 (run name carries "step_108k").
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_step_108k_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512 \
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=8 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=32 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N/combined_ckpts/step-00108000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_step_108k_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512 \
    --disable_net_gdr \
    --extended_partition \
    --debug_qos

# "zero_pretrain" variant: same flags as the group-size-32 command but with
# --finetune_checkpoint=null, i.e. no checkpoint path is supplied.
# Note: --extended_partition is not passed on this command.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --budget_minutes=119 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_nomic_finetune_lockstep.json \
    --run_name axonn_nomic_finetune_zero_pretrain_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512 \
    --extra_args='--lockstep_sampling=world_batch --data_telemetry=10 --max_seq_len=512 --fabric_strategy="axonn_tp" --attn_impl=rocm --fabric.depth_tensor_parallel_size=8 --batch_prefix_and_suffix=true --model_name=pythia-160m-retr-32k_w_meta --pad_to_block_size=true --train_group_size=8 --mean_pooling=true --max_steps=2484 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=32 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --validate_at_end=false --save_last_step=true --finetune_checkpoint=null' \
    --sub_output_dir_name axonn_nomic_finetune_zero_pretrain_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512 \
    --disable_net_gdr \
    --debug_qos

# cross device negatives
# Cross-device negatives on 4 nodes, 59-minute budget, using the *_62 rccl/conda environment:
# group size 32, world_batch_size=512, micro_batch_size=16, max_steps=9936, lr=2e-5.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
    --environment="${WRKSPC}/frontier_conda_62" \
    --budget_minutes=59 \
    --nodes 4 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name nomic_finetune_cross_device_negs_v1_pythia-160m-retr-32k_w_meta_mb16-wb512-grp32_4N_max_steps_9936 \
    --extra_args='--max_steps=9936 --negatives_cross_device=true --negatives_cross_device_group_size=32 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=512 --micro_batch_size=16 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00063000_ckpt.pth"' \
    --sub_output_dir_name nomic_finetune_cross_device_negs_v1_pythia-160m-retr-32k_w_meta_mb16-wb512-grp32_4N_max_steps_9936 \
    --debug_qos

# Cross-device negatives on 8 nodes, 59-minute budget: group size 64, world_batch_size=1024,
# micro_batch_size=16, max_steps=4968, lr=3e-5.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
    --environment="${WRKSPC}/frontier_conda_62" \
    --budget_minutes=59 \
    --nodes 8 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name nomic_finetune_cross_device_negs_v1_pythia-160m-retr-32k_w_meta_mb16-wb1024-grp64_8N_max_steps_4968 \
    --extra_args='--max_steps=4968 --negatives_cross_device=true --negatives_cross_device_group_size=64 --optim_config.lr=3e-5 --min_lr=3e-6 --save_n_min_before_job_done=2 --world_batch_size=1024 --micro_batch_size=16 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00063000_ckpt.pth"' \
    --sub_output_dir_name nomic_finetune_cross_device_negs_v1_pythia-160m-retr-32k_w_meta_mb16-wb1024-grp64_8N_max_steps_4968 \
    --debug_qos

# Cross-device negatives on 16 nodes, 59-minute budget: group size 128, world_batch_size=2048,
# micro_batch_size=16, max_steps=2484, lr=5e-5.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
    --environment="${WRKSPC}/frontier_conda_62" \
    --budget_minutes=59 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name nomic_finetune_cross_device_negs_v1_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp128_16N_max_steps_2484 \
    --extra_args='--max_steps=2484 --negatives_cross_device=true --negatives_cross_device_group_size=128 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=2 --world_batch_size=2048 --micro_batch_size=16 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00063000_ckpt.pth"' \
    --sub_output_dir_name nomic_finetune_cross_device_negs_v1_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp128_16N_max_steps_2484 \
    --debug_qos

# Cross-device negatives on 16 nodes, 59-minute budget: group size 128, world_batch_size=4096,
# micro_batch_size=32, max_steps=1242, lr=8e-5.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
    --environment="${WRKSPC}/frontier_conda_62" \
    --budget_minutes=59 \
    --nodes 16 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name nomic_finetune_cross_device_negs_v1_pythia-160m-retr-32k_w_meta_mb32-wb4096-grp128_16N_max_steps_1242 \
    --extra_args='--max_steps=1242 --negatives_cross_device=true --negatives_cross_device_group_size=128 --optim_config.lr=8e-5 --min_lr=8e-6 --save_n_min_before_job_done=2 --world_batch_size=4096 --micro_batch_size=32 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00063000_ckpt.pth"' \
    --sub_output_dir_name nomic_finetune_cross_device_negs_v1_pythia-160m-retr-32k_w_meta_mb32-wb4096-grp128_16N_max_steps_1242 \
    --debug_qos



# mean pooling
# 1695819
# ^ Bare number kept as a comment: left uncommented, the shell tries to run it as a command
#   and reports "command not found". Presumably a scheduler job ID — TODO confirm.
#
# Mean-pooling variant of the 4-node cross-device-negatives command: 59-minute budget,
# --mean_pooling=true, group size 32, world_batch_size=512, micro_batch_size=16, max_steps=9936.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
    --environment="${WRKSPC}/frontier_conda_62" \
    --budget_minutes=59 \
    --nodes 4 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name nomic_finetune_cross_device_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb16-wb512-grp32_4N_max_steps_9936 \
    --extra_args='--mean_pooling=true --max_steps=9936 --negatives_cross_device=true --negatives_cross_device_group_size=32 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=512 --micro_batch_size=16 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00063000_ckpt.pth"' \
    --sub_output_dir_name nomic_finetune_cross_device_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb16-wb512-grp32_4N_max_steps_9936 \
    --debug_qos

# Mean-pooling variant on 8 nodes, 59-minute budget: --mean_pooling=true, group size 64,
# world_batch_size=1024, micro_batch_size=16, max_steps=4968, lr=3e-5.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
    --environment="${WRKSPC}/frontier_conda_62" \
    --budget_minutes=59 \
    --nodes 8 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name nomic_finetune_cross_device_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb16-wb1024-grp64_8N_max_steps_4968 \
    --extra_args='--mean_pooling=true --max_steps=4968 --negatives_cross_device=true --negatives_cross_device_group_size=64 --optim_config.lr=3e-5 --min_lr=3e-6 --save_n_min_before_job_done=2 --world_batch_size=1024 --micro_batch_size=16 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00063000_ckpt.pth"' \
    --sub_output_dir_name nomic_finetune_cross_device_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb16-wb1024-grp64_8N_max_steps_4968 \
    --debug_qos

# Hard-negatives test with mean pooling: 4 nodes, 119-minute budget, train_group_size=8,
# group size 32, world_batch_size=256, micro_batch_size=8, warmup_steps=400, max_steps=6624,
# config base_160m_nomic_finetune.json.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
    --environment="${WRKSPC}/frontier_conda_62" \
    --budget_minutes=119 \
    --nodes 4 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name test_nomic_finetune_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624 \
    --extra_args='--train_group_size=8 --mean_pooling=true --max_steps=6624 --negatives_cross_device=true --save_step_interval=1000 --eval_step_interval=20000 --negatives_cross_device_group_size=32 --warmup_steps=400 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=256 --micro_batch_size=8 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00063000_ckpt.pth"' \
    --sub_output_dir_name test_nomic_finetune_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624 \
    --debug_qos

# Same extra_args as the hard-negatives mean-pool test, but with the
# base_160m_nomic_finetune_bos_task_q_d_task.json config (4 nodes, 119-minute budget).
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
    --environment="${WRKSPC}/frontier_conda_62" \
    --budget_minutes=119 \
    --nodes 4 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune_bos_task_q_d_task.json \
    --run_name test_nomic_finetune_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624 \
    --extra_args='--train_group_size=8 --mean_pooling=true --max_steps=6624 --negatives_cross_device=true --save_step_interval=1000 --eval_step_interval=20000 --negatives_cross_device_group_size=32 --warmup_steps=400 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=256 --micro_batch_size=8 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00063000_ckpt.pth"' \
    --sub_output_dir_name test_nomic_finetune_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624 \
    --debug_qos

# Same extra_args again, with the
# base_160m_nomic_finetune_bos_task_q_d_task_shuffled_batch_wise.json config
# (4 nodes, 119-minute budget; run name carries "datawise_batch").
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
    --environment="${WRKSPC}/frontier_conda_62" \
    --budget_minutes=119 \
    --nodes 4 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune_bos_task_q_d_task_shuffled_batch_wise.json \
    --run_name test_nomic_finetune_datawise_batch_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624 \
    --extra_args='--train_group_size=8 --mean_pooling=true --max_steps=6624 --negatives_cross_device=true --save_step_interval=1000 --eval_step_interval=20000 --negatives_cross_device_group_size=32 --warmup_steps=400 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=256 --micro_batch_size=8 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00063000_ckpt.pth"' \
    --sub_output_dir_name test_nomic_finetune_datawise_batch_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624 \
    --debug_qos

# "3_epoch" variant with the bos_task_q_d_task config: 8 nodes, 119-minute budget,
# world_batch_size=512, micro_batch_size=8, max_steps=9936, lr=3e-5.
# NOTE(review): min_lr is 2e-6 here while lr is 3e-5; the sibling commands keep min_lr at lr/10
# — confirm this is intentional.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
    --environment="${WRKSPC}/frontier_conda_62" \
    --budget_minutes=119 \
    --nodes 8 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune_bos_task_q_d_task.json \
    --run_name test_nomic_finetune_3_epoch_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936 \
    --extra_args='--train_group_size=8 --mean_pooling=true --max_steps=9936 --negatives_cross_device=true --save_step_interval=1000 --eval_step_interval=20000 --negatives_cross_device_group_size=32 --warmup_steps=400 --optim_config.lr=3e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=512 --micro_batch_size=8 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00063000_ckpt.pth"' \
    --sub_output_dir_name test_nomic_finetune_3_epoch_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936 \
    --debug_qos

# Counterpart of the bos_task_q_d_task mean-pool test with --mean_pooling=false
# (run name carries "lasttoken_pool"); 4 nodes, 119-minute budget, other flags unchanged.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
    --environment="${WRKSPC}/frontier_conda_62" \
    --budget_minutes=119 \
    --nodes 4 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune_bos_task_q_d_task.json \
    --run_name test_nomic_finetune_bos_task_q_d_task_w_hard_negs_lasttoken_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624 \
    --extra_args='--train_group_size=8 --mean_pooling=false --max_steps=6624 --negatives_cross_device=true --save_step_interval=1000 --eval_step_interval=20000 --negatives_cross_device_group_size=32 --warmup_steps=400 --optim_config.lr=2e-5 --min_lr=2e-6 --save_n_min_before_job_done=2 --world_batch_size=256 --micro_batch_size=8 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00063000_ckpt.pth"' \
    --sub_output_dir_name test_nomic_finetune_bos_task_q_d_task_w_hard_negs_lasttoken_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624 \
    --debug_qos

# pythia-1.4b-retr-32k_w_meta with fabric_strategy="axonn_tp" and depth_tensor_parallel_size=16:
# 32 nodes, 119-minute budget, attn_impl=sdpa, group size 16, world_batch_size=512,
# micro_batch_size=2, max_steps=9936.
# NOTE(review): the run/sub-output names say "mb8" and "8N" while the flags pass
# --micro_batch_size=2 and --nodes 32, and --config is the base_160m_* file even though
# --model_name is the 1.4b model — confirm these are intended.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
    --environment="${WRKSPC}/frontier_conda_62" \
    --budget_minutes=119 \
    --nodes 32 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json \
    --run_name axonn_nomic_finetune_w_hard_negs_mean_pool_v1_pythia-1.4b-retr-32k_w_meta_mb8-wb512-grp16_8N_max_steps_9936 \
    --extra_args='--fabric_strategy="axonn_tp" --attn_impl=sdpa --fabric.depth_tensor_parallel_size=16 --batch_prefix_and_suffix=true --model_name=pythia-1.4b-retr-32k_w_meta --pad_to_block_size=true --train_group_size=8 --mean_pooling=true --max_steps=9936 --negatives_cross_device=true --save_step_interval=500 --eval_step_interval=20000 --negatives_cross_device_group_size=16 --optim_config.lr=3e-5 --min_lr=3e-6 --save_n_min_before_job_done=2 --world_batch_size=512 --micro_batch_size=2 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/retrieval/jwk_ckpts/pythia-1.4b-retr-32k_w_meta_mb2-wb2048-grp64_1-1-16_128N_lr3e-04/combined_ckpts/step-00020000_ckpt.pth"' \
    --sub_output_dir_name axonn_nomic_finetune_w_hard_negs_mean_pool_v1_pythia-1.4b-retr-32k_w_meta_mb8-wb512-grp16_8N_max_steps_9936 \
    --debug_qos

# Run train_retrieval_w_anticausal.py directly (not through launch_frontier.py), writing to the
# "interactive" out_dir with run name nomic_finetune_test.
# The interpreter is spelled `python`, as in every other command in this file; the previous
# `pythonAll` spelling is not a command used anywhere else here and fails with "command not found".
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/retrieval/base_160m_nomic_finetune.json" \
    --run_name=nomic_finetune_test \
    --out_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/interactive \
    --negatives_cross_device=true --optim_config.lr=2e-4 --min_lr=2e-5 --save_n_min_before_job_done=2 --world_batch_size=512 --micro_batch_size=16 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00063000_ckpt.pth"

# "test_pqds_pure_data_axonn": fineweb_retrieval_train_pythia.json config on 8 nodes with a
# 59-minute budget, fabric_strategy=axonn_tp, depth_tensor_parallel_size=8, group size 64,
# world_batch_size=256, micro_batch_size=4. The rccl/environment arguments point at .tar.gz
# files here, and --rocm_version is given explicitly.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --rocm_version="6.2.0" \
    --budget_minutes=59 \
    --nodes=8 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="test_pqds_pure_data_axonn" \
    --extra_args="--batch_prefix_and_suffix=true --fabric_strategy=axonn_tp --attn_impl=sdpa --fabric.depth_tensor_parallel_size=8 --negatives_cross_device=true --negatives_cross_device_group_size=64 --suffix_is_prefix=true --max_steps=576908 --mask_full_ldiag=true --k_random_pos_labels=5 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=4" \
    --disable_net_gdr \
    --debug_qos \
    --sub_output_dir_name="test_pqds_pure_data_axonn"

# Run eval/mteb_eval.py (path relative to the current working directory) against the
# step-00004000 checkpoint of the v1 pythia-160m run, with --include_meta_tokens True.
python eval/mteb_eval.py \
    --model_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N \
    --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N/combined_ckpts/step-00004000_ckpt.pth \
    --include_meta_tokens True

# "test_k_pos_labels_random_diags_v2": 4 nodes, --budget_hours=2, fabric_strategy=ddp,
# --mask_full_ldiag=true, k_random_pos_labels=5, group size 32, world_batch_size=256,
# micro_batch_size=8. Uses the unpacked rccl/conda directories (no .tar.gz suffix) and does not
# pass --disable_net_gdr or --debug_qos.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
    --environment="${WRKSPC}/frontier_conda_62" \
    --rocm_version="6.2.0" \
    --budget_hours=2 \
    --nodes=4 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="test_k_pos_labels_random_diags_v2" \
    --extra_args="--negatives_cross_device=true --negatives_cross_device_group_size=32 --suffix_is_prefix=true --max_steps=576908 --mask_full_ldiag=true --k_random_pos_labels=5 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=8 --fabric_strategy=ddp" \
    --extended_partition \
    --sub_output_dir_name="test_k_pos_labels_random_diags_v2"

# Invoke launch_frontier.py for run "test_k_pos_labels_random_elements":
# 4 nodes, 2-hour budget, ddp strategy, mask_k_ldiags=2050, k_random_pos_labels=5.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --rocm_version="6.2.0" \
    --budget_hours=2 \
    --nodes=4 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="test_k_pos_labels_random_elements" \
    --extra_args="--negatives_cross_device=true --negatives_cross_device_group_size=32 --suffix_is_prefix=true --max_steps=576908 --mask_k_ldiags=2050 --k_random_pos_labels=5 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=8 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="test_k_pos_labels_random_elements"

# Invoke launch_frontier.py for run "test_k_pos_labels_random_elements_axonn":
# 4 nodes, 119-minute budget, axonn_tp strategy with depth tensor parallel size 8,
# mask_k_ldiags=2050, k_random_pos_labels=5.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --rocm_version="6.2.0" \
    --budget_minutes=119 \
    --nodes=4 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="test_k_pos_labels_random_elements_axonn" \
    --extra_args="--batch_prefix_and_suffix=true --fabric_strategy=axonn_tp --attn_impl=sdpa --fabric.depth_tensor_parallel_size=8 --negatives_cross_device=true --negatives_cross_device_group_size=32 --suffix_is_prefix=true --max_steps=576908 --mask_k_ldiags=2050 --k_random_pos_labels=5 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=8" \
    --disable_net_gdr \
    --debug_qos \
    --sub_output_dir_name="test_k_pos_labels_random_elements_axonn"

# Run train_retrieval_w_anticausal.py directly (not through launch_frontier.py)
# for run "test_axonn", writing to the output/interactive directory.
# NOTE(review): `pythonAll` is not a standard command, and every other invocation
# in this file uses `python`. Presumably either a typo for `python` or a
# site-local wrapper/function — confirm; if it is not defined, this line fails
# with "command not found".
pythonAll /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name=test_axonn \
    --out_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/interactive \
    --batch_prefix_and_suffix=true --fabric_strategy=axonn_tp --attn_impl=sdpa --fabric.depth_tensor_parallel_size=8 --negatives_cross_device=true --negatives_cross_device_group_size=32 --suffix_is_prefix=true --max_steps=576908 --mask_k_ldiags=2050 --k_random_pos_labels=5 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=8

# Invoke launch_frontier.py for run "v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N":
# 128 nodes, 59-minute budget, --repetitions=50, axonn_tp strategy with depth
# tensor parallel size 8, world batch size 2048, group size 64.
# launch_frontier.py, --python_script and --config are relative paths, so this
# depends on the current working directory.
python launch_frontier.py \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62_retrieval.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62_retrieval.tar.gz" \
    --rocm_version="6.2.0" \
    --python_script=train_retrieval_w_anticausal.py \
    --config=launch_configs/retrieval/base_160m_ret_meta.json \
    --run_name=v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N \
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    --sub_output_dir_name=v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N \
    --budget_minutes=59 \
    --repetitions=50 \
    --nodes=128 \
    --extra_args="--negatives_cross_device_group_size=64  --max_steps=131920 --micro_batch_size=2 --target_token_count=395e9 --warmup_steps=2000 --world_batch_size=2048 --optim_config.lr=2e-3 --min_lr=2e-4 --fabric_strategy=axonn_tp --attn_impl=sdpa --fabric.depth_tensor_parallel_size=8" \
    --disable_net_gdr \
    --extended_partition

# Invoke launch_frontier.py for the "new_env_..._single-causal-..._flipped_rope" run:
# 16 nodes, 119-minute budget, ddp strategy, environment taken from ${WRKSPC}/new_conda_62,
# suffix_is_prefix + flip_rope_embedding_suffix, mask_k_ldiags=2050, k_pos_labels=5.
# No --rocm_version is passed here.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/new_conda_62/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/new_conda_62/frontier_conda_62.tar.gz" \
    --budget_minutes=119 \
    --nodes=16 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="new_env_mask_2050_ldiags_k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-single-causal-pythia-160m_flipped_rope-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908" \
    --extra_args="--negatives_cross_device=true --suffix_is_prefix=True --flip_rope_embedding_suffix=True --max_steps=576908 --mask_k_ldiags=2050 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --debug_qos \
    --sub_output_dir_name="new_env_mask_2050_ldiags_k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-single-causal-pythia-160m_flipped_rope-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908"
# Invoke launch_frontier.py for the "mask_2050_ldiags_old_impl_k_pos_labels_0_..._dual-causal" run:
# 16 nodes, 119-minute budget, ddp strategy, environment taken from ${WRKSPC}/new_conda_62,
# mask_k_ldiags=2050; no k_pos_labels / suffix_is_prefix / flip_rope flags are passed.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/new_conda_62/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/new_conda_62/frontier_conda_62.tar.gz" \
    --budget_minutes=119 \
    --nodes=16 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="mask_2050_ldiags_old_impl_k_pos_labels_0_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908" \
    --extra_args="--negatives_cross_device=true --max_steps=576908 --mask_k_ldiags=2050 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --debug_qos \
    --sub_output_dir_name="mask_2050_ldiags_old_impl_k_pos_labels_0_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908"

# Invoke launch_frontier.py for run "test_prod_launch" (keep_k variant):
# 64 nodes, 30-minute budget, axonn_tp strategy, group size set to null and
# --keep_k_cross_device_negatives=362880.
# Original derivation note: (256*1440)-2*1440=362880
# NOTE(review): that arithmetic does not check out — (256*1440)-2*1440 = 365760,
# whereas 362880 = (256*1440)-4*1440. Confirm which value was intended.
# NOTE(review): another invocation in this file reuses the same --run_name,
# --output_dir and --sub_output_dir_name — confirm they are not meant to coexist.
python launch_frontier.py \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62_retrieval.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62_retrieval.tar.gz" \
    --rocm_version="6.2.0" \
    --python_script=train_retrieval_w_anticausal.py \
    --config=launch_configs/retrieval/base_160m_ret_meta.json \
    --run_name=test_prod_launch \
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    --sub_output_dir_name=test_prod_launch \
    --budget_minutes=30 \
    --nodes=64 \
    --extra_args="--negatives_cross_device_group_size=null --keep_k_cross_device_negatives=362880 --max_steps=576908 --micro_batch_size=2 --world_batch_size=1024 --save_step_interval=500 --fabric_strategy=axonn_tp --attn_impl=sdpa --fabric.depth_tensor_parallel_size=8" \
    --debug_qos \
    --disable_net_gdr

# Invoke launch_frontier.py for run "test_prod_launch" (group size 32 variant):
# 64 nodes, 30-minute budget, axonn_tp strategy, world batch size 1024.
# NOTE(review): another invocation in this file reuses the same --run_name,
# --output_dir and --sub_output_dir_name — confirm they are not meant to coexist.
python launch_frontier.py \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62_retrieval.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62_retrieval.tar.gz" \
    --rocm_version="6.2.0" \
    --python_script=train_retrieval_w_anticausal.py \
    --config=launch_configs/retrieval/base_160m_ret_meta.json \
    --run_name=test_prod_launch \
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    --sub_output_dir_name=test_prod_launch \
    --budget_minutes=30 \
    --nodes=64 \
    --extra_args="--negatives_cross_device_group_size=32  --max_steps=576908 --micro_batch_size=2 --world_batch_size=1024 --save_step_interval=500 --fabric_strategy=axonn_tp --attn_impl=sdpa --fabric.depth_tensor_parallel_size=8" \
    --debug_qos \
    --disable_net_gdr

# Invoke launch_frontier.py for run "test_prod_launch_prevenv":
# 128 nodes, 30-minute budget, axonn_tp strategy, world batch size 2048, group size 64.
# Uses ${WRKSPC}/aws-ofi-rccl_62 and ${WRKSPC}/frontier_conda_62 (no "_retrieval"
# or ".tar.gz" suffix), unlike the other test_prod_launch invocations.
python launch_frontier.py \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62" \
    --environment="${WRKSPC}/frontier_conda_62" \
    --rocm_version="6.2.0" \
    --python_script=train_retrieval_w_anticausal.py \
    --config=launch_configs/retrieval/base_160m_ret_meta.json \
    --run_name=test_prod_launch_prevenv \
    --output_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output \
    --sub_output_dir_name=test_prod_launch_prevenv \
    --budget_minutes=30 \
    --nodes=128 \
    --extra_args="--negatives_cross_device_group_size=64  --max_steps=576908 --micro_batch_size=2 --world_batch_size=2048 --save_step_interval=500 --fabric_strategy=axonn_tp --attn_impl=sdpa --fabric.depth_tensor_parallel_size=8" \
    --debug_qos \
    --disable_net_gdr

### contrastive bsz scaling experiments ###
# Contrastive batch size 256: negatives_cross_device_group_size=32 with
# micro_batch_size=8 (32 x 8 = 256 — presumably how the label is derived; confirm).
# 4 nodes, 5-hour budget, ddp strategy.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --rocm_version="6.2.0" \
    --budget_hours=5 \
    --nodes=4 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="contrastive_bsz_256_mask_2050_ldiags" \
    --extra_args="--negatives_cross_device=true --negatives_cross_device_group_size=32 --max_steps=576908 --mask_k_ldiags=2050 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=8 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="contrastive_bsz_256_mask_2050_ldiags"

# Contrastive batch size 128: negatives_cross_device_group_size=16 with
# micro_batch_size=8 (16 x 8 = 128 — presumably how the label is derived; confirm).
# 4 nodes, 6-hour budget, ddp strategy.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --rocm_version="6.2.0" \
    --budget_hours=6 \
    --nodes=4 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="contrastive_bsz_128_mask_2050_ldiags" \
    --extra_args="--negatives_cross_device=true --negatives_cross_device_group_size=16 --max_steps=576908 --mask_k_ldiags=2050 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=8 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="contrastive_bsz_128_mask_2050_ldiags"

# Contrastive batch size 64: negatives_cross_device_group_size=8 with
# micro_batch_size=8 (8 x 8 = 64 — presumably how the label is derived; confirm).
# 4 nodes, 6-hour budget, ddp strategy.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --rocm_version="6.2.0" \
    --budget_hours=6 \
    --nodes=4 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="contrastive_bsz_64_mask_2050_ldiags" \
    --extra_args="--negatives_cross_device=true --negatives_cross_device_group_size=8 --max_steps=576908 --mask_k_ldiags=2050 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=8 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="contrastive_bsz_64_mask_2050_ldiags"

# Contrastive batch size 32: negatives_cross_device_group_size=4 with
# micro_batch_size=8 (4 x 8 = 32 — presumably how the label is derived; confirm).
# 4 nodes, ddp strategy.
# NOTE(review): --budget_hours=1 here, while the other runs in this sweep use
# 5 or 6 hours — confirm this is intentional.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --rocm_version="6.2.0" \
    --budget_hours=1 \
    --nodes=4 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="contrastive_bsz_32_mask_2050_ldiags" \
    --extra_args="--negatives_cross_device=true --negatives_cross_device_group_size=4 --max_steps=576908 --mask_k_ldiags=2050 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=8 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="contrastive_bsz_32_mask_2050_ldiags"

# Contrastive batch size 16: negatives_cross_device_group_size=2 with
# micro_batch_size=8 (2 x 8 = 16 — presumably how the label is derived; confirm).
# 4 nodes, 6-hour budget, ddp strategy.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --rocm_version="6.2.0" \
    --budget_hours=6 \
    --nodes=4 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="contrastive_bsz_16_mask_2050_ldiags" \
    --extra_args="--negatives_cross_device=true --negatives_cross_device_group_size=2 --max_steps=576908 --mask_k_ldiags=2050 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=8 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="contrastive_bsz_16_mask_2050_ldiags"

# Contrastive batch size 8: negatives_cross_device_group_size=1 with
# micro_batch_size=8 (1 x 8 = 8 — presumably how the label is derived; confirm).
# 4 nodes, 6-hour budget, ddp strategy.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --rocm_version="6.2.0" \
    --budget_hours=6 \
    --nodes=4 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="contrastive_bsz_8_mask_2050_ldiags" \
    --extra_args="--negatives_cross_device=true --negatives_cross_device_group_size=1 --max_steps=576908 --mask_k_ldiags=2050 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=8 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="contrastive_bsz_8_mask_2050_ldiags"

# Invoke launch_frontier.py for run "test_single_model_flip_rope_random_pos_label_cross_negs":
# 4 nodes, 6-hour budget, ddp strategy, suffix_is_prefix + flip_rope_embedding_suffix,
# mask_k_ldiags=2050, k_random_pos_labels=5.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --rocm_version="6.2.0" \
    --budget_hours=6 \
    --nodes=4 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="test_single_model_flip_rope_random_pos_label_cross_negs" \
    --extra_args="--negatives_cross_device=true --suffix_is_prefix=true --flip_rope_embedding_suffix=true --max_steps=576908 --mask_k_ldiags=2050 --k_random_pos_labels=5 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=8 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="test_single_model_flip_rope_random_pos_label_cross_negs"

# Invoke launch_frontier.py for run "test_no_k_pos_labels":
# 2 nodes, 6-hour budget, ddp strategy, world batch size 128, mask_k_ldiags=2050;
# no k_pos_labels / k_random_pos_labels flag is passed.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --rocm_version="6.2.0" \
    --budget_hours=6 \
    --nodes=2 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="test_no_k_pos_labels" \
    --extra_args="--negatives_cross_device=true --max_steps=576908 --mask_k_ldiags=2050 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=128 --micro_batch_size=8 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="test_no_k_pos_labels"

# Invoke launch_frontier.py for run "test_k_random_pos_labels_5_impl_2":
# 2 nodes, 6-hour budget, ddp strategy, world batch size 128,
# mask_k_ldiags=2050, k_random_pos_labels=5.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --rocm_version="6.2.0" \
    --budget_hours=6 \
    --nodes=2 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="test_k_random_pos_labels_5_impl_2" \
    --extra_args="--negatives_cross_device=true --max_steps=576908 --mask_k_ldiags=2050 --k_random_pos_labels=5 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=128 --micro_batch_size=8 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="test_k_random_pos_labels_5_impl_2"

# Invoke launch_frontier.py for run "test_k_random_pos_labels_5_v2":
# 2 nodes, 6-hour budget, ddp strategy, world batch size 128,
# mask_k_ldiags=2050, k_random_pos_labels=5.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --rocm_version="6.2.0" \
    --budget_hours=6 \
    --nodes=2 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="test_k_random_pos_labels_5_v2" \
    --extra_args="--negatives_cross_device=true --max_steps=576908 --mask_k_ldiags=2050 --k_random_pos_labels=5 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=128 --micro_batch_size=8 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="test_k_random_pos_labels_5_v2"

# Invoke launch_frontier.py for run "test_k_pos_labels_5":
# 2 nodes, 6-hour budget, ddp strategy, world batch size 128,
# mask_k_ldiags=2050, k_pos_labels=5 (not k_random_pos_labels).
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --rocm_version="6.2.0" \
    --budget_hours=6 \
    --nodes=2 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="test_k_pos_labels_5" \
    --extra_args="--negatives_cross_device=true --max_steps=576908 --mask_k_ldiags=2050 --k_pos_labels=5 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=128 --micro_batch_size=8 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="test_k_pos_labels_5"

# Invoke launch_frontier.py for run "test_pqds_hang":
# 8 nodes, 30-minute budget, ddp strategy, config retrieval_train_paraquet.json.
# NOTE(review): the launcher path is under scratch/khalids/ while --python_script
# and --config point under scratch/XXXX-22/ — confirm both refer to the same
# checkout and that the differing directory name is intended.
python /XXXX-30/XXXX-29/XXXX-31/scratch/khalids/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --budget_minutes=30 \
    --nodes=8 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/retrieval_train_paraquet.json" \
    --run_name="test_pqds_hang" \
    --extra_args="--negatives_cross_device=true --suffix_is_prefix=True --flip_rope_embedding_suffix=True --max_steps=576908 --mask_k_ldiags=2050 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=4 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="test_pqds_hang"

# Invoke launch_frontier.py for run
# "test_cross_device_negative_calling_negatives_cross_device_group_size_fixed":
# 2 nodes, 30-minute budget, ddp strategy, world batch size 128, group size 1.
# The --python_script value must not carry whitespace inside the quotes: double
# quotes preserve it, so the launcher would receive a path ending in a space.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_62.tar.gz" \
    --budget_minutes=30 \
    --nodes=2 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="test_cross_device_negative_calling_negatives_cross_device_group_size_fixed" \
    --extra_args="--negatives_cross_device=true --negatives_cross_device_group_size=1 --max_steps=576908 --mask_k_ldiags=2050 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=128 --micro_batch_size=8 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="test_cross_device_negative_calling_negatives_cross_device_group_size_fixed"


# Invoke launch_frontier.py for the "new_env_..._single-causal-..._flipped_rope" run:
# 16 nodes, 119-minute budget, ddp strategy, environment taken from ${WRKSPC}/new_conda_62,
# suffix_is_prefix + flip_rope_embedding_suffix, mask_k_ldiags=2050, k_pos_labels=5.
# NOTE(review): the same --run_name / --sub_output_dir_name is used by other
# invocations in this file — presumably a re-submission of the same run; confirm.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/new_conda_62/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/new_conda_62/frontier_conda_62.tar.gz" \
    --budget_minutes=119 \
    --nodes=16 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="new_env_mask_2050_ldiags_k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-single-causal-pythia-160m_flipped_rope-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908" \
    --extra_args="--negatives_cross_device=true --suffix_is_prefix=True --flip_rope_embedding_suffix=True --max_steps=576908 --mask_k_ldiags=2050 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --debug_qos \
    --sub_output_dir_name="new_env_mask_2050_ldiags_k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-single-causal-pythia-160m_flipped_rope-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908"

# Invoke launch_frontier.py for the "new_env_..._single-causal-..._flipped_rope" run:
# 16 nodes, 119-minute budget, ddp strategy, environment taken from ${WRKSPC}/new_conda_62,
# suffix_is_prefix + flip_rope_embedding_suffix, mask_k_ldiags=2050, k_pos_labels=5.
# The --python_script value must not carry whitespace inside the quotes: double
# quotes preserve it, so the launcher would receive a path ending in a space.
# NOTE(review): the same --run_name / --sub_output_dir_name is used by other
# invocations in this file — presumably a re-submission of the same run; confirm.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/new_conda_62/aws-ofi-rccl_62.tar.gz" \
    --environment="${WRKSPC}/new_conda_62/frontier_conda_62.tar.gz" \
    --budget_minutes=119 \
    --nodes=16 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="new_env_mask_2050_ldiags_k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-single-causal-pythia-160m_flipped_rope-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908" \
    --extra_args="--negatives_cross_device=true --suffix_is_prefix=True --flip_rope_embedding_suffix=True --max_steps=576908 --mask_k_ldiags=2050 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --debug_qos \
    --sub_output_dir_name="new_env_mask_2050_ldiags_k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-single-causal-pythia-160m_flipped_rope-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908"

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${WRKSPC}/new_conda_62/aws-ofi-rccl_62.tar.gz" \
#     --environment="${WRKSPC}/new_conda_62/frontier_conda_62.tar.gz" \
#     --budget_minutes=30 \
#     --nodes=16 \
#     --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
#     --run_name="test_old_mask_ldiag_impl_run2" \
#     --extra_args="--negatives_cross_device=true --suffix_is_prefix=True --flip_rope_embedding_suffix=True --max_steps=576908 --mask_k_ldiags=2050 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp" \
#     --disable_net_gdr \
#     --debug_qos \
#     --sub_output_dir_name="test_old_mask_ldiag_impl_run2"
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${WRKSPC}/new_conda_62/aws-ofi-rccl_62.tar.gz" \
#     --environment="${WRKSPC}/new_conda_62/frontier_conda_62.tar.gz" \
#     --budget_minutes=30 \
#     --nodes=16 \
#     --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_configs/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
#     --run_name="test_fast_mask_ldiag_impl" \
#     --extra_args="--negatives_cross_device=true --suffix_is_prefix=True --flip_rope_embedding_suffix=True --max_steps=576908 --mask_k_ldiags=2050 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp" \
#     --disable_net_gdr \
#     --debug_qos \
#     --sub_output_dir_name="test_fast_mask_ldiag_impl"


# Invoke launch_scripts/launch_frontier.py from the XXXX-40 checkout for the
# "mask_2050_ldiags_10_udiags_..." run: 16 nodes, 8-hour budget, ddp strategy,
# mask_k_ldiags=2050, mask_k_udiags=10, tiny_plugins_rccl + frontier_conda_60 env.
# This invocation passes --max_iters, whereas the lit-gpt-dev_new invocations
# in this file pass --max_steps.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    --budget_hours=8 \
    --nodes=16 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="mask_2050_ldiags_10_udiags_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908" \
    --extra_args="--negatives_cross_device=true --max_iters=576908 --mask_k_ldiags=2050 --mask_k_udiags=10 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --extended_partition \
    --sub_output_dir_name="mask_2050_ldiags_10_udiags_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908"

# Invoke launch_scripts/launch_frontier.py from the XXXX-40 checkout for the
# "mask_2050_ldiags_20_udiags_..." run: 16 nodes, 8-hour budget, ddp strategy,
# mask_k_ldiags=2050, mask_k_udiags=20, tiny_plugins_rccl + frontier_conda_60 env.
# NOTE(review): unlike the 10_udiags invocation with the same 8-hour budget,
# this one does not pass --extended_partition — confirm that is intentional.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    --budget_hours=8 \
    --nodes=16 \
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    --run_name="mask_2050_ldiags_20_udiags_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908" \
    --extra_args="--negatives_cross_device=true --max_iters=576908 --mask_k_ldiags=2050 --mask_k_udiags=20 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp" \
    --disable_net_gdr \
    --sub_output_dir_name="mask_2050_ldiags_20_udiags_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908"

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
#     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
#     --budget_minutes=119 \
#     --nodes=16 \
#     --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
#     --run_name="mask_2050_ldiags_k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908" \
#     --extra_args="--negatives_cross_device=true --max_iters=576908 --mask_k_diags=2050 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp" \
#     --disable_net_gdr \
#     --debug_qos \
#     --sub_output_dir_name="mask_2050_ldiags_k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908"
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
#     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
#     --budget_minutes=119 \
#     --nodes=16 \
#     --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
#     --run_name="mask_2050_ldiags_k_pos_labels_5_decay_factor_1_step_56k_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_5e-5_max_iters_576908" \
#     --extra_args="--negatives_cross_device=true --max_iters=576908 --mask_k_diags=2050 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=5e-5 --min_lr=5e-6 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp" \
#     --disable_net_gdr \
#     --debug_qos \
#     --sub_output_dir_name="mask_2050_ldiags_k_pos_labels_5_decay_factor_1_step_56k_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_5e-5_max_iters_576908"
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
#     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
#     --budget_minutes=119 \
#     --nodes=16 \
#     --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
#     --run_name="mask_2050_ldiags_k_pos_labels_5_decay_factor_1_step_56k_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_2e-4_warmup_500_max_iters_576908" \
#     --extra_args="--negatives_cross_device=true --max_iters=576908 --mask_k_diags=2050 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --warmup_steps=56500 --optim_config.lr=2e-4 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp" \
#     --disable_net_gdr \
#     --debug_qos \
#     --sub_output_dir_name="mask_2050_ldiags_k_pos_labels_5_decay_factor_1_step_56k_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_2e-4_warmup_500_max_iters_576908"
# Launch the k_pos_labels=5 / decay_factor=1.0 run tagged step_68k at lr 5e-4:
# 16 nodes, 119-minute budget, --debug_qos.
# The run name is also the output sub-directory name, so it is defined once.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
run_name="k_pos_labels_5_decay_factor_1_step_68k_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_5e-4_max_iters_576908"
launch_args=(
  --python_script="${code_root}/train_retrieval_w_anticausal.py"
  --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
  --environment="${WRKSPC}/frontier_conda_60.tar.gz"
  --budget_minutes=119
  --nodes=16
  --config="${code_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
  --run_name="${run_name}"
  --extra_args="--negatives_cross_device=true --max_iters=576908 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=5e-4 --min_lr=5e-5 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp"
  --disable_net_gdr
  --debug_qos
  --sub_output_dir_name="${run_name}"
)
python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"


# import os
# import glob
# d = load_dataset(
#         "parquet",
#         data_files=glob.glob(f"/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/raw/retrieval/contrastive-index-filtered_processing/**/*.parquet", recursive=True),
#         columns=["text"],
#         streaming=True,
#         cache_dir='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/.cache',
#         # num_proc=os.cpu_count() // 2,
#         verification_mode="no_checks",
#     )["train"]
# # using load_from_disk now
# d = load_from_disk("/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/raw/retrieval/contrastive-index-filtered_processing")

# for example in dataset['paq']:
#     print(example)
#     break


### Mask n-gram training ###
# Launch the --n_gram=5 run: 16 nodes, 8-hour budget, --extended_partition.
# The run name is also the output sub-directory name, so it is defined once.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
run_name="mask_5-gram_k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_576908"
launch_args=(
  --python_script="${code_root}/train_retrieval_w_anticausal.py"
  --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
  --environment="${WRKSPC}/frontier_conda_60.tar.gz"
  --budget_hours=8
  --nodes=16
  --config="${code_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
  --run_name="${run_name}"
  --extra_args="--negatives_cross_device=true --max_iters=576908 --n_gram=5 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp"
  --disable_net_gdr
  --extended_partition
  --sub_output_dir_name="${run_name}"
)
python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"

# Launch the --n_gram=10 run: 16 nodes, 8-hour budget.
# The run name is also the output sub-directory name, so it is defined once.
# NOTE(review): unlike the 5-gram launch above, this one does not pass
# --extended_partition despite the same 8-hour budget - confirm intended.
# NOTE(review): --python_script uses ".../orion/..." where every sibling
# command uses ".../XXXX-29/..."; kept verbatim - verify the path.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
run_name="mask_10-gram_k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_576908"
launch_args=(
  --python_script="/XXXX-30/orion/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
  --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
  --environment="${WRKSPC}/frontier_conda_60.tar.gz"
  --budget_hours=8
  --nodes=16
  --config="${code_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
  --run_name="${run_name}"
  --extra_args="--negatives_cross_device=true --max_iters=576908 --n_gram=10 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp"
  --disable_net_gdr
  --sub_output_dir_name="${run_name}"
)
python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"

### pick_k pairs training ###
# Launch the --pick_k=1 run: 16 nodes, 8-hour budget, --extended_partition.
# The run name is also the output sub-directory name, so it is defined once.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
run_name="keep_1_pair_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_57691"
launch_args=(
  --python_script="${code_root}/train_retrieval_w_anticausal.py"
  --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
  --environment="${WRKSPC}/frontier_conda_60.tar.gz"
  --budget_hours=8
  --nodes=16
  --config="${code_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
  --run_name="${run_name}"
  --extra_args="--pick_k=1 --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-4 --min_lr=1e-5 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy=ddp"
  --disable_net_gdr
  --extended_partition
  --sub_output_dir_name="${run_name}"
)
python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"

# Launch the --pick_k=50 run at lr 3e-3: 16 nodes, 8-hour budget.
# The run name is also the output sub-directory name, so it is defined once.
# NOTE(review): unlike the pick_k=1 launch above, this one does not pass
# --extended_partition despite the same 8-hour budget - confirm intended.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
run_name="keep_50_pair_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691"
launch_args=(
  --python_script="${code_root}/train_retrieval_w_anticausal.py"
  --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
  --environment="${WRKSPC}/frontier_conda_60.tar.gz"
  --budget_hours=8
  --nodes=16
  --config="${code_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
  --run_name="${run_name}"
  --extra_args="--pick_k=50 --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=3e-3 --min_lr=3e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy=ddp"
  --disable_net_gdr
  --sub_output_dir_name="${run_name}"
)
python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"

### fine-tuning experiments ###
# Fine-tune from the step-00010000 checkpoint of the keep_1_pair run:
# 4 nodes, 5-hour budget, --extended_partition, natural_inst_data config.
# The run name is also the output sub-directory name, so it is defined once.
# NOTE(review): the "lr_1e-4" in the run name matches the source checkpoint's
# name, not this run's --optim_config.lr=1e-3 - confirm intended.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
run_name="orca_finetune_keep_1_pair_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_57691"
launch_args=(
  --python_script="${code_root}/train_retrieval_w_anticausal.py"
  --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
  --environment="${WRKSPC}/frontier_conda_60.tar.gz"
  --budget_hours=5
  --nodes 4
  --config "${code_root}/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json"
  --run_name "${run_name}"
  --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/keep_1_pair_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_57691/checkpoints-ddp/step-00010000-keep_1_pair_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_57691.pth"'
  --disable_net_gdr
  --extended_partition
  --sub_output_dir_name "${run_name}"
)
python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"

# Fine-tune from the step-00010000 checkpoint of the keep_50_pair run:
# 4 nodes, 5-hour budget, --extended_partition, natural_inst_data config.
# The run name is also the output sub-directory name, so it is defined once.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
run_name="orca_finetune_keep_50_pair_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691"
launch_args=(
  --python_script="${code_root}/train_retrieval_w_anticausal.py"
  --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
  --environment="${WRKSPC}/frontier_conda_60.tar.gz"
  --budget_hours=5
  --nodes 4
  --config "${code_root}/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json"
  --run_name "${run_name}"
  --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/keep_50_pair_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691/checkpoints-ddp/step-00010000-keep_50_pair_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691.pth"'
  --disable_net_gdr
  --extended_partition
  --sub_output_dir_name "${run_name}"
)
python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"

# Fine-tune from the step-00068000 checkpoint of the mask_2050_ldiags run at
# lr 1e-3: 4 nodes, 5-hour budget, --extended_partition.
# The run name is also the output sub-directory name, so it is defined once.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
run_name="orca_finetune_mask_2050_ldiags_k_pos_labels_5_df_1.0_wbsz-256-10B_toks-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740"
launch_args=(
  --python_script="${code_root}/train_retrieval_w_anticausal.py"
  --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
  --environment="${WRKSPC}/frontier_conda_60.tar.gz"
  --budget_hours=5
  --nodes 4
  --config "${code_root}/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json"
  --run_name "${run_name}"
  --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/mask_2050_ldiags_k_pos_labels_5_decay_factor_1_step_56k_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_5e-5_max_iters_576908/checkpoints-ddp/step-00068000-mask_2050_ldiags_k_pos_labels_5_decay_factor_1_step_56k_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_5e-5_max_iters_576908.pth"'
  --disable_net_gdr
  --extended_partition
  --sub_output_dir_name "${run_name}"
)
python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"

# Fine-tune from the step-00068000 checkpoint of the mask_2050_ldiags run at
# lr 5e-5: 4 nodes, 1-hour budget, --extended_partition.
# The run name is also the output sub-directory name, so it is defined once.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
run_name="orca_finetune_mask_2050_ldiags_k_pos_labels_5_df_1.0_wbsz-256-10B_toks-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_5e-5_max_iters_5740"
launch_args=(
  --python_script="${code_root}/train_retrieval_w_anticausal.py"
  --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
  --environment="${WRKSPC}/frontier_conda_60.tar.gz"
  --budget_hours=1
  --nodes 4
  --config "${code_root}/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json"
  --run_name "${run_name}"
  --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=5e-5 --min_lr=1e-6 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/mask_2050_ldiags_k_pos_labels_5_decay_factor_1_step_56k_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_5e-5_max_iters_576908/checkpoints-ddp/step-00068000-mask_2050_ldiags_k_pos_labels_5_decay_factor_1_step_56k_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_5e-5_max_iters_576908.pth"'
  --disable_net_gdr
  --extended_partition
  --sub_output_dir_name "${run_name}"
)
python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"

### k_pos_labels training ###

# Launch one run per (k_pos_labels, decay_factor) pair with --reduce=both_dim:
# 16 nodes, 8-hour budget, --extended_partition. Both values are embedded in
# the run name, which is also used as the output sub-directory name.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
for k in 20; do
  for decay in 1.0 0.5; do
    run_name="k_pos_labels_${k}_decay_factor_${decay}_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim"
    launch_args=(
      --python_script="${code_root}/train_retrieval_w_anticausal.py"
      --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
      --environment="${WRKSPC}/frontier_conda_60.tar.gz"
      --budget_hours=8
      --nodes=16
      --config="${code_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
      --run_name="${run_name}"
      --extra_args="--k_pos_labels=${k} --decay_factor=${decay} --reduce=both_dim --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=3e-3 --min_lr=3e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy=ddp"
      --disable_net_gdr
      --sub_output_dir_name="${run_name}"
      --extended_partition
    )

    echo "Launching job with k_pos_labels=${k} and decay_factor=${decay}"
    python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"
  done
done

# Launch the --gen_loss=true run with --alpha=0.5: 16 nodes, 8-hour budget,
# --extended_partition.
# The run name is also the output sub-directory name, so it is defined once.
# NOTE(review): the run name says "lr_4e-3" and ends in "_reduce_both_dim",
# but the arguments set --optim_config.lr=3e-3 and pass no --reduce flag;
# kept verbatim because renaming would change the output directory - confirm.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
run_name="gen_loss_0.5_alpha_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691_reduce_both_dim"
launch_args=(
  --python_script="${code_root}/train_retrieval_w_anticausal.py"
  --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
  --environment="${WRKSPC}/frontier_conda_60.tar.gz"
  --budget_hours=8
  --nodes=16
  --config="${code_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
  --run_name="${run_name}"
  --extra_args="--gen_loss=true --alpha=0.5 --suffix_is_prefix=True --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=3e-3 --min_lr=3e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy=ddp"
  --disable_net_gdr
  --sub_output_dir_name="${run_name}"
  --extended_partition
)
python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"

# Launch the --gen_loss=true run with --alpha=0.25: 16 nodes, 8-hour budget,
# --extended_partition.
# The run name is also the output sub-directory name, so it is defined once.
# NOTE(review): the run name says "lr_4e-3" but the arguments set
# --optim_config.lr=3e-3; kept verbatim because renaming would change the
# output directory - confirm.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
run_name="gen_loss_0.25_alpha_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
launch_args=(
  --python_script="${code_root}/train_retrieval_w_anticausal.py"
  --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
  --environment="${WRKSPC}/frontier_conda_60.tar.gz"
  --budget_hours=8
  --nodes=16
  --config="${code_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
  --run_name="${run_name}"
  --extra_args="--gen_loss=true --alpha=0.25 --suffix_is_prefix=True --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=3e-3 --min_lr=3e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy=ddp"
  --disable_net_gdr
  --sub_output_dir_name="${run_name}"
  --extended_partition
)
python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"

# Launch the --gen_loss=true run with --alpha=0.75: 16 nodes, 8-hour budget,
# --extended_partition.
# The run name is also the output sub-directory name, so it is defined once.
# NOTE(review): the run name says "lr_4e-3" but the arguments set
# --optim_config.lr=3e-3; kept verbatim because renaming would change the
# output directory - confirm.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
run_name="gen_loss_0.75_alpha_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
launch_args=(
  --python_script="${code_root}/train_retrieval_w_anticausal.py"
  --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
  --environment="${WRKSPC}/frontier_conda_60.tar.gz"
  --budget_hours=8
  --nodes=16
  --config="${code_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
  --run_name="${run_name}"
  --extra_args="--gen_loss=true --alpha=0.75 --suffix_is_prefix=True --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=3e-3 --min_lr=3e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy=ddp"
  --disable_net_gdr
  --sub_output_dir_name="${run_name}"
  --extended_partition
)
python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"


# for i in {1..36}
# do
#     echo "Starting iteration $i"
#     python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#         --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
#         --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
#         --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
#         --budget_minutes=30 \
#         --nodes=16 \
#         --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
#         --run_name="k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691" \
#         --extra_args="--negatives_cross_device=true --max_iters=576908 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=2 --fabric_strategy=ddp" \
#         --disable_net_gdr \
#         --debug_qos \
#         --sub_output_dir_name="k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691"

#     echo "Iteration $i completed. Waiting for 38 minutes before next iteration."
#     sleep 38m
# done
# Launch the k_pos_labels=5 / decay_factor=1.0 cross-device-negatives run at
# lr 1e-3: 16 nodes, 8-hour budget, --extended_partition.
# The run name is also the output sub-directory name, so it is defined once.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
run_name="k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_576908"
launch_args=(
  --python_script="${code_root}/train_retrieval_w_anticausal.py"
  --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
  --environment="${WRKSPC}/frontier_conda_60.tar.gz"
  --budget_hours=8
  --nodes=16
  --config="${code_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
  --run_name="${run_name}"
  --extra_args="--negatives_cross_device=true --max_iters=576908 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp"
  --disable_net_gdr
  --extended_partition
  --sub_output_dir_name="${run_name}"
)
python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"

# Launch the mask_k_diags=2050 run eight times in a row, pausing after every
# launch. Each launch requests 16 nodes and a 119-minute budget with
# --debug_qos and reuses the same run name / output sub-directory.
# The arguments do not depend on the iteration, so they are built once.
code_root="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
run_name="mask_2050_ldiags_k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_576908"
launch_args=(
  --python_script="${code_root}/train_retrieval_w_anticausal.py"
  --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
  --environment="${WRKSPC}/frontier_conda_60.tar.gz"
  --budget_minutes=119
  --nodes=16
  --config="${code_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
  --run_name="${run_name}"
  --extra_args="--negatives_cross_device=true --max_iters=576908 --mask_k_diags=2050 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy=ddp"
  --disable_net_gdr
  --debug_qos
  --sub_output_dir_name="${run_name}"
)

for i in {1..8}; do
  echo "Starting iteration $i"
  python "${code_root}/launch_scripts/launch_frontier.py" "${launch_args[@]}"

  echo "Iteration $i completed. Waiting for 2h 8 minutes before next iteration."
  # GNU sleep adds its operands together: 2h + 8m.
  sleep 2h 8m
done

### gen_loss + prefix_suffix_loss experiment
# Runs launch_frontier.py 36 times, sleeping 38 minutes after every launch.
# Each launch requests 16 nodes and a 30-minute budget with --debug_qos and
# reuses the same run name / output sub-directory.
# NOTE(review): the gen_loss launch this heading names is commented out in the
# loop body; the only active command is the wbsz-256 cross_device_negs run.
# NOTE(review): the active run name says "optim_steps_57691" while the
# arguments set --max_iters=576908 - confirm the two are meant to differ.
for i in {1..36}
do
    echo "Starting iteration $i"
    # Disabled alternative launches, kept for reference:
    # python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    #     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    #     --budget_minutes=30 \
    #     --nodes=16 \
    #     --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    #     --run_name="gen_loss_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691_reduce_both_dim" \
    #     --extra_args="--gen_loss=true --loss_type=cross_batch_negative --suffix_is_prefix=True --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=4e-3 --min_lr=4e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy=ddp" \
    #     --disable_net_gdr \
    #     --sub_output_dir_name="gen_loss_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691_reduce_both_dim" \
    #     --debug_qos
    # python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    #     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    #     --budget_minutes=30 \
    #     --nodes 1 \
    #     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json \
    #     --run_name wbsz-64-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_2307631 \
    #     --extra_args='--max_iters=2307631 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=64 --micro_batch_size=8 --fabric_strategy="ddp"' \
    #     --disable_net_gdr \
    #     --debug_qos \
    #     --sub_output_dir_name wbsz-64-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_2307631
    # python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    #     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    #     --budget_minutes=30 \
    #     --nodes 4 \
    #     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json \
    #     --run_name wbsz-128-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_1153815 \
    #     --extra_args='--negatives_cross_device=true --max_iters=1153815 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=128 --micro_batch_size=4 --fabric_strategy="ddp"' \
    #     --disable_net_gdr \
    #     --debug_qos \
    #     --sub_output_dir_name wbsz-128-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_1153815
    # python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    #     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    #     --budget_minutes=30 \
    #     --nodes 1 \
    #     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json \
    #     --run_name wbsz-64-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_2307631 \
    #     --extra_args='--negatives_cross_device=true --max_iters=2307631 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=64 --micro_batch_size=8 --fabric_strategy="ddp"' \
    #     --disable_net_gdr \
    #     --sub_output_dir_name wbsz-64-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_2307631
    # Active launch; the arguments are identical on every iteration.
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
        --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
        --budget_minutes=30 \
        --nodes 16 \
        --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json \
        --run_name wbsz-256-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691 \
        --extra_args='--negatives_cross_device=true --max_iters=576908 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=256 --micro_batch_size=2 --fabric_strategy="ddp"' \
        --disable_net_gdr \
        --debug_qos \
        --sub_output_dir_name wbsz-256-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691

    # Fixed pause before the next launch; it also runs after the final iteration.
    echo "Iteration $i completed. Waiting for 38 minutes before next iteration."
    sleep 38m
done

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
#     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
#     --budget_minutes=30 \
#     --nodes=16 \
#     --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
#     --run_name="cum_mean_pooling_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691" \
#     --extra_args="--mean_pooling=true --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy=ddp" \
#     --disable_net_gdr \
#     --sub_output_dir_name="cum_mean_pooling_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691" \
#     --debug_qos

    # python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    #     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    #     --budget_minutes=30 \
    #     --nodes=16 \
    #     --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
    #     --run_name="mask_5_ldiags_k_pos_labels_3_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim" \
    #     --extra_args="--mask_k_diags=5 --k_pos_labels=3 --decay_factor=1.0 --reduce=both_dim --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy=ddp" \
    #     --disable_net_gdr \
    #     --sub_output_dir_name="mask_5_ldiags_k_pos_labels_3_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim" \
    #     --debug_qos

    # python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    # --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    # --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    # --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    # --budget_minutes=30 \
    # --nodes 16 \
    # --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json \
    # --run_name wbsz-2560-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-ctx-var-batch_negative_ddp_RR_lr_4e-3_optim_steps_57691 \
    # --extra_args='--max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=4e-3 --min_lr=4e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=20 --fabric_strategy="ddp"' \
    # --disable_net_gdr \
    # --debug_qos \
    # --sub_output_dir_name wbsz-2560-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-ctx-var-batch_negative_ddp_RR_lr_4e-3_optim_steps_57691
    

# keep_eos retrieval pretraining launch: train_retrieval_w_anticausal.py with
# --nodes=16 and --budget_hours=10. The run name is also used as the output
# sub-directory name, so it is defined once.
keep_eos_pretrain_run='keep_eos_k_pos_labels_0_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim'
keep_eos_pretrain_args=(
    /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py
    --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py'
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=10
    --nodes=16
    --config='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json'
    --run_name="${keep_eos_pretrain_run}"
    --extra_args=' --keep_eos=true --k_pos_labels=0 --decay_factor=1.0 --reduce=both_dim --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=3e-3 --min_lr=3e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy=ddp'
    --disable_net_gdr
    --sub_output_dir_name="${keep_eos_pretrain_run}"
)
python "${keep_eos_pretrain_args[@]}"

### START llm finetuning experiments ###
# llm finetune
# Baseline LM fine-tuning (finetune_lm_retrieval.py with finetune_lm.json).
# The identical launch is issued 32 times with a 38-minute pause after each
# one; nothing in the command depends on the iteration, so the argument list
# is assembled once up front.
orca_lm_run=eff_attn_mean_pool_precision_32_true_orca_finetune-pythia-160m-mbsz-16-wbsz-512-lr_1e-3_max_iters_8250
orca_lm_args=(
    /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py
    --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/finetune_lm_retrieval.py'
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_minutes=30
    --nodes 4
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/finetune_lm.json
    --run_name "${orca_lm_run}"
    --extra_args=' --keep_eos=true --fabric_precision 32-true --max_iters=8250 --max_tokens=null --world_batch_size=512 --micro_batch_size=16 --optim_config.lr=1e-3 --min_lr=1e-4 --save_step_interval=1000 --eval_step_interval=1000 --save_n_min_before_job_done=3'
    --disable_net_gdr
    --sub_output_dir_name "${orca_lm_run}"
    --debug_qos
)

for i in {1..32}; do
    printf '%s\n' "Starting iteration $i"

    python "${orca_lm_args[@]}"

    # NOTE(review): the 38m pause presumably covers the 30-minute job budget
    # plus queue/startup slack -- confirm.
    printf '%s\n' "Iteration $i completed. Waiting for 38 minutes before next iteration."
    sleep 38m
done

### Alpaca finetuning experiments ###
# Alpaca fine-tuning, repeated twice. Each pass:
#   1. launches the baseline LM fine-tune (finetune_lm_retrieval.py), waits 38m;
#   2. launches two fine-tunes of our retrieval checkpoints back to back
#      (mean-pool variant and keep_eos variant), then waits another 38m.
# The three argument lists are loop-invariant and are built once here.
alpaca_launcher=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py

alpaca_baseline_run=eff_attn_mean_pool_precision_32_true_alpaca_finetune-pythia-160m-mbsz-16-wbsz-512-lr_1e-3_max_iters_82
alpaca_baseline_args=(
    --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/finetune_lm_retrieval.py'
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_minutes=30
    --nodes 4
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/finetune_lm.json
    --run_name "${alpaca_baseline_run}"
    --extra_args='--keep_eos=true --fabric_precision 32-true --max_iters=82 --max_tokens=null --world_batch_size=512 --micro_batch_size=16 --optim_config.lr=1e-3 --min_lr=1e-4 --save_step_interval=1000 --eval_step_interval=1000 --save_n_min_before_job_done=3'
    --disable_net_gdr
    --sub_output_dir_name "${alpaca_baseline_run}"
    --debug_qos
)

alpaca_mean_pool_run=alpaca_finetune_mean_pool_k_pos_labels_0_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_82
alpaca_mean_pool_args=(
    --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py'
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_minutes=30
    --nodes 4
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/alpaca_finetune.json
    --run_name "${alpaca_mean_pool_run}"
    --extra_args='--mean_pooling=True --max_iters=82 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=512 --micro_batch_size=16 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/k_pos_labels_0_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00010000-k_pos_labels_0_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim.pth"'
    --disable_net_gdr
    --debug_qos
    --sub_output_dir_name "${alpaca_mean_pool_run}"
)

alpaca_keep_eos_run=alpaca_finetune_keep_eos_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_82
alpaca_keep_eos_args=(
    --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py'
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_minutes=30
    --nodes 4
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/alpaca_finetune.json
    --run_name "${alpaca_keep_eos_run}"
    --extra_args='--keep_eos=True --max_iters=82 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=512 --micro_batch_size=16 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/keep_eos_k_pos_labels_0_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00010000-keep_eos_k_pos_labels_0_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim.pth"'
    --disable_net_gdr
    --debug_qos
    --sub_output_dir_name "${alpaca_keep_eos_run}"
)

for i in {1..2}; do
    printf '%s\n' "Starting iteration $i"
    python "${alpaca_launcher}" "${alpaca_baseline_args[@]}"

    printf '%s\n' "Iteration $i - Running baseline alpaca finetune. Waiting for 38 minutes before next iteration."
    sleep 38m

    # Disabled variant: same fine-tune without --mean_pooling / --keep_eos.
    # python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    #     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    #     --budget_minutes=30 \
    #     --nodes 4 \
    #     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/alpaca_finetune.json \
    #     --run_name alpaca_finetune_k_pos_labels_0_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_82 \
    #     --extra_args='--max_iters=82 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=512 --micro_batch_size=16 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/k_pos_labels_0_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00010000-k_pos_labels_0_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim.pth"' \
    #     --disable_net_gdr \
    #     --debug_qos \
    #     --sub_output_dir_name alpaca_finetune_k_pos_labels_0_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_82

    # The two launches below are issued back to back, with no pause between them.
    python "${alpaca_launcher}" "${alpaca_mean_pool_args[@]}"
    python "${alpaca_launcher}" "${alpaca_keep_eos_args[@]}"

    printf '%s\n' "Iteration $i - Running our model alpaca finetune. Waiting for 38 minutes before next iteration."
    sleep 38m
done

# ours finetune
# Orca fine-tune of our retrieval model, starting from the step-00010000
# checkpoint of the k_pos_labels_0 pretraining run (--budget_hours=8, 4 nodes).
ours_orca_run=orca_finetune_k_pos_labels_0_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_8250
ours_orca_args=(
    /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py
    --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py'
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=8
    --nodes 4
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json
    --run_name "${ours_orca_run}"
    --extra_args=' --max_iters=8250 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=512 --micro_batch_size=16 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/k_pos_labels_0_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00010000-k_pos_labels_0_decay_factor_1.0_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim.pth"'
    --disable_net_gdr
    --sub_output_dir_name "${ours_orca_run}"
)
python "${ours_orca_args[@]}"

printf '%s\n' "All iterations completed."
# for i in {1..16}
# do
#     echo "Starting iteration $i"
    
#     python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#         --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/finetune_lm_retrieval.py" \
#         --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
#         --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
#         --budget_minutes=30 \
#         --nodes 4 \
#         --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/finetune_lm.json \
#         --run_name eff_attn_precision_32_true_orca_finetune-pythia-160m-mbsz-16-wbsz-512-lr_1e-3_max_iters_8250 \
#         --extra_args='--fabric_precision 32-true --max_iters=8250 --max_tokens=null --world_batch_size=512 --micro_batch_size=16 --optim_config.lr=1e-3 --min_lr=1e-4 --save_step_interval=1000 --eval_step_interval=1000 --save_n_min_before_job_done=3' \
#         --disable_net_gdr \
#         --sub_output_dir_name eff_attn_precision_32_true_orca_finetune-pythia-160m-mbsz-16-wbsz-512-lr_1e-3_max_iters_8250 \
#         --debug_qos

#     echo "Iteration $i completed. Waiting for 38 minutes before next iteration."
#     sleep 38m
# done

# echo "All iterations completed."

### END llm finetuning experiments ###

# Orca fine-tunes from the mask_<N>_ldiags / k_pos_labels_3 pretraining
# checkpoints (step-00010000), launched first for N=50 and then for N=10.
# The two launches differ only in that number, so the run name and checkpoint
# path are derived from it.
for mask_diags in 50 10; do
    mask_pretrain_run="mask_${mask_diags}_ldiags_k_pos_labels_3_decay_factor_1_wbsz-2048_local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-ctx-var-batch_negative_ddp_RR_lr_3e-3_optim_steps_57691"
    mask_pretrain_ckpt="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/${mask_pretrain_run}/checkpoints-ddp/step-00010000-${mask_pretrain_run}.pth"
    mask_finetune_run="orca_finetune_mask_${mask_diags}_ldiags_k_pos_labels_3_df_1.0_wbsz-2048_local_negs-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740"

    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
        --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
        --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
        --budget_hours=5 \
        --nodes 4 \
        --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
        --run_name "${mask_finetune_run}" \
        --extra_args="--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint=\"${mask_pretrain_ckpt}\"" \
        --disable_net_gdr \
        --extended_partition \
        --sub_output_dir_name "${mask_finetune_run}"
done


### START finetune local, contrastive parallel, siglip comparison exp ###
# 1195195176 tokens
# 1194947035 tokens
# 1194439046 tokens
for i in {1..2}
do
    echo "Starting iteration $i"
    # python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    #     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    #     --budget_minutes=119 \
    #     --nodes 4 \
    #     --debug_qos \
    #     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
    #     --run_name orca_finetune_wbsz-64-1B_toks-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740 \
    #     --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/wbsz-64-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_2307631/checkpoints-ddp/step-00032000-wbsz-64-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_2307631.pth"' \
    #     --disable_net_gdr \
    #     --sub_output_dir_name orca_finetune_wbsz-64-1B_toks-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740

    # echo "$i - running wbsz-64-local-negs. Waiting for 38 minutes before next iteration."
    # sleep 2h 8m

    # python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    #     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    #     --budget_minutes=119 \
    #     --nodes 4 \
    #     --debug_qos \
    #     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
    #     --run_name orca_finetune_wbsz-64-1B_toks-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740 \
    #     --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/wbsz-64-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_2307631/checkpoints-ddp/step-00032000-wbsz-64-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_2307631.pth"' \
    #     --disable_net_gdr \
    #     --sub_output_dir_name orca_finetune_wbsz-64-1B_toks-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740

    # echo "$i - running wbsz-64-cross-device-negs. Waiting for 38 minutes before next iteration."
    # sleep 2h 8m

    # python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    #     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    #     --budget_minutes=119 \
    #     --nodes 4 \
    #     --debug_qos \
    #     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
    #     --run_name orca_finetune_wbsz-128-1B_toks-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740 \
    #     --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/trained/step_16k/step-00016000-wbsz-128-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_1153815.pth"' \
    #     --disable_net_gdr \
    #     --sub_output_dir_name orca_finetune_wbsz-128-1B_toks-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740

    # echo "$i - running wbsz-128-local-negs. Waiting for 38 minutes before next iteration."
    # sleep 2h 8m

    # python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    #     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    #     --budget_minutes=119 \
    #     --nodes 4 \
    #     --debug_qos \
    #     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
    #     --run_name orca_finetune_wbsz-128-1B_toks-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740 \
    #     --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/wbsz-128-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_1153815/checkpoints-ddp/step-00016000-wbsz-128-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_1153815.pth"' \
    #     --disable_net_gdr \
    #     --sub_output_dir_name orca_finetune_wbsz-128-1B_toks-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740

    # echo "$i - running wbsz-128-cross-device-negs. Waiting for 38 minutes before next iteration."
    # sleep 2h 8m

    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
        --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
        --budget_minutes=119 \
        --nodes 4 \
        --debug_qos \
        --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
        --run_name orca_finetune_wbsz-2560-1B_toks-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740 \
        --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/wbsz-2560-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-ctx-var-batch_negative_ddp_RR_lr_4e-3_optim_steps_57691/checkpoints-ddp/step-00001000-wbsz-2560-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-ctx-var-batch_negative_ddp_RR_lr_4e-3_optim_steps_57691.pth"' \
        --disable_net_gdr \
        --sub_output_dir_name orca_finetune_wbsz-2560-1B_toks-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740

    echo "$i - running wbsz-2560-local-negs. Waiting for 38 minutes before next iteration."
    sleep 2h 8m

    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
        --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
        --budget_minutes=119 \
        --nodes 4 \
        --debug_qos \
        --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
        --run_name orca_finetune_wbsz-256-1B_toks-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740 \
        --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/wbsz-256-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691/checkpoints-ddp/step-00008000-wbsz-256-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691.pth"' \
        --disable_net_gdr \
        --sub_output_dir_name orca_finetune_wbsz-256-1B_toks-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740

    echo "$i - running wbsz-256-cross-device-negs. Waiting for 38 minutes before next iteration."
    sleep 2h 8m
    # python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    #     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    #     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    #     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    #     --budget_minutes=119 \
    #     --nodes 4 \
    #     --extended_partition \
    #     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
    #     --run_name orca_finetune_wbsz-256-1B_toks-10k_steps_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740 \
    #     --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/wbsz-256-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691/checkpoints-ddp/step-00008000-wbsz-256-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691.pth"' \
    #     --disable_net_gdr \
    #     --sub_output_dir_name orca_finetune_wbsz-256-1B_toks-10k_steps_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
        --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
        --budget_minutes=119 \
        --nodes 4 \
        --debug_qos \
        --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
        --run_name orca_finetune_k_pos_labels_5_df_1.0_wbsz-256-1B_toks-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740 \
        --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_576908/checkpoints-ddp/step-00008000-k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_576908.pth"' \
        --disable_net_gdr \
        --sub_output_dir_name orca_finetune_k_pos_labels_5_df_1.0_wbsz-256-1B_toks-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740

    echo "$i - running wbsz-256-cross-device-negs. Waiting for 38 minutes before next iteration."
    sleep 2h 8m

    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
        --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
        --budget_minutes=119 \
        --nodes 4 \
        --debug_qos \
        --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
        --run_name orca_finetune_mask_50_ldiags_k_pos_labels_5_df_1.0_wbsz-2048_local_negs-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740 \
        --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/mask_50_ldiags_k_pos_labels_5_decay_factor_1_wbsz-2048_local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-ctx-var-batch_negative_ddp_RR_lr_3e-3_optim_steps_57691/checkpoints-ddp/step-00010000-mask_50_ldiags_k_pos_labels_5_decay_factor_1_wbsz-2048_local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-ctx-var-batch_negative_ddp_RR_lr_3e-3_optim_steps_57691.pth"' \
        --disable_net_gdr \
        --sub_output_dir_name orca_finetune_mask_50_ldiags_k_pos_labels_5_df_1.0_wbsz-2048_local_negs-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740

    echo "$i - running wbsz-256-cross-device-negs. Waiting for 38 minutes before next iteration."
    sleep 2h 8m

    # Fine-tuning launch for the mask_10 / k_pos_labels_5 checkpoint (step
    # 10000): 4 nodes, 119-minute budget, debug QOS flag set.
    # The run name is also passed as the output sub-directory name.
    job_name="orca_finetune_mask_10_ldiags_k_pos_labels_5_df_1.0_wbsz-2048_local_negs-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740"
    launch_argv=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_minutes=119
        --nodes 4
        --debug_qos
        --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json
        --run_name "${job_name}"
        # Single-quoted so the inner double quotes reach the launcher literally.
        --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=2 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/mask_10_ldiags_k_pos_labels_5_decay_factor_1_wbsz-2048_local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-ctx-var-batch_negative_ddp_RR_lr_3e-3_optim_steps_57691/checkpoints-ddp/step-00010000-mask_10_ldiags_k_pos_labels_5_decay_factor_1_wbsz-2048_local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-ctx-var-batch_negative_ddp_RR_lr_3e-3_optim_steps_57691.pth"'
        --disable_net_gdr
        --sub_output_dir_name "${job_name}"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

    # Pause before the next loop iteration. GNU sleep adds its operands
    # together, so this waits 2h 8m in total. The log message is built from the
    # same value so the two cannot drift apart (it previously claimed
    # "38 minutes").
    # NOTE(review): the "wbsz-256-cross-device-negs" label looks stale relative
    # to the orca_finetune_mask_10 launch directly above -- confirm before
    # renaming.
    launch_gap=(2h 8m)
    echo "$i - running wbsz-256-cross-device-negs. Waiting for ${launch_gap[*]} before next iteration."
    sleep "${launch_gap[@]}"

done

### END finetune local, contrastive parallel, siglip comparison exp ###

### START finetune k_pos_labels exps ###
#######################################
# Launch one k_pos_labels fine-tuning run through launch_frontier.py.
# Every run in this section shares the same launcher flags (4 nodes, 6-hour
# budget, natural-instructions config, lr 1e-3 / min_lr 1e-4, world batch 736,
# micro batch 23) and resumes from the step-00010000 checkpoint of the
# matching k_pos_labels pre-training run. Only the pieces below vary.
# Globals:
#   WRKSPC (read) - prefix for the rccl plugin and conda environment tarballs
# Arguments:
#   $1   - k_pos_labels value used in the run name and the checkpoint path
#   $2   - decay-factor spelling used in the run name (e.g. "1", "1.0", "0.5")
#   $3   - decay-factor spelling used in the checkpoint path (e.g. "1.0")
#   $4.. - extra launcher flags, appended verbatim (e.g. --extended_partition)
# Returns:
#   exit status of the python launcher
#######################################
_launch_k_pos_finetune() {
    local k=$1 run_df=$2 ckpt_df=$3
    shift 3

    # The run name is also used as the output sub-directory name.
    local job_name="orca_finetune_k_pos_labels_${k}_decay_factor_${run_df}_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740"
    # The pre-training run directory and its checkpoint file share this stem.
    local ckpt_stem="k_pos_labels_${k}_decay_factor_${ckpt_df}_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim"
    local ckpt_path="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/${ckpt_stem}/checkpoints-ddp/step-00010000-${ckpt_stem}.pth"

    # The escaped quotes around the checkpoint path are passed through
    # literally, exactly as the former single-quoted --extra_args did.
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
        --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
        --budget_hours=6 \
        --nodes 4 \
        --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
        --run_name "${job_name}" \
        --extra_args="--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint=\"${ckpt_path}\"" \
        --disable_net_gdr \
        --sub_output_dir_name "${job_name}" \
        "$@"
}

# One call per run, in the original launch order.
# Columns: k_pos_labels, decay factor as spelled in the run name, decay factor
# as spelled in the checkpoint path, optional extra launcher flags.
# Run names spell a decay factor of one as "1" (except the k=0 run, "1.0"),
# while the checkpoint paths always use "1.0".
_launch_k_pos_finetune 0  1.0 1.0
_launch_k_pos_finetune 1  0.5 0.5
_launch_k_pos_finetune 1  1   1.0
_launch_k_pos_finetune 3  0.5 0.5
_launch_k_pos_finetune 3  1   1.0
_launch_k_pos_finetune 5  0.5 0.5
_launch_k_pos_finetune 5  1   1.0
_launch_k_pos_finetune 10 0.5 0.5 --extended_partition
_launch_k_pos_finetune 10 1   1.0 --extended_partition

### END finetune k_pos_labels exps ###

### START local, contrastive parallel, siglip comparison exp ###
# wbsz 64
# wbsz-64 local-negatives run: 1 node, 24-hour budget, world_batch_size=2560
# with micro_batch_size=8. The run name doubles as the output sub-directory.
job_name="wbsz-64-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=24
    --nodes 1
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name "${job_name}"
    --extra_args='--max_iters=2307631 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=8 --fabric_strategy="ddp"'
    --disable_net_gdr
    --sub_output_dir_name "${job_name}"
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# wbsz-64 local-negatives run: 1 node, 24-hour budget, world_batch_size=64
# with micro_batch_size=8. The run name doubles as the output sub-directory.
job_name="wbsz-64-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_2307631"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=24
    --nodes 1
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name "${job_name}"
    --extra_args='--max_iters=2307631 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=64 --micro_batch_size=8 --fabric_strategy="ddp"'
    --disable_net_gdr
    --sub_output_dir_name "${job_name}"
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# wbsz-64 run with --negatives_cross_device=true: 1 node, 24-hour budget,
# world_batch_size=2560 with micro_batch_size=8.
# The run name doubles as the output sub-directory.
job_name="wbsz-64-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=24
    --nodes 1
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name "${job_name}"
    --extra_args='--negatives_cross_device=true --max_iters=2307631 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=8 --fabric_strategy="ddp"'
    --disable_net_gdr
    --sub_output_dir_name "${job_name}"
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# wbsz-64 run with --negatives_cross_device=true: 1 node, 24-hour budget,
# world_batch_size=64 with micro_batch_size=8.
# The run name doubles as the output sub-directory.
job_name="wbsz-64-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_2307631"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=24
    --nodes 1
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name "${job_name}"
    --extra_args='--negatives_cross_device=true --max_iters=2307631 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=64 --micro_batch_size=8 --fabric_strategy="ddp"'
    --disable_net_gdr
    --sub_output_dir_name "${job_name}"
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# SigLIP-loss run (--siglip_loss=true) at wbsz 64: 1 node, 16-hour budget,
# lr 9e-4 / min_lr 9e-5, weight decay 0.00003, 5800 warmup steps.
# The run name doubles as the output sub-directory.
job_name="siglip_128k_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-64-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_9e-4_wd_3e-5_warmup_5800_optim_steps_57691"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=16
    --nodes 1
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name "${job_name}"
    --extra_args='--siglip_loss=true --max_iters=2307631 --warmup_steps=5800 --optim_config.weight_decay=0.00003 --max_tokens=null --save_step_interval=500 --optim_config.lr=9e-4 --min_lr=9e-5 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=8 --fabric_strategy="ddp"'
    --disable_net_gdr
    --sub_output_dir_name "${job_name}"
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# wbsz 128
# wbsz-128 local-negatives run: 1 node, 24-hour budget, world_batch_size=2560
# with micro_batch_size=16. The run name doubles as the output sub-directory.
job_name="wbsz-128-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=24
    --nodes 1
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name "${job_name}"
    --extra_args='--max_iters=1153815 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=16 --fabric_strategy="ddp"'
    --disable_net_gdr
    --sub_output_dir_name "${job_name}"
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# wbsz-128 run with --negatives_cross_device=true: 4 nodes, 24-hour budget,
# world_batch_size=2560 with micro_batch_size=4.
# The run name doubles as the output sub-directory.
job_name="wbsz-128-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=24
    --nodes 4
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name "${job_name}"
    --extra_args='--negatives_cross_device=true --max_iters=1153815 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=4 --fabric_strategy="ddp"'
    --disable_net_gdr
    --sub_output_dir_name "${job_name}"
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# SigLIP-loss run (--siglip_loss=true) at wbsz 128: 2 nodes, 14-hour budget,
# lr 9e-4 / min_lr 9e-5, weight decay 0.00003, 5800 warmup steps.
# The run name doubles as the output sub-directory.
job_name="siglip_256k_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-128-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_9e-4_wd_3e-5_warmup_5800_optim_steps_57691"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=14
    --nodes 2
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name "${job_name}"
    --extra_args='--siglip_loss=true --max_iters=1153815 --warmup_steps=5800 --optim_config.weight_decay=0.00003 --max_tokens=null --save_step_interval=500 --optim_config.lr=9e-4 --min_lr=9e-5 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=8 --fabric_strategy="ddp"'
    --disable_net_gdr
    --sub_output_dir_name "${job_name}"
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# wbsz 256 (best setting for each experiment)
# wbsz-2560 local-negatives run: 16 nodes, 4-hour budget, world_batch_size=2560
# with micro_batch_size=20, lr 4e-3 / min_lr 4e-4.
# The run name doubles as the output sub-directory.
job_name="wbsz-2560-local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-ctx-var-batch_negative_ddp_RR_lr_4e-3_optim_steps_57691"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=4
    --nodes 16
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name "${job_name}"
    --extra_args='--max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=4e-3 --min_lr=4e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=20 --fabric_strategy="ddp"'
    --disable_net_gdr
    --sub_output_dir_name "${job_name}"
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# mask_k_diags=50, k_pos_labels=5, decay_factor=1.0 run: 16 nodes, 6-hour
# budget, world_batch_size=2048 with micro_batch_size=16, extended partition.
# The run name doubles as the output sub-directory.
job_name="mask_50_ldiags_k_pos_labels_5_decay_factor_1_wbsz-2048_local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-ctx-var-batch_negative_ddp_RR_lr_3e-3_optim_steps_57691"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=6
    --nodes=16
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
    --run_name="${job_name}"
    --extra_args="--max_iters=57691 --mask_k_diags=50 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=3e-3 --min_lr=3e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy=ddp"
    --disable_net_gdr
    --sub_output_dir_name="${job_name}"
    --extended_partition
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# mask_k_diags=10, k_pos_labels=5, decay_factor=1.0 run: 16 nodes, 4-hour
# budget, world_batch_size=2048 with micro_batch_size=16, extended partition.
# The run name doubles as the output sub-directory.
job_name="mask_10_ldiags_k_pos_labels_5_decay_factor_1_wbsz-2048_local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-ctx-var-batch_negative_ddp_RR_lr_3e-3_optim_steps_57691"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=4
    --nodes=16
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
    --run_name="${job_name}"
    --extra_args="--max_iters=57691 --mask_k_diags=10 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=3e-3 --min_lr=3e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy=ddp"
    --disable_net_gdr
    --sub_output_dir_name="${job_name}"
    --extended_partition
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# mask_k_diags=50, k_pos_labels=3, decay_factor=1.0 run: 16 nodes, 12-hour
# budget, world_batch_size=2048 with micro_batch_size=16, extended partition.
# The run name doubles as the output sub-directory.
job_name="mask_50_ldiags_k_pos_labels_3_decay_factor_1_wbsz-2048_local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-ctx-var-batch_negative_ddp_RR_lr_3e-3_optim_steps_57691"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=12
    --nodes=16
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
    --run_name="${job_name}"
    --extra_args="--max_iters=57691 --mask_k_diags=50 --k_pos_labels=3 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=3e-3 --min_lr=3e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy=ddp"
    --disable_net_gdr
    --sub_output_dir_name="${job_name}"
    --extended_partition
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# mask_k_diags=10, k_pos_labels=3, decay_factor=1.0 run: 16 nodes, 8-hour
# budget, world_batch_size=2048 with micro_batch_size=16, extended partition.
# The run name doubles as the output sub-directory.
job_name="mask_10_ldiags_k_pos_labels_3_decay_factor_1_wbsz-2048_local_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-ctx-var-batch_negative_ddp_RR_lr_3e-3_optim_steps_57691"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=8
    --nodes=16
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
    --run_name="${job_name}"
    --extra_args="--max_iters=57691 --mask_k_diags=10 --k_pos_labels=3 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=3e-3 --min_lr=3e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy=ddp"
    --disable_net_gdr
    --sub_output_dir_name="${job_name}"
    --extended_partition
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# wbsz-256 run with --negatives_cross_device=true: 16 nodes, 18-hour budget,
# world_batch_size=2560 with micro_batch_size=2, extended partition.
# The run name doubles as the output sub-directory.
job_name="wbsz-256-cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=18
    --nodes 16
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name "${job_name}"
    --extra_args='--negatives_cross_device=true --max_iters=576908 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=2 --fabric_strategy="ddp"'
    --disable_net_gdr
    --sub_output_dir_name "${job_name}"
    --extended_partition
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# k pos labels exp
# k_pos_labels=3, decay_factor=1.0 run with --negatives_cross_device=true:
# 16 nodes, 8-hour budget, world_batch_size=2560 with micro_batch_size=2,
# extended partition. The run name doubles as the output sub-directory.
job_name="k_pos_labels_3_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=8
    --nodes=16
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
    --run_name="${job_name}"
    --extra_args="--negatives_cross_device=true --max_iters=576908 --k_pos_labels=3 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=2 --fabric_strategy=ddp"
    --disable_net_gdr
    --sub_output_dir_name="${job_name}"
    --extended_partition
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

# mask_k_diags=50, k_pos_labels=5, decay_factor=1.0 run with
# --negatives_cross_device=true: 16 nodes, 8-hour budget,
# world_batch_size=2560 with micro_batch_size=2, extended partition.
# The run name doubles as the output sub-directory.
job_name="mask_50_ldiags_k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691"
launch_argv=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=8
    --nodes=16
    --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
    --run_name="${job_name}"
    --extra_args="--negatives_cross_device=true --max_iters=576908 --mask_k_diags=50 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=2 --fabric_strategy=ddp"
    --disable_net_gdr
    --sub_output_dir_name="${job_name}"
    --extended_partition
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_argv[@]}"

python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
      --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
      --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
      --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
      --budget_hours=8 \
      --nodes=16 \
      --config="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json" \
      --run_name="k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691" \
      --extra_args="--negatives_cross_device=true --max_iters=576908 --k_pos_labels=5 --decay_factor=1.0 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=2 --fabric_strategy=ddp" \
      --disable_net_gdr \
      --sub_output_dir_name="k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_1e-3_optim_steps_57691" \
    --extended_partition

# Invokes launch_frontier.py once for the siglip "512k negs" run
# (--budget_hours=18, --nodes 8, --siglip_loss=true). The same name is used for
# --run_name and --sub_output_dir_name. Reads WRKSPC.
# NOTE(review): max_iters=1153815 is ~20x the 57691 optimizer steps in the run
# name, while the name's mbsz-4 / wbsz-256 / bsz-2560 labels suggest ~10x —
# confirm the value is intended.
launch_siglip_512k_negs() {
  local repo_root=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40
  local run_label=siglip_512k_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-256-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_9e-4_wd_3e-5_warmup_5800_optim_steps_57691
  local -a launch_argv=(
    --python_script="${repo_root}/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=18
    --nodes 8
    --config "${repo_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
    --run_name "${run_label}"
    --extra_args='--siglip_loss=true --max_iters=1153815 --warmup_steps=5800 --optim_config.weight_decay=0.00003 --max_tokens=null --save_step_interval=500 --optim_config.lr=9e-4 --min_lr=9e-5 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=4 --fabric_strategy="ddp"'
    --disable_net_gdr
    --sub_output_dir_name "${run_label}"
  )
  python "${repo_root}/launch_scripts/launch_frontier.py" "${launch_argv[@]}"
}

launch_siglip_512k_negs

### END local, contrastive parallel, siglip comparison exp ###

### START siglip bsz scaling exp ###
# Invokes launch_frontier.py four times on a single node with --siglip_loss=true,
# varying micro batch size, max_iters, weight decay and time budget.
#
# launch_siglip_bsz_run BUDGET_HOURS NEGS_TAG MICRO_BSZ STEP_BSZ WD_TAG WD_VALUE MAX_ITERS
#   BUDGET_HOURS - value for --budget_hours
#   NEGS_TAG     - "<n>k" label used in the run name (siglip_<NEGS_TAG>_negs_...)
#   MICRO_BSZ    - value for --micro_batch_size (also the mbsz-<n> name label)
#   STEP_BSZ     - wbsz-<n> label in the run name (name only, not a flag)
#   WD_TAG       - wd_<tag> label in the run name
#   WD_VALUE     - value for --optim_config.weight_decay
#   MAX_ITERS    - value for --max_iters
# The same name is passed as --run_name and --sub_output_dir_name.
# Reads WRKSPC. Returns the exit status of the python launcher.
launch_siglip_bsz_run() {
  local budget_hours=$1 negs_tag=$2 micro_bsz=$3 step_bsz=$4
  local wd_tag=$5 wd_value=$6 iters=$7
  local repo_root=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40
  local run_label="siglip_${negs_tag}_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-${micro_bsz}-wbsz-${step_bsz}-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_9e-4_wd_${wd_tag}_warmup_6500_optim_steps_57691"
  local trainer_flags="--siglip_loss=true --max_iters=${iters} --warmup_steps=6500 --optim_config.weight_decay=${wd_value} --max_tokens=null --save_step_interval=500 --optim_config.lr=9e-4 --min_lr=9e-5 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=${micro_bsz} --fabric_strategy=\"ddp\""
  local -a launch_argv=(
    --python_script="${repo_root}/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours="${budget_hours}"
    --nodes 1
    --config "${repo_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
    --run_name "${run_label}"
    --extra_args="${trainer_flags}"
    --disable_net_gdr
    --sub_output_dir_name "${run_label}"
  )
  python "${repo_root}/launch_scripts/launch_frontier.py" "${launch_argv[@]}"
}

launch_siglip_bsz_run 12 16k 1 8 3e-5 0.00003 18461047
launch_siglip_bsz_run 6 32k 2 16 1e-4 0.0001 9230523
launch_siglip_bsz_run 6 64k 4 32 1e-4 0.0001 4615262
launch_siglip_bsz_run 6 128k 8 64 1e-4 0.0001 2307631

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
#     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
#     --budget_hours=6 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json \
#     --run_name siglip_256k_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_9e-4_wd_1e-4_warmup_6500_optim_steps_57691 \
#     --extra_args='--siglip_loss=true --max_iters=18461047 --warmup_steps=6500 --optim_config.weight_decay=0.0001 --max_tokens=null --save_step_interval=500 --optim_config.lr=9e-4 --min_lr=9e-5 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=16 --fabric_strategy="ddp"' \
#     --disable_net_gdr \
#     --sub_output_dir_name siglip_256k_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-gradacc_bsz-2560-ctx-var-batch_negative_ddp_RR_lr_9e-4_wd_1e-4_warmup_6500_optim_steps_57691

### END siglip bsz scaling exp ###

# Invokes launch_frontier.py twice as 30-minute single-node runs: one passing
# --k_pos_labels=0 and one without that flag; everything else is identical.
#
# launch_k_pos_labels_0_check RUN_LABEL TRAINER_FLAGS
#   RUN_LABEL     - passed as both --run_name and --sub_output_dir_name
#   TRAINER_FLAGS - passed verbatim as the value of --extra_args
# Reads WRKSPC. Returns the exit status of the python launcher.
launch_k_pos_labels_0_check() {
  local run_label=$1 trainer_flags=$2
  local repo_root=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40
  local -a launch_argv=(
    --python_script="${repo_root}/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_minutes=30
    --nodes 1
    --config "${repo_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
    --run_name "${run_label}"
    --extra_args="${trainer_flags}"
    --disable_net_gdr
    --sub_output_dir_name "${run_label}"
  )
  python "${repo_root}/launch_scripts/launch_frontier.py" "${launch_argv[@]}"
}

launch_k_pos_labels_0_check k_pos_labels_0 \
  '--k_pos_labels=0 --reduce="both_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=160 --micro_batch_size=20 --fabric_strategy="ddp"'
launch_k_pos_labels_0_check non_k_pos_labels_0 \
  '--reduce="both_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=160 --micro_batch_size=20 --fabric_strategy="ddp"'

# Invokes launch_frontier.py six times as 30-minute single-node runs, all with
# --negatives_cross_device=true, varying the --reduce mode, checkpoint interval
# and batch sizes. Some runs additionally pass --debug_qos.
#
# launch_cross_device_negs_check RUN_LABEL REDUCE SAVE_EVERY WORLD_BSZ MICRO_BSZ [FLAG...]
#   RUN_LABEL  - passed as both --run_name and --sub_output_dir_name
#   REDUCE     - value for --reduce (quoted inside --extra_args)
#   SAVE_EVERY - value for --save_step_interval
#   WORLD_BSZ  - value for --world_batch_size
#   MICRO_BSZ  - value for --micro_batch_size
#   FLAG...    - extra launcher flags appended after --sub_output_dir_name
# Reads WRKSPC. Returns the exit status of the python launcher.
launch_cross_device_negs_check() {
  local run_label=$1 reduce_mode=$2 save_every=$3 world_bsz=$4 micro_bsz=$5
  shift 5
  local repo_root=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40
  local trainer_flags="--negatives_cross_device=true --reduce=\"${reduce_mode}\" --max_iters=57691 --max_tokens=null --save_step_interval=${save_every} --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=${world_bsz} --micro_batch_size=${micro_bsz} --fabric_strategy=\"ddp\""
  local -a launch_argv=(
    --python_script="${repo_root}/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_minutes=30
    --nodes 1
    --config "${repo_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
    --run_name "${run_label}"
    --extra_args="${trainer_flags}"
    --disable_net_gdr
    --sub_output_dir_name "${run_label}"
    "$@"
  )
  python "${repo_root}/launch_scripts/launch_frontier.py" "${launch_argv[@]}"
}

launch_cross_device_negs_check no_grad_aware_allgather_cross_device_negs_32k suffix_dim 5000 16 2
launch_cross_device_negs_check no_grad_aware_allgather_cross_device_negs_32k_prefixes_suffixes suffix_dim 5000 16 2 --debug_qos
launch_cross_device_negs_check no_grad_aware_allgather_cross_device_negs_32k_reduce_both_dim both_dim 5000 16 2
launch_cross_device_negs_check fixed_grad_aware_allgather_cross_device_negs_32k suffix_dim 5000 16 2 --debug_qos
launch_cross_device_negs_check cross_device_negs_32k_correct_dist_labels_by_device_id_reduce_both_dim both_dim 500 16 2
# NOTE(review): this run is named "non_cross_device_negs" yet still passes
# --negatives_cross_device=true — confirm which one is intended.
launch_cross_device_negs_check chunked_loss_non_cross_device_negs_reduce_suffix_dim suffix_dim 500 64 8 --debug_qos

# Invokes launch_frontier.py once for the short "drop_k_5" run: --budget_minutes=30,
# --nodes 16, --debug_qos, with --drop_k=5 and --save_step_interval=100 in
# --extra_args. Reads WRKSPC.
launch_drop_k_5_debug() {
  local repo_root=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40
  local run_label=drop_k_5
  local -a launch_argv=(
    --python_script="${repo_root}/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_minutes=30
    --nodes 16
    --config "${repo_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
    --run_name "${run_label}"
    --extra_args='--drop_k=5 --reduce="both_dim" --max_iters=57691 --max_tokens=null --save_step_interval=100 --optim_config.lr=4e-3 --min_lr=4e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=20 --fabric_strategy="ddp"'
    --disable_net_gdr
    --sub_output_dir_name "${run_label}"
    --debug_qos
  )
  python "${repo_root}/launch_scripts/launch_frontier.py" "${launch_argv[@]}"
}

launch_drop_k_5_debug

#### mask_k_diags finetune ####
# Invokes launch_frontier.py six times (--budget_hours=6, --nodes 4) with the
# retrieval_train_pythia_natural_inst_data.json config, each run passing a
# different mask_k_diags checkpoint via --finetune_checkpoint.
#
# launch_mask_k_diags_finetune K STEPS_TAG CKPT_STEP [FLAG...]
#   K         - mask_k_diags value that appears in the run and checkpoint names
#   STEPS_TAG - "<n>k" label used in the run name (fineweb_<STEPS_TAG>_steps)
#   CKPT_STEP - zero-padded step number in the checkpoint file name
#   FLAG...   - extra launcher flags inserted between --extra_args and
#               --sub_output_dir_name (only the first run passes
#               --disable_net_gdr)
# The same name is passed as --run_name and --sub_output_dir_name.
# Reads WRKSPC. Returns the exit status of the python launcher.
launch_mask_k_diags_finetune() {
  local k=$1 steps_tag=$2 ckpt_step=$3
  shift 3
  local repo_root=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40
  local output_root=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
  local pretrain_label="mask_k_diags_${k}_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691_reduce_both_dim"
  local ckpt_path="${output_root}/${pretrain_label}/checkpoints-ddp/step-${ckpt_step}-${pretrain_label}.pth"
  local run_label="mask_k_diags_${k}_orca_finetune_fineweb_${steps_tag}_steps-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740"
  local trainer_flags="--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint=\"${ckpt_path}\""
  local -a launch_argv=(
    --python_script="${repo_root}/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=6
    --nodes 4
    --config "${repo_root}/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json"
    --run_name "${run_label}"
    --extra_args="${trainer_flags}"
    "$@"
    --sub_output_dir_name "${run_label}"
  )
  python "${repo_root}/launch_scripts/launch_frontier.py" "${launch_argv[@]}"
}

launch_mask_k_diags_finetune 3 7k 00007000 --disable_net_gdr
launch_mask_k_diags_finetune 3 10k 00010000
launch_mask_k_diags_finetune 9 5k 00005000
launch_mask_k_diags_finetune 9 10k 00010000
launch_mask_k_diags_finetune 50 5k 00005000
launch_mask_k_diags_finetune 50 10k 00010000

### drop k exps ###
# Invokes launch_frontier.py twice (--budget_hours=10, --nodes 16), once per
# --drop_k value (5 and 50); all other arguments are identical.
#
# launch_drop_k_run K
#   K - value for --drop_k; also embedded in the run name, which is passed as
#       both --run_name and --sub_output_dir_name
# Reads WRKSPC. Returns the exit status of the python launcher.
launch_drop_k_run() {
  local k=$1
  local repo_root=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40
  local run_label="drop_k_${k}_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691_reduce_both_dim"
  local trainer_flags="--drop_k=${k} --reduce=\"both_dim\" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=4e-3 --min_lr=4e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=20 --fabric_strategy=\"ddp\""
  local -a launch_argv=(
    --python_script="${repo_root}/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=10
    --nodes 16
    --config "${repo_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
    --run_name "${run_label}"
    --extra_args="${trainer_flags}"
    --disable_net_gdr
    --sub_output_dir_name "${run_label}"
  )
  python "${repo_root}/launch_scripts/launch_frontier.py" "${launch_argv[@]}"
}

launch_drop_k_run 5
launch_drop_k_run 50

### mask_k_diags exp ###
# Invokes launch_frontier.py three times (--budget_hours=10, --nodes 16), once
# per --mask_k_diags value (3, 9 and 50); all other arguments are identical.
#
# launch_mask_k_diags_run K
#   K - value for --mask_k_diags; also embedded in the run name, which is
#       passed as both --run_name and --sub_output_dir_name
# Reads WRKSPC. Returns the exit status of the python launcher.
launch_mask_k_diags_run() {
  local k=$1
  local repo_root=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40
  local run_label="mask_k_diags_${k}_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691_reduce_both_dim"
  local trainer_flags="--mask_k_diags=${k} --reduce=\"both_dim\" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=4e-3 --min_lr=4e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=20 --fabric_strategy=\"ddp\""
  local -a launch_argv=(
    --python_script="${repo_root}/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=10
    --nodes 16
    --config "${repo_root}/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
    --run_name "${run_label}"
    --extra_args="${trainer_flags}"
    --disable_net_gdr
    --sub_output_dir_name "${run_label}"
  )
  python "${repo_root}/launch_scripts/launch_frontier.py" "${launch_argv[@]}"
}

launch_mask_k_diags_run 3
launch_mask_k_diags_run 9
launch_mask_k_diags_run 50

### bsz sweep ckpts finetune ###
# 8k_negs orca finetune: 4 nodes, 6-hour budget, lr 1e-3 (min 1e-4), micro/world
# batch 23/736; --finetune_checkpoint is step-00048000 of the mbsz-4/wbsz-512
# reduce_both_dim run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=6 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name '8k_negs_orca_finetune_fineweb_14B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00048000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim.pth"' \
  --sub_output_dir_name '8k_negs_orca_finetune_fineweb_14B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'

# 16k_negs orca finetune: 4 nodes, 6-hour budget, lr 1e-3 (min 1e-4), micro/world
# batch 23/736; --finetune_checkpoint is step-00024000 of the mbsz-8/wbsz-1024
# reduce_both_dim run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=6 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name '16k_negs_orca_finetune_fineweb_14B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00024000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_both_dim.pth"' \
  --sub_output_dir_name '16k_negs_orca_finetune_fineweb_14B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'

# 32k_negs orca finetune: 4 nodes, 6-hour budget, lr 1e-3 (min 1e-4), micro/world
# batch 23/736; --finetune_checkpoint is step-00012000 of the mbsz-16/wbsz-2048
# reduce_both_dim run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=6 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name '32k_negs_orca_finetune_fineweb_14B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00012000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim.pth"' \
  --sub_output_dir_name '32k_negs_orca_finetune_fineweb_14B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'

# 41k_negs orca finetune: 4 nodes, 6-hour budget, lr 1e-3 (min 1e-4), micro/world
# batch 23/736; --finetune_checkpoint is step-00009500 of the
# reduce_both_dim_fineweb_100B mbsz-20/wbsz-2560 run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=6 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name '41k_negs_orca_finetune_fineweb_14B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/reduce_both_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00009500-reduce_both_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth"' \
  --sub_output_dir_name '41k_negs_orca_finetune_fineweb_14B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'
    

### reduce_suffix_dim bsz sweep ###
# reduce="suffix_dim" sweep point: 16 nodes, 10-hour budget, lr 1e-3 (min 1e-4),
# micro/world batch 4/512, ddp, max_iters 57691.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=10 --nodes 16 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json' \
  --run_name 'fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_suffix_dim' \
  --extra_args='--reduce="suffix_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=512 --micro_batch_size=4 --fabric_strategy="ddp"' \
  --disable_net_gdr \
  --sub_output_dir_name 'fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_suffix_dim'

# reduce="suffix_dim" sweep point: 16 nodes, 10-hour budget, lr 2e-3 (min 2e-4),
# micro/world batch 8/1024, ddp, max_iters 57691.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=10 --nodes 16 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json' \
  --run_name 'fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_suffix_dim' \
  --extra_args='--reduce="suffix_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=2e-3 --min_lr=2e-4 --save_n_min_before_job_done=3 --world_batch_size=1024 --micro_batch_size=8 --fabric_strategy="ddp"' \
  --disable_net_gdr \
  --sub_output_dir_name 'fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_suffix_dim'

# reduce="suffix_dim" sweep point: 16 nodes, 10-hour budget, lr 3e-3 (min 3e-4),
# micro/world batch 16/2048, ddp, max_iters 57691.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=10 --nodes 16 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json' \
  --run_name 'fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_suffix_dim' \
  --extra_args='--reduce="suffix_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=3e-3 --min_lr=3e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy="ddp"' \
  --disable_net_gdr \
  --sub_output_dir_name 'fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_suffix_dim'


# lit-gpt-dev_new launcher, no --python_script: 1 node, 30-minute budget,
# ROCm 6.2.0, with --debug_qos, --interactive and --extended_partition set.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/launch_frontier.py \
  --rccl_installdir="$WRKSPC/aws-ofi-rccl_62" \
  --environment="$WRKSPC/frontier_conda_62" \
  --rocm_version=6.2.0 \
  --budget_minutes=30 --nodes 1 \
  --debug_qos --interactive --extended_partition

# lit-gpt-dev_mlm launcher, no --python_script: 16 nodes, 59-minute budget,
# ROCm 6.2.0, with --interactive, --extended_partition and --debug_qos set.
# Each switch is passed exactly once; repeating --extended_partition adds
# nothing if it is a boolean switch (assumed from its valueless use — TODO
# confirm against launch_frontier.py).
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_mlm/launch_frontier.py \
    --rccl_installdir="${WRKSPC}/aws-ofi-rccl_25_62_retrieval" \
    --environment="${WRKSPC}/frontier_conda_25_62_retrieval" \
    --rocm_version="6.2.0" \
    --budget_minutes=59 \
    --nodes 16 \
    --interactive \
    --extended_partition \
    --debug_qos

# XXXX-40/launch_frontier.py, no --python_script: 4 nodes, 30-minute budget,
# ROCm 6.2.0, with --debug_qos, --interactive and --extended_partition set.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_frontier.py \
  --rccl_installdir="$WRKSPC/aws-ofi-rccl_62_retrieval" \
  --environment="$WRKSPC/frontier_conda_62_retrieval" \
  --rocm_version=6.2.0 \
  --budget_minutes=30 --nodes 4 \
  --debug_qos --interactive --extended_partition
# launch_scripts/launch_frontier.py, no --python_script: 16 nodes, 30-minute
# budget, unpacked rccl lib dir and conda env, with --debug_qos and
# --interactive set.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl/lib" \
  --environment="$WRKSPC/frontier_conda_60" \
  --budget_minutes=30 --nodes 16 \
  --debug_qos --interactive
# python scripts/split_hf_dataset.py --dataset_name HuggingFaceTB/cosmopedia --destination_path /fs/XXXX-37/llm-pretraining/llm-retrieval/data/splitted_cosmopedia --cache_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/cache
# python scripts/prepare_hf.py --dataset_name_or_path /fs/XXXX-37/llm-pretraining/llm-retrieval/data/splitted_cosmopedia --checkpoint_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/checkpoints/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T --destination_path /fs/XXXX-37/llm-pretraining/llm-retrieval/data/packed_cosmopedia --ld_from_disk True --skip_remainder True
# launch_retrieval_eval.py through the launcher: 1 node, 30-minute budget,
# with --debug_qos set.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_minutes=30 --nodes 1 \
  --debug_qos

# test_llm_oom: train.py on 1 node, 30-minute budget, micro/world batch 24/192,
# ddp, with --debug_qos set.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_minutes=30 --nodes 1 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/llm_train_inst_data.json' \
  --run_name 'test_llm_oom' \
  --extra_args='--world_batch_size=192 --micro_batch_size=24  --fabric_strategy="ddp"' \
  --sub_output_dir_name 'test_llm_oom' \
  --debug_qos

# test_retrieval_oom: train_retrieval_w_anticausal.py with the random-tokens
# config on 16 nodes, 30-minute budget, micro/world batch 21/2688, ddp,
# with --debug_qos set.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_minutes=30 --nodes 16 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/random_tokens_retrieval_train_pythia.json' \
  --run_name 'test_retrieval_oom' \
  --extra_args='--max_tokens=100000000000 --save_n_min_before_job_done=3 --world_batch_size=2688 --micro_batch_size=21 --fabric_strategy="ddp"' \
  --sub_output_dir_name 'test_retrieval_oom' \
  --debug_qos
# python launch_scripts/launch_frontier.py --rccl_installdir="${WRKSPC}/tiny_plugins_rccl_60/lib"     --environment="${WRKSPC}/frontier_conda_60"     --budget_minutes=119     --nodes 1 --debug_qos --interactive
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
#     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
#     --budget_hours=5 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json \
#     --run_name dolma-retrieval-dual-causal-pythia-160m-microbsz-8-ctx-rand-batch_negative_ddp_RR_lr_3e-4 \
#     --extra_args='--world_batch_size=64 --micro_batch_size=8 --fabric_strategy="ddp"' \
#     --sub_output_dir_name dolma-retrieval-dual-causal-pythia-160m-microbsz-8-ctx-rand-batch_negative_ddp_RR_lr_3e-4

# ##### Pythia-160M Retrieval finetuning (MEDI-2) #####
# medi finetune (retrieval_finetune.json): 4 nodes, 6-hour budget, max_iters
# 1673, max_seq_len 1024, lr 1e-3 (min 1e-4), micro/world batch 46/1472;
# --finetune_checkpoint is step-00047000 of the mbsz-20/wbsz-2560 fineweb_100B run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=6 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_finetune.json' \
  --run_name 'medi_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_1673' \
  --extra_args='--max_iters=1673 --max_seq_len=1024 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=46 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00047000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth"' \
  --sub_output_dir_name 'medi_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_1673'

# medi finetune (retrieval_finetune.json): 4 nodes, 6-hour budget, max_iters
# 1673, lr 1e-3 (min 1e-4), micro/world batch 23/736; --finetune_checkpoint is
# step-00047000 of the mbsz-20/wbsz-2560 fineweb_100B run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=6 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_finetune.json' \
  --run_name 'medi_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_1673' \
  --extra_args='--max_iters=1673 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00047000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth"' \
  --sub_output_dir_name 'medi_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_1673'

# ##### Pythia-160M Retrieval finetuning (random initialization) #####
# orca finetune with no starting checkpoint: pretrained_prefix_model and
# pretrained_suffix_model are false and finetune_checkpoint is null.
# 4 nodes, 8-hour budget, lr 1e-3 (min 1e-4), micro/world batch 23/736.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=8 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name 'orca_finetune-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--pretrained_prefix_model=false --pretrained_suffix_model=false --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint=null' \
  --sub_output_dir_name 'orca_finetune-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'

# orca finetune with no starting checkpoint: pretrained_prefix_model and
# pretrained_suffix_model are false and finetune_checkpoint is null.
# 4 nodes, 8-hour budget, max_seq_len 1024, lr 1e-3 (min 1e-4), micro/world
# batch 46/1472.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=8 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name 'orca_finetune-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--pretrained_prefix_model=false --pretrained_suffix_model=false --max_seq_len=1024 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=46 --finetune_checkpoint=null' \
  --sub_output_dir_name 'orca_finetune-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'

# ##### Pythia-160M Retrieval (Fineweb-100B) finetuning #####
# orca finetune from fineweb_100B: 4 nodes, 12-hour budget, lr 1e-3 (min 1e-4),
# micro/world batch 23/736; --finetune_checkpoint is step-00016585 of the
# fineweb_100B mbsz-23/wbsz-2944 run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=12 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name 'orca_finetune_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-2944-ctx-var-batch_negative_ddp_RR_lr_4e-3/checkpoints-ddp/step-00016585-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-2944-ctx-var-batch_negative_ddp_RR_lr_4e-3.pth"' \
  --sub_output_dir_name 'orca_finetune_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'

# orca finetune from fineweb_100B: 4 nodes, 12-hour budget, max_seq_len 1024,
# lr 1e-3 (min 1e-4), micro/world batch 46/1472; --finetune_checkpoint is
# step-00016585 of the fineweb_100B mbsz-23/wbsz-2944 run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=12 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name 'orca_finetune_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--max_seq_len=1024 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=46 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-2944-ctx-var-batch_negative_ddp_RR_lr_4e-3/checkpoints-ddp/step-00016585-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-2944-ctx-var-batch_negative_ddp_RR_lr_4e-3.pth"' \
  --sub_output_dir_name 'orca_finetune_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'

# ##### Pythia-160M Retrieval (RedPajama-V2-100B) finetuning #####
# orca finetune from rpj_v2_100B: 4 nodes, 12-hour budget, lr 1e-3 (min 1e-4),
# micro/world batch 23/736; --finetune_checkpoint is step-00016585 of the
# rpj_v2_100B mbsz-23/wbsz-2944 run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=12 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name 'orca_finetune_rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-2944-ctx-var-batch_negative_ddp_RR_lr_4e-3/checkpoints-ddp/step-00016585-rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-2944-ctx-var-batch_negative_ddp_RR_lr_4e-3.pth"' \
  --sub_output_dir_name 'orca_finetune_rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'

# orca finetune from rpj_v2_100B: 4 nodes, 12-hour budget, max_seq_len 1024,
# lr 1e-3 (min 1e-4), micro/world batch 46/1472; --finetune_checkpoint is
# step-00016585 of the rpj_v2_100B mbsz-23/wbsz-2944 run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=12 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name 'orca_finetune_rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--max_seq_len=1024 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=46 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-2944-ctx-var-batch_negative_ddp_RR_lr_4e-3/checkpoints-ddp/step-00016585-rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-2944-ctx-var-batch_negative_ddp_RR_lr_4e-3.pth"' \
  --sub_output_dir_name 'orca_finetune_rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'


# ##### Finetuning from pretraining ckpts (10B, 30B, 50B, and 70B tokens) #####
# fineweb 40 #
### mbsz 46 ###
# fineweb_10B_40_negs orca finetune: 4 nodes, 6-hour budget, max_seq_len 1024,
# lr 1e-3 (min 1e-4), micro/world batch 46/1472; --finetune_checkpoint is
# step-00004000 of the mbsz-40/wbsz-5120 fineweb_100B run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=6 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name 'orca_finetune_fineweb_10B_40_negs-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--max_seq_len=1024 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=46 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-40-wbsz-5120-ctx-var-1024-batch_negative_ddp_RR_lr_7e-3_max_iters_28845/checkpoints-ddp/step-00004000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-40-wbsz-5120-ctx-var-1024-batch_negative_ddp_RR_lr_7e-3_max_iters_28845.pth"' \
  --sub_output_dir_name 'orca_finetune_fineweb_10B_40_negs-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'

# fineweb_30B_40_negs orca finetune: 4 nodes, 6-hour budget, max_seq_len 1024,
# lr 1e-3 (min 1e-4), micro/world batch 46/1472; --finetune_checkpoint is
# step-00012000 of the mbsz-40/wbsz-5120 fineweb_100B run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=6 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name 'orca_finetune_fineweb_30B_40_negs-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--max_seq_len=1024 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=46 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-40-wbsz-5120-ctx-var-1024-batch_negative_ddp_RR_lr_7e-3_max_iters_28845/checkpoints-ddp/step-00012000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-40-wbsz-5120-ctx-var-1024-batch_negative_ddp_RR_lr_7e-3_max_iters_28845.pth"' \
  --sub_output_dir_name 'orca_finetune_fineweb_30B_40_negs-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'

### mbsz 23 ###
# fineweb_10B_40_negs orca finetune: 4 nodes, 8-hour budget, lr 1e-3 (min 1e-4),
# micro/world batch 23/736; --finetune_checkpoint is step-00004000 of the
# mbsz-40/wbsz-5120 fineweb_100B run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=8 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name 'orca_finetune_fineweb_10B_40_negs-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-40-wbsz-5120-ctx-var-1024-batch_negative_ddp_RR_lr_7e-3_max_iters_28845/checkpoints-ddp/step-00004000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-40-wbsz-5120-ctx-var-1024-batch_negative_ddp_RR_lr_7e-3_max_iters_28845.pth"' \
  --sub_output_dir_name 'orca_finetune_fineweb_10B_40_negs-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'

# fineweb_30B_40_negs orca finetune: 4 nodes, 8-hour budget, lr 1e-3 (min 1e-4),
# micro/world batch 23/736; --finetune_checkpoint is step-00012000 of the
# mbsz-40/wbsz-5120 fineweb_100B run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=8 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name 'orca_finetune_fineweb_30B_40_negs-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-40-wbsz-5120-ctx-var-1024-batch_negative_ddp_RR_lr_7e-3_max_iters_28845/checkpoints-ddp/step-00012000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-40-wbsz-5120-ctx-var-1024-batch_negative_ddp_RR_lr_7e-3_max_iters_28845.pth"' \
  --sub_output_dir_name 'orca_finetune_fineweb_30B_40_negs-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'

# fineweb 20 #
### mbsz 46 ###
# fineweb_1.5B orca finetune: 4 nodes, 6-hour budget, max_seq_len 1024,
# lr 1e-3 (min 1e-4), micro/world batch 46/1472; --finetune_checkpoint is
# step-00001000 of the mbsz-20/wbsz-2560 fineweb_100B run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=6 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name 'orca_finetune_fineweb_1.5B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--max_seq_len=1024 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=46 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00001000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth"' \
  --sub_output_dir_name 'orca_finetune_fineweb_1.5B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'

# fineweb_10B orca finetune: 4 nodes, 6-hour budget, max_seq_len 1024,
# lr 1e-3 (min 1e-4), micro/world batch 46/1472; --finetune_checkpoint is
# step-00007000 of the mbsz-20/wbsz-2560 fineweb_100B run.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
  --python_script='/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py' \
  --rccl_installdir="$WRKSPC/tiny_plugins_rccl.tar.gz" \
  --environment="$WRKSPC/frontier_conda_60.tar.gz" \
  --budget_hours=6 --nodes 4 \
  --config '/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json' \
  --run_name 'orca_finetune_fineweb_10B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740' \
  --extra_args='--max_seq_len=1024 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=46 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00007000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth"' \
  --sub_output_dir_name 'orca_finetune_fineweb_10B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740'

python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    --budget_hours=6 \
    --nodes 4 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
    --run_name orca_finetune_fineweb_30B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740 \
    --extra_args='--max_seq_len=1024 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=46 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00020500-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth"' \
    --sub_output_dir_name orca_finetune_fineweb_30B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740

python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    --budget_hours=6 \
    --nodes 4 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
    --run_name orca_finetune_fineweb_50B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740 \
    --extra_args='--max_seq_len=1024 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=46 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00033500-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth"' \
    --sub_output_dir_name orca_finetune_fineweb_50B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740

python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    --budget_hours=6 \
    --nodes 4 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
    --run_name orca_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740 \
    --extra_args='--max_seq_len=1024 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=46 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00047000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth"' \
    --sub_output_dir_name orca_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740

python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    --budget_hours=6 \
    --nodes 4 \
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json \
    --run_name orca_finetune_fineweb_86B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740 \
    --extra_args='--max_seq_len=1024 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=46 --finetune_checkpoint="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00057690-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth"' \
    --sub_output_dir_name orca_finetune_fineweb_86B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740

### mbsz 23 ###
# Fineweb fine-tuning sweep, micro batch 23 / world batch 736.
#
# Invokes launch_frontier.py once per table entry below. Every launch shares
# the same settings (8 budget hours, 4 nodes, lr=1e-3, min_lr=1e-4) and
# differs only in which checkpoint of the fineweb_100B run is handed over via
# --finetune_checkpoint.
#
# Table entries are "<label>:<checkpoint step>". The label is embedded in the
# run name and output sub-directory (presumably the number of tokens seen by
# that checkpoint -- confirm).
# NOTE(review): unlike the mbsz-46 fineweb sweep and both RPJ-v2 sweeps in
# this file, these launches do not pass --max_seq_len=1024. Left unchanged;
# confirm whether the omission is intentional.
# NOTE(review): run names end in max_iters_5740 but no --max_iters flag is
# passed here; presumably the config JSON supplies it -- confirm.
#
# Globals:  WRKSPC (read) - directory holding the rccl/conda tarballs
# Returns:  exit status of the last launcher invocation
launch_fineweb_mbsz23_sweep() {
    local repo="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
    local pretrain_run="fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
    local ckpt_dir="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/${pretrain_run}/checkpoints-ddp"
    local entry label step run_name

    for entry in 1.5B:00001000 10B:00007000 30B:00020500 50B:00033500 70B:00047000 86B:00057690; do
        label=${entry%%:*}
        step=${entry##*:}
        # The same string is used for --run_name and --sub_output_dir_name.
        run_name="orca_finetune_fineweb_${label}-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740"

        # --extra_args is a single argument; the escaped quotes keep the
        # literal double quotes around the checkpoint path.
        python "${repo}/launch_scripts/launch_frontier.py" \
            --python_script="${repo}/train_retrieval_w_anticausal.py" \
            --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
            --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
            --budget_hours=8 \
            --nodes 4 \
            --config "${repo}/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json" \
            --run_name "${run_name}" \
            --extra_args="--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint=\"${ckpt_dir}/step-${step}-${pretrain_run}.pth\"" \
            --sub_output_dir_name "${run_name}"
    done
}

launch_fineweb_mbsz23_sweep

# RPJ-v2 #
### mbsz 46 ###
# RPJ-v2 fine-tuning sweep, micro batch 46 / world batch 1472.
#
# Invokes launch_frontier.py once per table entry below. Every launch shares
# the same settings (6 budget hours, 4 nodes, lr=1e-3, min_lr=1e-4,
# max_seq_len=1024) and differs only in which checkpoint of the rpj_v2_100B
# run is handed over via --finetune_checkpoint.
#
# Table entries are "<label>:<checkpoint step>". The label is embedded in the
# run name and output sub-directory (presumably the number of tokens seen by
# that checkpoint -- confirm).
# NOTE(review): run names end in max_iters_5740 but no --max_iters flag is
# passed here; presumably the config JSON supplies it -- confirm.
#
# Globals:  WRKSPC (read) - directory holding the rccl/conda tarballs
# Returns:  exit status of the last launcher invocation
launch_rpj_v2_mbsz46_sweep() {
    local repo="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
    local pretrain_run="rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
    local ckpt_dir="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/${pretrain_run}/checkpoints-ddp"
    local entry label step run_name

    for entry in 10B:00004679 30B:00014500 50B:00023309 70B:00031825 86B:00039774; do
        label=${entry%%:*}
        step=${entry##*:}
        # The same string is used for --run_name and --sub_output_dir_name.
        run_name="orca_finetune_rpj_v2_${label}-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740"

        # --extra_args is a single argument; the escaped quotes keep the
        # literal double quotes around the checkpoint path.
        python "${repo}/launch_scripts/launch_frontier.py" \
            --python_script="${repo}/train_retrieval_w_anticausal.py" \
            --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
            --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
            --budget_hours=6 \
            --nodes 4 \
            --config "${repo}/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json" \
            --run_name "${run_name}" \
            --extra_args="--max_seq_len=1024 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=46 --finetune_checkpoint=\"${ckpt_dir}/step-${step}-${pretrain_run}.pth\"" \
            --sub_output_dir_name "${run_name}"
    done
}

launch_rpj_v2_mbsz46_sweep

### mbsz 23 ###
# RPJ-v2 fine-tuning sweep, micro batch 23 / world batch 736.
#
# Invokes launch_frontier.py once per table entry below. Every launch shares
# the same settings (8 budget hours, 4 nodes, lr=1e-3, min_lr=1e-4,
# max_seq_len=1024) and differs only in which checkpoint of the rpj_v2_100B
# run is handed over via --finetune_checkpoint.
#
# Table entries are "<label>:<checkpoint step>". The label is embedded in the
# run name and output sub-directory (presumably the number of tokens seen by
# that checkpoint -- confirm).
# NOTE(review): run names end in max_iters_5740 but no --max_iters flag is
# passed here; presumably the config JSON supplies it -- confirm.
#
# Globals:  WRKSPC (read) - directory holding the rccl/conda tarballs
# Returns:  exit status of the last launcher invocation
launch_rpj_v2_mbsz23_sweep() {
    local repo="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40"
    local pretrain_run="rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
    local ckpt_dir="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/${pretrain_run}/checkpoints-ddp"
    local entry label step run_name

    for entry in 10B:00004679 30B:00014500 50B:00023309 70B:00031825 86B:00039774; do
        label=${entry%%:*}
        step=${entry##*:}
        # The same string is used for --run_name and --sub_output_dir_name.
        run_name="orca_finetune_rpj_v2_${label}-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740"

        # --extra_args is a single argument; the escaped quotes keep the
        # literal double quotes around the checkpoint path.
        python "${repo}/launch_scripts/launch_frontier.py" \
            --python_script="${repo}/train_retrieval_w_anticausal.py" \
            --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
            --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
            --budget_hours=8 \
            --nodes 4 \
            --config "${repo}/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json" \
            --run_name "${run_name}" \
            --extra_args="--max_seq_len=1024 --save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint=\"${ckpt_dir}/step-${step}-${pretrain_run}.pth\"" \
            --sub_output_dir_name "${run_name}"
    done
}

launch_rpj_v2_mbsz23_sweep

# ##### Pythia-160M Retrieval (Fineweb-100B) training #####
# Single launch on 4 nodes with a 30-hour budget: micro batch 23, world batch
# 736, lr=1e-3, min_lr=1e-4, fabric_strategy "ddp".
# The launcher arguments are collected in an array and expanded verbatim.
launch_args=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=30
    --nodes 4
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3
    # One argument; the double quotes inside the single quotes are literal.
    --extra_args='--optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --fabric_strategy="ddp"'
    # Same string as --run_name.
    --sub_output_dir_name fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"

# Fineweb-100B retrieval training with --loss_type=sequence_negative:
# 16 nodes, 30-hour budget, micro batch 20, world batch 2560, 57691 iterations,
# launched with --disable_net_gdr.
#
# NOTE(review): the original passed --optim_config.lr=1e-3 although the run
# name and output directory both say lr_4e-3 and --min_lr is 4e-4 (the same
# 10:1 lr/min_lr ratio used by the other launches in this file). The learning
# rate is set to 4e-3 here so the run matches its name; if 1e-3 was actually
# intended, revert this and rename the run / fix min_lr instead.
launch_args=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=30
    --nodes 16
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-sequence_negative_ddp_RR_lr_4e-3_max_iters_57691
    # One argument; the double quotes inside the single quotes are literal.
    --extra_args='--loss_type=sequence_negative --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=4e-3 --min_lr=4e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=20 --fabric_strategy="ddp"'
    --disable_net_gdr
    # Same string as --run_name.
    --sub_output_dir_name fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-sequence_negative_ddp_RR_lr_4e-3_max_iters_57691
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"

# Cross-device negatives run, reduce="suffix_dim": 1 node, 10-hour budget,
# micro batch 12, world batch 96, 57691 iterations, --disable_net_gdr.
# The launcher arguments are collected in an array and expanded verbatim.
launch_args=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=10
    --nodes 1
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name cross_device_negs_96_reduce_suffix_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-12-wbsz-96-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691
    # One argument; the double quotes inside the single quotes are literal.
    --extra_args='--negatives_cross_device=true --reduce="suffix_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=96 --micro_batch_size=12 --fabric_strategy="ddp"'
    --disable_net_gdr
    # Same string as --run_name.
    --sub_output_dir_name cross_device_negs_96_reduce_suffix_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-12-wbsz-96-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"

# Cross-device negatives run, reduce="suffix_dim": 1 node, 10-hour budget,
# micro batch 8, world batch 64, 57691 iterations, --disable_net_gdr.
# The launcher arguments are collected in an array and expanded verbatim.
launch_args=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=10
    --nodes 1
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name cross_device_negs_64_reduce_suffix_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-64-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691
    # One argument; the double quotes inside the single quotes are literal.
    --extra_args='--negatives_cross_device=true --reduce="suffix_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=64 --micro_batch_size=8 --fabric_strategy="ddp"'
    --disable_net_gdr
    # Same string as --run_name.
    --sub_output_dir_name cross_device_negs_64_reduce_suffix_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-64-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"

# Cross-device negatives run, reduce="suffix_dim": 1 node, 6-hour budget,
# micro batch 4 with world batch 736 (the run name tags this "gradacc"),
# 57691 iterations, --disable_net_gdr.
# The launcher arguments are collected in an array and expanded verbatim.
launch_args=(
    --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
    --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
    --environment="${WRKSPC}/frontier_conda_60.tar.gz"
    --budget_hours=6
    --nodes 1
    --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json
    --run_name cross_device_negs_32_reduce_suffix_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-gradacc-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691
    # One argument; the double quotes inside the single quotes are literal.
    --extra_args='--negatives_cross_device=true --reduce="suffix_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=4 --fabric_strategy="ddp"'
    --disable_net_gdr
    # Same string as --run_name.
    --sub_output_dir_name cross_device_negs_32_reduce_suffix_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-gradacc-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691
)
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"

# Fineweb retrieval launch: 1 node, 10-hour budget, micro batch 8 / world
# batch 64, --negatives_cross_device=true, --reduce="both_dim".
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=10
        --nodes 1
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
        --run_name "cross_device_negs_64_reduce_both_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-64-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691"
        --extra_args='--negatives_cross_device=true --reduce="both_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=64 --micro_batch_size=8 --fabric_strategy="ddp"'
        --disable_net_gdr
        --sub_output_dir_name "cross_device_negs_64_reduce_both_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-64-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Fineweb retrieval launch tagged "siglip": 16 nodes, 10-hour budget, micro
# batch 20 / world batch 2560, --reduce="suffix_dim", lr 4e-3 (min 4e-4).
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=10
        --nodes 16
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
        --run_name "siglip_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
        --extra_args='--reduce="suffix_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=4e-3 --min_lr=4e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=20 --fabric_strategy="ddp"'
        --disable_net_gdr
        --sub_output_dir_name "siglip_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Fineweb retrieval launch: 16 nodes, 10-hour budget, micro batch 4 / world
# batch 512, --reduce="both_dim", lr 1e-3 (min 1e-4).
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=10
        --nodes 16
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
        --run_name "fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim"
        --extra_args='--reduce="both_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=512 --micro_batch_size=4 --fabric_strategy="ddp"'
        --disable_net_gdr
        --sub_output_dir_name "fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Fineweb retrieval launch: 16 nodes, 10-hour budget, micro batch 8 / world
# batch 1024, --reduce="both_dim", lr 2e-3 (min 2e-4).
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=10
        --nodes 16
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
        --run_name "fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_both_dim"
        --extra_args='--reduce="both_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=2e-3 --min_lr=2e-4 --save_n_min_before_job_done=3 --world_batch_size=1024 --micro_batch_size=8 --fabric_strategy="ddp"'
        --disable_net_gdr
        --sub_output_dir_name "fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_both_dim"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Fineweb retrieval launch: 16 nodes, 10-hour budget, micro batch 16 / world
# batch 2048, --reduce="both_dim", lr 3e-3 (min 3e-4).
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=10
        --nodes 16
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
        --run_name "fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim"
        --extra_args='--reduce="both_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=3e-3 --min_lr=3e-4 --save_n_min_before_job_done=3 --world_batch_size=2048 --micro_batch_size=16 --fabric_strategy="ddp"'
        --disable_net_gdr
        --sub_output_dir_name "fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Fineweb retrieval launch: 16 nodes, 10-hour budget, micro batch 20 / world
# batch 2560, --reduce="prefix_dim", lr 4e-3 (min 4e-4).
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=10
        --nodes 16
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
        --run_name "reduce_prefix_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
        --extra_args='--reduce="prefix_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=4e-3 --min_lr=4e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=20 --fabric_strategy="ddp"'
        --disable_net_gdr
        --sub_output_dir_name "reduce_prefix_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Fineweb retrieval launch: 16 nodes, 10-hour budget, micro batch 20 / world
# batch 2560, --reduce="suffix_dim", lr 4e-3 (min 4e-4).
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=10
        --nodes 16
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
        --run_name "reduce_suffix_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
        --extra_args='--reduce="suffix_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=4e-3 --min_lr=4e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=20 --fabric_strategy="ddp"'
        --disable_net_gdr
        --sub_output_dir_name "reduce_suffix_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Fineweb retrieval launch: 16 nodes, 10-hour budget, micro batch 20 / world
# batch 2560, --reduce="both_dim", lr 4e-3 (min 4e-4).
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=10
        --nodes 16
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
        --run_name "reduce_both_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
        --extra_args='--reduce="both_dim" --max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=4e-3 --min_lr=4e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=20 --fabric_strategy="ddp"'
        --disable_net_gdr
        --sub_output_dir_name "reduce_both_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Fineweb retrieval launch: 16 nodes, 30-hour budget, micro batch 20 / world
# batch 2560, lr 4e-3 (min 4e-4); no --reduce flag is passed.
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=30
        --nodes 16
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
        --run_name "fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
        --extra_args='--max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=4e-3 --min_lr=4e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=20 --fabric_strategy="ddp"'
        --disable_net_gdr
        --sub_output_dir_name "fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Fineweb retrieval launch: 16 nodes, 30-hour budget, micro batch 40 / world
# batch 5120, --max_seq_len=1024, --max_iters=28845, lr 7e-3 (min 7e-4).
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=30
        --nodes 16
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
        --run_name "fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-40-wbsz-5120-ctx-var-1024-batch_negative_ddp_RR_lr_7e-3_max_iters_28845"
        --extra_args='--max_seq_len=1024 --max_iters=28845 --max_tokens=null --save_step_interval=500 --optim_config.lr=7e-3 --min_lr=7e-4 --save_n_min_before_job_done=3 --world_batch_size=5120 --micro_batch_size=40 --fabric_strategy="ddp"'
        --disable_net_gdr
        --sub_output_dir_name "fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-40-wbsz-5120-ctx-var-1024-batch_negative_ddp_RR_lr_7e-3_max_iters_28845"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Fineweb retrieval launch: 8 nodes, 30-hour budget, micro batch 23 / world
# batch 1472, --max_iters=100332, lr 2e-3 (min 2e-4).
# Unlike the neighbouring launches, --disable_net_gdr is not passed here.
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=30
        --nodes 8
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/fineweb_retrieval_train_pythia.json"
        --run_name "fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_100k"
        --extra_args='--max_iters=100332 --max_tokens=null --save_step_interval=1000 --optim_config.lr=2e-3 --min_lr=2e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=23 --fabric_strategy="ddp"'
        --sub_output_dir_name "fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_100k"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# ##### Pythia-160M Retrieval (RedPajama-V2-100B) training #####
# RedPajama-V2 retrieval launch (rpj_v2_retrieval_train_pythia.json): 4 nodes,
# 30-hour budget, micro batch 23 / world batch 736, lr 1e-3 (min 1e-4).
# --disable_net_gdr is not passed here.
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=30
        --nodes 4
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/rpj_v2_retrieval_train_pythia.json"
        --run_name "rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3"
        --extra_args='--optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --fabric_strategy="ddp"'
        --sub_output_dir_name "rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# RedPajama-V2 retrieval launch (rpj_v2_retrieval_train_pythia.json): 16 nodes,
# 30-hour budget, micro batch 20 / world batch 2560, --max_iters=57691,
# lr 4e-3 (min 4e-4).
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=30
        --nodes 16
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/rpj_v2_retrieval_train_pythia.json"
        --run_name "rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
        --extra_args='--max_iters=57691 --max_tokens=null --save_step_interval=500 --optim_config.lr=4e-3 --min_lr=4e-4 --save_n_min_before_job_done=3 --world_batch_size=2560 --micro_batch_size=20 --fabric_strategy="ddp"'
        --disable_net_gdr
        --sub_output_dir_name "rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# RedPajama-V2 retrieval launch (rpj_v2_retrieval_train_pythia.json): 8 nodes,
# 30-hour budget, micro batch 23 / world batch 1472, --max_iters=100332,
# lr 2e-3 (min 2e-4). --disable_net_gdr is not passed here.
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=30
        --nodes 8
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/rpj_v2_retrieval_train_pythia.json"
        --run_name "rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_100k"
        --extra_args='--max_iters=100332 --max_tokens=null --save_step_interval=1000 --optim_config.lr=2e-3 --min_lr=2e-4 --save_n_min_before_job_done=3 --world_batch_size=1472 --micro_batch_size=23 --fabric_strategy="ddp"'
        --sub_output_dir_name "rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_100k"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)



# ##### Pythia-160M LLM training #####
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train.py" \
#     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
#     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
#     --budget_hours=6 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/llm_train.json \
#     --run_name dolma-lm-pythia-160m-mbsz-24-wbsz-192_ddp_lr_6e-4 \
#     --extra_args='--world_batch_size=192 --micro_batch_size=24  --fabric_strategy="ddp"' \
#     --sub_output_dir_name dolma-lm-pythia-160m-mbsz-24-wbsz-192_ddp_lr_6e-4

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train.py" \
#     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
#     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
#     --budget_hours=12 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/llm_train_inst_data.json \
#     --run_name dolma_70_orca_30-lm-pythia-160m-mbsz-24-wbsz-192_ddp_lr_6e-4 \
#     --extra_args='--world_batch_size=192 --micro_batch_size=24  --fabric_strategy="ddp"' \
#     --sub_output_dir_name dolma_70_orca_30-lm-pythia-160m-mbsz-24-wbsz-192_ddp_lr_6e-4

# LLM training launch (train.py with llm_train_inst_data.json): 1 node,
# 12-hour budget, micro batch 23 / world batch 184.
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=12
        --nodes 1
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/llm_train_inst_data.json"
        --run_name "dolma_50_orca_50-lm-pythia-160m-mbsz-23-wbsz-184_ddp_lr_6e-4"
        --extra_args='--world_batch_size=184 --micro_batch_size=23  --fabric_strategy="ddp"'
        --sub_output_dir_name "dolma_50_orca_50-lm-pythia-160m-mbsz-23-wbsz-184_ddp_lr_6e-4"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)


# ##### Retrieval training #####
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
#     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
#     --budget_hours=6 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json \
#     --run_name dolma-retrieval-dual-causal-pythia-160m-mbsz-24-wbsz-192-ctx-var-batch_negative_ddp_RR_lr_3e-4 \
#     --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --world_batch_size=192 --micro_batch_size=24 --fabric_strategy="ddp"' \
#     --sub_output_dir_name dolma-retrieval-dual-causal-pythia-160m-mbsz-24-wbsz-192-ctx-var-batch_negative_ddp_RR_lr_3e-4

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
#     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
#     --budget_hours=6 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json \
#     --run_name dolma-retrieval-dual-causal-pythia-160m-mbsz-24-wbsz-192-ctx-var-batch_negative_ddp_RR_lr_3e-4_cum_mean \
#     --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --world_batch_size=192 --micro_batch_size=24 --fabric_strategy="ddp" --mean_pooling=True' \
#     --sub_output_dir_name dolma-retrieval-dual-causal-pythia-160m-mbsz-24-wbsz-192-ctx-var-batch_negative_ddp_RR_lr_3e-4_cum_mean

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
#     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
#     --budget_hours=18 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_concatenated_inst_data.json \
#     --run_name v2_dolma_70_orca_30-retrieval-dual-causal-pythia-160m-mbsz-24-wbsz-192-ctx-var-batch_negative_ddp_RR_lr_3e-4 \
#     --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --world_batch_size=192 --micro_batch_size=24 --fabric_strategy="ddp"' \
#     --sub_output_dir_name v2_dolma_70_orca_30-retrieval-dual-causal-pythia-160m-mbsz-24-wbsz-192-ctx-var-batch_negative_ddp_RR_lr_3e-4

# Retrieval launch with retrieval_train_pythia_concatenated_inst_data.json:
# 1 node, 6-hour budget, micro batch 23 / world batch 184.
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=6
        --nodes 1
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_concatenated_inst_data.json"
        --run_name "dolma_50_orca_50-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-184-ctx-var-batch_negative_ddp_RR_lr_3e-4"
        --extra_args='--save_n_min_before_job_done=3 --world_batch_size=184 --micro_batch_size=23 --fabric_strategy="ddp"'
        --sub_output_dir_name "dolma_50_orca_50-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-184-ctx-var-batch_negative_ddp_RR_lr_3e-4"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Retrieval launch with retrieval_train_pythia_natural_inst_data.json:
# 1 node, 6-hour budget, micro batch 24 / world batch 192, save and eval
# step intervals both set to 1000.
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=6
        --nodes 1
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json"
        --run_name "orca_finetune_dolma-retrieval-dual-causal-pythia-160m-mbsz-24-wbsz-192-ctx-var-batch_negative_ddp_RR_lr_3e-4"
        --extra_args='--save_step_interval=1000 --eval_step_interval=1000 --world_batch_size=192 --micro_batch_size=24 --fabric_strategy="ddp"'
        --sub_output_dir_name "orca_finetune_dolma-retrieval-dual-causal-pythia-160m-mbsz-24-wbsz-192-ctx-var-batch_negative_ddp_RR_lr_3e-4"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)


##### mbsz and wbsz scaling experiments #####
##### Hold mbsz constant at 6 and vary wbsz over [48, 96, 192] #####
# Batch-size sweep point: micro batch 6 / world batch 48 on 1 node,
# 4-hour budget, retrieval_train_pythia.json.
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=4
        --nodes 1
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json"
        --run_name "mbsz_6_wbsz_48_lr_3e-4__dolma-retrieval-dual-causal-pythia-160m-ctx-var-batch_negative_ddp_RR"
        --extra_args='--world_batch_size=48 --micro_batch_size=6 --fabric_strategy="ddp"'
        --sub_output_dir_name "mbsz_6_wbsz_48_lr_3e-4__dolma-retrieval-dual-causal-pythia-160m-ctx-var-batch_negative_ddp_RR"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Batch-size sweep point: micro batch 6 / world batch 96 on 2 nodes,
# 4-hour budget, retrieval_train_pythia.json.
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=4
        --nodes 2
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json"
        --run_name "mbsz_6_wbsz_96_lr_3e-4__dolma-retrieval-dual-causal-pythia-160m-ctx-var-batch_negative_ddp_RR"
        --extra_args='--world_batch_size=96 --micro_batch_size=6 --fabric_strategy="ddp"'
        --sub_output_dir_name "mbsz_6_wbsz_96_lr_3e-4__dolma-retrieval-dual-causal-pythia-160m-ctx-var-batch_negative_ddp_RR"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Batch-size sweep point: micro batch 6 / world batch 192 on 4 nodes,
# 4-hour budget, retrieval_train_pythia.json.
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=4
        --nodes 4
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json"
        --run_name "mbsz_6_wbsz_192_lr_3e-4__dolma-retrieval-dual-causal-pythia-160m-ctx-var-batch_negative_ddp_RR"
        --extra_args='--world_batch_size=192 --micro_batch_size=6 --fabric_strategy="ddp"'
        --sub_output_dir_name "mbsz_6_wbsz_192_lr_3e-4__dolma-retrieval-dual-causal-pythia-160m-ctx-var-batch_negative_ddp_RR"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

##### Hold wbsz constant at 192 and vary mbsz over [6, 12, 24] (the mbsz=6, wbsz=192 point is already covered by the sweep above) #####
# Batch-size sweep point: micro batch 12 / world batch 192 on 2 nodes,
# 4-hour budget, retrieval_train_pythia.json.
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=4
        --nodes 2
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json"
        --run_name "mbsz_12_wbsz_192_lr_3e-4__dolma-retrieval-dual-causal-pythia-160m-ctx-var-batch_negative_ddp_RR"
        --extra_args='--world_batch_size=192 --micro_batch_size=12 --fabric_strategy="ddp"'
        --sub_output_dir_name "mbsz_12_wbsz_192_lr_3e-4__dolma-retrieval-dual-causal-pythia-160m-ctx-var-batch_negative_ddp_RR"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Batch-size sweep point: micro batch 24 / world batch 192 on 1 node,
# 4-hour budget, retrieval_train_pythia.json.
# The subshell keeps launch_args out of the rest of the script.
(
    launch_args=(
        --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/train_retrieval_w_anticausal.py"
        --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz"
        --environment="${WRKSPC}/frontier_conda_60.tar.gz"
        --budget_hours=4
        --nodes 1
        --config "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json"
        --run_name "mbsz_24_wbsz_192_lr_3e-4__dolma-retrieval-dual-causal-pythia-160m-ctx-var-batch_negative_ddp_RR"
        --extra_args='--world_batch_size=192 --micro_batch_size=24 --fabric_strategy="ddp"'
        --sub_output_dir_name "mbsz_24_wbsz_192_lr_3e-4__dolma-retrieval-dual-causal-pythia-160m-ctx-var-batch_negative_ddp_RR"
    )
    python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py "${launch_args[@]}"
)

# Debug
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_minutes=30 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/llm_train.json \
#     --run_name cosmopedia-lm-pythia-160m-bsz-200_ddp_debug \
#     --sub_output_dir_name cosmopedia-lm-pythia-160m-bsz-200_ddp_debug \
#     --debug_qos

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_minutes=30 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json \
#     --run_name cosmopedia-retrieval-dual-causal-pythia-160m-bsz-96-ctx-rand-batch_negative_ddp_RR_lr_1e-5_debug \
#     --extra_args='--learning_rate=1e-5 --min_lr=4e-6 --save_and_eval_interval=1000 --fabric_strategy="ddp"' \
#     --sub_output_dir_name cosmopedia-retrieval-dual-causal-pythia-160m-bsz-96-ctx-rand-batch_negative_ddp_RR_lr_1e-5_debug \
#     --debug_qos

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_minutes=30 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --run_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_ddp_PP_debug_2 \
#     --extra_args='--save_and_eval_interval=20 --fabric_strategy="ddp" --run_name="cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_ddp_PP_debug_2"' \
#     --sub_output_dir_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_ddp_PP_debug_2 \
#     --debug_qos

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_minutes=30 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --run_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-16-ctx-rand-batch_negative_ddp_PP_lr_5e-4_debug_2 \
#     --extra_args='--learning_rate=5e-4 --save_and_eval_interval=100 --fabric_strategy="ddp" --pretrained_prefix_model=True --pretrained_suffix_model=True' \
#     --sub_output_dir_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-16-ctx-rand-batch_negative_ddp_PP_lr_5e-4_debug_2 \
#     --debug_qos

# source activate /ccs/home/XXXX-22/frontier_conda


# ##### Pythia-160M #####
# #### LLM Training ####
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/llm_train.json \
#     --run_name dolma-lm-pythia-160m-bsz-25_ddp_lr_6e-4 \
#     --extra_args='--learning_rate=6e-4 --min_lr=6e-5 --fabric_strategy="ddp" --warmup_steps=2000' \
#     --sub_output_dir_name dolma-lm-pythia-160m-bsz-25_ddp_lr_6e-4 \
#     --launch_immediately

# #### Retrieval Training ####
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${WRKSPC}/tiny_plugins_rccl.tar.gz" \
#     --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
#     --budget_hours=5 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json \
#     --run_name dolma-retrieval-dual-causal-pythia-160m-bsz-25-ctx-rand-batch_negative_ddp_RR_lr_3e-4 \
#     --extra_args='--fabric_strategy="ddp"' \
#     --sub_output_dir_name dolma-retrieval-dual-causal-pythia-160m-bsz-25-ctx-rand-batch_negative_ddp_RR_lr_3e-4

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json \
#     --run_name dolma-retrieval-dual-causal-pythia-160m-bsz-25-ctx-rand-batch_negative_ddp_RR_lr_1e-5 \
#     --extra_args='--learning_rate=1e-5 --min_lr=1e-6 --fabric_strategy="ddp" --warmup_steps=4000 --lr_schedule="constant"' \
#     --sub_output_dir_name dolma-retrieval-dual-causal-pythia-160m-bsz-25-ctx-rand-batch_negative_ddp_RR_lr_1e-5 \
#     --launch_immediately

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json \
#     --run_name dolma-retrieval-dual-causal-pythia-160m-bsz-25-ctx-rand-batch_negative_ddp_RR_lr_1e-5_warmup_10k \
#     --extra_args='--learning_rate=1e-5 --min_lr=1e-6 --fabric_strategy="ddp" --warmup_steps=10000 --lr_schedule="constant"' \
#     --sub_output_dir_name dolma-retrieval-dual-causal-pythia-160m-bsz-25-ctx-rand-batch_negative_ddp_RR_lr_1e-5_warmup_10k \
#     --launch_immediately

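# #### Micro batch size scaling experiments ####
# # The runs below sweep --micro_batch_size over 8 / 16 / 24 / 30, with --world_batch_size set to 8x the micro batch size in each.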
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=10 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json \
#     --run_name dolma-retrieval-dual-causal-pythia-160m-microbsz-8-ctx-rand-batch_negative_ddp_RR_lr_3e-4_debug \
#     --extra_args='--micro_batch_size=8 --world_batch_size=64 --learning_rate=3e-4 --min_lr=3e-5 --fabric_strategy="ddp" --warmup_steps=2000' \
#     --sub_output_dir_name dolma-retrieval-dual-causal-pythia-160m-microbsz-8-ctx-rand-batch_negative_ddp_RR_lr_3e-4_debug \
#     --launch_immediately

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=10 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json \
#     --run_name dolma-retrieval-dual-causal-pythia-160m-microbsz-16-ctx-rand-batch_negative_ddp_RR_lr_3e-4_debug \
#     --extra_args='--micro_batch_size=16 --world_batch_size=128 --learning_rate=3e-4 --min_lr=3e-5 --fabric_strategy="ddp" --warmup_steps=2000' \
#     --sub_output_dir_name dolma-retrieval-dual-causal-pythia-160m-microbsz-16-ctx-rand-batch_negative_ddp_RR_lr_3e-4_debug \
#     --launch_immediately

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=10 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json \
#     --run_name dolma-retrieval-dual-causal-pythia-160m-microbsz-24-ctx-rand-batch_negative_ddp_RR_lr_3e-4_debug_2 \
#     --extra_args='--micro_batch_size=24 --world_batch_size=192 --learning_rate=3e-4 --min_lr=3e-5 --fabric_strategy="ddp" --warmup_steps=2000' \
#     --sub_output_dir_name dolma-retrieval-dual-causal-pythia-160m-microbsz-24-ctx-rand-batch_negative_ddp_RR_lr_3e-4_debug_2 \
#     --launch_immediately

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=10 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia.json \
#     --run_name dolma-retrieval-dual-causal-pythia-160m-microbsz-30-ctx-rand-batch_negative_ddp_RR_lr_3e-4_debug \
#     --extra_args='--micro_batch_size=30 --world_batch_size=240 --learning_rate=3e-4 --min_lr=3e-5 --fabric_strategy="ddp" --warmup_steps=2000' \
#     --sub_output_dir_name dolma-retrieval-dual-causal-pythia-160m-microbsz-30-ctx-rand-batch_negative_ddp_RR_lr_3e-4_debug \
#     --launch_immediately

##### TinyLlama-1.1B #####
# #### DDP ####
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --extra_args='--learning_rate=1e-5 --min_lr=1e-6 --fabric_strategy="ddp" --pretrained_prefix_model=True --pretrained_suffix_model=True' \
#     --run_name concatedOrca-retrieval-dual-causal-llama-1.1b-bsz-2-ctx-rand-batch_negative_ddp_PP_lr_1e-5 \
#     --sub_output_dir_name concatedOrca-retrieval-dual-causal-llama-1.1b-bsz-2-ctx-rand-batch_negative_ddp_PP_lr_1e-5 \
#     --launch_immediately

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --extra_args='--learning_rate=1e-5 --min_lr=1e-6 --fabric_strategy="ddp" --pretrained_prefix_model=False --pretrained_suffix_model=False' \
#     --run_name fixed_cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-2-ctx-rand-batch_negative_ddp_RR_lr_1e-5 \
#     --sub_output_dir_name fixed_cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-2-ctx-rand-batch_negative_ddp_RR_lr_1e-5 \
#     --launch_immediately

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --extra_args='--learning_rate=1e-5 --min_lr=1e-6 --fabric_strategy="ddp" --pretrained_prefix_model=True --pretrained_suffix_model=True' \
#     --run_name fixed_cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-2-ctx-rand-batch_negative_ddp_PP_lr_1e-5 \
#     --sub_output_dir_name fixed_cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-2-ctx-rand-batch_negative_ddp_PP_lr_1e-5 \
#     --launch_immediately

# # LR 5e-5
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --extra_args='--learning_rate=5e-5 --fabric_strategy="ddp" --pretrained_prefix_model=True --pretrained_suffix_model=True' \
#     --run_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-16-ctx-rand-batch_negative_ddp_PP_lr_5e-5 \
#     --sub_output_dir_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-16-ctx-rand-batch_negative_ddp_PP_lr_5e-5 \
#     --launch_immediately

# # LR 1e-5
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --extra_args='--learning_rate=1e-5 --fabric_strategy="ddp" --pretrained_prefix_model=True --pretrained_suffix_model=True' \
#     --run_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-16-ctx-rand-batch_negative_ddp_PP_lr_1e-5 \
#     --sub_output_dir_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-16-ctx-rand-batch_negative_ddp_PP_lr_1e-5 \
#     --launch_immediately

# # LR 1e-4
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --extra_args='--learning_rate=1e-4 --fabric_strategy="ddp" --pretrained_prefix_model=True --pretrained_suffix_model=True' \
#     --run_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-16-ctx-rand-batch_negative_ddp_PP_lr_1e-4 \
#     --sub_output_dir_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-16-ctx-rand-batch_negative_ddp_PP_lr_1e-4 \
#     --launch_immediately

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --run_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_ddp_PP \
#     --launch_immediately

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --extra_args='--fabric_strategy="ddp" --pretrained_prefix_model=True --pretrained_suffix_model=False --run_name="cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_ddp_PR"' \
#     --run_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_ddp_PR \
#     --sub_output_dir_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_ddp_PR \
#     --launch_immediately

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --extra_args='--fabric_strategy="ddp" --pretrained_prefix_model=False --pretrained_suffix_model=True --run_name="cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_ddp_RP"' \
#     --run_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_ddp_RP \
#     --sub_output_dir_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_ddp_RP \
#     --launch_immediately

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --extra_args='--fabric_strategy="ddp" --pretrained_prefix_model=False --pretrained_suffix_model=False --run_name="cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_ddp_RR"' \
#     --run_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_ddp_RR \
#     --sub_output_dir_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_ddp_RR \
#     --launch_immediately

# #### FSDP ####
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --extra_args='--fabric_strategy="fsdp" --pretrained_prefix_model=True --pretrained_suffix_model=True --run_name="cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_fsdp_PP"' \
#     --run_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_fsdp_PP \
#     --sub_output_dir_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_fsdp_PP \
#     --launch_immediately

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --extra_args='--fabric_strategy="fsdp" --pretrained_prefix_model=True --pretrained_suffix_model=False --run_name="cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_fsdp_PR"' \
#     --run_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_fsdp_PR \
#     --sub_output_dir_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_fsdp_PR \
#     --launch_immediately

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --extra_args='--fabric_strategy="fsdp" --pretrained_prefix_model=False --pretrained_suffix_model=True --run_name="cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_fsdp_RP"' \
#     --run_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_fsdp_RP \
#     --sub_output_dir_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_fsdp_RP \
#     --launch_immediately

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/launch_frontier.py \
#     --python_script="/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/pretrain_umd/train_retrieval_w_anticausal.py" \
#     --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
#     --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
#     --budget_hours=15 \
#     --nodes 1 \
#     --config /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/frontier_jobs/ddp_pp.json \
#     --extra_args='--fabric_strategy="fsdp" --pretrained_prefix_model=False --pretrained_suffix_model=False --run_name="cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_fsdp_RR"' \
#     --run_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_fsdp_RR \
#     --sub_output_dir_name cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-8-ctx-rand-batch_negative_fsdp_RR \
#     --launch_immediately
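# #### Checkpoint preparation (not a launch_frontier.py job) ####
# # Download EleutherAI/pythia-410m under proj-shared/language_models/external, then run the HF checkpoint conversion script on it.
# python scripts/download.py --repo_id EleutherAI/pythia-410m --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external
# python scripts/convert_hf_checkpoint.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-410m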
