# Fine-tuning launch (baseline variant): runs launch_frontier.py for
# train_retrieval_w_anticausal.py with --nodes 4 and --budget_hours=4, loading
# the step-00013000 checkpoint of ${pretrain_run} via --finetune_checkpoint.
# Requires WRKSPC: the directory holding tiny_plugins_rccl.tar.gz and
# frontier_conda_60.tar.gz. The ${WRKSPC:?} guard aborts the command instead of
# silently passing "/tiny_plugins_rccl.tar.gz" when WRKSPC is unset or empty.
repo_root="/XXXX-30/XXXX-29/XXXX-31/scratch/njain17/new_workspace/XXXX-40"
pretrain_run="fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
# The checkpoint directory and file name both embed the pretraining run name;
# deriving them from one variable keeps the two in sync.
finetune_ckpt="${repo_root}/output/${pretrain_run}/checkpoints-ddp/step-00013000-${pretrain_run}.pth"
# Shared by --run_name and --sub_output_dir_name.
finetune_run="orca_finetune_${pretrain_run}_13000ckt"

python "${repo_root}/launch_scripts/launch_frontier.py" \
    --python_script="${repo_root}/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC:?WRKSPC must be set to the directory containing the rccl and conda tarballs}/tiny_plugins_rccl.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    --budget_hours=4 \
    --nodes 4 \
    --config "${repo_root}/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json" \
    --run_name "${finetune_run}" \
    --extra_args="--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint=\"${finetune_ckpt}\"" \
    --disable_net_gdr \
    --sub_output_dir_name "${finetune_run}"

# output/fineweb_100B-retrieval-dual-causal-pythia-160m-suffix_is_prefix-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00013000-fineweb_100B-retrieval-dual-causal-pythia-160m-suffix_is_prefix-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth
# Fine-tuning launch (suffix_is_prefix variant): runs launch_frontier.py for
# train_retrieval_w_anticausal.py with --nodes 4 and --budget_hours=4, passing
# "--suffix_is_prefix True" and loading the step-00013000 checkpoint of
# ${pretrain_run} via --finetune_checkpoint.
# Requires WRKSPC: the directory holding tiny_plugins_rccl.tar.gz and
# frontier_conda_60.tar.gz. The ${WRKSPC:?} guard aborts the command instead of
# silently passing "/tiny_plugins_rccl.tar.gz" when WRKSPC is unset or empty.
repo_root="/XXXX-30/XXXX-29/XXXX-31/scratch/njain17/new_workspace/XXXX-40"
pretrain_run="fineweb_100B-retrieval-dual-causal-pythia-160m-suffix_is_prefix-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
# The checkpoint directory and file name both embed the pretraining run name;
# deriving them from one variable keeps the two in sync.
finetune_ckpt="${repo_root}/output/${pretrain_run}/checkpoints-ddp/step-00013000-${pretrain_run}.pth"
finetune_run="orca_finetune_${pretrain_run}_13000ckt"
# NOTE(review): the output directory name puts "suffix_is_prefix" before
# "pythia-160m", unlike --run_name. Kept exactly as it was; confirm whether the
# two names are meant to differ.
sub_output_dir="orca_finetune_fineweb_100B-retrieval-dual-causal-suffix_is_prefix-pythia-160m-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691_13000ckt"

python "${repo_root}/launch_scripts/launch_frontier.py" \
    --python_script="${repo_root}/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC:?WRKSPC must be set to the directory containing the rccl and conda tarballs}/tiny_plugins_rccl.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    --budget_hours=4 \
    --nodes 4 \
    --config "${repo_root}/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json" \
    --run_name "${finetune_run}" \
    --extra_args="--save_step_interval=1000 --eval_step_interval=1000 --suffix_is_prefix True --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint=\"${finetune_ckpt}\"" \
    --disable_net_gdr \
    --sub_output_dir_name "${sub_output_dir}"

# fineweb_100B-retrieval-dual-causal-pythia-160m-suffix_is_prefix-flip_rope_embedding-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691
# Fine-tuning launch from the flip_rope_embedding pretraining run named in the
# comment above: runs launch_frontier.py for train_retrieval_w_anticausal.py
# with --nodes 4 and --budget_hours=4.
#
# Fixes relative to the previous version of this command, which was a partly
# edited copy of the suffix_is_prefix launch:
#   * --run_name / --sub_output_dir_name were identical to the suffix_is_prefix
#     launch, so both jobs shared one output location; they are now unique.
#   * the checkpoint directory was the flip_rope_embedding run but the file
#     name was the suffix_is_prefix run; both now derive from ${pretrain_run}.
#   * "--flip_rope_embedding_suffix True" is passed, matching the other
#     commands in this file that use this model.
# NOTE(review): the following launch in this file (run name "...flip_embedding...")
# loads the same checkpoint with the same flags; confirm whether both are
# needed before submitting.
#
# Requires WRKSPC: the directory holding tiny_plugins_rccl.tar.gz and
# frontier_conda_60.tar.gz. The ${WRKSPC:?} guard aborts the command instead of
# silently passing "/tiny_plugins_rccl.tar.gz" when WRKSPC is unset or empty.
repo_root="/XXXX-30/XXXX-29/XXXX-31/scratch/njain17/new_workspace/XXXX-40"
pretrain_run="fineweb_100B-retrieval-dual-causal-pythia-160m-suffix_is_prefix-flip_rope_embedding-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
finetune_ckpt="${repo_root}/output/${pretrain_run}/checkpoints-ddp/step-00013000-${pretrain_run}.pth"
finetune_run="orca_finetune_${pretrain_run}_13000ckt"
# Follows the file's output-dir convention of placing the variant tags before
# "pythia-160m".
sub_output_dir="orca_finetune_fineweb_100B-retrieval-dual-causal-suffix_is_prefix-flip_rope_embedding-pythia-160m-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691_13000ckt"

python "${repo_root}/launch_scripts/launch_frontier.py" \
    --python_script="${repo_root}/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC:?WRKSPC must be set to the directory containing the rccl and conda tarballs}/tiny_plugins_rccl.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    --budget_hours=4 \
    --nodes 4 \
    --config "${repo_root}/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json" \
    --run_name "${finetune_run}" \
    --extra_args="--save_step_interval=1000 --eval_step_interval=1000 --suffix_is_prefix True --flip_rope_embedding_suffix True --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --finetune_checkpoint=\"${finetune_ckpt}\"" \
    --disable_net_gdr \
    --sub_output_dir_name "${sub_output_dir}"

# /XXXX-30/XXXX-29/XXXX-31/scratch/njain17/new_workspace/XXXX-40/output/fineweb_100B-retrieval-dual-causal-pythia-160m-suffix_is_prefix-flip_rope_embedding-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00013000-fineweb_100B-retrieval-dual-causal-pythia-160m-suffix_is_prefix-flip_rope_embedding-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth
# Fine-tuning launch (suffix_is_prefix + flip_rope_embedding_suffix variant):
# runs launch_frontier.py for train_retrieval_w_anticausal.py with --nodes 4
# and --budget_hours=4, loading the step-00013000 checkpoint of ${pretrain_run}
# via --finetune_checkpoint.
# Requires WRKSPC: the directory holding tiny_plugins_rccl.tar.gz and
# frontier_conda_60.tar.gz. The ${WRKSPC:?} guard aborts the command instead of
# silently passing "/tiny_plugins_rccl.tar.gz" when WRKSPC is unset or empty.
repo_root="/XXXX-30/XXXX-29/XXXX-31/scratch/njain17/new_workspace/XXXX-40"
pretrain_run="fineweb_100B-retrieval-dual-causal-pythia-160m-suffix_is_prefix-flip_rope_embedding-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
# The checkpoint directory and file name both embed the pretraining run name;
# deriving them from one variable keeps the two in sync.
finetune_ckpt="${repo_root}/output/${pretrain_run}/checkpoints-ddp/step-00013000-${pretrain_run}.pth"
# NOTE(review): both names below say "flip_embedding" while the pretraining run
# says "flip_rope_embedding", and they order the variant tags differently from
# each other. Kept exactly as they were; confirm this is intended.
finetune_run="orca_finetune_fineweb_100B-retrieval-dual-causal-pythia-160m-suffix_is_prefix-flip_embedding-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691_13000ckt"
sub_output_dir="orca_finetune_fineweb_100B-retrieval-dual-causal-suffix_is_prefix-flip_embedding-pythia-160m-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691_13000ckt"

python "${repo_root}/launch_scripts/launch_frontier.py" \
    --python_script="${repo_root}/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC:?WRKSPC must be set to the directory containing the rccl and conda tarballs}/tiny_plugins_rccl.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    --budget_hours=4 \
    --nodes 4 \
    --config "${repo_root}/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json" \
    --run_name "${finetune_run}" \
    --extra_args="--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --suffix_is_prefix True --flip_rope_embedding_suffix True --finetune_checkpoint=\"${finetune_ckpt}\"" \
    --disable_net_gdr \
    --sub_output_dir_name "${sub_output_dir}"
### EVAL

# Runs eval/eval_retrieval_anticausal.py against the
# orca_retrieval_val_data_10k_pythia data directory with
# "--suffix_is_prefix True --flip_rope_embedding_suffix True".
# The script path is relative, so run this from the directory containing eval/.
#
# --finetuned_path used to be given no value, so the next token on the command
# line was "--precision". The path is now taken from FINETUNED_PATH; the
# ${FINETUNED_PATH:?} guard aborts the command with a message when that
# variable is unset or empty.
# NOTE(review): SOURCE does not show which checkpoint was intended — set
# FINETUNED_PATH to the fine-tuned checkpoint produced by the matching launch.
python eval/eval_retrieval_anticausal.py \
    --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/orca_finetune_fineweb_100B-retrieval-dual-causal-suffix_is_prefix-flip_embedding-pythia-160m-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691_13000ckt \
    --finetuned_path "${FINETUNED_PATH:?set FINETUNED_PATH to the fine-tuned checkpoint to evaluate}" \
    --precision bf16-mixed --attn_type causal_attn \
    --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia \
    --random_split False --prefix_add_eos False --suffix_add_eos False --suffix_is_prefix True --flip_rope_embedding_suffix True
# /XXXX-30/XXXX-29/XXXX-31/scratch/njain17/new_workspace/XXXX-40/output/fineweb_100B-retrieval-dual-causal-pythia-160m-suffix_is_prefix-nope_pos_embedding-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00013000-fineweb_100B-retrieval-dual-causal-pythia-160m-suffix_is_prefix-nope_pos_embedding-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth
# Fine-tuning launch (suffix_is_prefix + nope_pos_embedding variant): runs
# launch_frontier.py for train_retrieval_w_anticausal.py with --nodes 4 and
# --budget_hours=4, loading the step-00013000 checkpoint of ${pretrain_run}
# via --finetune_checkpoint.
# Requires WRKSPC: the directory holding tiny_plugins_rccl.tar.gz and
# frontier_conda_60.tar.gz. The ${WRKSPC:?} guard aborts the command instead of
# silently passing "/tiny_plugins_rccl.tar.gz" when WRKSPC is unset or empty.
repo_root="/XXXX-30/XXXX-29/XXXX-31/scratch/njain17/new_workspace/XXXX-40"
pretrain_run="fineweb_100B-retrieval-dual-causal-pythia-160m-suffix_is_prefix-nope_pos_embedding-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691"
# The checkpoint directory and file name both embed the pretraining run name;
# deriving them from one variable keeps the two in sync.
finetune_ckpt="${repo_root}/output/${pretrain_run}/checkpoints-ddp/step-00013000-${pretrain_run}.pth"
finetune_run="orca_finetune_${pretrain_run}_13000ckt"
# NOTE(review): the output directory name puts the variant tags before
# "pythia-160m", unlike --run_name. Kept exactly as it was; confirm whether the
# two names are meant to differ.
sub_output_dir="orca_finetune_fineweb_100B-retrieval-dual-causal-suffix_is_prefix-nope_pos_embedding-pythia-160m-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691_13000ckt"

python "${repo_root}/launch_scripts/launch_frontier.py" \
    --python_script="${repo_root}/train_retrieval_w_anticausal.py" \
    --rccl_installdir="${WRKSPC:?WRKSPC must be set to the directory containing the rccl and conda tarballs}/tiny_plugins_rccl.tar.gz" \
    --environment="${WRKSPC}/frontier_conda_60.tar.gz" \
    --budget_hours=4 \
    --nodes 4 \
    --config "${repo_root}/launch_scripts/XXXX-22/frontier_jobs/retrieval_train_pythia_natural_inst_data.json" \
    --run_name "${finetune_run}" \
    --extra_args="--save_step_interval=1000 --eval_step_interval=1000 --optim_config.lr=1e-3 --min_lr=1e-4 --save_n_min_before_job_done=3 --world_batch_size=736 --micro_batch_size=23 --suffix_is_prefix True --nope_pos_embedding True --finetune_checkpoint=\"${finetune_ckpt}\"" \
    --disable_net_gdr \
    --sub_output_dir_name "${sub_output_dir}"