# source activate \$WRKSPC/frontier_conda_60
# orca - 4223922 rows
# fineweb - 147688373 rows
# rpj_v2 - 66387646 rows
# nomic_supervised - 1695819 rows
# nomic_positive_only - 238998494 rows
# max(num_negatives["len_neg"])
# 209
# min(num_negatives["len_neg"])
# 15
# sum(num_negatives["len_neg"]) / len(num_negatives["len_neg"])
# 91.64043096580473
# np.median(num_negatives["len_neg"])
# 51.0

# 17337511550 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00014500-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim.pth
# 17050646347 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00028500-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_both_dim.pth
# 17036616018 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00057000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim.pth

# 15542924500 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/reduce_both_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00010390-reduce_both_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth
# 16159572693 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00013500-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim.pth
# 16153060509 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00027000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_both_dim.pth
# 16150312043 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00054000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim.pth

# 14212681772 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/reduce_both_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00009500-reduce_both_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth
# 14366759073 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00012000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim.pth
# 14359511489 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00024000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_both_dim.pth
# 14358663054 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00048000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim.pth


# 30376087734 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-40-wbsz-5120-ctx-var-1024-batch_negative_ddp_RR_lr_7e-3_max_iters_28845/checkpoints-ddp/step-00012000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-40-wbsz-5120-ctx-var-1024-batch_negative_ddp_RR_lr_7e-3_max_iters_28845.pth
# 10126629491 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-40-wbsz-5120-ctx-var-1024-batch_negative_ddp_RR_lr_7e-3_max_iters_28845/checkpoints-ddp/step-00004000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-40-wbsz-5120-ctx-var-1024-batch_negative_ddp_RR_lr_7e-3_max_iters_28845.pth

# 86797225450 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00057690-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth
# 70635466786 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00047000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth
# 50142896243 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00033500-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth
# 30656957149 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00020500-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth
# 10480034450 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00007000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth
# 1493573045 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00001000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth

# 86766464258 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00039774-rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth
# 70238489235 tokens - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00031825-rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth
# 50755898994 tokens  - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00023309-rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth
# 30811419112 tokens  - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00014500-rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth
# 10187750355 tokens  - /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00004679-rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth

# Single-checkpoint evals: each command gives eval/mteb_eval.py a --model_path
# plus one explicit --checkpoint_dir .pth file.
# The first two commands share the same prompt/pooling flags and pass
# --eval_lm True; they differ only in the model/checkpoint being evaluated.
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/clm_pretraing_run_v1_100BT --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/clm_pretraing_run_v1_100BT/combined_ckpts/step-00072000_ckpt.pth --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_q_prefix_d --pooling_method mean --overwrite_results False --batch_size 64 --eval_lm True
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m/lit_model.pth --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_q_prefix_d --pooling_method mean --overwrite_results False --batch_size 64 --eval_lm True
# This run passes no --prompt_style, --pooling_method or --eval_lm, so it
# relies on whatever defaults eval/mteb_eval.py defines for them.
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/retrieval-pretrained-01 --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/retrieval-pretrained-01/lit_ckpts/step-00020000_ckpt.pth --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results False --batch_size 512

# Launcher runs: launch_mteb_eval.py is given a --model_dir rather than a
# single checkpoint file. The first two commands also pass --resume_from <step>
# and --task all.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v4_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp1024_keep_368k_negs_128N_truncate_normal --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results False --batch_size 64 --skip_wandb --resume_from step-00130955 --task all
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v4_fineweb_100b_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp1024_keep_368k_negs_128N_truncate_normal --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --no_instruction False --include_meta_tokens True --include_long_prompt True --overwrite_results False --batch_size 64 --prompt_style bos_prefix_q_prefix_d --skip_wandb --resume_from step-00071450 --task all
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_fineweb_stack_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --no_instruction False --include_meta_tokens True --include_long_prompt True --overwrite_results False --batch_size 512 --prompt_style bos_prefix_q_prefix_d --skip_wandb
# MLM-pretrained model dir: meta tokens and long prompt are both switched off,
# and this is the only command in the group with --overwrite_results True.
# (--include_meta_tokens False used to be passed twice; it is now passed once.)
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/mlm_pretraing_run_v1_100BT/mlm_pretraing_run_v1_100BT --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens False --include_long_prompt False --pooling_method mean --overwrite_results True --batch_size 512 --skip_wandb

# v3 launcher runs (pythia-31m and the debug_sparse_negs pythia-160m variants).
# All use --include_meta_tokens True, --overwrite_results False and
# --batch_size 512; no --prompt_style or --pooling_method is passed.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-31m-retr-32k_w_meta_mb2-wb2048-grp128_128N_fixed --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --skip_wandb --overwrite_results False --batch_size 512
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_debug_sparse_negs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp512_keep_184k_negs_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --skip_wandb --overwrite_results False --batch_size 512
# Same model dir as the previous command, but with --skip_eval --push_to_wandb
# and a --wandb_dir under the results tree instead of --skip_wandb.
# NOTE(review): presumably this only uploads already-computed results to
# Weights & Biases without re-running the eval — confirm in launch_mteb_eval.py.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_debug_sparse_negs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp512_keep_184k_negs_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results False --batch_size 512 --skip_eval --push_to_wandb --wandb_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/mteb/w_short_prompt_meta_tokens/v3_debug_sparse_negs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp512_keep_184k_negs_128N
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_debug_sparse_negs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_keep_92k_negs_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --skip_wandb --overwrite_results False --batch_size 512
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_debug_sparse_negs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_keep_46k_negs_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --skip_wandb --overwrite_results False --batch_size 512

# Phase-3 fine-tuned checkpoint (combined_ckpts/step-00002483_ckpt.pth), run
# with --overwrite_results True into the results/math_sdpa subdirectory.
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_phase3_pt_step_125k_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_32_16N_max_steps_2484_max_seq_512 --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_phase3_pt_step_125k_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_32_16N_max_steps_2484_max_seq_512/combined_ckpts/step-00002483_ckpt.pth --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results True --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/math_sdpa
# External embedding models, all with --include_meta_tokens False and mean pooling.
# NOTE(review): the --model_path values look like hub-style model ids rather
# than local paths, with "<id>/final" as --checkpoint_dir — confirm how the
# eval scripts resolve them.
python eval/mteb_eval.py --model_path nomic-ai/nomic-embed-text-v1 --checkpoint_dir nomic-ai/nomic-embed-text-v1/final --include_meta_tokens False --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results
python eval/mteb_eval.py --model_path thenlper/gte-base --checkpoint_dir thenlper/gte-base/final --include_meta_tokens False --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results
# The next two commands use eval/mteb_eval_baselines.py instead of eval/mteb_eval.py.
python eval/mteb_eval_baselines.py --model_path nomic-ai/nomic-bert-2048 --checkpoint_dir nomic-ai/nomic-bert-2048/final --include_meta_tokens False --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results
python eval/mteb_eval_baselines.py --model_path /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m/lit_model.pth --include_meta_tokens False --batch_size 32 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --task all
# Same pythia-160m checkpoint through eval/mteb_eval.py with --eval_lm True,
# --overwrite_results True, and results written to the results/clm subdirectory.
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m/lit_model.pth --include_meta_tokens False --batch_size 32 --pooling_method mean --overwrite_results True --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/clm --eval_lm True --task all


# full retrieval eval
# Runs eval/mteb_eval.py with --task retrieval on the phase-3 fine-tuned
# checkpoint combined_ckpts/step-00002483_ckpt.pth; results go to the top-level
# results directory with --overwrite_results False.
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_phase3_pt_step_125k_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_32_16N_max_steps_2484_max_seq_512 --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_phase3_pt_step_125k_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_32_16N_max_steps_2484_max_seq_512/combined_ckpts/step-00002483_ckpt.pth --task retrieval --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results

# prompt ablation
# Prompt-style sweep: invoke the launcher once per --prompt_style value, in the
# order listed. Every other argument is identical across the nine runs, so it
# is written once here.
ablation_model_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v4_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp1024_keep_368k_negs_128N_truncate_normal
ablation_result_dir=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/prompt_ablation
# Comma-separated checkpoint step names handed to --keystrings.
ablation_keystrings=step-00001000,step-00010000,step-00020000,step-00040000,step-00060000,step-00080000,step-00100000,step-00120000
for ablation_prompt_style in \
    query_doc \
    long_prefix_query_doc \
    sys_query_doc \
    sys_query_doc_sys \
    prefix_query_doc_prefix \
    task_query_doc_task \
    sys_task_query_doc \
    sys_task_query_doc_task \
    sys_task_query_doc_task_sys
do
    python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir "$ablation_model_dir" --pooling_method lasttoken --prompt_style "$ablation_prompt_style" --batch_size 32 --include_meta_tokens True --include_long_prompt True --overwrite_results False --result_dir "$ablation_result_dir" --skip_wandb --keystrings "$ablation_keystrings"
done


# phase 3 FT
# Every command in this group uses the same flag set (--include_meta_tokens True,
# --include_long_prompt True, --prompt_style bos_prefix_query_doc_prefix,
# --batch_size 512, --pooling_method mean, --overwrite_results False,
# --skip_wandb) and differs only in --model_dir, except where noted below.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_phase3_pt_step_36k_fineweb_100b_no_combine_lr_2e-5_w_lockstep_wb_tgrp_8_negs_4096_mean_pool_v4_pythia-160m-retr-32k_w_meta_truncate_normal_mb16-wb2048-grp1-1-8_32_16N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_phase3_pt_step_36k_fineweb_100b_lr_2e-5_w_lockstep_wb_tgrp_8_negs_4096_mean_pool_v4_pythia-160m-retr-32k_w_meta_truncate_normal_mb16-wb2048-grp1-1-8_32_16N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_phase3_pt_step_20k_w_lockstep_wb_tgrp_8_negs_4096_mean_pool_v4_pythia-160m-retr-32k_w_meta_truncate_normal_mb16-wb2048-grp1-1-8_32_16N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
# MLM fine-tune run: the only command in this group that adds --eval_lm.
# NOTE(review): --eval_lm is passed here with no value, whereas the
# eval/mteb_eval.py commands in this file write "--eval_lm True" — confirm the
# launcher accepts the bare flag.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_mlm_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_pythia-160m_mb16-wb2048-grp1-1-8_32_16N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --eval_lm --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_phase3_fineweb_stack_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_32_16N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_phase3_pt_step_125k_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_32_16N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_phase3_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb

# FT-Nomic_Pos_Only
# The first eleven commands share one flag set (--include_meta_tokens True,
# --include_long_prompt True, --prompt_style bos_prefix_query_doc_prefix,
# --batch_size 512, --overwrite_results False, --skip_wandb) and differ in
# --model_dir; pooling is mean everywhere except the lasttoken run noted below.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_pt_step_72k_fineweb_100b_w_lockstep_wb_negs_16384_mean_pool_v4_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
# lasttoken_pool model dir, evaluated with --pooling_method lasttoken.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_pt_step_60k_w_lockstep_wb_negs_16384_lasttoken_pool_v4_pythia-160m-retr-32k_w_meta_truncate_normal_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method lasttoken --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_pt_step_60k_w_lockstep_wb_negs_16384_mean_pool_v4_pythia-160m-retr-32k_w_meta_truncate_normal_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_pt_step_36k_fineweb_100b_w_lockstep_wb_negs_16384_mean_pool_v4_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_pt_step_20k_symmetric_meta_tok_w_lockstep_wb_negs_16384_mean_pool_v4_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_pt_step_20k_w_lockstep_wb_negs_16384_mean_pool_v4_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_all_split_gold_split_lockstep_wb_negs_16384_mean_pool_v4_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_zero_pretrain_v2_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_zero_pretrain_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_fineweb_stack_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_pt_step_125k_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
# pt_nomic_pos_only model dirs: run with --overwrite_results True and without
# --include_long_prompt, --prompt_style or --pooling_method.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/pt_nomic_pos_only_all_split_max_seq_512_truncate_normal_negs_131k_v3_pythia-160m-retr-32k_w_meta_mb32-wb16384-grp1-1-8_8_64N_max_steps_14587 --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results True --batch_size 512 --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/pt_nomic_pos_only_all_split_negs_131k_v3_pythia-160m-retr-32k_w_meta_mb64-wb16384-grp1-1-8_8_64N_max_steps_14587_max_seq_512 --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results True --batch_size 512 --skip_wandb
# no_meta_toks model dirs: evaluated with --prompt_style no_meta_prefix_query_doc_prefix.
# NOTE(review): these still pass --include_meta_tokens True even though the
# model dir names say "no_meta_toks" — confirm that the prompt style is what
# disables meta tokens here and that the flag value is intended.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_no_meta_toks_w_lockstep_wb_negs_32768_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb32768-grp1-1-8_128_16N_max_steps_7294_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style no_meta_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_no_meta_toks_w_lockstep_null_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style no_meta_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_no_meta_toks_pt_step_108k_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style no_meta_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_no_meta_toks_fineweb_stack_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style no_meta_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_no_meta_toks_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style no_meta_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/ft_nomic_pos_only_w_lockstep_wb_negs_16384_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb128-wb16384-grp1-1-8_128_16N_max_steps_14587_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_prefix_query_doc_prefix --batch_size 512 --pooling_method mean --overwrite_results False --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb

# FT evals
# Each command evaluates one axonn_nomic_finetune_* run directory through the launcher with
# --include_meta_tokens True, --include_long_prompt True, --prompt_style bos_task_query_task_doc,
# --batch_size 512 and --skip_wandb. Pooling is mean everywhere except the lasttoken_pool
# checkpoint, which passes --pooling_method lasttoken.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_zero_pretrain_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_step_108k_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_lockstep_world_batch_train_grp_8_negs_4096_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
# lasttoken_pool checkpoint: evaluated with the matching --pooling_method lasttoken.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_10_epoch_w_lockstep_world_batch_hard_negs_5_lasttoken_pool_v3_fineweb_stack_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_8280_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method lasttoken --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
# world_batch_train runs that differ in the grp_* / negs_* parts of the run name.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_lockstep_world_batch_train_grp_8_negs_2048_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_16_16N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_lockstep_world_batch_train_grp_8_negs_1024_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_8_16N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_lockstep_world_batch_train_grp_8_negs_512_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_4_16N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_lockstep_world_batch_train_grp_64_negs_2048_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb2-wb1024-grp1-1-8_16_64N_max_steps_4968_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_lockstep_world_batch_train_grp_16_negs_512_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
# world_batch_hard_negs_5 runs for the pythia-14m / 31m / 70m checkpoints.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_pythia-14m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_pythia-31m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_pythia-70m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
# 10_epoch runs (max_steps_8280 in the run name) for pythia-160m.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_10_epoch_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_fineweb_stack_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_8280_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_10_epoch_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_8280_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_10_epoch_w_lockstep_micro_batch_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb32-wb2048-grp1-1-8_1_8N_max_steps_8280_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
# Shorter pythia-160m runs (max_steps_2484): micro_batch, world_batch and null variants.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_lockstep_micro_batch_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb32-wb2048-grp1-1-8_1_8N_max_steps_2484_max_seq_512 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_lockstep_world_batch_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_lockstep_null_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
# wandb push (--skip_eval --push_to_wandb) for the lockstep_null hard_negs_5 checkpoint.
# --wandb_dir must name the results folder of the same run as --model_dir; it previously
# pointed at the v1 / wb1024 / 8N run's folder, which would have pushed that run's results
# under this checkpoint.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_lockstep_null_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_eval --push_to_wandb --wandb_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/mteb/w_long_prompt_meta_tokens/axonn_nomic_finetune_w_lockstep_null_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024
# v1 hard_negs_5 checkpoint: eval run (--skip_wandb), then the wandb push for the same run
# (--skip_eval --push_to_wandb, with --wandb_dir naming that run's results folder).
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_hard_negs_5_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb16-wb1024-grp1-1-8_2_8N_max_steps_4968_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_hard_negs_5_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb16-wb1024-grp1-1-8_2_8N_max_steps_4968_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_eval --push_to_wandb --wandb_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/mteb/w_long_prompt_meta_tokens/axonn_nomic_finetune_w_hard_negs_5_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb16-wb1024-grp1-1-8_2_8N_max_steps_4968_max_seq_1024
# v3 hard_negs_5 checkpoints: pythia-160m, 70m, 70m-wide and 70m-deep (eval only, --skip_wandb).
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-160m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-70m-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-70m-wide-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-70m-deep-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
# wandb push for the 70m-deep checkpoint evaluated on the previous line.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-70m-deep-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_eval --push_to_wandb --wandb_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/mteb/w_long_prompt_meta_tokens/axonn_nomic_finetune_w_hard_negs_5_mean_pool_v3_pythia-70m-deep-retr-32k_w_meta_mb16-wb2048-grp1-1-8_2_16N_max_steps_2484_max_seq_1024

# PT evals
# These launcher runs pass only --include_meta_tokens True (no --include_long_prompt,
# --prompt_style or --pooling_method), so the launcher's defaults apply for those options.
# v4 / v3 pythia-160m run variants: --overwrite_results True, --batch_size 512, --skip_wandb.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v4_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp1024_keep_368k_negs_128N_truncate_normal --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results True --batch_size 512 --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v4_fineweb_100b_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp1024_keep_368k_negs_128N_truncate_normal --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results True --batch_size 512 --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_random_rand_toks_doc_lens_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results True --batch_size 512 --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_random_permuted_batch_toks_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results True --batch_size 512 --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_rand_toks_const_lens_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results True --batch_size 512 --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_rand_toks_rand_lens_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results True --batch_size 512 --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_debug_sparse_negs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp512_keep_184k_negs_128N_fixed_randperm --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results True --batch_size 512 --skip_wandb
# pythia-14m / 31m checkpoints: --batch_size 1024 and --overwrite_results False.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-14m-retr-32k_w_meta_mb2-wb2048-grp128_128N_fixed --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results False --batch_size 1024 --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-31m-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results False --batch_size 1024 --skip_wandb
# wandb push for the 31m run above: --skip_eval --push_to_wandb, --wandb_dir under w_short_prompt_meta_tokens.
python launch_configs/XXXX-22/launch_mteb_eval.py --wandb_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/mteb/w_short_prompt_meta_tokens/v3_pythia-31m-retr-32k_w_meta_mb2-wb2048-grp128_128N --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-31m-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --skip_eval --overwrite_results False --push_to_wandb
# Eval + wandb push pair for the 14m run (the directory name without the _fixed suffix).
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-14m-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results False --batch_size 1024 --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --wandb_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/mteb/w_short_prompt_meta_tokens/v3_pythia-14m-retr-32k_w_meta_mb2-wb2048-grp128_128N --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-14m-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --skip_eval --overwrite_results False --push_to_wandb

# Eval / wandb-push pairs for the v3 pythia-70m-deep, 70m-wide and 160m runs.
# The first command of each pair evaluates with --skip_wandb; the second passes --skip_eval
# and a --wandb_dir naming the same run under w_short_prompt_meta_tokens.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-70m-deep-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results True --batch_size 512 --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --wandb_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/mteb/w_short_prompt_meta_tokens/v3_pythia-70m-deep-retr-32k_w_meta_mb2-wb2048-grp128_128N --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-70m-deep-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --skip_eval --overwrite_results False --push_to_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-70m-wide-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --overwrite_results True --batch_size 512 --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --wandb_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/mteb/w_short_prompt_meta_tokens/v3_pythia-70m-wide-retr-32k_w_meta_mb2-wb2048-grp128_128N --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-70m-wide-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --skip_eval --overwrite_results False --push_to_wandb

python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --skip_wandb --overwrite_results False --batch_size 512
# NOTE(review): unlike the 70m-deep / 70m-wide push commands above, this one has no
# --push_to_wandb flag — confirm whether the launcher still uploads without it.
python launch_configs/XXXX-22/launch_mteb_eval.py --wandb_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/mteb/w_short_prompt_meta_tokens/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/v3_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp128_128N --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --include_meta_tokens True --skip_eval --overwrite_results False

# Direct eval/mteb_eval.py runs (no launcher): --model_path is the run directory and
# --checkpoint_dir is given the step-00009000 .pth file inside its checkpoints-DDPStrategy
# folder. Results go to the results/test directory with --overwrite_results False.
# NOTE(review): this first command is identical to the test_no_meta_tok command further down
# in this group — one of the two is redundant.
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_no_meta_tok_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936 --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_no_meta_tok_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936/checkpoints-DDPStrategy/step-00009000-test_no_meta_tok_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936.pth --include_meta_tokens True --include_long_prompt True --prompt_style no_meta_token_mixture --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/test --overwrite_results False

# test_7k_ckpt run: evaluated with --prompt_style bos_task_query_doc_task.
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_7k_ckpt_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936 --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_7k_ckpt_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936/checkpoints-DDPStrategy/step-00009000-test_7k_ckpt_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936.pth --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_doc_task --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/test --overwrite_results False
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_no_meta_tok_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936 --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_no_meta_tok_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936/checkpoints-DDPStrategy/step-00009000-test_no_meta_tok_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936.pth --include_meta_tokens True --include_long_prompt True --prompt_style no_meta_token_mixture --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/test --overwrite_results False
# NOTE(review): the fineweb_100BT run is also evaluated with no_meta_token_mixture although
# its name carries no no_meta_tok marker — confirm the prompt style is the intended one.
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_fineweb_100BT_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936 --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_fineweb_100BT_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936/checkpoints-DDPStrategy/step-00009000-test_fineweb_100BT_nomic_finetune_3_epoch_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936.pth --include_meta_tokens True --include_long_prompt True --prompt_style no_meta_token_mixture --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/test --overwrite_results False

# Launcher runs restricted with --keystring step-00004500 (presumably selects the checkpoint
# whose filename contains that string — verify against the launcher). Both the pythia-160m
# and pythia-1.4b runs use the smaller --batch_size 64.
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb1024-grp1-1-16_16_16N_max_steps_4968 --keystring step-00004500 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 64 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb
python launch_configs/XXXX-22/launch_mteb_eval.py --model_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/axonn_nomic_finetune_w_hard_negs_mean_pool_v1_pythia-1.4b-retr-32k_w_meta_mb2-wb1024-grp1-1-16_16_64N_max_steps_4968 --keystring step-00004500 --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 64 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results --skip_wandb

# Direct eval/mteb_eval.py runs for the bos_task_q_d_task test checkpoints, evaluated with
# --prompt_style bos_task_query_doc_task. Each --checkpoint_dir is a specific step-*.pth file
# (step-00009000 for the 3_epoch run, step-00006000 for the datawise_batch run).
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_nomic_finetune_3_epoch_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936 --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_nomic_finetune_3_epoch_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936/checkpoints-DDPStrategy/step-00009000-test_nomic_finetune_3_epoch_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb512-grp32_8N_max_steps_9936.pth --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_doc_task --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/test
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_nomic_finetune_datawise_batch_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624 --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_nomic_finetune_datawise_batch_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624/checkpoints-DDPStrategy/step-00006000-test_nomic_finetune_datawise_batch_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624.pth --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_doc_task --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/test
# lasttoken_pool test checkpoint: evaluate with --pooling_method lasttoken so the eval pooling
# matches the pooling named in the run (the command previously passed "mean", apparently
# copied from the neighbouring mean_pool commands).
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_nomic_finetune_bos_task_q_d_task_w_hard_negs_lasttoken_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624 --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_nomic_finetune_bos_task_q_d_task_w_hard_negs_lasttoken_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624/checkpoints-DDPStrategy/step-00006000-test_nomic_finetune_bos_task_q_d_task_w_hard_negs_lasttoken_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624.pth --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_doc_task --batch_size 512 --pooling_method lasttoken --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/test
# Direct eval/mteb_eval.py runs for two mean_pool test checkpoints at step-00006000.
# The bos_task_q_d_task run uses --prompt_style bos_task_query_doc_task; the run without that
# marker in its name uses bos_task_query_task_doc.
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_nomic_finetune_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624 --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_nomic_finetune_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624/checkpoints-DDPStrategy/step-00006000-test_nomic_finetune_bos_task_q_d_task_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624.pth --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_doc_task --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/test
python eval/mteb_eval.py --model_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_nomic_finetune_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624 --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/test_nomic_finetune_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624/checkpoints-DDPStrategy/step-00006000-test_nomic_finetune_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb8-wb256-grp32_4N_max_steps_6624.pth --include_meta_tokens True --include_long_prompt True --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean --result_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/test

# MTEB -> wandb logging. Every call passes --skip_combining and --skip_eval,
# so only the already-written results under --wandb_dir are involved.
out=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
sfx=pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N

# v1 run: short-prompt results, addressed by absolute path.
python launch_configs/XXXX-22/launch_mteb_eval.py \
    --wandb_dir "/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results/mteb/w_short_prompt_meta_tokens/v1_${sfx}" \
    --skip_combining --skip_eval \
    --model_dir "$out/v1_${sfx}" --result_dir ./results

# Remaining runs: long-prompt results, relative to the current directory.
for tag in v1_fineweb_100BT v1_no_meta_tok v2_5_percent_short_seqs v2_20_percent_short_seqs; do
    python launch_configs/XXXX-22/launch_mteb_eval.py \
        --wandb_dir "./results/mteb/w_long_prompt_meta_tokens/${tag}_${sfx}" \
        --skip_combining --skip_eval \
        --model_dir "$out/${tag}_${sfx}" --result_dir ./results
done

# nomic fine-tune evals with the long bos_task_query_task_doc prompt and mean
# pooling: (1) launcher over the hard-negatives run, (2) a direct eval of the
# step-00004000 checkpoint of the cross-device-negatives run.
out=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
res=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results

python launch_configs/XXXX-22/launch_mteb_eval.py \
    --model_dir "$out/nomic_finetune_w_hard_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb16-wb512-grp16_8N_max_steps_12340" \
    --result_dir "$res" \
    --include_meta_tokens True --include_long_prompt True \
    --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean \
    --overwrite_results False --skip_wandb --ddp True

run=nomic_finetune_cross_device_negs_mean_pool_v1_pythia-160m-retr-32k_w_meta_mb16-wb1024-grp64_8N_max_steps_4968
python eval/mteb_eval.py \
    --model_path "$out/$run" \
    --checkpoint_dir "$out/$run/checkpoints-DDPStrategy/step-00004000-${run}.pth" \
    --include_meta_tokens True --include_long_prompt True \
    --prompt_style bos_task_query_task_doc --batch_size 512 --pooling_method mean \
    --result_dir "$res/test"

# MTEB launch (long bos_sys_task_query_doc prompt, --skip_wandb) for the 20%
# and 5% short-sequence runs, in that order.
out=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
res=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/lit-gpt-dev_new/results

for pct in 20 5; do
    python launch_configs/XXXX-22/launch_mteb_eval.py \
        --model_dir "$out/v2_${pct}_percent_short_seqs_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N" \
        --result_dir "$res" \
        --include_meta_tokens True --prompt_style bos_sys_task_query_doc \
        --include_long_prompt True --overwrite_results False \
        --batch_size 512 --skip_wandb
done

# MTEB launch for the v1 run with --resume_from step-00070000, logging to the
# short-prompt wandb directory (relative path, trailing slash kept).
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22
run=v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N

python launch_configs/XXXX-22/launch_mteb_eval.py \
    --model_dir "$scratch/output/$run" \
    --result_dir "$scratch/lit-gpt-dev_new/results" \
    --include_meta_tokens True --resume_from step-00070000 \
    --overwrite_results False --batch_size 512 \
    --wandb_dir "results/mteb/w_short_prompt_meta_tokens/$run/"

# Classification sweep: checkpoint step-00056000 on the mteb_subset task.
# The invocations differ only in --result_dir.
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22

for variant in QUERY_prefix DOC_suffix; do
    python launch_configs/XXXX-22/launch_mteb_eval.py \
        --model_dir "$scratch/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N" \
        --result_dir "$scratch/lit-gpt-dev_new/classification_w_${variant}_model" \
        --include_meta_tokens True --keystrings step-00056000 \
        --overwrite_results False --task mteb_subset --batch_size 512
done

# Clustering sweep: checkpoint step-00056000 on the mteb_subset task.
# The invocations differ only in --result_dir.
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22

for variant in QUERY_prefix_model_wo_bos QUERY_prefix_model DOC_prefix_model QUERY_suffix_model DOC_suffix_model; do
    python launch_configs/XXXX-22/launch_mteb_eval.py \
        --model_dir "$scratch/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N" \
        --result_dir "$scratch/lit-gpt-dev_new/clustering_w_${variant}" \
        --include_meta_tokens True --keystrings step-00056000 \
        --overwrite_results False --task mteb_subset --batch_size 512
done

# mteb_subset eval of checkpoint step-00056000, overwriting existing results.
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22

python launch_configs/XXXX-22/launch_mteb_eval.py \
    --model_dir "$scratch/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N" \
    --result_dir "$scratch/lit-gpt-dev_new/mteb_subset" \
    --include_meta_tokens True --keystrings step-00056000 \
    --overwrite_results True --task mteb_subset --batch_size 512

# MTEB eval of the v1 run for checkpoints step-00046000 .. step-00060000
# (every 2000 steps), overwriting existing results.
# The key list previously named step-00056000 twice; each step is now listed once.
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22

python launch_configs/XXXX-22/launch_mteb_eval.py \
    --model_dir "$scratch/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N" \
    --result_dir "$scratch/lit-gpt-dev_new/results" \
    --include_meta_tokens True \
    --keystrings step-00046000,step-00048000,step-00050000,step-00052000,step-00054000,step-00056000,step-00058000,step-00060000 \
    --overwrite_results True
# Plot the short-prompt MTEB results of the v1 run, then launch the retrieval
# eval over the combined checkpoints of test_pqds_pure_data_axonn.
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22
pqds="$scratch/output/test_pqds_pure_data_axonn"

python eval/plot_mteb.py \
    --result_dir "$scratch/lit-gpt-dev_new/results/mteb/w_short_prompt_meta_tokens/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N"

python launch_configs/XXXX-22/launch_retrieval_eval.py \
    --model_dir "$pqds" --checkpoint_dir "$pqds/combined_ckpts" \
    --include_meta_tokens True

# MTEB eval of test_pqds_pure_data_axonn at steps 2000..14000 (every 2000)
# plus step 15000, overwriting existing results.
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22

python launch_configs/XXXX-22/launch_mteb_eval.py \
    --model_dir "$scratch/output/test_pqds_pure_data_axonn" \
    --result_dir "$scratch/lit-gpt-dev_new/results" \
    --include_meta_tokens True \
    --keystrings step-00002000,step-00004000,step-00006000,step-00008000,step-00010000,step-00012000,step-00014000,step-00015000 \
    --overwrite_results True

# Prompt-style sweep over checkpoints step-00032000 .. step-00038000 of the v1
# run. Each style writes to a result directory named after the style.
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22

for style in \
    sys_query_doc_sys \
    sys_task_query_doc_task_sys \
    sys_query_doc \
    task_query_task_doc \
    task_query_doc_task \
    prefix_query_prefix_doc \
    prefix_query_doc_prefix
do
    python launch_configs/XXXX-22/launch_mteb_eval.py \
        --prompt_style "$style" --include_long_prompt True \
        --model_dir "$scratch/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N" \
        --result_dir "$scratch/lit-gpt-dev_new/$style" \
        --include_meta_tokens True \
        --keystrings step-00032000,step-00034000,step-00036000,step-00038000 \
        --overwrite_results True
done

# Reproduction run: MTEB eval of the v1 run at steps 32000..44000 (every 2000)
# plus step 45000, written to reproduce_mteb and overwriting existing results.
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22

python launch_configs/XXXX-22/launch_mteb_eval.py \
    --model_dir "$scratch/output/v1_pythia-160m-retr-32k_w_meta_mb2-wb2048-grp64_128N" \
    --result_dir "$scratch/lit-gpt-dev_new/reproduce_mteb" \
    --include_meta_tokens True \
    --keystrings step-00032000,step-00034000,step-00036000,step-00038000,step-00040000,step-00042000,step-00044000,step-00045000 \
    --overwrite_results True

# test_pqds_pure_data_axonn: MTEB eval at steps 1000..6000 (every 1000), then
# the retrieval eval over its combined checkpoints.
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22
pqds="$scratch/output/test_pqds_pure_data_axonn"

python launch_configs/XXXX-22/launch_mteb_eval.py \
    --model_dir "$pqds" \
    --result_dir "$scratch/lit-gpt-dev_new/results" \
    --include_meta_tokens True \
    --keystrings step-00001000,step-00002000,step-00003000,step-00004000,step-00005000,step-00006000

python launch_configs/XXXX-22/launch_retrieval_eval.py \
    --model_dir "$pqds" --checkpoint_dir "$pqds/combined_ckpts" \
    --include_meta_tokens True

# Retrieval eval over the DDP checkpoint directories of the contrastive
# batch-size runs (256, 128, 64).
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22

for bsz in 256 128 64; do
    python "$scratch/lit-gpt-dev_new/launch_configs/XXXX-22/launch_retrieval_eval.py" \
        --finetuned_path "$scratch/output/contrastive_bsz_${bsz}_mask_2050_ldiags/checkpoints-DDPStrategy" \
        --prefix_add_eos False --suffix_add_eos False
done

# Contrastive batch-size scaling: MTEB eval of the step-00024000 checkpoint of
# each contrastive_bsz_<N>_mask_2050_ldiags run.
# The list previously ran bsz_64 twice and had no bsz_32 entry; the duplicate
# is presumed to be a copy-paste slip for 32 -- confirm that run exists.
base_model=/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m
out=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output

for bsz in 256 128 64 32 16 8; do
    run="contrastive_bsz_${bsz}_mask_2050_ldiags"
    python mteb_eval.py \
        --checkpoint_dir "$base_model" \
        --finetuned_path "$out/$run/checkpoints-DDPStrategy/step-00024000-${run}.pth" \
        --precision bf16-mixed --attn_type causal_attn --no_instruction True
done

# k-positive-labels experiments: MTEB eval of the step-00024000 checkpoint of
# each run.
base_model=/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m
out=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output

for run in test_no_k_pos_labels test_k_pos_labels_5 test_k_random_pos_labels_5_impl_2; do
    python mteb_eval.py \
        --checkpoint_dir "$base_model" \
        --finetuned_path "$out/$run/checkpoints-DDPStrategy/step-00024000-${run}.pth" \
        --precision bf16-mixed --attn_type causal_attn --no_instruction True
done

# Single-model (suffix_is_prefix) experiments: MTEB eval of the step-00005739
# checkpoint of each variant.
base_model=/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m
runs=/XXXX-30/XXXX-29/XXXX-31/scratch/njain17/new_workspace/XXXX-40/output
stem=orca_finetune_fineweb_100B-retrieval-dual-causal
cfg=mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691_13000ckt

# $1 = directory-name prefix, $2 = checkpoint-file prefix, $3 = variant tag;
# any further arguments are variant-specific flags. The variant tag comes
# before "pythia-160m" in the directory name but after it in the file name.
_mteb_single_model() {
    ckpt="$runs/${1}${stem}-${3}-pythia-160m-${cfg}/checkpoints-ddp/step-00005739-${2}${stem}-pythia-160m-${3}-${cfg}.pth"
    shift 3
    python mteb_eval.py \
        --checkpoint_dir "$base_model" \
        --finetuned_path "$ckpt" \
        --suffix_is_prefix True "$@" \
        --precision bf16-mixed --attn_type causal_attn --no_instruction True
}

_mteb_single_model OLD "" suffix_is_prefix
_mteb_single_model "" "" suffix_is_prefix-flip_embedding --flip_rope_embedding_suffix True
_mteb_single_model "" "" suffix_is_prefix-nope_pos_embedding --nope_pos_embeddings True
_mteb_single_model updatedloss_ updatedloss_ suffix_is_prefix-flip_rope_embedding-rotary_1-added_tokens --flip_rope_embedding_suffix True --add_suf_pre_tokens True

# Single-model (suffix_is_prefix) experiments: anticausal retrieval eval of the
# step-00005739 checkpoint of each variant on the orca 10k validation data.
base_model=/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m
runs=/XXXX-30/XXXX-29/XXXX-31/scratch/njain17/new_workspace/XXXX-40/output
val_data=/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia
stem=orca_finetune_fineweb_100B-retrieval-dual-causal
cfg=mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691_13000ckt

# $1 = directory-name prefix, $2 = checkpoint-file prefix, $3 = variant tag;
# any further arguments are variant-specific flags appended at the end. The
# variant tag comes before "pythia-160m" in the directory name but after it in
# the checkpoint file name.
_retrieval_single_model() {
    ckpt="$runs/${1}${stem}-${3}-pythia-160m-${cfg}/checkpoints-ddp/step-00005739-${2}${stem}-pythia-160m-${3}-${cfg}.pth"
    shift 3
    python eval/eval_retrieval_anticausal.py \
        --checkpoint_dir "$base_model" \
        --finetuned_path "$ckpt" \
        --precision bf16-mixed --attn_type causal_attn \
        --data_dir "$val_data" --random_split False \
        --prefix_add_eos False --suffix_add_eos False \
        --suffix_is_prefix True "$@"
}

_retrieval_single_model OLD "" suffix_is_prefix
_retrieval_single_model "" "" suffix_is_prefix-flip_embedding --flip_rope_embedding_suffix True
_retrieval_single_model "" "" suffix_is_prefix-nope_pos_embedding --nope_pos_embeddings True
_retrieval_single_model updatedloss_ updatedloss_ suffix_is_prefix-flip_rope_embedding-rotary_1-added_tokens --flip_rope_embedding_suffix True --add_suf_pre_tokens True

# Retrieval eval over the DDP checkpoint directories of the k-positive-labels
# runs.
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22

for run in test_k_random_pos_labels_5_impl_2 test_k_random_pos_labels_5_v2 test_k_pos_labels_5 test_no_k_pos_labels; do
    python "$scratch/lit-gpt-dev_new/launch_configs/XXXX-22/launch_retrieval_eval.py" \
        --finetuned_path "$scratch/output/$run/checkpoints-DDPStrategy" \
        --prefix_add_eos False --suffix_add_eos False
done

# Anticausal retrieval eval on the orca 10k validation data for two runs that
# ship their own model_config.json: the single-causal flipped-rope run
# (step 33500) and the dual-causal k_pos_labels_0 run (step 8000).
base_model=/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m
out=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
val_data=/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia

run=new_env_mask_2050_ldiags_k_pos_labels_5_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-single-causal-pythia-160m_flipped_rope-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908
python eval/eval_retrieval_anticausal.py \
    --checkpoint_dir "$base_model" \
    --finetuned_path "$out/$run/checkpoints-DDPStrategy/step-00033500-${run}.pth" \
    --precision bf16-mixed --attn_type causal_attn \
    --data_dir "$val_data" --random_split False \
    --prefix_add_eos False --suffix_add_eos False \
    --suffix_is_prefix True --flip_rope_embedding_suffix True \
    --model_config_file "$out/$run/model_config.json"

run=mask_2050_ldiags_k_pos_labels_0_decay_factor_1_wbsz-256_cross_device_negs_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-2-ctx-var-batch_negative_ddp_RR_lr_1e-4_max_iters_576908
python eval/eval_retrieval_anticausal.py \
    --checkpoint_dir "$base_model" \
    --finetuned_path "$out/$run/checkpoints-DDPStrategy/step-00008000-${run}.pth" \
    --precision bf16-mixed --attn_type causal_attn \
    --data_dir "$val_data" --random_split False \
    --prefix_add_eos False --suffix_add_eos False \
    --model_config_file "$out/$run/model_config.json"

# Anticausal retrieval eval on the alpaca_retrieval data for the final
# checkpoints of two orca fine-tunes.
base_model=/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m
out=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output
alpaca=/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/alpaca_retrieval

run=orca_finetune_k_pos_labels_5_decay_factor_1_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740
python eval/eval_retrieval_anticausal.py \
    --checkpoint_dir "$base_model" \
    --finetuned_path "$out/$run/checkpoints-ddp/step-00005739-${run}.pth" \
    --precision bf16-mixed --attn_type causal_attn \
    --data_dir "$alpaca" --random_split False \
    --prefix_add_eos False --suffix_add_eos False

run=eff_attn_mean_pool_precision_32_true_orca_finetune-pythia-160m-mbsz-16-wbsz-512-lr_1e-3_max_iters_8250
python eval/eval_retrieval_anticausal.py \
    --checkpoint_dir "$base_model" \
    --finetuned_path "$out/$run/checkpoints-ddp/step-00008249-${run}.pth" \
    --precision bf16-mixed --attn_type causal_attn \
    --data_dir "$alpaca" --random_split False \
    --prefix_add_eos False --suffix_add_eos False

# Retrieval eval over the checkpoint directories of the orca fine-tunes that
# vary the "wbsz-<N>" tag and cross-device vs. local negatives.
# NOTE(review): "2560-local_negs" breaks the 64/64, 128/128 pairing and may be
# a typo for "256-local_negs"; kept exactly as listed -- confirm the run name.
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22
cfg=fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740

for tag in \
    64-cross_device_negs 64-local_negs \
    128-cross_device_negs 128-local_negs \
    256-cross_device_negs 2560-local_negs \
    256-10k_steps_cross_device_negs
do
    python "$scratch/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py" \
        --finetuned_path "$scratch/output/orca_finetune_wbsz-${tag}_${cfg}/checkpoints-ddp" \
        --prefix_add_eos False --suffix_add_eos False
done

# Retrieval eval over the checkpoint directory of the drop_k_50 run.
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22

python "$scratch/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py" \
    --finetuned_path "$scratch/output/drop_k_50_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp" \
    --prefix_add_eos False --suffix_add_eos False

# Retrieval eval over the checkpoint directories of the mask_k_diags_{3,9,50}
# runs.
scratch=/XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22

for k in 3 9 50; do
    python "$scratch/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py" \
        --finetuned_path "$scratch/output/mask_k_diags_${k}_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp" \
        --prefix_add_eos False --suffix_add_eos False
done

# Evaluate the "<N>_negs_orca_finetune_fineweb_14B-..." runs for
# N = 8k, 16k, 32k and 41k. The four commands differ only in the "<N>_negs"
# prefix of the run directory name; all pass
# --prefix_add_eos False --suffix_add_eos False.
# NOTE(review): "<N>_negs" presumably denotes the number of negatives used
# during fine-tuning -- confirm against the training configuration.
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/8k_negs_orca_finetune_fineweb_14B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp --prefix_add_eos False --suffix_add_eos False
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/16k_negs_orca_finetune_fineweb_14B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp --prefix_add_eos False --suffix_add_eos False
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/32k_negs_orca_finetune_fineweb_14B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp --prefix_add_eos False --suffix_add_eos False
python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/41k_negs_orca_finetune_fineweb_14B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp --prefix_add_eos False --suffix_add_eos False

# Direct evaluation of a single checkpoint file (step-00005739, .pth) from the
# "orca_finetune_fineweb_100B-...-cross_batch_negative_..._13000ckt" run.
# Unlike the launch_retrieval_eval.py commands, this calls
# eval/eval_retrieval_anticausal.py itself and spells out every option:
#   --checkpoint_dir  pythia-160m model directory
#   --finetuned_path  the specific .pth checkpoint
#   --data_dir        orca_retrieval_val_data_10k_pythia
# The script path is relative, so the command depends on the current working
# directory (presumably the repository root -- confirm).
python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m \
                --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/njain17/new_workspace/XXXX-40/output/orca_finetune_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691_13000ckt/checkpoints-ddp/step-00005739-orca_finetune_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691_13000ckt.pth \
                --precision bf16-mixed --attn_type causal_attn \
                --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia \
                --random_split False --prefix_add_eos False --suffix_add_eos False

# Direct evaluation of the "suffix_is_prefix" variant of the 13000ckt run:
# same script, model directory, data directory and flags as a plain
# eval_retrieval_anticausal.py call, plus --suffix_is_prefix True.
# The script path is relative, so the command depends on the current working
# directory (presumably the repository root -- confirm).
# NOTE(review): in --finetuned_path the run directory is named
# "...dual-causal-suffix_is_prefix-pythia-160m-..." while the checkpoint file
# is named "...dual-causal-pythia-160m-suffix_is_prefix-..."; the tag sits in
# a different position in the two names. Confirm the file exists under this
# exact name before relying on the command.
python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m \
                --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/njain17/new_workspace/XXXX-40/output/orca_finetune_fineweb_100B-retrieval-dual-causal-suffix_is_prefix-pythia-160m-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691_13000ckt/checkpoints-ddp/step-00005739-orca_finetune_fineweb_100B-retrieval-dual-causal-pythia-160m-suffix_is_prefix-mbsz-20-wbsz-2560-ctx-var-cross_batch_negative_ddp_RR_lr_4e-3_max_iters_57691_13000ckt.pth \
                --precision bf16-mixed --attn_type causal_attn \
                --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia \
                --random_split False --prefix_add_eos False --suffix_add_eos False --suffix_is_prefix True
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00054000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False --prefix_add_eos False --suffix_add_eos False
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00027000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_both_dim.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False --prefix_add_eos False --suffix_add_eos False
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00013500-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False --prefix_add_eos False --suffix_add_eos False

# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00048000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-4-wbsz-512-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_57691_reduce_both_dim.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False --prefix_add_eos False --suffix_add_eos False
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00024000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-8-wbsz-1024-ctx-var-batch_negative_ddp_RR_lr_2e-3_max_iters_57691_reduce_both_dim.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False --prefix_add_eos False --suffix_add_eos False
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim/checkpoints-ddp/step-00012000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-16-wbsz-2048-ctx-var-batch_negative_ddp_RR_lr_3e-3_max_iters_57691_reduce_both_dim.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False --prefix_add_eos False --suffix_add_eos False
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/reduce_both_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00009500-reduce_both_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False --prefix_add_eos False --suffix_add_eos False

# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00057690-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00047000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00033500-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00020500-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00007000-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False

# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00039774-rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00031825-rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00023309-rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00014500-rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp/step-00004679-rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-sequence_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp --prefix_add_eos False --suffix_add_eos False

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/reduce_both_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp --prefix_add_eos False --suffix_add_eos False
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/reduce_prefix_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp --prefix_add_eos False --suffix_add_eos False
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/reduce_suffix_dim_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-20-wbsz-2560-ctx-var-batch_negative_ddp_RR_lr_4e-3_max_iters_57691/checkpoints-ddp --prefix_add_eos False --suffix_add_eos False

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_1.5B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_1.5B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_10B_40_negs-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_10B_40_negs-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_86B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_86B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_rpj_v2_86B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_rpj_v2_86B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_10B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_10B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_30B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_30B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_50B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_50B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_rpj_v2_70B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_rpj_v2_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_rpj_v2_10B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_rpj_v2_30B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_rpj_v2_50B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_rpj_v2_10B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_rpj_v2_30B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_rpj_v2_50B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp

# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-736-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp
# python /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/launch_scripts/XXXX-22/launch_retrieval_eval.py --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_rpj_v2_100B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp

# python scripts/convert_retrieval_checkpoint_to_hf.py --parent_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-2944-ctx-var-batch_negative_ddp_RR_lr_4e-3 --checkpoint_file /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-2944-ctx-var-batch_negative_ddp_RR_lr_4e-3/checkpoints-ddp/step-00016585-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-2944-ctx-var-batch_negative_ddp_RR_lr_4e-3.pth --tokenizer_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --model_name pythia-160m

# python scripts/convert_retrieval_checkpoint_to_hf.py --parent_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740 --checkpoint_file /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp/step-00005741-orca_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740.pth --tokenizer_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --model_name pythia-160m
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp/step-00005741-orca_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False
# python mteb_eval.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/dolma-lm-pythia-160m-mbsz-24-wbsz-192_ddp_lr_6e-4/checkpoints-ddp/step-00020000-dolma-lm-pythia-160m-mbsz-24-wbsz-192_ddp_lr_6e-4.pth --precision bf16-mixed --attn_type causal_attn --no_instruction True
# python mteb_eval.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740/checkpoints-ddp/step-00005741-orca_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_5740.pth --precision bf16-mixed --attn_type causal_attn
# python mteb_eval.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/medi_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_1673/checkpoints-ddp/step-00001672-medi_finetune_fineweb_70B-retrieval-dual-causal-pythia-160m-mbsz-46-wbsz-1472-ctx-var-batch_negative_ddp_RR_lr_1e-3_max_iters_1673.pth --precision bf16-mixed --attn_type causal_attn --instruction_set medi2 --instruction_format medi2

# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-2944-ctx-var-batch_negative_ddp_RR_lr_4e-3/checkpoints-ddp/step-00016585-fineweb_100B-retrieval-dual-causal-pythia-160m-mbsz-23-wbsz-2944-ctx-var-batch_negative_ddp_RR_lr_4e-3.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False

# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/orca_finetune_dolma-retrieval-dual-causal-pythia-160m-mbsz-24-wbsz-192-ctx-var-batch_negative_ddp_RR_lr_3e-4/checkpoints-ddp/step-00006000-orca_finetune_dolma-retrieval-dual-causal-pythia-160m-mbsz-24-wbsz-192-ctx-var-batch_negative_ddp_RR_lr_3e-4.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia --random_split False

# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/dolma-lm-pythia-160m-mbsz-24-wbsz-192_ddp_lr_6e-4/checkpoints-ddp/step-00020000-dolma-lm-pythia-160m-mbsz-24-wbsz-192_ddp_lr_6e-4.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/dolma-retrieval-dual-causal-pythia-160m-mbsz-24-wbsz-192-ctx-var-batch_negative_ddp_RR_lr_3e-4/checkpoints-ddp/step-00016000-dolma-retrieval-dual-causal-pythia-160m-mbsz-24-wbsz-192-ctx-var-batch_negative_ddp_RR_lr_3e-4.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/output/dolma-retrieval-dual-causal-pythia-160m-mbsz-24-wbsz-192-ctx-var-batch_negative_ddp_RR_lr_3e-4_cum_mean/checkpoints-ddp/step-00010000-dolma-retrieval-dual-causal-pythia-160m-mbsz-24-wbsz-192-ctx-var-batch_negative_ddp_RR_lr_3e-4_cum_mean.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia
# # ROOT_DIR=/fs/XXXX-37/llm-pretraining/llm-retrieval
# # python finetune/orca_retrieval.py --precision bf16-mixed --logger wandb --out_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/out/DDP_orca-retrieval-tiny-llama-1.1b-bsz-2
# # python finetune/orca_retrieval.py --precision bf16-mixed --logger wandb --out_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/out/FSDP_orca-retrieval-tiny-llama-1.1b-bsz-8
# #### Anti Causal Experiment ####
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/TinyLlama-1.1B-intermediate-step-1431k-3T --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/output/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-16-ctx-rand-batch_negative_ddp_PP_lr_1e-5/step-00052000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-16-ctx-rand-batch_negative_ddp_PP_lr_1e-5.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/TinyLlama-1.1B-intermediate-step-1431k-3T --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/output/fixed_cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-2-ctx-rand-batch_negative_ddp_PP_lr_1e-5/step-00024000-fixed_cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-2-ctx-rand-batch_negative_ddp_PP_lr_1e-5.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/TinyLlama-1.1B-intermediate-step-1431k-3T --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/output/fixed_cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-2-ctx-rand-batch_negative_ddp_PP_lr_1e-5/step-00054000-fixed_cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-2-ctx-rand-batch_negative_ddp_PP_lr_1e-5.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/TinyLlama-1.1B-intermediate-step-1431k-3T --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/output/fixed_cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-2-ctx-rand-batch_negative_ddp_PP_lr_1e-5/step-00054000-fixed_cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-2-ctx-rand-batch_negative_ddp_PP_lr_1e-5.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/cosmopedia_retrieval_val_data

# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/TinyLlama-1.1B-intermediate-step-1431k-3T --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/output/fixed_cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-2-ctx-rand-batch_negative_ddp_RR_lr_1e-5/step-00054000-fixed_cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-2-ctx-rand-batch_negative_ddp_RR_lr_1e-5.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k

# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/TinyLlama-1.1B-intermediate-step-1431k-3T --finetuned_path /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/TinyLlama-1.1B-intermediate-step-1431k-3T/lit_model.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k
# python eval/eval_retrieval_anticausal.py --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/TinyLlama-1.1B-intermediate-step-1431k-3T --finetuned_path /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/XXXX-40/output/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-16-ctx-rand-batch_negative_ddp_PP_lr_1e-5/step-00052000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-16-ctx-rand-batch_negative_ddp_PP_lr_1e-5.pth --precision bf16-mixed --attn_type causal_attn --data_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k
# # python eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_hfdata/step-00010000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_hfdata.pth --precision bf16-mixed --attn_type anti_causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_hfdata/step-00020000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_hfdata.pth --precision bf16-mixed --attn_type anti_causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_hfdata/step-00030000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_hfdata.pth --precision bf16-mixed --attn_type anti_causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_hfdata/step-00042000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_hfdata.pth --precision bf16-mixed --attn_type anti_causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python finetune/orca_retrieval_w_anti_causal.py --precision bf16-mixed --max_seq_length 2048 --logger wandb --args.mean_pooling True --args.fixed_length False --out_dir $ROOT_DIR/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-rand-meanpooling
# # python finetune/orca_retrieval_w_anti_causal.py --precision bf16-mixed --max_seq_length 2048 --logger wandb --args.mean_pooling True --args.fixed_length True --out_dir $ROOT_DIR/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-meanpooling
# # python finetune/orca_retrieval_w_anti_causal.py --precision bf16-mixed --max_seq_length 2048 --logger wandb --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/random_sequences --out_dir $ROOT_DIR/out/random_seq-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048_learned_pos
# # python finetune/orca_retrieval_w_anti_causal.py --precision bf16-mixed --max_seq_length 2048 --logger wandb --out_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048_learned_pos
# # python eval/eval_orca_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/fixed_orca-retrieval-anti-causal-llama-1.1b-bsz-1-seq-2048/step-014000.pth --precision bf16-mixed --attn_type anti_causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/openwebtext_retrieval_val_data_10k
# # python eval/eval_orca_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-rand/step-014000.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/openwebtext_retrieval_val_data_10k
# # python eval/eval_orca_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048/step-014000.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/openwebtext_retrieval_val_data_10k
# # python eval/eval_orca_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batchacc-64/step-000405.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/openwebtext_retrieval_val_data_10k
# # python eval/eval_orca_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/fixed_orca-retrieval-anti-causal-llama-1.1b-bsz-1-seq-2048/step-014000.pth --precision bf16-mixed --attn_type anti_causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python eval/eval_orca_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-rand/step-014000.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python eval/eval_orca_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048/step-014000.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python eval/eval_orca_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batchacc-64/step-000405.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python finetune/orca_retrieval_w_anti_causal.py --precision bf16-mixed --max_seq_length 2048 --logger wandb --out_dir $ROOT_DIR/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batchacc-64
# # python finetune/orca_retrieval_w_anti_causal.py --precision bf16-mixed --max_seq_length 2048 --logger wandb --data_dir $ROOT_DIR/data/random_sequences --out_dir $ROOT_DIR/out/random_seq-retrieval-anti-causal-llama-1.1b-bsz-1-ctx-2048
# # python eval/eval_orca_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/fixed_orca-retrieval-anti-causal-llama-1.1b-bsz-1-seq-2048/step-014000.pth --precision bf16-mixed --attn_type anti_causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/openwebtext_retrieval_val_data
# # python eval/eval_orca_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-rand/step-014000.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/openwebtext_retrieval_val_data
# # python eval/eval_orca_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/fixed_orca-retrieval-anti-causal-llama-1.1b-bsz-1-seq-2048/step-014000.pth --precision bf16-mixed --attn_type anti_causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data
# # python eval/eval_orca_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-rand/step-014000.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data
# # python eval/eval_orca_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048/step-003000.pth --precision bf16-mixed --attn_type causal_attn
# # python eval/eval_orca_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048/step-014000.pth --precision bf16-mixed --attn_type causal_attn
# # python finetune/orca_retrieval_w_anti_causal.py --precision bf16-mixed --max_seq_length 2048 --logger wandb --out_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/out/fixed_orca-retrieval-anti-causal-llama-1.1b-bsz-1-seq-2048
# # python finetune/orca_retrieval_w_anti_causal.py --precision bf16-mixed --max_seq_length 2048 --logger wandb --out_dir $ROOT_DIR/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-rand
# # python finetune/orca_retrieval_w_anti_causal.py --precision bf16-mixed --max_seq_length 2048 --logger wandb --out_dir $ROOT_DIR/out/openwebtext-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048
# # python finetune/orca_retrieval_w_anti_causal.py --precision bf16-mixed --max_seq_length 2048 --logger wandb --data_dir $ROOT_DIR/data/random_sequences --out_dir $ROOT_DIR/out/random_seq-retrieval-anti-causal-llama-1.1b-bsz-1-ctx-rand
# # python scripts/prepare_random_sequences.py \
# #   --checkpoint_dir $ROOT_DIR/checkpoints/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/ \
# #   --destination_path $ROOT_DIR/data/random_sequences

# # python finetune/orca_retrieval_w_anti_causal.py --precision bf16-mixed --max_seq_length 2048 --logger wandb --out_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/out/orca-retrieval-anti-causal-llama-1.1b-bsz-1-seq-2048
# # python finetune/orca_retrieval_w_anti_causal.py --precision bf16-mixed --max_seq_length 1024 --logger wandb --out_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/out/orca-retrieval-anti-causal-llama-1.1b-bsz-1-seq-1024
# # python eval/eval_orca_retrieval.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/orca-retrieval-tiny-llama-1.1b-bsz-8/step-033000.pth
# ##### Prefix Suffix Concatenated in One Sequence Experiment  #####
# # python finetune/orca_retrieval.py --precision bf16-mixed
# # python finetune/orca_retrieval.py --precision bf16-mixed --logger wandb --out_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/out/orca-retrieval-tiny-llama-1.1b-bsz-8
# # python finetune/orca_retrieval.py --precision bf16-mixed --logger wandb --out_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/out/1000_orca-retrieval-tiny-llama-1.1b-bsz-8
# # python eval/eval_orca_retrieval.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/orca-retrieval-tiny-llama-1.1b-bsz-8/step-033000.pth
# # ROOT_DIR=/fs/XXXX-37/llm-pretraining/llm-retrieval

# ##### Test Run #####
# # downloading and converting the HF checkpoint for TinyLlama-1.1B
# # python scripts/download.py --repo_id TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T --checkpoint_dir $ROOT_DIR/checkpoints
# # python scripts/convert_hf_checkpoint.py --checkpoint_dir $ROOT_DIR/checkpoints/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T

# # python scripts/prepare_openwebtext.py \
# #   --checkpoint_dir $ROOT_DIR/checkpoints/EleutherAI/pythia-1b/ \
# #   --destination_path $ROOT_DIR/data/openwebtext

# # python scripts/prepare_openwebtext.py \
# #   --checkpoint_dir $ROOT_DIR/checkpoints/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/ \
# #   --destination_path $ROOT_DIR/data/openwebtext

# # python pretrain/openwebtext.py \
# #   --devices 1

# ##### Retrieval #####
# # python scripts/prepare_orca_retrieval.py \
# #   --checkpoint_dir $ROOT_DIR/checkpoints/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/ \
# #   --destination_path $ROOT_DIR/data/orca_retrieval

# python scripts/prepare_retrieval_data.py \
#   --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/EleutherAI/pythia-160m \
#   --destination_path /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/orca_retrieval_val_data_10k_pythia \
#   --data_name Open-Orca/OpenOrca \
#   --data_type instruction \
#   --cache_dir /XXXX-30/XXXX-29/XXXX-31/scratch/XXXX-22/.cache \
#   --ld_from_disk True

# python scripts/prepare_retrieval_data.py \
#   --checkpoint_dir /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_models/external/TinyLlama-1.1B-intermediate-step-1431k-3T \
#   --destination_path /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/cosmopedia_retrieval_val_data \
#   --data_name /XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/processed/splitted_cosmopedia/val_ood \
#   --data_type pretrain \
#   --ld_from_disk True

# # python scripts/prepare_retrieval_data.py \
# #   --checkpoint_dir $ROOT_DIR/checkpoints/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/ \
# #   --destination_path $ROOT_DIR/data/orca_retrieval_val_data \
# #   --data_name Open-Orca/OpenOrca \
# #   --data_type instruction
# #   (alternative arguments, presumably swapped in to build the openwebtext validation set:)
# #   --destination_path $ROOT_DIR/data/openwebtext_retrieval_val_data \
# #   --data_name openwebtext \
# #   --data_type pretrain


# # python launch_scripts/launch_frontier.py \
# # --rccl_installdir="${HOME}/tiny_plugins_rccl/lib" \
# # --env_packed="${HOME}/frontier_conda_env_packed.tar.gz" \
# # --custom_invocation='pretrain_umd/train_retrieval_w_anticausal.py \
# # --config launch_scripts/retrieval/first_run.yaml \
# # --world_batch_size=64' \
# # --nodes 2 \
# # --budget_minutes=15 \
# # --launch_immediately \
# # --debug_qos

# # python eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata/step-00030000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k

# # hf data model
# # python /XXXX-36/XXXX-22/XXXX-40/eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_hfdata_refactored_code/step-00002000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_hfdata_refactored_code.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python /XXXX-36/XXXX-22/XXXX-40/eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_hfdata_refactored_code/step-00036000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_hfdata_refactored_code.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python /XXXX-36/XXXX-22/XXXX-40/eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_hfdata_refactored_code/step-00056000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_hfdata_refactored_code.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python /XXXX-36/khalids/XXXX-40/eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_hfdata_refactored_code/step-00136000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_hfdata_refactored_code.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k

# # packed data model
# # python /XXXX-36/XXXX-22/XXXX-40/eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata/step-00002000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python /XXXX-36/XXXX-22/XXXX-40/eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata/step-00012000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python /XXXX-36/XXXX-22/XXXX-40/eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata/step-00022000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python /XXXX-36/XXXX-22/XXXX-40/eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata/step-00030000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k

# # packed data model w/ bsz 8
# # python /XXXX-36/XXXX-22/XXXX-40/eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata/step-00002000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python /XXXX-36/XXXX-22/XXXX-40/eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata/step-00012000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python /XXXX-36/XXXX-22/XXXX-40/eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata/step-00022000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k
# # python /XXXX-36/XXXX-22/XXXX-40/eval/eval_retrieval_anticausal.py --finetuned_path /fs/XXXX-37/llm-pretraining/llm-retrieval/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata/step-00030000-cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_default_hparam_packeddata.pth --precision bf16-mixed --attn_type causal_attn --data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/orca_retrieval_val_data_10k