# Open an SSH session to 172.24.67.102 as qinanyu, forwarding local port 8265
# to 127.0.0.1:8265 on that host.
# NOTE(review): 8265 is Ray's default dashboard port, so this tunnel is
# presumably for viewing the Ray dashboard - confirm.
ssh -L 8265:127.0.0.1:8265 qinanyu@172.24.67.102

# Delete leftover Ray session directories under /tmp/qinanyu/ray.
 rm -rf /tmp/qinanyu/ray/session_*

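################################################################################
# META-LLAMA-3.2-7B INDIVIDUAL TASKS
# NOTE(review): the commands below load /nlp/scr/qinanyu/models/qwen2.5-7b-instruct
# and log to *_qwen2.5-7b-instruct projects, while this title and the job/output
# names say "llama" - confirm which model is intended and rename accordingly.
################################################################################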

#============================================================================
# MINI SUDOKU
#============================================================================

# MINI SUDOKU - DIRECT (without thinking)
# Submits verl_train.py through nlprun with the mini_sudoku config taken from
# ../configs/grpo and reasoning_gym.developer_prompt=direct.
# Resources: -g 4 -c 4 --mem 160G (presumably GPUs / CPUs / memory - confirm
# against nlprun's help); output goes to bash_output/direct-mini_sudoku-llama.out.
# The -x node list is single-quoted so the submitting shell cannot treat the
# [...] part as a filename glob.
nlprun \
  -q sphinx \
  -x 'sphinx[1,2,3,4,5,6,7,8]' \
  -g 4 -c 4 --mem 160G \
  -o /nlp/scr/qinanyu/rl-explanations/bash_output/direct-mini_sudoku-llama.out \
  --job-name direct-mini_sudoku-llama \
  "
unset ROCR_VISIBLE_DEVICES && \
export HYDRA_FULL_ERROR=1 && \
python -u /nlp/scr/qinanyu/rl-explanations/trainers/verl_train.py \
--config-name mini_sudoku \
--config-path ../configs/grpo \
actor_rollout_ref.model.path=/nlp/scr/qinanyu/models/qwen2.5-7b-instruct \
trainer.project_name=direct_qwen2.5-7b-instruct \
reasoning_gym.developer_prompt=direct \
trainer.experiment_name=mini_sudoku \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
trainer.n_gpus_per_node=4 \
actor_rollout_ref.rollout.tensor_parallel_size=2 \
ray_init.num_cpus=4 \
reasoning_gym.val_path=trainers/direct/qwen2.5-7b-instruct/val_mini_sudoku "

# MINI SUDOKU - GRPO (with thinking)
# Submits verl_train.py through nlprun with the mini_sudoku config from
# ../configs/grpo and actor.ppo_micro_batch_size_per_gpu=8.
# Resources: -g 4 -c 4 --mem 160G (presumably GPUs / CPUs / memory - confirm
# against nlprun's help); output goes to bash_output/grpo-mini_sudoku-llama.out.
# The -x node list is single-quoted so the submitting shell cannot treat the
# [...] part as a filename glob.
nlprun \
  -q sphinx \
  -x 'sphinx[1,2,3,4,5,6,7,8]' \
  -g 4 -c 4 --mem 160G \
  -o /nlp/scr/qinanyu/rl-explanations/bash_output/grpo-mini_sudoku-llama.out \
  --job-name grpo-mini_sudoku-llama \
  "
unset ROCR_VISIBLE_DEVICES && \
export HYDRA_FULL_ERROR=1 && \
python -u /nlp/scr/qinanyu/rl-explanations/trainers/verl_train.py \
--config-name mini_sudoku \
--config-path ../configs/grpo \
actor_rollout_ref.model.path=/nlp/scr/qinanyu/models/qwen2.5-7b-instruct \
trainer.project_name=grpo_qwen2.5-7b-instruct \
trainer.experiment_name=mini_sudoku \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
trainer.n_gpus_per_node=4 \
actor_rollout_ref.rollout.tensor_parallel_size=2 \
ray_init.num_cpus=4 \
reasoning_gym.val_path=trainers/grpo/qwen2.5-7b-instruct/val_mini_sudoku \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 "

#============================================================================
# SPIRAL MATRIX
#============================================================================

# SPIRAL MATRIX - DIRECT (without thinking)
# Submits verl_train.py through nlprun with the spiral_matrix config from
# ../configs/direct.
# Resources: -g 4 -c 4 --mem 160G (presumably GPUs / CPUs / memory - confirm
# against nlprun's help); output goes to bash_output/direct-spiral_matrix-llama.out.
# The -x node list is single-quoted so the submitting shell cannot treat the
# [...] part as a filename glob.
nlprun \
  -q sphinx \
  -x 'sphinx[1,2,3,4,5,6,7,8]' \
  -g 4 -c 4 --mem 160G \
  -o /nlp/scr/qinanyu/rl-explanations/bash_output/direct-spiral_matrix-llama.out \
  --job-name direct-spiral_matrix-llama \
  "
unset ROCR_VISIBLE_DEVICES && \
export HYDRA_FULL_ERROR=1 && \
python -u /nlp/scr/qinanyu/rl-explanations/trainers/verl_train.py \
--config-name spiral_matrix \
--config-path ../configs/direct \
actor_rollout_ref.model.path=/nlp/scr/qinanyu/models/qwen2.5-7b-instruct \
trainer.project_name=direct_qwen2.5-7b-instruct \
trainer.experiment_name=spiral_matrix \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
trainer.n_gpus_per_node=4 \
actor_rollout_ref.rollout.tensor_parallel_size=2 \
ray_init.num_cpus=4 \
reasoning_gym.val_path=trainers/direct/qwen2.5-7b-instruct/val_spiral_matrix "

# SPIRAL MATRIX - GRPO (with thinking)
# Submits verl_train.py through nlprun with the spiral_matrix config from
# ../configs/grpo.
# Resources: -g 4 -c 4 --mem 160G (presumably GPUs / CPUs / memory - confirm
# against nlprun's help); output goes to bash_output/grpo-spiral_matrix-llama.out.
# The -x node list is single-quoted so the submitting shell cannot treat the
# [...] part as a filename glob.
nlprun \
  -q sphinx \
  -x 'sphinx[1,2,3,4,5,6,7,8]' \
  -g 4 -c 4 --mem 160G \
  -o /nlp/scr/qinanyu/rl-explanations/bash_output/grpo-spiral_matrix-llama.out \
  --job-name grpo-spiral_matrix-llama \
  "
unset ROCR_VISIBLE_DEVICES && \
export HYDRA_FULL_ERROR=1 && \
python -u /nlp/scr/qinanyu/rl-explanations/trainers/verl_train.py \
--config-name spiral_matrix \
--config-path ../configs/grpo \
actor_rollout_ref.model.path=/nlp/scr/qinanyu/models/qwen2.5-7b-instruct \
trainer.project_name=grpo_qwen2.5-7b-instruct \
trainer.experiment_name=spiral_matrix \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
trainer.n_gpus_per_node=4 \
actor_rollout_ref.rollout.tensor_parallel_size=2 \
ray_init.num_cpus=4 \
reasoning_gym.val_path=trainers/grpo/qwen2.5-7b-instruct/val_spiral_matrix "

#============================================================================
# FAMILY RELATIONSHIPS
#============================================================================

# FAMILY RELATIONSHIPS - DIRECT (without thinking)
# Submits verl_train.py through nlprun with the family_relationships config
# taken from ../configs/grpo, reasoning_gym.developer_prompt=direct and
# actor.ppo_micro_batch_size_per_gpu=8.
# Resources: -g 4 -c 4 --mem 160G (presumably GPUs / CPUs / memory - confirm
# against nlprun's help); output goes to
# bash_output/direct-family_relationships-llama.out.
# The -x node list is single-quoted so the submitting shell cannot treat the
# [...] part as a filename glob.
nlprun \
  -q sphinx \
  -x 'sphinx[1,2,3,4,5,6,7,8]' \
  -g 4 -c 4 --mem 160G \
  -o /nlp/scr/qinanyu/rl-explanations/bash_output/direct-family_relationships-llama.out \
  --job-name direct-family_relationships-llama \
  "
unset ROCR_VISIBLE_DEVICES && \
export HYDRA_FULL_ERROR=1 && \
python -u /nlp/scr/qinanyu/rl-explanations/trainers/verl_train.py \
--config-name family_relationships \
--config-path ../configs/grpo \
actor_rollout_ref.model.path=/nlp/scr/qinanyu/models/qwen2.5-7b-instruct \
trainer.project_name=direct_qwen2.5-7b-instruct \
reasoning_gym.developer_prompt=direct \
trainer.experiment_name=family_relationships \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
trainer.n_gpus_per_node=4 \
actor_rollout_ref.rollout.tensor_parallel_size=2 \
ray_init.num_cpus=4 \
reasoning_gym.val_path=trainers/direct/qwen2.5-7b-instruct/val_family_relationships \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 "

# FAMILY RELATIONSHIPS - GRPO (with thinking)
# Submits verl_train.py through nlprun with the family_relationships config
# from ../configs/grpo and actor.ppo_micro_batch_size_per_gpu=8.
# Resources: -g 4 -c 4 --mem 160G (presumably GPUs / CPUs / memory - confirm
# against nlprun's help); output goes to
# bash_output/grpo-family_relationships-llama.out.
# The command string is closed exactly once: an extra lone double quote after
# it would open a new string and flip the quoting of everything that follows.
# The -x node list is single-quoted so the submitting shell cannot treat the
# [...] part as a filename glob.
nlprun \
  -q sphinx \
  -x 'sphinx[1,2,3,4,5,6,7,8]' \
  -g 4 -c 4 --mem 160G \
  -o /nlp/scr/qinanyu/rl-explanations/bash_output/grpo-family_relationships-llama.out \
  --job-name grpo-family_relationships-llama \
  "
unset ROCR_VISIBLE_DEVICES && \
export HYDRA_FULL_ERROR=1 && \
python -u /nlp/scr/qinanyu/rl-explanations/trainers/verl_train.py \
--config-name family_relationships \
--config-path ../configs/grpo \
actor_rollout_ref.model.path=/nlp/scr/qinanyu/models/qwen2.5-7b-instruct \
trainer.project_name=grpo_qwen2.5-7b-instruct \
trainer.experiment_name=family_relationships \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
trainer.n_gpus_per_node=4 \
actor_rollout_ref.rollout.tensor_parallel_size=2 \
ray_init.num_cpus=4 \
reasoning_gym.val_path=trainers/grpo/qwen2.5-7b-instruct/val_family_relationships \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 "

#============================================================================
# SIMPLE EQUATIONS
#============================================================================

# SIMPLE EQUATIONS - DIRECT (without thinking)
# Submits verl_train.py through nlprun with the simple_equations config from
# ../configs/direct and actor.ppo_micro_batch_size_per_gpu=4.
# Resources: -g 4 -c 4 --mem 160G (presumably GPUs / CPUs / memory - confirm
# against nlprun's help); output goes to
# bash_output/direct-simple_equations-llama.out.
# The -x node list is single-quoted so the submitting shell cannot treat the
# [...] part as a filename glob.
nlprun \
  -q sphinx \
  -x 'sphinx[1,2,3,4,5,6,7,8]' \
  -g 4 -c 4 --mem 160G \
  -o /nlp/scr/qinanyu/rl-explanations/bash_output/direct-simple_equations-llama.out \
  --job-name direct-simple_equations-llama \
  "
unset ROCR_VISIBLE_DEVICES && \
export HYDRA_FULL_ERROR=1 && \
python -u /nlp/scr/qinanyu/rl-explanations/trainers/verl_train.py \
--config-name simple_equations \
--config-path ../configs/direct \
actor_rollout_ref.model.path=/nlp/scr/qinanyu/models/qwen2.5-7b-instruct \
trainer.project_name=direct_qwen2.5-7b-instruct \
trainer.experiment_name=simple_equations \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
trainer.n_gpus_per_node=4 \
actor_rollout_ref.rollout.tensor_parallel_size=2 \
ray_init.num_cpus=4 \
reasoning_gym.val_path=trainers/direct/qwen2.5-7b-instruct/val_simple_equations \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 "

# SIMPLE EQUATIONS - GRPO (with thinking)
# Submits verl_train.py through nlprun (-g 2 -c 2 --mem 160G) with the
# simple_equations config from ../configs/grpo. Before training, the job
# command builds a per-job scratch tree under /dev/shm holding the torch
# extension, FlashInfer JIT and CUDA caches plus TMPDIR; JOB_TAG falls back
# from SLURM_JOB_ID to LSB_JOBID to a uid/pid/timestamp string.
#
# Quoting: the command is one double-quoted argument, so each \$ and \" is
# escaped to pass through the submitting shell untouched and be evaluated by
# the shell that runs the job. Unescaped, $BASE, $JOB_TAG, $TMPDIR etc. would
# be expanded at submit time, before they exist. $USER is deliberately left
# unescaped and is filled in at submit time.
# NOTE(review): assumes nlprun hands this string unchanged to a shell on the
# compute node - confirm.
#
# Other fixes relative to a plain && chain:
#  - no whitespace may follow a trailing backslash, or the continuation breaks;
#  - the nvcc probe is wrapped in { ...; } because && and || have equal
#    precedence, so an earlier failure would otherwise fall through to
#    "unset CUDA_HOME && python ..." and still start training;
#  - the scratch tree is removed by an EXIT trap, so it is cleaned up when
#    training fails as well as when it succeeds.
# The -x node list is single-quoted so the submitting shell cannot treat the
# [...] part as a filename glob.
nlprun \
  -q sphinx \
  -x 'sphinx[1,2,7]' \
  -g 2 -c 2 --mem 160G \
  -o /nlp/scr/qinanyu/rl-explanations/bash_output/grpo-simple_equations-llama.out \
  --job-name grpo-simple_equations-llama \
  "
unset ROCR_VISIBLE_DEVICES && \
export HYDRA_FULL_ERROR=1 && \
JOB_TAG=\${SLURM_JOB_ID:-\${LSB_JOBID:-jid.\$(id -u)-\$\$-\$(date +%s)}} && \
BASE=\"/dev/shm/$USER/vllm.\$JOB_TAG\" && \
trap 'rm -rf -- \"\$BASE\"' EXIT && \
echo \"\$BASE\" && \
export TORCH_EXTENSIONS_DIR=\"\$BASE/torch_ext\" && \
export FLASHINFER_JIT_DIR=\"\$BASE/flashinfer_jit\" && \
export CUDA_CACHE_PATH=\"\$BASE/cuda_cache\" && \
export TMPDIR=\"\$BASE/tmp\" && \
mkdir -p \"\$TORCH_EXTENSIONS_DIR\" \"\$FLASHINFER_JIT_DIR\" \"\$CUDA_CACHE_PATH\" \"\$TMPDIR\" && \
chmod 700 \"\$BASE\" \"\$TMPDIR\" && \
export RAY_TMPDIR=/nlp/scr/qinanyu/ray_st && \
export TORCH_CUDA_ARCH_LIST=8.0\;9.0a && \
{ command -v nvcc >/dev/null 2>&1 || unset CUDA_HOME; } && \
python -u /nlp/scr/qinanyu/rl-explanations/trainers/verl_train.py \
--config-name simple_equations \
--config-path ../configs/grpo \
actor_rollout_ref.model.path=/nlp/scr/qinanyu/models/qwen2.5-7b-instruct \
trainer.project_name=grpo_qwen2.5-7b-instruct \
trainer.experiment_name=simple_equations \
actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
trainer.n_gpus_per_node=2 \
actor_rollout_ref.rollout.tensor_parallel_size=1 \
ray_init.num_cpus=2 \
reasoning_gym.val_path=trainers/grpo/qwen2.5-7b-instruct/val_simple_equations \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
actor_rollout_ref.actor.ppo_mini_batch_size=32 "

#============================================================================
# FUTOSHIKI
#============================================================================

# FUTOSHIKI - DIRECT (without thinking)
# Submits verl_train.py through nlprun with the futoshiki config from
# ../configs/direct.
# Resources: -g 4 -c 4 --mem 160G (presumably GPUs / CPUs / memory - confirm
# against nlprun's help); output goes to bash_output/direct-futoshiki-llama.out.
# The -x node list is single-quoted so the submitting shell cannot treat the
# [...] part as a filename glob.
nlprun \
  -q sphinx \
  -x 'sphinx[1,2,3,4,5,6,7,8]' \
  -g 4 -c 4 --mem 160G \
  -o /nlp/scr/qinanyu/rl-explanations/bash_output/direct-futoshiki-llama.out \
  --job-name direct-futoshiki-llama \
  "
unset ROCR_VISIBLE_DEVICES && \
export HYDRA_FULL_ERROR=1 && \
python -u /nlp/scr/qinanyu/rl-explanations/trainers/verl_train.py \
--config-name futoshiki \
--config-path ../configs/direct \
actor_rollout_ref.model.path=/nlp/scr/qinanyu/models/qwen2.5-7b-instruct \
trainer.project_name=direct_qwen2.5-7b-instruct \
trainer.experiment_name=futoshiki \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
trainer.n_gpus_per_node=4 \
actor_rollout_ref.rollout.tensor_parallel_size=2 \
ray_init.num_cpus=4 \
reasoning_gym.val_path=trainers/direct/qwen2.5-7b-instruct/val_futoshiki "

# FUTOSHIKI - GRPO (with thinking)
# Submits verl_train.py through nlprun (-g 2 -c 2 --mem 160G) with the
# futoshiki config from ../configs/grpo. Before training, the job command
# points the torch extension cache, TMPDIR and the FlashInfer JIT cache at
# directories under /dev/shm/<user> and creates them.
#
# Quoting: the command is one double-quoted argument, so the \"\$VAR\"
# references in mkdir are escaped to be evaluated by the shell that runs the
# job. Unescaped, they expand at submit time to whatever the submitting shell
# happens to hold (possibly nothing, which makes mkdir fail). $USER is
# deliberately left unescaped and is filled in at submit time.
# NOTE(review): assumes nlprun hands this string unchanged to a shell on the
# compute node - confirm.
#
# The nvcc probe is wrapped in { ...; } because && and || have equal
# precedence: without the group, any earlier failure falls through to
# "unset CUDA_HOME && python ..." and training starts with a half-built env.
# The -x node list is single-quoted so the submitting shell cannot treat the
# [...] part as a filename glob.
nlprun \
  -q sphinx \
  -x 'sphinx[1,2,7]' \
  -g 2 -c 2 --mem 160G \
  -o /nlp/scr/qinanyu/rl-explanations/bash_output/grpo-futoshiki-llama-7b.out \
  --job-name grpo-futoshiki-llama \
  "
unset ROCR_VISIBLE_DEVICES && \
export HYDRA_FULL_ERROR=1 && \
export TORCH_EXTENSIONS_DIR=/dev/shm/$USER/torch_ext && \
export TMPDIR=/dev/shm/$USER/tmp && \
export FLASHINFER_JIT_DIR=/dev/shm/$USER/flashinfer_jit && \
mkdir -p \"\$TORCH_EXTENSIONS_DIR\" \"\$TMPDIR\" \"\$FLASHINFER_JIT_DIR\" && \
export RAY_TMPDIR=/nlp/scr/qinanyu/ray_st && \
export TORCH_CUDA_ARCH_LIST=8.0\;9.0a && \
{ command -v nvcc >/dev/null 2>&1 || unset CUDA_HOME; } && \
python -u /nlp/scr/qinanyu/rl-explanations/trainers/verl_train.py \
--config-name futoshiki \
--config-path ../configs/grpo \
actor_rollout_ref.model.path=/nlp/scr/qinanyu/models/qwen2.5-7b-instruct \
trainer.project_name=grpo_qwen2.5-7b-instruct \
trainer.experiment_name=futoshiki \
actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
trainer.n_gpus_per_node=2 \
actor_rollout_ref.rollout.tensor_parallel_size=1 \
ray_init.num_cpus=2 \
reasoning_gym.val_path=trainers/grpo/qwen2.5-7b-instruct/val_futoshiki \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
actor_rollout_ref.actor.ppo_mini_batch_size=32 "


export TORCH_EXTENSIONS_DIR=/dev/shm/$USER/torch_ext && export TMPDIR=/dev/shm/$USER/tmp && export FLASHINFER_JIT_DIR=/dev/shm/$USER/flashinfer_jit && mkdir -p "$TORCH_EXTENSIONS_DIR" "$TMPDIR" "$FLASHINFER_JIT_DIR" && export RAY_TMPDIR=/nlp/scr/qinanyu/ray_st && export TORCH_CUDA_ARCH_LIST=8.0\;9.0a && command -v nvcc >/dev/null 2>&1 || unset CUDA_HOME && 