#!/bin/bash

# Sweep of train_efficient.py runs that all start from the same checkpoint and
# dataset. Only --max_completion_length and --learning_rate differ between
# runs; both values are also embedded in each run's --output_dir name, so the
# directory name is derived from them instead of being typed by hand.
#
# Paths are relative: invoke this script from the directory that contains
# train_efficient.py.
#
# A failing run does not stop the sweep (the remaining runs still execute),
# but every failure is reported on stderr and the sweep ends with a non-zero
# status if any run failed.

set -u

# Common prefix of the checkpoint directory and of every output directory.
readonly RUN_PREFIX='outputs/Qwen-7B-GRPO-math-1-sub-1024-lr-2e-6-2-sub-1536-lr-1e-6-3-sub-1536-lr-2e-6'
readonly MODEL_PATH="${RUN_PREFIX}-h200/checkpoint-100"
readonly DATASET_PATH='../../datasets/hendrycks_math_train_4_sub.jsonl'

# One entry per run: "<max_completion_length> <learning_rate>".
readonly -a SWEEP=(
  '1536 2e-6'
  '2560 2e-6'
  '2048 1e-6'
  '1792 2e-6'
)

#######################################
# Launch a single training run.
# Globals:   RUN_PREFIX, MODEL_PATH, DATASET_PATH (read)
# Arguments: $1 - value for --max_completion_length
#            $2 - value for --learning_rate
# Outputs:   one progress line on stderr; whatever the trainer prints
# Returns:   exit status of the python invocation
#######################################
run_stage() {
  local -r max_len=$1
  local -r lr=$2
  local -r out_dir="${RUN_PREFIX}-4-sub-${max_len}-lr-${lr}"
  local -a args=(
    train_efficient.py
    --model "${MODEL_PATH}"
    --dataset "${DATASET_PATH}"
    --output_dir "${out_dir}"
    --save_steps 50
    --max_steps 200
    --max_completion_length "${max_len}"
    --per_device_train_batch_size 1
    --gradient_accumulation_steps 16
    --num_generations 16
    --learning_rate "${lr}"
  )

  printf '[sweep] starting run -> %s\n' "${out_dir}" >&2
  python "${args[@]}"
}

#######################################
# Execute every configuration in SWEEP, in order.
# Globals:   SWEEP (read)
# Arguments: none
# Outputs:   failure diagnostics on stderr
# Returns:   0 if every run succeeded, 1 if at least one run failed
#######################################
main() {
  local cfg max_len lr
  local -a failed=()

  for cfg in "${SWEEP[@]}"; do
    # Split "<len> <lr>" into its two fields.
    read -r max_len lr <<<"${cfg}"
    if ! run_stage "${max_len}" "${lr}"; then
      printf '[sweep] run FAILED: max_completion_length=%s learning_rate=%s\n' \
        "${max_len}" "${lr}" >&2
      failed+=("${cfg}")
    fi
  done

  if (( ${#failed[@]} > 0 )); then
    printf '[sweep] %d of %d runs failed\n' "${#failed[@]}" "${#SWEEP[@]}" >&2
    return 1
  fi
  return 0
}

main "$@"