#!/usr/bin/env bash
# Launch one HeteroRL sampler process through `accelerate launch`.
#
# Usage:
#   <this script> <scriptname> <xth> <cfg> <loss_type> <wandb_name> <sampler_id>
#
#   scriptname  name (without .py) of the entry point under src/hetero_rl/
#   xth         tag embedded in the log file name and the save/queue paths
#   cfg         tag embedded in the log file name and the save/queue paths
#   loss_type   forwarded as --loss_type; also the log sub-directory name
#   wandb_name  forwarded as --wandb_name
#   sampler_id  forwarded as --sampler_id; also part of log/wandb/save paths

# Every one of the six positional arguments ends up as the value of an option
# or as part of a path further down. A missing argument would leave an option
# without its value on the launch command line, so refuse to continue.
if [ "$#" -lt 6 ]; then
  printf 'Usage: %s <scriptname> <xth> <cfg> <loss_type> <wandb_name> <sampler_id>\n' "$0" >&2
  # `return` handles the case where the file is sourced; `exit` handles
  # normal execution (where a top-level `return` fails).
  return 2 2>/dev/null || exit 2
fi

# Timestamp that makes each log file name unique per launch.
formatted_time=$(date "+%Y%m%d-%H-%M-%S")
########################## parameters ##########################
scriptname=$1
xth=$2
cfg=$3
loss_type=$4
wandb_name=$5
sampler_id=$6
########################## parameters ##########################

# Log file: ./log_dir/sampler/<loss_type>/<scriptname>_sampler<id>_<xth>_cfg<cfg>_<time>.log
log_path="./log_dir/sampler/${loss_type}/${scriptname}_sampler${sampler_id}_${xth}_cfg${cfg}_${formatted_time}.log"
if ! mkdir -p "$(dirname "$log_path")"; then
  printf 'error: cannot create log directory for %s\n' "$log_path" >&2
  return 1 2>/dev/null || exit 1
fi
# Tell the caller where the launched process will write its output.
printf '%s\n' "$log_path"
# ----------------------------------------------------------------------
# Environment for the launched process. Values are assigned in groups and
# then exported together by the single `export` statement at the bottom.
# ----------------------------------------------------------------------

# Weights & Biases: offline mode, one directory per sampler id.
WANDB_MODE=offline
WANDB_DIR="wandb/sampler${sampler_id}"

# Runtime switches and module search path.
USE_FLASH_ATTN=false
PYTHONPATH=src

# Process topology; WORLD_SIZE, RANK, GPUS and MASTER_ADDR are forwarded to
# `accelerate launch` further down in this script.
WORLD_SIZE=1
LEARNER_WORLD_SIZE=4
RANK=0
GPUS=1
MASTER_ADDR="localhost"

# Local directories, all rooted in ./save_dir/4gpus and tagged with <xth>/<cfg>.
_save_root=./save_dir/4gpus
_run_tag="${xth}_cfg${cfg}"
SAVEPATH="${_save_root}/Sampler_${_run_tag}/sampler${sampler_id}/Qwen3-1.7B"
FS_QUEUE_PATH="${_save_root}/Async_${_run_tag}/Rollout/Qwen3-1.7B"
SYNC_WEIGHTS_PATH="${_save_root}/Async_${_run_tag}/tmp/Qwen3-1.7B/async_checkpoint.pt"
SYNC_SAMPLER_STEPS=1

# OBS settings. The starred values are placeholders to be filled in.
OBS_FS_QUEUE_PATH='obs://***/'                       # your obs url
OBS_SYNC_WEIGHTS_PATH='obs://***/async_checkpoint.pt'
ACCESS_KEY_ID='******'                               # your obs ACCESS_KEY_ID
SECRET_ACCESS_KEY='******'                           # your obs SECRET_ACCESS_KEY
OBS_SERVER='******'                                  # your obs IP address

export WANDB_MODE WANDB_DIR USE_FLASH_ATTN PYTHONPATH \
  WORLD_SIZE LEARNER_WORLD_SIZE RANK GPUS MASTER_ADDR \
  SAVEPATH FS_QUEUE_PATH OBS_FS_QUEUE_PATH \
  SYNC_WEIGHTS_PATH OBS_SYNC_WEIGHTS_PATH SYNC_SAMPLER_STEPS \
  ACCESS_KEY_ID SECRET_ACCESS_KEY OBS_SERVER
# Value forwarded as --vllm_gpu_memory_utilization on the launch line below.
vllm_gpu_memory_utilization=0.3

# MASTER_PORT is not assigned anywhere in this script. If the caller has not
# exported it, an unquoted empty expansion would make --main_process_port
# swallow the script path as its value. Keep a caller-provided port and
# otherwise fall back to 29500 (the usual torch.distributed default port).
export MASTER_PORT="${MASTER_PORT:-29500}"

# Start the run in the background; stdout and stderr both go to $log_path.
# Every expansion is quoted so a value containing spaces or glob characters
# stays a single argument.
accelerate launch --config_file recipes/Ascend910a/ddp_4npus.yaml \
  --num_machines "$WORLD_SIZE" --machine_rank "$RANK"  --num_processes="$GPUS"  --main_process_ip "$MASTER_ADDR" --main_process_port "$MASTER_PORT" \
  "src/hetero_rl/${scriptname}.py" --output_dir "$SAVEPATH" \
  --save_strategy "steps" --save_steps 100000  --save_total_limit  5 \
  --num_train_epochs 3 --gradient_accumulation_steps 8 --max_completion_length 2048 --max_prompt_length 768 \
  --scale_rewards False --eval_strategy 'no' \
  --model_name_or_path "../model/Qwen3-1.7B" \
  --dataset_name "../dataset/simplelr_qwen_level3to5" \
  --log_completions True --logging_steps 32 \
  --per_device_train_batch_size 8 \
  --per_device_eval_batch_size 8 \
  --num_generations 8 \
  --wandb_entity "xxx" --wandb_project "HeteroRL"  --report_to "wandb" \
  --config recipes/Ascend910a/config.yaml \
  --num_samplers 20 --sampler_id "$sampler_id" \
  --wandb_name "$wandb_name" \
  --loss_type "$loss_type" \
  --resume_from_checkpoint False \
  --use_vllm False --use_think False \
  --vllm_gpu_memory_utilization "$vllm_gpu_memory_utilization" > "$log_path" 2>&1 &