#!/usr/bin/env bash
#
# Launch train.py through the DeepSpeed launcher on one GPU, passing the
# soft-token count, epoch count, dataset paths and trainer flags below.
#
# Usage: run from the directory that contains train.py, ds_z2_config.json,
# the OpenCoder-8B-Instruct model directory and the livecodebench/ data,
# because every path given here is relative to the current directory.
#
# Outputs: the run directory (see run_dir) receives whatever train.py writes
# to --output_dir, with TensorBoard logs requested under "${run_dir}/logs".
#
# NOTE(review): the flag names look like Hugging Face TrainingArguments plus
# project-specific options (--soft_token_count, --use_flash_attn, ...);
# train.py is not visible here, so confirm their exact semantics there.
# NOTE(review): --valid_data_path is supplied although --evaluation_strategy
# is "no"; confirm whether train.py still reads the validation file.

# Abort on a failed command and on any reference to an unset variable, so a
# misspelled variable name cannot silently produce an empty path or flag.
# (No pipefail: the script has no pipelines and should stay runnable by sh.)
set -eu

# Tunable run parameters.
soft_token_count=256
epoch=10

# Single source of truth for the run directory; --output_dir and
# --logging_dir are both derived from it so they cannot drift apart.
run_dir="lcb-code-execution/0512-1-opencoder-inst-${soft_token_count}-tokens-${epoch}epoch"

printf '%s\n' "Running train_soft_token with soft token count = ${soft_token_count}, stage=public domain soft token."

deepspeed --num_gpus=1 train.py \
    --model_name_or_path OpenCoder-8B-Instruct \
    --use_fast_tokenizer True \
    --use_flash_attn True \
    --train_data_path livecodebench/execution2/train_filtered.jsonl \
    --valid_data_path livecodebench/execution2/test.jsonl \
    --save_strategy "no" \
    --max_seq_len 2048 \
    --soft_token_count "${soft_token_count}" \
    --output_dir "${run_dir}" \
    --overwrite_output_dir True \
    --deepspeed ds_z2_config.json \
    --gradient_checkpointing True \
    --num_train_epochs "${epoch}" \
    --per_device_train_batch_size 3 \
    --gradient_accumulation_steps 1 \
    --learning_rate 1e-3 \
    --lr_scheduler_type "linear" \
    --warmup_steps 0 \
    --bf16 True \
    --logging_strategy "steps" \
    --logging_steps 1 \
    --logging_dir "${run_dir}/logs" \
    --report_to "tensorboard" \
    --evaluation_strategy "no" \
    --per_device_eval_batch_size 1 \
    --eval_accumulation_steps 10