#! /bin/bash

# USER_ENV=`whoami`
# set -x
# ---------------------------------------------------------------------------
# Environment exported to every child process started by this script.
# ---------------------------------------------------------------------------
# NOTE(review): NCCL's documentation lists VERSION/WARN/INFO/TRACE as values
# for NCCL_DEBUG -- confirm that "DEBUG" is actually honored.
export NCCL_DEBUG="DEBUG"
export RAY_BACKEND_LOG_LEVEL="debug"
export RAY_DEDUP_LOGS="1"

# Placeholders: edit these before launching a run.
export WANDB_API_KEY="YOUR_WANDB_API_KEY_HERE"
export VLLM_ATTENTION_BACKEND="XFORMERS"
export HDFS_MODEL_PATH="Your_HDFS_MODEL_PATH_HERE"
export HDFS_CHECKPOINT_PATH="Your_HDFS_CHECKPOINT_PATH_HERE"
export HDFS_LOG_PATH="Your_HDFS_LOG_PATH_HERE"

# ---------------------------------------------------------------------------
# Default hyper-parameters. Each one can be overridden by the matching
# command-line option handled further down in this script.
# ---------------------------------------------------------------------------
TRAIN_BATCH_SIZE="256"
VAL_BATCH_SIZE="1000"
MAX_PROMPT_LENGTH="1024"
MAX_RESPONSE_LENGTH="3072"
LEARNING_RATE="5e-7"
PPO_MINI_BATCH_SIZE="64"
# Micro batch size is expressed per GPU.
PPO_MICRO_BATCH_SIZE="2"
CLIP_RATIO="0.2"
KL_LOSS_COEF="0.001"
KL_LOSS_TYPE="low_var_kl"
TEMPERATURE="1.0"
LOG_PROB_MICRO_BATCH_SIZE="8"
ROLLOUT_N="8"
KL_COEF="0.001"
TOTAL_STEPS="100"
# Taken from the caller's environment; stays empty when it is not set there.
DATASET_NAME="${DATASET_NAME}"
ROLLOUT_GPU_MEMORY_UTIL="0.6"
MODEL_NAME="Llama-3.2-3B"
SAVE_FREQ="20"
TEST_FREQ="5"
REMOVE_CLIP="False"
ROLLOUT_TENSOR_MODEL_PARALLEL_SIZE="2"
MICRO_ROLLOUT_BATCH_SIZE="1024"
REMOVE_PREVIOUS_CKPT="False"
DOWNSAMPLING_RATIO_CORRECT="1.0"
GRAD_CLIP="1.0"
ENTROPY_CTRL_TYPE="fixed"
ENTROPY_COEFFIENT="0.001"
ENTROPY_MIN_COEFFIENT="0.001"
ENTROPY_BETA="0.01"
N_GPU_PER_NODE="4"
TOTAL_EPOCHS="20"
RESUME_DATA_STATE="False"
ROLLOUT_NAME="vllm"
MASK_LOWPROB_TOKEN="False"
LOGPROB_THRESHOLD="-15"
USE_KL_LOSS="True"
SAMPLE_BY_CATEGORY="False"
CATEGORY_SAMPLE_RATIO="null"
ENABLE_COT_QUALITY="False"
LOW_CLIP_RATIO="0.2"
HIGH_CLIP_RATIO="0.2"
RANDOM_REWARD_ASSIGN="False"
# Fixed here; any ARNOLD_WORKER_NUM inherited from the environment is replaced.
ARNOLD_WORKER_NUM="1"
INVERSE_QUALITY_REWARD="False"
COT_QUALITY_PROMPT="default"
SEED="1"
OVERSAMPLE_N="-1"
DOWNSAMPLE_N_CORRECT_ROLLOUT="1"
DOWNSAMPLE_UID_FILE="null"
REPLAY="False"
REPLAY_N="1"
KEEP_OLD_ADVANTAGE_BEFORE_DOWNSAMPLE="False"
SAMPLE_SPECIFIC_ROLLOUT_N_FILE="null"
DISCARD_POSITIVE_ROLLOUT="False"
DISCARD_UID_FILE="null"
KEEP_ALL_NEGATIVE="False"
NEGATIVE_REWARD="0.0"

#######################################
# Build the run-name suffix that encodes the command-line overrides.
#
# Recognized options contribute one fragment each, in the order they appear
# on the command line. When --dataset_name / --model_name are not given, the
# current DATASET_NAME / MODEL_NAME are appended instead. A --suffix value is
# always appended last.
#
# Globals:
#   DATASET_NAME (read), MODEL_NAME (read)
# Arguments:
#   The script's command-line arguments ("$@").
# Outputs:
#   Writes the suffix to stdout; diagnostics go to stderr.
# Returns:
#   0 on success, 1 when a value-taking option is the last argument and
#   therefore has no value.
#######################################
generate_suffix() {
  local suffix=""
  local input_suffix=""
  local dataset_provided=false
  local model_provided=false
  local suffix_provided=false
  local value piece consumed

  while (( $# > 0 )); do
    value=${2-}
    piece=""
    # Most options take a value; flag arms and the fallback reset this to 1.
    consumed=2

    case "$1" in
      --train_batch_size) piece="_batch$value" ;;
      --val_batch_size) piece="_valbatch$value" ;;
      --max_prompt_length) piece="_max_prompt$value" ;;
      --max_response_length) piece="_max_response$value" ;;
      --learning_rate) piece="_lr$value" ;;
      --ppo_mini_batch_size) piece="_ppomini$value" ;;
      --kl_loss_coef) piece="_klcoef$value" ;;
      --entropy_coeffient) piece="_entcoef$value" ;;
      --entropy_min_coeffient) piece="_entmincoef$value" ;;
      --entropy_beta) piece="_entbeta$value" ;;
      --entropy_ctrl_type) piece="_entctrltype$value" ;;
      --clip_ratio) piece="_clipratio$value" ;;
      --kl_loss_type) piece="_kltype$value" ;;
      --temperature) piece="_temp$value" ;;
      # Uses the supplied value; this fragment used to be a fixed "256".
      --log_prob_micro_batch_size) piece="_logprobbatch$value" ;;
      --rollout_n) piece="_rollout$value" ;;
      --kl_coef) piece="_klcontrol$value" ;;
      --total_training_steps) piece="_steps$value" ;;
      --dataset_name) piece="_$value"; dataset_provided=true ;;
      --model_name) piece="_$value"; model_provided=true ;;
      --remove_clip) piece="_remove_clip$value" ;;
      --suffix) input_suffix=$value; suffix_provided=true ;;
      --downsampling_ratio_correct) piece="_downsamplecorrect$value" ;;
      --mask_lowprob_token) piece="_masklowprob$value" ;;
      --logprob_threshold) piece="_logprob$value" ;;
      --category_sample_ratio) piece="_ratio_$value" ;;
      --grad_clip) piece="_gradclip$value" ;;
      --low_clip_ratio) piece="_lowclip$value" ;;
      --high_clip_ratio) piece="_highclip$value" ;;
      --seed) piece="_seed$value" ;;
      --oversample_n) piece="_oversample$value" ;;
      --downsample_n_correct_rollout) piece="_downsample$value" ;;
      --replay_n) piece="_n$value" ;;
      # The file path itself is not part of the name, only a marker.
      --sample_specific_rollout_n_file) piece="_dynamicrollout" ;;
      # Value is consumed but deliberately left out of the run name.
      --ppo_micro_batch_size | --rollout_gpu_memory_util) ;;

      # Boolean flags: no value follows.
      --no_kl_loss) piece="_nokl"; consumed=1 ;;
      --sample_by_category) piece="_samplebycategory"; consumed=1 ;;
      --enable_cot_quality) piece="_cotquality"; consumed=1 ;;
      --random_reward_assign) piece="_randomrewardcontrol"; consumed=1 ;;
      --inverse_quality_reward) piece="_inversereward"; consumed=1 ;;
      --replay) piece="_replay"; consumed=1 ;;
      --keep_old_advantage_before_downsample) piece="_keepoldadvantage"; consumed=1 ;;
      --keep_all_negative) piece="_keepallnegative"; consumed=1 ;;

      # Anything else (including values of options not listed here) is
      # skipped one word at a time.
      *) consumed=1 ;;
    esac

    # `shift 2` with a single remaining argument fails without shifting,
    # which would spin this loop forever -- bail out explicitly instead.
    if (( consumed > $# )); then
      echo "generate_suffix: option '$1' requires a value" >&2
      return 1
    fi

    suffix+=$piece
    shift "$consumed"
  done

  if [[ "$dataset_provided" == false ]]; then
    suffix+="_$DATASET_NAME"
  fi

  if [[ "$model_provided" == false ]]; then
    suffix+="_$MODEL_NAME"
  fi

  if [[ "$suffix_provided" == true ]]; then
    suffix+="_$input_suffix"
  fi

  echo "$suffix"
}

# Record the raw command line in the output.
echo "Arguments received: $*"

# The run name is the caller-supplied RUN_NAME prefix followed by the suffix
# that generate_suffix derives from the command-line arguments.
SUFFIX="$(generate_suffix "$@")"
RUN_NAME="${RUN_NAME}${SUFFIX}"
# Swap every "/" for "_" so the name contains no path separators.
RUN_NAME="${RUN_NAME//\//_}"
LOG_FILE_PATH="${HDFS_LOG_PATH}/${RUN_NAME}.log"

#######################################
# Parse named command-line arguments into the configuration globals.
#
# Value-taking options store their value in the matching global variable;
# boolean flags set their global to True (or False for --no_kl_loss).
#
# Globals:
#   Writes the configuration variable that corresponds to each option
#   (TRAIN_BATCH_SIZE, SEED, USE_KL_LOSS, ...).
# Arguments:
#   The script's command-line arguments ("$@").
# Outputs:
#   "Processing: <option>" for every option to stdout; "Unknown option"
#   to stdout; the missing-value error to stderr.
# Returns:
#   Exits the script with status 1 on an unknown option or when a
#   value-taking option has no value.
#######################################
parse_args() {
  local target

  while (( $# > 0 )); do
    echo "Processing: $1"
    case "$1" in
      # Boolean flags: consume one word and move on.
      --no_kl_loss) USE_KL_LOSS=False; shift; continue ;;
      --sample_by_category) SAMPLE_BY_CATEGORY=True; shift; continue ;;
      --enable_cot_quality) ENABLE_COT_QUALITY=True; shift; continue ;;
      --random_reward_assign) RANDOM_REWARD_ASSIGN=True; shift; continue ;;
      --inverse_quality_reward) INVERSE_QUALITY_REWARD=True; shift; continue ;;
      --replay) REPLAY=True; shift; continue ;;
      --keep_old_advantage_before_downsample) KEEP_OLD_ADVANTAGE_BEFORE_DOWNSAMPLE=True; shift; continue ;;
      --discard_positive_rollout) DISCARD_POSITIVE_ROLLOUT=True; shift; continue ;;
      --keep_all_negative) KEEP_ALL_NEGATIVE=True; shift; continue ;;

      # Value-taking options: pick the destination variable here, assign below.
      --train_batch_size) target=TRAIN_BATCH_SIZE ;;
      --val_batch_size) target=VAL_BATCH_SIZE ;;
      --max_prompt_length) target=MAX_PROMPT_LENGTH ;;
      --max_response_length) target=MAX_RESPONSE_LENGTH ;;
      --learning_rate) target=LEARNING_RATE ;;
      --ppo_mini_batch_size) target=PPO_MINI_BATCH_SIZE ;;
      --ppo_micro_batch_size) target=PPO_MICRO_BATCH_SIZE ;;
      --kl_loss_coef) target=KL_LOSS_COEF ;;
      --entropy_coeffient) target=ENTROPY_COEFFIENT ;;
      --entropy_min_coeffient) target=ENTROPY_MIN_COEFFIENT ;;
      --entropy_beta) target=ENTROPY_BETA ;;
      --entropy_ctrl_type) target=ENTROPY_CTRL_TYPE ;;
      --clip_ratio) target=CLIP_RATIO ;;
      --kl_loss_type) target=KL_LOSS_TYPE ;;
      --temperature) target=TEMPERATURE ;;
      --log_prob_micro_batch_size) target=LOG_PROB_MICRO_BATCH_SIZE ;;
      --rollout_n) target=ROLLOUT_N ;;
      --rollout_gpu_memory_util) target=ROLLOUT_GPU_MEMORY_UTIL ;;
      --rollout_tp) target=ROLLOUT_TENSOR_MODEL_PARALLEL_SIZE ;;
      --micro_rollout_batch_size) target=MICRO_ROLLOUT_BATCH_SIZE ;;
      --kl_coef) target=KL_COEF ;;
      --total_training_steps) target=TOTAL_STEPS ;;
      --dataset_name) target=DATASET_NAME ;;
      --model_name) target=MODEL_NAME ;;
      --save_freq) target=SAVE_FREQ ;;
      --test_freq) target=TEST_FREQ ;;
      --remove_clip) target=REMOVE_CLIP ;;
      --remove_previous_ckpt) target=REMOVE_PREVIOUS_CKPT ;;
      --suffix) target=SUFFIX ;;
      --downsampling_ratio_correct) target=DOWNSAMPLING_RATIO_CORRECT ;;
      --n_gpu_per_node) target=N_GPU_PER_NODE ;;
      --resume_data_state) target=RESUME_DATA_STATE ;;
      --grad_clip) target=GRAD_CLIP ;;
      --total_epochs) target=TOTAL_EPOCHS ;;
      --rollout_name) target=ROLLOUT_NAME ;;
      --mask_lowprob_token) target=MASK_LOWPROB_TOKEN ;;
      --logprob_threshold) target=LOGPROB_THRESHOLD ;;
      --category_sample_ratio) target=CATEGORY_SAMPLE_RATIO ;;
      --low_clip_ratio) target=LOW_CLIP_RATIO ;;
      --high_clip_ratio) target=HIGH_CLIP_RATIO ;;
      --cot_quality_prompt) target=COT_QUALITY_PROMPT ;;
      --seed) target=SEED ;;
      --oversample_n) target=OVERSAMPLE_N ;;
      --downsample_n_correct_rollout) target=DOWNSAMPLE_N_CORRECT_ROLLOUT ;;
      --downsample_uid_file) target=DOWNSAMPLE_UID_FILE ;;
      --replay_n) target=REPLAY_N ;;
      --sample_specific_rollout_n_file) target=SAMPLE_SPECIFIC_ROLLOUT_N_FILE ;;
      --discard_uid_file) target=DISCARD_UID_FILE ;;
      --negative_reward) target=NEGATIVE_REWARD ;;
      *)
        echo "Unknown option: $1"
        exit 1
        ;;
    esac

    # `shift 2` with only one argument left fails without shifting, which
    # would repeat this iteration forever; reject the missing value instead.
    # Values may legitimately start with "-" (e.g. a negative threshold), so
    # only the argument count is checked.
    if (( $# < 2 )); then
      echo "Missing value for option: $1" >&2
      exit 1
    fi

    # No `local` exists for the destination, so this assigns the global.
    printf -v "$target" '%s' "$2"
    shift 2
  done
}

parse_args "$@"

# Print the effective configuration, one "Label: value" pair per line, so the
# settings of this run can be read back from the job output.
printf '%s\n' \
  "Training with the following parameters:" \
  "Train Batch Size: $TRAIN_BATCH_SIZE" \
  "Val Batch Size: $VAL_BATCH_SIZE" \
  "Max Prompt Length: $MAX_PROMPT_LENGTH" \
  "Max Response Length: $MAX_RESPONSE_LENGTH" \
  "Learning Rate: $LEARNING_RATE" \
  "PPO Mini Batch Size: $PPO_MINI_BATCH_SIZE" \
  "PPO Micro Batch Size: $PPO_MICRO_BATCH_SIZE" \
  "Micro Rollout Batch Size: $MICRO_ROLLOUT_BATCH_SIZE" \
  "KL Loss Coefficient: $KL_LOSS_COEF" \
  "KL Loss Type: $KL_LOSS_TYPE" \
  "Temperature: $TEMPERATURE" \
  "Rollout N: $ROLLOUT_N" \
  "KL Coefficient: $KL_COEF" \
  "Total Training Steps: $TOTAL_STEPS" \
  "Dataset Name: $DATASET_NAME" \
  "Model Name: $MODEL_NAME" \
  "Remove Clip: $REMOVE_CLIP" \
  "Remove Previous Ckpt: $REMOVE_PREVIOUS_CKPT" \
  "LOG FILE PATH: $LOG_FILE_PATH" \
  "Entropy Coefficient: $ENTROPY_COEFFIENT" \
  "Entropy Min Coefficient: $ENTROPY_MIN_COEFFIENT" \
  "Entropy Beta: $ENTROPY_BETA" \
  "Entropy Control Type: $ENTROPY_CTRL_TYPE" \
  "Downsampling Ratio Correct: $DOWNSAMPLING_RATIO_CORRECT" \
  "Resume Data State: $RESUME_DATA_STATE" \
  "Grad Clip: $GRAD_CLIP" \
  "Total Epochs: $TOTAL_EPOCHS" \
  "Mask Low Prob Token: $MASK_LOWPROB_TOKEN" \
  "Log Prob Threshold: $LOGPROB_THRESHOLD" \
  "Use KL Loss: $USE_KL_LOSS" \
  "Sample By Category: $SAMPLE_BY_CATEGORY" \
  "Category Sample Ratio: $CATEGORY_SAMPLE_RATIO" \
  "Enable CoT Quality: $ENABLE_COT_QUALITY" \
  "Low Clip Ratio: $LOW_CLIP_RATIO" \
  "High Clip Ratio: $HIGH_CLIP_RATIO" \
  "Random Reward Assign: $RANDOM_REWARD_ASSIGN" \
  "Inverse Quality Reward: $INVERSE_QUALITY_REWARD" \
  "CoT Quality Prompt: $COT_QUALITY_PROMPT" \
  "Seed: $SEED" \
  "Oversample N: $OVERSAMPLE_N" \
  "Downsample N Correct Rollout: $DOWNSAMPLE_N_CORRECT_ROLLOUT" \
  "Downsample UID File: $DOWNSAMPLE_UID_FILE" \
  "Replay: $REPLAY" \
  "Replay N: $REPLAY_N" \
  "Keep Old Advantage Before Downsample: $KEEP_OLD_ADVANTAGE_BEFORE_DOWNSAMPLE" \
  "Sample Specific Rollout N File: $SAMPLE_SPECIFIC_ROLLOUT_N_FILE" \
  "Discard Positive Rollout: $DISCARD_POSITIVE_ROLLOUT" \
  "Discard UID File: $DISCARD_UID_FILE" \
  "Keep All Negative: $KEEP_ALL_NEGATIVE" \
  "Negative Reward: $NEGATIVE_REWARD"
# Token budget handed to the rollout engine: prompt length plus response
# length plus a fixed 1000-token margin. Computed with shell arithmetic
# rather than an external `expr` process.
max_num_batched_tokens=$(( MAX_PROMPT_LENGTH + MAX_RESPONSE_LENGTH + 1000 ))

# Condensed second copy of the parameter summary. printf '%s\n' prints each
# line verbatim, so backslashes inside a value are not treated as escape
# sequences.
printf '%s\n' \
  "Training with the following parameters:" \
  "Train Batch Size: $TRAIN_BATCH_SIZE" \
  "Val Batch Size: $VAL_BATCH_SIZE" \
  "Max Prompt Length: $MAX_PROMPT_LENGTH" \
  "Max Response Length: $MAX_RESPONSE_LENGTH" \
  "Learning Rate: $LEARNING_RATE" \
  "PPO Mini Batch Size: $PPO_MINI_BATCH_SIZE" \
  "PPO Micro Batch Size: $PPO_MICRO_BATCH_SIZE" \
  "KL Loss Coefficient: $KL_LOSS_COEF" \
  "KL Loss Type: $KL_LOSS_TYPE" \
  "Temperature: $TEMPERATURE" \
  "Rollout N: $ROLLOUT_N" \
  "KL Coefficient: $KL_COEF" \
  "Total Training Steps: $TOTAL_STEPS" \
  "Dataset Name: $DATASET_NAME" \
  "Model Name: $MODEL_NAME" \
  "Entropy Coefficient: $ENTROPY_COEFFIENT" \
  "Entropy Min Coefficient: $ENTROPY_MIN_COEFFIENT" \
  "Entropy Beta: $ENTROPY_BETA" \
  "Entropy Control Type: $ENTROPY_CTRL_TYPE" \
  "Downsampling Ratio Correct: $DOWNSAMPLING_RATIO_CORRECT" \
  "Grad Clip: $GRAD_CLIP" \
  "Low Clip Ratio: $LOW_CLIP_RATIO" \
  "High Clip Ratio: $HIGH_CLIP_RATIO" \
  "Inverse Quality Reward: $INVERSE_QUALITY_REWARD" \
  "CoT Quality Prompt: $COT_QUALITY_PROMPT" \
  "Sample Specific Rollout N File: $SAMPLE_SPECIFIC_ROLLOUT_N_FILE" \
  "Discard Positive Rollout: $DISCARD_POSITIVE_ROLLOUT" \
  "Discard UID File: $DISCARD_UID_FILE" \
  "Keep All Negative: $KEEP_ALL_NEGATIVE" \
  "Negative Reward: $NEGATIVE_REWARD"


echo "Running Python script..."

# Runtime environment for the Ray job, as a JSON document. WORKING_DIR is
# expected from the caller's environment; it is expanded inside double quotes
# so a path containing spaces stays part of this single argument.
runtime_env_json='{
        "working_dir": "'"${WORKING_DIR}"'",
        "excludes": [
          ".git/objects/pack/pack-78430d62e131e6388668e283065df0b5c54aaaa5.pack",
          "cot_annotation_cache/gpt-5-nano_default.json",
          ".git/**"
        ],
        "env_vars": {
          "http_proxy": "",
          "https_proxy": ""
        }
    }'

# Overrides passed to verl.trainer.main_ppo. Every element is quoted so a
# value containing whitespace or glob characters reaches the trainer as
# exactly one argument.
hydra_overrides=(
  "algorithm.adv_estimator=grpo"

  # Entropy control and low-probability token masking.
  "actor_rollout_ref.actor.en_ctrl.type=$ENTROPY_CTRL_TYPE"
  "actor_rollout_ref.actor.en_ctrl.en_coef=$ENTROPY_COEFFIENT"
  "actor_rollout_ref.actor.entropy_coeff=$ENTROPY_COEFFIENT"
  "actor_rollout_ref.actor.en_ctrl.min_en_coef=$ENTROPY_MIN_COEFFIENT"
  "actor_rollout_ref.actor.en_ctrl.beta=$ENTROPY_BETA"
  "actor_rollout_ref.actor.mask_lowprob_token=$MASK_LOWPROB_TOKEN"
  "actor_rollout_ref.actor.logprob_threshold=$LOGPROB_THRESHOLD"

  # Data. HDFS_DATA_PATH is expected from the caller's environment.
  "data.seed=$SEED"
  "data.train_files=$HDFS_DATA_PATH/$DATASET_NAME/train.parquet"
  "data.val_files=$HDFS_DATA_PATH/$DATASET_NAME/test.parquet"
  "data.train_batch_size=$TRAIN_BATCH_SIZE"
  "data.val_batch_size=$VAL_BATCH_SIZE"
  "data.max_prompt_length=$MAX_PROMPT_LENGTH"
  "data.max_response_length=$MAX_RESPONSE_LENGTH"
  "data.filter_diversity_correct=False"
  "data.down_sampling_ratio_correct=$DOWNSAMPLING_RATIO_CORRECT"
  "data.sample_by_category=$SAMPLE_BY_CATEGORY"
  "data.category_sample_ratio=$CATEGORY_SAMPLE_RATIO"
  "data.enable_cot_quality=$ENABLE_COT_QUALITY"
  "data.cot_quality_prompt=$COT_QUALITY_PROMPT"
  "data.random_reward_assign=$RANDOM_REWARD_ASSIGN"
  "data.inverse_quality_reward=$INVERSE_QUALITY_REWARD"
  "data.downsample_uid_file=$DOWNSAMPLE_UID_FILE"
  "data.replay=$REPLAY"
  "data.replay_n=$REPLAY_N"
  "data.keep_old_advantage_before_downsample=$KEEP_OLD_ADVANTAGE_BEFORE_DOWNSAMPLE"
  "data.sample_specific_rollout_n_file=$SAMPLE_SPECIFIC_ROLLOUT_N_FILE"
  "data.discard_positive_rollout=$DISCARD_POSITIVE_ROLLOUT"
  "data.discard_uid_file=$DISCARD_UID_FILE"
  "data.keep_all_negative=$KEEP_ALL_NEGATIVE"
  "data.negative_reward=$NEGATIVE_REWARD"

  # Actor model and optimization.
  "actor_rollout_ref.model.path=$HDFS_MODEL_PATH/$MODEL_NAME"
  "actor_rollout_ref.actor.optim.lr=$LEARNING_RATE"
  "actor_rollout_ref.model.use_remove_padding=True"
  "actor_rollout_ref.actor.ppo_mini_batch_size=$PPO_MINI_BATCH_SIZE"
  "actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=$PPO_MICRO_BATCH_SIZE"
  "actor_rollout_ref.actor.use_kl_loss=$USE_KL_LOSS"
  "actor_rollout_ref.actor.kl_loss_coef=$KL_LOSS_COEF"
  "actor_rollout_ref.actor.clip_ratio=$CLIP_RATIO"
  "actor_rollout_ref.actor.low_clip_ratio=$LOW_CLIP_RATIO"
  "actor_rollout_ref.actor.high_clip_ratio=$HIGH_CLIP_RATIO"
  "actor_rollout_ref.actor.grad_clip=$GRAD_CLIP"
  "actor_rollout_ref.actor.kl_loss_type=$KL_LOSS_TYPE"
  "actor_rollout_ref.model.enable_gradient_checkpointing=True"
  "actor_rollout_ref.actor.fsdp_config.param_offload=False"
  "actor_rollout_ref.actor.fsdp_config.grad_offload=False"
  "actor_rollout_ref.actor.fsdp_config.optimizer_offload=False"

  # Rollout.
  "actor_rollout_ref.rollout.temperature=$TEMPERATURE"
  "actor_rollout_ref.rollout.log_prob_micro_batch_size=$LOG_PROB_MICRO_BATCH_SIZE"
  "actor_rollout_ref.rollout.tensor_model_parallel_size=$ROLLOUT_TENSOR_MODEL_PARALLEL_SIZE"
  "actor_rollout_ref.rollout.name=$ROLLOUT_NAME"
  "actor_rollout_ref.rollout.gpu_memory_utilization=$ROLLOUT_GPU_MEMORY_UTIL"
  "actor_rollout_ref.rollout.n=$ROLLOUT_N"
  "actor_rollout_ref.rollout.oversample_n=$OVERSAMPLE_N"
  "actor_rollout_ref.rollout.downsample_n_correct_rollout=$DOWNSAMPLE_N_CORRECT_ROLLOUT"
  "actor_rollout_ref.rollout.enable_chunked_prefill=False"
  "actor_rollout_ref.rollout.max_num_batched_tokens=$max_num_batched_tokens"
  "actor_rollout_ref.rollout.micro_rollout_batch_size=$MICRO_ROLLOUT_BATCH_SIZE"

  # Reference policy, KL controller and critic.
  "actor_rollout_ref.ref.log_prob_micro_batch_size=$LOG_PROB_MICRO_BATCH_SIZE"
  "actor_rollout_ref.ref.fsdp_config.param_offload=True"
  "algorithm.kl_ctrl.kl_coef=$KL_COEF"
  "critic.ppo_micro_batch_size_per_gpu=4"

  # Trainer. PROJECT_NAME is expected from the caller's environment.
  "trainer.critic_warmup=0"
  # Single-quoted so the brackets are never treated as a glob pattern; this
  # is the same text the unquoted ['console','wandb'] produced after the
  # shell removed its quotes.
  'trainer.logger=[console,wandb]'
  "trainer.project_name=$PROJECT_NAME"
  "trainer.remove_previous_ckpt=$REMOVE_PREVIOUS_CKPT"
  "trainer.experiment_name=$RUN_NAME"
  "trainer.n_gpus_per_node=$N_GPU_PER_NODE"
  "trainer.nnodes=$ARNOLD_WORKER_NUM"
  "trainer.remove_clip=$REMOVE_CLIP"
  "trainer.save_freq=$SAVE_FREQ"
  "trainer.test_freq=$TEST_FREQ"
  "trainer.default_local_dir=$HDFS_CHECKPOINT_PATH/$RUN_NAME"
  "trainer.resume_data_state=$RESUME_DATA_STATE"
  "trainer.total_epochs=$TOTAL_EPOCHS"
)

# Submit the job and mirror its combined stdout/stderr into the run log.
# NOTE(review): without `set -o pipefail` the status of this pipeline is
# tee's, so a failed submission is not reflected in the exit code.
ray job submit --address=localhost:6379 \
  --entrypoint-num-cpus=1 \
  --runtime-env-json="$runtime_env_json" \
  -- python -m verl.trainer.main_ppo \
  "${hydra_overrides[@]}" 2>&1 | tee -a "$LOG_FILE_PATH"
