#!/bin/bash
# Cluster platform this launcher targets. It decides which environment
# variables the distributed-launch settings are read from.
# Supported values: "huawei", "alibaba".
ENV="alibaba"

# Derive the torchrun topology settings for the selected platform.
#   NNODES          - number of nodes taking part in the job
#   NODE_RANK       - index of this node within the job
#   NGPUS_PER_NODE  - worker processes to start on this node
# The source variables are presumably injected by the respective scheduler;
# nothing in this script sets them.
case "${ENV}" in
    huawei)
        # Rendezvous host is the first entry of the comma-separated host list.
        MASTER_ADDR="${VC_WORKER_HOSTS%%,*}"
        MASTER_PORT="6060"
        # NOTE(review): JOB_ID is not referenced anywhere else in this script.
        JOB_ID="1234"
        NNODES="${MA_NUM_HOSTS}"
        NODE_RANK="${VC_TASK_INDEX}"
        NGPUS_PER_NODE="${MA_NUM_GPUS}"
        ;;
    alibaba)
        # MASTER_ADDR and MASTER_PORT are not assigned in this branch; the
        # launch command uses whatever values the environment already holds.
        NNODES="${WORLD_SIZE}"
        NODE_RANK="${RANK}"
        NGPUS_PER_NODE="${KUBERNETES_CONTAINER_RESOURCE_GPU}"
        ;;
    *)
        # Fail early instead of launching with empty topology settings.
        printf 'error: unsupported ENV "%s" (expected "huawei" or "alibaba")\n' "${ENV}" >&2
        exit 1
        ;;
esac

# export NCCL_IB_TC=136
# export NCCL_IB_SL=5
# export NCCL_IB_GID_INDEX=3
# export NCCL_SOCKET_IFNAME=bond1
# export NCCL_DEBUG=INFO
# export NCCL_IB_HCA=mlx5_bond
# export NCCL_IB_TIMEOUT=22
# export NCCL_IB_QPS_PER_CONNECTION=8
# export NCCL_MIN_NCHANNELS=4
# export NCCL_NET_PLUGIN=none

# Locations consumed by the training command below, plus one allocator
# setting. Values are assigned first and exported together at the end so the
# full list of exported names is visible in one place.

# Pretrained model directory handed to --pretrained_model_name_or_path.
MODEL_PATH="/cpfs01/projects-HDD/cfff-01ff502a0784_HDD/public/pretrain_models/THUDM/CogVideoX-2b"

# Directory handed to --cache_dir.
CACHE_PATH="./cache"

# Data file handed to --instance_data_root.
DATASET_PATH="/cpfs01/projects-HDD/cfff-01ff502a0784_HDD/public/yangxiaomeng/Code/Diffusin_RL/dpo_utils/label_data/dpo_data/merge_multi_dimmension_dpo_rank_data_1205.json"
# Other data files kept for reference (presumably alternative DATASET_PATH values):
#"/cpfs01/projects-HDD/cfff-01ff502a0784_HDD/public/yangxiaomeng/Code/Diffusin_RL/dpo_utils/label_data/fidelity_rank_data_1018.json"
#"/cpfs01/projects-HDD/cfff-01ff502a0784_HDD/public/yangxiaomeng/Code/Diffusin_RL/dpo_utils/label_data/consistent_rank_data_1018.json"

# Directory handed to --output_dir.
OUTPUT_PATH="./dpo_result/important_sample/merge_multi_dimmension_1205_local_fp16_test/"

# Setting read by the PyTorch CUDA caching allocator.
PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"

export MODEL_PATH CACHE_PATH DATASET_PATH OUTPUT_PATH PYTORCH_CUDA_ALLOC_CONF

# MASTER_ADDR="localhost"
# MASTER_PORT="6667"
# NNODES=1
# NODE_RANK=0
# NGPUS_PER_NODE=$(nvidia-smi -L | wc -l)

##--multi_gpu   --gradient_checkpointing \
# If you are not using 8 GPUs, change `num_processes` in `accelerate_config_machine_single.yaml` to match your GPU count.
#accelerate launch 

# Launch train_cogvideox_dpo.py through torchrun.
#
# Reads: NNODES, NODE_RANK, NGPUS_PER_NODE, MASTER_ADDR, MASTER_PORT (launcher
# topology) and MODEL_PATH, CACHE_PATH, DATASET_PATH, OUTPUT_PATH (training
# script paths). The script's exit status is torchrun's exit status.
#
# Arguments are collected in arrays so every expansion stays a single quoted
# word: a value containing whitespace or glob characters is passed through
# intact instead of being split or expanded by the shell.

# Options consumed by torchrun itself.
launcher_args=(
    --nnodes="${NNODES}"
    --node_rank="${NODE_RANK}"
    --nproc_per_node="${NGPUS_PER_NODE}"
    --master_addr="${MASTER_ADDR}"
    --master_port="${MASTER_PORT}"
)

# Options forwarded to train_cogvideox_dpo.py. Each switch is listed exactly
# once (--enable_tiling and --enable_slicing included).
train_args=(
    # Model, cache and data locations.
    --pretrained_model_name_or_path "${MODEL_PATH}"
    --cache_dir "${CACHE_PATH}"
    --enable_tiling
    --enable_slicing
    --instance_data_root "${DATASET_PATH}"

    # Validation: two prompts in one string, split on the separator below.
    --validation_prompt "DISNEY A black and white animated scene unfolds with an anthropomorphic goat surrounded by musical notes and symbols, suggesting a playful environment. Mickey Mouse appears, leaning forward in curiosity as the goat remains still. The goat then engages with Mickey, who bends down to converse or react. The dynamics shift as Mickey grabs the goat, potentially in surprise or playfulness, amidst a minimalistic background. The scene captures the evolving relationship between the two characters in a whimsical, animated setting, emphasizing their interactions and emotions:::A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical performance"
    --validation_prompt_separator ":::"
    --num_validation_videos 1
    --validation_epochs 1

    # Run setup.
    --seed 42
    --mixed_precision fp16
    --output_dir "${OUTPUT_PATH}"

    # Video sampling.
    --height 480
    --width 720
    --fps 8
    --max_num_frames 49
    --skip_frames_start 0
    --skip_frames_end 0

    # Training schedule.
    --train_batch_size 4
    --num_train_epochs 100
    --checkpointing_steps 20
    --gradient_accumulation_steps 1
    --learning_rate 1e-4
    --lr_scheduler cosine_with_restarts
    --lr_warmup_steps 200
    --lr_num_cycles 1
    --gradient_checkpointing

    # Optimizer.
    --optimizer AdamW
    --adam_beta1 0.9
    --adam_beta2 0.95
    --max_grad_norm 1.0
    --allow_tf32

    # Logging.
    --report_to tensorboard
)

torchrun "${launcher_args[@]}" train_cogvideox_dpo.py "${train_args[@]}"