
# -----------------------------------------------------------------------------
# Environment for the LongVTG-R1 training run.
#
# Every `export` below is inherited by the training process started later in
# this script. The code that reads these variables is not part of this file,
# so notes about what a variable *means* are marked as assumptions.
# -----------------------------------------------------------------------------

export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1
export WANDB_NAME=LongVTG-R1

# Put the current directory first on the Python module search path.
# `${VAR:+...}` appends the previous value only when it is non-empty, so an
# unset/empty PYTHONPATH yields "." instead of ".:" (no empty trailing entry).
export PYTHONPATH=".${PYTHONPATH:+:${PYTHONPATH}}"

# Output directory for this run; also passed to the trainer as --output_dir.
# Deliberately not exported: only this script uses it.
OUTDIR=./checkpoints/qwen2.5_7b_vl_LongVTG-R1

export DEBUG_MODE="true"
# Quoted and `--`-terminated so the path survives spaces or a leading dash.
mkdir -p -- "${OUTDIR}"
# NOTE(review): presumably the file the training code writes debug output to
# when DEBUG_MODE is "true" -- confirm against the consumer.
export LOG_PATH="${OUTDIR}/log"

# Video / frame sampling and KL settings.
# NOTE(review): units (seconds? frames?) are not visible here -- confirm.
export PARAM_DURATION_LIMIT=384
export FPS_MAX_FRAMES=768
export ALLOW_NOTHING=0.5
export PARAM_KL_BETA=0.04
# Passed as literal arithmetic expression strings; any evaluation happens in
# the consumer, not in this script.
export PARAM_TOTAL_PIXELS="3*3584*28*28"
export PARAM_MIN_PIXELS="16*28*28"

# Reward names mapped to numeric values, written as a Python-style dict literal.
# NOTE(review): presumably per-reward weights -- confirm. (since ver6_2)
export PARAM_REWARD_GATHER_DICT="{'iou_timestamp_reward': 0.8, 'format_reward': 1.0, 'distance_reward': 0.2, 'qa_accuracy': 1.0}"
export PARAM_FAKE_IOU_THRESH=0.7
export PARAM_FAKE_IOU_PROB=0.3

# Boolean feature switches, passed as the literal string "true".
export PARAM_FREE_TAGS=true
export PARAM_FREE_NUMBER=true
export PARAM_ONE_ITER=true
export PARAM_USE_KL=true


# Launch main.py through torchrun: one node (rank 0), 8 processes on that node,
# rendezvous on 127.0.0.1:10668, with the DeepSpeed config at
# training_scripts/zero3_offload.json.
#
# --eval_data_path, --video_folder and --dataset_name are set to the literal
# value "xxx".
# NOTE(review): these look like placeholders to fill in before running --
# confirm whether main.py actually uses them.
#
# Variable expansions are quoted so that a value containing whitespace or glob
# characters is still passed as a single argument.
torchrun --nproc_per_node=8 \
    --nnodes=1 \
    --node_rank=0 \
    --master_addr=127.0.0.1 \
    --master_port=10668 \
    main.py \
    --deepspeed training_scripts/zero3_offload.json \
    --output_dir "${OUTDIR}" \
    --model_name_or_path ./checkpoints/Qwen2.5-VL-7B-Instruct \
    --train_data_path ./data.yaml \
    --eval_data_path xxx \
    --video_folder xxx \
    --dataset_name xxx \
    --max_prompt_length 8192 \
    --max_completion_length 1024 \
    --num_generations 8 \
    --per_device_train_batch_size 1 \
    --gradient_accumulation_steps 1 \
    --logging_steps 1 \
    --bf16 \
    --torch_dtype bfloat16 \
    --data_seed 42 \
    --gradient_checkpointing true \
    --attn_implementation flash_attention_2 \
    --num_train_epochs 1 \
    --run_name "${WANDB_NAME}" \
    --report_to tensorboard \
    --save_steps 100 \
    --save_total_limit 100