#!/bin/bash
#
# Pipeline driver for the HelpSteer2 data under ./data/helpsteer2: launches the
# utils/precompute_logits*.py scripts for the teacher checkpoints, then the
# utils/merge_logits_{adpa,gkd}_dataset.py scripts. All paths are relative, so
# run it from the directory that contains utils/, data/ and the model folders.

# Strict mode: stop at the first failing command (-e), treat expansion of an
# unset variable as an error (-u) so a typo cannot silently produce a path
# with an empty suffix, and fail a pipeline when any of its stages fails.
set -euo pipefail
# Trace every command to stderr as it is executed.
set -x

# NOTE(review): not referenced anywhere in the visible part of this script;
# kept in case something outside this view reads it.
BEFORE_EPOCH_NAME="0epoch"
# Run tag appended to the ./data/helpsteer2 paths used by the stages below.
EPOCH_NAME="HS"
# CUDA_VISIBLE_DEVICES=0,1,2,3 python utils/vllm_generate.py \
#  --model ./model_helpsteer2/dckd \
#  --data gx-ai-architect/HelpSteer2_DPO \
#  --dataset_split train \
#  --prompt_key chosen \
#  --out_dir ./data/helpsteer2/student_init_self_generation_$EPOCH_NAME \
#  --apply_template True



# CUDA_VISIBLE_DEVICES=0,1,2,3 python utils/vllm_generate.py \
#  --model ./model_helpsteer2/dckd \
#  --data gx-ai-architect/HelpSteer2_DPO \
#  --dataset_split test \
#  --prompt_key chosen \
#  --out_dir ./data/helpsteer2/student_init_self_generation_$EPOCH_NAME \
#  --apply_template True



# python utils/form_preference_dataset.py \
#   --original-dataset gx-ai-architect/HelpSteer2_DPO \
#   --rejected-train ./data/helpsteer2/student_init_self_generation_$EPOCH_NAME/HelpSteer2_DPO-train.jsonl \
#   --rejected-test ./data/helpsteer2/student_init_self_generation_$EPOCH_NAME/HelpSteer2_DPO-test.jsonl \
#   --output-dir ./data/helpsteer2/student_adpa_dataset_original_$EPOCH_NAME


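# NOTE(review): this disabled stage is the only one visible in this script that
# writes dpomix7k-dpoteacher-train-student_$EPOCH_NAME, which the merge steps at
# the end of the script read; presumably that output already exists from an
# earlier run -- confirm before running on a fresh checkout.
# CUDA_VISIBLE_DEVICES=0,1,2,3 python -m accelerate.commands.launch \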
#   --num_processes=2 \
#   --main_process_port 29501 \
#   utils/precompute_logits_for_train.py \
#   --data ./data/helpsteer2/student_adpa_dataset_original_$EPOCH_NAME \
#   --split train \
#   --model ./model_helpsteer2/dpo_teacher \
#   --conversation-key rejected \
#   --user-begin '<|begin_of_text|><|start_header_id|>user<|end_header_id|>\\n\\n' \
#   --user-end '<|eot_id|>' \
#   --assistant-begin '<|start_header_id|>assistant<|end_header_id|>\\n\\n' \
#   --assistant-end '<|eot_id|>' \
#   --save-to ./data/helpsteer2/dpomix7k-dpoteacher-train-student_$EPOCH_NAME \
#   --pad-token-id 128001 \
#   --max-tokens-per-batch 2048

# rm ./data/helpsteer2/dpomix7k-dpoteacher-train-student_$EPOCH_NAME/results_rank_*.jsonl


# Stage: run utils/precompute_logits.py through the accelerate launcher on the
# `test` split with the ./model_helpsteer2/dpo_teacher checkpoint, using the
# `rejected` conversation key, and save the result to
# dpomix7k-dpoteacher-test-student_<EPOCH_NAME>. Four processes are launched
# with GPUs 0-3 visible.
#
# The chat-template markers are single-quoted, so the shell passes them to the
# script verbatim, including the doubled backslashes in `\\n\\n`.
# NOTE(review): presumably the Python script unescapes them -- confirm.
#
# Path arguments are double-quoted so the expanded run tag can never be
# word-split or glob-expanded by the shell.
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m accelerate.commands.launch \
  --num_processes=4 \
  --main_process_port 29501 \
  utils/precompute_logits.py \
  --data "./data/helpsteer2/student_adpa_dataset_original_${EPOCH_NAME}" \
  --split test \
  --model ./model_helpsteer2/dpo_teacher \
  --conversation-key rejected \
  --user-begin '<|begin_of_text|><|start_header_id|>user<|end_header_id|>\\n\\n' \
  --user-end '<|eot_id|>' \
  --assistant-begin '<|start_header_id|>assistant<|end_header_id|>\\n\\n' \
  --assistant-end '<|eot_id|>' \
  --save-to "./data/helpsteer2/dpomix7k-dpoteacher-test-student_${EPOCH_NAME}" \
  --pad-token-id 128001 \
  --max-tokens-per-batch 2048



# Stage: run utils/precompute_logits_for_train.py through the accelerate
# launcher on the `train` split with the ./model/ref_teacher checkpoint, using
# the `rejected` conversation key, and save the result to
# dpomix7k-refteacher-train-student_<EPOCH_NAME>.
#
# NOTE(review): only 2 processes are launched although 4 GPUs are visible --
# confirm this is intentional (e.g. a memory constraint) and not a leftover.
# NOTE(review): this checkpoint lives under ./model/, whereas the dpo_teacher
# checkpoint used elsewhere in this script lives under ./model_helpsteer2/ --
# confirm the path.
#
# The chat-template markers are single-quoted, so the shell passes them to the
# script verbatim, including the doubled backslashes in `\\n\\n`.
#
# Path arguments are double-quoted so the expanded run tag can never be
# word-split or glob-expanded by the shell.
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m accelerate.commands.launch \
  --num_processes=2 \
  --main_process_port 29501 \
  utils/precompute_logits_for_train.py \
  --data "./data/helpsteer2/student_adpa_dataset_original_${EPOCH_NAME}" \
  --split train \
  --model ./model/ref_teacher \
  --conversation-key rejected \
  --user-begin '<|begin_of_text|><|start_header_id|>user<|end_header_id|>\\n\\n' \
  --user-end '<|eot_id|>' \
  --assistant-begin '<|start_header_id|>assistant<|end_header_id|>\\n\\n' \
  --assistant-end '<|eot_id|>' \
  --save-to "./data/helpsteer2/dpomix7k-refteacher-train-student_${EPOCH_NAME}" \
  --pad-token-id 128001 \
  --max-tokens-per-batch 2048



# Stage: run utils/precompute_logits.py through the accelerate launcher on the
# `test` split with the ./model/ref_teacher checkpoint, using the `rejected`
# conversation key, and save the result to
# dpomix7k-refteacher-test-student_<EPOCH_NAME>. Four processes are launched
# with GPUs 0-3 visible.
#
# The chat-template markers are single-quoted, so the shell passes them to the
# script verbatim, including the doubled backslashes in `\\n\\n`.
#
# Path arguments are double-quoted so the expanded run tag can never be
# word-split or glob-expanded by the shell.
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m accelerate.commands.launch \
  --num_processes=4 \
  --main_process_port 29501 \
  utils/precompute_logits.py \
  --data "./data/helpsteer2/student_adpa_dataset_original_${EPOCH_NAME}" \
  --split test \
  --model ./model/ref_teacher \
  --conversation-key rejected \
  --user-begin '<|begin_of_text|><|start_header_id|>user<|end_header_id|>\\n\\n' \
  --user-end '<|eot_id|>' \
  --assistant-begin '<|start_header_id|>assistant<|end_header_id|>\\n\\n' \
  --assistant-end '<|eot_id|>' \
  --save-to "./data/helpsteer2/dpomix7k-refteacher-test-student_${EPOCH_NAME}" \
  --pad-token-id 128001 \
  --max-tokens-per-batch 2048




# Stage: run utils/merge_logits_adpa_dataset.py on gx-ai-architect/HelpSteer2_DPO
# together with the four teacher outputs (dpo/ref teacher x train/test) and
# write the result to ./data/helpsteer2/adpa_dataset_<EPOCH_NAME>.
#
# NOTE(review): the dpo-teacher *train* input is not produced by any active
# command visible in this script; presumably it comes from an earlier run --
# confirm it exists before launching.
#
# Path arguments are double-quoted so the expanded run tag can never be
# word-split or glob-expanded by the shell.
python utils/merge_logits_adpa_dataset.py \
  --input-dataset-dict gx-ai-architect/HelpSteer2_DPO \
  --dpo-teacher-logp-train "./data/helpsteer2/dpomix7k-dpoteacher-train-student_${EPOCH_NAME}" \
  --ref-teacher-logp-train "./data/helpsteer2/dpomix7k-refteacher-train-student_${EPOCH_NAME}" \
  --dpo-teacher-logp-test "./data/helpsteer2/dpomix7k-dpoteacher-test-student_${EPOCH_NAME}" \
  --ref-teacher-logp-test "./data/helpsteer2/dpomix7k-refteacher-test-student_${EPOCH_NAME}" \
  --save-to "./data/helpsteer2/adpa_dataset_${EPOCH_NAME}" \
  --logits-key rejected_compressed_probs \
  --label-key rejected_labels \
  --output-key rejected_margin_logp_every

# Stage: run utils/merge_logits_gkd_dataset.py on gx-ai-architect/HelpSteer2_DPO
# together with the four teacher outputs (dpo/ref teacher x train/test) and
# write the result to ./data/helpsteer2/gkd_dataset_<EPOCH_NAME>.
#
# NOTE(review): the dpo-teacher *train* input is not produced by any active
# command visible in this script; presumably it comes from an earlier run --
# confirm it exists before launching.
#
# Path arguments are double-quoted so the expanded run tag can never be
# word-split or glob-expanded by the shell.
python utils/merge_logits_gkd_dataset.py \
  --input-dataset-dict gx-ai-architect/HelpSteer2_DPO \
  --dpo-teacher-logp-train "./data/helpsteer2/dpomix7k-dpoteacher-train-student_${EPOCH_NAME}" \
  --ref-teacher-logp-train "./data/helpsteer2/dpomix7k-refteacher-train-student_${EPOCH_NAME}" \
  --dpo-teacher-logp-test "./data/helpsteer2/dpomix7k-dpoteacher-test-student_${EPOCH_NAME}" \
  --ref-teacher-logp-test "./data/helpsteer2/dpomix7k-refteacher-test-student_${EPOCH_NAME}" \
  --save-to "./data/helpsteer2/gkd_dataset_${EPOCH_NAME}" \
  --logits-key rejected_compressed_probs \
  --label-key rejected_labels \
  --output-key rejected_margin_logp_every