# dpoteacher-train-student
# Runs utils/precompute_logits_for_train.py through the accelerate launcher on
# the "train" split, using the dpo_teacher checkpoint and the "rejected"
# conversation key, and writes the result under
# dpomix7k-2epoch-dpoteacher-train-student.
#
# NOTE(review): this step launches 2 processes and uses
# precompute_logits_for_train.py, whereas the other three precompute steps in
# this file launch 4 processes and use precompute_logits.py -- confirm that the
# difference is intentional.
#
# The chat-template strings are passed with a literal backslash-n (double
# quotes do not expand "\n" in the shell); presumably the Python script decodes
# them -- verify before changing the quoting.
#
# The step aborts the whole script on failure so that later stages never run
# against a missing or partial output directory.
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m accelerate.commands.launch \
    --num_processes=2 \
    --main_process_port 29501 \
    utils/precompute_logits_for_train.py \
    --data data/llama3.2-1b-deita-dpomix/student_2epoch_adpa_dataset_original \
    --split train \
    --model data/llama3.2-1b-deita-dpomix/dpo_teacher \
    --conversation-key rejected \
    --user-begin "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n" \
    --user-end "<|eot_id|>" \
    --assistant-begin "<|start_header_id|>assistant<|end_header_id|>\n\n" \
    --assistant-end "<|eot_id|>" \
    --save-to data/llama3.2-1b-deita-dpomix/dpomix7k-2epoch-dpoteacher-train-student \
    --pad-token-id 128001 \
    --max-tokens-per-batch 2048 || {
  printf '%s\n' "dpoteacher-train-student: logit precompute failed; aborting" >&2
  exit 1
}
# Remove the results_rank_*.jsonl files left in the output directory
# (presumably per-rank intermediate shards -- confirm against the script).
rm data/llama3.2-1b-deita-dpomix/dpomix7k-2epoch-dpoteacher-train-student/results_rank_*.jsonl

# dpoteacher-test-student
# Runs utils/precompute_logits.py through the accelerate launcher (4 processes)
# on the "test" split, using the dpo_teacher checkpoint and the "rejected"
# conversation key, and writes the result under
# dpomix7k-2epoch-dpoteacher-test-student.
#
# The step aborts the whole script on failure so that later stages never run
# against a missing or partial output directory.
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m accelerate.commands.launch \
    --num_processes=4 \
    --main_process_port 29501 \
    utils/precompute_logits.py \
    --data data/llama3.2-1b-deita-dpomix/student_2epoch_adpa_dataset_original \
    --split test \
    --model data/llama3.2-1b-deita-dpomix/dpo_teacher \
    --conversation-key rejected \
    --user-begin "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n" \
    --user-end "<|eot_id|>" \
    --assistant-begin "<|start_header_id|>assistant<|end_header_id|>\n\n" \
    --assistant-end "<|eot_id|>" \
    --save-to data/llama3.2-1b-deita-dpomix/dpomix7k-2epoch-dpoteacher-test-student \
    --pad-token-id 128001 \
    --max-tokens-per-batch 2048 || {
  printf '%s\n' "dpoteacher-test-student: logit precompute failed; aborting" >&2
  exit 1
}
# Remove the results_rank_*.jsonl files left in the output directory
# (presumably per-rank intermediate shards -- confirm against the script).
rm data/llama3.2-1b-deita-dpomix/dpomix7k-2epoch-dpoteacher-test-student/results_rank_*.jsonl


# refteacher-train-student
# Runs utils/precompute_logits.py through the accelerate launcher (4 processes)
# on the "train" split with the "rejected" conversation key, and writes the
# result under dpomix7k-2epoch-refteacher-train-student.
#
# The model checkpoint can be overridden with the REF_TEACHER_MODEL environment
# variable; when it is unset or empty the original machine-specific absolute
# path is used, so existing invocations behave exactly as before.
#
# The step aborts the whole script on failure so that later stages never run
# against a missing or partial output directory.
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m accelerate.commands.launch \
    --num_processes=4 \
    --main_process_port 29501 \
    utils/precompute_logits.py \
    --data data/llama3.2-1b-deita-dpomix/student_2epoch_adpa_dataset_original \
    --split train \
    --model "${REF_TEACHER_MODEL:-/home/minchan.kwon/ADPA/data/llama3.2-1b-deita-dpomix/student_adpa}" \
    --conversation-key rejected \
    --user-begin "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n" \
    --user-end "<|eot_id|>" \
    --assistant-begin "<|start_header_id|>assistant<|end_header_id|>\n\n" \
    --assistant-end "<|eot_id|>" \
    --save-to data/llama3.2-1b-deita-dpomix/dpomix7k-2epoch-refteacher-train-student \
    --pad-token-id 128001 \
    --max-tokens-per-batch 2048 || {
  printf '%s\n' "refteacher-train-student: logit precompute failed; aborting" >&2
  exit 1
}
# Remove the results_rank_*.jsonl files left in the output directory
# (presumably per-rank intermediate shards -- confirm against the script).
rm data/llama3.2-1b-deita-dpomix/dpomix7k-2epoch-refteacher-train-student/results_rank_*.jsonl

# refteacher-test-student
# Runs utils/precompute_logits.py through the accelerate launcher (4 processes)
# on the "test" split with the "rejected" conversation key, and writes the
# result under dpomix7k-2epoch-refteacher-test-student.
#
# The model checkpoint can be overridden with the REF_TEACHER_MODEL environment
# variable; when it is unset or empty the original machine-specific absolute
# path is used, so existing invocations behave exactly as before.
#
# The step aborts the whole script on failure so that later stages never run
# against a missing or partial output directory.
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m accelerate.commands.launch \
    --num_processes=4 \
    --main_process_port 29501 \
    utils/precompute_logits.py \
    --data data/llama3.2-1b-deita-dpomix/student_2epoch_adpa_dataset_original \
    --split test \
    --model "${REF_TEACHER_MODEL:-/home/minchan.kwon/ADPA/data/llama3.2-1b-deita-dpomix/student_adpa}" \
    --conversation-key rejected \
    --user-begin "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n" \
    --user-end "<|eot_id|>" \
    --assistant-begin "<|start_header_id|>assistant<|end_header_id|>\n\n" \
    --assistant-end "<|eot_id|>" \
    --save-to data/llama3.2-1b-deita-dpomix/dpomix7k-2epoch-refteacher-test-student \
    --pad-token-id 128001 \
    --max-tokens-per-batch 2048 || {
  printf '%s\n' "refteacher-test-student: logit precompute failed; aborting" >&2
  exit 1
}
# Remove the results_rank_*.jsonl files left in the output directory
# (presumably per-rank intermediate shards -- confirm against the script).
rm data/llama3.2-1b-deita-dpomix/dpomix7k-2epoch-refteacher-test-student/results_rank_*.jsonl



# Merge step: runs utils/merge_logits_dckd_dataset.py on argilla/dpo-mix-7k
# together with four teacher log-probability directories (chosen/rejected x
# train/test) and saves the result to dpomix7k-2epoch-dckd.
#
# NOTE(review): the four *-dpoteacher-{chosen,rejected}-logp-{train,test}
# inputs are not written by any step visible in this file (the precompute steps
# here save to *-{dpoteacher,refteacher}-{train,test}-student) -- confirm they
# are produced elsewhere and that this is the intended merge for this pipeline.
#
# Abort on failure so that training is never launched after a failed merge.
python utils/merge_logits_dckd_dataset.py \
    --input-dataset-dict          argilla/dpo-mix-7k \
    --teacher-chosen-logp-train   data/llama3.2-1b-deita-dpomix/dpomix7k-2epoch-dpoteacher-chosen-logp-train \
    --teacher-rejected-logp-train data/llama3.2-1b-deita-dpomix/dpomix7k-2epoch-dpoteacher-rejected-logp-train \
    --teacher-chosen-logp-test    data/llama3.2-1b-deita-dpomix/dpomix7k-2epoch-dpoteacher-chosen-logp-test \
    --teacher-rejected-logp-test  data/llama3.2-1b-deita-dpomix/dpomix7k-2epoch-dpoteacher-rejected-logp-test \
    --save-to                     data/llama3.2-1b-deita-dpomix/dpomix7k-2epoch-dckd || {
  printf '%s\n' "merge_logits_dckd_dataset: merge failed; aborting" >&2
  exit 1
}


# Training launch: starts scripts/run_distill_dpo.py with the student_adpa
# recipe through the accelerate launcher, using the DeepSpeed ZeRO-3 accelerate
# config. GPUs 0-3 are made visible, accelerate logging is set to "info", and
# DS_SKIP_CUDA_CHECK=1 is exported to the child process only.
env CUDA_VISIBLE_DEVICES=0,1,2,3 ACCELERATE_LOG_LEVEL=info DS_SKIP_CUDA_CHECK=1 \
  python -m accelerate.commands.launch \
    --config_file recipes/accelerate_config/deepspeed_zero3.yaml \
    scripts/run_distill_dpo.py recipes/llama3.2-1b-deita-dpomix/student_adpa.yaml