#!/bin/bash
# Pipeline driver. The steps below run in order:
#   1. utils/precompute_logits_for_train.py on the train split (./model/ref_teacher)
#   2. utils/precompute_logits.py on the test split (./model/ref_teacher)
#   3. utils/merge_logits_adpa_dataset.py to build the merged dataset
#   4. scripts/run_distill_dpo.py with the student_adpa_from_teacher recipe
#
# Strict mode: abort on the first failing command (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Trace every command to stderr so long-running stages are identifiable in logs.
set -x

# NOTE(review): neither name is referenced in the visible part of this script;
# kept in case later code or a sourcing script relies on them.
BEFORE_EPOCH_NAME="1epoch"
EPOCH_NAME="2epoch"






# Step: run utils/precompute_logits_for_train.py on the `train` split with the
# model at ./model/ref_teacher, reading the `rejected` conversation key and
# writing results under the --save-to path below.
#
# Script arguments are collected in an array so each flag/value pair sits on
# one line without trailing backslashes.
# NOTE(review): inside single quotes the shell passes `\\n\\n` through verbatim
# (two backslashes then `n`); confirm the Python script expects that escaping.
ref_teacher_train_args=(
  --data ./data/llama3.2-1b-deita-dpomix/dpo_teacher_adpa_dataset_original
  --split train
  --model ./model/ref_teacher
  --conversation-key rejected
  --user-begin '<|begin_of_text|><|start_header_id|>user<|end_header_id|>\\n\\n'
  --user-end '<|eot_id|>'
  --assistant-begin '<|start_header_id|>assistant<|end_header_id|>\\n\\n'
  --assistant-end '<|eot_id|>'
  --save-to ./data/llama3.2-1b-deita-dpomix/dpomix7k-refteacher-train-dpoteacher
  --pad-token-id 128001
  --max-tokens-per-batch 2048
)

# Four GPUs are made visible to this command only; the launcher is asked for
# two processes on main-process port 29501.
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m accelerate.commands.launch \
  --num_processes=2 --main_process_port 29501 \
  utils/precompute_logits_for_train.py "${ref_teacher_train_args[@]}"



# Step: run utils/precompute_logits.py on the `test` split with the model at
# ./model/ref_teacher, reading the `rejected` conversation key and writing
# results under the --save-to path below.
#
# Script arguments are collected in an array so each flag/value pair sits on
# one line without trailing backslashes.
# NOTE(review): inside single quotes the shell passes `\\n\\n` through verbatim
# (two backslashes then `n`); confirm the Python script expects that escaping.
ref_teacher_test_args=(
  --data ./data/llama3.2-1b-deita-dpomix/dpo_teacher_adpa_dataset_original
  --split test
  --model ./model/ref_teacher
  --conversation-key rejected
  --user-begin '<|begin_of_text|><|start_header_id|>user<|end_header_id|>\\n\\n'
  --user-end '<|eot_id|>'
  --assistant-begin '<|start_header_id|>assistant<|end_header_id|>\\n\\n'
  --assistant-end '<|eot_id|>'
  --save-to ./data/llama3.2-1b-deita-dpomix/dpomix7k-refteacher-test-dpoteacher
  --pad-token-id 128001
  --max-tokens-per-batch 2048
)

# Four GPUs are made visible to this command only; the launcher is asked for
# two processes on main-process port 29501.
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m accelerate.commands.launch \
  --num_processes=2 --main_process_port 29501 \
  utils/precompute_logits.py "${ref_teacher_test_args[@]}"




# Step: merge the precomputed dpo-teacher and ref-teacher outputs into the
# dataset saved at ./data/llama3.2-1b-deita-dpomix/adpa_dataset_from_teacher.
#
# Fix: the *-logp-test arguments previously pointed at the train-split
# artifacts, so the test-split output written by the ref-teacher precompute
# step of this script was never consumed. They now point at the test-split
# artifacts.
# NOTE(review): the dpo-teacher artifacts are not produced in this script; the
# test-split path is assumed to follow the same naming as its train
# counterpart — confirm against the step that generates them.
adpa_data_dir=./data/llama3.2-1b-deita-dpomix
dpo_teacher_logp_train="${adpa_data_dir}/dpomix7k-dpoteacher-train-dpoteacher"
ref_teacher_logp_train="${adpa_data_dir}/dpomix7k-refteacher-train-dpoteacher"
dpo_teacher_logp_test="${adpa_data_dir}/dpomix7k-dpoteacher-test-dpoteacher"
ref_teacher_logp_test="${adpa_data_dir}/dpomix7k-refteacher-test-dpoteacher"

# Fail fast with a clear message instead of erroring deep inside the Python
# merge script when one of the precomputed inputs is missing.
for logp_path in \
  "${dpo_teacher_logp_train}" \
  "${ref_teacher_logp_train}" \
  "${dpo_teacher_logp_test}" \
  "${ref_teacher_logp_test}"; do
  if [[ ! -e "${logp_path}" ]]; then
    printf 'error: missing precomputed input for merge: %s\n' "${logp_path}" >&2
    exit 1
  fi
done

python utils/merge_logits_adpa_dataset.py \
  --input-dataset-dict argilla/dpo-mix-7k \
  --dpo-teacher-logp-train "${dpo_teacher_logp_train}" \
  --ref-teacher-logp-train "${ref_teacher_logp_train}" \
  --dpo-teacher-logp-test "${dpo_teacher_logp_test}" \
  --ref-teacher-logp-test "${ref_teacher_logp_test}" \
  --save-to "${adpa_data_dir}/adpa_dataset_from_teacher" \
  --logits-key rejected_compressed_probs \
  --label-key rejected_labels \
  --output-key rejected_margin_logp_every



# Step: launch scripts/run_distill_dpo.py through the accelerate launcher using
# the DeepSpeed ZeRO-3 accelerate config and the student_adpa_from_teacher
# recipe.
distill_launch_args=(
  --config_file recipes/accelerate_config/deepspeed_zero3.yaml
  scripts/run_distill_dpo.py
  recipes/llama3.2-1b-deita-dpomix/student_adpa_from_teacher.yaml
)

# The three environment variables are set as command prefixes, so they apply
# to this launch only and are not exported to the rest of the script.
CUDA_VISIBLE_DEVICES=0,1,2,3 ACCELERATE_LOG_LEVEL=info DS_SKIP_CUDA_CHECK=1 \
  python -m accelerate.commands.launch "${distill_launch_args[@]}"



