#!/bin/bash

# Launches the DPO training entry point (tampering/rl/dpo/train.py) with a
# fixed base model, preference-data file, output model name and learning rate.
#
# Required environment variables:
#   TAMPERING_HOME  Project root; the dataset file and train.py are resolved
#                   beneath it, and the script changes into it before running.
#   HF_NAME         Prefix of the base model identifier
#                   (presumably a Hugging Face user/org name -- confirm).
#
# Exit status: that of train.py, or non-zero if the environment is incomplete.

set -euo pipefail

# Fail early with a clear message instead of silently building paths such as
# "/datasets/..." or a model name such as "/AT-qwen..." from empty variables.
: "${TAMPERING_HOME:?TAMPERING_HOME must be set to the project root}"
: "${HF_NAME:?HF_NAME must be set}"

# Do not launch training from an unexpected working directory.
cd -- "${TAMPERING_HOME}" || {
  printf 'error: cannot cd to %s\n' "${TAMPERING_HOME}" >&2
  exit 1
}

DATASET_NAME="hhrlhf"
BIAS_TYPE="ai"
LEARNING_RATE="1e-6"

MODEL_NAME="${HF_NAME}/AT-qwen2.5-7b-hhrlhf-5120-sft-b3s3-ai"
DATA_FILES="${TAMPERING_HOME}/datasets/${DATASET_NAME}/rm/train/${DATASET_NAME}_RM_5120_${BIAS_TYPE}_pref_implicit.jsonl"
# The learning rate is embedded in the output name; derive it from
# LEARNING_RATE so the two cannot drift apart.
OUTPUT_MODEL_NAME="AT-qwen2.5-7b-${DATASET_NAME}-5120-dpo-${BIAS_TYPE}-${LEARNING_RATE}"

# Every expansion is quoted so values containing spaces or glob characters
# reach train.py as single arguments.
python3 "${TAMPERING_HOME}/tampering/rl/dpo/train.py" \
    --model_name "${MODEL_NAME}" \
    --data_files "${DATA_FILES}" \
    --output_model_name "${OUTPUT_MODEL_NAME}" \
    --learning_rate "${LEARNING_RATE}"
