#!/bin/bash

# Launches tampering/rm/train.py with the fixed configuration defined below.
#
# Required environment variables:
#   TAMPERING_HOME - directory the script changes into; also the base path for
#                    the training script and the training data file.
#   HF_NAME        - prefix of the input and output model identifiers
#                    (presumably a Hugging Face user/org name -- confirm).
#
# Exit status: non-zero if a required variable is missing, the directory
# change fails, or the python3 invocation fails.

set -euo pipefail

# Fail early with a clear message rather than silently building paths such as
# "/tampering/rm/train.py" or model ids such as "/AT-qwen2.5-..." from empty
# values.
: "${TAMPERING_HOME:?TAMPERING_HOME must be set to the repository root}"
: "${HF_NAME:?HF_NAME must be set to the model name prefix}"

cd "$TAMPERING_HOME" || {
    printf 'error: cannot cd to %s\n' "$TAMPERING_HOME" >&2
    exit 1
}

MODEL_NAME="$HF_NAME/AT-qwen2.5-7b-hhrlhf-5120-sft-b3s3-ai"
# The tokenizer is taken from the same model that is passed as --model_name.
TOKENIZING_MODEL="$MODEL_NAME"
DATASET_NAME="hhrlhf"
BIAS_TYPE="ai"
DATA_FILES="$TAMPERING_HOME/datasets/${DATASET_NAME}/rm/train/${DATASET_NAME}_RM_5120_${BIAS_TYPE}_pref_implicit.jsonl"
NUM_TRAIN_EPOCHS=1
OUTPUT_MODEL_NAME="$HF_NAME/AT-qwen2.5-7b-${DATASET_NAME}-5120-rm-${BIAS_TYPE}"
REWARD_MODEL_TYPE="rm"
LEARNING_RATE=5e-6

# Every expansion is quoted so values containing whitespace or glob characters
# reach train.py as single, unmodified arguments.
python3 "$TAMPERING_HOME/tampering/rm/train.py" \
    --model_name "$MODEL_NAME" \
    --tokenizing_model "$TOKENIZING_MODEL" \
    --data_files "$DATA_FILES" \
    --num_train_epochs "$NUM_TRAIN_EPOCHS" \
    --output_model_name "$OUTPUT_MODEL_NAME" \
    --reward_model_type "$REWARD_MODEL_TYPE" \
    --learning_rate "$LEARNING_RATE"
