#!/usr/bin/env bash
#
# Three-step pipeline driver: generate reflection outputs, filter them, and
# build the steering-vector dataset from the filtered file.
# All paths below are relative, so they resolve against the directory the
# script is launched from.

# Abort on the first failing command (-e) and on any use of an unset
# variable (-u), so a broken step or a typo'd name cannot silently feed
# bad paths into the later steps.
set -eu

# === Arguments ===
# Input JSON file; one of the datasets [arc_c, csqa, medmcqa, medqa].
INPUT_JSON="orig_dataset/train/arc_c_train.json"
# Task type passed to step 1: [med, csqa].
# NOTE(review): TASK presumably has to match the dataset family of
# INPUT_JSON (arc_c/csqa -> csqa, medmcqa/medqa -> med) — confirm.
TASK=csqa
# Layer index passed to step 3; also embedded in the vector file name.
LAYER=16
# GPU index exported as CUDA_VISIBLE_DEVICES for every step.
GPU=1

# === Default Settings ===
OUTPUT_DIR="reflection_trigger/gen_training_data/output"
MODEL="meta-llama/Llama-3.1-8B-Instruct"

# === Generate output file names based on input file name ===
# Strip the directory and the .json suffix: .../arc_c_train.json -> arc_c_train
BASENAME=$(basename "$INPUT_JSON" .json)
STEP1_OUTPUT="${OUTPUT_DIR}/step1_reflection_gen/${BASENAME}_reflect.json"
STEP2_OUTPUT="${OUTPUT_DIR}/step2_reflection_filtered/${BASENAME}_reflect_filter.json"
STEP3_OUTPUT="${OUTPUT_DIR}/step3_vectors_gen/${BASENAME}_steer_vec_${LAYER}.pt"

# Ensure the output directory and every per-step subdirectory exist.
# Each step writes into its own subdirectory of $OUTPUT_DIR, so creating
# only the top-level directory is not enough for the output paths to be
# writable.
mkdir -p -- "$OUTPUT_DIR" \
    "$(dirname -- "$STEP1_OUTPUT")" \
    "$(dirname -- "$STEP2_OUTPUT")" \
    "$(dirname -- "$STEP3_OUTPUT")"

# === Step 1: Generate reflection outputs ===
# Runs gen_reflection.py on $INPUT_JSON with the configured model and task,
# writing to $STEP1_OUTPUT. Only the GPU selected by $GPU is made visible.

# Fail fast with a clear message instead of launching the model job on a
# path that does not exist.
if [ ! -f "$INPUT_JSON" ]; then
    echo "Error: input file not found: $INPUT_JSON" >&2
    exit 1
fi

echo ""
echo ">>> Step 1: Generating reflection outputs..."
CUDA_VISIBLE_DEVICES=$GPU python reflection_trigger/gen_training_data/gen_reflection.py \
    --input "$INPUT_JSON" \
    --output "$STEP1_OUTPUT" \
    --model "$MODEL" \
    --task "$TASK" || {
    # Stop here, propagating the step's exit status: the later steps consume
    # this step's output and must not run without it.
    status=$?
    echo "Error: Step 1 (gen_reflection.py) failed with exit code $status." >&2
    exit "$status"
}

# === Step 2: Filter correct reflection outputs ===
# Runs filter_reflection.py on the step 1 output, writing to $STEP2_OUTPUT.
echo ""
echo ">>> Step 2: Filtering reflection outputs..."
CUDA_VISIBLE_DEVICES=$GPU python reflection_trigger/gen_training_data/filter_reflection.py \
    --input "$STEP1_OUTPUT" \
    --output "$STEP2_OUTPUT" || {
    # Stop here, propagating the step's exit status: step 3 reads this
    # step's output and must not run without it.
    status=$?
    echo "Error: Step 2 (filter_reflection.py) failed with exit code $status." >&2
    exit "$status"
}

# === Step 3: Build reflection vector dataset ===
# Runs gen_vectors.py on the filtered step 2 output with the configured
# model and layer index, writing to $STEP3_OUTPUT.
echo ""
echo ">>> Step 3: Building reflection vector dataset..."
CUDA_VISIBLE_DEVICES=$GPU python reflection_trigger/gen_training_data/gen_vectors.py \
    --input "$STEP2_OUTPUT" \
    --output "$STEP3_OUTPUT" \
    --model "$MODEL" \
    --layer "$LAYER" || {
    # Exit with the step's status so the success summary that follows is
    # never printed for a run that produced no vector file.
    status=$?
    echo "Error: Step 3 (gen_vectors.py) failed with exit code $status." >&2
    exit "$status"
}

# Final summary: a blank separator line, then the settings used for this
# run and the location of the resulting vector dataset.
cat <<EOF

Finished.
Task  = $TASK
Layer = $LAYER
Final vector dataset saved to $STEP3_OUTPUT.
EOF