#!/bin/bash

# ---------------------------------------------------------------------------
# Dataset selection -- CHANGE THESE per run.
# MANAGER and SUBSET are derived from DATASET, so normally only DATASET
# needs to be edited.
# ---------------------------------------------------------------------------
DATASET="trex"
MANAGER="${DATASET}-train1k"
SUBSET="ids/${MANAGER}-ids"

# ---------------------------------------------------------------------------
# Annotator model: the merged fine-tuned checkpoint.
# ---------------------------------------------------------------------------
ANNOTATOR="llama"
MODEL_ID="/path/to/version4/model/Llama-3.1-8B-Instruct_squad-train1k_dwiki-train1k_chatgpt_gpt4o-v7.1_cleaned_ep10_merged"

# Alternative LoRA annotator, kept for reference only: its performance is
# worse than the merged model above.
# ANNOTATOR=llama-lora-ft-hf
# MODEL_ID=./output/tune_annotator/Llama-3.1-8B-Instruct_squad-train1k_dwiki-train1k_chatgpt_gpt4o-v7.1_cleaned_ep10

# ---------------------------------------------------------------------------
# Annotation run settings.
# ---------------------------------------------------------------------------
PROMPT_ID="llama-v6.1"
FORMAT="json"
SEED="42"

# Extra flags spliced into the command line; an empty value adds nothing.
POSTPROCESS=""

# Only referenced by the commented-out --save-dir flag of the invocation.
SAVE_DIR="eval_annotation"

    
# Fail fast: abort on command errors, on use of an unset variable (e.g. a
# mistyped setting name), and on failures inside pipelines.
set -euo pipefail

printf 'Processing MODEL_ID: %s\n' "${MODEL_ID}"

# Build the argument list as an array instead of a backslash-continued
# command line. Every value stays a single word even if it contains spaces
# or glob characters, and optional flags can be toggled by (un)commenting a
# line without having to keep trailing backslashes consistent.
annotate_args=(
  --annotator "${ANNOTATOR}"
  --model-id "${MODEL_ID}"
  --prompt-id "${PROMPT_ID}"
  --manager "${MANAGER}"
  --dataset "${DATASET}"
  --format "${FORMAT}"
)

# POSTPROCESS carries zero or more extra flags in a single string. Split it
# on whitespace explicitly (without glob expansion); an empty or blank value
# contributes no arguments.
postprocess_args=()
read -r -a postprocess_args <<< "${POSTPROCESS}"
if (( ${#postprocess_args[@]} > 0 )); then
  annotate_args+=("${postprocess_args[@]}")
fi

annotate_args+=(
  --seed "${SEED}"
  --save-every 100
  --subset "${SUBSET}"
  # Optional flags, currently disabled. Uncomment a line to enable it.
  # --save-dir "${SAVE_DIR}"
  # NOTE: CONFIG_FILE is not defined in the visible part of this script;
  # define it before enabling the next line.
  # --config-file "${CONFIG_FILE}"
)

# The script's exit status is that of the annotation run.
python -m memgpt.annotate "${annotate_args[@]}"
