#!/bin/bash

# ---------------------------------------------------------------------------
# Environment
# All paths in this script are relative, so they resolve against the
# directory the script is launched from.
# ---------------------------------------------------------------------------

# Put the sibling FActScore checkout on Python's module search path.
export PYTHONPATH="../FActScore"

# Account-specific setup; presumably exports the OpenAI API key (the sourced
# file is not part of this script -- confirm there).
source "./scripts/account/openai_key.sh"

# ---------------------------------------------------------------------------
# Inputs and outputs
# ---------------------------------------------------------------------------

# Directory handed to the evaluation script via --save-dir.
SAVE_DIR="./output/eval/factscore/factscore_v4.1_debug_51k"

# Annotator database handed to the evaluation script via --database-path.
DATABASE_PATH="./database_v4/dwikibio51k-annotator_database.json"
# Alternative database:
# DATABASE_PATH="/path/to/version4/database/dwiki_bio17k-annotator_database.json"

# Optional flag appended verbatim to the evaluation command.
# Set to the empty string to run without database lookup.
# ENABLE_DBLOOKUP=""
ENABLE_DBLOOKUP="--enable_dblookup"

# ---------------------------------------------------------------------------
# Checkpoints to evaluate (one evaluation run per array entry)
# ---------------------------------------------------------------------------
MODEL_LST=()
MODEL_LST+=("/path/to/checkpoints/pretrain_v6.1_mix//tiny-llama2-176M_dwiki6.1M_ep8_bsz256_new")

# Alternative checkpoint:
# MODEL_LST=("/path/to/version4/model/tiny-llama2-176M_dwiki6.1M_ep8_bsz256_new")

# Alternative: read checkpoint paths from the configuration file, one path
# per line.
# MODEL_LST=()
# while IFS= read -r line; do
#     # Skip commented lines and empty lines
#     if [[ "$line" =~ ^#.*$ ]] || [[ -z "$line" ]]; then
#         continue
#     fi
#     MODEL_LST+=("$line")
# done < ./output/checkpoint.cfg

# ---------------------------------------------------------------------------
# Evaluation data
# ---------------------------------------------------------------------------
DATASET="factscore"
NUM_SAMPLES=100

# ---------------------------------------------------------------------------
# Generation settings (each is forwarded to the matching command-line option)
# ---------------------------------------------------------------------------
SEED=42
MAX_NEW_TOKENS=256
TEMPERATURE=0.0 # greedy decoding
TOP_P=0.9
# Spelling (sic) is kept: the evaluation loop reads this exact variable name.
REPITITION_PENALTY=1.2
# Forwarded as --threshold; its meaning is defined by the evaluation script.
THRESHOLD=0.6


# Run eval_factscore_retrieve.py once for every checkpoint in MODEL_LST,
# forwarding the settings defined earlier in this script.
#
# Every expansion is double-quoted so that a path containing spaces or glob
# characters reaches Python as a single argument.
for MODEL in "${MODEL_LST[@]}"; do
    # ENABLE_DBLOOKUP is optional: the ${var:+...} form expands to the quoted
    # flag when the variable is non-empty and to nothing at all when it is
    # empty or unset, so no empty-string argument is ever passed.
    python ./experiment/eval/eval_factscore_retrieve.py \
        --save-dir "$SAVE_DIR" \
        --model "$MODEL" \
        --dataset "$DATASET" \
        --num-samples "$NUM_SAMPLES" \
        --temperature "$TEMPERATURE" \
        --top-p "$TOP_P" \
        --seed "$SEED" \
        --max-new-tokens "$MAX_NEW_TOKENS" \
        --world-size 1 \
        --database-path "$DATABASE_PATH" \
        --entity-path "./experiment/eval/factscore_labeled_prompt_entities.txt" \
        --threshold "$THRESHOLD" \
        --repetition-penalty "$REPITITION_PENALTY" \
        ${ENABLE_DBLOOKUP:+"$ENABLE_DBLOOKUP"}

    # ----------------------------------------------------------------------
    # Disabled follow-up stage: rebuild the path of a .jsonl file under
    # SAVE_DIR (presumably the file written by the command above -- confirm
    # the naming against eval_factscore_retrieve.py) and pass it to
    # FActScore's factscorer.py.
    #
    # NOTE(review) before re-enabling:
    #   * `cd ../FActScore/` changes the working directory for every later
    #     iteration, so the relative paths used above would stop resolving;
    #     run this stage in a subshell or cd back afterwards.
    #   * the missing-file check only prints a message; it does not skip the
    #     scorer for that checkpoint.
    # ----------------------------------------------------------------------

    # FORMATTED_TEMP=$(printf "%.1f" $TEMPERATURE)
    # ARGS_POSTFIX="t${FORMATTED_TEMP}_p${TOP_P}_s${SEED}_rep${REPITITION_PENALTY}_th${THRESHOLD}_len${MAX_NEW_TOKENS}"

    # MODEL_BASENAME=$(basename "$MODEL")

    # if [[ "$MODEL_BASENAME" == *"checkpoint"* ]]; then
    #     # When "checkpoint" is in the last part of the path
    #     PARENT_DIR=$(basename "$(dirname "$MODEL")")
    #     CHECKPOINT_NUM=$(echo "$MODEL_BASENAME" | awk -F'-' '{print $NF}')
    #     MODEL_NAME="${PARENT_DIR}_ckpt${CHECKPOINT_NUM}"
    # else
    #     # Just use the basename if no checkpoint in the name
    #     MODEL_NAME="$MODEL_BASENAME"
    # fi

    # if [ -z "${ENABLE_DBLOOKUP}" ]; then
    #     INPUT_PATH="${SAVE_DIR}/${MODEL_NAME}_${ARGS_POSTFIX}.jsonl"
    # else
    #     INPUT_PATH="${SAVE_DIR}/${MODEL_NAME}_dblookup_${ARGS_POSTFIX}.jsonl"
    # fi

    # if [ ! -f "${INPUT_PATH}" ]; then
    #     echo "Error: Input file ${INPUT_PATH} does not exist."
    # fi

    # OPENAI_API_KEY_PATH=./openai_key.txt

    # cd ../FActScore/
    # python ./factscore/factscorer.py \
    # --input_path ${INPUT_PATH} \
    # --model_name retrieval+ChatGPT \
    # --cost_estimate consider_cache \
    # --verbose \
    # --n_samples ${NUM_SAMPLES}
done