#!/usr/bin/env bash
#
# Driver for a repeated NovelQA evaluation run.
#
# Each repetition invokes StateLM.inference.nqa_test_runner_full
# (sub-command eval_nqa_openai) to produce a predictions file, then
# StateLM/evaluation/compute_scores.py to score it.
#
# Usage: edit the placeholder values below (PROJECT_ROOT, TRAJECTORIES_DIR,
# RESULTS_DIR), then run this script with bash. The StateLM/... paths are
# relative, so they are resolved against the current working directory.
#
# Strict mode: abort on the first failing command (-e), on any reference to
# an unset variable (-u), and on a failure in any stage of a pipeline
# (pipefail). Bash is required: the run loop uses brace expansion.
set -euo pipefail

# --- Dataset locations -------------------------------------------------------
PROJECT_ROOT="/path/to/your/project"  # placeholder: set to the real checkout
# Passed to the runner as --books_base_dir.
BOOK_BASE_COPYRIGHT="${PROJECT_ROOT}/NovelQA_Fullset/Books/CopyrightProtected"
# Passed to the runner as --questions_base_dir.
TEST_BASE_COPYRIGHT="${PROJECT_ROOT}/NovelQA_Fullset/Data/CopyrightProtected"


# --- Run identity and output locations ---------------------------------------
# RUN_ID is the prefix of every generated predictions file name.
RUN_ID="Qwen3-8B-PROMPT-AGENT"
# Placeholders: replace both before running.
TRAJECTORIES_DIR="your_trajectories_directory"  # e.g., ${PROJECT_ROOT}/qwen3-agent/trajectories/InfiniteBench_QA/${RUN_ID}
RESULTS_DIR="your_results_directory"  # e.g., ${PROJECT_ROOT}/qwen3-agent/evals/InfiniteBench_QA/${RUN_ID}
# Handed to the scorer as --results_output; the path of each run's
# predictions file is also appended to this file.
RESULT_OUTPUT="StateLM/novelqa_results_8b_prompt_agent.txt"

# --- Model and agent setup ---------------------------------------------------
MODEL_NAME="qwen3-8b"                                  # --model_name (runner and scorer)
TOOL_CONFIG_PATH="StateLM/tools_qwen_with_search.json" # --tool_config_path
SYSTEM_PROMPT_NAME="QWEN_AGENT_PROMPT"                 # --system_prompt_name

# --- Sampling parameters and run limits --------------------------------------
TEMP=0.7                # --temperature
TOP_P=0.8               # --topp
TOP_K=20                # --topk
# NOTE(review): MAX_CONTEXT is not referenced anywhere in the visible part of
# this script; only MAX_CONTEXT_EXP is passed on. Confirm whether it is needed.
MAX_CONTEXT=32000
MAX_CONTEXT_EXP=32000   # --max_context_exp
MAX_TURNS_EXP=150       # --max_turns_exp
MAX_TURNS_TO_FAIL=200   # --max_turns_to_fail
MAX_OUTPUT_TOKENS=2048  # --max_output_tokens

# Repeat the generate-then-score cycle three times. Every repetition writes
# its own predictions file and appends that file's path to RESULT_OUTPUT.
# Because the script runs under `set -e`, a failure in either python command
# aborts the script, so later repetitions do not run.
for i in {1..3}; do
    printf 'Starting run %s of 3\n' "$i" >&2

    # The timestamp (second resolution) gives each repetition a distinct file.
    OUTPUT_FP="${PROJECT_ROOT}/${RUN_ID}_generations_$(date +%Y%m%d_%H%M%S).jsonl"

    # Step 1: run inference; predictions are written to OUTPUT_FP.
    # All expansions are quoted so edited values containing spaces or glob
    # characters are still passed as single arguments.
    python -m StateLM.inference.nqa_test_runner_full eval_nqa_openai \
        --cfg_path StateLM/inference/openai_endpoint.json \
        --temperature "$TEMP" \
        --topp "$TOP_P" \
        --topk "$TOP_K" \
        --tool_config_path "$TOOL_CONFIG_PATH" \
        --system_prompt_name "$SYSTEM_PROMPT_NAME" \
        --max_turns_exp "$MAX_TURNS_EXP" \
        --max_context_exp "$MAX_CONTEXT_EXP" \
        --max_output_tokens "$MAX_OUTPUT_TOKENS" \
        --books_base_dir "$BOOK_BASE_COPYRIGHT" \
        --questions_base_dir "$TEST_BASE_COPYRIGHT" \
        --log_dir "$TRAJECTORIES_DIR" \
        --result_dir "$RESULTS_DIR" \
        --output_fp "$OUTPUT_FP" \
        --model_name "$MODEL_NAME" \
        --max_turns_to_fail "$MAX_TURNS_TO_FAIL"

    # Step 2: score the predictions produced by step 1.
    python StateLM/evaluation/compute_scores.py compute_scores \
        --preds_path "$OUTPUT_FP" \
        --results_output "$RESULT_OUTPUT" \
        --task_name "longbook_choice_eng" \
        --model_name "$MODEL_NAME" \
        --label_key "correct_answer" \
        --pred_key "final_answer"

    # Step 3: record which predictions file this repetition used.
    printf "\nPREDICTIONS_FILE=%s\n" "$OUTPUT_FP" >> "$RESULT_OUTPUT"

done
