#!/bin/bash

# Runs `python -m swebench.harness.run_evaluation` with the settings below,
# prints the wall-clock time the command took (in minutes), and exits with the
# command's exit status.

# Run settings. EXPERIMENT_NAME is used both to locate the predictions file
# and as the --run_id value.
readonly MAX_WORKERS=4
readonly EXPERIMENT_NAME="test-gpt4o"
readonly PREDICTIONS_PATH="logging/swe_bench/${EXPERIMENT_NAME}/prediction.jsonl"
readonly DATASET_NAME="princeton-nlp/SWE-bench_Lite"
readonly SPLIT=test

start_time=$(date +%s)

# Capture the status instead of aborting so the elapsed time is still
# reported when the command fails.
status=0
python -m swebench.harness.run_evaluation \
  --dataset_name "$DATASET_NAME" \
  --split "$SPLIT" \
  --predictions_path "$PREDICTIONS_PATH" \
  --max_workers "$MAX_WORKERS" \
  --run_id "$EXPERIMENT_NAME" \
  || status=$?

end_time=$(date +%s)
elapsed_seconds=$((end_time - start_time))

# Minutes with two truncated decimals, computed with integer arithmetic so no
# external calculator is needed: whole minutes, then the leftover seconds
# expressed as hundredths of a minute.
printf 'Elapsed time (evaluate): %d.%02d minutes\n' \
  "$((elapsed_seconds / 60))" \
  "$(((elapsed_seconds % 60) * 100 / 60))"

# Propagate the command's result so callers (CI, wrapper scripts) can detect a
# failed run.
exit "$status"
