#!/usr/bin/env bash
#
# Runs harness/run_evaluation.py on the predictions file
# inference_output/claude-3-haiku-20240307__swt_bench__dev.jsonl and passes
# evaluation_output as its --log_dir.
#
# Every path below is relative, so run this script from the directory that
# contains secret.sh, harness/ and inference_output/.

# Pull secret.sh into this shell (its contents are not visible here;
# presumably credentials/API keys -- confirm). The explicit "./" matters:
# for a bare file name bash searches $PATH before the current directory, so
# an unrelated secret.sh found on $PATH would be loaded instead.
# A failed load is intentionally not made fatal here, matching the previous
# behaviour of this script.
# shellcheck source=/dev/null
source ./secret.sh

# Disabled command kept for reference. Presumably this is the inference step
# that produces the predictions file evaluated below -- confirm before use.
# python3 inference/run_api.py --dataset_name_or_path "./datasets/swt_bench" --split dev --model_name_or_path claude-3-haiku-20240307 --output_dir inference_output --max_cost 1000

# Evaluation run, one option per line for readability. This is the last
# command, so its exit status becomes the exit status of the script.
python3 harness/run_evaluation.py \
  --custom-patch \
  --verbose \
  --predictions_path inference_output/claude-3-haiku-20240307__swt_bench__dev.jsonl \
  --log_dir evaluation_output \
  --num_processes 10
