#!/usr/bin/env bash
#
# Runs inference/run_api.py with gpt-4-1106-preview over the swt_bench_aug1
# oracle dataset variants, passing --output_dir inference_output.
#
# Usage: run from the directory that contains secret.sh, inference/ and
# datasets/ (every path below is relative to the current directory).
# Exit status: non-zero if secret.sh cannot be sourced or if any inference
# run fails; zero otherwise.

# secret.sh is loaded before any run; presumably it exports the API
# credentials that run_api.py needs (TODO confirm). Stop here if it cannot be
# loaded instead of launching runs without it.
# shellcheck source=/dev/null
source secret.sh || {
  printf 'error: failed to source secret.sh\n' >&2
  exit 1
}

# Seed forwarded to the model through --model_args. It is also part of the
# prediction file names used by the commented-out evaluation commands below,
# so the variable name must stay `i` for those commands to keep working.
i=0

# Optional preparation step, currently disabled.
# python3 augment_test_case_gen.py

model="gpt-4-1106-preview"

# Dataset directories under ./datasets/ to run, in order. Commented entries
# are disabled runs kept for reference.
datasets=(
  swt_bench_aug1_oracle
  swt_bench_aug1_oracle_patch
  swt_bench_aug1_oracle_failing_patch
  swt_bench_aug1_oracle_failing_patch_consistent
  # swt_bench_lite_aug1_bm25_27k_cl100k
)

# A failing run does not stop the remaining datasets (each run is
# independent), but it is remembered so the script can report it on exit.
status=0
for dataset in "${datasets[@]}"; do
  python3 inference/run_api.py \
    --dataset_name_or_path "./datasets/${dataset}" \
    --split test \
    --model_name_or_path "${model}" \
    --output_dir inference_output \
    --max_cost 100 \
    --model_args "seed=${i},temperature=0" || {
    printf 'error: inference failed for dataset %s\n' "${dataset}" >&2
    status=1
  }
done

# Evaluation of the generated predictions, currently disabled.
# NOTE(review): the last two commands repeat the first two exactly; they may
# have been meant for the failing_patch datasets - confirm before enabling.
# python3 harness/run_evaluation.py --custom-patch --verbose --predictions_path inference_output/gpt-4-1106-preview__swt_bench_aug1_oracle__seed=$i,temperature=0__test.jsonl --log_dir evaluation_output --num_processes 20 --swe_bench_tasks test --testbed /tmp/alt_testbed
# python3 harness/run_evaluation.py --custom-patch --verbose --predictions_path inference_output/gpt-4-1106-preview__swt_bench_aug1_oracle_patch__seed=$i,temperature=0__test.jsonl --log_dir evaluation_output --num_processes 20 --swe_bench_tasks test --testbed /tmp/alt_testbed
# python3 harness/run_evaluation.py --custom-patch --verbose --predictions_path inference_output/gpt-4-1106-preview__swt_bench_aug1_oracle__seed=$i,temperature=0__test.jsonl --log_dir evaluation_output --num_processes 20 --swe_bench_tasks test --testbed /tmp/alt_testbed
# python3 harness/run_evaluation.py --custom-patch --verbose --predictions_path inference_output/gpt-4-1106-preview__swt_bench_aug1_oracle_patch__seed=$i,temperature=0__test.jsonl --log_dir evaluation_output --num_processes 20 --swe_bench_tasks test --testbed /tmp/alt_testbed

exit "${status}"
