#!/usr/bin/env bash
# Runs the evaluation harness (harness/run_evaluation.py) on an existing
# predictions file and writes its logs to evaluation_output.
#
# The harness script, the predictions file and the log directory are all given
# as relative paths, so run this from the directory that contains harness/ and
# inference_output/.

# Load secret.sh into this shell (presumably credentials/environment settings
# -- confirm against that file). Abort if it cannot be sourced, rather than
# launching the evaluation without whatever it provides.
source secret.sh || {
  printf 'error: failed to source secret.sh; aborting\n' >&2
  exit 1
}

# Inference step, currently disabled (kept for reference):
# python3 inference/run_api.py --dataset_name_or_path "./datasets/swt_bench_lite" --split dev --model_name_or_path "ollama_phi3:mini" --output_dir inference_output --max_cost 1000 --model_args "max_tokens=1001"

# Evaluation step. As the last command, its exit status becomes the exit
# status of this script.
python3 harness/run_evaluation.py \
  --vanilla-patch \
  --verbose \
  --predictions_path "inference_output/swe-agent-demo3__swt_bench_lite__test.jsonl" \
  --log_dir evaluation_output \
  --num_processes 10
