#!/bin/bash
#
# Runs the GQA evaluation steps in order: program generation, compilation /
# translation to EPIC, recording and replaying model calls, then the plots and
# the accuracy table. All relative paths are resolved from this script's
# directory.

# Strict mode: abort on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Work from the directory containing this script so the relative eval/,
# prompts/ and datasets/ paths resolve regardless of the caller's cwd.
# BASH_SOURCE[0] (rather than $0) still names this file when it is sourced,
# and `--` keeps a path beginning with '-' from being parsed as an option.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")"

# Generate programs.
# gen_gqa_programs KIND FLAG runs the GQA generator once for the given kind.
# FLAG is -p or -e (presumably Python vs. EPIC output; confirm against
# eval/pipeline_gen_gqa.py).
gen_gqa_programs() {
  python eval/pipeline_gen_gqa.py -k "$1" "$2"
}

gen_gqa_programs python -p
gen_gqa_programs epic_compiled -p
gen_gqa_programs epic_direct -e

# Compile Python to EPIC for the epic_compiled kind.
python eval/pipeline_translate.py -k epic_compiled

# Translate Python to EPIC via an LLM, using the 10-shot translation prompt.
# The -p / -e directories are presumably the Python input and EPIC output
# locations; confirm against eval/pipeline_llm_translate.py.
llm_translate_args=(
  --prompt_path prompts/gqa_translate_10_shot.prompt
  -p datasets/gqa/epic_compiled/progs_py/
  -e datasets/gqa/epic_translated/progs_epic/
)
python eval/pipeline_llm_translate.py "${llm_translate_args[@]}"

# Record all model calls (model 4o_all), one run per program kind.
# Kinds run with -p come first, then the kinds run with -e.
for kind in python epic_compiled; do
  python eval/pipeline_run.py -k "$kind" -m 4o_all -p --record
done
for kind in epic_translated epic_direct; do
  python eval/pipeline_run.py -k "$kind" -m 4o_all -e --record
done

# Evaluate all programs using recordings.
# replay_run KIND FLAG re-runs one program kind in --replay mode (model 4o_all).
replay_run() {
  python eval/pipeline_run.py -k "$1" -m 4o_all "$2" --replay
}

replay_run python -p
replay_run epic_compiled -p
# NOTE(review): epic_compiled is replayed with -e although only its -p run is
# recorded in this script; presumably the -e replay reuses those recordings.
# Confirm.
replay_run epic_compiled -e
replay_run epic_translated -e
replay_run epic_direct -e

# Figure 4b in the paper. Output file:
#   datasets/gqa/plots/runtime/exec_4o_all_epic_replay/baseline_epic_compiled_exec_4o_all_py_replay/improved_runtime_scatter.pdf
runtime_baseline=epic_compiled/exec_4o_all_py_replay
runtime_kind=epic_compiled/exec_4o_all_epic_replay
python eval/plot_runtimes.py -b "$runtime_baseline" -k "$runtime_kind"

# Figure 4c in the paper. Output file:
#   datasets/gqa/plots/interactions/epic_compiled_rounds_4o_all_epic/interaction_2d_histogram.pdf
interactions_kind=epic_compiled/rounds_4o_all_epic
python eval/plot_interactions.py -k "$interactions_kind"

# Table 1: accuracy per program kind, printed to stdout.
# The python kind is evaluated with --python; the EPIC kinds with --epic.
python eval/plot_accuracy.py --kind python --model 4o_all --python
for kind in epic_compiled epic_translated epic_direct; do
  python eval/plot_accuracy.py --kind "$kind" --model 4o_all --epic
done

# Figures 4d and 4e are NOT generated by this script: the command below is
# commented out. When run, it generates:
#   Figure 4d: /tmp/conformal_error.pdf
#   Figure 4e: /tmp/conformal_unk.pdf
# python eval/plot_conformal_results_dist.py