#!/bin/bash

# Run the TinyStories EPO script several times with the same config, writing
# each run's output to its own results/stories_epo_<i>.json file.
#
# Usage: <this script> [num_runs]
#   num_runs  number of repeated runs (positive integer, default: 3)
#
# The path to run_epo.py is relative, so this must be invoked from the
# repository root (presumably /workspace/eliciting-contexts, to match the
# absolute output/config paths below -- confirm for your checkout).
#
# Every run is attempted even if an earlier one fails; the script exits
# non-zero afterwards if any run failed, so that a failure in an early run
# is not masked by a later successful one.
benchmark_dir=/workspace/eliciting-contexts/src/eliciting_contexts/benchmark
epo_script=src/eliciting_contexts/benchmark/external/tiny_stories/run_epo.py
epo_config="${benchmark_dir}/external/tiny_stories/config.yaml"
num_runs=${1:-3}

# Reject anything that is not a positive integer before using it in arithmetic.
if [[ ! "${num_runs}" =~ ^[1-9][0-9]*$ ]]; then
    printf 'num_runs must be a positive integer, got: %s\n' "${num_runs}" >&2
    exit 2
fi

failed_runs=0
for ((run = 1; run <= num_runs; run++)); do
    if ! uv run python "${epo_script}" \
        --output "${benchmark_dir}/results/stories_epo_${run}.json" \
        --config "${epo_config}"; then
        printf 'EPO run %d failed\n' "${run}" >&2
        failed_runs=$((failed_runs + 1))
    fi
done

if ((failed_runs > 0)); then
    printf '%d of %d EPO runs failed\n' "${failed_runs}" "${num_runs}" >&2
    exit 1
fi

# uv run python src/eliciting_contexts/benchmark/external/tiny_stories/evaluate_results.py \
#     --results_jsons /workspace/eliciting-contexts/src/eliciting_contexts/benchmark/results/stories_results_epo_1.json /workspace/eliciting-contexts/src/eliciting_contexts/benchmark/results/stories_results_epo_2.json /workspace/eliciting-contexts/src/eliciting_contexts/benchmark/results/stories_results_epo_3.json \
#     --result_names stories_epo_1 stories_epo_2 stories_epo_3 \
#     --output_json /workspace/eliciting-contexts/src/eliciting_contexts/benchmark/results/stories_results_epo.json

# uv run python src/eliciting_contexts/benchmark/external/shared/create_dashboard.py \
#     --input /workspace/eliciting-contexts/src/eliciting_contexts/benchmark/results/stories_results_epo.json \
#     --output  /workspace/eliciting-contexts/src/eliciting_contexts/benchmark/results/epo_assist_report.html
