import argparse
import json
from pathlib import Path

import ale_bench
from ale_bench.result import CaseResult, JudgeResult, Result

from common_resource import select_submission_code


def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the private-evaluation rerun script.

    Returns:
        A namespace with the attributes ``experiment_dir`` (``Path``),
        ``problem_id`` (``str``), ``code_history_drop_last`` (``int``,
        default 0), ``lite_version`` (``bool``) and ``first_accept``
        (``bool``).
    """
    parser = argparse.ArgumentParser(description="Rerun private eval")
    parser.add_argument(
        "-e", "--experiment_dir", type=Path, required=True,
        help=(
            "Path to the experiment directory that holds the run artifacts "
            "(codes_history_<problem_id>.json, log_<problem_id>.txt, ...)."
        ),
    )
    parser.add_argument(
        "-p", "--problem_id", type=str, required=True,
        help="ID of the problem whose private evaluation is rerun (e.g. ahc033).",
    )
    parser.add_argument(
        "-d", "--code_history_drop_last", type=int, default=0,
        help="The number of code history to drop from the end.",
    )
    parser.add_argument(
        "--lite_version", action="store_true",
        help="Use the lite version of the experiment.",
    )
    parser.add_argument(
        "--first_accept", action="store_true",
        help="Use the first accept version of the experiment.",
    )
    return parser.parse_args()


def _append_log(log_file: Path, message: str) -> None:
    """Append ``message`` as one line to ``log_file`` and close the file."""
    with log_file.open("a") as f:
        print(message, file=f)


def main() -> None:
    """Rerun the private evaluation for one problem of a finished experiment.

    Reads ``codes_history_<problem_id>.json`` from the experiment directory,
    optionally drops the last ``--code_history_drop_last`` entries, picks the
    submission via ``select_submission_code`` and runs ``private_eval`` on an
    ALE-Bench session. Progress is appended to ``log_<problem_id>.txt`` and
    the outcome is written to ``private_result_<problem_id>.json``.

    A failure during the evaluation itself is recorded in the log file (with
    its traceback) instead of being raised, so the script stays best-effort.

    Raises:
        FileNotFoundError: If the log, session or summary-history file of the
            experiment is missing.
        FileExistsError: If a private result file already exists; it is never
            overwritten.

    NOTE(review): ``--first_accept`` is parsed but not read here -- confirm
    whether it should influence the submission selection.
    """
    import traceback

    args = parse_args()

    codes_history_file = args.experiment_dir / f"codes_history_{args.problem_id}.json"
    with codes_history_file.open() as f:
        codes_history = [
            (Result.model_validate(r) if r else None, cl, c)
            for r, cl, c in json.load(f)
        ]
    if args.code_history_drop_last > 0:
        codes_history = codes_history[:-args.code_history_drop_last]

    # Explicit exceptions instead of `assert`: these guards (in particular the
    # one protecting an existing result) must also hold under `python -O`.
    log_file = args.experiment_dir / f"log_{args.problem_id}.txt"
    if not log_file.is_file():
        raise FileNotFoundError(f"Log file {log_file} does not exist.")
    private_result_file = args.experiment_dir / f"private_result_{args.problem_id}.json"
    if private_result_file.exists():
        raise FileExistsError(f"Private result file {private_result_file} exists.")
    for prefix in ("session", "summary_history"):
        required_file = args.experiment_dir / f"{prefix}_{args.problem_id}.json"
        if not required_file.is_file():
            raise FileNotFoundError(f"Required file {required_file} does not exist.")

    ale_bench_session = ale_bench.start(args.problem_id, args.lite_version, num_workers=13)
    _, submission_code_language, submission_code = select_submission_code(
        codes_history, ale_bench_session.problem.metadata.score_type, args.lite_version
    )
    try:
        _append_log(log_file, "Private evaluation reran.")
        private_result, rank, performance = ale_bench_session.private_eval(
            submission_code, submission_code_language
        )
        _append_log(log_file, f"[{args.problem_id}] Rank: {rank}, Performance: {performance}")
        # Serialize fully before touching the result file so that a
        # serialization error cannot leave a truncated file behind, which
        # would block later reruns through the existence check above.
        payload = json.dumps({
            "problem_id": args.problem_id,
            "rank": rank,
            "performance": performance,
            "private_result": private_result.model_dump(),
        })
        private_result_file.write_text(payload)
    except Exception:
        # Best-effort: record the failure and its cause, but do not crash.
        # KeyboardInterrupt / SystemExit deliberately propagate.
        _append_log(log_file, f"[{args.problem_id}] Error: Private evaluation failed. ({args.problem_id})")
        _append_log(log_file, traceback.format_exc())


# Run the rerun only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


# These are the commands to rerun the private evaluation for the submissions in the four_hours directory:
# python rerun_private_eval.py -e results/four_hours/o4-mini-high_cpp20 -p ahc033 -d 1
# python rerun_private_eval.py -e results/four_hours/gemini-2.5-pro-thinking_cpp20 -p ahc017 -d 1
# python rerun_private_eval.py -e results/four_hours/deepseek-r1_cpp20 -p ahc006 -d 1
# python rerun_private_eval.py -e results/four_hours/deepseek-r1_cpp20 -p ahc007 -d 1
# python rerun_private_eval.py -e results/four_hours/deepseek-r1_cpp20 -p ahc009 -d 1
# python rerun_private_eval.py -e results/four_hours/deepseek-r1_cpp20 -p ahc010 -d 1
# python rerun_private_eval.py -e results/four_hours/deepseek-r1_cpp20 -p ahc016 -d 1
# python rerun_private_eval.py -e results/four_hours/deepseek-r1_cpp20 -p ahc019 -d 1
# python rerun_private_eval.py -e results/four_hours/deepseek-r1_cpp20 -p ahc021 -d 1
# python rerun_private_eval.py -e results/four_hours/deepseek-r1_cpp20 -p ahc024 -d 1
# python rerun_private_eval.py -e results/four_hours/deepseek-r1_cpp20 -p ahc027 -d 1
# python rerun_private_eval.py -e results/four_hours/deepseek-r1_cpp20 -p future-contest-2022-qual -d 1
