#!/bin/bash
#
# Runs coder inference for one model over a bug benchmark, then submits the
# follow-up evaluation job to Slurm.
#
# Steps:
#   1. Activate the "buggen" conda environment and cd into the project.
#   2. Run unified_eval.run_eval in inference-only mode, writing CODER_OUTPUT.
#   3. Run it a second time with --continue-from pointing at that same file.
#   4. Submit the evaluation batch script with sbatch.
#
# Strict mode is enabled once, up front: any failing command, unset variable,
# or failing pipeline stage aborts the script. In particular, a non-zero exit
# from the first inference pass stops the script before the retry pass.

set -euo pipefail

# NOTE(review): this directory is created relative to the directory the script
# is launched from, *before* the cd below. Presumably it is meant for job
# logs -- confirm it ends up where the batch script expects it.
mkdir -p logs

MODEL="o4-mini"
WORKERS=2
INPUT="anonymous/adv_bugbench"
CODER_OUTPUT="${MODEL}_coder.json"

# Conda's shell hook and an environment's activation scripts can read
# variables that are not set, which would abort the script under `set -u`.
# Relax nounset only for the activation step, then restore it.
set +u
source /nlp/scr/anonymous/miniconda3/etc/profile.d/conda.sh
conda activate buggen
set -u

cd /nlp/scr/anonymous/projects/attacker_solver

# Arguments shared by both inference passes. Keeping them in one array
# guarantees the retry pass runs with exactly the same settings as the first.
coder_args=(
    --input "$INPUT"
    --mode coder-complete
    --mutation-col "response"
    --model "$MODEL"
    --output "$CODER_OUTPUT"
    --inference-only
    --workers "$WORKERS"
    --max-new-tokens 10000
)

## CODER INFERENCE
python -m unified_eval.run_eval "${coder_args[@]}"

## CODER INFERENCE (retry pass)
# Same settings, but continuing from the output file of the first pass.
python -m unified_eval.run_eval "${coder_args[@]}" \
    --continue-from "$CODER_OUTPUT"

## Submit eval job
# NOTE(review): the batch script name is hard-coded and does not follow MODEL;
# update it by hand if MODEL above is changed.
sbatch o4-mini_coder_eval.sh
