# rb_run_one.py
# -*- coding: utf-8 -*-
import os
import re
import json
import time
import glob
import shutil
import subprocess as sp

# Static settings for a single evaluation run. Edit the values here; the model
# ID can additionally be supplied from outside the file (see "MODEL_ID").
CONFIG = {
    # Path to the core evaluation data file. It is passed to RewardBench as
    # --dataset together with the --load_json flag.
    "CORE_DATA": "./data/eval_core.jsonl",

    # Root directory where evaluation run outputs will be stored. Each model
    # gets its own "rb_<sanitized model id>" subdirectory underneath it.
    "RUNS_ROOT": "./runs",

    # Model ID from Hugging Face. Can be left as None.
    # This value is only the last-resort fallback: the RB_MODEL environment
    # variable is consulted first, then the script's filename
    # (e.g., rb_run_one--{owner}_{repo}.py), and only then this setting.
    "MODEL_ID": None,

    # Path to an existing local scores file. If it points to an existing file,
    # the script copies it to <run dir>/scores.jsonl and skips the RewardBench
    # inference step. If it is None/empty or not an existing file, the normal
    # RewardBench run proceeds.
    "LOCAL_SCORES_SRC": None,

    # RewardBench configuration
    # RB_ENTRY is the executable placed first on the command line;
    # RB_BATCH_SIZE and RB_CHAT_TEMPLATE are forwarded as --batch_size and
    # --chat_template respectively.
    "RB_ENTRY": "rewardbench",
    "RB_BATCH_SIZE": 8,
    "RB_CHAT_TEMPLATE": "raw",

    # If True, --not_quantized is passed on the first attempt already, which
    # also disables the automatic retry below (there is nothing left to change).
    "RB_FORCE_NOT_QUANTIZED": False,

    # If the first run exits with a non-zero status and its output looks like a
    # bitsandbytes/GLIBC error, automatically retry once with quantization
    # disabled (--not_quantized).
    "RB_AUTO_RETRY_NOT_QUANTIZED_ON_BNB_ERROR": True,

    # If any file ending in "outputs.jsonl", or a file named "scores.jsonl",
    # already exists anywhere under the run directory, skip the run.
    "SKIP_IF_DONE": True,
}

def log(msg):
    """Write ``msg`` to stdout, prefixed with the current local time in brackets."""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    # Flush right away so the line shows up promptly even when stdout is piped.
    print(f"[{stamp}] {msg}", flush=True)

def ensure_dir(p):
    """Create directory ``p`` along with any missing parents; a no-op if it already exists."""
    os.makedirs(name=p, exist_ok=True)

def sanitize(mid: str) -> str:
    """Turn a model ID into a directory-friendly name by mapping "/" and ":" to "_"."""
    # One translation table handles both characters in a single pass.
    unsafe_to_underscore = str.maketrans("/:", "__")
    return mid.translate(unsafe_to_underscore)

def detect_model_id() -> str:
    """
    Detects the model ID from multiple sources in order of priority.
    1. RB_MODEL environment variable.
    2. Script filename: the text after "--" is split at its FIRST underscore
       (e.g., "rb_run_one--microsoft_phi-1_5.py" -> "microsoft/phi-1_5").
    3. CONFIG["MODEL_ID"] value.

    A source whose value is empty or whitespace-only is treated as unset and
    the next source is tried.

    Returns:
        The detected model ID with surrounding whitespace removed.

    Raises:
        RuntimeError: If none of the sources yields a non-empty model ID.
    """
    # 1) Prioritize the environment variable. Strip BEFORE the truthiness test
    #    so that a blank value does not come back as an empty model ID.
    env_mid = (os.environ.get("RB_MODEL") or "").strip()
    if env_mid:
        return env_mid

    # 2) Parse from the script filename. The owner group deliberately excludes
    #    "_" so the split happens at the first underscore: this mirrors
    #    sanitize(), which turns the single "/" of "owner/repo" into "_", and
    #    keeps underscores that belong to the repo name intact.
    #    NOTE: an owner name that itself contains "_" cannot be expressed via
    #    the filename; use RB_MODEL or CONFIG["MODEL_ID"] for such models.
    fn = os.path.basename(__file__)
    m = re.search(r"--([A-Za-z0-9.-]+)_(.+)\.py$", fn)
    if m:
        owner, repo = m.group(1), m.group(2)
        return f"{owner}/{repo}"

    # 3) Fallback to the value in CONFIG (same blank-value handling as above).
    cfg_mid = (CONFIG["MODEL_ID"] or "").strip()
    if cfg_mid:
        return cfg_mid

    raise RuntimeError(
        "MODEL_ID is not specified. Please set the RB_MODEL environment variable, "
        "rename the script to 'rb_run_one--owner_repo.py', or hardcode it in CONFIG['MODEL_ID']."
    )

def already_done(out_dir: str) -> bool:
    """
    Checks if output files already exist in the target directory.

    Searches ``out_dir`` recursively for any file whose name ends in
    "outputs.jsonl" or is exactly "scores.jsonl".

    Args:
        out_dir: Run directory to inspect. It is taken literally, so glob
            metacharacters in the path ("[", "]", "*", "?") are not
            interpreted as wildcards.

    Returns:
        True if at least one matching file exists, otherwise False.
    """
    # Escape the directory part: without this a path such as "runs/rb_[v1]"
    # would be read as a character class and existing outputs would be missed.
    base = glob.escape(out_dir)
    for name_pattern in ("*outputs.jsonl", "scores.jsonl"):
        pattern = os.path.join(base, "**", name_pattern)
        # Only existence matters, so stop at the first hit instead of
        # collecting every match.
        if next(glob.iglob(pattern, recursive=True), None) is not None:
            return True
    return False

def run_rewardbench(mid: str, out_dir: str, extra_flags=None) -> sp.CompletedProcess:
    """
    Assemble the RewardBench command line for one model and execute it.

    Args:
        mid: Model ID passed as --model.
        out_dir: Directory passed as --output_dir.
        extra_flags: Optional iterable of additional command-line flags that
            are appended after the standard arguments.

    Returns:
        The CompletedProcess of the run; stderr is merged into ``stdout``,
        which is captured as text.
    """
    argv = [CONFIG["RB_ENTRY"]]
    argv.append(f"--model={mid}")
    argv.append(f"--dataset={CONFIG['CORE_DATA']}")
    argv.append("--load_json")
    argv.append(f"--chat_template={CONFIG['RB_CHAT_TEMPLATE']}")
    argv.append(f"--batch_size={CONFIG['RB_BATCH_SIZE']}")
    argv.append(f"--output_dir={out_dir}")
    if extra_flags is not None:
        argv.extend(extra_flags)

    log("[CMD] " + " ".join(argv))
    # Fold stderr into stdout so callers can scan a single text stream.
    completed = sp.run(argv, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    return completed

def looks_like_bnb_error(txt: str) -> bool:
    """
    Report whether process output mentions a known bitsandbytes/GLIBC failure marker.

    The comparison is case-insensitive: both the text and the markers are
    lower-cased before the substring test.
    """
    markers = (
        "bitsandbytes",
        "GLIBC_2.34",
        "cint8_vector_quant",
        "libbitsandbytes",
        "CUDA Setup failed despite CUDA being available",
    )
    haystack = txt.lower()
    for marker in markers:
        if marker.lower() in haystack:
            return True
    return False

def main():
    """
    Runs one RewardBench evaluation for the detected model.

    Steps:
    1. Resolve the model ID and create its run directory under RUNS_ROOT.
    2. If LOCAL_SCORES_SRC points to an existing file, copy it to
       <run dir>/scores.jsonl and stop.
    3. If SKIP_IF_DONE is set and outputs already exist, stop.
    4. Run RewardBench; on a bitsandbytes/GLIBC-looking failure, retry once
       with --not_quantized (when enabled and not already forced).

    Returns:
        Process exit status: 0 when scores were copied, the run was skipped,
        or RewardBench succeeded (possibly on retry); otherwise the non-zero
        return code of the last failed RewardBench invocation.
    """
    mid = detect_model_id()
    out_dir = os.path.join(CONFIG["RUNS_ROOT"], f"rb_{sanitize(mid)}")
    ensure_dir(out_dir)

    # If a local scores file is provided, copy it directly and exit.
    lsrc = CONFIG["LOCAL_SCORES_SRC"]
    if lsrc:
        if os.path.isfile(lsrc):
            dst = os.path.join(out_dir, "scores.jsonl")
            shutil.copy2(lsrc, dst)
            log(f"[LOCAL] Copied local scores -> {dst}")
            return 0
        # A configured but missing file is most likely a typo; say so instead
        # of silently starting a (potentially long) inference run.
        log(f"[LOCAL] WARNING: LOCAL_SCORES_SRC is set but is not a file -> {lsrc}; "
            "falling back to RewardBench inference.")

    if CONFIG["SKIP_IF_DONE"] and already_done(out_dir):
        log(f"[RB] SKIP: Outputs already exist -> {out_dir}")
        return 0

    flags = []
    if CONFIG["RB_FORCE_NOT_QUANTIZED"]:
        flags.append("--not_quantized")

    # First attempt to run RewardBench.
    res = run_rewardbench(mid, out_dir, flags)
    print(res.stdout)

    if res.returncode == 0:
        log(f"[RB] DONE -> {out_dir}")
        return 0

    # Automatic retry logic for bnb/GLIBC errors. Retrying only makes sense
    # when the first attempt did not already run with --not_quantized.
    retry_allowed = (CONFIG["RB_AUTO_RETRY_NOT_QUANTIZED_ON_BNB_ERROR"] and
                     "--not_quantized" not in flags)
    if retry_allowed and looks_like_bnb_error(res.stdout):
        log("[RB] Detected bitsandbytes/GLIBC error. Retrying with --not_quantized...")
        flags2 = flags + ["--not_quantized"]
        res2 = run_rewardbench(mid, out_dir, flags2)
        print(res2.stdout)

        if res2.returncode == 0:
            log(f"[RB] DONE (retry with --not_quantized succeeded) -> {out_dir}")
            return 0
        log(f"[RB] FAILED even after retry (exit={res2.returncode}) -> {out_dir}")
        return res2.returncode

    log(f"[RB] FAILED (exit={res.returncode}) -> {out_dir}")
    return res.returncode


if __name__ == "__main__":
    # Propagate the outcome so shells and schedulers can detect a failed run.
    raise SystemExit(main())