# config.py
"""
Global configuration for SAE steering evaluation (no env vars required).
Edit this file only; do not rely on terminal exports.

This version adds vLLM PPL backend configuration and keeps judge config centralized.
We still allow optional env overrides in llm_concept_judge.py for convenience,
but they are NOT required.
"""

from __future__ import annotations
from pathlib import Path

# ----- Output configuration -----

# Root directory for evaluation outputs. This is an absolute, machine-specific
# path: edit it when running on a different host or under a different user.
EVAL_ROOT: Path = Path("/home/dslabra5/sae4dlm/steering/eval_steering_file")

# ----- LLM judge configuration -----

# Default base URL for the judge API. Two forms are accepted:
#   - an OpenAI-compatible base, e.g. "https://.../v1"
#   - a full endpoint, e.g. "https://.../v1/chat/completions"
OPENAI_BASE_URL_DEFAULT: str = "https://dashscope.aliyuncs.com/compatible-mode/v1"

# Model identifier used for the judge.
# NOTE(review): the default URL above points at DashScope while the model is
# named "gpt-4o-mini" -- confirm that the gateway actually serves this model.
MODEL_NAME: str = "gpt-4o-mini"

# Timeout for a judge request (presumably in seconds -- confirm in the client).
JUDGE_TIMEOUT: float = 60.0

# When True, judge debug output is printed to stdout.
JUDGE_DEBUG: bool = True

# Completion-token budget for the primary judge request.
JUDGE_MAX_TOKENS: int = 128

# Larger budget for a single retry, used when the visible content is empty
# because reasoning tokens consumed the entire primary budget.
JUDGE_MAX_TOKENS_ON_EMPTY: int = 256

# Judge concurrency limit; keep at 1 to avoid 429 responses.
JUDGE_MAX_CONCURRENCY: int = 1

# Retry policy: attempt limit plus backoff base and jitter.
# (Backoff units are presumably seconds; the exact delay formula lives in the
# judge client and is not visible here -- confirm before tuning.)
JUDGE_MAX_RETRIES: int = 6
JUDGE_BACKOFF_BASE: float = 0.8
JUDGE_BACKOFF_JITTER: float = 0.4

# Score substituted when the judge call fails or its reply cannot be parsed.
JUDGE_FALLBACK_SCORE: float = 50.0

# Request JSON mode when the gateway supports it.
JUDGE_USE_JSON_MODE: bool = True

# Sanitize text before judging to reduce content_filter errors.
JUDGE_SANITIZE_TEXT: bool = True

# Optional API key kept in this file; it is only a fallback for when the
# OPENAI_API_KEY environment variable is unset. Leave it empty to rely on the
# environment variable alone, and avoid committing a real key to version control.
OPENAI_API_KEY_CONFIG: str = ""

# ----- Judge debug logging (file) -----

# Switches for file logging of judge results. Presumably these enable logging
# when the fallback score is used, and when both compared scores equal the
# fallback -- confirm against the judge implementation.
JUDGE_LOG_FALLBACK: bool = True
JUDGE_LOG_WHEN_BOTH_EQ_FALLBACK: bool = True

# Directory and JSONL file for the judge debug log.
# NOTE(review): this directory sits under ".../steering/eval_steer/", whereas
# EVAL_ROOT is ".../steering/eval_steering_file" -- confirm this is intentional.
JUDGE_LOG_DIR: Path = Path("/home/dslabra5/sae4dlm/steering/eval_steer/logs")
JUDGE_LOG_PATH: Path = JUDGE_LOG_DIR.joinpath("llm_judge_debug.jsonl")

# Character limit for logged text; 0 means no truncation.
JUDGE_LOG_TRUNCATE_CHARS: int = 0

# ----- Perplexity model configuration -----

# Which perplexity backend to use:
#   "hf"   -> Transformers
#   "vllm" -> vLLM prompt_logprobs
PPL_BACKEND: str = "vllm"

# Settings for the HF backend, retained for fallback and testing.
PPL_MODEL_NAME: str = "gpt2"
PPL_MAX_LENGTH: int = 512
PPL_BATCH_SIZE: int = 16

# Settings for the vLLM backend.
PPL_VLLM_MODEL_NAME: str = "gpt2"
# Allowed dtype strings: "float16" | "bfloat16" | "float32"
PPL_VLLM_DTYPE: str = "float16"
PPL_VLLM_TENSOR_PARALLEL_SIZE: int = 1
PPL_VLLM_GPU_MEMORY_UTILIZATION: float = 0.85
PPL_VLLM_TRUST_REMOTE_CODE: bool = False
PPL_VLLM_ENFORCE_EAGER: bool = True
PPL_VLLM_ENABLE_PREFIX_CACHING: bool = True

# vLLM requires prompt_len + max_tokens <= max_model_len, so these two values
# have to be chosen together.
PPL_VLLM_MAX_MODEL_LEN: int = 1024
PPL_VLLM_EVAL_MAX_NEW_TOKENS: int = 1

# Number of logprobs returned per prompt token (prompt_logprobs).
# Raise this (e.g., to 20 or 50) if your vLLM build does NOT include the
# chosen token's logprob in the top-k dict.
PPL_VLLM_PROMPT_LOGPROBS_K: int = 1

# ----- Checkpointing -----

# Checkpoint interval, counted in features (presumably a checkpoint is written
# after every N processed features -- confirm in the evaluation loop).
CHECKPOINT_EVERY_N_FEATURES: int = 5
