# Modal app-level configuration. Image build, secrets, volumes.
# Exact `modal run` commands are in scripts/ — this file documents the
# environment those commands share.

# Application name. Presumably passed as the Modal app name by the scripts in
# scripts/ — confirm against the launcher.
app_name: reflex-rlvr

# Base image definition: base tag, apt packages, and pip packages.
# The entries under `images` refer to this mapping via `inherits: image`.
image:
  base: debian_slim:python3.11
  # System packages installed through apt. `curl` is also what the verifier
  # image's elan install command invokes.
  apt_packages:
    - git
    - build-essential
    - curl
  # Python dependencies. Every entry is pinned to an exact version with `==`;
  # keep it that way when adding packages.
  pip_packages:
    - torch==2.4.0
    - transformers==4.45.0
    - accelerate==0.34.0
    - peft==0.13.0
    - trl==0.10.1
    - vllm==0.6.3
    - sae-lens==4.0.0
    - datasets==2.21.0
    - sympy==1.13.0
    - wandb==0.18.0
    - hydra-core==1.3.2
    - pyarrow==17.0.0
    - safetensors==0.4.5

# The Lean 4 install is heavy (~1.2 GB elan toolchain), so it is built only
# into the image used by the verifier worker; pilot mining doesn't need it.
# Named image variants. Each one inherits the top-level `image` mapping and may
# append shell commands through `extra_setup`.
images:
  # Inherits `image` and adds nothing.
  default:
    inherits: image
  # Inherits `image` and installs elan (the Lean toolchain manager) on top.
  verifier:
    inherits: image
    extra_setup:
      # Download the installer to a file and chain the steps with `&&`.
      # With `curl ... | sh` the step's exit status is that of `sh`, so a
      # failed download would run an empty script and the build would
      # "succeed" without Lean installed.
      - >-
        curl -sSf -o /tmp/elan-init.sh
        https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh
        && sh /tmp/elan-init.sh -y
        && rm -f /tmp/elan-init.sh
      # NOTE(review): this PATH export only reaches shells that read
      # /root/.bashrc. Confirm the verifier worker starts Lean from such a
      # shell, or have it call the binaries under /root/.elan/bin directly.
      - "echo 'export PATH=$HOME/.elan/bin:$PATH' >> /root/.bashrc"

# Persistent storage
# Volumes keyed by volume name. Each entry gives the path it is mounted at
# (`mount`) and a human-readable `description` of its contents.
volumes:
  reflex-rlvr-cache:
    mount: /cache                # cached SAE activations, vLLM weights cache
    description: per-cycle activation caches and vLLM weight caches
  reflex-rlvr-data:
    mount: /data                 # AIME / MATH / HMMT / etc.
    description: corpora and decontamination shards
  reflex-rlvr-checkpoints:
    mount: /checkpoints          # model checkpoints (per cycle)
    description: cycle checkpoints; mirrored to HF Hub on success

# Names of the Modal secrets the runs reference; the inline comments name the
# environment variable each secret is expected to carry.
# NOTE(review): `wandb` is marked OPTIONAL here, yet
# `rules.log_every_h100_hr_to_wandb` is set to true in this file. Confirm
# whether runs can really proceed without this secret being created first.
secrets:
  - huggingface                  # HF_TOKEN (already exists in Modal account)
  - wandb                        # WANDB_API_KEY (OPTIONAL; create with
                                 #   `modal secret create wandb WANDB_API_KEY=<key>`
                                 #   before launching big runs to enable
                                 #   real-time spend logging)

# GPU layout
# GPU type (`gpu`) and GPU count (`n_gpus`) per workload profile.
# Every GPU-backed profile uses H100: pilot, ablation and eval request 4,
# the two main_run_* profiles request 8 each.
gpu_profiles:
  pilot:
    gpu: H100
    n_gpus: 4
  main_run_train:
    gpu: H100
    n_gpus: 8
  main_run_rollout:
    gpu: H100
    n_gpus: 8
  ablation:
    gpu: H100
    n_gpus: 4
  eval:
    gpu: H100
    n_gpus: 4
  # NOTE(review): unlike every other profile, this one has no `n_gpus` key.
  # Confirm the consumer tolerates the missing key when `gpu` is null.
  smoketest:
    gpu: null                    # CPU only

# Hard rules (mirror user memory)
# Operational constraints expressed as boolean flags.
# NOTE(review): nothing in this file shows how these flags are enforced;
# presumably the launch scripts read them — confirm.
rules:
  detach: false                  # never use --detach
  retry_on_disconnect: false     # never auto-retry on disconnect
  # Presumably each run needs explicit sign-off before it is launched — confirm.
  approval_required_per_run: true
  # Presumably requires H100-hour usage to be reported to Weights & Biases,
  # which would need the `wandb` secret listed under `secrets` — confirm.
  log_every_h100_hr_to_wandb: true
