# VERL GRPO Training Environment (legacy reference; H200/H100 only)
# Python 3.12 recommended.
#
# For B200/SM100a GPUs (recommended for this paper's experiments) use the
# vLLM 0.12.0 venv built by `bash scripts/setup_vllm012_venv.sh`. This file
# documents the older `verl-latest` reference stack and is retained only
# for non-B200 reproductions.
#
# The pins below are interdependent (e.g. each vLLM build targets one specific
# torch/CUDA combination), so change torch, vLLM and flash-attn together.

# VERL - installed from PyPI.
# Exact pin: the torch and vLLM notes elsewhere in this file are written
# against this release.
verl==0.7.0

# PyTorch with CUDA 12.8 (required for VERL 0.7.0).
# Pinned to the stack used by the `verl-latest` venv: torch 2.7.1+cu128 plus the
# torchvision / torchaudio releases built for that torch version.
# `+cu128` local-version wheels are not published on PyPI, so pip must also be
# pointed at the PyTorch cu128 wheel index.
--extra-index-url https://download.pytorch.org/whl/cu128
torch==2.7.1+cu128
torchvision==0.22.1+cu128
torchaudio==2.7.1+cu128

# Core ML libraries
transformers>=4.51.0  # Minimum version for Flash Attention 3 support
accelerate==1.12.0
peft==0.18.1
safetensors==0.7.0
huggingface-hub==0.36.0

# vLLM for fast inference. VERL 0.7.0 does not support the 0.6.x series, so
# this is pinned to the release used by the `verl-latest` venv (vllm 0.10.1,
# built for CUDA 12.8 / torch 2.7.1). vLLM requires one exact torch release:
# bump this pin and the torch pins above together, never separately.
vllm==0.10.1

# Flash Attention (requires torch first)
# NOTE(review): flash-attn's build step imports torch, so installing it from
# this file into a fresh venv may fail. The upstream README's recipe is to
# install torch first and then run
#   pip install flash-attn==2.8.3 --no-build-isolation
# Confirm against your environment.
# Note: FA3 (flash-attn-3) is beta and may require manual installation from wheels.
# FA2 is installed for backward compatibility; FA3 can be installed separately if needed.
flash-attn==2.8.3
# flash-attn-3  # Uncomment to install FA3 (requires H100/H200 GPU, CUDA 12.3+)

# Ray for distributed training (exact pin)
ray==2.53.0

# Data processing (exact pins; numpy is held on the 1.26 line, i.e. pre-2.0)
numpy==1.26.4
pandas==2.3.3
pyarrow==22.0.0

# Logging
tensorboard==2.20.0
# NOTE(review): wandb is unpinned, unlike most entries in this file, so pip
# installs whatever release is newest at install time. Pin it to the version
# from the working venv (`pip freeze`) if reproducibility matters.
wandb

# Hydra config
# NOTE(review): hydra-core and omegaconf are also unpinned; pin them from the
# working venv if reproducibility matters.
hydra-core
omegaconf

# Additional VERL dependencies (installed automatically with verl==0.7.0)
# tensordict>=0.8.0,<=0.10.0,!=0.9.0
# codetiming
# pybind11
# pylatexenc
# torchdata
# qwen-vl-utils  # Required for vision utils
