# CUDA 12.8 build: install with --extra-index-url https://download.pytorch.org/whl/cu128
# (a local version label like +cu128 is only valid with == / !=, not >=)
torch>=2.8.0
transformers>=4.56.1
flashinfer-python>=0.3.1
flash_attn>=2.8.3
accelerate==1.10.1
datasets