# TTRV: Test-Time Reinforcement Learning for Vision-Language Models

# ✨Getting Started

```bash
conda create -y -n ttrv python=3.10
conda activate ttrv
cd TTRV/verl
# Install torch (this build requires CUDA 12.4)
pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu124
# Install flash-attention (first attempt)
pip install -U flash-attn
# Install flash-attention (fallback: run this only if the previous flash-attn install fails; it skips the CUDA build)
FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE pip install flash-attn --no-build-isolation
pip install python-dotenv
# Install transformers for InternVL models (run only ONE of the two transformers installs, matching the model you use)
pip install transformers==4.52.4 
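# Install transformers for Qwen models (an alternative to the InternVL pin, not an addition: installing this replaces any other transformers version)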
pip install transformers==4.51.3
pip3 install vllm==0.8.3
pip install qwen_vl_utils
python -m pip install cffi
pip install latex2sympy2 word2number timm librosa soundfile
# Install TTRV framework 
pip install -e.
# Install other packages
pip install -r requirements.txt
```



# Prepare dataset
```
# STEP-1
# After downloading the dataset, generate the training samples
# (e.g., 20, 500, or full-scale) and the full-scale testing samples.
# A sample dataset is provided at "TTRV/verl/data".

# STEP-2
# Then generate the parquet files from the JSON files inside the `TTRV/verl/data` folder.
# Run the following command from inside the `TTRV/verl/data` folder:
python preprocess.py
```

# RUN TTRV
```bash
# default script
bash examples/ttrv/test/run.sh
```