# Group "Get started": introductory pages (overview, quickstart, installation,
# CLIs, FAQ, model usage, customization, logging).
# Every entry under `sections` pairs a `local` key with a human-readable `title`.
# NOTE(review): `local` presumably names the page's source file without its
# extension, and list order presumably is display order — confirm against the
# docs build tool before renaming or reordering entries.
- sections:
  - local: index
    title: TRL
  - local: quickstart
    title: Quickstart
  - local: installation
    title: Installation
  - local: clis
    title: Get started with Command Line Interfaces (CLIs)
  # NOTE(review): the `local` name is generic while the title is PPO-specific —
  # confirm this pairing is intended.
  - local: how_to_train
    title: PPO Training FAQ
  - local: use_model
    title: Use Trained Models
  - local: customization
    title: Customize the Training
  - local: logging
    title: Understanding Logs
  # Label of this group; sibling key of `sections` in the same mapping.
  title: Get started
# Group "API": reference pages for model classes, the individual trainers,
# callbacks, judges, and text environments.
# Every entry under `sections` pairs a `local` key with a human-readable `title`.
# NOTE(review): entries are not alphabetical, so the order looks deliberate and
# presumably is display order — confirm before sorting or regrouping.
- sections:
  - local: models
    title: Model Classes
  - local: trainer
    title: Trainer Classes
  - local: reward_trainer
    title: Reward Model Training
  - local: sft_trainer
    title: Supervised Fine-Tuning
  - local: ppo_trainer
    title: PPO Trainer
  - local: ppov2_trainer
    title: PPOv2 Trainer
  - local: rloo_trainer
    title: RLOO Trainer
  - local: online_dpo_trainer
    title: Online DPO Trainer
  - local: best_of_n
    title: Best of N Sampling
  - local: dpo_trainer
    title: DPO Trainer
  - local: kto_trainer
    title: KTO Trainer
  - local: bco_trainer
    title: BCO Trainer
  - local: cpo_trainer
    title: CPO Trainer
  # NOTE(review): this title spells out the algorithm name, whereas neighbouring
  # trainer entries use the "<ACRONYM> Trainer" pattern — confirm it is intended.
  - local: ddpo_trainer
    title: Denoising Diffusion Policy Optimization
  - local: alignprop_trainer
    title: AlignProp Trainer
  - local: orpo_trainer
    title: ORPO Trainer
  - local: iterative_sft_trainer
    title: Iterative Supervised Fine-Tuning
  - local: callbacks
    title: Callback Classes
  - local: judges
    title: Judge Classes
  - local: text_environments
    title: Text Environments
  # Label of this group; sibling key of `sections` in the same mapping.
  title: API
# Group "Examples": worked example pages (sentiment tuning, PEFT/LoRA,
# detoxification, StackLlama, tool use, multi-adapter RLHF).
# Every entry under `sections` pairs a `local` key with a human-readable `title`.
# NOTE(review): `local` presumably names the page's source file without its
# extension — confirm against the docs build tool before renaming entries.
- sections:
  - local: example_overview
    title: Example Overview
  - local: sentiment_tuning
    title: Sentiment Tuning
  - local: lora_tuning_peft
    title: Training with PEFT
  - local: detoxifying_a_lm
    title: Detoxifying a Language Model
  # NOTE(review): the `local` name refers to Llama models in general while the
  # title names StackLlama specifically — confirm this pairing is intended.
  - local: using_llama_models
    title: Training StackLlama
  - local: learning_tools
    title: Learning to Use Tools
  - local: multi_adapter_rl
    title: Multi Adapter RLHF
  # Label of this group; sibling key of `sections` in the same mapping.
  title: Examples
