# Navigation group "Getting started": landing page, installation and quickstart.
# Each entry pairs a `local` page identifier with the `title` shown for it.
# NOTE(review): `local` presumably names the page's source file without its
# extension -- confirm against the docs build tool before renaming a slug.
- sections:
  - local: index
    title: TRL
  - local: installation
    title: Installation
  - local: quickstart
    title: Quickstart
  title: Getting started
# Navigation group "Conceptual Guides": background pages on dataset formats,
# a training FAQ and how to read the training logs.
- sections:
  - local: dataset_formats
    title: Dataset Formats
  # Slug is `how_to_train` although the displayed title is "Training FAQ".
  - local: how_to_train
    title: Training FAQ
  - local: logging
    title: Understanding Logs
  title: Conceptual Guides
# Navigation group "How-to guides": task-oriented pages (CLI usage, customizing
# training, memory and speed tuning, distributed training, using trained models).
- sections:
  - local: clis
    title: Command Line Interface (CLI)
  - local: customization
    title: Customizing the Training
  - local: reducing_memory_usage
    title: Reducing Memory Usage
  - local: speeding_up_training
    title: Speeding Up Training
  - local: distributing_training
    title: Distributing Training
  - local: use_model
    title: Using Trained Models
  title: How-to guides
# Navigation group "Integrations": one page per third-party integration.
# Slugs here all follow the `<name>_integration` pattern, and the entries are
# listed in alphabetical order of their titles.
- sections:
  - local: deepspeed_integration
    title: DeepSpeed
  - local: liger_kernel_integration
    title: Liger Kernel
  - local: peft_integration
    title: PEFT
  - local: unsloth_integration
    title: Unsloth
  title: Integrations
# Navigation group "Examples": an overview page, community tutorials and
# individual worked examples.
- sections:
  - local: example_overview
    title: Example Overview
  - local: community_tutorials
    title: Community Tutorials
  - local: sentiment_tuning
    title: Sentiment Tuning
  # Slug (`using_llama_models`) and title ("Training StackLlama") differ.
  # NOTE(review): presumably the slug is kept for link stability -- confirm
  # before renaming it.
  - local: using_llama_models
    title: Training StackLlama
  - local: detoxifying_a_lm
    title: Detoxifying a Language Model
  - local: learning_tools
    title: Learning to Use Tools
  - local: multi_adapter_rl
    title: Multi Adapter RLHF
  - local: training_vlm_sft
    title: Fine-tuning a Multimodal Model Using SFT (Single or Multi-Image Dataset)
  title: Examples
# Navigation group "API": a nested "Trainers" sub-group followed by the other
# reference pages (models, sampling, judges, callbacks, utilities, ...).
- sections:
  # Trainers are sorted alphabetically by base method name. Variant trainers
  # are placed directly after their base method instead of under their own
  # initial: "Online DPO" follows "DPO", "Iterative SFT" follows "SFT".
  # Keep new entries in that order.
  - sections:
    - local: alignprop_trainer
      title: AlignProp
    - local: bco_trainer
      title: BCO
    - local: cpo_trainer
      title: CPO
    - local: ddpo_trainer
      title: DDPO
    - local: dpo_trainer
      title: DPO
    - local: online_dpo_trainer
      title: Online DPO
    - local: gkd_trainer
      title: GKD
    - local: grpo_trainer
      title: GRPO
    - local: kto_trainer
      title: KTO
    - local: nash_md_trainer
      title: Nash-MD
    - local: orpo_trainer
      title: ORPO
    - local: ppo_trainer
      title: PPO
    - local: prm_trainer
      title: PRM
    - local: reward_trainer
      title: Reward
    - local: rloo_trainer
      title: RLOO
    - local: sft_trainer
      title: SFT
    - local: iterative_sft_trainer
      title: Iterative SFT
    - local: xpo_trainer
      title: XPO
    title: Trainers
  # The remaining API pages are not alphabetically ordered; the sort rule
  # above applies only to the nested Trainers list.
  - local: models
    title: Model Classes
  - local: best_of_n
    title: Best of N Sampling
  - local: judges
    title: Judges
  - local: callbacks
    title: Callbacks
  - local: data_utils
    title: Data Utilities
  - local: text_environments
    title: Text Environments
  - local: script_utils
    title: Script Utilities
  - local: others
    title: Others
  title: API
