# "Getting started" group: the index, installation and quickstart pages.
# Each page entry pairs a `local` identifier with its display `title`
# (presumably `local` is the page's source name — confirm against the docs tooling).
- sections:
  - local: index
    title: TRL
  - local: installation
    title: Installation
  - local: quickstart
    title: Quickstart
  title: Getting started
# "Conceptual Guides" group: the dataset formats page and the paper index.
- sections:
  - local: dataset_formats
    title: Dataset Formats
  - local: paper_index
    title: Paper Index
  title: Conceptual Guides
# "Trainers" group: pages for the DPO, GRPO, Reward, RLOO and SFT trainers.
# Keep the entries sorted alphabetically when adding a new trainer page.
- sections:
  - local: dpo_trainer
    title: DPO
  - local: grpo_trainer
    title: GRPO
  - local: reward_trainer
    title: Reward
  - local: rloo_trainer
    title: RLOO
  - local: sft_trainer
    title: SFT
  title: Trainers
# "How-to guides" group: task-oriented pages (CLI, Jobs, customization,
# memory/speed/distribution tuning, and using trained models).
# The entries are not alphabetical; NOTE(review): the order looks deliberate — confirm before re-sorting.
- sections:
  - local: clis
    title: Command Line Interface (CLI)
  - local: jobs_training
    title: Training using Jobs
  - local: customization
    title: Customizing the Training
  - local: reducing_memory_usage
    title: Reducing Memory Usage
  - local: speeding_up_training
    title: Speeding Up Training
  - local: distributing_training
    title: Distributing Training
  - local: use_model
    title: Using Trained Models
  title: How-to guides
# "Integrations" group: one page per third-party integration.
# Entries are currently in alphabetical order; keep new ones in order too.
- sections:
  - local: deepspeed_integration
    title: DeepSpeed
  - local: kernels_hub
    title: Kernels Hub
  - local: liger_kernel_integration
    title: Liger Kernel
  - local: peft_integration
    title: PEFT
  - local: rapidfire_integration
    title: RapidFire AI
  - local: trackio_integration
    title: Trackio
  - local: unsloth_integration
    title: Unsloth
  - local: vllm_integration
    title: vLLM
  title: Integrations
# "Examples" group: the example overview, community tutorials and the
# "LoRA Without Regret" page.
- sections:
  - local: example_overview
    title: Example Overview
  - local: community_tutorials
    title: Community Tutorials
  - local: lora_without_regret
    title: LoRA Without Regret
  title: Examples
# "API" group: a nested "Utilities" sub-group followed by the Callbacks,
# Reward Functions and Others pages.
- sections:
  # Nested sub-group: its own `sections` list plus a `title`, same shape as a top-level group.
  - sections:
    - local: chat_template_utils
      title: Chat Template Utilities
    - local: data_utils
      title: Data Utilities
    - local: model_utils
      title: Model Utilities
    - local: script_utils
      title: Script Utilities
    title: Utilities
  - local: callbacks
    title: Callbacks
  - local: rewards
    title: Reward Functions
  - local: others
    title: Others
  title: API
# "Experimental" group: the overview and OpenEnv integration pages come first,
# followed by the individual experimental pages.
- sections:
  - local: experimental_overview
    title: Experimental Overview
  - local: openenv
    title: OpenEnv Integration
  # Entries below are sorted alphabetically by `local`; keep new pages in order.
  - local: bco_trainer
    title: BCO
  - local: bema_for_reference_model
    title: BEMA for Reference Model
  - local: cpo_trainer
    title: CPO
  - local: gfpo
    title: GFPO
  - local: gkd_trainer
    title: GKD
  - local: gold_trainer
    title: GOLD
  - local: grpo_with_replay_buffer
    title: GRPO With Replay Buffer
  - local: gspo_token
    title: GSPO-token
  - local: judges
    title: Judges
  - local: kto_trainer
    title: KTO
  - local: merge_model_callback
    title: MergeModelCallback
  - local: minillm_trainer
    title: MiniLLM
  - local: nash_md_trainer
    title: Nash-MD
  - local: online_dpo_trainer
    title: Online DPO
  - local: orpo_trainer
    title: ORPO
  - local: papo_trainer
    title: PAPO
  - local: ppo_trainer
    title: PPO
  - local: prm_trainer
    title: PRM
  - local: winrate_callback
    title: WinRateCallback
  - local: xpo_trainer
    title: XPO
  title: Experimental