# Documentation table of contents.
#
# The document is a list of groups. Each group is a mapping with:
#   sections: the ordered list of entries shown under the group
#   title:    the heading displayed for the group
# Each entry is either a page (`local` + `title`) or a nested group
# (`sections` + `title`), as used for "Trainers" inside "API".
# NOTE(review): `local` presumably names the page's source file without its
# extension -- confirm against the docs builder before renaming any value.

- sections:
    - local: index
      title: TRL
    - local: installation
      title: Installation
    - local: quickstart
      title: Quickstart
  title: Getting started

- sections:
    - local: dataset_formats
      title: Dataset Formats
    - local: how_to_train
      title: Training FAQ
    - local: logging
      title: Understanding Logs
  title: Conceptual Guides

- sections:
    - local: clis
      title: Command Line Interface (CLI)
    - local: customization
      title: Customizing the Training
    - local: reducing_memory_usage
      title: Reducing Memory Usage
    - local: speeding_up_training
      title: Speeding Up Training
    - local: distributing_training
      title: Distributing Training
    - local: use_model
      title: Using Trained Models
  title: How-to guides

- sections:
    - local: deepspeed_integration
      title: DeepSpeed
    - local: liger_kernel_integration
      title: Liger Kernel
    - local: peft_integration
      title: PEFT
    - local: unsloth_integration
      title: Unsloth
    - local: vllm_integration
      title: vLLM
  title: Integrations

- sections:
    - local: example_overview
      title: Example Overview
    - local: community_tutorials
      title: Community Tutorials
    - local: sentiment_tuning
      title: Sentiment Tuning
    - local: using_llama_models
      title: Training StackLlama
    - local: detoxifying_a_lm
      title: Detoxifying a Language Model
    - local: multi_adapter_rl
      title: Multi Adapter RLHF
    - local: training_vlm_sft
      title: Fine-tuning a Multimodal Model Using SFT (Single or Multi-Image Dataset)
  title: Examples

- sections:
    # Trainers are sorted alphabetically by algorithm name. Variants
    # ("Online DPO", "Iterative SFT") are listed directly after their base
    # trainer rather than under their own first letter.
    - sections:
        - local: alignprop_trainer
          title: AlignProp
        - local: bco_trainer
          title: BCO
        - local: cpo_trainer
          title: CPO
        - local: ddpo_trainer
          title: DDPO
        - local: dpo_trainer
          title: DPO
        - local: online_dpo_trainer
          title: Online DPO
        - local: gkd_trainer
          title: GKD
        - local: grpo_trainer
          title: GRPO
        - local: kto_trainer
          title: KTO
        - local: nash_md_trainer
          title: Nash-MD
        - local: orpo_trainer
          title: ORPO
        - local: ppo_trainer
          title: PPO
        - local: prm_trainer
          title: PRM
        - local: reward_trainer
          title: Reward
        - local: rloo_trainer
          title: RLOO
        - local: sft_trainer
          title: SFT
        - local: iterative_sft_trainer
          title: Iterative SFT
        - local: xpo_trainer
          title: XPO
      title: Trainers
    - local: models
      title: Model Classes
    - local: model_utils
      title: Model Utilities
    - local: best_of_n
      title: Best of N Sampling
    - local: judges
      title: Judges
    - local: callbacks
      title: Callbacks
    - local: data_utils
      title: Data Utilities
    - local: rewards
      title: Reward Functions
    - local: script_utils
      title: Script Utilities
    - local: others
      title: Others
  title: API