File size: 2,853 Bytes
2f5127c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# Group "Getting started". Each item under `sections` pairs a `local`
# identifier with its `title`; the group's own `title` key closes the entry.
# NOTE(review): `local` presumably names a documentation page - confirm
# against the tool that consumes this file.
- sections:
  - local: index
    title: TRL
  - local: installation
    title: Installation
  - local: quickstart
    title: Quickstart
  title: Getting started
# Group "Conceptual Guides": a flat list of `local`/`title` pairs, with the
# group's own `title` given as the last key of the entry.
- sections:
  - local: dataset_formats
    title: Dataset Formats
  - local: how_to_train
    title: Training FAQ
  - local: logging
    title: Understanding Logs
  title: Conceptual Guides
# Group "How-to guides": a flat list of `local`/`title` pairs, with the
# group's own `title` given as the last key of the entry.
- sections:
  - local: clis
    title: Command Line Interface (CLI)
  - local: customization
    title: Customizing the Training
  - local: reducing_memory_usage
    title: Reducing Memory Usage
  - local: speeding_up_training
    title: Speeding Up Training
  - local: distributing_training
    title: Distributing Training
  - local: use_model
    title: Using Trained Models
  title: How-to guides
# Group "Integrations". Every `local` in this group uses the
# `<name>_integration` naming pattern; titles are the bare product names.
- sections:
  - local: deepspeed_integration
    title: DeepSpeed
  - local: liger_kernel_integration
    title: Liger Kernel
  - local: peft_integration
    title: PEFT
  - local: unsloth_integration
    title: Unsloth
  - local: vllm_integration
    title: vLLM
  title: Integrations
# Group "Examples": a flat list of `local`/`title` pairs.
# Note that identifiers and titles are not always derived from each other:
# `using_llama_models` carries the title "Training StackLlama".
- sections:
  - local: example_overview
    title: Example Overview
  - local: community_tutorials
    title: Community Tutorials
  - local: sentiment_tuning
    title: Sentiment Tuning
  - local: using_llama_models
    title: Training StackLlama
  - local: detoxifying_a_lm
    title: Detoxifying a Language Model
  - local: multi_adapter_rl
    title: Multi Adapter RLHF
  - local: training_vlm_sft
    title: Fine-tuning a Multimodal Model Using SFT (Single or Multi-Image Dataset)
  title: Examples
# Group "API". Unlike the other groups, its first item is itself a nested
# group ("Trainers") with its own `sections` list; the remaining items are
# plain `local`/`title` pairs.
- sections:
  # Nested "Trainers" group. Entries are ordered alphabetically by title,
  # except that variants sit directly after their base entry:
  # "Online DPO" follows "DPO" and "Iterative SFT" follows "SFT".
  - sections: # Sorted alphabetically
    - local: alignprop_trainer
      title: AlignProp
    - local: bco_trainer
      title: BCO
    - local: cpo_trainer
      title: CPO
    - local: ddpo_trainer
      title: DDPO
    - local: dpo_trainer
      title: DPO
    - local: online_dpo_trainer
      title: Online DPO
    - local: gkd_trainer
      title: GKD
    - local: grpo_trainer
      title: GRPO
    - local: kto_trainer
      title: KTO
    - local: nash_md_trainer
      title: Nash-MD
    - local: orpo_trainer
      title: ORPO
    - local: ppo_trainer
      title: PPO
    - local: prm_trainer
      title: PRM
    - local: reward_trainer
      title: Reward
    - local: rloo_trainer
      title: RLOO
    - local: sft_trainer
      title: SFT
    - local: iterative_sft_trainer
      title: Iterative SFT
    - local: xpo_trainer
      title: XPO
    title: Trainers
  # Remaining API entries (not alphabetically ordered).
  - local: models
    title: Model Classes
  - local: model_utils
    title: Model Utilities
  - local: best_of_n
    title: Best of N Sampling
  - local: judges
    title: Judges
  - local: callbacks
    title: Callbacks
  - local: data_utils
    title: Data Utilities
  - local: rewards
    title: Reward Functions
  - local: script_utils
    title: Script Utilities
  - local: others
    title: Others
  title: API