mlfoundations-dev/hp_ablations_grid_mistral_bsz2048_lr2e-6_scheduler-cosine-warmup0.15-minlr5e-7
Text Generation
•
7B
•
Updated
•
4
mlfoundations-dev/hp_ablations_grid_mistral_bsz2048_lr5e-6_scheduler-cosine-warmup0.05-minlr5e-7
Text Generation
•
7B
•
Updated
•
5
mlfoundations-dev/hp_ablations_grid_mistral_bsz4096_lr5e-6_scheduler-cosine-warmup0.05-minlr5e-7
Text Generation
•
7B
•
Updated
•
7
mlfoundations-dev/hp_ablations_grid_mistral_bsz4096_lr2e-6_scheduler-cosine-warmup0.15
Text Generation
•
7B
•
Updated
•
9
mlfoundations-dev/hp_ablations_grid_mistral_bsz1024_lr2e-6_scheduler-cosine-warmup0.05-minlr5e-7
Text Generation
•
7B
•
Updated
•
5
mlfoundations-dev/hp_ablations_grid_mistral_bsz1024_lr2e-6_scheduler-cosine-warmup0.15-minlr5e-7
Text Generation
•
7B
•
Updated
•
5
mlfoundations-dev/llama3-1_8b_webinstruct_original_700k
Text Generation
•
8B
•
Updated
•
5
mlfoundations-dev/hp_ablations_grid_mistral_bsz2048_lr2e-6_scheduler-cosine-warmup0.15
Text Generation
•
7B
•
Updated
•
5
mlfoundations-dev/hp_ablations_grid_mistral_bsz4096_lr2e-6_scheduler-cosine-warmup0.15-minlr5e-7
Text Generation
•
7B
•
Updated
•
4
mlfoundations-dev/hp_ablations_grid_mistral_bsz2048_lr2e-6_scheduler-cosine-warmup0.05-minlr5e-7
Text Generation
•
7B
•
Updated
•
7
mlfoundations-dev/hp_ablations_grid_mistral_bsz4096_lr2e-6_scheduler-cosine-warmup0.05-minlr5e-7
Text Generation
•
7B
•
Updated
•
4
mlfoundations-dev/hp_ablations_grid_mistral_bsz1024_lr2e-6_scheduler-cosine-warmup0.15
Text Generation
•
7B
•
Updated
•
5
mlfoundations-dev/hp_ablations_grid_mistral_bsz1024_lr5e-6_scheduler-cosine-warmup0.15-minlr5e-7
Text Generation
•
7B
•
Updated
•
4
mlfoundations-dev/hp_ablations_grid_mistral_bsz1024_lr5e-6_scheduler-cosine-warmup0.15
Text Generation
•
7B
•
Updated
•
5
mlfoundations-dev/hp_ablations_grid_mistral_bsz1024_lr5e-6_scheduler-cosine-warmup0.05-minlr5e-7
Text Generation
•
7B
•
Updated
•
4
mlfoundations-dev/oh_v1.3_slim_orca_x4
Text Generation
•
8B
•
Updated
•
7
mlfoundations-dev/original_tiger_dataset_small
Text Generation
•
8B
•
Updated
•
6
mlfoundations-dev/hp_ablations_gemma_epoch4
Text Generation
•
9B
•
Updated
•
4
mlfoundations-dev/hp_ablations_gemma_epoch2
Text Generation
•
9B
•
Updated
•
5
mlfoundations-dev/oh-dcft-v3.1-llama-3.1-8b
Text Generation
•
8B
•
Updated
•
6
•
1
mlfoundations-dev/oh-dcft-v3.1-claude-3-5-haiku-20241022
Text Generation
•
8B
•
Updated
•
6
•
5
mlfoundations-dev/oh_v1.3_metamath_x8
Text Generation
•
8B
•
Updated
•
5
mlfoundations-dev/hp_ablations_gemma_epoch3
Text Generation
•
9B
•
Updated
•
5
mlfoundations-dev/hp_ablations_gemma_epoch4_dcftv1.2
Text Generation
•
9B
•
Updated
•
4
mlfoundations-dev/hp_ablations_gemma_bsz1024
Text Generation
•
9B
•
Updated
•
5
mlfoundations-dev/hp_ablations_gemma_epoch2_dcftv1.2
Text Generation
•
9B
•
Updated
•
5
mlfoundations-dev/hp_ablations_gemma_epoch5
Text Generation
•
9B
•
Updated
•
5
mlfoundations-dev/oh_v1.3_metamath_x2
Text Generation
•
8B
•
Updated
•
5
mlfoundations-dev/hp_ablations_gemma_epoch3_dcftv1.2
Text Generation
•
9B
•
Updated
•
5
mlfoundations-dev/oh_v1.3_metamath_x.5
Text Generation
•
8B
•
Updated
•
5