{
  "name": "Crystal-Beeper-Harmony-v5",
  "context": 512,
  "dim": 512,
  "n_layers": 6,
  "n_heads": 8,
  "mlp_ratio": 4.0,
  "dropout": 0.0,
  "resid_dropout": 0.0,
  "grad_checkpoint": false,
  "compile_model": false,
  "use_ascii": true,
  "vocab_size": 8192,
  "regions_per_block": 64,
  "capoera": {
    "enable": true,
    "topic_bins": 512,
    "mood_bins": 7
  },
  "context_mask_style": "right",
  "_alive_entries": [
    {
      "name": "TinyStories",
      "path": "roneneldan/TinyStories",
      "split": "train[30%:50%]",
      "weight": 0.1,
      "dialect": [
        0.6000000238418579,
        0.10000000149011612,
        0.05000000074505806,
        0.05000000074505806,
        0.20000000298023224
      ],
      "class_id": 0,
      "p": 0.03125000000000001
    },
    {
      "name": "WikipediaEN",
      "path": "wikimedia/wikipedia",
      "config": "20231101.en",
      "split": "train[5%:15%]",
      "weight": 0.5,
      "dialect": [
        0.11999999731779099,
        0.5799999833106995,
        0.10000000149011612,
        0.10000000149011612,
        0.10000000149011612
      ],
      "class_id": 1,
      "p": 0.15625
    },
    {
      "name": "AGNews",
      "path": "ag_news",
      "split": "train[:]",
      "weight": 0.1,
      "dialect": [
        0.20000000298023224,
        0.5,
        0.10000000149011612,
        0.10000000149011612,
        0.10000000149011612
      ],
      "class_id": 2,
      "p": 0.03125000000000001
    },
    {
      "name": "GSM8K",
      "path": "openai/gsm8k",
      "config": "main",
      "split": "train[40%:60%]",
      "weight": 0.6,
      "dialect": [
        0.10000000149011612,
        0.15000000596046448,
        0.5,
        0.15000000596046448,
        0.10000000149011612
      ],
      "class_id": 3,
      "p": 0.1875
    },
    {
      "name": "AI2-ARC-Easy",
      "path": "allenai/ai2_arc",
      "config": "ARC-Easy",
      "split": "train[30%:60%]",
      "weight": 0.6,
      "dialect": [
        0.05000000074505806,
        0.15000000596046448,
        0.4000000059604645,
        0.25,
        0.15000000596046448
      ],
      "class_id": 4,
      "p": 0.1875
    },
    {
      "name": "HH-RLHF",
      "path": "Anthropic/hh-rlhf",
      "split": "train[5%:10%]",
      "weight": 0.5,
      "dialect": [
        0.10000000149011612,
        0.25,
        0.20000000298023224,
        0.25,
        0.20000000298023224
      ],
      "class_id": 5,
      "p": 0.15625
    },
    {
      "name": "SVAMP",
      "path": "ChilleD/SVAMP",
      "split": "train",
      "weight": 0.25,
      "dialect": [
        0.10000000149011612,
        0.15000000596046448,
        0.550000011920929,
        0.15000000596046448,
        0.05000000074505806
      ],
      "class_id": 6,
      "p": 0.078125
    },
    {
      "name": "MATH-500",
      "path": "HuggingFaceH4/MATH-500",
      "split": "test",
      "weight": 0.25,
      "dialect": [
        0.05000000074505806,
        0.15000000596046448,
        0.6000000238418579,
        0.15000000596046448,
        0.05000000074505806
      ],
      "class_id": 7,
      "p": 0.078125
    },
    {
      "name": "SEP",
      "path": "AiresPucrs/stanford-encyclopedia-philosophy",
      "split": "train",
      "weight": 0.3,
      "dialect": [
        0.05000000074505806,
        0.44999998807907104,
        0.18000000715255737,
        0.2199999988079071,
        0.10000000149011612
      ],
      "class_id": 8,
      "p": 0.09375
    }
  ]
}