{
  "cache_cfg": {
    "dataset_repo": "EleutherAI/fineweb-edu-dedup-10b",
    "dataset_split": "train",
    "dataset_name": "",
    "dataset_column": "text",
    "batch_size": 16,
    "cache_ctx_len": 256,
    "n_tokens": 1000000,
    "n_splits": 5
  },
  "constructor_cfg": {
    "faiss_embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
    "faiss_embedding_cache_dir": ".embedding_cache",
    "faiss_embedding_cache_enabled": true,
    "example_ctx_len": 32,
    "min_examples": 200,
    "n_non_activating": 50,
    "center_examples": true,
    "non_activating_source": "random",
    "neighbours_type": "co-occurrence"
  },
  "sampler_cfg": {
    "n_examples_train": 40,
    "n_examples_test": 50,
    "n_quantiles": 10,
    "train_type": "quantiles",
    "test_type": "quantiles",
    "ratio_top": 0.2
  },
  "model": "meta-llama/Llama-3.2-1B",
  "sparse_model": "nev/Llama-3.2-1B-mntss-skip-transcoder",
  "hookpoints": [
    "layers.0.mlp",
    "layers.1.mlp",
    "layers.2.mlp",
    "layers.3.mlp",
    "layers.4.mlp",
    "layers.5.mlp",
    "layers.6.mlp",
    "layers.7.mlp",
    "layers.8.mlp",
    "layers.9.mlp",
    "layers.10.mlp",
    "layers.11.mlp",
    "layers.12.mlp",
    "layers.13.mlp",
    "layers.14.mlp",
    "layers.15.mlp"
  ],
  "explainer_model": "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
  "explainer_model_max_len": 5120,
  "explainer_provider": "offline",
  "explainer": "default",
  "scorers": [
    "fuzz",
    "detection"
  ],
  "name": "transcoder-llama-131k-mntss-1m",
  "max_latents": null,
  "filter_bos": false,
  "log_probs": false,
  "load_in_8bit": false,
  "hf_token": null,
  "pipeline_num_proc": 48,
  "num_gpus": 1,
  "seed": 22,
  "verbose": true,
  "num_examples_per_scorer_prompt": 5,
  "overwrite": []
}