# Task configuration: rolling log-likelihood over the document-level
# wikitext-2-raw-v1 dataset, reported as word/byte perplexity and bits-per-byte.
task: wikitext

# Dataset location and configuration name.
dataset_path: EleutherAI/wikitext_document_level
dataset_name: wikitext-2-raw-v1

output_type: loglikelihood_rolling

# Names of the dataset splits used for each role.
training_split: train
validation_split: validation
test_split: test

# Empty context string: the scored text comes entirely from doc_to_target.
doc_to_text: ""

# `!function` is a custom tag naming a callable as `<module>.<attribute>`;
# presumably resolved by the loader against a sibling preprocess_wikitext
# module -- confirm against the harness that consumes this file.
doc_to_target: !function preprocess_wikitext.wikitext_detokenizer
process_results: !function preprocess_wikitext.process_results

# Decontamination query template; `{{page}}` is quoted so the braces are kept
# as a literal string instead of being read as a YAML flow mapping.
should_decontaminate: true
doc_to_decontamination_query: "{{page}}"

metric_list:
  - metric: word_perplexity
  - metric: byte_perplexity
  - metric: bits_per_byte

# `version` must be nested under `metadata`; at column 0 it would become an
# unrelated top-level key and leave `metadata` null.
metadata:
  version: 2.0

# Same nesting requirement: `trust_remote_code` belongs to `dataset_kwargs`.
# NOTE(review): looks like these kwargs are forwarded to the dataset loader,
# which would permit running code shipped with the dataset repo -- confirm.
dataset_kwargs:
  trust_remote_code: true