out / lm-evaluation-harness /lm_eval /tasks /arc /arc_challenge_chat.yaml
BayesTensor's picture
Upload folder using huggingface_hub
9d5b280 verified
tag:
- llama
task: arc_challenge_chat
dataset_path: allenai/ai2_arc
dataset_name: ARC-Challenge
output_type: generate_until
training_split: train
validation_split: validation
test_split: test
fewshot_split: train
doc_to_text: 'Given the following question and four candidate answers (A, B, C and D), choose the best answer.\nQuestion: {{question.strip()}}\nA. {{choices.text[0]}}\nB. {{choices.text[1]}}\nC. {{choices.text[2]}}{% if choices.text|length > 3 %}\nD. {{choices.text[3]}}{% endif %}\nYour response should end with "The best answer is [the_answer_letter]" where the [the_answer_letter] is one of A, B, C or D.'
gen_prefix: 'The best answer is'
fewshot_delimiter: "\n\n"
doc_to_target: "{{ 'ABCD'[answerKey|int - 1] if answerKey|string in '1234' else answerKey }}"
num_fewshot: 0
generation_kwargs:
max_gen_toks: 100
until:
- "\n\n"
- "."
metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: true
filter_list:
- name: remove_whitespace
filter:
- function: remove_whitespace
- function: take_first
metadata:
version: 1.0