task: humaneval
dataset_path: openai/openai_humaneval
unsafe_code: true
output_type: generate_until
test_split: test
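# Each problem's raw prompt (function signature plus docstring) is fed to
# the model verbatim; the target appends the dataset's unit tests and a
# call to their check() harness on the generated entry point.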
doc_to_text: "{{prompt}}"
doc_to_target: "{{test}}\ncheck({{entry_point}})"
metric_list:
  - metric: !function utils.pass_at_k
    aggregation: mean
    higher_is_better: true
    k: [1]
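# utils.pass_at_k lives outside this file. As a minimal sketch, the
# standard unbiased pass@k estimator from the Codex paper, assuming n
# sampled completions of which c pass the tests (a hypothetical helper,
# not necessarily the harness's exact implementation):
#
#   import numpy as np
#
#   def pass_at_k(n: int, c: int, k: int) -> float:
#       """pass@k = 1 - C(n-c, k) / C(n, k), computed stably."""
#       if n - c < k:
#           return 1.0  # too few failures for any k-subset to miss
#       return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))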
generation_kwargs:
  until:
    - "\nclass"
    - "\ndef"
    - "\n#"
    - "\nif"
    - "\nprint"
  max_gen_toks: 1024
  do_sample: false
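# The stop sequences cut generation as soon as the model starts a new
# top-level construct (another def or class, a comment, or test code),
# so only the body of the target function is kept.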
repeats: 1
num_fewshot: 0
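# With greedy decoding (do_sample: false), one completion per problem
# (repeats: 1) suffices for pass@1; estimating pass@k for k > 1 would
# require sampling and raising repeats to at least k.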
filter_list:
  - name: "create_test"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions
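# utils.build_predictions is likewise defined outside this file. A
# minimal sketch, assuming it stitches each completion back onto its
# prompt so the executed code forms a complete function definition
# (names and shapes are assumptions, not the harness's exact code):
#
#   def build_predictions(resps, docs):
#       # resps: per-document lists of raw model completions
#       # docs: the matching HumanEval records, each with a "prompt" field
#       return [
#           [doc["prompt"] + resp for resp in doc_resps]
#           for doc_resps, doc in zip(resps, docs)
#       ]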
metadata:
  version: 1.0
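# Running this task executes model-generated code (unsafe_code: true).
# With the Hugging Face code_eval metric this typically also requires
# opting in via the HF_ALLOW_CODE_EVAL=1 environment variable; an assumed
# invocation against a recent lm-evaluation-harness release:
#
#   HF_ALLOW_CODE_EVAL=1 lm_eval --model hf \
#       --model_args pretrained=<model> \
#       --tasks humaneval --confirm_run_unsafe_code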