from functools import partial


# Letter labels assigned to answer options, by position in the options list.
choices = [
    "A",
    "B",
    "C",
    "D",
    "E",
    "F",
    "G",
    "H",
    "I",
    "J",
    "K",
    "L",
    "M",
    "N",
    "O",
    "P",
]


def format_cot_example(example, including_answer=True):
    """Render one example as a chain-of-thought prompt string.

    The prompt consists of a ``Question:`` header, the question text, an
    ``Options:`` header, and one ``"<letter>. <option>"`` line per option.

    Args:
        example: Mapping with a ``"question"`` string and an ``"options"``
            sequence. When ``including_answer`` is true it must also hold
            a ``"cot_content"`` string.
        including_answer: If true, append the example's worked answer
            (``cot_content``) followed by a blank line, which suits a
            few-shot demonstration. If false, end the prompt with the bare
            ``"Answer: Let's think step by step."`` cue.

    Returns:
        The formatted prompt.

    Raises:
        IndexError: If the example has more options than there are labels
            in ``choices``.
        KeyError: If a required key is missing from ``example``.
    """
    question = example["question"]
    options = example["options"]

    # Fail with a descriptive message (same exception type as plain list
    # indexing would raise) when there are not enough letter labels.
    if len(options) > len(choices):
        raise IndexError(
            "example has {} options but only {} choice labels are "
            "defined".format(len(options), len(choices))
        )

    parts = ["Question:\n", question, "\n", "Options:\n"]
    parts.extend(
        "{}. {}\n".format(label, opt) for label, opt in zip(choices, options)
    )

    if including_answer:
        # Rewrite the "A:" prefix of the stored reasoning so that it matches
        # the "Answer:" cue used when no answer is included.
        cot_content = example["cot_content"].replace(
            "A: Let's think step by step.", "Answer: Let's think step by step."
        )
        parts.append(cot_content + "\n\n")
    else:
        parts.append("Answer: Let's think step by step.")

    return "".join(parts)


# Prompt for the document under evaluation: ends at the answer cue.
doc_to_text = partial(format_cot_example, including_answer=False)
# Prompt for a few-shot demonstration: includes the worked answer.
fewshot_to_text = partial(format_cot_example, including_answer=True)


def process_docs(dataset, subject):
    """Return the rows of ``dataset`` whose ``"category"`` equals ``subject``.

    Args:
        dataset: Object exposing ``filter(predicate)``; the predicate is
            called with one row and indexes it by ``"category"``.
            (Presumably a Hugging Face ``datasets.Dataset`` -- confirm
            against the caller.)
        subject: Category value to keep.

    Returns:
        Whatever ``dataset.filter`` returns for the category predicate.
    """
    return dataset.filter(lambda row: row["category"] == subject)


# One pre-bound filter per subject category.
process_biology = partial(process_docs, subject="biology")
process_business = partial(process_docs, subject="business")
process_chemistry = partial(process_docs, subject="chemistry")
process_computer_science = partial(process_docs, subject="computer science")
process_economics = partial(process_docs, subject="economics")
process_engineering = partial(process_docs, subject="engineering")
process_health = partial(process_docs, subject="health")
process_history = partial(process_docs, subject="history")
process_law = partial(process_docs, subject="law")
process_math = partial(process_docs, subject="math")
process_other = partial(process_docs, subject="other")
process_philosophy = partial(process_docs, subject="philosophy")
process_physics = partial(process_docs, subject="physics")
process_psychology = partial(process_docs, subject="psychology")