"""Prompt-rendering regression test for the ``mmlu_anatomy`` task."""

import itertools
import random
from typing import List

import numpy as np
import pytest

from lm_eval import tasks
from lm_eval.tasks import TaskManager
from lm_eval.utils import join_iters


# NOTE(review): the expected-prompt literals below contain almost no line
# breaks between the header, the questions, the options and "Answer:".
# Presumably whitespace was collapsed in this copy of the file -- confirm the
# literals against the prompt the task actually renders before relying on
# this test.
MMLU_ANATOMY_ZERO_SHOT = """The following are multiple choice questions (with answers) about anatomy. A lesion causing compression of the facial nerve at the stylomastoid foramen will cause ipsilateral A. paralysis of the facial muscles. B. paralysis of the facial muscles and loss of taste. C. paralysis of the facial muscles, loss of taste and lacrimation. D. paralysis of the facial muscles, loss of taste, lacrimation and decreased salivation. Answer:"""

MMLU_ANATOMY_FIVE_SHOT = """The following are multiple choice questions (with answers) about anatomy. What is the embryological origin of the hyoid bone? A. The first pharyngeal arch B. The first and second pharyngeal arches C. The second pharyngeal arch D. The second and third pharyngeal arches Answer: D Which of these branches of the trigeminal nerve contain somatic motor processes? A. The supraorbital nerve B. The infraorbital nerve C. The mental nerve D. None of the above Answer: D The pleura A. have no sensory innervation. B. are separated by a 2 mm space. C. extend into the neck. D. are composed of respiratory epithelium. Answer: C In Angle's Class II Div 2 occlusion there is A. excess overbite of the upper lateral incisors. B. negative overjet of the upper central incisors. C. excess overjet of the upper lateral incisors. D. excess overjet of the upper central incisors. Answer: C Which of the following is the body cavity that contains the pituitary gland? A. Abdominal B. Cranial C. Pleural D. Spinal Answer: B A lesion causing compression of the facial nerve at the stylomastoid foramen will cause ipsilateral A. paralysis of the facial muscles. B. paralysis of the facial muscles and loss of taste. C. paralysis of the facial muscles, loss of taste and lacrimation. D. 
paralysis of the facial muscles, loss of taste, lacrimation and decreased salivation. Answer:"""


def _collect_docs(task, sets: str):
    """Return the documents of every requested split of *task*, joined.

    Args:
        task: Task object exposing the ``has_*_docs()`` / ``*_docs()`` pairs
            for its training, validation and test splits.
        sets: Comma-separated split names. Only ``"train"``, ``"val"`` and
            ``"test"`` are recognised; any other name is ignored.

    Returns:
        The result of ``join_iters`` over the document collections of the
        requested splits, in the order the splits were listed.

    Raises:
        ValueError: If none of the requested splits yields documents.
    """
    iters = []
    for split in sets.split(","):
        # Short-circuiting keeps each ``has_*_docs()`` call limited to the
        # split that was actually requested.
        if split == "train" and task.has_training_docs():
            docs = task.training_docs()
        elif split == "val" and task.has_validation_docs():
            docs = task.validation_docs()
        elif split == "test" and task.has_test_docs():
            docs = task.test_docs()
        else:
            docs = None
        if docs is not None:
            iters.append(docs)

    if not iters:
        raise ValueError(f"no documents available for requested splits {sets!r}")
    return join_iters(iters)


@pytest.mark.parametrize(
    "task_names,sets,num_fewshot,seed,num_examples,expected_prompt",
    [
        (["mmlu_anatomy"], "test", 0, 42, 1, MMLU_ANATOMY_ZERO_SHOT),
        (["mmlu_anatomy"], "test", 5, 42, 1, MMLU_ANATOMY_FIVE_SHOT),
    ],
)
def test_mmlu_prompt_rendering(
    task_names: List[str],
    sets: str,
    num_fewshot: int,
    seed: int,
    num_examples: int,
    expected_prompt: str,
):
    """Check that each task renders exactly ``expected_prompt``.

    Args:
        task_names: Names of the tasks to load through ``tasks.get_task_dict``.
        sets: Comma-separated split names to draw documents from.
        num_fewshot: Number of few-shot examples passed to ``fewshot_context``.
        seed: Seed applied to NumPy's global random state before loading.
        num_examples: Number of leading documents to check per task; a value
            of zero or less checks every document.
        expected_prompt: The exact context string each checked document must
            render to.

    Raises:
        ValueError: If a task has no documents in any of the requested splits.
    """
    np.random.seed(seed)
    task_manager = TaskManager()
    task_dict = tasks.get_task_dict(task_names, task_manager)

    for task in task_dict.values():
        # Some entries are tuples; the task object is the second element.
        if isinstance(task, tuple):
            _, task = task

        docs = _collect_docs(task, sets)
        if num_examples > 0:
            # Only the first ``num_examples`` documents are compared.
            docs = itertools.islice(docs, num_examples)

        for doc in docs:
            ctx = task.fewshot_context(doc=doc, num_fewshot=num_fewshot)
            assert ctx == expected_prompt