File size: 5,345 Bytes
9d5b280 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import argparse
import logging
import os
import yaml
from tqdm import tqdm
eval_logger = logging.getLogger("lm-eval")
SUBJECTS = {
"abstract_algebra": "stem",
"anatomy": "stem",
"astronomy": "stem",
"business_ethics": "other",
"clinical_knowledge": "other",
"college_biology": "stem",
"college_chemistry": "stem",
"college_computer_science": "stem",
"college_mathematics": "stem",
"college_medicine": "other",
"college_physics": "stem",
"computer_security": "stem",
"conceptual_physics": "stem",
"econometrics": "social_sciences",
"electrical_engineering": "stem",
"elementary_mathematics": "stem",
"formal_logic": "humanities",
"global_facts": "other",
"high_school_biology": "stem",
"high_school_chemistry": "stem",
"high_school_computer_science": "stem",
"high_school_european_history": "humanities",
"high_school_geography": "social_sciences",
"high_school_government_and_politics": "social_sciences",
"high_school_macroeconomics": "social_sciences",
"high_school_mathematics": "stem",
"high_school_microeconomics": "social_sciences",
"high_school_physics": "stem",
"high_school_psychology": "social_sciences",
"high_school_statistics": "stem",
"high_school_us_history": "humanities",
"high_school_world_history": "humanities",
"human_aging": "other",
"human_sexuality": "social_sciences",
"international_law": "humanities",
"jurisprudence": "humanities",
"logical_fallacies": "humanities",
"machine_learning": "stem",
"management": "other",
"marketing": "other",
"medical_genetics": "other",
"miscellaneous": "other",
"moral_disputes": "humanities",
"moral_scenarios": "humanities",
"nutrition": "other",
"philosophy": "humanities",
"prehistory": "humanities",
"professional_accounting": "other",
"professional_law": "humanities",
"professional_medicine": "other",
"professional_psychology": "social_sciences",
"public_relations": "social_sciences",
"security_studies": "social_sciences",
"sociology": "social_sciences",
"us_foreign_policy": "social_sciences",
"virology": "other",
"world_religions": "humanities",
}
GROUPS = ["question_and_answer"]
def parse_args():
    """Parse command-line options for the YAML config generator.

    Returns:
        argparse.Namespace with ``base_yaml_path`` (required),
        ``save_dir``, ``task_prefix``, ``cot_prompt_path``, and
        ``group_prefix`` attributes.
    """
    arg_parser = argparse.ArgumentParser(
        description="Generate configuration YAML files for LM Evaluation Harness."
    )
    # Required: base YAML whose settings every generated file inherits.
    arg_parser.add_argument(
        "--base_yaml_path",
        required=True,
        help="Path to the base YAML configuration file.",
    )
    # Destination directory for the generated YAML files.
    arg_parser.add_argument(
        "--save_dir",
        default="/data/local/cat/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer",
    )
    # Optional name prefixes and chain-of-thought prompt file; all share
    # the same simple flag/default shape, so register them in one pass.
    for flag, default_value in (
        ("--task_prefix", ""),
        ("--cot_prompt_path", None),
        ("--group_prefix", ""),
    ):
        arg_parser.add_argument(flag, default=default_value)
    return arg_parser.parse_args()
if __name__ == "__main__":
args = parse_args()
# Load base YAML configuration
base_yaml_name = os.path.basename(args.base_yaml_path)
with open(args.base_yaml_path, "r", encoding="utf-8") as f:
base_yaml = yaml.full_load(f)
if args.cot_prompt_path is not None:
import json
with open(args.cot_prompt_path, encoding="utf-8") as f:
cot_file = json.load(f)
for group in GROUPS:
for subject, category in tqdm(SUBJECTS.items()):
if args.cot_prompt_path is not None:
description = cot_file[subject]
else:
description = f"The following are multiple choice questions (with answers) about {' '.join(subject.split('_'))}.\n\n"
yaml_dict = {
"include": base_yaml_name,
"tag": f"mmlusr_{args.group_prefix}{group}_{category}"
if args.group_prefix
else f"mmlusr_{group}_{category}",
"task": f"mmlusr_{args.task_prefix}{group}_{subject}"
if args.task_prefix
else f"mmlusr_{group}_{subject}",
"task_alias": subject.replace("_", " "),
"description": description,
"dataset_name": f"{group}_{subject}",
}
# File path for saving the generated YAML file
file_save_path = os.path.join(args.save_dir, f"{group}_{subject}.yaml")
with open(file_save_path, "w", encoding="utf-8") as yaml_file:
yaml.dump(yaml_dict, yaml_file, allow_unicode=True, default_style='"')
eval_logger.info(f"Saved YAML for {group} {subject} to {file_save_path}")
# Save group configuration if specified
if args.group_prefix:
file_save_path = os.path.join(
args.save_prefix_path, args.group_prefix + ".yaml"
)
eval_logger.info(f"Saving benchmark config to {file_save_path}")
with open(file_save_path, "w", encoding="utf-8") as yaml_file:
yaml.dump(yaml_dict, yaml_file, indent=4, default_flow_style=False)
|