File size: 5,345 Bytes
9d5b280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""
Take in a YAML, and output all "other" splits with this YAML
"""

import argparse
import logging
import os

import yaml
from tqdm import tqdm


# Logger registered under the name "lm-eval"; used below to report every file written.
eval_logger = logging.getLogger("lm-eval")


# Subjects listed under the category they belong to. Kept private: the rest of
# the script only consumes the flat SUBJECTS lookup built from it below.
_SUBJECTS_BY_CATEGORY = {
    "stem": (
        "abstract_algebra",
        "anatomy",
        "astronomy",
        "college_biology",
        "college_chemistry",
        "college_computer_science",
        "college_mathematics",
        "college_physics",
        "computer_security",
        "conceptual_physics",
        "electrical_engineering",
        "elementary_mathematics",
        "high_school_biology",
        "high_school_chemistry",
        "high_school_computer_science",
        "high_school_mathematics",
        "high_school_physics",
        "high_school_statistics",
        "machine_learning",
    ),
    "other": (
        "business_ethics",
        "clinical_knowledge",
        "college_medicine",
        "global_facts",
        "human_aging",
        "management",
        "marketing",
        "medical_genetics",
        "miscellaneous",
        "nutrition",
        "professional_accounting",
        "professional_medicine",
        "virology",
    ),
    "social_sciences": (
        "econometrics",
        "high_school_geography",
        "high_school_government_and_politics",
        "high_school_macroeconomics",
        "high_school_microeconomics",
        "high_school_psychology",
        "human_sexuality",
        "professional_psychology",
        "public_relations",
        "security_studies",
        "sociology",
        "us_foreign_policy",
    ),
    "humanities": (
        "formal_logic",
        "high_school_european_history",
        "high_school_us_history",
        "high_school_world_history",
        "international_law",
        "jurisprudence",
        "logical_fallacies",
        "moral_disputes",
        "moral_scenarios",
        "philosophy",
        "prehistory",
        "professional_law",
        "world_religions",
    ),
}

# Flat subject -> category lookup. Sorting by subject name yields the
# alphabetical insertion order that iteration over this mapping relies on.
SUBJECTS = dict(
    sorted(
        (subject_name, category_name)
        for category_name, subject_names in _SUBJECTS_BY_CATEGORY.items()
        for subject_name in subject_names
    )
)

# Group names to generate configs for; each one is combined with every subject
# in SUBJECTS to form the task name, tag, dataset name and output file name.
GROUPS = ["question_and_answer"]


def parse_args():
    """Parse the generator's command-line options from ``sys.argv``.

    Options:
        --base_yaml_path   (required) base YAML file the generated configs include.
        --save_dir         directory the generated YAML files are written to.
        --task_prefix      optional prefix inserted into generated task names.
        --cot_prompt_path  optional JSON file of per-subject descriptions.
        --group_prefix     optional prefix inserted into generated tag names.

    Returns:
        argparse.Namespace: the parsed options, one attribute per flag.
    """
    cli = argparse.ArgumentParser(
        description="Generate configuration YAML files for LM Evaluation Harness."
    )

    # The only mandatory option: where to inherit settings from.
    cli.add_argument(
        "--base_yaml_path",
        required=True,
        help="Path to the base YAML configuration file.",
    )

    # Optional flags and their fallbacks, registered in declaration order.
    optional_flags = (
        (
            "--save_dir",
            "/data/local/cat/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer",
        ),
        ("--task_prefix", ""),
        ("--cot_prompt_path", None),
        ("--group_prefix", ""),
    )
    for flag, fallback in optional_flags:
        cli.add_argument(flag, default=fallback)

    return cli.parse_args()


def _build_task_config(
    base_yaml_name,
    group,
    subject,
    category,
    description,
    task_prefix="",
    group_prefix="",
):
    """Return the YAML mapping for one (group, subject) task.

    Args:
        base_yaml_name: File name of the base YAML, stored under ``include``.
        group: Group name (an entry of ``GROUPS``).
        subject: Subject key, e.g. ``"college_physics"``.
        category: Category the subject belongs to, e.g. ``"stem"``.
        description: Prompt description stored verbatim under ``description``.
        task_prefix: Optional text inserted before the group in the task name.
        group_prefix: Optional text inserted before the group in the tag.

    Returns:
        dict: keys ``include``, ``tag``, ``task``, ``task_alias``,
        ``description`` and ``dataset_name``.
    """
    # An empty prefix simply contributes nothing to the f-string, so no
    # separate "no prefix" branch is needed.
    return {
        "include": base_yaml_name,
        "tag": f"mmlusr_{group_prefix}{group}_{category}",
        "task": f"mmlusr_{task_prefix}{group}_{subject}",
        "task_alias": subject.replace("_", " "),
        "description": description,
        "dataset_name": f"{group}_{subject}",
    }


def main():
    """Generate one task YAML per (group, subject) pair, plus an optional group YAML.

    Reads the command-line options via ``parse_args()``, writes
    ``<save_dir>/<group>_<subject>.yaml`` for every entry of ``SUBJECTS`` in
    every entry of ``GROUPS``, and, when ``--group_prefix`` is given, writes
    ``<save_dir>/<group_prefix>.yaml`` listing the generated tags.

    Raises:
        OSError: if the base YAML / CoT file cannot be read or an output
            file cannot be written.
        KeyError: if ``--cot_prompt_path`` is given but lacks a subject.
    """
    args = parse_args()

    base_yaml_name = os.path.basename(args.base_yaml_path)
    with open(args.base_yaml_path, "r", encoding="utf-8") as f:
        # Parsed only to fail fast on a missing or malformed base file; the
        # generated configs reference it by name and never use its contents.
        yaml.full_load(f)

    cot_prompts = None
    if args.cot_prompt_path is not None:
        import json

        with open(args.cot_prompt_path, encoding="utf-8") as f:
            cot_prompts = json.load(f)

    # Create the output directory up front so the first write cannot fail on
    # a fresh checkout. An empty save_dir means "current directory".
    if args.save_dir:
        os.makedirs(args.save_dir, exist_ok=True)

    generated_tags = []
    for group in GROUPS:
        for subject, category in tqdm(SUBJECTS.items()):
            if cot_prompts is not None:
                description = cot_prompts[subject]
            else:
                description = f"The following are multiple choice questions (with answers) about {subject.replace('_', ' ')}.\n\n"

            yaml_dict = _build_task_config(
                base_yaml_name,
                group,
                subject,
                category,
                description,
                task_prefix=args.task_prefix,
                group_prefix=args.group_prefix,
            )
            generated_tags.append(yaml_dict["tag"])

            # File path for saving the generated YAML file
            file_save_path = os.path.join(args.save_dir, f"{group}_{subject}.yaml")
            with open(file_save_path, "w", encoding="utf-8") as yaml_file:
                yaml.dump(yaml_dict, yaml_file, allow_unicode=True, default_style='"')
            eval_logger.info(
                "Saved YAML for %s %s to %s", group, subject, file_save_path
            )

    # Save group configuration if specified
    if args.group_prefix:
        # The original read a non-existent ``args.save_prefix_path`` here
        # (AttributeError) and dumped the last per-subject task config instead
        # of a group config.
        file_save_path = os.path.join(args.save_dir, args.group_prefix + ".yaml")
        # NOTE(review): group naming scheme is inferred from the tag pattern
        # above; confirm it matches what the harness expects for this benchmark.
        group_config = {
            "group": f"mmlusr_{args.group_prefix}".rstrip("_"),
            "task": sorted(set(generated_tags)),
        }
        eval_logger.info("Saving benchmark config to %s", file_save_path)
        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(group_config, yaml_file, indent=4, default_flow_style=False)


if __name__ == "__main__":
    main()