|
import argparse |
|
|
|
import yaml |
|
|
|
|
|
class FunctionTag:
    """Small holder object exposing the wrapped payload as ``value``."""

    def __init__(self, value):
        """Store ``value`` unchanged on the instance."""
        self.value = value
|
|
|
|
|
# Per-language prompt vocabulary, keyed by the language code used as the
# dataset name. Each row below lists, in order: the question word, then the
# entailment, neutral and contradiction labels.
_LABEL_KEYS = (
    "QUESTION_WORD",
    "ENTAILMENT_LABEL",
    "NEUTRAL_LABEL",
    "CONTRADICTION_LABEL",
)

_LABEL_ROWS = (
    ("amh", "ትክክል", "አዎ", "እንዲሁም", "አይ"),
    ("eng", "Right", "Yes", "Also", "No"),
    ("ewe", "Esɔ gbe", "Ɛ̃", "Hã", "Ao"),
    ("fra", "correct", "Oui", "Aussi", "Non"),
    ("hau", "Daidai", "Ee", "Haka kuma", "A'a"),
    ("ibo", "Ziri ezi", "Éè", "Ọzọkwa", "Mba"),
    ("kin", "Nibyo", "Yego", "Na none", "Oya"),
    ("lin", "Malamu", "Iyo", "Lisusu", "Te"),
    ("lug", "Kituufu", "Yee", "N’ekirala", "Nedda"),
    ("orm", "Sirrii", "Eeyyee", "Akkasumas", "Lakki"),
    ("sna", "Chokwadi", "Hongu", "Uye", "Kwete"),
    ("sot", "Nepile", "E", "Hape", "Tjhe"),
    ("swa", "Sahihi", "Ndiyo", "Pia", "Hapana"),
    ("twi", "Nifa", "Aane", "Anaasɛ", "Daabi"),
    ("wol", "Dëgg", "Waaw", "Itam", "Déet"),
    ("xho", "Ichanekile", "Ewe", "Kananjalo", "Hayi"),
    ("yor", "Òótọ́", "Bẹ́ẹ̀ni", "Àti pé", "Rárá"),
    ("zul", "Kulungile", "Yebo", "Futhi", "Cha"),
)

# Expand the rows into {language_code: {label_key: text}}.
LANGUAGES = {
    code: dict(zip(_LABEL_KEYS, texts)) for code, *texts in _LABEL_ROWS
}
|
|
|
|
|
def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
    """
    Generate a yaml file for each language.

    Every file is named ``afrixnli_<mode>_<lang>.yaml`` and contains the
    ``include``, ``task`` and ``dataset_name`` keys. For the ``native-direct``
    mode the names use ``native_direct`` and an extra ``doc_to_choice``
    template is written, built from the language's entry in ``LANGUAGES``.

    :param output_dir: The directory to output the files to.
    :param overwrite: Whether to overwrite files if they already exist.
    :param mode: Prompt mode; selects the file/task/template names and whether
        the ``doc_to_choice`` template is emitted.
    :raises FileExistsError: If ``overwrite`` is false and at least one target
        file already exists. All languages are attempted first, and the error
        lists every file that was skipped.
    """
    languages = (
        "eng",
        "amh",
        "ibo",
        "fra",
        "sna",
        "wol",
        "ewe",
        "lin",
        "lug",
        "xho",
        "kin",
        "twi",
        "zul",
        "orm",
        "yor",
        "hau",
        "sot",
        "swa",
    )

    native_direct = mode == "native-direct"
    # The mode is spelled with a hyphen, but generated names use an underscore.
    name_part = "native_direct" if native_direct else mode
    # "x" makes open() fail with FileExistsError instead of clobbering a file.
    open_mode = "w" if overwrite else "x"

    err = []
    for lang in languages:
        file_name = f"afrixnli_{name_part}_{lang}.yaml"
        config = {
            "include": f"afrixnli_{name_part}_yaml",
            "task": f"afrixnli_{name_part}_{lang}",
            "dataset_name": lang,
        }

        if native_direct:
            words = LANGUAGES[lang]
            question_word = words["QUESTION_WORD"]
            # One template expression per label, in the order
            # entailment, neutral, contradiction.
            choices = ",".join(
                f'premise+", {question_word}? {words[label_key]}, "+hypothesis'
                for label_key in (
                    "ENTAILMENT_LABEL",
                    "NEUTRAL_LABEL",
                    "CONTRADICTION_LABEL",
                )
            )
            config["doc_to_choice"] = "{{[" + choices + "]}}"

        try:
            with open(
                f"{output_dir}/{file_name}", open_mode, encoding="utf8"
            ) as f:
                f.write("# Generated by utils.py\n")
                yaml.dump(config, f, allow_unicode=True)
        except FileExistsError:
            # Remember the collision and keep going so one error reports all.
            err.append(file_name)

    if err:
        raise FileExistsError(
            "Files were not created because they already exist (use --overwrite flag):"
            f" {', '.join(err)}"
        )
|
|
|
|
|
def main() -> None:
    """Parse CLI args and generate language-specific yaml files.

    Overwriting existing files is the default. ``--overwrite`` is accepted for
    compatibility, and ``--no-overwrite`` makes the run fail with
    ``FileExistsError`` when a target file already exists.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        default=True,
        action="store_true",
        help="Overwrite files if they already exist (default)",
    )
    # With default=True, "--overwrite" alone can never turn overwriting off;
    # this switch writes False to the same destination.
    parser.add_argument(
        "--no-overwrite",
        dest="overwrite",
        action="store_false",
        help="Fail instead of overwriting files that already exist",
    )
    parser.add_argument(
        "--output-dir",
        default="./manual/translate",
        help="Directory to write yaml files to",
    )
    parser.add_argument(
        "--mode",
        default="manual_translate",
        choices=["en_direct", "native-direct", "manual_direct", "manual_translate"],
        help="Prompt mode to generate yaml files for",
    )
    args = parser.parse_args()

    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
|
|
|
|
|
# Run the generator only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()
|
|