File size: 4,274 Bytes
500fbd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a8d1e8
 
 
4716c27
500fbd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4716c27
 
500fbd7
 
 
 
 
 
 
 
 
 
4a3a142
500fbd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from enum import StrEnum, auto


class Tasks(StrEnum):
    EXTRACTIVE_QUESTION_ANSWERING = auto()
    MULTIPLE_CHOICE = auto()
    SUMMARIZATION = auto()
    NATURAL_LANGUAGE_INFERENCE = auto()
    TEXT_CLASSIFICATION = auto()
    MACHINE_TRANSLATION = auto()
    GRAMMATICAL_ERROR_CORRECTION = auto()


class Metrics(StrEnum):
    F1 = "f1"
    EXACT_MATCH = "exact_match"
    ROGUE1 = "rouge1"
    ROUGE2 = "rouge2"
    ROUGEL = "rougeL"
    ACCURACY = "acc"
    WER = "wer"
    BLEU = "bleu"


# Maps every dataset name to the task type it is registered under.
# Entries are written as one group per task; groups and names appear in a
# fixed order because the resulting dict's insertion order is observable
# to anything that iterates over it.
DATASET_TASK_DICT = {
    # extractive qa ('mkqa_tr' is not exactly extractive)
    **dict.fromkeys(
        ('xquad_tr', 'tquad', 'mkqa_tr'),
        Tasks.EXTRACTIVE_QUESTION_ANSWERING,
    ),

    # summarization
    **dict.fromkeys(
        ('xlsum_tr', 'mlsum_tr', 'wiki_lingua_tr', 'tr-wikihow-summ'),
        Tasks.SUMMARIZATION,
    ),

    # NLI ('nli_tr' is currently disabled)
    **dict.fromkeys(
        ('mnli_tr', 'snli_tr', 'xnli_tr'),
        Tasks.NATURAL_LANGUAGE_INFERENCE,
    ),

    # multiple-choice
    **dict.fromkeys(
        (
            'xcopa_tr',
            'exams_tr',
            'belebele_tr',
            'turkish_plu',
            'turkish_plu_goal_inference',
            'turkish_plu_next_event_prediction',
            'turkish_plu_step_inference',
            'turkish_plu_step_ordering',
            'turkce_atasozleri',
            'turkishmmlu',
            'bilmecebench',
            'circumflex_tr',
        ),
        Tasks.MULTIPLE_CHOICE,
    ),

    # fact-checking, not sure whether these are multi-choice
    # ('trclaim19' is currently disabled)
    **dict.fromkeys(
        ('check_worthiness', 'relevance_judgment'),
        Tasks.MULTIPLE_CHOICE,
    ),

    # text classification
    **dict.fromkeys(
        ('sts_tr', 'offenseval_tr', 'news_cat', 'ironytr'),
        Tasks.TEXT_CLASSIFICATION,
    ),

    # other generation
    'wmt-tr-en-prompt': Tasks.MACHINE_TRANSLATION,
    'gecturk_generation': Tasks.GRAMMATICAL_ERROR_CORRECTION,
}


# Maps each task type to the single metric associated with it.
TASK_METRIC_DICT = {
    Tasks.EXTRACTIVE_QUESTION_ANSWERING: Metrics.EXACT_MATCH,
    # The three label-prediction tasks all share accuracy.
    **dict.fromkeys(
        (
            Tasks.MULTIPLE_CHOICE,
            Tasks.TEXT_CLASSIFICATION,
            Tasks.NATURAL_LANGUAGE_INFERENCE,
        ),
        Metrics.ACCURACY,
    ),
    Tasks.SUMMARIZATION: Metrics.ROUGE2,
    Tasks.MACHINE_TRANSLATION: Metrics.BLEU,
    Tasks.GRAMMATICAL_ERROR_CORRECTION: Metrics.EXACT_MATCH,
}


# Task types classed as generative: summarization, machine translation and
# grammatical error correction. Extractive QA is not listed here.
# NOTE(review): how callers use this tuple is not visible in this file.
GENERATIVE_TASKS = (
    Tasks.SUMMARIZATION,
    Tasks.MACHINE_TRANSLATION,
    Tasks.GRAMMATICAL_ERROR_CORRECTION,
)

# Named groups of datasets. Each group holds the list of dataset names it
# covers ('datasets') and a human-readable summary ('description').
DATASET_GROUPS = {
    'QA': {
        'datasets': ['xquad_tr', 'tquad', 'mkqa_tr'],
        # The description names MKQA too, since 'mkqa_tr' is in the group.
        'description': 'Turkish splits of SQuAD-like datasets XQuAD and TQUAD, plus MKQA.',
    },
    'MCQA': {
        'datasets': [
            'xcopa_tr',
            'exams_tr',
            'belebele_tr',
            'turkce_atasozleri',
            'bilmecebench',
            'turkishmmlu',
            'circumflex_tr',
            # Every dataset registered under a 'turkish_plu' name, in the
            # order DATASET_TASK_DICT declares them.
            *(name for name in DATASET_TASK_DICT if name.startswith('turkish_plu')),
        ],
        'description': 'Multiple Choice Question Answering datasets: XCOPA, Exams, Belebele, Turkish PLU, Turkce-Atasozleri, BilmeceBench, TurkishMMLU, and CircumflexTR'
    },
    'TC': {
        'datasets': ['sts_tr', 'offenseval_tr', 'news_cat', 'ironytr'],
        'description': 'Text Classification datasets.',
    },
    'NLI': {
        'datasets': ['mnli_tr', 'snli_tr', 'xnli_tr'],
        'description': 'Natural Language Inference (NLI) datasets in Turkish: XNLI, SNLI and MNLI.',
    },
    'SUM': {
        'datasets': ['wiki_lingua_tr', 'xlsum_tr', 'tr-wikihow-summ', 'mlsum_tr'],
        'description': 'Summarization datasets in Turkish (XLSum, MLSum, WikiLingua and TrWikiHowSumm).',
    },
    'GEC': {
        'datasets': ['gecturk_generation'],
        'description': 'Grammatical Error Correction task.',
    },
    'MT': {
        'datasets': ['wmt-tr-en-prompt'],
        # NOTE(review): the dataset key reads 'tr-en' while the description
        # says English-to-Turkish -- confirm the translation direction.
        'description': 'Machine Translation on WMT-16 dataset (English-to-Turkish).',
    },

    # Disabled group, kept for reference:
    #  'TrClaim19': {
    #     'datasets': ['check_worthiness', 'relevance_judgment'],
    #     'description': 'TrClaim19 dataset for fact-checking.',
    # },
}