|
import re |
|
from itertools import product |
|
|
|
import evaluate |
|
import transformers.data.metrics.squad_metrics as squad_metrics |
|
|
|
from lm_eval.utils import general_detokenize |
|
|
|
|
|
def lowercase_first_letter(text): |
|
return text[0].lower() + text[1:] |
|
|
|
|
|
def process_doc_nli(dataset): |
|
def process_fn(doc): |
|
|
|
doc["premise"] = general_detokenize(doc["premise"]).strip() |
|
doc["hypothesis"] = general_detokenize(doc["hypothesis"]).strip() |
|
|
|
doc["premise"] = ( |
|
doc["premise"][:-1] |
|
if doc["premise"].endswith((".", ",", "!", "?")) |
|
else doc["premise"] |
|
) |
|
|
|
doc["hypothesis"] = lowercase_first_letter(doc["hypothesis"]) |
|
|
|
doc["hypothesis"] = ( |
|
(doc["hypothesis"] + ".") |
|
if not doc["hypothesis"].endswith(".") |
|
else doc["hypothesis"] |
|
) |
|
return doc |
|
|
|
return dataset.map(process_fn) |
|
|
|
|
|
def process_xlsum(dataset): |
|
def _process_doc(doc): |
|
|
|
doc["text"] = re.sub(r" +", " ", doc["text"]) |
|
doc["summary"] = re.sub(r" +", " ", doc["summary"]) |
|
return doc |
|
|
|
return dataset.map(_process_doc) |
|
|
|
|
|
def process_docs_paraphrases(dataset): |
|
empty_docs = [] |
|
|
|
def _process_doc(doc): |
|
if doc["sentence1"] not in [None, ""] and doc["sentence2"] not in [None, ""]: |
|
doc["sentence1"] = general_detokenize(doc["sentence1"]).strip() |
|
doc["sentence2"] = general_detokenize(doc["sentence2"]).strip() |
|
|
|
if doc["sentence1"].endswith((".", ",", ";")): |
|
doc["sentence1"] = doc["sentence1"][:-1] |
|
|
|
doc["sentence2"] = lowercase_first_letter(doc["sentence2"]) |
|
return doc |
|
else: |
|
empty_docs.append(doc) |
|
return doc |
|
|
|
if empty_docs != []: |
|
len_empty_docs = len(empty_docs) |
|
print( |
|
f"Found {len_empty_docs} empty documents out of the {len(dataset)} total docs in the dataset: {empty_docs}" |
|
) |
|
return dataset.filter( |
|
lambda doc: doc["sentence1"] not in [None, ""] |
|
and doc["sentence2"] not in [None, ""] |
|
).map(_process_doc) |
|
|
|
|
|
def process_docs_copa_es(dataset): |
|
def _process_doc(doc): |
|
doc["choice1"] = lowercase_first_letter(doc["choice1"]) |
|
doc["choice2"] = lowercase_first_letter(doc["choice2"]) |
|
return doc |
|
|
|
return dataset.map(_process_doc) |
|
|
|
|
|
def rouge1(items): |
|
""" |
|
# passthrough for efficiency |
|
""" |
|
return items |
|
|
|
|
|
def rouge1_agg(items): |
|
""" |
|
Higher is better |
|
""" |
|
refs = list(zip(*items))[0] |
|
preds = list(zip(*items))[1] |
|
rouge_scorer = evaluate.load("rouge") |
|
|
|
return rouge_scorer.compute(predictions=preds, references=refs)["rouge1"] |
|
|