File size: 1,005 Bytes
9d5b280 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# Capitalized determiners/pronouns that may open an answer option; an option
# starting with one of these is lowercased when it is substituted mid-sentence.
upper_pronouns = [
    "A", "An", "The",
    "She", "He", "It", "They",
    "My", "His", "Her", "Their",
]
def process_doc(dataset):
    """Normalize every document of the dataset before evaluation.

    Args:
        dataset: An object exposing ``map(fn)``; ``fn`` is applied to each
            document. Each document is indexed by the keys ``"text"``,
            ``"options"``, ``"pronoun"`` and ``"pronoun_loc"``.

    Returns:
        Whatever ``dataset.map`` returns for the per-document normalizer.
    """

    def process_fn(doc):
        # The HF implementation of `wsc273` is not `partial evaluation` friendly.
        # Collapse doubled spaces into one. Replacing a single space with a
        # single space (as this line previously did) leaves the text unchanged.
        # NOTE(review): assumes `pronoun_loc` indexes the collapsed text -- confirm.
        doc["text"] = doc["text"].replace("  ", " ")
        # Only the first two options are normalized, in place.
        doc["options"][0] = __normalize_option(doc, doc["options"][0])
        doc["options"][1] = __normalize_option(doc, doc["options"][1])
        return doc

    return dataset.map(process_fn)
def __normalize_option(doc, option):
    """Adapt an answer option so it can replace the pronoun inside the text.

    Args:
        doc: Mapping with the keys ``"pronoun"`` (str), ``"text"`` (str) and
            ``"pronoun_loc"`` (int index into ``"text"``).
        option: The candidate string to normalize.

    Returns:
        ``option`` with ``'s`` appended when the pronoun is a possessive
        determiner, and with its leading word lowercased when that word is
        listed in ``upper_pronouns`` and the pronoun does not start a
        sentence. An empty or whitespace-only option is returned unchanged.
    """
    # Append `'s` to possessive determiner based options.
    if doc["pronoun"].lower() in ("my", "his", "her", "our", "their"):
        option += "'s"

    words = option.split()
    if not words:
        # Nothing to lowercase; avoids an IndexError on an empty option.
        return option

    # Appropriately lowercase the pronoun in the option.
    pronoun = words[0]
    pronoun_loc = doc["pronoun_loc"]
    # The character two positions before the pronoun is "." when the pronoun
    # follows ". ". With fewer than two preceding characters the pronoun opens
    # the text; guard it so a negative index does not wrap to the end.
    start_of_sentence = pronoun_loc < 2 or doc["text"][pronoun_loc - 2] == "."
    if not start_of_sentence and pronoun in upper_pronouns:
        # count=1: lowercase only the leading word, not later occurrences of
        # the same characters (e.g. "A boy named Adam" -> "a boy named Adam").
        return option.replace(pronoun, pronoun.lower(), 1)
    return option
|