# Capitalized leading words (articles and pronouns) that get lowercased when
# an option is substituted into the middle of a sentence.
upper_pronouns = [
    "A",
    "An",
    "The",
    "She",
    "He",
    "It",
    "They",
    "My",
    "His",
    "Her",
    "Their",
]


def process_doc(dataset):
    """Normalize the text and both candidate options of every document.

    Args:
        dataset: Object exposing ``map(fn)``; presumably a Hugging Face
            ``datasets.Dataset`` for `wsc273` -- confirm against the caller.
            Each document must provide ``"text"``, ``"options"`` (at least
            two entries), ``"pronoun"`` and ``"pronoun_loc"``.

    Returns:
        Whatever ``dataset.map`` returns for the per-document normalizer.
    """

    def process_fn(doc):
        # The HF implementation of `wsc273` is not `partial evaluation` friendly.
        # NOTE(review): as written, both arguments below are a single space, so
        # this call is a no-op. Presumably it was meant to collapse double
        # spaces -- check the effect on `pronoun_loc` before changing it.
        doc["text"] = doc["text"].replace(" ", " ")
        doc["options"][0] = __normalize_option(doc, doc["options"][0])
        doc["options"][1] = __normalize_option(doc, doc["options"][1])
        return doc

    return dataset.map(process_fn)


def __normalize_option(doc, option: str) -> str:
    """Adapt a candidate option so it can stand in for the document's pronoun.

    Args:
        doc: Document mapping; reads ``"pronoun"``, ``"text"`` and
            ``"pronoun_loc"`` (index of the pronoun inside ``"text"``).
        option: Candidate replacement text for the pronoun.

    Returns:
        The option with ``'s`` appended when the pronoun is a possessive
        determiner, and with its leading word lowercased when that word is in
        ``upper_pronouns`` and the pronoun does not start a sentence.
    """
    # Append `'s` to possessive determiner based options.
    if doc["pronoun"].lower() in ["my", "his", "her", "our", "their"]:
        option += "'s"

    words = option.split()
    if not words:
        # Empty or whitespace-only option: there is no leading word to adjust.
        return option

    # Appropriately lowercase the pronoun in the option.
    pronoun = words[0]
    pronoun_loc = doc["pronoun_loc"]
    # A pronoun within the first two characters has no preceding sentence;
    # indexing with `pronoun_loc - 2` would wrap around to the end of the text.
    start_of_sentence = pronoun_loc < 2 or doc["text"][pronoun_loc - 2] == "."
    if not start_of_sentence and pronoun in upper_pronouns:
        # Limit the replacement to the leading word; an unbounded replace would
        # also lowercase later occurrences (e.g. "The Theater" -> "the theater").
        return option.replace(pronoun, pronoun.lower(), 1)
    return option