BayesTensor
/

out

Generated from Trainer

4-bit precision

Model card Files Files and versions

out / lm-evaluation-harness /lm_eval /tasks /wsc273 /utils.py

BayesTensor's picture

Upload folder using huggingface_hub

9d5b280 verified 7 months ago

history blame contribute delete

1.01 kB

	# Capitalized leading words that `__normalize_option` lowercases when an
	# option does not begin a sentence. Despite the name, the list also holds
	# articles and possessive determiners.
	upper_pronouns = [
	    # Articles.
	    "A", "An", "The",
	    # Subject pronouns.
	    "She", "He", "It", "They",
	    # Possessive determiners.
	    "My", "His", "Her", "Their",
	]


	def process_doc(dataset):
	    """Normalize every document of a `wsc273` dataset.

	    For each document, doubled spaces in ``doc["text"]`` are collapsed to a
	    single space and the first two entries of ``doc["options"]`` are
	    rewritten by ``__normalize_option``.

	    Args:
	        dataset: Object exposing ``map(fn)``; presumably a Hugging Face
	            ``datasets.Dataset`` -- confirm against the caller.

	    Returns:
	        Whatever ``dataset.map`` returns for the per-document function.
	    """

	    def process_fn(doc):
	        # The HF implementation of `wsc273` is not `partial evaluation` friendly.
	        # Collapse doubled spaces; replacing " " with " " would be a no-op.
	        # NOTE(review): `pronoun_loc` is not shifted here -- confirm no
	        # doubled space precedes the pronoun in the data.
	        doc["text"] = doc["text"].replace("  ", " ")
	        doc["options"][0] = __normalize_option(doc, doc["options"][0])
	        doc["options"][1] = __normalize_option(doc, doc["options"][1])
	        return doc

	    return dataset.map(process_fn)


	def __normalize_option(doc, option):
	    """Return `option` adjusted so it can replace the pronoun in the text.

	    Args:
	        doc: Mapping with ``"pronoun"`` (str), ``"text"`` (str) and
	            ``"pronoun_loc"`` (int offset of the pronoun within the text).
	        option: Candidate referent string.

	    Returns:
	        The option with ``'s`` appended when the pronoun is a possessive
	        determiner, and with its leading word lowercased when that word is
	        listed in ``upper_pronouns`` and the pronoun does not start a
	        sentence. Empty or whitespace-only options are returned without
	        any case change.
	    """
	    # Append `'s` to possessive determiner based options.
	    if doc["pronoun"].lower() in ("my", "his", "her", "our", "their"):
	        option += "'s"

	    words = option.split()
	    if not words:
	        # Nothing to lowercase; avoids an IndexError on `words[0]`.
	        return option

	    # Appropriately lowercase the pronoun in the option.
	    pronoun = words[0]
	    pronoun_loc = doc["pronoun_loc"]
	    # The pronoun starts a sentence when it opens the text or follows ". ".
	    # Offsets below 2 are handled explicitly because a negative index would
	    # wrap around and inspect the end of the text instead.
	    start_of_sentence = pronoun_loc < 2 or doc["text"][pronoun_loc - 2] == "."
	    if not start_of_sentence and pronoun in upper_pronouns:
	        # Lowercase only the leading word: an unbounded replace would also
	        # rewrite later occurrences (e.g. every "A" inside the option).
	        return option.replace(pronoun, pronoun.lower(), 1)
	    return option