Upload folder using huggingface_hub

9d5b280 verified 7 months ago

620 Bytes

	from lm_eval.utils import general_detokenize


	def default_doc_to_text(x):
	    """Build the pronoun-resolution prompt for a single document.

	    The passage is rebuilt around the pronoun (``span2_text``), passed
	    through ``general_detokenize``, and embedded in a
	    Passage / Question / Answer prompt.

	    Args:
	        x: Mapping providing ``"text"`` (the passage), ``"span1_text"``
	            (the candidate noun), ``"span2_text"`` (the pronoun) and
	            ``"span2_index"`` (word index of the pronoun in the passage).

	    Returns:
	        The prompt string, ending with ``"Answer:"``.
	    """
	    raw_passage = x["text"]
	    noun = x["span1_text"]
	    pronoun = x["span2_text"]
	    # NOTE: HuggingFace span indices are word-based not character-based.
	    pre = " ".join(raw_passage.split()[: x["span2_index"]])
	    # Character offsets below assume the passage words are separated by
	    # single spaces with no leading whitespace -- TODO confirm for the data.
	    # A separator space precedes the pronoun only when there is text before
	    # it. Without this guard a pronoun at word index 0 swallowed the
	    # character that follows it and gained a spurious leading space.
	    separator = " " if pre else ""
	    post = raw_passage[len(pre) + len(separator) + len(pronoun):]
	    passage = general_detokenize(pre + separator + pronoun + post)
	    text = (
	        f"Passage: {passage}\n"
	        + f'Question: In the passage above, does the pronoun "{pronoun}" refer to "{noun}"?\n'
	        + "Answer:"
	    )
	    return text