BayesTensor's picture
Upload folder using huggingface_hub
9d5b280 verified
import os
from itertools import islice
import datasets
import pytest
import lm_eval.tasks as tasks
from lm_eval.api.task import ConfigurableTask
from lm_eval.evaluator_utils import get_task_list
from .utils import new_tasks
# Module-level setup executed once at import (i.e. at pytest collection time).
# NOTE(review): presumably this flag lets `datasets` run dataset loading scripts
# shipped with remote repos without prompting -- confirm against the installed
# `datasets` version.
datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True
# NOTE(review): presumably silences the tokenizers fork/parallelism warning -- confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Shared TaskManager instance handed to tasks.get_task_dict() in task_class().
task_manager = tasks.TaskManager()
# Default task names, used by task_class() when new_tasks() returns nothing.
TASKS = ["arc_easy"]
def task_class():
    """Build the list of task objects that ``TestNewTasks`` is parametrized over.

    Uses the task names returned by ``new_tasks()`` when that result is
    truthy and falls back to the module-level ``TASKS`` default otherwise.

    Returns:
        A list holding the ``.task`` attribute of every entry produced by
        ``get_task_list`` for the resolved task dict.
    """
    # CI: new_tasks checks if any modifications have been made; an empty
    # result means nothing changed, so the default task list is used instead.
    selected_names = new_tasks() or TASKS
    task_dict = tasks.get_task_dict(selected_names, task_manager)
    return [entry.task for entry in get_task_list(task_dict)]
@pytest.fixture()
def limit() -> int:
    """Provide the document limit that the tests pass to ``islice`` and
    ``build_all_requests``."""
    doc_limit = 10
    return doc_limit
# Tests
@pytest.mark.parametrize("task_class", task_class(), ids=lambda x: f"{x.config.task}")
class TestNewTasks:
    """Smoke tests run once per task object returned by ``task_class()``.

    Each test receives the task under test through the ``task_class``
    parameter; tests that inspect documents also receive the ``limit``
    fixture, which caps how many documents are sampled.
    """

    @staticmethod
    def _sample_docs(task: ConfigurableTask, limit: int) -> list:
        """Return at most ``limit`` docs: test docs if the task has them,
        otherwise validation docs."""
        # NOTE(review): validation_docs() is used whenever has_test_docs() is
        # falsy, without checking has_validation_docs() -- presumably every
        # task under test provides at least one of the two splits; confirm.
        docs = task.test_docs() if task.has_test_docs() else task.validation_docs()
        return list(islice(docs, limit))

    def test_download(self, task_class: ConfigurableTask):
        """``download()`` must leave a non-None ``dataset`` on the task."""
        task_class.download()
        assert task_class.dataset is not None

    def test_has_training_docs(self, task_class: ConfigurableTask):
        """``has_training_docs()`` must return a True/False-equal value."""
        assert task_class.has_training_docs() in [True, False]

    def test_check_training_docs(self, task_class: ConfigurableTask):
        """A task reporting training docs must configure ``training_split``."""
        if task_class.has_training_docs():
            assert task_class._config["training_split"] is not None

    def test_has_validation_docs(self, task_class: ConfigurableTask):
        """``has_validation_docs()`` must return a True/False-equal value."""
        assert task_class.has_validation_docs() in [True, False]

    def test_check_validation_docs(self, task_class: ConfigurableTask):
        """A task reporting validation docs must configure ``validation_split``."""
        if task_class.has_validation_docs():
            assert task_class._config["validation_split"] is not None

    def test_has_test_docs(self, task_class: ConfigurableTask):
        """``has_test_docs()`` must return a True/False-equal value."""
        assert task_class.has_test_docs() in [True, False]

    def test_check_test_docs(self, task_class: ConfigurableTask):
        """A task reporting test docs must configure ``test_split``."""
        task = task_class
        if task.has_test_docs():
            assert task._config["test_split"] is not None

    def test_should_decontaminate(self, task_class: ConfigurableTask):
        """Decontaminating tasks must configure ``doc_to_decontamination_query``."""
        task = task_class
        assert task.should_decontaminate() in [True, False]
        if task.should_decontaminate():
            assert task._config["doc_to_decontamination_query"] is not None

    def test_doc_to_text(self, task_class: ConfigurableTask, limit: int):
        """``doc_to_text`` must return strings that respect the space convention.

        When the target delimiter is whitespace, a non-empty prompt must not
        itself end in whitespace. Tasks with ``multiple_input`` set are not
        checked.
        """
        task = task_class
        texts = [task.doc_to_text(doc) for doc in self._sample_docs(task, limit)]
        # space convention; allow txt to have length 0 for perplexity-like
        # tasks since the model tacks an <|endoftext|> on
        target_delimiter: str = task.config.target_delimiter
        if task.multiple_input:
            return
        for text in texts:
            assert isinstance(text, str)
            # Delimiter is tested first so the evaluation order matches the
            # original nested conditional expression.
            if target_delimiter.isspace() and text:
                assert not text[-1].isspace(), (
                    "doc_to_text ends in a whitespace and target delimiter also a whitespace"
                )

    def test_create_choices(self, task_class: ConfigurableTask, limit: int):
        """For multiple-choice output types, ``doc_to_choice`` must return a
        list whose first element is a string."""
        task = task_class
        docs = self._sample_docs(task, limit)
        if "multiple_choice" in task._config.output_type:
            choice_lists = [task.doc_to_choice(doc) for doc in docs]
            assert all(isinstance(choices, list) for choices in choice_lists)
            # Only the first choice of each doc is type-checked.
            assert all(isinstance(choices[0], str) for choices in choice_lists)

    def test_doc_to_target(self, task_class: ConfigurableTask, limit: int):
        """For ``multiple_choice`` tasks, ``doc_to_target`` must return an
        int or str label."""
        task = task_class
        targets = [task.doc_to_target(doc) for doc in self._sample_docs(task, limit)]
        if task._config.output_type == "multiple_choice":
            # TODO<baber>: label can be string or int; add better test conditions
            assert all(isinstance(label, (int, str)) for label in targets)

    def test_build_all_requests(self, task_class: ConfigurableTask, limit: int):
        """``build_all_requests`` must leave a non-None ``instances`` on the task."""
        # NOTE(review): rank=1 together with world_size=1 looks out of range if
        # ranks are zero-based -- confirm against build_all_requests before changing.
        task_class.build_all_requests(rank=1, limit=limit, world_size=1)
        assert task_class.instances is not None

    # TODO: Add proper testing
    def test_construct_requests(self, task_class: ConfigurableTask, limit: int):
        """``construct_requests`` must yield one result per sampled document."""
        task = task_class
        docs = self._sample_docs(task, limit)
        # ctx is "" for multiple input tasks
        requests = [
            task.construct_requests(
                doc=doc, ctx="" if task.multiple_input else task.doc_to_text(doc)
            )
            for doc in docs
        ]
        # A falsy limit disables the count check, as in the original expression.
        if limit:
            assert len(requests) == limit