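"""Smoke tests for the SparseML and DeepSparse model backends in lm-eval.

These only check that evaluation runs end to end on a few documents per task;
they do not assert metric values. The optional SparseML and DeepSparse
dependencies must be installed for the backends to load.
"""
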
import pytest

from lm_eval import evaluator
from lm_eval.api.registry import get_model


SPARSEML_MODELS_TASKS = [
    # loglikelihood task
    ("facebook/opt-125m", "lambada_openai"),
    # loglikelihood_rolling task (perplexity)
    ("hf-internal-testing/tiny-random-gpt2", "wikitext"),
    # generate_until task (generation)
    ("mgoin/tiny-random-llama-2-quant", "gsm8k"),
]

DEEPSPARSE_MODELS_TASKS = [
    # loglikelihood task
    ("hf:mgoin/llama2.c-stories15M-quant-ds", "lambada_openai"),
    # generate_until task (generation)
    ("hf:mgoin/llama2.c-stories15M-quant-ds", "gsm8k"),
]


@pytest.mark.skip(reason="test failing")
@pytest.mark.parametrize("model_id,task", SPARSEML_MODELS_TASKS)
def test_sparseml_eval(model_id, task):
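    # Build the SparseML-backed LM through lm-eval's model registry; the arg
    # string is parsed into constructor kwargs and merged with the extra
    # config dict (batch size, device, dtype).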
    lm = get_model("sparseml").create_from_arg_string(
        f"pretrained={model_id}",
        {
            "batch_size": 1,
            "device": "cpu",
            "dtype": "float32",
        },
    )

    # Only evaluate a handful of documents per task to keep the test fast.
    limit = 5
    evaluator.simple_evaluate(
        model=lm,
        tasks=[task],
        num_fewshot=0,
        limit=limit,
    )


@pytest.mark.parametrize("model_id,task", DEEPSPARSE_MODELS_TASKS)
def test_deepsparse_eval(model_id, task):
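    # Build the DeepSparse-backed LM through the same registry; only the batch
    # size is overridden here, since DeepSparse is a CPU inference runtime.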
    lm = get_model("deepsparse").create_from_arg_string(
        f"pretrained={model_id}",
        {
            "batch_size": 1,
        },
    )

    # Only evaluate a handful of documents per task to keep the test fast.
    limit = 5
    evaluator.simple_evaluate(
        model=lm,
        tasks=[task],
        num_fewshot=0,
        limit=limit,
    )