from importlib.util import find_spec

from lm_eval import utils
from lm_eval.api.registry import register_model
from lm_eval.models.huggingface import HFLM
from lm_eval.models.utils import get_dtype


eval_logger = utils.eval_logger


@register_model("ipex")
class IPEXLM(HFLM):
    """
    Uses the HuggingFace transformers + optimum-intel IPEX backend; it can run on
    Intel CPUs and Intel GPUs.
    """
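
    # Example CLI invocation (an illustrative sketch; the model name, dtype, and
    # task below are assumptions, not taken from this file):
    #
    #   lm_eval --model ipex \
    #       --model_args pretrained=gpt2,dtype=bfloat16 \
    #       --tasks lambada_openai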

    def __init__(
        self,
        **kwargs,
    ) -> None:
        if "backend" in kwargs:
            assert kwargs["backend"] == "causal", (
                "Currently, only IPEXModelForCausalLM is supported."
            )

        super().__init__(
            backend=kwargs.pop("backend", "causal"),
            **kwargs,
        )

    def _create_model(
        self,
        pretrained: str,
        revision="main",
        dtype="auto",
        trust_remote_code=False,
        parallelize=False,
        gpus=None,
        max_memory_per_gpu=None,
        max_cpu_memory=None,
        offload_folder="./offload",
        peft=None,
        delta=None,
        autogptq=False,
        gptqmodel=False,
        **kwargs,
    ) -> None:
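        # `peft`, `delta`, `autogptq`, and `gptqmodel` are accepted but not used in
        # this method; presumably they only keep the signature aligned with
        # HFLM._create_model.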
        if not find_spec("optimum"):
            raise ModuleNotFoundError(
                "package `optimum` is not installed. Please install it via `pip install optimum[ipex]`"
            )
        else:
            from optimum.intel import IPEXModelForCausalLM

        model_kwargs = kwargs if kwargs else {}
        model_kwargs.update(
            self._get_accelerate_args(
                parallelize=parallelize,
                device_map=kwargs.get("device_map", None),
                max_memory_per_gpu=max_memory_per_gpu,
                max_cpu_memory=max_cpu_memory,
                offload_folder=offload_folder,
                gpus=gpus,
            )
        )
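
        # The placement-related kwargs assembled above by `_get_accelerate_args`
        # (e.g. a device map and memory limits) are merged into `model_kwargs` and
        # forwarded to `from_pretrained` below along with any user-supplied kwargs.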
        self._model = IPEXModelForCausalLM.from_pretrained(
            pretrained,
            revision=revision,
            torch_dtype=get_dtype(dtype),
            trust_remote_code=trust_remote_code,
            **model_kwargs,
        )