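"""ONNX Runtime pipeline implementations for Stable Diffusion and SDXL.

Wraps diffusers pipelines so that checkpoints can be converted to ONNX,
optionally optimized with Olive, and executed through onnxruntime.

Rough usage sketch (hypothetical names: `SomeOnnxPipeline` stands for any
concrete PipelineBase subclass and `p` for a StableDiffusionProcessing
instance supplied by the caller):

    raw = SomeOnnxPipeline.from_pretrained(path)  # returns an OnnxRawPipeline
    pipeline = raw.preprocess(p)  # convert/optimize, then load the runnable pipeline
"""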
import os
import json
import shutil
import tempfile
from abc import ABCMeta
from typing import Type, Tuple, List, Any, Dict
from packaging import version
import torch
import diffusers
import onnxruntime as ort
import optimum.onnxruntime
from installer import log, install
from modules import shared
from modules.paths import sd_configs_path, models_path
from modules.sd_models import CheckpointInfo
from modules.processing import StableDiffusionProcessing
from modules.olive_script import config
from modules.onnx_impl import DynamicSessionOptions, TorchCompatibleModule, VAE, run_olive_workflow
from modules.onnx_impl.utils import extract_device, move_inference_session, check_diffusers_cache, check_pipeline_sdxl, check_cache_onnx, load_init_dict, load_submodel, load_submodels, patch_kwargs, load_pipeline, get_base_constructor, get_io_config
from modules.onnx_impl.execution_providers import ExecutionProvider, EP_TO_NAME, get_provider


SUBMODELS_SD = ("text_encoder", "unet", "vae_encoder", "vae_decoder",)
SUBMODELS_SDXL = ("text_encoder", "text_encoder_2", "unet", "vae_encoder", "vae_decoder",)
SUBMODELS_SDXL_REFINER = ("text_encoder_2", "unet", "vae_encoder", "vae_decoder",)

SUBMODELS_LARGE = ("text_encoder_2", "unet",)
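# SUBMODELS_LARGE: submodels saved with external weight data (weights.pb), likely because their weights can exceed the 2 GB protobuf size limit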


class PipelineBase(TorchCompatibleModule, diffusers.DiffusionPipeline, metaclass=ABCMeta):
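    """Common base for ONNX diffusion pipelines; adapts torch-style device handling onto ONNX Runtime sessions."""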
    model_type: str
    sd_model_hash: str
    sd_checkpoint_info: CheckpointInfo
    sd_model_checkpoint: str

    def __init__(self): # pylint: disable=super-init-not-called
        self.model_type = self.__class__.__name__

    def to(self, *args, **kwargs):
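        """Move ONNX submodules to the requested device by recreating their inference sessions; torch-style args/kwargs are accepted for compatibility."""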
        if self.__class__ == OnnxRawPipeline: # cannot move a pipeline that has not been preprocessed
            return self

        expected_modules, _ = self._get_signature_keys(self)
        for name in expected_modules:
            if not hasattr(self, name):
                log.warning(f"Pipeline does not have module '{name}'.")
                continue

            module = getattr(self, name)

            if isinstance(module, optimum.onnxruntime.modeling_diffusion._ORTDiffusionModelPart): # pylint: disable=protected-access
                device = extract_device(args, kwargs)
                if device is None:
                    return self
                module.session = move_inference_session(module.session, device)

            if not isinstance(module, diffusers.OnnxRuntimeModel):
                continue

            try:
                setattr(self, name, module.to(*args, **kwargs))
                del module
            except Exception:
                log.debug(f"Component device/dtype conversion failed: module={name} args={args}, kwargs={kwargs}")
        return self

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **_): # pylint: disable=arguments-differ
        return OnnxRawPipeline(
            cls,
            pretrained_model_name_or_path,
        )

    @classmethod
    def from_single_file(cls, pretrained_model_name_or_path, **_):
        return OnnxRawPipeline(
            cls,
            pretrained_model_name_or_path,
        )

    @classmethod
    def from_ckpt(cls, pretrained_model_name_or_path, **_):
        return cls.from_single_file(pretrained_model_name_or_path)


class CallablePipelineBase(PipelineBase):
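    """Base for pipelines that are callable for inference; exposes a torch-compatible VAE wrapper."""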
    vae: VAE

    def __init__(self):
        super().__init__()
        self.vae = VAE(self)


class OnnxRawPipeline(PipelineBase):
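    """Unprocessed pipeline wrapper: holds the checkpoint path and config until preprocess() converts the model to ONNX and loads the runnable pipeline."""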
    config = {}
    _is_sdxl: bool
    is_refiner: bool
    from_diffusers_cache: bool
    path: os.PathLike
    original_filename: str

    constructor: Type[PipelineBase]
    init_dict: Dict[str, Tuple[str]] = {}

    scheduler: Any = None # for Img2Img

    def __init__(self, constructor: Type[PipelineBase], path: os.PathLike): # pylint: disable=super-init-not-called
        self._is_sdxl = check_pipeline_sdxl(constructor)
        self.from_diffusers_cache = check_diffusers_cache(path)
        self.path = path
        self.original_filename = os.path.basename(os.path.dirname(os.path.dirname(path)) if self.from_diffusers_cache else path)

        if os.path.isdir(path):
            self.init_dict = load_init_dict(constructor, path)
            self.scheduler = load_submodel(self.path, None, "scheduler", self.init_dict["scheduler"])
        else:
            cls = diffusers.StableDiffusionXLPipeline if self._is_sdxl else diffusers.StableDiffusionPipeline
            try:
                pipeline = cls.from_single_file(path)
                self.scheduler = pipeline.scheduler
                path = shared.opts.onnx_temp_dir
                if os.path.isdir(path):
                    shutil.rmtree(path)
                os.mkdir(path)
                pipeline.save_pretrained(path)
                del pipeline
                self.init_dict = load_init_dict(constructor, path)
            except Exception:
                log.error(f'ONNX: Failed to load ONNX pipeline: is_sdxl={self._is_sdxl}')
                log.warning('ONNX: You cannot load this model with the selected pipeline. Please check the Diffusers pipeline setting in Compute Settings.')
                return
        if "vae" in self.init_dict:
            del self.init_dict["vae"]

        self.is_refiner = self._is_sdxl and "Img2Img" not in constructor.__name__ and "Img2Img" in diffusers.DiffusionPipeline.load_config(path)["_class_name"]
        self.constructor = constructor
        if self.is_refiner:
            from modules.onnx_impl.pipelines.onnx_stable_diffusion_xl_img2img_pipeline import OnnxStableDiffusionXLImg2ImgPipeline
            self.constructor = OnnxStableDiffusionXLImg2ImgPipeline
        self.model_type = self.constructor.__name__

    def derive_properties(self, pipeline: diffusers.DiffusionPipeline):
        pipeline.sd_model_hash = self.sd_model_hash
        pipeline.sd_checkpoint_info = self.sd_checkpoint_info
        pipeline.sd_model_checkpoint = self.sd_model_checkpoint
        pipeline.scheduler = self.scheduler
        return pipeline

    def convert(self, submodels: List[str], in_dir: os.PathLike, out_dir: os.PathLike):
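        """Export the given torch submodels to ONNX under out_dir and write a diffusers-compatible model_index.json."""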
        install('onnx') # onnx may not be installed yet; this checks and installs it as needed
        import onnx
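        # clear leftover cache and footprint artifacts from previous runs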
        shutil.rmtree("cache", ignore_errors=True)
        shutil.rmtree("footprints", ignore_errors=True)

        if shared.opts.onnx_cache_converted:
            shutil.copytree(
                in_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
            )

        from modules import olive_script as olv

        for submodel in submodels:
            destination = os.path.join(out_dir, submodel)

            if not os.path.isdir(destination):
                os.mkdir(destination)

            model = getattr(olv, f"{submodel}_load")(in_dir)
            sample = getattr(olv, f"{submodel}_conversion_inputs")(None)
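            # export to a temporary file first, then re-save so large weights can be written to a single external data file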
            with tempfile.TemporaryDirectory(prefix="onnx_conversion") as temp_dir:
                temp_path = os.path.join(temp_dir, "model.onnx")
                torch.onnx.export(
                    model,
                    sample,
                    temp_path,
                    opset_version=14,
                    **get_io_config(submodel, self._is_sdxl),
                )
                model = onnx.load(temp_path)
            onnx.save_model(
                model,
                os.path.join(destination, "model.onnx"),
                save_as_external_data=submodel in SUBMODELS_LARGE,
                all_tensors_to_one_file=True,
                location="weights.pb",
            )
            log.info(f"ONNX: Successfully exported converted model: submodel={submodel}")

        kwargs = {}

        init_dict = self.init_dict.copy()
        for submodel in submodels:
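            # SDXL pipelines appear to take raw inference sessions directly, while SD pipelines expect OnnxRuntimeModel instances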
            kwargs[submodel] = diffusers.OnnxRuntimeModel.load_model(
                os.path.join(out_dir, submodel, "model.onnx"),
                provider=get_provider(),
            ) if self._is_sdxl else diffusers.OnnxRuntimeModel.from_pretrained(
                os.path.join(out_dir, submodel),
                provider=get_provider(),
            )
            if submodel in init_dict:
                del init_dict[submodel] # already loaded as OnnxRuntimeModel.
        kwargs.update(load_submodels(in_dir, self._is_sdxl, init_dict)) # load others.
        constructor = get_base_constructor(self.constructor, self.is_refiner)
        kwargs = patch_kwargs(constructor, kwargs)

        pipeline = constructor(**kwargs)
        model_index = json.loads(pipeline.to_json_string())
        del pipeline

        for k, v in init_dict.items(): # copy submodels missing from model_index (e.g. for ORTStableDiffusionXLPipeline)
            if k not in model_index:
                model_index[k] = v

        with open(os.path.join(out_dir, "model_index.json"), 'w', encoding="utf-8") as file:
            json.dump(model_index, file)

    def run_olive(self, submodels: List[str], in_dir: os.PathLike, out_dir: os.PathLike):
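        """Run the Olive optimization workflow on previously converted ONNX submodels and collect the optimized models into out_dir."""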
        if not shared.cmd_opts.debug:
            ort.set_default_logger_severity(4)

        try:
            from olive.model import ONNXModel # olive-ai==0.4.0
        except ImportError:
            from olive.model import ONNXModelHandler as ONNXModel # olive-ai==0.5.0

        shutil.rmtree("cache", ignore_errors=True)
        shutil.rmtree("footprints", ignore_errors=True)

        if shared.opts.olive_cache_optimized:
            shutil.copytree(
                in_dir, out_dir, ignore=shutil.ignore_patterns("weights.pb", "*.onnx", "*.safetensors", "*.ckpt")
            )

        optimized_model_paths = {}

        for submodel in submodels:
            log.info(f"\nProcessing {submodel}")

            with open(os.path.join(sd_configs_path, "olive", 'sdxl' if self._is_sdxl else 'sd', f"{submodel}.json"), "r", encoding="utf-8") as config_file:
                olive_config: Dict[str, Dict[str, Dict]] = json.load(config_file)

            for flow in olive_config["pass_flows"]:
                for i in range(len(flow)):
                    flow[i] = flow[i].replace("AutoExecutionProvider", shared.opts.onnx_execution_provider)
            olive_config["input_model"]["config"]["model_path"] = os.path.abspath(os.path.join(in_dir, submodel, "model.onnx"))
            olive_config["engine"]["execution_providers"] = [shared.opts.onnx_execution_provider]

            for pass_key in olive_config["passes"]:
                if olive_config["passes"][pass_key]["type"] == "OrtTransformersOptimization":
                    float16 = shared.opts.olive_float16 and not (submodel == "vae_encoder" and shared.opts.olive_vae_encoder_float32)
                    olive_config["passes"][pass_key]["config"]["float16"] = float16
                    if shared.opts.onnx_execution_provider in (ExecutionProvider.CUDA, ExecutionProvider.ROCm):
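                        # ORT < 1.17 reportedly has issues with the fused SkipGroupNorm op on these providers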
                        if version.parse(ort.__version__) < version.parse("1.17.0"):
                            olive_config["passes"][pass_key]["config"]["optimization_options"] = {"enable_skip_group_norm": False}
                        if float16:
                            olive_config["passes"][pass_key]["config"]["keep_io_types"] = False

            run_olive_workflow(olive_config)

            with open(os.path.join("footprints", f"{submodel}_{EP_TO_NAME[shared.opts.onnx_execution_provider]}_footprints.json"), "r", encoding="utf-8") as footprint_file:
                footprints = json.load(footprint_file)
            processor_final_pass_footprint = None
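            # select the footprint produced by the final pass of the last pass flow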
            for footprint in footprints.values():
                if footprint["from_pass"] == olive_config["passes"][olive_config["pass_flows"][-1][-1]]["type"]:
                    processor_final_pass_footprint = footprint

            assert processor_final_pass_footprint, "Failed to optimize model"

            optimized_model_paths[submodel] = ONNXModel(
                **processor_final_pass_footprint["model_config"]["config"]
            ).model_path

            log.info(f"Olive: Successfully processed model: submodel={submodel}")

        for submodel in submodels:
            src_path = optimized_model_paths[submodel]
            src_parent = os.path.dirname(src_path)
            dst_parent = os.path.join(out_dir, submodel)
            dst_path = os.path.join(dst_parent, "model.onnx")
            if not os.path.isdir(dst_parent):
                os.mkdir(dst_parent)
            shutil.copyfile(src_path, dst_path)

            data_src_path = os.path.join(src_parent, (os.path.basename(src_path) + ".data"))
            if os.path.isfile(data_src_path):
                data_dst_path = os.path.join(dst_parent, (os.path.basename(dst_path) + ".data"))
                shutil.copyfile(data_src_path, data_dst_path)

            weights_src_path = os.path.join(src_parent, "weights.pb")
            if os.path.isfile(weights_src_path):
                weights_dst_path = os.path.join(dst_parent, "weights.pb")
                shutil.copyfile(weights_src_path, weights_dst_path)
        del optimized_model_paths

        kwargs = {}

        init_dict = self.init_dict.copy()
        for submodel in submodels:
            kwargs[submodel] = diffusers.OnnxRuntimeModel.load_model(
                os.path.join(out_dir, submodel, "model.onnx"),
                provider=get_provider(),
            ) if self._is_sdxl else diffusers.OnnxRuntimeModel.from_pretrained(
                os.path.join(out_dir, submodel),
                provider=get_provider(),
            )
            if submodel in init_dict:
                del init_dict[submodel] # already loaded as OnnxRuntimeModel.
        kwargs.update(load_submodels(in_dir, self._is_sdxl, init_dict)) # load others.
        constructor = get_base_constructor(self.constructor, self.is_refiner)
        kwargs = patch_kwargs(constructor, kwargs)

        pipeline = constructor(**kwargs)
        model_index = json.loads(pipeline.to_json_string())
        del pipeline

        for k, v in init_dict.items(): # copy submodels missing from model_index (e.g. for ORTStableDiffusionXLPipeline)
            if k not in model_index:
                model_index[k] = v

        with open(os.path.join(out_dir, "model_index.json"), 'w', encoding="utf-8") as file:
            json.dump(model_index, file)

    def preprocess(self, p: StableDiffusionProcessing):
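        """Prepare a runnable pipeline for the given generation parameters: convert to ONNX if needed, optionally run Olive passes, then load the pipeline."""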
        disable_classifier_free_guidance = p.cfg_scale < 0.01

        config.from_diffusers_cache = self.from_diffusers_cache
        config.is_sdxl = self._is_sdxl

        config.vae = os.path.join(models_path, "VAE", shared.opts.sd_vae)
        if not os.path.isfile(config.vae):
            del config.vae
        config.vae_sdxl_fp16_fix = self._is_sdxl and shared.opts.diffusers_vae_upcast == "false"

        config.width = p.width
        config.height = p.height
        config.batch_size = p.batch_size

        if self._is_sdxl and not self.is_refiner:
            config.cross_attention_dim = 2048
            config.time_ids_size = 6
        else:
            config.cross_attention_dim = 768
            config.time_ids_size = 5

        if not disable_classifier_free_guidance and "turbo" in str(self.path).lower():
            log.warning("ONNX: It looks like you are trying to run a Turbo model with CFG Scale, which will lead to 'size mismatch' or 'unexpected parameter' error.")

        out_dir = os.path.join(shared.opts.onnx_cached_models_path, self.original_filename)
        if (self.from_diffusers_cache and check_cache_onnx(self.path)): # skip conversion if the model is already in ONNX format or was previously converted
            out_dir = self.path
        elif not os.path.isdir(out_dir):
            try:
                self.convert(
                    (SUBMODELS_SDXL_REFINER if self.is_refiner else SUBMODELS_SDXL) if self._is_sdxl else SUBMODELS_SD,
                    self.path if os.path.isdir(self.path) else shared.opts.onnx_temp_dir,
                    out_dir,
                )
            except Exception as e:
                log.error(f"ONNX: Failed to convert model: model='{self.original_filename}', error={e}")
                shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True)
                shutil.rmtree(out_dir, ignore_errors=True)
                return

        kwargs = {
            "provider": get_provider(),
        }
        in_dir = out_dir

        if shared.opts.cuda_compile_backend == "olive-ai":
            if run_olive_workflow is None:
                log.warning('Olive: Skipping model compilation because olive-ai failed to load.')
            else:
                submodels_for_olive = []

                if "Text Encoder" in shared.opts.cuda_compile:
                    if not self.is_refiner:
                        submodels_for_olive.append("text_encoder")
                    if self._is_sdxl:
                        submodels_for_olive.append("text_encoder_2")
                if "Model" in shared.opts.cuda_compile:
                    submodels_for_olive.append("unet")
                if "VAE" in shared.opts.cuda_compile:
                    submodels_for_olive.append("vae_encoder")
                    submodels_for_olive.append("vae_decoder")

                if len(submodels_for_olive) == 0:
                    log.warning("Olive: Skipping olive run.")
                else:
                    log.warning("Olive implementation is experimental. It contains potentially an issue and is subject to change at any time.")

                    out_dir = os.path.join(shared.opts.onnx_cached_models_path, f"{self.original_filename}-{config.width}w-{config.height}h")
                    if not os.path.isdir(out_dir): # skip optimization if an optimized (cached) model already exists
                        if not shared.opts.olive_cache_optimized:
                            out_dir = shared.opts.onnx_temp_dir

                        if p.width != p.height:
                            log.warning("Olive: Different width and height are detected. The quality of the result is not guaranteed.")

                        if shared.opts.olive_static_dims:
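                            # pin batch/height/width to static shapes; hidden batch size doubles when classifier-free guidance is active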
                            sess_options = DynamicSessionOptions()
                            sess_options.enable_static_dims({
                                "is_sdxl": self._is_sdxl,
                                "is_refiner": self.is_refiner,

                                "hidden_batch_size": p.batch_size if disable_classifier_free_guidance else p.batch_size * 2,
                                "height": p.height,
                                "width": p.width,
                            })
                            kwargs["sess_options"] = sess_options

                        try:
                            self.run_olive(submodels_for_olive, in_dir, out_dir)
                        except Exception as e:
                            log.error(f"Olive: Failed to run olive passes: model='{self.original_filename}', error={e}")
                            shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True)
                            shutil.rmtree(out_dir, ignore_errors=True)

        pipeline = self.derive_properties(load_pipeline(self.constructor, out_dir, **kwargs))

        if not shared.opts.onnx_cache_converted and in_dir != self.path:
            shutil.rmtree(in_dir)
        shutil.rmtree(shared.opts.onnx_temp_dir, ignore_errors=True)

        return pipeline