# ltx_manager_helpers.py
# LTX worker pool manager for asynchronous rotation across multiple GPUs.
# This file is part of the Euia-AducSdr project and is licensed under AGPL v3.
# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos

import torch
import gc
import os
import yaml
import numpy as np
import imageio
from pathlib import Path
import huggingface_hub
import threading
from PIL import Image
# Import the required functions and classes from inference.py
from inference import (
    create_ltx_video_pipeline,
    create_latent_upsampler,
    ConditioningItem,
    calculate_padding,
    prepare_conditioning
)
from ltx_video.pipelines.pipeline_ltx_video import LTXMultiScalePipeline
class LtxWorker:
"""
Representa uma única instância do pipeline LTX, associada a uma GPU específica.
O pipeline é carregado na CPU por padrão e movido para a GPU sob demanda.
"""
    def __init__(self, device_id='cuda:0'):
        self.device = torch.device(device_id if torch.cuda.is_available() else 'cpu')
        print(f"LTX Worker: Initializing for device {self.device} (loading on CPU)...")
        config_file_path = "configs/ltxv-13b-0.9.8-distilled.yaml"
        with open(config_file_path, "r") as file:
            self.config = yaml.safe_load(file)
        LTX_REPO = "Lightricks/LTX-Video"
        models_dir = "downloaded_models_gradio"
        model_actual_path = huggingface_hub.hf_hub_download(
            repo_id=LTX_REPO,
            filename=self.config["checkpoint_path"],
            local_dir=models_dir,
            local_dir_use_symlinks=False
        )
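        # hf_hub_download keeps the checkpoint in `downloaded_models_gradio`, so
        # subsequent startups reuse the local copy instead of re-downloading it.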
        self.pipeline = create_ltx_video_pipeline(
            ckpt_path=model_actual_path,
            precision=self.config["precision"],
            text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
            sampler=self.config["sampler"],
            device='cpu'
        )
        print(f"LTX Worker for {self.device}: Compiling the transformer (this may take a moment)...")
        self.pipeline.transformer.to(memory_format=torch.channels_last)
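        # torch.compile is best-effort here: it can fail depending on the PyTorch/driver
        # combination, in which case the worker keeps the uncompiled (eager) transformer.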
        try:
            self.pipeline.transformer = torch.compile(self.pipeline.transformer, mode="reduce-overhead", fullgraph=True)
            print(f"LTX Worker for {self.device}: Transformer compiled successfully.")
        except Exception as e:
            print(f"WARNING: Transformer compilation failed on {self.device}: {e}. Continuing without compilation.")
        self.latent_upsampler = None
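        # Multi-scale configs ship a separate latent upsampler checkpoint, loaded here
        # on the CPU alongside the base pipeline.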
if self.config.get("pipeline_type") == "multi-scale":
print(f"LTX Worker para {self.device}: Carregando Latent Upsampler (Multi-Scale)...")
upscaler_path = huggingface_hub.hf_hub_download(
repo_id=LTX_REPO,
filename=self.config["spatial_upscaler_model_path"],
local_dir=models_dir,
local_dir_use_symlinks=False
)
self.latent_upsampler = create_latent_upsampler(upscaler_path, 'cpu')
print(f"LTX Worker para {self.device} pronto na CPU.")

    def to_gpu(self):
        """Move the pipeline and the upsampler to the assigned GPU."""
        if self.device.type == 'cpu': return
        print(f"LTX Worker: Moving pipeline to {self.device}...")
        self.pipeline.to(self.device)
        if self.latent_upsampler:
            print(f"LTX Worker: Moving Latent Upsampler to {self.device}...")
            self.latent_upsampler.to(self.device)
        print(f"LTX Worker: Pipeline on GPU {self.device}.")

    def to_cpu(self):
        """Move the pipeline back to the CPU and free GPU memory."""
        if self.device.type == 'cpu': return
        print(f"LTX Worker: Unloading pipeline from GPU {self.device}...")
        self.pipeline.to('cpu')
        if self.latent_upsampler:
            self.latent_upsampler.to('cpu')
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        print(f"LTX Worker: GPU {self.device} cleared.")

    def generate_video_fragment_internal(self, **kwargs):
        """The actual generation call; expects the pipeline to already be on the GPU."""
        return self.pipeline(**kwargs)

class LtxPoolManager:
"""
Gerencia um pool de LtxWorkers, orquestrando um revezamento entre GPUs
para permitir que a limpeza de uma GPU ocorra em paralelo com a computação em outra.
"""
    def __init__(self, device_ids=['cuda:2', 'cuda:3']):
        print(f"LTX POOL MANAGER: Creating workers for devices: {device_ids}")
        self.workers = [LtxWorker(device_id) for device_id in device_ids]
        self.current_worker_index = 0
        self.lock = threading.Lock()
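        # Handle to the most recent background cleanup thread; a new request waits on
        # it before claiming a worker, so a GPU is never picked up mid-cleanup.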
        self.last_cleanup_thread = None

    def _cleanup_worker(self, worker):
        """Target function for the cleanup thread."""
        print(f"CLEANUP THREAD: Starting cleanup of GPU {worker.device} in the background...")
        worker.to_cpu()
        print(f"CLEANUP THREAD: Cleanup of GPU {worker.device} finished.")

    def generate_video_fragment(
        self,
        motion_prompt: str, conditioning_items_data: list,
        width: int, height: int, seed: int, cfg: float, video_total_frames: int,
        video_fps: int, num_inference_steps: int, use_attention_slicing: bool,
        decode_timestep: float, image_cond_noise_scale: float,
        current_fragment_index: int, output_path: str, progress
    ):
        worker_to_use = None
        try:
            with self.lock:
                if self.last_cleanup_thread and self.last_cleanup_thread.is_alive():
                    print("LTX POOL MANAGER: Waiting for cleanup of the previous GPU...")
                    self.last_cleanup_thread.join()
                worker_to_use = self.workers[self.current_worker_index]
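                # Free the previously used worker's GPU on a background thread, so the
                # VRAM cleanup overlaps with this fragment's computation.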
                previous_worker_index = (self.current_worker_index - 1 + len(self.workers)) % len(self.workers)
                worker_to_cleanup = self.workers[previous_worker_index]
                cleanup_thread = threading.Thread(target=self._cleanup_worker, args=(worker_to_cleanup,))
                cleanup_thread.start()
                self.last_cleanup_thread = cleanup_thread
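                # Bring the chosen worker onto its GPU and advance the round-robin index
                # so the next fragment is scheduled on the other device.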
                worker_to_use.to_gpu()
                self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
            target_device = worker_to_use.device
            if use_attention_slicing:
                worker_to_use.pipeline.enable_attention_slicing()
            media_paths = [item[0] for item in conditioning_items_data]
            start_frames = [item[1] for item in conditioning_items_data]
            strengths = [item[2] for item in conditioning_items_data]
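            # Round the requested resolution up to the next multiple of 32: the pipeline
            # works on 32-aligned spatial dimensions, and the extra border is cropped
            # off again after decoding.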
            padded_h, padded_w = ((height - 1) // 32 + 1) * 32, ((width - 1) // 32 + 1) * 32
            padding_vals = calculate_padding(height, width, padded_h, padded_w)
            conditioning_items = prepare_conditioning(
                conditioning_media_paths=media_paths, conditioning_strengths=strengths,
                conditioning_start_frames=start_frames, height=height, width=width,
                num_frames=video_total_frames, padding=padding_vals, pipeline=worker_to_use.pipeline,
            )
            for item in conditioning_items:
                item.media_item = item.media_item.to(target_device)
            kwargs = {
                "prompt": motion_prompt,
                "negative_prompt": "blurry, distorted, bad quality, artifacts",
                "height": padded_h,
                "width": padded_w,
                "num_frames": video_total_frames,
                "frame_rate": video_fps,
"generator": torch.Generator(device=target_device).manual_seed(int(seed) + current_fragment_index),
"output_type": "pt",
"guidance_scale": float(cfg),
"conditioning_items": conditioning_items,
"decode_timestep": decode_timestep,
"decode_noise_scale": worker_to_use.config.get("decode_noise_scale"),
"image_cond_noise_scale": image_cond_noise_scale,
"stochastic_sampling": worker_to_use.config.get("stochastic_sampling"),
"is_video": True,
"vae_per_channel_normalize": True,
"mixed_precision": (worker_to_use.config.get("precision") == "mixed_precision"),
"enhance_prompt": False,
}
            # Check whether the model config specifies a list of timesteps.
            # If so, use that list; otherwise fall back to num_inference_steps from the UI.
            first_pass_config = worker_to_use.config.get("first_pass", {})
            if "timesteps" in first_pass_config:
                print("Using custom timesteps from the config file for the distilled model.")
                kwargs["timesteps"] = first_pass_config["timesteps"]
                kwargs["num_inference_steps"] = len(first_pass_config["timesteps"])
                # For distilled models the UI step count is ignored, but the other first_pass params are applied.
                kwargs.update({k: v for k, v in first_pass_config.items() if k != "timesteps"})
            else:
                print(f"Using num_inference_steps from the UI: {num_inference_steps}")
                kwargs["num_inference_steps"] = int(num_inference_steps)
            progress(0.1, desc=f"[LTX camera on {worker_to_use.device}] Filming scene {current_fragment_index}...")
            result_tensor = worker_to_use.generate_video_fragment_internal(**kwargs).images
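            # Crop the spatial padding back off, keep only the requested number of
            # frames, and convert the tensor to uint8 RGB frames for encoding.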
            pad_l, pad_r, pad_t, pad_b = map(int, padding_vals)
            slice_h = -pad_b if pad_b > 0 else None
            slice_w = -pad_r if pad_r > 0 else None
            cropped_tensor = result_tensor[:, :, :video_total_frames, pad_t:slice_h, pad_l:slice_w]
            video_np = (cropped_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy() * 255).astype(np.uint8)
            with imageio.get_writer(output_path, fps=video_fps, codec='libx264', quality=8) as writer:
                for frame in video_np:
                    writer.append_data(frame)
            return output_path, video_total_frames
        finally:
            if use_attention_slicing and worker_to_use and worker_to_use.pipeline:
                worker_to_use.pipeline.disable_attention_slicing()


ltx_manager_singleton = LtxPoolManager(device_ids=['cuda:2', 'cuda:3'])
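
# --- Illustrative usage (a minimal sketch, not executed on import) ---
# The prompt, file paths, and parameter values below are placeholder assumptions,
# and `_progress` stands in for the Gradio-style progress callable the method expects.
#
# def _progress(fraction, desc=""):
#     print(f"[{fraction:.0%}] {desc}")
#
# fragment_path, n_frames = ltx_manager_singleton.generate_video_fragment(
#     motion_prompt="a slow pan across a foggy forest at dawn",
#     conditioning_items_data=[("keyframe_0.png", 0, 1.0)],  # (media path, start frame, strength)
#     width=704, height=480, seed=42, cfg=3.0,
#     video_total_frames=121, video_fps=24,
#     num_inference_steps=8, use_attention_slicing=False,
#     decode_timestep=0.05, image_cond_noise_scale=0.025,
#     current_fragment_index=0, output_path="fragment_0.mp4",
#     progress=_progress,
# )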