File size: 5,139 Bytes
a53788d
4723a94
a53788d
de4dc78
 
 
 
 
 
 
 
 
 
 
a5fc109
de4dc78
 
 
a5fc109
de4dc78
 
ecb25a4
de4dc78
 
ecb25a4
de4dc78
ecb25a4
de4dc78
 
 
 
 
 
 
 
ecb25a4
de4dc78
 
 
 
ecb25a4
de4dc78
 
 
ecb25a4
de4dc78
 
 
 
 
 
 
 
ecb25a4
de4dc78
ecb25a4
de4dc78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a53788d
 
de4dc78
a53788d
de4dc78
 
4723a94
 
 
 
 
de4dc78
 
ecb25a4
de4dc78
 
 
 
 
a53788d
4723a94
a53788d
 
 
 
 
 
 
 
 
 
 
 
 
 
4723a94
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#--- START OF MODIFIED FILE app_fluxContext_Ltx/ltx_worker_upscaler.py ---
# ltx_worker_upscaler.py
# Worker para fazer upscale de latentes de vídeo para alta resolução.
# Este arquivo é parte do projeto Euia-AducSdr e está sob a licença AGPL v3.
# Copyright (C) 4 de Agosto de 2025  Carlos Rodrigues dos Santos

import torch
import gc
import os
import yaml
import numpy as np 
import imageio
from pathlib import Path
import huggingface_hub
from PIL import Image # <--- IMPORTAÇÃO ADICIONADA AQUI

from inference import create_ltx_video_pipeline
from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode

class LtxUpscaler:
    """Worker that upsamples LTX-Video latents and decodes them to pixels.

    The VAE and the latent upsampler are loaded once, on the CPU, in
    ``__init__``. Callers move them on and off the accelerator explicitly
    with :meth:`to_gpu` / :meth:`to_cpu` around the decode calls.
    """

    def __init__(self, device_id='cuda:0'):
        """Load the YAML config, the VAE and the latent upsampler on the CPU.

        Args:
            device_id: Torch device string used later by :meth:`to_gpu` and
                by the decode methods. Ignored (CPU is used instead) when
                CUDA is not available.
        """
        print(f"WORKER CÂMERA-UPSCALER: Inicializando para {device_id}...")
        self.device = torch.device(device_id if torch.cuda.is_available() else 'cpu')
        self.model_dtype = torch.bfloat16

        config_file_path = "configs/ltxv-13b-0.9.8-dev.yaml"
        # Explicit encoding so parsing does not depend on the host locale.
        with open(config_file_path, "r", encoding="utf-8") as file:
            self.config = yaml.safe_load(file)

        LTX_REPO = "Lightricks/LTX-Video"
        models_dir = "downloaded_models_gradio"
        Path(models_dir).mkdir(parents=True, exist_ok=True)

        print(f"WORKER CÂMERA-UPSCALER ({self.device}): Carregando VAE na CPU...")
        model_actual_path = huggingface_hub.hf_hub_download(
            repo_id=LTX_REPO, filename=self.config["checkpoint_path"],
            local_dir=models_dir, local_dir_use_symlinks=False
        )
        # Only the VAE is needed here: build the full pipeline on the CPU,
        # keep its VAE and drop everything else right away.
        temp_pipeline = create_ltx_video_pipeline(
            ckpt_path=model_actual_path, precision=self.config["precision"],
            text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
            sampler=self.config["sampler"], device='cpu'
        )
        self.vae = temp_pipeline.vae.to(self.model_dtype)
        del temp_pipeline
        gc.collect()

        print(f"WORKER CÂMERA-UPSCALER ({self.device}): Carregando Latent Upsampler na CPU...")
        upscaler_path = huggingface_hub.hf_hub_download(
            repo_id=LTX_REPO, filename=self.config["spatial_upscaler_model_path"],
            local_dir=models_dir, local_dir_use_symlinks=False
        )
        self.latent_upsampler = LatentUpsampler.from_pretrained(upscaler_path).to(self.model_dtype)
        self.latent_upsampler.to('cpu')
        self.vae.to('cpu')

        print(f"WORKER CÂMERA-UPSCALER ({self.device}): Pronto (na CPU).")

    def _models_loaded(self) -> bool:
        """Return True when both models are set.

        Uses identity checks rather than truthiness: container modules
        define ``__len__``, so an ``nn.Module`` can legitimately be falsy.
        """
        return self.latent_upsampler is not None and self.vae is not None

    def to_gpu(self):
        """Move both models to ``self.device``; no-op without CUDA or models."""
        if self._models_loaded() and torch.cuda.is_available():
            print(f"WORKER CÂMERA-UPSCALER: Movendo modelos para {self.device}...")
            self.latent_upsampler.to(self.device)
            self.vae.to(self.device)

    def to_cpu(self):
        """Move both models back to the CPU and release cached GPU memory."""
        if not self._models_loaded():
            return
        print(f"WORKER CÂMERA-UPSCALER: Descarregando modelos da GPU {self.device}...")
        self.latent_upsampler.to('cpu')
        self.vae.to('cpu')
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def _decode_latents(self, latents: torch.Tensor) -> torch.Tensor:
        """Decode latents with the VAE and rescale the result to ``[0, 1]``.

        One decode timestep of 0.0 is supplied per batch element
        (``latents.shape[0]``). The decoder output is clamped to
        ``[-1, 1]`` before being shifted and scaled.
        """
        decode_timestep = torch.zeros(latents.shape[0], device=self.device)
        decoded = vae_decode(
            latents, self.vae, is_video=True, timestep=decode_timestep
        )
        return (decoded.clamp(-1, 1) + 1) / 2.0

    @staticmethod
    def _to_uint8(unit_tensor: torch.Tensor) -> np.ndarray:
        """Convert a ``[0, 1]`` tensor to a uint8 NumPy array (truncating)."""
        return (unit_tensor.cpu().float().numpy() * 255).astype(np.uint8)

    @torch.no_grad()
    def upscale_latents_to_video(self, latent_path: str, output_path: str, video_fps: int):
        """Upsample saved latents, decode them and write an H.264 video.

        Only the first batch element of the decoded tensor is written.

        Args:
            latent_path: Path of a tensor file readable by ``torch.load``.
            output_path: Destination video file.
            video_fps: Frame rate passed to the video writer.

        Returns:
            ``output_path``, unchanged.
        """
        print(f"UPSCALER ({self.device}): Processando latentes de {os.path.basename(latent_path)}")

        # map_location keeps the load working when the tensor was saved from
        # a device this worker does not have (CPU fallback, other GPU index).
        # NOTE: torch.load unpickles the file — only load trusted latents.
        latents = torch.load(latent_path, map_location=self.device)
        latents = latents.to(self.device, dtype=self.model_dtype)

        # NOTE(review): the upstream LTX-Video multi-scale pipeline appears to
        # un-normalize latents before the upsampler and re-normalize after;
        # here they are passed straight through — confirm this is intended.
        upsampled_latents = self.latent_upsampler(latents)
        video_unit = self._decode_latents(upsampled_latents)

        # Assumes the decoded layout is (batch, channels, frames, H, W), so
        # the permute yields frames-first, channels-last — TODO confirm.
        video_np_high_res = self._to_uint8(video_unit[0].permute(1, 2, 3, 0))

        with imageio.get_writer(output_path, fps=video_fps, codec='libx264', quality=8) as writer:
            for frame in video_np_high_res:
                writer.append_data(frame)

        print(f"UPSCALER ({self.device}): Arquivo de vídeo salvo em {os.path.basename(output_path)}")
        return output_path

    @torch.no_grad()
    def decode_single_latent_frame(self, latent_frame_tensor: torch.Tensor) -> "Image.Image":
        """Decode a single latent frame into a PIL image (used for Gemini).

        Args:
            latent_frame_tensor: Latents expected to decode to exactly one
                video frame; only the first batch element is used.

        Returns:
            The decoded frame as a ``PIL.Image.Image``.
        """
        latent_frame_tensor = latent_frame_tensor.to(self.device, dtype=self.model_dtype)
        frame_unit = self._decode_latents(latent_frame_tensor)

        # After the permute, axis 2 is the frame axis. Squeeze only that axis
        # so a size-1 height or width is never dropped by accident.
        frame_hwc = frame_unit[0].permute(2, 3, 1, 0).squeeze(2)
        return Image.fromarray(self._to_uint8(frame_hwc))
#--- END OF MODIFIED FILE app_fluxContext_Ltx/ltx_worker_upscaler.py ---