# Tencent HunyuanWorld-1.0 is licensed under TENCENT HUNYUANWORLD-1.0 COMMUNITY LICENSE AGREEMENT
# THIS LICENSE AGREEMENT DOES NOT APPLY IN THE EUROPEAN UNION, UNITED KINGDOM AND SOUTH KOREA AND
# IS EXPRESSLY LIMITED TO THE TERRITORY, AS DEFINED BELOW.
# By clicking to agree or by using, reproducing, modifying, distributing, performing or displaying
# any portion or element of the Tencent HunyuanWorld-1.0 Works, including via any Hosted Service,
# You will be deemed to have recognized and accepted the content of this Agreement,
# which is effective immediately.
# For avoidance of doubts, Tencent HunyuanWorld-1.0 means the 3D generation models
# and their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent at [https://github.com/Tencent-Hunyuan/HunyuanWorld-1.0].
import os
import torch
import numpy as np
import cv2
from PIL import Image
import argparse
# HunyuanWorld text-to-panorama pipeline
from hy3dworld import Text2PanoramaPipelines
# HunyuanWorld image-to-panorama pipeline
from hy3dworld import Image2PanoramaPipelines
from hy3dworld import Perspective
class Text2PanoramaDemo:
def __init__(self):
# set default parameters
self.height = 960
self.width = 1920
        # panorama generation parameters; adjust these to trade off
        # quality, speed, and VRAM usage
self.guidance_scale = 30
self.shifting_extend = 0
self.num_inference_steps = 50
self.true_cfg_scale = 0.0
self.blend_extend = 6
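        # FLUX.1-dev is guidance-distilled, so true_cfg_scale stays at 0.0
        # (no true classifier-free guidance); blend_extend extends and blends
        # the panorama borders so the 360-degree left/right seam wraps cleanly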
# model paths
self.lora_path = "tencent/HunyuanWorld-1"
self.model_path = "black-forest-labs/FLUX.1-dev"
# load the pipeline
# use bfloat16 to save some VRAM
self.pipe = Text2PanoramaPipelines.from_pretrained(
self.model_path,
torch_dtype=torch.bfloat16
).to("cuda")
# and enable lora weights
self.pipe.load_lora_weights(
self.lora_path,
subfolder="HunyuanWorld-PanoDiT-Text",
weight_name="lora.safetensors",
torch_dtype=torch.bfloat16
)
# save some VRAM by offloading the model to CPU
self.pipe.enable_model_cpu_offload()
self.pipe.enable_vae_tiling() # and enable vae tiling to save some VRAM
def run(self, prompt, negative_prompt=None, seed=42, output_path='output_panorama'):
# get panorama
image = self.pipe(
prompt,
height=self.height,
width=self.width,
negative_prompt=negative_prompt,
generator=torch.Generator("cpu").manual_seed(seed),
num_inference_steps=self.num_inference_steps,
guidance_scale=self.guidance_scale,
blend_extend=self.blend_extend,
true_cfg_scale=self.true_cfg_scale,
).images[0]
# create output directory if it does not exist
os.makedirs(output_path, exist_ok=True)
# save the panorama image
if not isinstance(image, Image.Image):
image = Image.fromarray(image)
# save the image to the output path
image.save(os.path.join(output_path, 'panorama.png'))
return image
class Image2PanoramaDemo:
def __init__(self):
# set default parameters
        self.height, self.width = 960, 1920  # a lower-VRAM option: 768, 1536
        # panorama generation parameters; adjust these to trade off
        # quality, speed, and VRAM usage
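        # THETA/PHI: yaw/pitch (degrees) at which the input view sits on the
        # sphere; FOV: horizontal field of view (degrees) of the input image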
self.THETA = 0
self.PHI = 0
self.FOV = 80
self.guidance_scale = 30
self.num_inference_steps = 50
self.true_cfg_scale = 2.0
self.shifting_extend = 0
self.blend_extend = 6
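        # unlike the text pipeline, true_cfg_scale=2.0 enables true
        # classifier-free guidance so the negative prompt takes effect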
# model paths
self.lora_path = "tencent/HunyuanWorld-1"
self.model_path = "black-forest-labs/FLUX.1-Fill-dev"
# load the pipeline
# use bfloat16 to save some VRAM
self.pipe = Image2PanoramaPipelines.from_pretrained(
self.model_path,
torch_dtype=torch.bfloat16
).to("cuda")
# and enable lora weights
self.pipe.load_lora_weights(
self.lora_path,
subfolder="HunyuanWorld-PanoDiT-Image",
weight_name="lora.safetensors",
torch_dtype=torch.bfloat16
)
# save some VRAM by offloading the model to CPU
self.pipe.enable_model_cpu_offload()
self.pipe.enable_vae_tiling() # and enable vae tiling to save some VRAM
# set general prompts
        self.general_negative_prompt = (
            "human, person, people, messy, "
            "low-quality, blur, noise, low-resolution"
        )
self.general_positive_prompt = "high-quality, high-resolution, sharp, clear, 8k"
def run(self, prompt, negative_prompt, image_path, seed=42, output_path='output_panorama'):
# preprocess prompt
prompt = prompt + ", " + self.general_positive_prompt
negative_prompt = self.general_negative_prompt + ", " + negative_prompt
        # read the input image (OpenCV loads BGR); fail fast if it cannot be decoded
        perspective_img = cv2.imread(image_path)
        if perspective_img is None:
            raise ValueError(f"Could not read image at {image_path}")
        height_fov, width_fov = perspective_img.shape[:2]
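        # resize so the image's FOV spans the matching fraction of the
        # equirectangular canvas (360 degrees wide, 180 degrees tall),
        # preserving the original aspect ratio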
if width_fov > height_fov:
ratio = width_fov / height_fov
w = int((self.FOV / 360) * self.width)
h = int(w / ratio)
perspective_img = cv2.resize(
perspective_img, (w, h), interpolation=cv2.INTER_AREA)
else:
ratio = height_fov / width_fov
h = int((self.FOV / 180) * self.height)
w = int(h / ratio)
perspective_img = cv2.resize(
perspective_img, (w, h), interpolation=cv2.INTER_AREA)
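        # project the perspective view onto an equirectangular canvas;
        # mask marks the region covered by the input image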
equ = Perspective(perspective_img, self.FOV,
self.THETA, self.PHI, crop_bound=False)
img, mask = equ.GetEquirec(self.height, self.width)
        # erode the valid-region mask a few pixels so the inpainting
        # overlaps the seam instead of leaving a visible border
mask = cv2.erode(mask.astype(np.uint8), np.ones(
(3, 3), np.uint8), iterations=5)
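        # zero out pixels outside the eroded valid region, then invert the
        # mask so white marks the area the Fill pipeline should inpaint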
img = img * mask
mask = mask.astype(np.uint8) * 255
mask = 255 - mask
mask = Image.fromarray(mask[:, :, 0])
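        # OpenCV uses BGR channel order; convert to RGB for the pipeline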
img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
img = Image.fromarray(img)
image = self.pipe(
prompt=prompt,
image=img,
mask_image=mask,
height=self.height,
width=self.width,
negative_prompt=negative_prompt,
guidance_scale=self.guidance_scale,
num_inference_steps=self.num_inference_steps,
generator=torch.Generator("cpu").manual_seed(seed),
blend_extend=self.blend_extend,
shifting_extend=self.shifting_extend,
true_cfg_scale=self.true_cfg_scale,
).images[0]
        # create the output directory if it does not exist, then save the panorama
        os.makedirs(output_path, exist_ok=True)
        image.save(os.path.join(output_path, 'panorama.png'))
return image
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Text/Image to Panorama Demo")
parser.add_argument("--prompt", type=str,
default="", help="Prompt for image generation")
parser.add_argument("--negative_prompt", type=str,
default="", help="Negative prompt for image generation")
parser.add_argument("--image_path", type=str,
default=None, help="Path to the input image")
parser.add_argument("--seed", type=int, default=42,
help="Random seed for reproducibility")
parser.add_argument("--output_path", type=str, default="results",
help="Path to save the output results")
args = parser.parse_args()
os.makedirs(args.output_path, exist_ok=True)
print(f"Output will be saved to: {args.output_path}")
if args.image_path is None:
print("No image path provided, using text-to-panorama generation.")
demo_T2P = Text2PanoramaDemo()
panorama_image = demo_T2P.run(
args.prompt, args.negative_prompt, args.seed, args.output_path)
else:
if not os.path.exists(args.image_path):
raise FileNotFoundError(
f"Image path {args.image_path} does not exist.")
print(f"Using image at {args.image_path} for panorama generation.")
demo_I2P = Image2PanoramaDemo()
panorama_image = demo_I2P.run(
args.prompt, args.negative_prompt, args.image_path, args.seed, args.output_path)
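# Example invocations (the script name and file paths are illustrative):
#   python demo_panogen.py --prompt "a tropical beach at sunset" --output_path results
#   python demo_panogen.py --prompt "a cozy living room" --image_path examples/room.png --output_path results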