# Tencent HunyuanWorld-1.0 is licensed under TENCENT HUNYUANWORLD-1.0 COMMUNITY LICENSE AGREEMENT
# THIS LICENSE AGREEMENT DOES NOT APPLY IN THE EUROPEAN UNION, UNITED KINGDOM AND SOUTH KOREA AND
# IS EXPRESSLY LIMITED TO THE TERRITORY, AS DEFINED BELOW.
# By clicking to agree or by using, reproducing, modifying, distributing, performing or displaying
# any portion or element of the Tencent HunyuanWorld-1.0 Works, including via any Hosted Service,
# You will be deemed to have recognized and accepted the content of this Agreement,
# which is effective immediately.
# For avoidance of doubts, Tencent HunyuanWorld-1.0 means the 3D generation models
# and their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent at [https://github.com/Tencent-Hunyuan/HunyuanWorld-1.0].
import os
import torch
import numpy as np
import cv2
from PIL import Image
import argparse

# hunyuan3d text to panorama
from hy3dworld import Text2PanoramaPipelines
# hunyuan3d image to panorama
from hy3dworld import Image2PanoramaPipelines
from hy3dworld import Perspective


class Text2PanoramaDemo:
    def __init__(self):
        # default output resolution of the panorama
        self.height = 960
        self.width = 1920

        # panorama parameters
        # these parameters control the panorama generation
        # you can adjust them according to your needs
        self.guidance_scale = 30
        self.shifting_extend = 0
        self.num_inference_steps = 50
        self.true_cfg_scale = 0.0
        self.blend_extend = 6

        # model paths
        self.lora_path = "tencent/HunyuanWorld-1"
        self.model_path = "black-forest-labs/FLUX.1-dev"

        # load the pipeline; use bfloat16 to save some VRAM
        self.pipe = Text2PanoramaPipelines.from_pretrained(
            self.model_path,
            torch_dtype=torch.bfloat16
        ).to("cuda")
        # and enable lora weights
        self.pipe.load_lora_weights(
            self.lora_path,
            subfolder="HunyuanWorld-PanoDiT-Text",
            weight_name="lora.safetensors",
            torch_dtype=torch.bfloat16
        )
        # save some VRAM by offloading the model to CPU
        self.pipe.enable_model_cpu_offload()
        # and enable vae tiling to save some VRAM
        self.pipe.enable_vae_tiling()

    def run(self, prompt, negative_prompt=None, seed=42, output_path='output_panorama'):
        # generate the panorama
        image = self.pipe(
            prompt,
            height=self.height,
            width=self.width,
            negative_prompt=negative_prompt,
            generator=torch.Generator("cpu").manual_seed(seed),
            num_inference_steps=self.num_inference_steps,
            guidance_scale=self.guidance_scale,
            blend_extend=self.blend_extend,
            true_cfg_scale=self.true_cfg_scale,
        ).images[0]

        # create the output directory if it does not exist
        os.makedirs(output_path, exist_ok=True)

        # make sure the result is a PIL image before saving
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        # save the image to the output path
        image.save(os.path.join(output_path, 'panorama.png'))

        return image

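# Minimal programmatic usage sketch for Text2PanoramaDemo (illustrative only:
# the prompt, negative prompt and output path below are made-up examples, and a
# CUDA device with access to the FLUX.1-dev base weights plus the
# HunyuanWorld-1 LoRA is assumed):
#
#   demo = Text2PanoramaDemo()
#   pano = demo.run(
#       prompt="a sunlit alpine valley with snow-capped peaks, photorealistic",
#       negative_prompt="blurry, low quality",
#       seed=42,
#       output_path="results/t2p_example",
#   )
#   # `pano` is a PIL.Image; it is also saved to results/t2p_example/panorama.png
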
class Image2PanoramaDemo:
    def __init__(self):
        # default output resolution of the panorama
        self.height, self.width = 960, 1920  # alternatively: 768, 1536

        # panorama parameters
        # these parameters control the panorama generation
        # you can adjust them according to your needs
        self.THETA = 0
        self.PHI = 0
        self.FOV = 80
        self.guidance_scale = 30
        self.num_inference_steps = 50
        self.true_cfg_scale = 2.0
        self.shifting_extend = 0
        self.blend_extend = 6

        # model paths
        self.lora_path = "tencent/HunyuanWorld-1"
        self.model_path = "black-forest-labs/FLUX.1-Fill-dev"

        # load the pipeline; use bfloat16 to save some VRAM
        self.pipe = Image2PanoramaPipelines.from_pretrained(
            self.model_path,
            torch_dtype=torch.bfloat16
        ).to("cuda")
        # and enable lora weights
        self.pipe.load_lora_weights(
            self.lora_path,
            subfolder="HunyuanWorld-PanoDiT-Image",
            weight_name="lora.safetensors",
            torch_dtype=torch.bfloat16
        )
        # save some VRAM by offloading the model to CPU
        self.pipe.enable_model_cpu_offload()
        # and enable vae tiling to save some VRAM
        self.pipe.enable_vae_tiling()

        # set general prompts
        self.general_negative_prompt = (
            "human, person, people, messy, "
            "low-quality, blur, noise, low-resolution"
        )
        self.general_positive_prompt = "high-quality, high-resolution, sharp, clear, 8k"

    def run(self, prompt, negative_prompt, image_path, seed=42, output_path='output_panorama'):
        # preprocess prompts by appending the general quality terms
        prompt = prompt + ", " + self.general_positive_prompt
        negative_prompt = self.general_negative_prompt + ", " + negative_prompt

        # read the input perspective image
        perspective_img = cv2.imread(image_path)
        height_fov, width_fov = perspective_img.shape[:2]
        # resize the input so its longer side spans roughly self.FOV degrees
        # on the equirectangular canvas
        if width_fov > height_fov:
            ratio = width_fov / height_fov
            w = int((self.FOV / 360) * self.width)
            h = int(w / ratio)
            perspective_img = cv2.resize(
                perspective_img, (w, h), interpolation=cv2.INTER_AREA)
        else:
            ratio = height_fov / width_fov
            h = int((self.FOV / 180) * self.height)
            w = int(h / ratio)
            perspective_img = cv2.resize(
                perspective_img, (w, h), interpolation=cv2.INTER_AREA)

        # project the perspective image onto the equirectangular panorama canvas
        equ = Perspective(perspective_img, self.FOV,
                          self.THETA, self.PHI, crop_bound=False)
        img, mask = equ.GetEquirec(self.height, self.width)
        # erode the valid-pixel mask to slightly shrink the known region
        mask = cv2.erode(mask.astype(np.uint8), np.ones(
            (3, 3), np.uint8), iterations=5)
        # zero out pixels outside the eroded mask
        img = img * mask
        # invert the mask: regions outside the projected view become white (255)
        # and are the regions to be generated
        mask = mask.astype(np.uint8) * 255
        mask = 255 - mask

        mask = Image.fromarray(mask[:, :, 0])
        img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)

        # outpaint the full panorama around the projected perspective view
        image = self.pipe(
            prompt=prompt,
            image=img,
            mask_image=mask,
            height=self.height,
            width=self.width,
            negative_prompt=negative_prompt,
            guidance_scale=self.guidance_scale,
            num_inference_steps=self.num_inference_steps,
            generator=torch.Generator("cpu").manual_seed(seed),
            blend_extend=self.blend_extend,
            shifting_extend=self.shifting_extend,
            true_cfg_scale=self.true_cfg_scale,
        ).images[0]

        # create the output directory if it does not exist, then save the panorama
        os.makedirs(output_path, exist_ok=True)
        image.save(os.path.join(output_path, 'panorama.png'))

        return image


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Text/Image to Panorama Demo")
    parser.add_argument("--prompt", type=str, default="",
                        help="Prompt for image generation")
    parser.add_argument("--negative_prompt", type=str, default="",
                        help="Negative prompt for image generation")
    parser.add_argument("--image_path", type=str, default=None,
                        help="Path to the input image")
    parser.add_argument("--seed", type=int, default=42,
                        help="Random seed for reproducibility")
    parser.add_argument("--output_path", type=str, default="results",
                        help="Path to save the output results")
    args = parser.parse_args()

    # create the output directory if it does not exist
    os.makedirs(args.output_path, exist_ok=True)
    print(f"Output will be saved to: {args.output_path}")

    if args.image_path is None:
        print("No image path provided, using text-to-panorama generation.")
        demo_T2P = Text2PanoramaDemo()
        panorama_image = demo_T2P.run(
            args.prompt, args.negative_prompt, args.seed, args.output_path)
    else:
        if not os.path.exists(args.image_path):
            raise FileNotFoundError(
                f"Image path {args.image_path} does not exist.")
        print(f"Using image at {args.image_path} for panorama generation.")
        demo_I2P = Image2PanoramaDemo()
        panorama_image = demo_I2P.run(
            args.prompt, args.negative_prompt, args.image_path, args.seed, args.output_path)
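# Example command lines (illustrative; the script filename and the input image
# path are assumptions, not part of the repository contract):
#
#   # text-to-panorama
#   python3 demo_panogen.py --prompt "A tropical beach at sunset" --output_path results/case1
#
#   # image-to-panorama (outpaints a 360-degree panorama around the input view)
#   python3 demo_panogen.py --prompt "" --image_path path/to/input.png --output_path results/case2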