import gradio as gr
import torch
import os
import numpy as np
import cv2
from PIL import Image
import open3d as o3d
import shutil

# --- Model Classes (adapted from demo scripts) ---

# Panorama Generation
from hy3dworld import Text2PanoramaPipelines, Image2PanoramaPipelines, Perspective


class Text2PanoramaDemo:
    def __init__(self):
        self.pipe = Text2PanoramaPipelines.from_pretrained(
            "black-forest-labs/FLUX.1-dev",
            torch_dtype=torch.bfloat16
        ).to("cuda")
        self.pipe.load_lora_weights(
            "tencent/HunyuanWorld-1",
            subfolder="HunyuanWorld-PanoDiT-Text",
            weight_name="lora.safetensors",
            torch_dtype=torch.bfloat16
        )
        self.pipe.enable_model_cpu_offload()
        self.pipe.enable_vae_tiling()

    def run(self, prompt, negative_prompt, seed, height, width, guidance_scale, steps):
        image = self.pipe(
            prompt,
            height=height,
            width=width,
            negative_prompt=negative_prompt,
            generator=torch.Generator("cuda").manual_seed(seed),
            num_inference_steps=steps,
            guidance_scale=guidance_scale,
            blend_extend=6,
            true_cfg_scale=0.0,
        ).images[0]
        return image


class Image2PanoramaDemo:
    def __init__(self):
        self.pipe = Image2PanoramaPipelines.from_pretrained(
            "black-forest-labs/FLUX.1-dev",
            torch_dtype=torch.bfloat16
        ).to("cuda")
        self.pipe.load_lora_weights(
            "tencent/HunyuanWorld-1",
            subfolder="HunyuanWorld-PanoDiT-Image",
            weight_name="lora.safetensors",
            torch_dtype=torch.bfloat16
        )
        self.pipe.enable_model_cpu_offload()
        self.pipe.enable_vae_tiling()
        self.general_negative_prompt = "human, person, people, messy, low-quality, blur, noise, low-resolution"
        self.general_positive_prompt = "high-quality, high-resolution, sharp, clear, 8k"

    def run(self, prompt, negative_prompt, image, seed, height, width, guidance_scale, steps, fov):
        prompt = prompt + ", " + self.general_positive_prompt
        negative_prompt = self.general_negative_prompt + ", " + negative_prompt

        perspective_img = np.array(image)
        height_fov, width_fov = perspective_img.shape[:2]
        ratio = width_fov / height_fov

        # Scale the perspective image so it covers `fov` degrees of the
        # panorama's width, preserving its aspect ratio.
        w = int((fov / 360) * width)
        h = int(w / ratio)
        perspective_img = cv2.resize(perspective_img, (w, h), interpolation=cv2.INTER_AREA)

        # Project the perspective image onto the equirectangular canvas and
        # build an inpainting mask covering the uncovered region.
        equ = Perspective(perspective_img, fov, 0, 0, crop_bound=False)
        img, mask = equ.GetEquirec(height, width)
        mask = cv2.erode(mask.astype(np.uint8), np.ones((3, 3), np.uint8), iterations=5)
        img = img * mask
        mask = 255 - (mask.astype(np.uint8) * 255)
        mask = Image.fromarray(mask[:, :, 0])
        img = Image.fromarray(cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB))

        image = self.pipe(
            prompt=prompt,
            image=img,
            mask_image=mask,
            height=height,
            width=width,
            negative_prompt=negative_prompt,
            guidance_scale=guidance_scale,
            num_inference_steps=steps,
            generator=torch.Generator("cuda").manual_seed(seed),
            blend_extend=6,
            shifting_extend=0,
            true_cfg_scale=2.0,
        ).images[0]
        return image


# Scene Generation
from hy3dworld import LayerDecomposition, WorldComposer, process_file


class HYworldDemo:
    def __init__(self, seed=42):
        target_size = 3840
        kernel_scale = max(1, int(target_size / 1920))

        self.LayerDecomposer = LayerDecomposition()
        self.hy3d_world = WorldComposer(
            device=torch.device("cuda"),
            resolution=(target_size, target_size // 2),
            seed=seed,
            filter_mask=True,
            kernel_scale=kernel_scale,
        )

    def run(self, image_path, labels_fg1, labels_fg2, classes, output_dir):
        os.makedirs(output_dir, exist_ok=True)
        fg1_infos = [{
            "image_path": image_path,
            "output_path": output_dir,
            "labels": labels_fg1,
            "class": classes,
        }]
        fg2_infos = [{
            "image_path": os.path.join(output_dir, 'remove_fg1_image.png'),
            "output_path": output_dir,
            "labels": labels_fg2,
            "class": classes,
        }]

        # Decompose the panorama into layers: two foreground passes, then the
        # remaining background/sky layer.
        self.LayerDecomposer(fg1_infos, layer=0)
        self.LayerDecomposer(fg2_infos, layer=1)
        self.LayerDecomposer(fg2_infos, layer=2)

        separate_pano, fg_bboxes = self.hy3d_world._load_separate_pano_from_dir(output_dir, sr=True)
        layered_world_mesh = self.hy3d_world.generate_world(
            separate_pano=separate_pano, fg_bboxes=fg_bboxes, world_type='mesh'
        )

        mesh_files = []
        for layer_idx, layer_info in enumerate(layered_world_mesh):
            output_path = os.path.join(output_dir, f"mesh_layer{layer_idx}.ply")
            o3d.io.write_triangle_mesh(output_path, layer_info['mesh'])
            mesh_files.append(output_path)
        return mesh_files


# --- Gradio UI ---

# Instantiate models once at startup
t2p_demo = Text2PanoramaDemo()
i2p_demo = Image2PanoramaDemo()
hy_demo = HYworldDemo()


def generate_text_to_pano(prompt, neg_prompt, seed, height, width, scale, steps):
    image = t2p_demo.run(prompt, neg_prompt, seed, height, width, scale, steps)
    # Save to a temporary file to pass to the next stage
    temp_dir = "temp_outputs"
    os.makedirs(temp_dir, exist_ok=True)
    temp_path = os.path.join(temp_dir, f"pano_{seed}.png")
    image.save(temp_path)
    return image, temp_path


def generate_image_to_pano(prompt, neg_prompt, image, seed, height, width, scale, steps, fov):
    pil_image = Image.fromarray(image)
    result_image = i2p_demo.run(prompt, neg_prompt, pil_image, seed, height, width, scale, steps, fov)
    temp_dir = "temp_outputs"
    os.makedirs(temp_dir, exist_ok=True)
    temp_path = os.path.join(temp_dir, f"pano_i2p_{seed}.png")
    result_image.save(temp_path)
    return result_image, temp_path


def generate_scene(panorama_file_path, fg1, fg2, classes, seed):
    if panorama_file_path is None or not os.path.exists(panorama_file_path):
        raise gr.Error("Please generate or upload a panorama image first.")
    output_dir = f"output_scene_{seed}"
    shutil.rmtree(output_dir, ignore_errors=True)
    labels_fg1 = [label.strip() for label in fg1.split(',') if label.strip()]
    labels_fg2 = [label.strip() for label in fg2.split(',') if label.strip()]
    mesh_files = hy_demo.run(panorama_file_path, labels_fg1, labels_fg2, classes, output_dir)
    # For now, display only the first layer: Gradio's Model3D component doesn't
    # handle multiple files well. A better UI might zip the layers and offer
    # them for download (see the sketch after this function), or show multiple
    # viewers.
    return mesh_files[0] if mesh_files else None
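
# Gradio's Model3D viewer shows a single file, so generate_scene above returns
# only the first layer. A minimal sketch of the zip-for-download alternative
# mentioned there; this helper is illustrative rather than part of the original
# demo, and it assumes `mesh_files` holds the .ply paths returned by
# HYworldDemo.run.
def zip_mesh_layers(mesh_files, output_dir):
    """Bundle all generated mesh layers into one downloadable archive."""
    import zipfile
    zip_path = os.path.join(output_dir, "mesh_layers.zip")
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for path in mesh_files:
            # Store each layer under its basename, e.g. mesh_layer0.ply.
            zf.write(path, arcname=os.path.basename(path))
    return zip_path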

css = """
#col-container {margin-left: auto; margin-right: auto;}
#pano_output {min-height: 320px;}
#scene_output {min-height: 480px;}
"""

with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    gr.Markdown("