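# NOTE(review): the original first lines read "Spaces: / Build error / Build error".
# They appear to be page-status residue from where this file was copied, not program text.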
import gradio as gr | |
import torch | |
import os | |
import numpy as np | |
import cv2 | |
from PIL import Image | |
import open3d as o3d | |
import shutil | |
# --- Model Classes (adapted from demo scripts) --- | |
# Panorama Generation | |
from hy3dworld import Text2PanoramaPipelines, Image2PanoramaPipelines, Perspective | |
class Text2PanoramaDemo:
    """Text-to-panorama wrapper around `Text2PanoramaPipelines`.

    The constructor loads the "black-forest-labs/FLUX.1-dev" checkpoint in
    bfloat16, attaches the "HunyuanWorld-PanoDiT-Text" LoRA weights from
    "tencent/HunyuanWorld-1", and turns on model CPU offload and VAE tiling.
    """

    def __init__(self):
        base_pipe = Text2PanoramaPipelines.from_pretrained(
            "black-forest-labs/FLUX.1-dev",
            torch_dtype=torch.bfloat16,
        )
        self.pipe = base_pipe.to("cuda")

        # LoRA adapter that specialises the base model for panoramas.
        self.pipe.load_lora_weights(
            "tencent/HunyuanWorld-1",
            subfolder="HunyuanWorld-PanoDiT-Text",
            weight_name="lora.safetensors",
            torch_dtype=torch.bfloat16,
        )

        self.pipe.enable_model_cpu_offload()
        self.pipe.enable_vae_tiling()

    def run(self, prompt, negative_prompt, seed, height, width, guidance_scale, steps):
        """Generate one panorama for `prompt` and return the first output image.

        Args:
            prompt: Text description of the scene.
            negative_prompt: Text forwarded as the pipeline's negative prompt.
            seed: Seed for the CUDA random generator.
            height: Output height forwarded to the pipeline.
            width: Output width forwarded to the pipeline.
            guidance_scale: Guidance scale forwarded to the pipeline.
            steps: Number of inference steps.

        Returns:
            The first element of the pipeline result's `images`.
        """
        rng = torch.Generator("cuda").manual_seed(seed)
        result = self.pipe(
            prompt,
            height=height,
            width=width,
            negative_prompt=negative_prompt,
            generator=rng,
            num_inference_steps=steps,
            guidance_scale=guidance_scale,
            # Fixed settings carried by this demo (not exposed in the UI).
            blend_extend=6,
            true_cfg_scale=0.0,
        )
        return result.images[0]
class Image2PanoramaDemo:
    """Image-to-panorama wrapper around `Image2PanoramaPipelines`.

    The constructor loads the "black-forest-labs/FLUX.1-dev" checkpoint in
    bfloat16, attaches the "HunyuanWorld-PanoDiT-Image" LoRA weights from
    "tencent/HunyuanWorld-1", and turns on model CPU offload and VAE tiling.
    It also stores generic positive/negative prompt fragments that `run`
    merges with the user-supplied prompts.
    """

    def __init__(self):
        self.pipe = Image2PanoramaPipelines.from_pretrained(
            "black-forest-labs/FLUX.1-dev",
            torch_dtype=torch.bfloat16,
        ).to("cuda")
        self.pipe.load_lora_weights(
            "tencent/HunyuanWorld-1",
            subfolder="HunyuanWorld-PanoDiT-Image",
            weight_name="lora.safetensors",
            torch_dtype=torch.bfloat16,
        )
        self.pipe.enable_model_cpu_offload()
        self.pipe.enable_vae_tiling()
        self.general_negative_prompt = "human, person, people, messy, low-quality, blur, noise, low-resolution"
        self.general_positive_prompt = "high-quality, high-resolution, sharp, clear, 8k"

    def run(self, prompt, negative_prompt, image, seed, height, width, guidance_scale, steps, fov):
        """Outpaint a perspective image into a panorama.

        The input image is resized so that its width covers `fov / 360` of the
        panorama width, projected with `Perspective(...).GetEquirec`, and the
        uncovered region is handed to the pipeline as the mask to fill.

        Args:
            prompt: User prompt; the generic positive prompt is appended.
            negative_prompt: User negative prompt; appended to the generic one.
            image: RGB input, either a `PIL.Image.Image` or an array accepted
                by `np.array`.
            seed: Seed for the CUDA random generator.
            height: Panorama height.
            width: Panorama width.
            guidance_scale: Guidance scale forwarded to the pipeline.
            steps: Number of inference steps.
            fov: Horizontal field of view of the input image, in degrees.

        Returns:
            The first element of the pipeline result's `images`.
        """
        # Join only the non-empty parts so an empty (or None) user prompt does
        # not leave a dangling ", " or raise on string concatenation.
        prompt = ", ".join(part for part in (prompt, self.general_positive_prompt) if part)
        negative_prompt = ", ".join(
            part for part in (self.general_negative_prompt, negative_prompt) if part
        )

        # The caller supplies RGB data, but the final conversion below uses
        # COLOR_BGR2RGB. Convert to BGR up front so that conversion restores
        # the true colours instead of swapping red and blue.
        if isinstance(image, Image.Image):
            image = image.convert("RGB")
        perspective_img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        # Scale the image so its width spans the requested share of the panorama,
        # keeping the aspect ratio.
        height_fov, width_fov = perspective_img.shape[:2]
        ratio = width_fov / height_fov
        w = int((fov / 360) * width)
        h = int(w / ratio)
        perspective_img = cv2.resize(perspective_img, (w, h), interpolation=cv2.INTER_AREA)

        equ = Perspective(perspective_img, fov, 0, 0, crop_bound=False)
        img, mask = equ.GetEquirec(height, width)

        # Shrink the covered region slightly, then blank everything outside it.
        mask = cv2.erode(mask.astype(np.uint8), np.ones((3, 3), np.uint8), iterations=5)
        img = img * mask

        # Invert: 255 marks the pixels the pipeline should generate.
        mask = 255 - (mask.astype(np.uint8) * 255)
        mask = Image.fromarray(mask[:, :, 0])
        img = Image.fromarray(cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB))

        image = self.pipe(
            prompt=prompt,
            image=img,
            mask_image=mask,
            height=height,
            width=width,
            negative_prompt=negative_prompt,
            guidance_scale=guidance_scale,
            num_inference_steps=steps,
            generator=torch.Generator("cuda").manual_seed(seed),
            blend_extend=6,
            shifting_extend=0,
            true_cfg_scale=2.0,
        ).images[0]
        return image
# Scene Generation | |
from hy3dworld import LayerDecomposition, WorldComposer, process_file | |
class HYworldDemo:
    """Builds layered meshes from a panorama with `LayerDecomposition` and `WorldComposer`."""

    def __init__(self, seed=42):
        """Create the layer decomposer and the world composer.

        Args:
            seed: Seed forwarded to `WorldComposer`.
        """
        pano_width = 3840
        # Integer multiple of a 1920-wide panorama, never below 1.
        mask_kernel_scale = max(1, int(pano_width / 1920))

        self.LayerDecomposer = LayerDecomposition()
        self.hy3d_world = WorldComposer(
            device=torch.device("cuda"),
            resolution=(pano_width, pano_width // 2),
            seed=seed,
            filter_mask=True,
            kernel_scale=mask_kernel_scale,
        )

    def run(self, image_path, labels_fg1, labels_fg2, classes, output_dir):
        """Decompose the panorama into layers and write one .ply mesh per layer.

        Args:
            image_path: Path of the input panorama.
            labels_fg1: Labels for the first foreground layer.
            labels_fg2: Labels for the second foreground layer.
            classes: Scene class value stored under the "class" key.
            output_dir: Directory that receives intermediate files and meshes;
                created if missing.

        Returns:
            List of written mesh paths, in layer order.
        """
        os.makedirs(output_dir, exist_ok=True)

        fg1_infos = [
            {
                "image_path": image_path,
                "output_path": output_dir,
                "labels": labels_fg1,
                "class": classes,
            }
        ]
        # Later passes read 'remove_fg1_image.png' from output_dir
        # (presumably written by the layer-0 pass -- TODO confirm).
        fg1_removed_path = os.path.join(output_dir, 'remove_fg1_image.png')
        fg2_infos = [
            {
                "image_path": fg1_removed_path,
                "output_path": output_dir,
                "labels": labels_fg2,
                "class": classes,
            }
        ]

        self.LayerDecomposer(fg1_infos, layer=0)
        for layer_id in (1, 2):
            self.LayerDecomposer(fg2_infos, layer=layer_id)

        separate_pano, fg_bboxes = self.hy3d_world._load_separate_pano_from_dir(output_dir, sr=True)
        world_layers = self.hy3d_world.generate_world(
            separate_pano=separate_pano,
            fg_bboxes=fg_bboxes,
            world_type='mesh',
        )

        written_paths = []
        for index, layer in enumerate(world_layers):
            ply_path = os.path.join(output_dir, f"mesh_layer{index}.ply")
            o3d.io.write_triangle_mesh(ply_path, layer['mesh'])
            written_paths.append(ply_path)
        return written_paths
# --- Gradio UI ---
# The three model wrappers are created once, at import time, and shared by
# every request handler below.
t2p_demo = Text2PanoramaDemo()  # text -> panorama
i2p_demo = Image2PanoramaDemo()  # image -> panorama
hy_demo = HYworldDemo()  # panorama -> layered meshes (default seed)
def generate_text_to_pano(prompt, neg_prompt, seed, height, width, scale, steps):
    """Run text-to-panorama and persist the result for the scene stage.

    Returns:
        A `(image, path)` tuple: the generated image and the path of the PNG
        copy saved under "temp_outputs".
    """
    panorama = t2p_demo.run(prompt, neg_prompt, seed, height, width, scale, steps)

    # The scene-generation stage consumes a file path, so keep a copy on disk.
    out_dir = "temp_outputs"
    os.makedirs(out_dir, exist_ok=True)
    file_name = f"pano_{seed}.png"
    saved_path = os.path.join(out_dir, file_name)
    panorama.save(saved_path)

    return panorama, saved_path
def generate_image_to_pano(prompt, neg_prompt, image, seed, height, width, scale, steps, fov):
    """Run image-to-panorama and persist the result for the scene stage.

    Args:
        image: Input image as a numpy array, or None when nothing was uploaded.

    Returns:
        A `(image, path)` tuple: the generated image and the path of the PNG
        copy saved under "temp_outputs".

    Raises:
        gr.Error: If no input image was provided.
    """
    # Without this guard, clicking the button with no upload fails inside
    # Image.fromarray with an error that means nothing to the user.
    if image is None:
        raise gr.Error("Please upload an input image first.")

    pil_image = Image.fromarray(image)
    result_image = i2p_demo.run(prompt, neg_prompt, pil_image, seed, height, width, scale, steps, fov)

    # The scene-generation stage consumes a file path, so keep a copy on disk.
    temp_dir = "temp_outputs"
    os.makedirs(temp_dir, exist_ok=True)
    temp_path = os.path.join(temp_dir, f"pano_i2p_{seed}.png")
    result_image.save(temp_path)
    return result_image, temp_path
def generate_scene(panorama_file_path, fg1, fg2, classes, seed):
    """Generate the layered 3D scene for a panorama and return the first mesh.

    Args:
        panorama_file_path: Path of the panorama image on disk.
        fg1: Comma-separated labels for foreground layer 1.
        fg2: Comma-separated labels for foreground layer 2.
        classes: Scene class forwarded to the scene generator.
        seed: Only used here to name the output directory
            ("output_scene_<seed>"); it is not passed to the generator.

    Returns:
        Path of the first mesh file, or None if no mesh was produced.

    Raises:
        gr.Error: If the panorama path is missing or does not exist.
    """

    def _split_labels(raw):
        # Comma-separated text -> trimmed labels, blanks dropped.
        pieces = (piece.strip() for piece in raw.split(','))
        return [piece for piece in pieces if piece]

    has_panorama = panorama_file_path is not None and os.path.exists(panorama_file_path)
    if not has_panorama:
        raise gr.Error("Please generate or upload a panorama image first.")

    # Start from a clean directory so results of an earlier run are not reused.
    output_dir = f"output_scene_{seed}"
    shutil.rmtree(output_dir, ignore_errors=True)

    labels_fg1 = _split_labels(fg1)
    labels_fg2 = _split_labels(fg2)
    mesh_files = hy_demo.run(panorama_file_path, labels_fg1, labels_fg2, classes, output_dir)

    # Only the first layer is shown: the Model3D output takes a single file.
    # A richer UI could zip all layers for download or show several viewers.
    if not mesh_files:
        return None
    return mesh_files[0]
css = """
#col-container {margin-left: auto; margin-right: auto;}
#pano_output {min-height: 320px;}
#scene_output {min-height: 480px;}
"""

# Explicit tab ids so the top-level Tabs container can be switched from code.
PANO_TAB_ID = "pano_tab"
SCENE_TAB_ID = "scene_tab"

with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    gr.Markdown("<h1>HunyuanWorld-1.0: A One-Stop Solution for Text-driven 3D Scene Generation</h1>")
    gr.Markdown("Official Repo: [Tencent-Hunyuan/HunyuanWorld-1.0](https://github.com/Tencent-Hunyuan/HunyuanWorld-1.0)")

    # State to hold the path of the generated panorama
    panorama_path_state = gr.State(None)

    # Keep a handle on the top-level Tabs so an event can select a tab by id.
    with gr.Tabs() as main_tabs:
        with gr.TabItem("Step 1: Panorama Generation", id=PANO_TAB_ID):
            with gr.Row():
                with gr.Column():
                    with gr.Tabs():
                        with gr.TabItem("Text-to-Panorama") as t2p_tab:
                            t2p_prompt = gr.Textbox(label="Prompt", value="A beautiful sunset over a mountain range, fantasy style")
                            t2p_neg_prompt = gr.Textbox(label="Negative Prompt", value="blurry, low quality")
                            t2p_seed = gr.Slider(label="Seed", minimum=0, maximum=10000, step=1, value=42)
                            with gr.Accordion("Advanced Settings", open=False):
                                t2p_height = gr.Slider(label="Height", minimum=512, maximum=1024, step=64, value=960)
                                t2p_width = gr.Slider(label="Width", minimum=1024, maximum=2048, step=128, value=1920)
                                t2p_scale = gr.Slider(label="Guidance Scale", minimum=1, maximum=50, step=1, value=30)
                                t2p_steps = gr.Slider(label="Inference Steps", minimum=10, maximum=100, step=5, value=50)
                            t2p_button = gr.Button("Generate Panorama", variant="primary")
                        with gr.TabItem("Image-to-Panorama") as i2p_tab:
                            i2p_image = gr.Image(type="numpy", label="Input Image")
                            i2p_prompt = gr.Textbox(label="Prompt", value="A photo of a room, modern design")
                            i2p_neg_prompt = gr.Textbox(label="Negative Prompt", value="watermark, text")
                            i2p_seed = gr.Slider(label="Seed", minimum=0, maximum=10000, step=1, value=100)
                            with gr.Accordion("Advanced Settings", open=False):
                                i2p_fov = gr.Slider(label="Field of View (FOV)", minimum=40, maximum=120, step=5, value=80)
                                i2p_height = gr.Slider(label="Height", minimum=512, maximum=1024, step=64, value=960)
                                i2p_width = gr.Slider(label="Width", minimum=1024, maximum=2048, step=128, value=1920)
                                i2p_scale = gr.Slider(label="Guidance Scale", minimum=1, maximum=50, step=1, value=30)
                                i2p_steps = gr.Slider(label="Inference Steps", minimum=10, maximum=100, step=5, value=50)
                            i2p_button = gr.Button("Generate Panorama", variant="primary")
                with gr.Column():
                    pano_output = gr.Image(label="Panorama Output", elem_id="pano_output")
                    send_to_scene_btn = gr.Button("Step 2: Send to Scene Generation")
        with gr.TabItem("Step 2: Scene Generation", id=SCENE_TAB_ID) as scene_tab:
            with gr.Row():
                with gr.Column():
                    gr.Markdown("Load the panorama generated in Step 1, or upload your own.")
                    scene_input_image = gr.Image(type="filepath", label="Input Panorama")
                    scene_classes = gr.Radio(["outdoor", "indoor"], label="Scene Class", value="outdoor")
                    scene_fg1 = gr.Textbox(label="Foreground Labels (Layer 1)", placeholder="e.g., tree, car, person")
                    scene_fg2 = gr.Textbox(label="Foreground Labels (Layer 2)", placeholder="e.g., building, mountain")
                    scene_seed = gr.Slider(label="Seed", minimum=0, maximum=10000, step=1, value=2024)
                    scene_button = gr.Button("Generate 3D Scene", variant="primary")
                with gr.Column():
                    scene_output = gr.Model3D(label="3D Scene Output (.ply)", elem_id="scene_output")

    # Wire up components
    t2p_button.click(
        fn=generate_text_to_pano,
        inputs=[t2p_prompt, t2p_neg_prompt, t2p_seed, t2p_height, t2p_width, t2p_scale, t2p_steps],
        outputs=[pano_output, panorama_path_state],
    )
    i2p_button.click(
        fn=generate_image_to_pano,
        inputs=[i2p_prompt, i2p_neg_prompt, i2p_image, i2p_seed, i2p_height, i2p_width, i2p_scale, i2p_steps, i2p_fov],
        outputs=[pano_output, panorama_path_state],
    )

    def transfer_to_scene_gen(path):
        """Load the stored panorama into the scene tab and switch to that tab.

        Returns a component-keyed dict of updates. When no panorama has been
        generated yet (`path` is None) the scene input is left untouched, so
        an image the user uploaded there is not cleared.
        """
        updates = {main_tabs: gr.update(selected=SCENE_TAB_ID)}
        if path is not None:
            updates[scene_input_image] = gr.update(value=path)
        return updates

    # Previously the tab switch targeted demo.children[1] (a Markdown block)
    # and passed a TabItem object as `selected`, so it could not work.
    send_to_scene_btn.click(
        fn=transfer_to_scene_gen,
        inputs=panorama_path_state,
        outputs=[scene_input_image, main_tabs],
    )
    scene_button.click(
        fn=generate_scene,
        inputs=[scene_input_image, scene_fg1, scene_fg2, scene_classes, scene_seed],
        outputs=scene_output,
    )

demo.queue().launch(debug=True)