import torch
from diffusers.utils import load_image

# before merging, please import via local path
from controlnet_qwenimage import QwenImageControlNetModel
from transformer_qwenimage import QwenImageTransformer2DModel
from pipeline_qwenimage_controlnet import QwenImageControlNetPipeline
# Inputs for each condition type. Every entry keeps the control image together
# with the prompt and conditioning scale that belong to it, so switching the
# condition type cannot pair an image with another type's settings.
CONDITION_PRESETS = {
    # it is highly suggested to add 'TEXT' into prompt
    "canny": {
        "image": "conds/canny.png",
        "prompt": "Aesthetics art, traditional asian pagoda, elaborate golden accents, sky blue and white color palette, swirling cloud pattern, digital illustration, east asian architecture, ornamental rooftop, intricate detailing on building, cultural representation.",
        "scale": 1.0,
    },
    # soft edge, recommended scale: 0.8 - 1.0
    "soft_edge": {
        "image": "conds/soft_edge.png",
        "prompt": "Photograph of a young man with light brown hair jumping mid-air off a large, reddish-brown rock. He's wearing a navy blue sweater, light blue shirt, gray pants, and brown shoes. His arms are outstretched, and he has a slight smile on his face. The background features a cloudy sky and a distant, leafless tree line. The grass around the rock is patchy.",
        "scale": 0.9,
    },
    "depth": {
        "image": "conds/depth.png",
        "prompt": "A swanky, minimalist living room with a huge floor-to-ceiling window letting in loads of natural light. A beige couch with white cushions sits on a wooden floor, with a matching coffee table in front. The walls are a soft, warm beige, decorated with two framed botanical prints. A potted plant chills in the corner near the window. Sunlight pours through the leaves outside, casting cool shadows on the floor.",
        "scale": 0.9,
    },
    "pose": {
        "image": "conds/pose.png",
        "prompt": "Photograph of a young man with light brown hair and a beard, wearing a beige flat cap, black leather jacket, gray shirt, brown pants, and white sneakers. He's sitting on a concrete ledge in front of a large circular window, with a cityscape reflected in the glass. The wall is cream-colored, and the sky is clear blue. His shadow is cast on the wall.",
        "scale": 1.0,
    },
}

# Condition type generated by this run; must be a key of CONDITION_PRESETS.
CONDITION = "canny"


def main():
    """Generate one ControlNet-conditioned image and save it to disk.

    Loads the ControlNet, the transformer and the pipeline in bfloat16, moves
    the pipeline to CUDA, runs the preset selected by ``CONDITION`` with the
    output size taken from the control image, and saves the first returned
    image as ``qwenimage_cn_union_result.png`` (a relative path).

    Raises:
        KeyError: if ``CONDITION`` is not a key of ``CONDITION_PRESETS``.
    """
    base_model = "Qwen/Qwen-Image"
    controlnet_model = "InstantX/Qwen-Image-ControlNet-Union"

    controlnet = QwenImageControlNetModel.from_pretrained(controlnet_model, torch_dtype=torch.bfloat16)
    transformer = QwenImageTransformer2DModel.from_pretrained(
        base_model, subfolder="transformer", torch_dtype=torch.bfloat16
    )
    pipe = QwenImageControlNetPipeline.from_pretrained(
        base_model, controlnet=controlnet, transformer=transformer, torch_dtype=torch.bfloat16
    )
    pipe.to("cuda")

    preset = CONDITION_PRESETS[CONDITION]
    control_image = load_image(preset["image"])
    # The output is generated at the control image's own width and height.
    width, height = control_image.size

    image = pipe(
        prompt=preset["prompt"],
        negative_prompt=" ",
        control_image=control_image,
        controlnet_conditioning_scale=preset["scale"],
        width=width,
        height=height,
        num_inference_steps=30,
        true_cfg_scale=4.0,
        # Generator seeded with a fixed value (42) on the CUDA device.
        generator=torch.Generator(device="cuda").manual_seed(42),
    ).images[0]
    image.save("qwenimage_cn_union_result.png")


if __name__ == "__main__":
    main()