Update optimization.py

#1 opened by linoyts (HF Staff)
Files changed (2)
  1. app.py +28 -64
  2. optimization.py +16 -16
app.py CHANGED
@@ -19,8 +19,8 @@ from optimization import optimize_pipeline_
 
 MODEL_ID = "Wan-AI/Wan2.2-T2V-A14B-Diffusers"
 
-LANDSCAPE_WIDTH = 832
-LANDSCAPE_HEIGHT = 480
+LANDSCAPE_WIDTH = 1024
+LANDSCAPE_HEIGHT = 1024
 MAX_SEED = np.iinfo(np.int32).max
 
 FIXED_FPS = 16
@@ -46,34 +46,6 @@ pipe = WanPipeline.from_pretrained(MODEL_ID,
     torch_dtype=torch.bfloat16,
 ).to('cuda')
 
-# load, fuse, unload before compilation
-# pipe.load_lora_weights(
-#     "vrgamedevgirl84/Wan14BT2VFusioniX",
-#     weight_name="FusionX_LoRa/Phantom_Wan_14B_FusionX_LoRA.safetensors",
-#     adapter_name="phantom"
-# )
-
-# pipe.set_adapters(["phantom"], adapter_weights=[0.95])
-# pipe.fuse_lora(adapter_names=["phantom"], lora_scale=1.0)
-# pipe.unload_lora_weights()
-
-
-# pipe.load_lora_weights(
-#     "vrgamedevgirl84/Wan14BT2VFusioniX",
-#     weight_name="FusionX_LoRa/Phantom_Wan_14B_FusionX_LoRA.safetensors",
-#     adapter_name="phantom"
-# )
-# kwargs = {}
-# kwargs["load_into_transformer_2"] = True
-# pipe.load_lora_weights(
-#     "vrgamedevgirl84/Wan14BT2VFusioniX",
-#     weight_name="FusionX_LoRa/Phantom_Wan_14B_FusionX_LoRA.safetensors",
-#     adapter_name="phantom_2", **kwargs
-# )
-# pipe.set_adapters(["phantom", "phantom_2"], adapter_weights=[1., 1.])
-# pipe.fuse_lora(adapter_names=["phantom"], lora_scale=3., components=["transformer"])
-# pipe.fuse_lora(adapter_names=["phantom_2"], lora_scale=1., components=["transformer_2"])
-# pipe.unload_lora_weights()
 
 for i in range(3):
     gc.collect()
@@ -95,7 +67,6 @@ default_negative_prompt = "色调艳丽, 过曝, 静态, 细节模糊不清, 字
 def get_duration(
     prompt,
     negative_prompt,
-    duration_seconds,
     guidance_scale,
     guidance_scale_2,
     steps,
@@ -106,13 +77,12 @@ def get_duration(
     return steps * 15
 
 @spaces.GPU(duration=get_duration)
-def generate_video(
+def generate_image(
     prompt,
     negative_prompt=default_negative_prompt,
-    duration_seconds = MAX_DURATION,
-    guidance_scale = 1,
-    guidance_scale_2 = 3,
-    steps = 4,
+    guidance_scale = 3.5,
+    guidance_scale_2 = 4,
+    steps = 27,
     seed = 42,
     randomize_seed = False,
     progress=gr.Progress(track_tqdm=True),
@@ -128,8 +98,6 @@ def generate_video(
         prompt (str): Text prompt describing the desired animation or motion.
         negative_prompt (str, optional): Negative prompt to avoid unwanted elements.
             Defaults to default_negative_prompt (contains unwanted visual artifacts).
-        duration_seconds (float, optional): Duration of the generated video in seconds.
-            Defaults to 2. Clamped between MIN_FRAMES_MODEL/FIXED_FPS and MAX_FRAMES_MODEL/FIXED_FPS.
         guidance_scale (float, optional): Controls adherence to the prompt. Higher values = more adherence.
             Defaults to 1.0. Range: 0.0-20.0.
         guidance_scale_2 (float, optional): Controls adherence to the prompt. Higher values = more adherence.
@@ -158,62 +126,58 @@ def generate_video(
         - Generation time varies based on steps and duration (see get_duration function)
     """
 
-    num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
+
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
 
-    output_frames_list = pipe(
+    out_img = pipe(
         prompt=prompt,
         negative_prompt=negative_prompt,
-        height=480,
-        width=832,
-        num_frames=num_frames,
+        height=1024,
+        width=1024,
+        num_frames=1,
         guidance_scale=float(guidance_scale),
         guidance_scale_2=float(guidance_scale_2),
         num_inference_steps=int(steps),
+        output_type="pil",
         generator=torch.Generator(device="cuda").manual_seed(current_seed),
-    ).frames[0]
+    ).frames[0][0]
 
-    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
-        video_path = tmpfile.name
-
-    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
-
-    return video_path, current_seed
+    return out_img, current_seed
 
 with gr.Blocks() as demo:
-    gr.Markdown("# Fast 6 steps Wan 2.2 I2V (14B) with Phantom LoRA")
-    gr.Markdown("run Wan 2.2 in just 6-8 steps, with [FusionX Phantom LoRA by DeeJayT](https://huggingface.co/vrgamedevgirl84/Wan14BT2VFusioniX/tree/main/FusionX_LoRa), compatible with 🧨 diffusers")
+    gr.Markdown("# Wan 2.2 T2I (14B)")
+    #gr.Markdown("run Wan 2.2 in just 6-8 steps, with [FusionX Phantom LoRA by DeeJayT](https://huggingface.co/vrgamedevgirl84/Wan14BT2VFusioniX/tree/main/FusionX_LoRa), compatible with 🧨 diffusers")
     with gr.Row():
         with gr.Column():
            prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
-            duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=MAX_DURATION, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
+            #duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=MAX_DURATION, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
 
            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
-                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Inference Steps")
-                guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale - high noise stage")
-                guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=3, label="Guidance Scale 2 - low noise stage")
+                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=27, label="Inference Steps")
+                guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=3.5, label="Guidance Scale - high noise stage")
+                guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=4, label="Guidance Scale 2 - low noise stage")
 
-            generate_button = gr.Button("Generate Video", variant="primary")
+            generate_button = gr.Button("Generate Image", variant="primary")
        with gr.Column():
-            video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)
+            img_output = gr.Image(label="Generated Image", interactive=False)
 
-    ui_inputs = [
-        input_image_component, prompt_input,
-        negative_prompt_input, duration_seconds_input,
+    ui_inputs = [
+        prompt_input,
+        negative_prompt_input,
        guidance_scale_input, guidance_scale_2_input, steps_slider, seed_input, randomize_seed_checkbox
    ]
-    generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
+    generate_button.click(fn=generate_image, inputs=ui_inputs, outputs=[img_output, seed_input])
 
    gr.Examples(
        examples=[
            [
-                "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside.",
+                "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
            ],
        ],
-        inputs=[prompt_input], outputs=[video_output, seed_input], fn=generate_video, cache_examples="lazy"
+        inputs=[prompt_input], outputs=[img_output, seed_input], fn=generate_image, cache_examples="lazy"
    )
 
 if __name__ == "__main__":
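The gist of the app.py change: Wan 2.2's text-to-video pipeline doubles as a text-to-image model when asked for exactly one frame. Below is a minimal standalone sketch of that pattern; the model ID, resolution, and sampler settings come from the diff, the surrounding script scaffolding is illustrative, and it assumes a diffusers build recent enough to expose `guidance_scale_2` on the two-stage Wan 2.2 pipeline.

```python
import torch
from diffusers import WanPipeline

# Same checkpoint and dtype the Space loads in app.py.
pipe = WanPipeline.from_pretrained(
    "Wan-AI/Wan2.2-T2V-A14B-Diffusers", torch_dtype=torch.bfloat16
).to("cuda")

# num_frames=1 turns the video sampler into an image sampler.
# With output_type="pil", .frames[0] is a list of PIL images,
# so .frames[0][0] is the single generated frame.
image = pipe(
    prompt="Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
    height=1024,
    width=1024,
    num_frames=1,
    guidance_scale=3.5,    # high-noise stage
    guidance_scale_2=4.0,  # low-noise stage
    num_inference_steps=27,
    generator=torch.Generator(device="cuda").manual_seed(42),
    output_type="pil",
).frames[0][0]

image.save("wan_t2i.png")  # illustrative output path
```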
optimization.py CHANGED
@@ -43,22 +43,22 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kw
     @spaces.GPU(duration=1500)
     def compile_transformer():
 
-        pipeline.load_lora_weights(
-            "Kijai/WanVideo_comfy",
-            weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
-            adapter_name="lightx2v"
-        )
-        kwargs_lora = {}
-        kwargs_lora["load_into_transformer_2"] = True
-        pipeline.load_lora_weights(
-            "Kijai/WanVideo_comfy",
-            weight_name="Wan22-Lightning/Wan2.2-Lightning_T2V-A14B-4steps-lora_LOW_fp16.safetensors",
-            adapter_name="lightx2v_2", **kwargs_lora
-        )
-        pipeline.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
-        pipeline.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
-        pipeline.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
-        pipeline.unload_lora_weights()
+        # pipeline.load_lora_weights(
+        #     "Kijai/WanVideo_comfy",
+        #     weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+        #     adapter_name="lightx2v"
+        # )
+        # kwargs_lora = {}
+        # kwargs_lora["load_into_transformer_2"] = True
+        # pipeline.load_lora_weights(
+        #     "Kijai/WanVideo_comfy",
+        #     weight_name="Wan22-Lightning/Wan2.2-Lightning_T2V-A14B-4steps-lora_LOW_fp16.safetensors",
+        #     adapter_name="lightx2v_2", **kwargs_lora
+        # )
+        # pipeline.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
+        # pipeline.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
+        # pipeline.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
+        # pipeline.unload_lora_weights()
 
         with capture_component_call(pipeline, 'transformer') as call:
             pipeline(*args, **kwargs)
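Note on the optimization.py change: with the Lightning/lightx2v step-distillation LoRAs no longer fused before compilation, the Space runs the base model, which is consistent with app.py's defaults moving from 4-6 steps at CFG 1/3 to 27 steps at CFG 3.5/4. The commented block preserves the fuse-before-compile pattern: fusing bakes the LoRA deltas into the base weights and unloading removes the adapter layers, so the compiled transformer graph stays static. A sketch of those same calls wrapped in a helper, should the fast path be wanted back (adapter names and weight files come from the commented block; the helper itself is illustrative):

```python
def fuse_lightning_loras(pipeline):
    """Fuse step-distillation LoRAs into both Wan 2.2 transformers,
    then unload adapters so torch.compile sees plain fused weights."""
    # LoRA for the high-noise transformer.
    pipeline.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
        adapter_name="lightx2v",
    )
    # LoRA for the low-noise transformer, loaded into transformer_2.
    pipeline.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name="Wan22-Lightning/Wan2.2-Lightning_T2V-A14B-4steps-lora_LOW_fp16.safetensors",
        adapter_name="lightx2v_2",
        load_into_transformer_2=True,
    )
    pipeline.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1.0, 1.0])
    # Bake each adapter into its own transformer, then drop the LoRA layers.
    pipeline.fuse_lora(adapter_names=["lightx2v"], lora_scale=3.0, components=["transformer"])
    pipeline.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1.0, components=["transformer_2"])
    pipeline.unload_lora_weights()
```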