Add finetuned model
- app.py +55 -10
- requirements.txt +2 -1
- src/smc/inference.py +65 -0
app.py CHANGED

@@ -7,8 +7,10 @@ import gradio as gr
 from src.smc.inference import (
     infer_pretrained,
     infer_smc_grad,
+    infer_ft,
     PretrainedInferenceConfig,
     SMCGradInferenceConfig,
+    FTInferenceConfig,
 )

 def get_device():
@@ -45,11 +47,7 @@ def _format_inference_output(out) -> str:

 # --- Per-method runner functions ---
 def run_pretrained_ui(prompt, pretrained_negative_prompt, pretrained_CFG, pretrained_steps):
-    """Run the pretrained inference method and return (gallery, info).
-
-    This function is designed to be attached directly to a Gradio event so it can
-    execute independently and return only the components it owns.
-    """
+    """Run the pretrained inference method and return (gallery, info)."""
     try:
         pretrained_cfg = PretrainedInferenceConfig(
             prompt=prompt,
@@ -110,6 +108,25 @@ def run_smc_grad_ui(
         traceback.print_exc()
         err_msg = f"SMC-grad inference error: {e}"
         return [err_msg], err_msg
+
+def run_ft_ui(prompt, ft_negative_prompt, ft_CFG, ft_steps):
+    """Run the finetuned model inference and return (gallery, info)."""
+    try:
+        ft_cfg = FTInferenceConfig(
+            prompt=prompt,
+            negative_prompt=ft_negative_prompt or "",
+            CFG=float(ft_CFG),
+            steps=int(ft_steps),
+        )
+        out = infer_ft(ft_cfg, device=get_device())
+        gallery = out.images
+        info = _format_inference_output(out)
+        return gallery, info
+    except Exception as e:
+        traceback.print_exc()
+        err_msg = f"FT inference error: {e}"
+        # Return a simple textual error in the gallery and the info box
+        return [err_msg], err_msg


 def mark_all_running():
@@ -121,7 +138,7 @@ def mark_all_running():
     running_info = gr.update(value="Running...", interactive=False)
     empty_gallery = gr.update(value=[])
     # Return values must match the components this function is attached to (see below)
-    return empty_gallery, running_info, empty_gallery, running_info
+    return empty_gallery, running_info, empty_gallery, running_info, empty_gallery, running_info


 with gr.Blocks() as demo:
@@ -136,7 +153,7 @@ with gr.Blocks() as demo:
     # --- Pretrained method row ---
     with gr.Row():
         with gr.Column(scale=1, min_width=280):
-            with gr.Accordion("Pretrained
+            with gr.Accordion("Pretrained model — settings", open=False):
                 pretrained_negative_prompt = gr.Textbox(
                     label="Negative prompt", value=PretrainedInferenceConfig.negative_prompt, lines=1
                 )
@@ -145,7 +162,7 @@ with gr.Blocks() as demo:

         with gr.Column(scale=2):
             pretrained_gallery = gr.Gallery(
-                label="Pretrained outputs", show_label=True, elem_id="pretrained_gallery", height="240px", columns=4,
+                label="Pretrained model outputs", show_label=True, elem_id="pretrained_gallery", height="240px", columns=4,
                 object_fit="contain",
             )
             pretrained_info = gr.Textbox(label="Pretrained info", interactive=False)
@@ -192,13 +209,30 @@ with gr.Blocks() as demo:
                 object_fit="contain",
             )
             smc_grad_info = gr.Textbox(label="SMC-grad info", interactive=False)
+
+    # --- FT method row ---
+    with gr.Row():
+        with gr.Column(scale=1, min_width=280):
+            with gr.Accordion("Finetuned model — settings", open=False):
+                ft_negative_prompt = gr.Textbox(
+                    label="Negative prompt", value=FTInferenceConfig.negative_prompt, lines=1
+                )
+                ft_CFG = gr.Slider(0.0, 30.0, step=0.1, value=FTInferenceConfig.CFG, label="CFG")
+                ft_steps = gr.Slider(1, 200, step=1, value=FTInferenceConfig.steps, label="Steps")
+
+        with gr.Column(scale=2):
+            ft_gallery = gr.Gallery(
+                label="Finetuned model outputs", show_label=True, elem_id="ft_gallery", height="240px", columns=4,
+                object_fit="contain",
+            )
+            ft_info = gr.Textbox(label="Finetuned info", interactive=False)

     # --- Wiring ---
     # 1) Quick 'running' update attached to the button so the UI shows immediate feedback.
     run_button.click(
         fn=mark_all_running,
         inputs=[],
-        outputs=[pretrained_gallery, pretrained_info, smc_grad_gallery, smc_grad_info],
+        outputs=[pretrained_gallery, pretrained_info, smc_grad_gallery, smc_grad_info, ft_gallery, ft_info],
     )

     # 2) Attach the per-method heavy functions separately. Gradio's queue() will allow
@@ -229,12 +263,18 @@ with gr.Blocks() as demo:
         ],
         outputs=[smc_grad_gallery, smc_grad_info],
     )
+
+    run_button.click(
+        fn=run_ft_ui,
+        inputs=[prompt, ft_negative_prompt, ft_CFG, ft_steps],
+        outputs=[ft_gallery, ft_info],
+    )

     # Also allow pressing Enter in the prompt to trigger the same set of handlers
     prompt.submit(
         fn=mark_all_running,
         inputs=[],
-        outputs=[pretrained_gallery, pretrained_info, smc_grad_gallery, smc_grad_info],
+        outputs=[pretrained_gallery, pretrained_info, smc_grad_gallery, smc_grad_info, ft_gallery, ft_info],
     )
     prompt.submit(
         fn=run_pretrained_ui,
@@ -261,6 +301,11 @@ with gr.Blocks() as demo:
         ],
         outputs=[smc_grad_gallery, smc_grad_info],
     )
+    prompt.submit(
+        fn=run_ft_ui,
+        inputs=[prompt, ft_negative_prompt, ft_CFG, ft_steps],
+        outputs=[ft_gallery, ft_info],
+    )

     # Enable Gradio queue to allow parallel execution of multiple handlers. Set concurrency
     # to 2 (one per method) — increase if you add more methods.
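The wiring above attaches several handlers to the same click/submit events so each method updates only its own components while Gradio's queue runs them independently. A minimal standalone sketch of that pattern (the component names and sleep placeholders are illustrative, not from this app):

import time
import gradio as gr

def slow_method_a(prompt):
    time.sleep(2)  # stand-in for a heavy model call
    return f"A finished: {prompt}"

def slow_method_b(prompt):
    time.sleep(2)  # stand-in for another heavy model call
    return f"B finished: {prompt}"

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    out_a = gr.Textbox(label="Method A")
    out_b = gr.Textbox(label="Method B")
    run = gr.Button("Run")

    # Each handler is attached separately, so it returns only the components
    # it owns and can be scheduled independently of the others.
    run.click(fn=slow_method_a, inputs=[prompt], outputs=[out_a])
    run.click(fn=slow_method_b, inputs=[prompt], outputs=[out_b])

demo.queue()  # concurrency for parallel handlers is configured here
demo.launch()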
requirements.txt CHANGED

@@ -7,4 +7,5 @@ xformers
 gradio
 spaces
 image-reward
-openai-clip
+openai-clip
+peft
src/smc/inference.py CHANGED

@@ -1,3 +1,4 @@
+import os
 import math
 import threading
 import spaces
@@ -25,6 +26,7 @@ MIN_GPU_DURATION = 60
 pipe_build_lock = threading.Lock()
 pipe_load_lock = threading.Lock()
 reward_model_load_lock = threading.Lock()
+lora_load_lock = threading.Lock()


 def build_pipe(device):
@@ -43,6 +45,13 @@ def build_pipe(device):
     pipe = Pipeline(vq_model, tokenizer=tokenizer, text_encoder=text_encoder, transformer=model, scheduler=scheduler_new)
     return pipe

+def load_lora_weights(pipe, lora_ckpt_uuid):
+    # Load the LoRA checkpoint stored under checkpoints/<lora_ckpt_uuid>
+    ckpt_path = os.path.join('checkpoints', lora_ckpt_uuid)
+    pipe.load_lora_weights(
+        pretrained_model_name_or_path_or_dict=ckpt_path,
+    )
+
 @dataclass
 class InferenceOutput:
     images: List[Image.Image]
@@ -205,3 +214,59 @@ def infer_smc_grad_with_pipe(config: SMCGradInferenceConfig, pipe: Pipeline, dev
     pil_images: List[Image.Image] = pipe.image_processor.postprocess(images, "pil")  # type: ignore
     gpu_mem_used = torch.cuda.max_memory_allocated(device) / 1024**3
     return InferenceOutput(images=pil_images, image_rewards=image_rewards, gpu_mem_used=gpu_mem_used)
+
+@dataclass
+class FTInferenceConfig:
+    prompt: str
+    negative_prompt: str = "worst quality, low quality, low res, blurry, distortion, watermark, logo, signature, text, jpeg artifacts, signature, sketch, duplicate, ugly, identifying mark"
+    resolution: int = 512
+    CFG: float = 9.0
+    steps: int = 48
+    num_batches: int = 4
+    ckpt_uuid: str = "a1e906e1-16a9-44a3-abe8-6dd2c17e12a2"
+
+def infer_ft(config: FTInferenceConfig, device='cpu'):
+    with pipe_build_lock:
+        pipe = build_pipe(device)
+    return infer_ft_with_pipe(config, pipe, device=device)
+
+def _get_ft_duration(config: FTInferenceConfig, pipe: Pipeline, device='cpu') -> int:
+    setup_duration = 30.0
+    step_duration = 1.0
+    total_duration = math.ceil(setup_duration + step_duration * config.steps)
+    return max(total_duration, MIN_GPU_DURATION)
+
+@spaces.GPU(duration=_get_ft_duration)
+def infer_ft_with_pipe(config: FTInferenceConfig, pipe: Pipeline, device='cpu'):
+    if isinstance(device, str):
+        device = torch.device(device)
+    with pipe_load_lock:
+        pipe = pipe.to(device)
+    with lora_load_lock:
+        load_lora_weights(pipe, config.ckpt_uuid)
+    reward_bias = 5.0
+    with reward_model_load_lock:
+        reward_fn, reward_name = rewards.ImageReward_Fk_Steering(device=device, bias=reward_bias), "image_reward_plus_5"
+    image_reward_fn = lambda images: reward_fn(
+        images,
+        [config.prompt] * len(images)
+    )
+    images = pipe(
+        prompt=config.prompt,
+        reward_fn=image_reward_fn,
+        resample_fn=lambda log_w: resample(log_w),
+        negative_prompt=config.negative_prompt,
+        height=config.resolution,
+        width=config.resolution,
+        guidance_scale=config.CFG,
+        num_inference_steps=config.steps,
+        batches=config.num_batches,
+        num_particles=1,
+        batch_p=config.num_batches,
+        proposal_type="without_SMC",
+        output_type="pt",
+    )
+    image_rewards = (image_reward_fn(images) - reward_bias).tolist()
+    pil_images: List[Image.Image] = pipe.image_processor.postprocess(images, "pil")  # type: ignore
+    gpu_mem_used = torch.cuda.max_memory_allocated(device) / 1024**3
+    return InferenceOutput(images=pil_images, image_rewards=image_rewards, gpu_mem_used=gpu_mem_used)
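For reference, a minimal sketch of calling the new finetuned-model entry point directly, mirroring what run_ft_ui in app.py does. It assumes the repository root as working directory, a CUDA device, and the LoRA checkpoint present under checkpoints/; the prompt is illustrative:

import torch
from src.smc.inference import infer_ft, FTInferenceConfig

# Only the prompt is required; the remaining fields fall back to the dataclass
# defaults (resolution=512, CFG=9.0, steps=48, num_batches=4, bundled ckpt_uuid).
cfg = FTInferenceConfig(prompt="a watercolor painting of a lighthouse at dusk")

device = "cuda" if torch.cuda.is_available() else "cpu"

# infer_ft builds the pipeline, loads the LoRA weights named by cfg.ckpt_uuid,
# runs generation, and returns an InferenceOutput with images, image_rewards,
# and gpu_mem_used.
out = infer_ft(cfg, device=device)
for img, reward in zip(out.images, out.image_rewards):
    print(img.size, reward)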