|
|
|
import os |
|
import numpy as np |
|
import torch |
|
from PIL import Image |
|
import gradio as gr |
|
|
|
|
|
# Floating-point precision requested when loading the model weights.
weight_dtype: torch.dtype = torch.float32
|
|
|
|
|
from DAI.pipeline_all import DAIPipeline |
|
from DAI.controlnetvae import ControlNetVAEModel |
|
from DAI.decoder import CustomAutoencoderKL |
|
from diffusers import AutoencoderKL, UNet2DConditionModel |
|
from transformers import CLIPTextModel, AutoTokenizer |
|
|
|
# Model identifiers passed to ``from_pretrained``: the first supplies the
# ControlNet, UNet and second VAE; the second supplies the base VAE, text
# encoder and tokenizer.
pretrained_model_name_or_path = "sjtu-deepvision/dereflection-any-image-v0"
pretrained_model_name_or_path2 = "stabilityai/stable-diffusion-2-1"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Every weight-bearing component is loaded with the same ``weight_dtype`` so
# that changing the precision in one place keeps all sub-models consistent.
controlnet = ControlNetVAEModel.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="controlnet",
    torch_dtype=weight_dtype,
).to(device)
unet = UNet2DConditionModel.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="unet",
    torch_dtype=weight_dtype,
).to(device)
# ``vae_2`` is not a pipeline component; it is handed to ``pipe`` per call.
vae_2 = CustomAutoencoderKL.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="vae_2",
    torch_dtype=weight_dtype,
).to(device)
vae = AutoencoderKL.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="vae",
    torch_dtype=weight_dtype,
).to(device)
text_encoder = CLIPTextModel.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="text_encoder",
    torch_dtype=weight_dtype,
).to(device)
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="tokenizer",
    use_fast=False,
)

# Assemble the inference pipeline. No safety checker, scheduler or feature
# extractor is supplied.
# NOTE(review): the meaning of ``t_start=0`` is defined by DAIPipeline -- confirm there.
pipe = DAIPipeline(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    controlnet=controlnet,
    safety_checker=None,
    scheduler=None,
    feature_extractor=None,
    t_start=0,
).to(device)
|
|
|
def resize_image(image, max_size):
    """Downscale ``image`` so that its longest side is at most ``max_size``.

    The aspect ratio is preserved. Images whose longest side already fits
    are returned unchanged (the same object, not a copy).

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height), resample)`` method (a PIL image).
        max_size: Maximum allowed length, in pixels, of the longest side.

    Returns:
        The original image if no resizing is needed, otherwise the resized
        image produced with Lanczos resampling.
    """
    width, height = image.size
    longest_side = max(width, height)
    if longest_side <= max_size:
        return image

    scale = max_size / longest_side
    # Clamp the shorter side to 1 pixel: truncation can otherwise yield 0 for
    # extremely elongated images, which ``resize`` rejects.
    if width > height:
        new_width = max_size
        new_height = max(1, int(height * scale))
    else:
        new_height = max_size
        new_width = max(1, int(width * scale))

    return image.resize((new_width, new_height), Image.LANCZOS)
|
|
|
|
|
def process_image(input_image, resolution_choice):
    """Run the reflection-removal pipeline on a single image.

    Args:
        input_image: Image as a NumPy array, converted with
            ``Image.fromarray``. ``None`` means no image was supplied.
        resolution_choice: ``"768"`` limits the longest side to 768 pixels;
            any other value keeps the original size, capped at 2560 pixels.

    Returns:
        A ``(input_image, processed_frame)`` tuple of PIL images: the
        (possibly downscaled) input and the pipeline's output.

    Raises:
        gr.Error: If ``input_image`` is ``None``.
    """
    if input_image is None:
        # Surface a readable message in the UI instead of failing inside
        # ``Image.fromarray`` when the button is pressed with no image.
        raise gr.Error("Please upload an image before removing reflections.")

    input_image = Image.fromarray(input_image)

    # NOTE(review): presumably ``None`` selects the pipeline's default
    # processing resolution and ``0`` means "use the input size" -- confirm
    # against DAIPipeline.
    if resolution_choice == "768":
        input_image = resize_image(input_image, 768)
        processing_resolution = None
    elif max(input_image.size) > 2560:
        processing_resolution = 2560
        input_image = resize_image(input_image, 2560)
    else:
        processing_resolution = 0

    pipe_out = pipe(
        image=input_image,
        prompt="remove glass reflection",
        vae_2=vae_2,
        processing_resolution=processing_resolution,
    )

    # Clip the prediction to [-1, 1], rescale to [0, 1], then convert the
    # first entry to 8-bit for PIL.
    processed_frame = (pipe_out.prediction.clip(-1, 1) + 1) / 2
    processed_frame = (processed_frame[0] * 255).astype(np.uint8)
    processed_frame = Image.fromarray(processed_frame)

    return input_image, processed_frame
|
|
|
|
|
def create_gradio_interface():
    """Build the Gradio Blocks UI for the reflection-removal demo.

    The layout has an input column (image, resolution selector, help text,
    submit button) and an output column (processed image), followed by a
    list of example images. Both the examples and the submit button are
    wired to ``process_image`` with the same inputs and outputs.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    # Example rows: files/image/1.png ... files/image/13.png, each at "768".
    example_images = [
        [os.path.join("files", "image", f"{i}.png"), "768"] for i in range(1, 14)
    ]
    title = "# Dereflection Any Image"
    description = """Official demo for **Dereflection Any Image**.
Please refer to our [paper](), [project page](https://abuuu122.github.io/DAI.github.io/), and [github](https://github.com/Abuuu122/Dereflection-Any-Image) for more details."""

    with gr.Blocks() as demo:
        gr.Markdown(title)
        gr.Markdown(description)
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(label="Input Image", type="numpy")
                resolution_choice = gr.Radio(
                    choices=["768", "Original Resolution"],
                    label="Processing Resolution",
                    value="768",
                )
                gr.Markdown(
                    "Select the resolution for processing the image, 768 is recommended for faster processing and stable performance. Higher resolution may take longer to process, we restrict the maximum resolution to 2560."
                )
                submit_btn = gr.Button("Remove Reflection", variant="primary")
            with gr.Column():
                output_image = gr.Image(label="Processed Image")

        # ``process_image`` returns two values, so the examples declare the
        # same two outputs as the button below.
        gr.Examples(
            examples=example_images,
            inputs=[input_image, resolution_choice],
            outputs=[input_image, output_image],
            fn=process_image,
            cache_examples=False,
            label="Example Images",
        )

        # The input component is also an output so it shows the image as it
        # was actually processed (``process_image`` may downscale it).
        submit_btn.click(
            fn=process_image,
            inputs=[input_image, resolution_choice],
            outputs=[input_image, output_image],
        )

    return demo
|
|
|
|
|
def main():
    """Build the demo UI and serve it on all interfaces at port 7860."""
    create_gradio_interface().launch(server_name="0.0.0.0", server_port=7860)


# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    main()