Spaces:

LPX55
/

Kontext-Multi_Lightning_4bit-nf4

Running on Zero

Kontext-Multi_Lightning_4bit-nf4 / float16.py

LPX

remove check

9e6ebab 19 days ago

11.9 kB

	import gradio as gr
	import numpy as np
	import spaces
	import torch
	import random
	import os
	import subprocess
	import logging
	import safetensors
	#####################################################
	# Forced Diffusers upgrade when cache was being stubborn; probably not needed now
	# force = subprocess.run("pip install -U diffusers", shell=True)
	# force = subprocess.run("pip install git+https://github.com/huggingface/diffusers.git", shell=True)
	# force = subprocess.run("pip install git+https://github.com/huggingface/transformers.git", shell=True)
	force = subprocess.run("git lfs install", shell=True)

	#####################################################
	import transformers
	import diffusers
	from diffusers import DiffusionPipeline
	import bitsandbytes
	from diffusers.quantizers import PipelineQuantizationConfig
	from diffusers.utils import load_image
	from diffusers import FluxKontextPipeline
	from PIL import Image
	from huggingface_hub import hf_hub_download
	from huggingface_hub import create_repo, upload_folder
	from huggingface_hub.utils._runtime import dump_environment_info
	from safetensors import safe_open

	#####################################################

	# Upper bound for random seeds: the largest signed 32-bit integer.
	MAX_SEED = np.iinfo(np.int32).max
	# Hugging Face access token used for the Hub upload below.
	# Raises KeyError at import time when HF_TOKEN is not set.
	API_TOKEN = os.environ['HF_TOKEN']
	# NOTE(review): the pipeline load below calls .to("cuda") unconditionally
	# and does not consult DEVICE -- confirm whether DEVICE is still needed.
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

	# Opt out of Gradio analytics / Hub telemetry unless the environment
	# already sets these variables (setdefault never overrides).
	# NOTE(review): these run after gradio and huggingface_hub were imported;
	# if either library reads its flag at import time the default set here is
	# too late -- TODO confirm.
	os.environ.setdefault('GRADIO_ANALYTICS_ENABLED', 'False')
	os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')

	# Print library/environment details for debugging, then enable
	# DEBUG-level logging for the whole process.
	dump_environment_info()
	logging.basicConfig(level=logging.DEBUG)
	logger = logging.getLogger(__name__)

	#####################################################

	# TESTING TWO QUANTIZATION METHODS
	# 1) If FP8 is supported; `torchao` for quantization
	# quant_config = PipelineQuantizationConfig(
	# quant_backend="torchao",
	# quant_kwargs={"quant_type": "float8dq_e4m3_row"},
	# components_to_quantize=["transformer"]
	# )
	# 2) Otherwise, standard 4-bit quantization with bitsandbytes
	# quant_config = PipelineQuantizationConfig(
	# quant_backend="bitsandbytes_4bit",
	# quant_kwargs={"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.bfloat16, "bnb_4bit_quant_type": "nf4"},
	# components_to_quantize=["transformer"]
	# )

	# Best-effort: allow this process to use the full GPU memory pool (ZeroGPU).
	# The CUDA call raises when no usable GPU is available, so it lives in its
	# own try block and a failure is only reported, never fatal.
	try:
	    torch.cuda.set_per_process_memory_fraction(1.0)
	except Exception as e:
	    print(f"Error setting memory usage: {e}")

	# Kept outside the try block on purpose: this setting does not depend on
	# CUDA and must not be skipped when the CUDA call above fails.
	torch.set_float32_matmul_precision("high")

	#####################################################
	# Load the full pipeline in float16 (half precision). No quantization is applied here:
	# both quantization configs above are commented out.
	# HF Spaces VRAM (50 GB) is sufficient to hold the entire pipeline (31.424 GB),
	# so the whole pipeline is kept on the GPU for the best performance.

	# FLUX.1 Dev Kontext Lightning Model / 8-Steps
	kontext_model = "LPX55/FLUX.1_Kontext-Lightning"
	local_folder = "./flux_16bit"
	hub_repo_name = "LPX55/FLUX.1_Kontext-Lightning"

	# Load every pipeline component in float16, then move the pipeline to the GPU.
	pipe = FluxKontextPipeline.from_pretrained(kontext_model, torch_dtype=torch.float16)
	pipe = pipe.to("cuda")

	# Write the pipeline to disk as safetensors. The very large shard limit is
	# there so that weights are not split across multiple shard files.
	pipe.save_pretrained(local_folder, safe_serialization=True, max_shard_size="100GB")

	# create_repo(hub_repo_name, exist_ok=True, private=False)

	# with safe_open("./flux_16bit/model.safetensors", framework="pt", device="cuda") as f:
	#     for k in f.keys():
	#         print(k, f.get_slice(k).shape)

	# Publish the saved folder under "float16/" in the model repository.
	# This executes at module level, i.e. every time the script starts.
	upload_folder(
	    repo_id=hub_repo_name,
	    repo_type="model",
	    folder_path=local_folder,
	    path_in_repo="float16",
	    commit_message="Upload half-precision FLUX.1 Kontext Lightning model",
	    token=API_TOKEN,
	)
	###################################################
	# SECTION FOR LORA(S); SKIP FOR NOW

	# try:
	# repo_name = ""
	# ckpt_name = ""
	# pipe.load_lora_weights(hf_hub_download(repo_name, ckpt_name), adapter_name="A1")
	# pipe.set_adapters(["A1"], adapter_weights=[0.5])
	# pipe.fuse_lora(adapter_names=["A1"], lora_scale=1.0)
	# pipe.unload_lora_weights()

	# except Exception as e:
	# print(f"Error while loading Lora: {e}")

	#####################################################
	def concatenate_images(images, direction="horizontal"):
	    """
	    Join several PIL images into one RGB image, side by side or stacked.

	    ``None`` entries are ignored. Images are laid out in input order on a
	    white canvas; along the non-stacking axis each image is centered.

	    Args:
	        images: List of PIL Images (entries may be None)
	        direction: "horizontal" for side by side; any other value stacks
	            the images vertically

	    Returns:
	        PIL Image: The combined RGB image, the single input converted to
	        RGB when only one image is usable, or None when there is none.
	    """
	    if not images:
	        return None

	    # Drop missing entries and normalise the colour mode in one pass.
	    frames = [frame.convert("RGB") for frame in images if frame is not None]
	    if not frames:
	        return None
	    if len(frames) == 1:
	        return frames[0]

	    widths = [frame.width for frame in frames]
	    heights = [frame.height for frame in frames]
	    side_by_side = direction == "horizontal"

	    # Stacking axis: sum of extents; other axis: largest extent.
	    if side_by_side:
	        canvas_width, canvas_height = sum(widths), max(heights)
	    else:
	        canvas_width, canvas_height = max(widths), sum(heights)

	    canvas = Image.new('RGB', (canvas_width, canvas_height), (255, 255, 255))

	    cursor = 0  # running offset along the stacking axis
	    for frame in frames:
	        if side_by_side:
	            position = (cursor, (canvas_height - frame.height) // 2)
	            cursor += frame.width
	        else:
	            position = ((canvas_width - frame.width) // 2, cursor)
	            cursor += frame.height
	        canvas.paste(frame, position)

	    return canvas

	@spaces.GPU
	@torch.no_grad()
	def infer(input_images, prompt, seed=42, randomize_seed=False, guidance_scale=2.5, steps=8, width=1024, height=1024, progress=gr.Progress(track_tqdm=True)):
	    """
	    Merge the uploaded reference images into one scene with the Kontext pipeline.

	    The usable images are concatenated horizontally, the user prompt is
	    embedded in a fixed instruction template, and the module-level ``pipe``
	    is run once on the result.

	    Args:
	        input_images: A single image/gallery entry or a list of them. Each
	            entry may be an ``(image, caption)`` pair or a bare image;
	            ``None`` entries are skipped.
	        prompt: User text inserted into the instruction template.
	        seed: Seed for the random generator (replaced when randomize_seed).
	        randomize_seed: When true, draw a fresh seed in [0, MAX_SEED].
	        guidance_scale: Passed through to the pipeline.
	        steps: Number of inference steps.
	        width: Requested output width; also used for ``max_area``.
	        height: Requested output height; also used for ``max_area``.
	        progress: Gradio progress tracker (tracks tqdm output).

	    Returns:
	        tuple: (generated image, seed actually used, gr.update(visible=True)).

	    Raises:
	        gr.Error: When no usable image was provided or the images could not
	            be combined.
	    """
	    if randomize_seed:
	        seed = random.randint(0, MAX_SEED)

	    if input_images is None:
	        raise gr.Error("Please upload at least one image.")

	    # Accept a single entry as well as a list of entries.
	    if not isinstance(input_images, list):
	        input_images = [input_images]

	    # Entries are unpacked as (image, caption) pairs; a bare image (e.g. the
	    # single-image case handled above) is not subscriptable, so it is taken
	    # as-is instead of being indexed with [0].
	    valid_images = [
	        entry[0] if isinstance(entry, (tuple, list)) else entry
	        for entry in input_images
	        if entry is not None
	    ]

	    if not valid_images:
	        raise gr.Error("Please upload at least one valid image.")

	    concatenated_image = concatenate_images(valid_images, "horizontal")

	    if concatenated_image is None:
	        raise gr.Error("Failed to process the input images.")

	    # The concatenated image is passed on without a manual resize (a former
	    # "1024 px long edge, multiple of 64" resize step is disabled); sizing is
	    # driven by the width/height/max_area arguments below.
	    final_prompt = f"From the provided reference images, create a unified, cohesive image such that {prompt}. Maintain the identity and characteristics of each subject while adjusting their proportions, scale, and positioning to create a harmonious, naturally balanced composition. Blend and integrate all elements seamlessly with consistent lighting, perspective, and style.the final result should look like a single naturally captured scene where all subjects are properly sized and positioned relative to each other, not assembled from multiple sources."

	    image = pipe(
	        image=concatenated_image,
	        prompt=final_prompt,
	        guidance_scale=guidance_scale,
	        width=width,
	        height=height,
	        max_area=width * height,
	        num_inference_steps=steps,
	        generator=torch.Generator().manual_seed(seed),
	    ).images[0]

	    # The third value reveals the "Reuse this image" button in the UI.
	    return image, seed, gr.update(visible=True)

	# Page-level CSS: center the main column and cap its width.
	css = """
	#col-container {
	    margin: 0 auto;
	    max-width: 86vw;
	}
	"""

	# NOTE(review): the nesting of the layout containers below was reconstructed
	# from a copy of this file without indentation -- confirm in particular
	# which sliders belong inside gr.Group().
	with gr.Blocks(css=css) as demo:

	    with gr.Column(elem_id="col-container"):
	        # Plain string with an explicit "\\|": the text is unchanged, but the
	        # former f-string contained the invalid escape sequence "\|" (a
	        # SyntaxWarning on Python 3.12+) and had nothing to interpolate.
	        gr.Markdown("# FLUX.1 Kontext \\| Lightning 8-Step Model ⚡\n")
	        with gr.Row():
	            # Left column: inputs and settings.
	            with gr.Column():
	                input_images = gr.Gallery(
	                    label="Upload image(s) for editing",
	                    show_label=True,
	                    elem_id="gallery_input",
	                    columns=3,
	                    rows=2,
	                    object_fit="contain",
	                    height="auto",
	                    file_types=['image'],
	                    type='pil'
	                )

	                with gr.Row():
	                    prompt = gr.Text(
	                        label="Prompt",
	                        show_label=False,
	                        max_lines=1,
	                        placeholder="Enter your prompt for editing (e.g., 'Remove glasses', 'Add a hat')",
	                        container=False,
	                    )
	                    run_button = gr.Button("Run", scale=0)

	                with gr.Accordion("Advanced Settings", open=True):

	                    with gr.Group():
	                        width = gr.Slider(
	                            label="W",
	                            minimum=512,
	                            maximum=2560,
	                            step=64,
	                            value=1024,
	                        )

	                        height = gr.Slider(
	                            label="H",
	                            minimum=512,
	                            maximum=2560,
	                            step=64,
	                            value=1024,
	                        )

	                    seed = gr.Slider(
	                        label="Seed",
	                        minimum=0,
	                        maximum=MAX_SEED,
	                        step=1,
	                        value=0,
	                    )

	                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

	                    guidance_scale = gr.Slider(
	                        label="Guidance Scale",
	                        minimum=1,
	                        maximum=10,
	                        step=0.1,
	                        value=2.5,
	                    )
	                    # NOTE(review): the slider starts at 16 steps while
	                    # infer() defaults to steps=8 and the title advertises an
	                    # 8-step model -- confirm the intended default.
	                    input_steps = gr.Slider(
	                        label="Steps",
	                        minimum=1,
	                        maximum=30,
	                        step=1,
	                        value=16,
	                    )

	            # Right column: result and the button to feed it back as input.
	            with gr.Column():
	                result = gr.Image(label="Result", show_label=False, interactive=False)
	                reuse_button = gr.Button("Reuse this image", visible=False)

	    # Run inference on button click or when the prompt is submitted.
	    gr.on(
	        triggers=[run_button.click, prompt.submit],
	        fn=infer,
	        inputs=[input_images, prompt, seed, randomize_seed, guidance_scale, input_steps, width, height],
	        outputs=[result, seed, reuse_button]
	    )

	    reuse_button.click(
	        fn=lambda image: [image] if image is not None else [],  # Convert single image to list for gallery
	        inputs=[result],
	        outputs=[input_images]
	    )

	demo.queue().launch()