# NAG_FLUX.1-Kontext-Dev / nag_multi_app.py
import gradio as gr
import numpy as np
import spaces
import torch
import random
from PIL import Image
import math
# --- Functionality ported from nag_app.py ---
# Translation libraries
from deep_translator import GoogleTranslator
from langdetect import detect
# NAG-capable pipeline imports
# Note: to run this code, copy the `src` directory (containing
# pipeline_flux_kontext_nag.py and transformer_flux.py) from the nag_app.py
# Hugging Face Space into the same directory as this file.
from src.pipeline_flux_kontext_nag import NAGFluxKontextPipeline
from src.transformer_flux import NAGFluxTransformer2DModel
# --- End of ported section ---
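# Expected layout, per the note above (assumed):
#   ./nag_multi_app.py
#   ./src/pipeline_flux_kontext_nag.py
#   ./src/transformer_flux.py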
# Import diffusers' internal adapter-scale mapping to resolve a LoRA error (see below)
from diffusers.loaders.peft import _SET_ADAPTER_SCALE_FN_MAPPING
# Constants
MAX_SEED = np.iinfo(np.int32).max
DEFAULT_NAG_NEGATIVE_PROMPT = "Low resolution, blurry, lack of details, big head"
OUTPUT_RESOLUTION = 1024
# --- Model loading ported from nag_app.py ---
# Load the NAG-capable Kontext model
transformer = NAGFluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-Kontext-dev",
    subfolder="transformer",
    torch_dtype=torch.bfloat16,
)
pipe = NAGFluxKontextPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Kontext-dev",
    transformer=transformer,
    torch_dtype=torch.bfloat16,
)
pipe = pipe.to("cuda")
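# Optional (not part of the original flow): on GPUs with limited VRAM,
# diffusers pipelines can trade speed for memory via
# pipe.enable_model_cpu_offload() instead of pipe.to("cuda").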
# --- End of ported section ---
# --- LoRA loading (5 slots) ---
print("Loading LoRA weights...")
# Mapping from display names to adapter names
LORA_MAPPING = {
    "Hyper-SD": "hyper",
    "Relighting": "relight",
    "LoRA 3": "lora_3",
    "LoRA 4": "lora_4",
    "LoRA 5": "lora_5",
}
# 1. Hyper-SD LoRA
pipe.load_lora_weights(
    "ByteDance/Hyper-SD",
    weight_name="Hyper-FLUX.1-dev-8steps-lora.safetensors",
    adapter_name=LORA_MAPPING["Hyper-SD"],
)
# 2. Relighting LoRA
pipe.load_lora_weights(
    "linoyts/relighting-kontext-dev-lora",
    weight_name="relighting-kontext-dev-lora.safetensors",
    adapter_name=LORA_MAPPING["Relighting"],
)
# 3. Additional LoRA 3 (configure later)
# NOTE: the repository and file names below are placeholders. Replace them
# with real values before relying on this slot.
try:
    pipe.load_lora_weights(
        "author/repo_name_3",  # e.g. "cagliostrolab/animagine-xl-3.0"
        weight_name="lora_file_3.safetensors",  # e.g. "animagine-xl-3.0.safetensors"
        adapter_name=LORA_MAPPING["LoRA 3"],
    )
except Exception as e:
    print(f"Warning: Could not load {list(LORA_MAPPING.keys())[2]}. Please check repository and file names. Error:", e)
# 4. Additional LoRA 4 (configure later)
try:
    pipe.load_lora_weights(
        "author/repo_name_4",
        weight_name="lora_file_4.safetensors",
        adapter_name=LORA_MAPPING["LoRA 4"],
    )
except Exception as e:
    print(f"Warning: Could not load {list(LORA_MAPPING.keys())[3]}. Please check repository and file names. Error:", e)
# 5. Additional LoRA 5 (configure later)
try:
    pipe.load_lora_weights(
        "author/repo_name_5",
        weight_name="lora_file_5.safetensors",
        adapter_name=LORA_MAPPING["LoRA 5"],
    )
except Exception as e:
    print(f"Warning: Could not load {list(LORA_MAPPING.keys())[4]}. Please check repository and file names. Error:", e)
print("LoRA weights loading process finished.")
# --- End of changed section ---
# Register the custom transformer in diffusers' LoRA adapter-scale table
_SET_ADAPTER_SCALE_FN_MAPPING[NAGFluxTransformer2DModel.__name__] = _SET_ADAPTER_SCALE_FN_MAPPING["FluxTransformer2DModel"]
print("Custom model 'NAGFluxTransformer2DModel' registered for LoRA.")
def round_to_multiple(number, multiple=8):
    return multiple * round(number / multiple)
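# e.g. round_to_multiple(1021) -> 1024 (1021 / 8 = 127.625, rounds to 128, times 8)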
def concatenate_images(images, direction="horizontal"):
    if not images:
        return None
    valid_images = [img for img in images if img is not None]
    if not valid_images:
        return None
    if len(valid_images) == 1:
        return valid_images[0].convert("RGB")
    valid_images = [img.convert("RGB") for img in valid_images]
    if direction == "horizontal":
        total_width = sum(img.width for img in valid_images)
        max_height = max(img.height for img in valid_images)
        concatenated = Image.new('RGB', (total_width, max_height), (255, 255, 255))
        x_offset = 0
        for img in valid_images:
            y_offset = (max_height - img.height) // 2
            concatenated.paste(img, (x_offset, y_offset))
            x_offset += img.width
    else:
        max_width = max(img.width for img in valid_images)
        total_height = sum(img.height for img in valid_images)
        concatenated = Image.new('RGB', (max_width, total_height), (255, 255, 255))
        y_offset = 0
        for img in valid_images:
            x_offset = (max_width - img.width) // 2
            concatenated.paste(img, (x_offset, y_offset))
            y_offset += img.height
    return concatenated
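# e.g. two 512x768 images concatenated horizontally yield a 1024x768 canvas;
# shorter images are centered along the cross axis on a white background.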
@spaces.GPU(duration=25)
# negative_prompt was added to infer's arguments
def infer(input_images, prompt, negative_prompt, seed, randomize_seed, guidance_scale, nag_negative_prompt, nag_scale, num_inference_steps,
          # Per-LoRA enable flags and weights
          enable_lora1, weight_lora1,
          enable_lora2, weight_lora2,
          enable_lora3, weight_lora3,
          enable_lora4, weight_lora4,
          enable_lora5, weight_lora5,
          progress=gr.Progress(track_tqdm=True)):
    active_adapters = []
    active_weights = []
    lora_params = [
        (enable_lora1, weight_lora1, "Hyper-SD"),
        (enable_lora2, weight_lora2, "Relighting"),
        (enable_lora3, weight_lora3, "LoRA 3"),
        (enable_lora4, weight_lora4, "LoRA 4"),
        (enable_lora5, weight_lora5, "LoRA 5"),
    ]
    for is_enabled, weight, name in lora_params:
        if is_enabled:
            adapter_name = LORA_MAPPING[name]
            active_adapters.append(adapter_name)
            active_weights.append(weight)
            print(f"Applying {name} LoRA with weight {weight}")
    if active_adapters:
        pipe.set_adapters(active_adapters, adapter_weights=active_weights)
    else:
        print("No LoRA selected. Running without LoRA.")
        pipe.disable_lora()
    prompt = prompt.strip()
    if prompt:
        print(f"Original prompt: {prompt}")
        try:
            detected_lang = detect(prompt)
            if detected_lang != 'en':
                print(f"Detected language: {detected_lang}. Translating to English...")
                translated_prompt = GoogleTranslator(source=detected_lang, target='en').translate(prompt)
                prompt = translated_prompt
                print(f"Translated prompt: {prompt}")
            else:
                print("Prompt is already in English.")
        except Exception as e:
            print(f"Warning: Translation or language detection failed: {e}. Using original prompt.")
    # Normalize the standard negative prompt (empty string -> None)
    negative_prompt = negative_prompt.strip() if negative_prompt and negative_prompt.strip() else None
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    if input_images is None:
        raise gr.Error("Please upload at least one image.")
    if not isinstance(input_images, list):
        input_images = [input_images]
    valid_images = [img[0] for img in input_images if img is not None]
    if not valid_images:
        raise gr.Error("Please upload at least one valid image.")
    if len(valid_images) == 1:
        print("Single image detected. Calculating aspect-ratio aware dimensions.")
        input_for_pipe = valid_images[0]
        input_width, input_height = input_for_pipe.size
        aspect_ratio = input_width / input_height
        target_pixels = OUTPUT_RESOLUTION * OUTPUT_RESOLUTION
        final_height = int(math.sqrt(target_pixels / aspect_ratio))
        final_width = int(aspect_ratio * final_height)
        final_width = round_to_multiple(final_width, 8)
        final_height = round_to_multiple(final_height, 8)
        print(f"Output dimensions set to: {final_width}x{final_height}")
    else:
        print(f"Multiple ({len(valid_images)}) images detected. Using fixed 1024x1024 output.")
        input_for_pipe = concatenate_images(valid_images, "horizontal")
        if input_for_pipe is None:
            raise gr.Error("Failed to process the input images.")
        final_width = OUTPUT_RESOLUTION
        final_height = OUTPUT_RESOLUTION
    final_prompt = f"From the provided reference images, create a unified, cohesive image such that {prompt}. Maintain the identity and characteristics of each subject while adjusting their proportions, scale, and positioning to create a harmonious, naturally balanced composition. Blend and integrate all elements seamlessly with consistent lighting, perspective, and style. The final result should look like a single naturally captured scene where all subjects are properly sized and positioned relative to each other, not assembled from multiple sources."
    # negative_prompt added to the pipe() call
    image = pipe(
        image=input_for_pipe,
        prompt=final_prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        nag_negative_prompt=nag_negative_prompt,
        nag_scale=nag_scale,
        width=final_width,
        height=final_height,
        num_inference_steps=num_inference_steps,
        generator=torch.Generator().manual_seed(seed),
    ).images[0]
    pipe.disable_lora()
    return image, seed, gr.update(visible=True)
css="""
#col-container {
margin: 0 auto;
max-width: 960px;
}
.lora-row {
align-items: center;
margin-bottom: 8px;
}
"""
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""# FLUX.1 Kontext [dev] - Multi-Image with NAG
Compose a new image from multiple images using FLUX.1 Kontext, enhanced with Normalized Attention Guidance (NAG) and automatic prompt translation.
- **Single Image Input**: Output will match the input aspect ratio.
- **Multiple Image Inputs**: Output will be a fixed 1024x1024 resolution.
""")
        with gr.Row():
            with gr.Column():
                input_images = gr.Gallery(
                    label="Upload image(s) for editing",
                    show_label=True,
                    elem_id="gallery_input",
                    columns=3,
                    rows=2,
                    object_fit="contain",
                    height="auto",
                    file_types=['image'],
                    type='pil',
                )
                with gr.Row():
                    prompt = gr.Text(
                        label="Prompt",
                        show_label=False,
                        max_lines=1,
                        placeholder="Enter your prompt (auto-translates to English)",
                        container=False,
                    )
                    run_button = gr.Button("Run", scale=0)
                with gr.Accordion("Advanced Settings", open=False):
                    # --- UI: each LoRA control is defined as a separate variable ---
                    gr.Markdown("### LoRA Settings")
                    with gr.Row(elem_classes="lora-row"):
                        enable_lora1 = gr.Checkbox(label="Hyper-SD", value=True, scale=1)
                        weight_lora1 = gr.Slider(label="Weight", minimum=0.0, maximum=2.0, step=0.02, value=0.12, scale=3, visible=True)
                    with gr.Row(elem_classes="lora-row"):
                        enable_lora2 = gr.Checkbox(label="Relighting", value=False, scale=1)
                        weight_lora2 = gr.Slider(label="Weight", minimum=0.0, maximum=2.0, step=0.05, value=1.0, scale=3, visible=False)
                    with gr.Row(elem_classes="lora-row"):
                        enable_lora3 = gr.Checkbox(label="LoRA 3", value=False, scale=1)
                        weight_lora3 = gr.Slider(label="Weight", minimum=0.0, maximum=2.0, step=0.05, value=0.8, scale=3, visible=False)
                    with gr.Row(elem_classes="lora-row"):
                        enable_lora4 = gr.Checkbox(label="LoRA 4", value=False, scale=1)
                        weight_lora4 = gr.Slider(label="Weight", minimum=0.0, maximum=2.0, step=0.05, value=0.8, scale=3, visible=False)
                    with gr.Row(elem_classes="lora-row"):
                        enable_lora5 = gr.Checkbox(label="LoRA 5", value=False, scale=1)
                        weight_lora5 = gr.Slider(label="Weight", minimum=0.0, maximum=2.0, step=0.05, value=0.8, scale=3, visible=False)
                    # --- End of changed section ---
gr.Markdown("### Generation Settings")
# ★ UIに negative_prompt を追加
negative_prompt = gr.Text(
label="Negative Prompt (Standard)",
placeholder="Enter concepts to avoid (e.g., ugly, deformed)",
max_lines=2,
)
                    num_inference_steps = gr.Slider(
                        label="Inference Steps",
                        minimum=8,
                        maximum=50,
                        step=1,
                        value=8,
                    )
                    guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=1,
                        maximum=10,
                        step=0.25,
                        value=4.5,
                    )
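                    # Roughly speaking, NAG (Normalized Attention Guidance)
                    # applies negative guidance inside attention, which is what
                    # lets a negative prompt influence this guidance-distilled
                    # model; a higher nag_scale pushes the result further away
                    # from the NAG negative prompt.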
                    nag_negative_prompt = gr.Text(
                        label="Negative Prompt for NAG",
                        value=DEFAULT_NAG_NEGATIVE_PROMPT,
                        max_lines=2,
                        placeholder="Enter concepts to avoid with NAG",
                    )
                    nag_scale = gr.Slider(
                        label="NAG Scale",
                        minimum=0.0,
                        maximum=20.0,
                        step=0.25,
                        value=3.5,
                    )
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0,
                    )
                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            with gr.Column():
                result = gr.Image(label="Result", show_label=False, interactive=False, format="png")
                reuse_button = gr.Button("Reuse this image", visible=False)
    # Event wiring: negative_prompt added to all_inputs
    all_inputs = [
        input_images, prompt, negative_prompt, seed, randomize_seed, guidance_scale,
        nag_negative_prompt, nag_scale, num_inference_steps,
        enable_lora1, weight_lora1,
        enable_lora2, weight_lora2,
        enable_lora3, weight_lora3,
        enable_lora4, weight_lora4,
        enable_lora5, weight_lora5,
    ]
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=all_inputs,
        outputs=[result, seed, reuse_button],
    )
    # --- End of changed section ---
    reuse_button.click(
        fn=lambda image: [image] if image is not None else [],
        inputs=[result],
        outputs=[input_images],
    )
    # --- Toggle each weight slider's visibility with its checkbox ---
    def update_visibility(is_checked):
        return gr.update(visible=is_checked)
    enable_lora1.change(fn=update_visibility, inputs=enable_lora1, outputs=weight_lora1)
    enable_lora2.change(fn=update_visibility, inputs=enable_lora2, outputs=weight_lora2)
    enable_lora3.change(fn=update_visibility, inputs=enable_lora3, outputs=weight_lora3)
    enable_lora4.change(fn=update_visibility, inputs=enable_lora4, outputs=weight_lora4)
    enable_lora5.change(fn=update_visibility, inputs=enable_lora5, outputs=weight_lora5)
    # --- End of changed section ---
demo.launch()