Upload nag_multi_app.py

#1
Files changed (1)
  1. nag_multi_app.py +401 -0
nag_multi_app.py ADDED
@@ -0,0 +1,401 @@
+ import gradio as gr
+ import numpy as np
+ import spaces
+ import torch
+ import random
+ from PIL import Image
+ import math
+
+ # --- Functionality ported from nag_app.py ---
+ # Translation libraries
+ from deep_translator import GoogleTranslator
+ from langdetect import detect
+
+ # NAG-enabled pipeline
+ # Note: to run this code, the `src` directory (containing
+ # pipeline_flux_kontext_nag.py and transformer_flux.py) from the nag_app.py
+ # Hugging Face Space must be placed alongside this file.
+ from src.pipeline_flux_kontext_nag import NAGFluxKontextPipeline
+ from src.transformer_flux import NAGFluxTransformer2DModel
+ # --- End of ported section ---
+
+ # Import diffusers' internal mapping to resolve a LoRA registration error
+ from diffusers.loaders.peft import _SET_ADAPTER_SCALE_FN_MAPPING
+
+
+ # Constants
+ MAX_SEED = np.iinfo(np.int32).max
+ DEFAULT_NAG_NEGATIVE_PROMPT = "Low resolution, blurry, lack of details, big head"
+ OUTPUT_RESOLUTION = 1024
+
+ # --- Model loading ported from nag_app.py ---
+ # Load the NAG-enabled Kontext model
+ transformer = NAGFluxTransformer2DModel.from_pretrained(
+     "black-forest-labs/FLUX.1-Kontext-dev",
+     subfolder="transformer",
+     torch_dtype=torch.bfloat16,
+ )
+ pipe = NAGFluxKontextPipeline.from_pretrained(
+     "black-forest-labs/FLUX.1-Kontext-dev",
+     transformer=transformer,
+     torch_dtype=torch.bfloat16,
+ )
+ pipe = pipe.to("cuda")
+ # --- End of ported section ---
+
+ # --- LoRA loading (5 adapters) ---
+ print("Loading LoRA weights...")
+ # Mapping from LoRA display names to adapter names
+ LORA_MAPPING = {
+     "Hyper-SD": "hyper",
+     "Relighting": "relight",
+     "LoRA 3": "lora_3",
+     "LoRA 4": "lora_4",
+     "LoRA 5": "lora_5",
+ }
+ # 1. Hyper-SD LoRA
+ pipe.load_lora_weights(
+     "ByteDance/Hyper-SD",
+     weight_name="Hyper-FLUX.1-dev-8steps-lora.safetensors",
+     adapter_name=LORA_MAPPING["Hyper-SD"]
+ )
+ # 2. Relighting LoRA
+ pipe.load_lora_weights(
+     "linoyts/relighting-kontext-dev-lora",
+     weight_name="relighting-kontext-dev-lora.safetensors",
+     adapter_name=LORA_MAPPING["Relighting"]
+ )
+ # 3. Additional LoRA 3 (to be configured later)
+ # ★ Note: the repository and file names below are placeholders. Replace them with the correct ones later.
+ try:
+     pipe.load_lora_weights(
+         "author/repo_name_3",  # e.g. "cagliostrolab/animagine-xl-3.0"
+         weight_name="lora_file_3.safetensors",  # e.g. "animagine-xl-3.0.safetensors"
+         adapter_name=LORA_MAPPING["LoRA 3"]
+     )
+ except Exception as e:
+     print(f"Warning: Could not load {list(LORA_MAPPING.keys())[2]}. Please check repository and file names. Error:", e)
+
+ # 4. Additional LoRA 4 (to be configured later)
+ try:
+     pipe.load_lora_weights(
+         "author/repo_name_4",
+         weight_name="lora_file_4.safetensors",
+         adapter_name=LORA_MAPPING["LoRA 4"]
+     )
+ except Exception as e:
+     print(f"Warning: Could not load {list(LORA_MAPPING.keys())[3]}. Please check repository and file names. Error:", e)
+
+ # 5. Additional LoRA 5 (to be configured later)
+ try:
+     pipe.load_lora_weights(
+         "author/repo_name_5",
+         weight_name="lora_file_5.safetensors",
+         adapter_name=LORA_MAPPING["LoRA 5"]
+     )
+ except Exception as e:
+     print(f"Warning: Could not load {list(LORA_MAPPING.keys())[4]}. Please check repository and file names. Error:", e)
+
+ print("LoRA weights loading process finished.")
+ # --- End of changed section ---
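+ # A quick sanity check (assuming a recent diffusers release):
+ # pipe.get_list_adapters() returns the adapter names registered per component,
+ # which should now include "hyper" and "relight" under the transformer.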
+
+ # Register the custom model in diffusers' LoRA adapter-scale mapping
+ _SET_ADAPTER_SCALE_FN_MAPPING[NAGFluxTransformer2DModel.__name__] = _SET_ADAPTER_SCALE_FN_MAPPING["FluxTransformer2DModel"]
+ print("Custom model 'NAGFluxTransformer2DModel' registered for LoRA.")
+
+
+ def round_to_multiple(number, multiple=8):
+     return multiple * round(number / multiple)
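+     # e.g. round_to_multiple(1365, 8) == 1368 and round_to_multiple(766, 8) == 768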
+
+ def concatenate_images(images, direction="horizontal"):
+     if not images: return None
+     valid_images = [img for img in images if img is not None]
+     if not valid_images: return None
+     if len(valid_images) == 1: return valid_images[0].convert("RGB")
+     valid_images = [img.convert("RGB") for img in valid_images]
+     if direction == "horizontal":
+         total_width = sum(img.width for img in valid_images)
+         max_height = max(img.height for img in valid_images)
+         concatenated = Image.new('RGB', (total_width, max_height), (255, 255, 255))
+         x_offset = 0
+         for img in valid_images:
+             y_offset = (max_height - img.height) // 2
+             concatenated.paste(img, (x_offset, y_offset))
+             x_offset += img.width
+     else:
+         max_width = max(img.width for img in valid_images)
+         total_height = sum(img.height for img in valid_images)
+         concatenated = Image.new('RGB', (max_width, total_height), (255, 255, 255))
+         y_offset = 0
+         for img in valid_images:
+             x_offset = (max_width - img.width) // 2
+             concatenated.paste(img, (x_offset, y_offset))
+             y_offset += img.height
+     return concatenated
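+ # e.g. two 512x768 inputs concatenated horizontally produce a 1024x768 canvas,
+ # each image pasted side by side and vertically centered on a white background.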
+
+ @spaces.GPU(duration=25)
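+ # On ZeroGPU Spaces, @spaces.GPU requests a GPU for roughly `duration` seconds
+ # per call; outside a Space the decorator is expected to be a harmless no-op.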
+ # ★ negative_prompt added to infer's arguments
+ def infer(input_images, prompt, negative_prompt, seed, randomize_seed, guidance_scale, nag_negative_prompt, nag_scale, num_inference_steps,
+           # Each LoRA's enable flag and weight are received individually
+           enable_lora1, weight_lora1,
+           enable_lora2, weight_lora2,
+           enable_lora3, weight_lora3,
+           enable_lora4, weight_lora4,
+           enable_lora5, weight_lora5,
+           progress=gr.Progress(track_tqdm=True)):
+
+     active_adapters = []
+     active_weights = []
+
+     lora_params = [
+         (enable_lora1, weight_lora1, "Hyper-SD"),
+         (enable_lora2, weight_lora2, "Relighting"),
+         (enable_lora3, weight_lora3, "LoRA 3"),
+         (enable_lora4, weight_lora4, "LoRA 4"),
+         (enable_lora5, weight_lora5, "LoRA 5"),
+     ]
+
+     for is_enabled, weight, name in lora_params:
+         if is_enabled:
+             adapter_name = LORA_MAPPING[name]
+             active_adapters.append(adapter_name)
+             active_weights.append(weight)
+             print(f"Applying {name} LoRA with weight {weight}")
+
+     if active_adapters:
+         pipe.set_adapters(active_adapters, adapter_weights=active_weights)
+     else:
+         print("No LoRA selected. Running without LoRA.")
+         pipe.disable_lora()
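+     # set_adapters() activates only the listed adapters, scaling each LoRA by
+     # its matching weight; disable_lora() switches them all off so the base
+     # model runs unmodified.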
+
+     prompt = prompt.strip()
+     if prompt:
+         print(f"Original prompt: {prompt}")
+         try:
+             detected_lang = detect(prompt)
+             if detected_lang != 'en':
+                 print(f"Detected language: {detected_lang}. Translating to English...")
+                 translated_prompt = GoogleTranslator(source=detected_lang, target='en').translate(prompt)
+                 prompt = translated_prompt
+                 print(f"Translated prompt: {prompt}")
+             else:
+                 print("Prompt is already in English.")
+         except Exception as e:
+             print(f"Warning: Translation or language detection failed: {e}. Using original prompt.")
+
+     # ★ Added handling for negative_prompt
+     negative_prompt = negative_prompt.strip() if negative_prompt and negative_prompt.strip() else None
+
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+
+     if input_images is None:
+         raise gr.Error("Please upload at least one image.")
+
+     if not isinstance(input_images, list):
+         input_images = [input_images]
+
+     valid_images = [img[0] for img in input_images if img is not None]
+
+     if not valid_images:
+         raise gr.Error("Please upload at least one valid image.")
+
+     if len(valid_images) == 1:
+         print("Single image detected. Calculating aspect-ratio aware dimensions.")
+         input_for_pipe = valid_images[0]
+
+         input_width, input_height = input_for_pipe.size
+         aspect_ratio = input_width / input_height
+         target_pixels = OUTPUT_RESOLUTION * OUTPUT_RESOLUTION
+
+         final_height = int(math.sqrt(target_pixels / aspect_ratio))
+         final_width = int(aspect_ratio * final_height)
+
+         final_width = round_to_multiple(final_width, 8)
+         final_height = round_to_multiple(final_height, 8)
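+         # Worked example: a 1920x1080 input has a 16:9 aspect ratio, so
+         # final_height = int(sqrt(1048576 / (16/9))) = 768 and
+         # final_width = int((16/9) * 768) = 1365, which round to 1368x768,
+         # roughly the same pixel count as 1024x1024.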
+
+         print(f"Output dimensions set to: {final_width}x{final_height}")
+
+     else:
+         print(f"Multiple ({len(valid_images)}) images detected. Using fixed 1024x1024 output.")
+         input_for_pipe = concatenate_images(valid_images, "horizontal")
+         if input_for_pipe is None:
+             raise gr.Error("Failed to process the input images.")
+
+         final_width = OUTPUT_RESOLUTION
+         final_height = OUTPUT_RESOLUTION
+
+     final_prompt = f"From the provided reference images, create a unified, cohesive image such that {prompt}. Maintain the identity and characteristics of each subject while adjusting their proportions, scale, and positioning to create a harmonious, naturally balanced composition. Blend and integrate all elements seamlessly with consistent lighting, perspective, and style. The final result should look like a single naturally captured scene where all subjects are properly sized and positioned relative to each other, not assembled from multiple sources."
+
+     # ★ negative_prompt added to the pipe() call
+     image = pipe(
+         image=input_for_pipe,
+         prompt=final_prompt,
+         negative_prompt=negative_prompt,
+         guidance_scale=guidance_scale,
+         nag_negative_prompt=nag_negative_prompt,
+         nag_scale=nag_scale,
+         width=final_width,
+         height=final_height,
+         num_inference_steps=num_inference_steps,
+         generator=torch.Generator().manual_seed(seed),
+     ).images[0]
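+     # nag_negative_prompt and nag_scale are consumed by the custom
+     # NAGFluxKontextPipeline (Normalized Attention Guidance); larger nag_scale
+     # values push the output further from the NAG negative prompt.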
+
+     pipe.disable_lora()
+
+     return image, seed, gr.update(visible=True)
+
+ css = """
+ #col-container {
+     margin: 0 auto;
+     max-width: 960px;
+ }
+ .lora-row {
+     align-items: center;
+     margin-bottom: 8px;
+ }
+ """
+
+ with gr.Blocks(css=css) as demo:
+
+     with gr.Column(elem_id="col-container"):
+         gr.Markdown("""# FLUX.1 Kontext [dev] - Multi-Image with NAG
+ Compose a new image from multiple images using FLUX.1 Kontext, enhanced with Normalized Attention Guidance (NAG) and automatic prompt translation.
+ - **Single Image Input**: Output will match the input aspect ratio.
+ - **Multiple Image Inputs**: Output will be a fixed 1024x1024 resolution.
+ """)
+         with gr.Row():
+             with gr.Column():
+                 input_images = gr.Gallery(
+                     label="Upload image(s) for editing",
+                     show_label=True,
+                     elem_id="gallery_input",
+                     columns=3,
+                     rows=2,
+                     object_fit="contain",
+                     height="auto",
+                     file_types=['image'],
+                     type='pil'
+                 )
+
+                 with gr.Row():
+                     prompt = gr.Text(
+                         label="Prompt",
+                         show_label=False,
+                         max_lines=1,
+                         placeholder="Enter your prompt (auto-translates to English)",
+                         container=False,
+                     )
+                     run_button = gr.Button("Run", scale=0)
+
+                 with gr.Accordion("Advanced Settings", open=False):
+                     # --- ★ UI change: each LoRA component is defined as its own variable ---
+                     gr.Markdown("### LoRA Settings")
+
+                     with gr.Row(elem_classes="lora-row"):
+                         enable_lora1 = gr.Checkbox(label="Hyper-SD", value=True, scale=1)
+                         weight_lora1 = gr.Slider(label="Weight", minimum=0.0, maximum=2.0, step=0.02, value=0.12, scale=3, visible=True)
+
+                     with gr.Row(elem_classes="lora-row"):
+                         enable_lora2 = gr.Checkbox(label="Relighting", value=False, scale=1)
+                         weight_lora2 = gr.Slider(label="Weight", minimum=0.0, maximum=2.0, step=0.05, value=1.0, scale=3, visible=False)
+
+                     with gr.Row(elem_classes="lora-row"):
+                         enable_lora3 = gr.Checkbox(label="LoRA 3", value=False, scale=1)
+                         weight_lora3 = gr.Slider(label="Weight", minimum=0.0, maximum=2.0, step=0.05, value=0.8, scale=3, visible=False)
+
+                     with gr.Row(elem_classes="lora-row"):
+                         enable_lora4 = gr.Checkbox(label="LoRA 4", value=False, scale=1)
+                         weight_lora4 = gr.Slider(label="Weight", minimum=0.0, maximum=2.0, step=0.05, value=0.8, scale=3, visible=False)
+
+                     with gr.Row(elem_classes="lora-row"):
+                         enable_lora5 = gr.Checkbox(label="LoRA 5", value=False, scale=1)
+                         weight_lora5 = gr.Slider(label="Weight", minimum=0.0, maximum=2.0, step=0.05, value=0.8, scale=3, visible=False)
+                     # --- ★ End of changed section ---
+
+                     gr.Markdown("### Generation Settings")
+
+                     # ★ negative_prompt added to the UI
+                     negative_prompt = gr.Text(
+                         label="Negative Prompt (Standard)",
+                         placeholder="Enter concepts to avoid (e.g., ugly, deformed)",
+                         max_lines=2,
+                     )
+
+                     num_inference_steps = gr.Slider(
+                         label="Inference Steps",
+                         minimum=8,
+                         maximum=50,
+                         step=1,
+                         value=8,
+                     )
+                     guidance_scale = gr.Slider(
+                         label="Guidance Scale",
+                         minimum=1,
+                         maximum=10,
+                         step=0.25,
+                         value=4.5,
+                     )
+                     nag_negative_prompt = gr.Text(
+                         label="Negative Prompt for NAG",
+                         value=DEFAULT_NAG_NEGATIVE_PROMPT,
+                         max_lines=2,
+                         placeholder="Enter concepts to avoid with NAG",
+                     )
+                     nag_scale = gr.Slider(
+                         label="NAG Scale",
+                         minimum=0.0,
+                         maximum=20.0,
+                         step=0.25,
+                         value=3.5
+                     )
+                     seed = gr.Slider(
+                         label="Seed",
+                         minimum=0,
+                         maximum=MAX_SEED,
+                         step=1,
+                         value=0,
+                     )
+                     randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+
+             with gr.Column():
+                 result = gr.Image(label="Result", show_label=False, interactive=False, format="png")
+                 reuse_button = gr.Button("Reuse this image", visible=False)
+
+     # ★ Event handler updated: negative_prompt added to all_inputs
+     all_inputs = [
+         input_images, prompt, negative_prompt, seed, randomize_seed, guidance_scale,
+         nag_negative_prompt, nag_scale, num_inference_steps,
+         enable_lora1, weight_lora1,
+         enable_lora2, weight_lora2,
+         enable_lora3, weight_lora3,
+         enable_lora4, weight_lora4,
+         enable_lora5, weight_lora5,
+     ]
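+     # The order of all_inputs must match infer's positional parameters exactly;
+     # Gradio passes component values by position, not by name.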
+
+     gr.on(
+         triggers=[run_button.click, prompt.submit],
+         fn=infer,
+         inputs=all_inputs,
+         outputs=[result, seed, reuse_button]
+     )
+     # --- ★ End of changed section ---
+
+     reuse_button.click(
+         fn=lambda image: [image] if image is not None else [],
+         inputs=[result],
+         outputs=[input_images]
+     )
+
+     # --- ★ Each checkbox toggles the visibility of its own slider ---
+     def update_visibility(is_checked):
+         return gr.update(visible=is_checked)
+
+     enable_lora1.change(fn=update_visibility, inputs=enable_lora1, outputs=weight_lora1)
+     enable_lora2.change(fn=update_visibility, inputs=enable_lora2, outputs=weight_lora2)
+     enable_lora3.change(fn=update_visibility, inputs=enable_lora3, outputs=weight_lora3)
+     enable_lora4.change(fn=update_visibility, inputs=enable_lora4, outputs=weight_lora4)
+     enable_lora5.change(fn=update_visibility, inputs=enable_lora5, outputs=weight_lora5)
+     # --- ★ End of changed section ---
+
+ demo.launch()
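+ # Running this file locally (rather than as a Space) assumes a CUDA GPU and the
+ # `spaces` package installed; since FLUX.1-Kontext-dev is a gated checkpoint, a
+ # logged-in Hugging Face account that has accepted the model license is likely
+ # required before from_pretrained() succeeds.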