Fabrice-TIERCELIN committed on
Commit
c5f9ee1
·
verified ·
1 Parent(s): ca6455d

Adapt the code

Browse files
Files changed (1) hide show
  1. app.py +8 -136
app.py CHANGED
@@ -14,7 +14,6 @@ import numpy as np
14
  import argparse
15
  import random
16
  import math
17
- import time
18
  # 20250506 pftq: Added for video input loading
19
  import decord
20
  # 20250506 pftq: Added for progress bars in video_encode
@@ -107,8 +106,6 @@ stream = AsyncStream()
107
  outputs_folder = './outputs/'
108
  os.makedirs(outputs_folder, exist_ok=True)
109
 
110
- input_image_debug_value = input_video_debug_value = prompt_debug_value = total_second_length_debug_value = None
111
-
112
  def check_parameters(generation_mode, input_image, input_video):
113
  if generation_mode == "image" and input_image is None:
114
  raise gr.Error("Please provide an image to extend.")
@@ -515,10 +512,6 @@ def worker(input_image, prompts, n_prompt, seed, total_second_length, latent_win
515
  return
516
 
517
  def get_duration(input_image, prompt, generation_mode, n_prompt, randomize_seed, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf):
518
- global total_second_length_debug_value
519
-
520
- if total_second_length_debug_value is not None:
521
- return min(total_second_length_debug_value * 60 * 10, 600)
522
  return total_second_length * 60
523
 
524
 
@@ -538,18 +531,12 @@ def process(input_image, prompt,
538
  use_teacache=False,
539
  mp4_crf=16
540
  ):
541
- global stream, input_image_debug_value, prompt_debug_value, total_second_length_debug_value
542
 
543
  if torch.cuda.device_count() == 0:
544
  gr.Warning('Set this space to GPU config to make it work.')
545
  return None, None, None, None, None, None
546
 
547
- if input_image_debug_value is not None or prompt_debug_value is not None or total_second_length_debug_value is not None:
548
- input_image = input_image_debug_value
549
- prompt = prompt_debug_value
550
- total_second_length = total_second_length_debug_value
551
- input_image_debug_value = prompt_debug_value = total_second_length_debug_value = None
552
-
553
  if randomize_seed:
554
  seed = random.randint(0, np.iinfo(np.int32).max)
555
 
@@ -778,28 +765,15 @@ def worker_video(input_video, prompt, n_prompt, seed, batch, resolution, total_s
778
  clean_latent_4x_indices=clean_latent_4x_indices,
779
  callback=callback,
780
  )
781
- start = time.time()
782
 
783
  total_generated_latent_frames += int(generated_latents.shape[2])
784
  history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2)
785
- end = time.time()
786
- secondes = int(end - start)
787
- print("1 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
788
- start = time.time()
789
 
790
  if not high_vram:
791
  offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
792
  load_model_as_complete(vae, target_device=gpu)
793
- end = time.time()
794
- secondes = int(end - start)
795
- print("2 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
796
- start = time.time()
797
 
798
  real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :]
799
- end = time.time()
800
- secondes = int(end - start)
801
- print("3 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
802
- start = time.time()
803
 
804
  if history_pixels is None:
805
  history_pixels = vae_decode(real_history_latents, vae).cpu()
@@ -814,17 +788,9 @@ def worker_video(input_video, prompt, n_prompt, seed, batch, resolution, total_s
814
 
815
  current_pixels = vae_decode(real_history_latents[:, :, -section_latent_frames:], vae).cpu()
816
  history_pixels = soft_append_bcthw(history_pixels, current_pixels, overlapped_frames)
817
- end = time.time()
818
- secondes = int(end - start)
819
- print("4 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
820
- start = time.time()
821
 
822
  if not high_vram:
823
  unload_complete_models()
824
- end = time.time()
825
- secondes = int(end - start)
826
- print("5 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
827
- start = time.time()
828
 
829
  output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
830
 
@@ -834,10 +800,6 @@ def worker_video(input_video, prompt, n_prompt, seed, batch, resolution, total_s
834
  # 20250508 pftq: Save prompt to mp4 metadata comments
835
  set_mp4_comments_imageio_ffmpeg(output_filename, f"Prompt: {prompt} | Negative Prompt: {n_prompt}");
836
  print(f"Prompt saved to mp4 metadata comments: {output_filename}")
837
- end = time.time()
838
- secondes = int(end - start)
839
- print("6 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
840
- start = time.time()
841
 
842
  # 20250506 pftq: Clean up previous partial files
843
  if previous_video is not None and os.path.exists(previous_video):
@@ -847,17 +809,10 @@ def worker_video(input_video, prompt, n_prompt, seed, batch, resolution, total_s
847
  except Exception as e:
848
  print(f"Error deleting previous partial video {previous_video}: {e}")
849
  previous_video = output_filename
850
- end = time.time()
851
- secondes = int(end - start)
852
- print("7 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
853
- start = time.time()
854
 
855
  print(f'Decoded. Current latent shape {real_history_latents.shape}; pixel shape {history_pixels.shape}')
856
 
857
  stream.output_queue.push(('file', output_filename))
858
- end = time.time()
859
- secondes = int(end - start)
860
- print("8 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
861
 
862
  seed = (seed + 1) % np.iinfo(np.int32).max
863
 
@@ -873,26 +828,17 @@ def worker_video(input_video, prompt, n_prompt, seed, batch, resolution, total_s
873
  return
874
 
875
  def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
876
- global total_second_length_debug_value
877
- if total_second_length_debug_value is not None:
878
- return min(total_second_length_debug_value * 60 * 10, 600)
879
  return total_second_length * 60 * 2
880
 
881
  # 20250506 pftq: Modified process to pass clean frame count, etc from video_encode
882
  @spaces.GPU(duration=get_duration_video)
883
  def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
884
- global stream, high_vram, input_video_debug_value, prompt_debug_value, total_second_length_debug_value
885
 
886
  if torch.cuda.device_count() == 0:
887
  gr.Warning('Set this space to GPU config to make it work.')
888
  return None, None, None, None, None, None
889
 
890
- if input_video_debug_value is not None or prompt_debug_value is not None or total_second_length_debug_value is not None:
891
- input_video = input_video_debug_value
892
- prompt = prompt_debug_value
893
- total_second_length = total_second_length_debug_value
894
- input_video_debug_value = prompt_debug_value = total_second_length_debug_value = None
895
-
896
  if randomize_seed:
897
  seed = random.randint(0, np.iinfo(np.int32).max)
898
 
@@ -971,9 +917,9 @@ with block:
971
  if torch.cuda.device_count() == 0:
972
  with gr.Row():
973
  gr.HTML("""
974
- <p style="background-color: red;"><big><big><big><b>⚠️To use FramePack, <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/FramePack?duplicate=true">duplicate this space</a> and set a GPU with 30 GB VRAM.</b>
975
 
976
- You can't use FramePack directly here because this space runs on a CPU, which is not enough for FramePack. Please provide <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/FramePack/discussions/new">feedback</a> if you have issues.
977
  </big></big></big></p>
978
  """)
979
  gr.HTML(title_html)
@@ -983,7 +929,7 @@ with block:
983
  text_to_video_hint = gr.HTML("I discourage to use the Text-to-Video feature. You should rather generate an image with Flux and use Image-to-Video. You will save time.", visible=False)
984
  input_image = gr.Image(sources='upload', type="numpy", label="Image", height=320)
985
  input_video = gr.Video(sources='upload', label="Input Video", height=320, visible=False)
986
- prompt = gr.Textbox(label="Prompt", value='', info='Use ; to separate in time', placeholder="The creature starts to move, fast motion, fixed camera")
987
  prompt_number = gr.Slider(label="Timed prompt number", minimum=0, maximum=1000, value=0, step=1, info='Not for video extension')
988
  prompt_number.change(fn=handle_prompt_number_change, inputs=[], outputs=[])
989
 
@@ -1040,12 +986,6 @@ with block:
1040
 
1041
  mp4_crf = gr.Slider(label="MP4 Compression", minimum=0, maximum=100, value=16, step=1, info="Lower means better quality. 0 is uncompressed. Change to 16 if you get black outputs. ")
1042
 
1043
- with gr.Accordion("Debug", open=False):
1044
- input_image_debug = gr.Image(type="numpy", label="Image Debug", height=320)
1045
- input_video_debug = gr.Video(sources='upload', label="Input Video Debug", height=320)
1046
- prompt_debug = gr.Textbox(label="Prompt Debug", value='')
1047
- total_second_length_debug = gr.Slider(label="Additional Video Length to Generate (Seconds) Debug", minimum=1, maximum=120, value=1, step=0.1)
1048
-
1049
  with gr.Column():
1050
  preview_image = gr.Image(label="Next Latents", height=200, visible=False)
1051
  result_video = gr.Video(label="Finished Frames", autoplay=True, show_share_button=False, height=512, loop=True)
@@ -1064,8 +1004,7 @@ with block:
1064
  ], outputs = [], queue = False, show_progress = False).success(fn=process_video, inputs=ips_video, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button])
1065
  end_button.click(fn=end_process)
1066
 
1067
- with gr.Row(elem_id="image_examples", visible=False):
1068
- gr.Examples(
1069
  examples = [
1070
  [
1071
  "./img_examples/Example1.png", # input_image
@@ -1100,41 +1039,7 @@ with block:
1100
  6, # gpu_memory_preservation
1101
  False, # use_teacache
1102
  16 # mp4_crf
1103
- ],
1104
- [
1105
- "./img_examples/Example1.png", # input_image
1106
- "We are sinking, photorealistic, realistic, intricate details, 8k, insanely detailed",
1107
- "image", # generation_mode
1108
- "Missing arm, unrealistic position, blurred, blurry", # n_prompt
1109
- True, # randomize_seed
1110
- 42, # seed
1111
- 1, # total_second_length
1112
- 9, # latent_window_size
1113
- 25, # steps
1114
- 1.0, # cfg
1115
- 10.0, # gs
1116
- 0.0, # rs
1117
- 6, # gpu_memory_preservation
1118
- False, # use_teacache
1119
- 16 # mp4_crf
1120
- ],
1121
- [
1122
- "./img_examples/Example1.png", # input_image
1123
- "A boat is passing, photorealistic, realistic, intricate details, 8k, insanely detailed",
1124
- "image", # generation_mode
1125
- "Missing arm, unrealistic position, blurred, blurry", # n_prompt
1126
- True, # randomize_seed
1127
- 42, # seed
1128
- 1, # total_second_length
1129
- 9, # latent_window_size
1130
- 25, # steps
1131
- 1.0, # cfg
1132
- 10.0, # gs
1133
- 0.0, # rs
1134
- 6, # gpu_memory_preservation
1135
- False, # use_teacache
1136
- 16 # mp4_crf
1137
- ],
1138
  ],
1139
  run_on_click = True,
1140
  fn = process,
@@ -1143,8 +1048,7 @@ with block:
1143
  cache_examples = torch.cuda.device_count() > 0,
1144
  )
1145
 
1146
- with gr.Row(elem_id="video_examples", visible=False):
1147
- gr.Examples(
1148
  examples = [
1149
  [
1150
  "./img_examples/Example1.mp4", # input_video
@@ -1185,42 +1089,10 @@ with block:
1185
  return [gr.update(visible = False), gr.update(visible = False), gr.update(visible = True), gr.update(visible = False), gr.update(visible = True)]
1186
 
1187
 
1188
- def handle_field_debug_change(input_image_debug_data, input_video_debug_data, prompt_debug_data, total_second_length_debug_data):
1189
- global input_image_debug_value, input_video_debug_value, prompt_debug_value, total_second_length_debug_value
1190
- input_image_debug_value = input_image_debug_data
1191
- input_video_debug_value = input_video_debug_data
1192
- prompt_debug_value = prompt_debug_data
1193
- total_second_length_debug_value = total_second_length_debug_data
1194
- return []
1195
-
1196
  generation_mode.change(
1197
  fn=handle_generation_mode_change,
1198
  inputs=[generation_mode],
1199
  outputs=[text_to_video_hint, input_image, input_video, start_button, start_button_video]
1200
  )
1201
 
1202
- input_image_debug.upload(
1203
- fn=handle_field_debug_change,
1204
- inputs=[input_image_debug, input_video_debug, prompt_debug, total_second_length_debug],
1205
- outputs=[]
1206
- )
1207
-
1208
- input_video_debug.upload(
1209
- fn=handle_field_debug_change,
1210
- inputs=[input_image_debug, input_video_debug, prompt_debug, total_second_length_debug],
1211
- outputs=[]
1212
- )
1213
-
1214
- prompt_debug.change(
1215
- fn=handle_field_debug_change,
1216
- inputs=[input_image_debug, input_video_debug, prompt_debug, total_second_length_debug],
1217
- outputs=[]
1218
- )
1219
-
1220
- total_second_length_debug.change(
1221
- fn=handle_field_debug_change,
1222
- inputs=[input_image_debug, input_video_debug, prompt_debug, total_second_length_debug],
1223
- outputs=[]
1224
- )
1225
-
1226
  block.launch(mcp_server=False, ssr_mode=False)
 
14
  import argparse
15
  import random
16
  import math
 
17
  # 20250506 pftq: Added for video input loading
18
  import decord
19
  # 20250506 pftq: Added for progress bars in video_encode
 
106
  outputs_folder = './outputs/'
107
  os.makedirs(outputs_folder, exist_ok=True)
108
 
 
 
109
  def check_parameters(generation_mode, input_image, input_video):
110
  if generation_mode == "image" and input_image is None:
111
  raise gr.Error("Please provide an image to extend.")
 
512
  return
513
 
514
  def get_duration(input_image, prompt, generation_mode, n_prompt, randomize_seed, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf):
 
 
 
 
515
  return total_second_length * 60
516
 
517
 
 
531
  use_teacache=False,
532
  mp4_crf=16
533
  ):
534
+ global stream
535
 
536
  if torch.cuda.device_count() == 0:
537
  gr.Warning('Set this space to GPU config to make it work.')
538
  return None, None, None, None, None, None
539
 
 
 
 
 
 
 
540
  if randomize_seed:
541
  seed = random.randint(0, np.iinfo(np.int32).max)
542
 
 
765
  clean_latent_4x_indices=clean_latent_4x_indices,
766
  callback=callback,
767
  )
 
768
 
769
  total_generated_latent_frames += int(generated_latents.shape[2])
770
  history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2)
 
 
 
 
771
 
772
  if not high_vram:
773
  offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
774
  load_model_as_complete(vae, target_device=gpu)
 
 
 
 
775
 
776
  real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :]
 
 
 
 
777
 
778
  if history_pixels is None:
779
  history_pixels = vae_decode(real_history_latents, vae).cpu()
 
788
 
789
  current_pixels = vae_decode(real_history_latents[:, :, -section_latent_frames:], vae).cpu()
790
  history_pixels = soft_append_bcthw(history_pixels, current_pixels, overlapped_frames)
 
 
 
 
791
 
792
  if not high_vram:
793
  unload_complete_models()
 
 
 
 
794
 
795
  output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
796
 
 
800
  # 20250508 pftq: Save prompt to mp4 metadata comments
801
  set_mp4_comments_imageio_ffmpeg(output_filename, f"Prompt: {prompt} | Negative Prompt: {n_prompt}");
802
  print(f"Prompt saved to mp4 metadata comments: {output_filename}")
 
 
 
 
803
 
804
  # 20250506 pftq: Clean up previous partial files
805
  if previous_video is not None and os.path.exists(previous_video):
 
809
  except Exception as e:
810
  print(f"Error deleting previous partial video {previous_video}: {e}")
811
  previous_video = output_filename
 
 
 
 
812
 
813
  print(f'Decoded. Current latent shape {real_history_latents.shape}; pixel shape {history_pixels.shape}')
814
 
815
  stream.output_queue.push(('file', output_filename))
 
 
 
816
 
817
  seed = (seed + 1) % np.iinfo(np.int32).max
818
 
 
828
  return
829
 
830
  def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
 
 
 
831
  return total_second_length * 60 * 2
832
 
833
  # 20250506 pftq: Modified process to pass clean frame count, etc from video_encode
834
  @spaces.GPU(duration=get_duration_video)
835
  def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
836
+ global stream
837
 
838
  if torch.cuda.device_count() == 0:
839
  gr.Warning('Set this space to GPU config to make it work.')
840
  return None, None, None, None, None, None
841
 
 
 
 
 
 
 
842
  if randomize_seed:
843
  seed = random.randint(0, np.iinfo(np.int32).max)
844
 
 
917
  if torch.cuda.device_count() == 0:
918
  with gr.Row():
919
  gr.HTML("""
920
+ <p style="background-color: red;"><big><big><big><b>⚠️To use FramePack, <a href="?duplicate=true">duplicate this space</a> and set a GPU with 30 GB VRAM.</b>
921
 
922
+ You can't use FramePack directly here because this space runs on a CPU, which is not enough for FramePack. Please provide <a href="discussions/new">feedback</a> if you have issues.
923
  </big></big></big></p>
924
  """)
925
  gr.HTML(title_html)
 
929
  text_to_video_hint = gr.HTML("I discourage to use the Text-to-Video feature. You should rather generate an image with Flux and use Image-to-Video. You will save time.", visible=False)
930
  input_image = gr.Image(sources='upload', type="numpy", label="Image", height=320)
931
  input_video = gr.Video(sources='upload', label="Input Video", height=320, visible=False)
932
+ prompt = gr.Textbox(label="Prompt", value='', info='Use ; to separate in time', placeholder="The creature starts to move, fast motion, focus motion, consistent arm, consistent position, fixed camera")
933
  prompt_number = gr.Slider(label="Timed prompt number", minimum=0, maximum=1000, value=0, step=1, info='Not for video extension')
934
  prompt_number.change(fn=handle_prompt_number_change, inputs=[], outputs=[])
935
 
 
986
 
987
  mp4_crf = gr.Slider(label="MP4 Compression", minimum=0, maximum=100, value=16, step=1, info="Lower means better quality. 0 is uncompressed. Change to 16 if you get black outputs. ")
988
 
 
 
 
 
 
 
989
  with gr.Column():
990
  preview_image = gr.Image(label="Next Latents", height=200, visible=False)
991
  result_video = gr.Video(label="Finished Frames", autoplay=True, show_share_button=False, height=512, loop=True)
 
1004
  ], outputs = [], queue = False, show_progress = False).success(fn=process_video, inputs=ips_video, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button])
1005
  end_button.click(fn=end_process)
1006
 
1007
+ gr.Examples(
 
1008
  examples = [
1009
  [
1010
  "./img_examples/Example1.png", # input_image
 
1039
  6, # gpu_memory_preservation
1040
  False, # use_teacache
1041
  16 # mp4_crf
1042
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1043
  ],
1044
  run_on_click = True,
1045
  fn = process,
 
1048
  cache_examples = torch.cuda.device_count() > 0,
1049
  )
1050
 
1051
+ gr.Examples(
 
1052
  examples = [
1053
  [
1054
  "./img_examples/Example1.mp4", # input_video
 
1089
  return [gr.update(visible = False), gr.update(visible = False), gr.update(visible = True), gr.update(visible = False), gr.update(visible = True)]
1090
 
1091
 
 
 
 
 
 
 
 
 
1092
  generation_mode.change(
1093
  fn=handle_generation_mode_change,
1094
  inputs=[generation_mode],
1095
  outputs=[text_to_video_hint, input_image, input_video, start_button, start_button_video]
1096
  )
1097
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1098
  block.launch(mcp_server=False, ssr_mode=False)