jbilcke-hf HF Staff committed on
Commit
48d6121
·
1 Parent(s): 6fff6df

time to test image conditioning

docs/gradio/external_plugin--gradio_modal.md ADDED
@@ -0,0 +1,108 @@
Project description
-------------------

`gradio_modal`
==============

[![PyPI - Version](https://pypi-camo.freetls.fastly.net/19d01702f9691477566e07fbd3c8eb08188e6eae/68747470733a2f2f696d672e736869656c64732e696f2f707970692f762f67726164696f5f6d6f64616c)](https://pypi.org/project/gradio_modal/)

A popup modal component

Installation
------------

    pip install gradio_modal

Usage
-----

```python
import gradio as gr
from gradio_modal import Modal

with gr.Blocks() as demo:
    with gr.Tab("Tab 1"):
        text_1 = gr.Textbox(label="Input 1")
        text_2 = gr.Textbox(label="Input 2")
        text_1.submit(lambda x: x, text_1, text_2)
        show_btn = gr.Button("Show Modal")
        show_btn2 = gr.Button("Show Modal 2")
        gr.Examples(
            [["Text 1", "Text 2"], ["Text 3", "Text 4"]],
            inputs=[text_1, text_2],
        )
    with gr.Tab("Tab 2"):
        gr.Markdown("This is tab 2")
    with Modal(visible=False) as modal:
        for i in range(5):
            gr.Markdown("Hello world!")
    with Modal(visible=False) as modal2:
        for i in range(100):
            gr.Markdown("Hello world!")
    show_btn.click(lambda: Modal(visible=True), None, modal)
    show_btn2.click(lambda: Modal(visible=True), None, modal2)

if __name__ == "__main__":
    demo.launch()
```

`Modal`
-------

### Initialization

| name | type | default | description |
| --- | --- | --- | --- |
| `visible` | bool | `False` | If False, the modal will be hidden. |
| `elem_id` | str \| None | `None` | An optional string that is assigned as the id of this component in the HTML DOM. Can be used for targeting CSS styles. |
| `elem_classes` | list[str] \| str \| None | `None` | An optional string or list of strings that are assigned as the class of this component in the HTML DOM. Can be used for targeting CSS styles. |
| `allow_user_close` | bool | `True` | If True, the user can close the modal (by clicking outside, clicking the X, or pressing the escape key). |
| `render` | bool | `True` | If False, the component will not be rendered in the Blocks context. Should be used if the intention is to assign event listeners now but render the component later. |

### Events

| name | description |
| --- | --- |
| `blur` | This listener is triggered when the Modal is unfocused/blurred. |
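To make the `allow_user_close` option and the `blur` event concrete, here is a small illustrative sketch (not part of the added file); it assumes the `blur` listener accepts the usual Gradio `fn`/`inputs`/`outputs` arguments:

```python
import gradio as gr
from gradio_modal import Modal

with gr.Blocks() as demo:
    status = gr.Markdown("The modal has not been opened yet.")
    open_btn = gr.Button("Open Modal")

    # allow_user_close=True (the default) lets the user dismiss the modal
    # by clicking outside it, clicking the X, or pressing Escape.
    with Modal(visible=False, allow_user_close=True) as modal:
        gr.Markdown("Close this modal to trigger the `blur` event.")

    open_btn.click(lambda: Modal(visible=True), None, modal)

    # `blur` fires when the modal loses focus, i.e. when the user closes it.
    modal.blur(lambda: "The modal was closed.", None, status)

if __name__ == "__main__":
    demo.launch()
```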
vms/config.py CHANGED
@@ -304,39 +304,71 @@ DEFAULT_VALIDATION_WIDTH = 768
304
  DEFAULT_VALIDATION_NB_FRAMES = 49
305
  DEFAULT_VALIDATION_FRAMERATE = 8
306
 
307
- # it is best to use resolutions that are powers of 8
308
- # The resolution should be divisible by 32
309
- # so we cannot use 1080, 540 etc as they are not divisible by 32
310
- MEDIUM_19_9_RATIO_WIDTH = 768 # 32 * 24
311
- MEDIUM_19_9_RATIO_HEIGHT = 512 # 32 * 16
312
-
313
- # 1920 = 32 * 60 (divided by 2: 960 = 32 * 30)
314
- # 1920 = 32 * 60 (divided by 2: 960 = 32 * 30)
315
- # 1056 = 32 * 33 (divided by 2: 544 = 17 * 32)
316
- # 1024 = 32 * 32 (divided by 2: 512 = 16 * 32)
317
  # it is important that the resolution buckets properly cover the training dataset,
318
  # or else that we exclude from the dataset videos that are out of this range
319
  # right now, finetrainers will crash if that happens, so the workaround is to have more buckets in here
320
 
321
- NB_FRAMES_1 = 1 # 1
322
- NB_FRAMES_9 = 8 + 1 # 8 + 1
323
- NB_FRAMES_17 = 8 * 2 + 1 # 16 + 1
324
- NB_FRAMES_33 = 8 * 4 + 1 # 32 + 1
325
- NB_FRAMES_49 = 8 * 6 + 1 # 48 + 1
326
- NB_FRAMES_65 = 8 * 8 + 1 # 64 + 1
327
- NB_FRAMES_81 = 8 * 10 + 1 # 80 + 1
328
- NB_FRAMES_97 = 8 * 12 + 1 # 96 + 1
329
  NB_FRAMES_113 = 8 * 14 + 1 # 112 + 1
 
330
  NB_FRAMES_129 = 8 * 16 + 1 # 128 + 1
 
331
  NB_FRAMES_145 = 8 * 18 + 1 # 144 + 1
332
- NB_FRAMES_161 = 8 * 20 + 1 # 160 + 1
333
  NB_FRAMES_177 = 8 * 22 + 1 # 176 + 1
334
  NB_FRAMES_193 = 8 * 24 + 1 # 192 + 1
335
  NB_FRAMES_225 = 8 * 28 + 1 # 224 + 1
336
  NB_FRAMES_257 = 8 * 32 + 1 # 256 + 1
337
- # 256 isn't a lot by the way, especially with 60 FPS videos..
338
- # can we crank it and put more frames in here?
339
-
340
  NB_FRAMES_273 = 8 * 34 + 1 # 272 + 1
341
  NB_FRAMES_289 = 8 * 36 + 1 # 288 + 1
342
  NB_FRAMES_305 = 8 * 38 + 1 # 304 + 1
@@ -347,199 +379,155 @@ NB_FRAMES_369 = 8 * 46 + 1 # 368 + 1
347
  NB_FRAMES_385 = 8 * 48 + 1 # 384 + 1
348
  NB_FRAMES_401 = 8 * 50 + 1 # 400 + 1
349
 
350
- SMALL_TRAINING_BUCKETS = [
351
- (NB_FRAMES_1, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 1
352
- (NB_FRAMES_9, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 8 + 1
353
- (NB_FRAMES_17, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 16 + 1
354
- (NB_FRAMES_33, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 32 + 1
355
- (NB_FRAMES_49, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 48 + 1
356
- (NB_FRAMES_65, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 64 + 1
357
- (NB_FRAMES_81, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 80 + 1
358
- (NB_FRAMES_97, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 96 + 1
359
- (NB_FRAMES_113, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 112 + 1
360
- (NB_FRAMES_129, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 128 + 1
361
- (NB_FRAMES_145, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 144 + 1
362
- (NB_FRAMES_161, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 160 + 1
363
- (NB_FRAMES_177, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 176 + 1
364
- (NB_FRAMES_193, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 192 + 1
365
- (NB_FRAMES_225, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 224 + 1
366
- (NB_FRAMES_257, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 256 + 1
367
  ]
368
 
369
- MEDIUM_19_9_RATIO_WIDTH = 928 # 32 * 29
370
- MEDIUM_19_9_RATIO_HEIGHT = 512 # 32 * 16
371
-
372
- MEDIUM_19_9_RATIO_BUCKETS = [
373
- (NB_FRAMES_1, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 1
374
- (NB_FRAMES_9, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 8 + 1
375
- (NB_FRAMES_17, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 16 + 1
376
- (NB_FRAMES_33, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 32 + 1
377
- (NB_FRAMES_49, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 48 + 1
378
- (NB_FRAMES_65, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 64 + 1
379
- (NB_FRAMES_81, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 80 + 1
380
- (NB_FRAMES_97, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 96 + 1
381
- (NB_FRAMES_113, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 112 + 1
382
- (NB_FRAMES_129, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 128 + 1
383
- (NB_FRAMES_145, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 144 + 1
384
- (NB_FRAMES_161, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 160 + 1
385
- (NB_FRAMES_177, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 176 + 1
386
- (NB_FRAMES_193, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 192 + 1
387
- (NB_FRAMES_225, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 224 + 1
388
- (NB_FRAMES_257, MEDIUM_19_9_RATIO_HEIGHT, MEDIUM_19_9_RATIO_WIDTH), # 256 + 1
389
  ]
390
 
391
- # Updated training presets to include Wan-2.1-T2V and support both LoRA and full-finetune
392
- TRAINING_PRESETS = {
393
- "HunyuanVideo (normal)": {
394
- "model_type": "hunyuan_video",
395
- "training_type": "lora",
396
- "lora_rank": DEFAULT_LORA_RANK_STR,
397
- "lora_alpha": DEFAULT_LORA_ALPHA_STR,
398
- "train_steps": DEFAULT_NB_TRAINING_STEPS,
399
- "batch_size": DEFAULT_BATCH_SIZE,
400
  "learning_rate": 2e-5,
401
- "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
402
- "training_buckets": SMALL_TRAINING_BUCKETS,
403
  "flow_weighting_scheme": "none",
404
- "num_gpus": DEFAULT_NUM_GPUS,
405
- "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
406
- "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
407
- },
408
- "LTX-Video (normal)": {
409
- "model_type": "ltx_video",
410
- "training_type": "lora",
411
  "lora_rank": DEFAULT_LORA_RANK_STR,
412
- "lora_alpha": DEFAULT_LORA_ALPHA_STR,
413
- "train_steps": DEFAULT_NB_TRAINING_STEPS,
414
- "batch_size": DEFAULT_BATCH_SIZE,
415
- "learning_rate": DEFAULT_LEARNING_RATE,
416
- "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
417
- "training_buckets": SMALL_TRAINING_BUCKETS,
418
- "flow_weighting_scheme": "none",
419
- "num_gpus": DEFAULT_NUM_GPUS,
420
- "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
421
- "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
422
  },
423
- "LTX-Video (16:9, HQ)": {
424
- "model_type": "ltx_video",
425
- "training_type": "lora",
426
- "lora_rank": "256",
427
- "lora_alpha": DEFAULT_LORA_ALPHA_STR,
428
- "train_steps": DEFAULT_NB_TRAINING_STEPS,
429
- "batch_size": DEFAULT_BATCH_SIZE,
430
  "learning_rate": DEFAULT_LEARNING_RATE,
431
- "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
432
- "training_buckets": MEDIUM_19_9_RATIO_BUCKETS,
433
- "flow_weighting_scheme": "logit_normal",
434
- "num_gpus": DEFAULT_NUM_GPUS,
435
- "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
436
- "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
437
  },
438
- "LTX-Video (Full Finetune)": {
439
- "model_type": "ltx_video",
440
- "training_type": "full-finetune",
441
- "train_steps": DEFAULT_NB_TRAINING_STEPS,
442
- "batch_size": DEFAULT_BATCH_SIZE,
443
  "learning_rate": DEFAULT_LEARNING_RATE,
444
- "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
445
- "training_buckets": SMALL_TRAINING_BUCKETS,
446
- "flow_weighting_scheme": "logit_normal",
447
- "num_gpus": DEFAULT_NUM_GPUS,
448
- "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
449
- "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
450
- },
451
- "Wan-2.1-T2V (normal)": {
452
- "model_type": "wan",
453
- "training_type": "lora",
454
- "lora_rank": "32",
455
- "lora_alpha": "32",
456
- "train_steps": DEFAULT_NB_TRAINING_STEPS,
457
- "batch_size": DEFAULT_BATCH_SIZE,
458
- "learning_rate": 5e-5,
459
- "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
460
- "training_buckets": SMALL_TRAINING_BUCKETS,
461
- "flow_weighting_scheme": "logit_normal",
462
- "num_gpus": DEFAULT_NUM_GPUS,
463
- "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
464
- "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
465
  },
466
- "Wan-2.1-T2V (HQ)": {
467
- "model_type": "wan",
468
- "training_type": "lora",
469
- "lora_rank": "64",
470
- "lora_alpha": "64",
471
- "train_steps": DEFAULT_NB_TRAINING_STEPS,
472
- "batch_size": DEFAULT_BATCH_SIZE,
473
  "learning_rate": DEFAULT_LEARNING_RATE,
474
- "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
475
- "training_buckets": MEDIUM_19_9_RATIO_BUCKETS,
476
- "flow_weighting_scheme": "logit_normal",
477
- "num_gpus": DEFAULT_NUM_GPUS,
478
- "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
479
- "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
480
- },
481
- "Wan-2.1-I2V (Control LoRA)": {
482
- "model_type": "wan",
483
- "training_type": "control-lora",
484
- "lora_rank": "32",
485
- "lora_alpha": "32",
486
- "train_steps": DEFAULT_NB_TRAINING_STEPS,
487
- "batch_size": DEFAULT_BATCH_SIZE,
488
- "learning_rate": 5e-5,
489
- "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
490
- "training_buckets": SMALL_TRAINING_BUCKETS,
491
  "flow_weighting_scheme": "logit_normal",
492
- "num_gpus": DEFAULT_NUM_GPUS,
493
- "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
494
- "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
495
- "control_type": "custom",
496
- "train_qk_norm": True,
497
- "frame_conditioning_type": "index",
498
- "frame_conditioning_index": 0,
499
- "frame_conditioning_concatenate_mask": True,
500
- "description": "Image-conditioned video generation with LoRA adapters"
501
- },
502
- "LTX-Video (Control LoRA)": {
503
- "model_type": "ltx_video",
504
- "training_type": "control-lora",
505
  "lora_rank": "128",
506
  "lora_alpha": "128",
507
- "train_steps": DEFAULT_NB_TRAINING_STEPS,
508
- "batch_size": DEFAULT_BATCH_SIZE,
509
- "learning_rate": DEFAULT_LEARNING_RATE,
510
- "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
511
- "training_buckets": SMALL_TRAINING_BUCKETS,
512
- "flow_weighting_scheme": "logit_normal",
513
- "num_gpus": DEFAULT_NUM_GPUS,
514
- "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
515
- "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
516
  "control_type": "custom",
517
  "train_qk_norm": True,
518
  "frame_conditioning_type": "index",
519
  "frame_conditioning_index": 0,
520
- "frame_conditioning_concatenate_mask": True,
521
- "description": "Image-conditioned video generation with LoRA adapters"
522
  },
523
- "HunyuanVideo (Control LoRA)": {
524
- "model_type": "hunyuan_video",
525
- "training_type": "control-lora",
526
- "lora_rank": "128",
527
- "lora_alpha": "128",
528
- "train_steps": DEFAULT_NB_TRAINING_STEPS,
529
- "batch_size": DEFAULT_BATCH_SIZE,
530
- "learning_rate": 2e-5,
531
- "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
532
- "training_buckets": SMALL_TRAINING_BUCKETS,
533
- "flow_weighting_scheme": "none",
534
- "num_gpus": DEFAULT_NUM_GPUS,
535
- "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
536
- "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
537
  "control_type": "custom",
538
  "train_qk_norm": True,
539
  "frame_conditioning_type": "index",
540
  "frame_conditioning_index": 0,
541
- "frame_conditioning_concatenate_mask": True,
542
- "description": "Image-conditioned video generation with HunyuanVideo and LoRA adapters"
543
  }
544
  }
545
 
@@ -567,7 +555,7 @@ class TrainingConfig:
567
  caption_column: str = "prompts.txt"
568
 
569
  id_token: Optional[str] = None
570
- video_resolution_buckets: List[Tuple[int, int, int]] = field(default_factory=lambda: SMALL_TRAINING_BUCKETS)
571
  video_reshape_mode: str = "center"
572
  caption_dropout_p: float = DEFAULT_CAPTION_DROPOUT_P
573
  caption_dropout_technique: str = "empty"
@@ -632,7 +620,7 @@ class TrainingConfig:
632
  gradient_accumulation_steps=1,
633
  lora_rank=DEFAULT_LORA_RANK,
634
  lora_alpha=DEFAULT_LORA_ALPHA,
635
- video_resolution_buckets=buckets or SMALL_TRAINING_BUCKETS,
636
  caption_dropout_p=DEFAULT_CAPTION_DROPOUT_P,
637
  flow_weighting_scheme="none", # Hunyuan specific
638
  training_type="lora"
@@ -654,7 +642,7 @@ class TrainingConfig:
654
  gradient_accumulation_steps=4,
655
  lora_rank=DEFAULT_LORA_RANK,
656
  lora_alpha=DEFAULT_LORA_ALPHA,
657
- video_resolution_buckets=buckets or SMALL_TRAINING_BUCKETS,
658
  caption_dropout_p=DEFAULT_CAPTION_DROPOUT_P,
659
  flow_weighting_scheme="logit_normal", # LTX specific
660
  training_type="lora"
@@ -674,7 +662,7 @@ class TrainingConfig:
674
  gradient_checkpointing=True,
675
  id_token=None,
676
  gradient_accumulation_steps=1,
677
- video_resolution_buckets=buckets or SMALL_TRAINING_BUCKETS,
678
  caption_dropout_p=DEFAULT_CAPTION_DROPOUT_P,
679
  flow_weighting_scheme="logit_normal", # LTX specific
680
  training_type="full-finetune"
@@ -697,7 +685,7 @@ class TrainingConfig:
697
  lora_rank=32,
698
  lora_alpha=32,
699
  target_modules=["blocks.*(to_q|to_k|to_v|to_out.0)"], # Wan-specific target modules
700
- video_resolution_buckets=buckets or SMALL_TRAINING_BUCKETS,
701
  caption_dropout_p=DEFAULT_CAPTION_DROPOUT_P,
702
  flow_weighting_scheme="logit_normal", # Wan specific
703
  training_type="lora"
 
304
  DEFAULT_VALIDATION_NB_FRAMES = 49
305
  DEFAULT_VALIDATION_FRAMERATE = 8
306
 
307
+ # you should use resolutions whose width and height are multiples of 8
308
+ # using a 16:9 aspect ratio is also strongly recommended
309
+
310
+ # SD
311
+ SD_16_9_W = 1024 # 8*128
312
+ SD_16_9_H = 576 # 8*72
313
+ SD_9_16_W = 576 # 8*72
314
+ SD_9_16_H = 1024 # 8*128
315
+
316
+ # MD (720p)
317
+ MD_16_9_W = 1280 # 8*160
318
+ MD_16_9_H = 720 # 8*90
319
+ MD_9_16_W = 720 # 8*90
320
+ MD_9_16_H = 1280 # 8*160
321
+
322
+ # HD (1080p)
323
+ HD_16_9_W = 1920 # 8*240
324
+ HD_16_9_H = 1080 # 8*135
325
+ HD_9_16_W = 1080 # 8*135
326
+ HD_9_16_H = 1920 # 8*240
327
+
328
+ # QHD (2K)
329
+ QHD_16_9_W = 2560 # 8*320
330
+ QHD_16_9_H = 1440 # 8*180
331
+ QHD_9_16_W = 1440 # 8*180
332
+ QHD_9_16_H = 2560 # 8*320
333
+
334
+ # UHD (4K)
335
+ UHD_16_9_W = 3840 # 8*480
336
+ UHD_16_9_H = 2160 # 8*270
337
+ UHD_9_16_W = 2160 # 8*270
338
+ UHD_9_16_H = 3840 # 8*480
339
+
340
  # it is important that the resolution buckets properly cover the training dataset,
341
  # or else that we exclude from the dataset videos that are out of this range
342
  # right now, finetrainers will crash if that happens, so the workaround is to have more buckets in here
343
 
344
+ NB_FRAMES_1 = 1 # 1
345
+ NB_FRAMES_9 = 8 + 1 # 8 + 1
346
+ NB_FRAMES_17 = 8 * 2 + 1 # 16 + 1
347
+ NB_FRAMES_33 = 8 * 4 + 1 # 32 + 1
348
+ NB_FRAMES_49 = 8 * 6 + 1 # 48 + 1
349
+ NB_FRAMES_65 = 8 * 8 + 1 # 64 + 1
350
+ NB_FRAMES_73 = 8 * 9 + 1 # 72 + 1
351
+ NB_FRAMES_81 = 8 * 10 + 1 # 80 + 1
352
+ NB_FRAMES_89 = 8 * 11 + 1 # 88 + 1
353
+ NB_FRAMES_97 = 8 * 12 + 1 # 96 + 1
354
+ NB_FRAMES_105 = 8 * 13 + 1 # 104 + 1
355
  NB_FRAMES_113 = 8 * 14 + 1 # 112 + 1
356
+ NB_FRAMES_121 = 8 * 15 + 1 # 120 + 1
357
  NB_FRAMES_129 = 8 * 16 + 1 # 128 + 1
358
+ NB_FRAMES_137 = 8 * 17 + 1 # 136 + 1
359
  NB_FRAMES_145 = 8 * 18 + 1 # 144 + 1
360
+ NB_FRAMES_161 = 8 * 20 + 1 # 160 + 1
361
  NB_FRAMES_177 = 8 * 22 + 1 # 176 + 1
362
  NB_FRAMES_193 = 8 * 24 + 1 # 192 + 1
363
+ NB_FRAMES_201 = 8 * 25 + 1 # 200 + 1
364
+ NB_FRAMES_209 = 8 * 26 + 1 # 208 + 1
365
+ NB_FRAMES_217 = 8 * 27 + 1 # 216 + 1
366
  NB_FRAMES_225 = 8 * 28 + 1 # 224 + 1
367
+ NB_FRAMES_233 = 8 * 29 + 1 # 232 + 1
368
+ NB_FRAMES_241 = 8 * 30 + 1 # 240 + 1
369
+ NB_FRAMES_249 = 8 * 31 + 1 # 248 + 1
370
  NB_FRAMES_257 = 8 * 32 + 1 # 256 + 1
371
+ NB_FRAMES_265 = 8 * 33 + 1 # 264 + 1
 
 
372
  NB_FRAMES_273 = 8 * 34 + 1 # 272 + 1
373
  NB_FRAMES_289 = 8 * 36 + 1 # 288 + 1
374
  NB_FRAMES_305 = 8 * 38 + 1 # 304 + 1
 
379
  NB_FRAMES_385 = 8 * 48 + 1 # 384 + 1
380
  NB_FRAMES_401 = 8 * 50 + 1 # 400 + 1
381
 
382
+ # ------ HOW BUCKETS WORK:----------
383
+ # Basically, to train or fine-tune a video model with Finetrainers, we need to specify all of the accepted video length AND size combinations (buckets), in the form: (BUCKET_CONFIGURATION_1, BUCKET_CONFIGURATION_2, ..., BUCKET_CONFIGURATION_N)
384
+ # Where a bucket is: (NUMBER_OF_FRAMES_PLUS_ONE, HEIGHT_IN_PIXELS, WIDTH_IN_PIXELS)
385
+ # For instance, for 2 seconds of a 1024x576 video at 24 frames per second, plus one frame (I think there is always an extra frame for the initial starting image), we would get:
386
+ # NUMBER_OF_FRAMES_PLUS_ONE = (2*24) + 1 = 48 + 1 = 49
387
+ # HEIGHT_IN_PIXELS = 576
388
+ # WIDTH_IN_PIXELS = 1024
389
+ # -> This would give a bucket like this: (49, 576, 1024)
390
+ #
391
+
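As a side note (not part of this commit), the bucket formula described above can be written as a small helper; the function name and signature here are illustrative only:

```python
def make_bucket(duration_s: float, fps: int, height: int, width: int) -> tuple[int, int, int]:
    """Build a (number_of_frames + 1, height, width) bucket as described in the comment above."""
    assert height % 8 == 0 and width % 8 == 0, "dimensions should be multiples of 8"
    nb_frames_plus_one = int(duration_s * fps) + 1  # one extra frame for the initial starting image
    return (nb_frames_plus_one, height, width)

# 2 seconds of 1024x576 video at 24 fps -> (49, 576, 1024)
print(make_bucket(2, 24, 576, 1024))
```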
392
+ SD_TRAINING_BUCKETS = [
393
+ (NB_FRAMES_1, SD_16_9_H, SD_16_9_W), # 1
394
+ (NB_FRAMES_9, SD_16_9_H, SD_16_9_W), # 8 + 1
395
+ (NB_FRAMES_17, SD_16_9_H, SD_16_9_W), # 16 + 1
396
+ (NB_FRAMES_33, SD_16_9_H, SD_16_9_W), # 32 + 1
397
+ (NB_FRAMES_49, SD_16_9_H, SD_16_9_W), # 48 + 1
398
+ (NB_FRAMES_65, SD_16_9_H, SD_16_9_W), # 64 + 1
399
+ (NB_FRAMES_73, SD_16_9_H, SD_16_9_W), # 72 + 1
400
+ (NB_FRAMES_81, SD_16_9_H, SD_16_9_W), # 80 + 1
401
+ (NB_FRAMES_89, SD_16_9_H, SD_16_9_W), # 88 + 1
402
+ (NB_FRAMES_97, SD_16_9_H, SD_16_9_W), # 96 + 1
403
+ (NB_FRAMES_105, SD_16_9_H, SD_16_9_W), # 104 + 1
404
+ (NB_FRAMES_113, SD_16_9_H, SD_16_9_W), # 112 + 1
405
+ (NB_FRAMES_121, SD_16_9_H, SD_16_9_W), # 120 + 1
406
+ (NB_FRAMES_129, SD_16_9_H, SD_16_9_W), # 128 + 1
407
+ (NB_FRAMES_137, SD_16_9_H, SD_16_9_W), # 136 + 1
408
+ (NB_FRAMES_145, SD_16_9_H, SD_16_9_W), # 144 + 1
409
+ (NB_FRAMES_161, SD_16_9_H, SD_16_9_W), # 160 + 1
410
+ (NB_FRAMES_177, SD_16_9_H, SD_16_9_W), # 176 + 1
411
+ (NB_FRAMES_193, SD_16_9_H, SD_16_9_W), # 192 + 1
412
+ (NB_FRAMES_201, SD_16_9_H, SD_16_9_W), # 200 + 1
413
+ (NB_FRAMES_209, SD_16_9_H, SD_16_9_W), # 208 + 1
414
+ (NB_FRAMES_217, SD_16_9_H, SD_16_9_W), # 216 + 1
415
+ (NB_FRAMES_225, SD_16_9_H, SD_16_9_W), # 224 + 1
416
+ (NB_FRAMES_233, SD_16_9_H, SD_16_9_W), # 232 + 1
417
+ (NB_FRAMES_241, SD_16_9_H, SD_16_9_W), # 240 + 1
418
+ (NB_FRAMES_249, SD_16_9_H, SD_16_9_W), # 248 + 1
419
+ (NB_FRAMES_257, SD_16_9_H, SD_16_9_W), # 256 + 1
420
+ (NB_FRAMES_265, SD_16_9_H, SD_16_9_W), # 264 + 1
421
+ (NB_FRAMES_273, SD_16_9_H, SD_16_9_W), # 272 + 1
422
  ]
423
 
424
+ # For 1280x720 images and videos (from 1 frame up to 273 frames)
425
+ MD_TRAINING_BUCKETS = [
426
+ (NB_FRAMES_1, MD_16_9_H, MD_16_9_W), # 1
427
+ (NB_FRAMES_9, MD_16_9_H, MD_16_9_W), # 8 + 1
428
+ (NB_FRAMES_17, MD_16_9_H, MD_16_9_W), # 16 + 1
429
+ (NB_FRAMES_33, MD_16_9_H, MD_16_9_W), # 32 + 1
430
+ (NB_FRAMES_49, MD_16_9_H, MD_16_9_W), # 48 + 1
431
+ (NB_FRAMES_65, MD_16_9_H, MD_16_9_W), # 64 + 1
432
+ (NB_FRAMES_73, MD_16_9_H, MD_16_9_W), # 72 + 1
433
+ (NB_FRAMES_81, MD_16_9_H, MD_16_9_W), # 80 + 1
434
+ (NB_FRAMES_89, MD_16_9_H, MD_16_9_W), # 88 + 1
435
+ (NB_FRAMES_97, MD_16_9_H, MD_16_9_W), # 96 + 1
436
+ (NB_FRAMES_105, MD_16_9_H, MD_16_9_W), # 104 + 1
437
+ (NB_FRAMES_113, MD_16_9_H, MD_16_9_W), # 112 + 1
438
+ (NB_FRAMES_121, MD_16_9_H, MD_16_9_W), # 120 + 1
439
+ (NB_FRAMES_129, MD_16_9_H, MD_16_9_W), # 128 + 1
440
+ (NB_FRAMES_137, MD_16_9_H, MD_16_9_W), # 136 + 1
441
+ (NB_FRAMES_145, MD_16_9_H, MD_16_9_W), # 144 + 1
442
+ (NB_FRAMES_161, MD_16_9_H, MD_16_9_W), # 160 + 1
443
+ (NB_FRAMES_177, MD_16_9_H, MD_16_9_W), # 176 + 1
444
+ (NB_FRAMES_193, MD_16_9_H, MD_16_9_W), # 192 + 1
445
+ (NB_FRAMES_201, MD_16_9_H, MD_16_9_W), # 200 + 1
446
+ (NB_FRAMES_209, MD_16_9_H, MD_16_9_W), # 208 + 1
447
+ (NB_FRAMES_217, MD_16_9_H, MD_16_9_W), # 216 + 1
448
+ (NB_FRAMES_225, MD_16_9_H, MD_16_9_W), # 224 + 1
449
+ (NB_FRAMES_233, MD_16_9_H, MD_16_9_W), # 232 + 1
450
+ (NB_FRAMES_241, MD_16_9_H, MD_16_9_W), # 240 + 1
451
+ (NB_FRAMES_249, MD_16_9_H, MD_16_9_W), # 248 + 1
452
+ (NB_FRAMES_257, MD_16_9_H, MD_16_9_W), # 256 + 1
453
+ (NB_FRAMES_265, MD_16_9_H, MD_16_9_W), # 264 + 1
454
+ (NB_FRAMES_273, MD_16_9_H, MD_16_9_W), # 272 + 1
455
  ]
456
 
457
+
458
+ # Model specific default parameters
459
+ # These are used instead of the previous TRAINING_PRESETS
460
+
461
+ # Resolution buckets for different models
462
+ RESOLUTION_OPTIONS = {
463
+ "SD (1024x576)": "SD_TRAINING_BUCKETS",
464
+ "HD (1280x720)": "MD_TRAINING_BUCKETS"
465
+ }
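Note that the values in `RESOLUTION_OPTIONS` are the *names* of the bucket lists, not the lists themselves, so callers have to resolve them. A minimal sketch (illustrative only, mirroring the lookup done in the training service later in this commit):

```python
def resolve_buckets(resolution_label: str) -> list:
    """Map a UI resolution label to its bucket list, falling back to the SD buckets."""
    bucket_name = RESOLUTION_OPTIONS.get(resolution_label, "SD_TRAINING_BUCKETS")
    return MD_TRAINING_BUCKETS if bucket_name == "MD_TRAINING_BUCKETS" else SD_TRAINING_BUCKETS

buckets = resolve_buckets("HD (1280x720)")  # -> MD_TRAINING_BUCKETS
```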
466
+
467
+ # Default parameters for Hunyuan Video
468
+ HUNYUAN_VIDEO_DEFAULTS = {
469
+ "lora": {
470
  "learning_rate": 2e-5,
 
 
471
  "flow_weighting_scheme": "none",
472
  "lora_rank": DEFAULT_LORA_RANK_STR,
473
+ "lora_alpha": DEFAULT_LORA_ALPHA_STR
474
  },
475
+ "control-lora": {
476
+ "learning_rate": 2e-5,
477
+ "flow_weighting_scheme": "none",
478
+ "lora_rank": "128",
479
+ "lora_alpha": "128",
480
+ "control_type": "custom",
481
+ "train_qk_norm": True,
482
+ "frame_conditioning_type": "index",
483
+ "frame_conditioning_index": 0,
484
+ "frame_conditioning_concatenate_mask": True
485
+ }
486
+ }
487
+
488
+ # Default parameters for LTX Video
489
+ LTX_VIDEO_DEFAULTS = {
490
+ "lora": {
491
  "learning_rate": DEFAULT_LEARNING_RATE,
492
+ "flow_weighting_scheme": "none",
493
+ "lora_rank": DEFAULT_LORA_RANK_STR,
494
+ "lora_alpha": DEFAULT_LORA_ALPHA_STR
495
  },
496
+ "full-finetune": {
497
  "learning_rate": DEFAULT_LEARNING_RATE,
498
+ "flow_weighting_scheme": "logit_normal"
499
  },
500
+ "control-lora": {
501
  "learning_rate": DEFAULT_LEARNING_RATE,
502
  "flow_weighting_scheme": "logit_normal",
503
  "lora_rank": "128",
504
  "lora_alpha": "128",
505
  "control_type": "custom",
506
  "train_qk_norm": True,
507
  "frame_conditioning_type": "index",
508
  "frame_conditioning_index": 0,
509
+ "frame_conditioning_concatenate_mask": True
510
+ }
511
+ }
512
+
513
+ # Default parameters for Wan
514
+ WAN_DEFAULTS = {
515
+ "lora": {
516
+ "learning_rate": 5e-5,
517
+ "flow_weighting_scheme": "logit_normal",
518
+ "lora_rank": "32",
519
+ "lora_alpha": "32"
520
  },
521
+ "control-lora": {
522
+ "learning_rate": 5e-5,
523
+ "flow_weighting_scheme": "logit_normal",
524
+ "lora_rank": "32",
525
+ "lora_alpha": "32",
526
  "control_type": "custom",
527
  "train_qk_norm": True,
528
  "frame_conditioning_type": "index",
529
  "frame_conditioning_index": 0,
530
+ "frame_conditioning_concatenate_mask": True
 
531
  }
532
  }
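Since these per-model dictionaries replace the former `TRAINING_PRESETS`, defaults are now looked up per model type and training type. A minimal sketch of such a lookup (illustrative only; the dictionary and helper names below are hypothetical, the keys are the ones defined above):

```python
# Hypothetical helper mapping internal model type names to their defaults dictionaries.
MODEL_DEFAULTS = {
    "hunyuan_video": HUNYUAN_VIDEO_DEFAULTS,
    "ltx_video": LTX_VIDEO_DEFAULTS,
    "wan": WAN_DEFAULTS,
}

def get_defaults(model_type: str, training_type: str) -> dict:
    """Return default hyper-parameters for a (model type, training type) pair, or {} if unknown."""
    return MODEL_DEFAULTS.get(model_type, {}).get(training_type, {})

# e.g. get_defaults("wan", "control-lora")["lora_rank"] == "32"
```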
533
 
 
555
  caption_column: str = "prompts.txt"
556
 
557
  id_token: Optional[str] = None
558
+ video_resolution_buckets: List[Tuple[int, int, int]] = field(default_factory=lambda: SD_TRAINING_BUCKETS)
559
  video_reshape_mode: str = "center"
560
  caption_dropout_p: float = DEFAULT_CAPTION_DROPOUT_P
561
  caption_dropout_technique: str = "empty"
 
620
  gradient_accumulation_steps=1,
621
  lora_rank=DEFAULT_LORA_RANK,
622
  lora_alpha=DEFAULT_LORA_ALPHA,
623
+ video_resolution_buckets=buckets or SD_TRAINING_BUCKETS,
624
  caption_dropout_p=DEFAULT_CAPTION_DROPOUT_P,
625
  flow_weighting_scheme="none", # Hunyuan specific
626
  training_type="lora"
 
642
  gradient_accumulation_steps=4,
643
  lora_rank=DEFAULT_LORA_RANK,
644
  lora_alpha=DEFAULT_LORA_ALPHA,
645
+ video_resolution_buckets=buckets or SD_TRAINING_BUCKETS,
646
  caption_dropout_p=DEFAULT_CAPTION_DROPOUT_P,
647
  flow_weighting_scheme="logit_normal", # LTX specific
648
  training_type="lora"
 
662
  gradient_checkpointing=True,
663
  id_token=None,
664
  gradient_accumulation_steps=1,
665
+ video_resolution_buckets=buckets or SD_TRAINING_BUCKETS,
666
  caption_dropout_p=DEFAULT_CAPTION_DROPOUT_P,
667
  flow_weighting_scheme="logit_normal", # LTX specific
668
  training_type="full-finetune"
 
685
  lora_rank=32,
686
  lora_alpha=32,
687
  target_modules=["blocks.*(to_q|to_k|to_v|to_out.0)"], # Wan-specific target modules
688
+ video_resolution_buckets=buckets or SD_TRAINING_BUCKETS,
689
  caption_dropout_p=DEFAULT_CAPTION_DROPOUT_P,
690
  flow_weighting_scheme="logit_normal", # Wan specific
691
  training_type="lora"
vms/ui/app_ui.py CHANGED
@@ -9,8 +9,8 @@ from typing import Any, Optional, Dict, List, Union, Tuple
9
 
10
  from vms.config import (
11
  STORAGE_PATH, VIDEOS_TO_SPLIT_PATH, STAGING_PATH,
12
- TRAINING_PRESETS,
13
- MODEL_TYPES, SMALL_TRAINING_BUCKETS, TRAINING_TYPES, MODEL_VERSIONS,
14
  DEFAULT_NB_TRAINING_STEPS, DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
15
  DEFAULT_BATCH_SIZE, DEFAULT_CAPTION_DROPOUT_P,
16
  DEFAULT_LEARNING_RATE,
@@ -23,6 +23,7 @@ from vms.config import (
23
  DEFAULT_NB_TRAINING_STEPS,
24
  DEFAULT_NB_LR_WARMUP_STEPS,
25
  DEFAULT_AUTO_RESUME,
 
26
 
27
  get_project_paths,
28
  generate_model_project_id,
@@ -363,7 +364,6 @@ class AppUI:
363
  self.project_tabs["train_tab"].components["resume_btn"],
364
  self.project_tabs["train_tab"].components["stop_btn"],
365
  self.project_tabs["train_tab"].components["delete_checkpoints_btn"],
366
- self.project_tabs["train_tab"].components["training_preset"],
367
  self.project_tabs["train_tab"].components["model_type"],
368
  self.project_tabs["train_tab"].components["model_version"],
369
  self.project_tabs["train_tab"].components["training_type"],
@@ -377,7 +377,8 @@ class AppUI:
377
  self.project_tabs["train_tab"].components["num_gpus"],
378
  self.project_tabs["train_tab"].components["precomputation_items"],
379
  self.project_tabs["train_tab"].components["lr_warmup_steps"],
380
- self.project_tabs["train_tab"].components["auto_resume"]
 
381
  ]
382
  )
383
 
@@ -485,7 +486,7 @@ class AppUI:
485
 
486
  # Copy other parameters
487
  for param in ["lora_rank", "lora_alpha", "train_steps",
488
- "batch_size", "learning_rate", "save_iterations", "training_preset"]:
489
  if param in recovery_ui:
490
  ui_state[param] = recovery_ui[param]
491
 
@@ -544,21 +545,22 @@ class AppUI:
544
  model_version_val = available_model_versions[0]
545
  logger.info(f"Using first available model version: {model_version_val}")
546
 
547
- # IMPORTANT: Create a new list of simple strings for the dropdown choices
548
- # This ensures each choice is a single string, not a tuple or other structure
549
- simple_choices = [str(version) for version in available_model_versions]
550
 
551
  # Update the dropdown choices directly in the UI component
552
  try:
553
- self.project_tabs["train_tab"].components["model_version"].choices = simple_choices
554
- logger.info(f"Updated model_version dropdown choices: {len(simple_choices)} options")
555
  except Exception as e:
556
  logger.error(f"Error updating model_version dropdown: {str(e)}")
557
  else:
558
  logger.warning(f"No versions available for model type: {model_type_val}")
559
- # Set empty choices to avoid errors
560
  try:
561
  self.project_tabs["train_tab"].components["model_version"].choices = []
 
562
  except Exception as e:
563
  logger.error(f"Error setting empty model_version choices: {str(e)}")
564
 
@@ -577,11 +579,8 @@ class AppUI:
577
  training_type_val = list(TRAINING_TYPES.keys())[0]
578
  logger.warning(f"Invalid training type '{training_type_val}', using default: {training_type_val}")
579
 
580
- # Validate training preset
581
- training_preset = ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0])
582
- if training_preset not in TRAINING_PRESETS:
583
- training_preset = list(TRAINING_PRESETS.keys())[0]
584
- logger.warning(f"Invalid training preset '{training_preset}', using default: {training_preset}")
585
 
586
  lora_rank_val = ui_state.get("lora_rank", DEFAULT_LORA_RANK_STR)
587
  lora_alpha_val = ui_state.get("lora_alpha", DEFAULT_LORA_ALPHA_STR)
@@ -616,7 +615,6 @@ class AppUI:
616
  resume_btn,
617
  stop_btn,
618
  delete_checkpoints_btn,
619
- training_preset,
620
  model_type_val,
621
  model_version_val,
622
  training_type_val,
@@ -630,7 +628,8 @@ class AppUI:
630
  num_gpus_val,
631
  precomputation_items_val,
632
  lr_warmup_steps_val,
633
- auto_resume_val
 
634
  )
635
 
636
  def initialize_ui_from_state(self):
@@ -650,7 +649,6 @@ class AppUI:
650
 
651
  # Return values in order matching the outputs in app.load
652
  return (
653
- ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0]),
654
  model_type,
655
  model_version,
656
  ui_state.get("training_type", list(TRAINING_TYPES.keys())[0]),
@@ -659,7 +657,8 @@ class AppUI:
659
  ui_state.get("train_steps", DEFAULT_NB_TRAINING_STEPS),
660
  ui_state.get("batch_size", DEFAULT_BATCH_SIZE),
661
  ui_state.get("learning_rate", DEFAULT_LEARNING_RATE),
662
- ui_state.get("save_iterations", DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS)
 
663
  )
664
 
665
  def update_ui_state(self, **kwargs):
 
9
 
10
  from vms.config import (
11
  STORAGE_PATH, VIDEOS_TO_SPLIT_PATH, STAGING_PATH,
12
+ MODEL_TYPES, SD_TRAINING_BUCKETS, MD_TRAINING_BUCKETS, TRAINING_TYPES, MODEL_VERSIONS,
13
+ RESOLUTION_OPTIONS,
14
  DEFAULT_NB_TRAINING_STEPS, DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
15
  DEFAULT_BATCH_SIZE, DEFAULT_CAPTION_DROPOUT_P,
16
  DEFAULT_LEARNING_RATE,
 
23
  DEFAULT_NB_TRAINING_STEPS,
24
  DEFAULT_NB_LR_WARMUP_STEPS,
25
  DEFAULT_AUTO_RESUME,
26
+ HUNYUAN_VIDEO_DEFAULTS, LTX_VIDEO_DEFAULTS, WAN_DEFAULTS,
27
 
28
  get_project_paths,
29
  generate_model_project_id,
 
364
  self.project_tabs["train_tab"].components["resume_btn"],
365
  self.project_tabs["train_tab"].components["stop_btn"],
366
  self.project_tabs["train_tab"].components["delete_checkpoints_btn"],
 
367
  self.project_tabs["train_tab"].components["model_type"],
368
  self.project_tabs["train_tab"].components["model_version"],
369
  self.project_tabs["train_tab"].components["training_type"],
 
377
  self.project_tabs["train_tab"].components["num_gpus"],
378
  self.project_tabs["train_tab"].components["precomputation_items"],
379
  self.project_tabs["train_tab"].components["lr_warmup_steps"],
380
+ self.project_tabs["train_tab"].components["auto_resume"],
381
+ self.project_tabs["train_tab"].components["resolution"]
382
  ]
383
  )
384
 
 
486
 
487
  # Copy other parameters
488
  for param in ["lora_rank", "lora_alpha", "train_steps",
489
+ "batch_size", "learning_rate", "save_iterations"]:
490
  if param in recovery_ui:
491
  ui_state[param] = recovery_ui[param]
492
 
 
545
  model_version_val = available_model_versions[0]
546
  logger.info(f"Using first available model version: {model_version_val}")
547
 
548
+ # IMPORTANT: Create a new list of tuples (label, value) for the dropdown choices
549
+ # This ensures compatibility with Gradio Dropdown component expectations
550
+ choices_tuples = [(str(version), str(version)) for version in available_model_versions]
551
 
552
  # Update the dropdown choices directly in the UI component
553
  try:
554
+ self.project_tabs["train_tab"].components["model_version"].choices = choices_tuples
555
+ logger.info(f"Updated model_version dropdown choices: {len(choices_tuples)} options")
556
  except Exception as e:
557
  logger.error(f"Error updating model_version dropdown: {str(e)}")
558
  else:
559
  logger.warning(f"No versions available for model type: {model_type_val}")
560
+ # Set empty choices as an empty list of tuples to avoid errors
561
  try:
562
  self.project_tabs["train_tab"].components["model_version"].choices = []
563
+ logger.info("Set empty model_version dropdown choices")
564
  except Exception as e:
565
  logger.error(f"Error setting empty model_version choices: {str(e)}")
566
 
 
579
  training_type_val = list(TRAINING_TYPES.keys())[0]
580
  logger.warning(f"Invalid training type '{training_type_val}', using default: {training_type_val}")
581
 
582
+ # Get resolution value
583
+ resolution_val = ui_state.get("resolution", list(RESOLUTION_OPTIONS.keys())[0])
584
 
585
  lora_rank_val = ui_state.get("lora_rank", DEFAULT_LORA_RANK_STR)
586
  lora_alpha_val = ui_state.get("lora_alpha", DEFAULT_LORA_ALPHA_STR)
 
615
  resume_btn,
616
  stop_btn,
617
  delete_checkpoints_btn,
 
618
  model_type_val,
619
  model_version_val,
620
  training_type_val,
 
628
  num_gpus_val,
629
  precomputation_items_val,
630
  lr_warmup_steps_val,
631
+ auto_resume_val,
632
+ resolution_val
633
  )
634
 
635
  def initialize_ui_from_state(self):
 
649
 
650
  # Return values in order matching the outputs in app.load
651
  return (
 
652
  model_type,
653
  model_version,
654
  ui_state.get("training_type", list(TRAINING_TYPES.keys())[0]),
 
657
  ui_state.get("train_steps", DEFAULT_NB_TRAINING_STEPS),
658
  ui_state.get("batch_size", DEFAULT_BATCH_SIZE),
659
  ui_state.get("learning_rate", DEFAULT_LEARNING_RATE),
660
+ ui_state.get("save_iterations", DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),
661
+ ui_state.get("resolution", list(RESOLUTION_OPTIONS.keys())[0])
662
  )
663
 
664
  def update_ui_state(self, **kwargs):
vms/ui/project/services/training.py CHANGED
@@ -22,7 +22,7 @@ from typing import Any, Optional, Dict, List, Union, Tuple
22
  from huggingface_hub import upload_folder, create_repo
23
 
24
  from vms.config import (
25
- TrainingConfig, TRAINING_PRESETS,
26
  STORAGE_PATH, HF_API_TOKEN,
27
  MODEL_TYPES, TRAINING_TYPES, MODEL_VERSIONS,
28
  DEFAULT_NB_TRAINING_STEPS, DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
@@ -228,7 +228,7 @@ class TrainingService:
228
  "batch_size": DEFAULT_BATCH_SIZE,
229
  "learning_rate": DEFAULT_LEARNING_RATE,
230
  "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
231
- "training_preset": list(TRAINING_PRESETS.keys())[0],
232
  "num_gpus": DEFAULT_NUM_GPUS,
233
  "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
234
  "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
@@ -354,10 +354,10 @@ class TrainingService:
354
  merged_state["training_type"] = default_state["training_type"]
355
  logger.warning(f"Invalid training type in saved state, using default")
356
 
357
- # Validate training_preset is in available choices
358
- if merged_state["training_preset"] not in TRAINING_PRESETS:
359
- merged_state["training_preset"] = default_state["training_preset"]
360
- logger.warning(f"Invalid training preset in saved state, using default")
361
 
362
  # Validate lora_rank is in allowed values
363
  if merged_state.get("lora_rank") not in ["16", "32", "64", "128", "256", "512", "1024"]:
@@ -566,7 +566,6 @@ class TrainingService:
566
  learning_rate: float,
567
  save_iterations: int,
568
  repo_id: str,
569
- preset_name: str,
570
  training_type: str = DEFAULT_TRAINING_TYPE,
571
  model_version: str = "",
572
  resume_from_checkpoint: Optional[str] = None,
@@ -577,7 +576,6 @@ class TrainingService:
577
  ) -> Tuple[str, str]:
578
  """Start training with finetrainers"""
579
 
580
- training_path
581
  self.clear_logs()
582
 
583
  if not model_type:
@@ -646,11 +644,24 @@ class TrainingService:
646
  #if progress:
647
  # progress(0.25, desc="Creating dataset configuration")
648
 
649
- # Get preset configuration
650
- preset = TRAINING_PRESETS[preset_name]
651
- training_buckets = preset["training_buckets"]
652
- flow_weighting_scheme = preset.get("flow_weighting_scheme", "none")
653
- preset_training_type = preset.get("training_type", "lora")
 
654
 
655
  # Get the custom prompt prefix from the tabs
656
  custom_prompt_prefix = None
@@ -1117,7 +1128,7 @@ class TrainingService:
1117
  "batch_size": ui_state.get("batch_size", DEFAULT_BATCH_SIZE),
1118
  "learning_rate": ui_state.get("learning_rate", DEFAULT_LEARNING_RATE),
1119
  "save_iterations": ui_state.get("save_iterations", DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),
1120
- "preset_name": ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0]),
1121
  "repo_id": "", # Default empty repo ID,
1122
  "auto_resume": ui_state.get("auto_resume", DEFAULT_AUTO_RESUME)
1123
  }
@@ -1190,7 +1201,7 @@ class TrainingService:
1190
  "batch_size": params.get('batch_size', DEFAULT_BATCH_SIZE),
1191
  "learning_rate": params.get('learning_rate', DEFAULT_LEARNING_RATE),
1192
  "save_iterations": params.get('save_iterations', DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),
1193
- "training_preset": params.get('preset_name', list(TRAINING_PRESETS.keys())[0]),
1194
  "auto_resume": params.get("auto_resume", DEFAULT_AUTO_RESUME)
1195
  })
1196
 
@@ -1211,7 +1222,6 @@ class TrainingService:
1211
  save_iterations=params.get('save_iterations', DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),
1212
  model_version=params.get('model_version', ''),
1213
  repo_id=params.get('repo_id', ''),
1214
- preset_name=params.get('preset_name', list(TRAINING_PRESETS.keys())[0]),
1215
  training_type=training_type_internal,
1216
  resume_from_checkpoint="latest"
1217
  )
 
22
  from huggingface_hub import upload_folder, create_repo
23
 
24
  from vms.config import (
25
+ TrainingConfig, RESOLUTION_OPTIONS, SD_TRAINING_BUCKETS, MD_TRAINING_BUCKETS,
26
  STORAGE_PATH, HF_API_TOKEN,
27
  MODEL_TYPES, TRAINING_TYPES, MODEL_VERSIONS,
28
  DEFAULT_NB_TRAINING_STEPS, DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
 
228
  "batch_size": DEFAULT_BATCH_SIZE,
229
  "learning_rate": DEFAULT_LEARNING_RATE,
230
  "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
231
+ "resolution": list(RESOLUTION_OPTIONS.keys())[0],
232
  "num_gpus": DEFAULT_NUM_GPUS,
233
  "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
234
  "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
 
354
  merged_state["training_type"] = default_state["training_type"]
355
  logger.warning(f"Invalid training type in saved state, using default")
356
 
357
+ # Validate resolution is in available choices
358
+ if "resolution" in merged_state and merged_state["resolution"] not in RESOLUTION_OPTIONS:
359
+ merged_state["resolution"] = default_state["resolution"]
360
+ logger.warning(f"Invalid resolution in saved state, using default")
361
 
362
  # Validate lora_rank is in allowed values
363
  if merged_state.get("lora_rank") not in ["16", "32", "64", "128", "256", "512", "1024"]:
 
566
  learning_rate: float,
567
  save_iterations: int,
568
  repo_id: str,
 
569
  training_type: str = DEFAULT_TRAINING_TYPE,
570
  model_version: str = "",
571
  resume_from_checkpoint: Optional[str] = None,
 
576
  ) -> Tuple[str, str]:
577
  """Start training with finetrainers"""
578
 
 
579
  self.clear_logs()
580
 
581
  if not model_type:
 
644
  #if progress:
645
  # progress(0.25, desc="Creating dataset configuration")
646
 
647
+ # Get resolution configuration from UI state
648
+ ui_state = self.load_ui_state()
649
+ resolution_option = ui_state.get("resolution", list(RESOLUTION_OPTIONS.keys())[0])
650
+ training_buckets_name = RESOLUTION_OPTIONS.get(resolution_option, "SD_TRAINING_BUCKETS")
651
+
652
+ # Determine which buckets to use based on the selected resolution
653
+ if training_buckets_name == "SD_TRAINING_BUCKETS":
654
+ training_buckets = SD_TRAINING_BUCKETS
655
+ elif training_buckets_name == "MD_TRAINING_BUCKETS":
656
+ training_buckets = MD_TRAINING_BUCKETS
657
+ else:
658
+ training_buckets = SD_TRAINING_BUCKETS # Default fallback
659
+
660
+ # Determine flow weighting scheme based on model type
661
+ if model_type == "hunyuan_video":
662
+ flow_weighting_scheme = "none"
663
+ else:
664
+ flow_weighting_scheme = "logit_normal"
665
 
666
  # Get the custom prompt prefix from the tabs
667
  custom_prompt_prefix = None
 
1128
  "batch_size": ui_state.get("batch_size", DEFAULT_BATCH_SIZE),
1129
  "learning_rate": ui_state.get("learning_rate", DEFAULT_LEARNING_RATE),
1130
  "save_iterations": ui_state.get("save_iterations", DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),
1131
+ "resolution": ui_state.get("resolution", list(RESOLUTION_OPTIONS.keys())[0]),
1132
  "repo_id": "", # Default empty repo ID,
1133
  "auto_resume": ui_state.get("auto_resume", DEFAULT_AUTO_RESUME)
1134
  }
 
1201
  "batch_size": params.get('batch_size', DEFAULT_BATCH_SIZE),
1202
  "learning_rate": params.get('learning_rate', DEFAULT_LEARNING_RATE),
1203
  "save_iterations": params.get('save_iterations', DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),
1204
+ "resolution": params.get('resolution', list(RESOLUTION_OPTIONS.keys())[0]),
1205
  "auto_resume": params.get("auto_resume", DEFAULT_AUTO_RESUME)
1206
  })
1207
 
 
1222
  save_iterations=params.get('save_iterations', DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),
1223
  model_version=params.get('model_version', ''),
1224
  repo_id=params.get('repo_id', ''),
 
1225
  training_type=training_type_internal,
1226
  resume_from_checkpoint="latest"
1227
  )
vms/ui/project/tabs/train_tab.py CHANGED
@@ -13,8 +13,9 @@ from pathlib import Path
13
  from vms.utils import BaseTab
14
  from vms.config import (
15
  ASK_USER_TO_DUPLICATE_SPACE,
16
- SMALL_TRAINING_BUCKETS,
17
- TRAINING_PRESETS, TRAINING_TYPES, MODEL_TYPES, MODEL_VERSIONS,
 
18
  DEFAULT_NB_TRAINING_STEPS, DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
19
  DEFAULT_BATCH_SIZE, DEFAULT_CAPTION_DROPOUT_P,
20
  DEFAULT_LEARNING_RATE,
@@ -29,7 +30,8 @@ from vms.config import (
29
  DEFAULT_AUTO_RESUME,
30
  DEFAULT_CONTROL_TYPE, DEFAULT_TRAIN_QK_NORM,
31
  DEFAULT_FRAME_CONDITIONING_TYPE, DEFAULT_FRAME_CONDITIONING_INDEX,
32
- DEFAULT_FRAME_CONDITIONING_CONCATENATE_MASK
 
33
  )
34
 
35
  logger = logging.getLogger(__name__)
@@ -50,15 +52,6 @@ class TrainTab(BaseTab):
50
  with gr.Row():
51
  self.components["train_title"] = gr.Markdown("## 0 files in the training dataset")
52
 
53
- with gr.Row():
54
- with gr.Column():
55
- self.components["training_preset"] = gr.Dropdown(
56
- choices=list(TRAINING_PRESETS.keys()),
57
- label="Training Preset",
58
- value=list(TRAINING_PRESETS.keys())[0]
59
- )
60
- self.components["preset_info"] = gr.Markdown()
61
-
62
  with gr.Row():
63
  with gr.Column():
64
  # Get the default model type from the first preset
@@ -115,6 +108,15 @@ class TrainTab(BaseTab):
115
  self.components["model_info"] = gr.Markdown(
116
  value=self.get_model_info(list(MODEL_TYPES.keys())[0], list(TRAINING_TYPES.keys())[0])
117
  )
118
 
119
  # LoRA specific parameters (will show/hide based on training type)
120
  with gr.Row(visible=True) as lora_params_row:
@@ -140,18 +142,18 @@ class TrainTab(BaseTab):
140
 
141
  with gr.Accordion("What is LoRA Rank?", open=False):
142
  gr.Markdown("""
143
- **LoRA Rank** determines the complexity of the LoRA adapters:
144
-
145
- - **Lower rank (16-32)**: Smaller file size, faster training, but less expressive
146
- - **Medium rank (64-128)**: Good balance between quality and file size
147
- - **Higher rank (256-1024)**: More expressive adapters, better quality but larger file size
148
-
149
- Think of rank as the "capacity" of your adapter. Higher ranks can learn more complex modifications to the base model but require more VRAM during training and result in larger files.
150
-
151
- **Quick guide:**
152
- - For Wan models: Use 32-64 (Wan models work well with lower ranks)
153
- - For LTX-Video: Use 128-256
154
- - For Hunyuan Video: Use 128
155
  """)
156
 
157
  with gr.Column():
@@ -162,32 +164,31 @@ class TrainTab(BaseTab):
162
  type="value",
163
  info="Controls the effective learning rate scaling of LoRA adapters. Usually set to same value as rank"
164
  )
165
-
166
  with gr.Accordion("What is LoRA Alpha?", open=False):
167
  gr.Markdown("""
168
- **LoRA Alpha** controls the effective scale of the LoRA updates:
169
-
170
- - The actual scaling factor is calculated as `alpha ÷ rank`
171
- - Usually set to match the rank value (alpha = rank)
172
- - Higher alpha = stronger effect from the adapters
173
- - Lower alpha = more subtle adapter influence
174
-
175
- **Best practice:**
176
- - For most cases, set alpha equal to rank
177
- - For more aggressive training, set alpha higher than rank
178
- - For more conservative training, set alpha lower than rank
179
  """)
180
-
181
 
182
  # Control specific parameters (will show/hide based on training type)
183
  with gr.Row(visible=False) as control_params_row:
184
  self.components["control_params_row"] = control_params_row
185
  with gr.Column():
186
  gr.Markdown("""
187
- ## 🖼️ Control Training Settings
188
-
189
- Control training enables **image-to-video generation** by teaching the model how to use an image as a guide for video creation.
190
- This is ideal for turning still images into dynamic videos while preserving composition, style, and content.
191
  """)
192
 
193
  # Second row for control parameters
@@ -203,10 +204,10 @@ class TrainTab(BaseTab):
203
 
204
  with gr.Accordion("What is Control Conditioning?", open=False):
205
  gr.Markdown("""
206
- **Control Conditioning** allows the model to be guided by an input image, adapting the video generation based on the image content. This is used for image-to-video generation where you want to turn an image into a moving video while maintaining its style, composition or content.
207
-
208
- - **canny**: Uses edge detection to extract outlines from images for structure-preserving video generation
209
- - **custom**: Direct image conditioning without preprocessing, preserving more image details
210
  """)
211
 
212
  with gr.Column():
@@ -218,11 +219,11 @@ class TrainTab(BaseTab):
218
 
219
  with gr.Accordion("What is QK Normalization?", open=False):
220
  gr.Markdown("""
221
- **QK Normalization** refers to normalizing the query and key values in the attention mechanism of transformers.
222
-
223
- - When enabled, allows the model to better integrate control signals with content generation
224
- - Improves training stability for control models
225
- - Generally recommended for control training, especially with image conditioning
226
  """)
227
 
228
  with gr.Row(visible=False) as frame_conditioning_row:
@@ -237,15 +238,15 @@ class TrainTab(BaseTab):
237
 
238
  with gr.Accordion("Frame Conditioning Type Explanation", open=False):
239
  gr.Markdown("""
240
- **Frame Conditioning Types** determine which frames in the video receive image conditioning:
241
-
242
- - **index**: Only applies conditioning to a single frame at the specified index
243
- - **prefix**: Applies conditioning to all frames before a certain point
244
- - **random**: Randomly selects frames to receive conditioning during training
245
- - **first_and_last**: Only applies conditioning to the first and last frames
246
- - **full**: Applies conditioning to all frames in the video
247
-
248
- For image-to-video tasks, 'index' (usually with index 0) is most common as it conditions only the first frame.
249
  """)
250
 
251
  with gr.Column():
@@ -267,12 +268,12 @@ class TrainTab(BaseTab):
267
 
268
  with gr.Accordion("What is Frame Mask Concatenation?", open=False):
269
  gr.Markdown("""
270
- **Frame Mask Concatenation** adds an additional channel to the control signal that indicates which frames are being conditioned:
271
 
272
- - Creates a binary mask (0/1) indicating which frames receive conditioning
273
- - Helps the model distinguish between conditioned and unconditioned frames
274
- - Particularly useful for 'index' conditioning where only select frames are conditioned
275
- - Generally improves temporal consistency between conditioned and unconditioned frames
276
  """)
277
 
278
  with gr.Column():
@@ -448,7 +449,7 @@ class TrainTab(BaseTab):
448
  return None
449
 
450
  def handle_new_training_start(
451
- self, preset, model_type, model_version, training_type,
452
  lora_rank, lora_alpha, train_steps, batch_size, learning_rate,
453
  save_iterations, repo_id, progress=gr.Progress()
454
  ):
@@ -469,13 +470,13 @@ class TrainTab(BaseTab):
469
 
470
  # Start training normally
471
  return self.handle_training_start(
472
- preset, model_type, model_version, training_type,
473
  lora_rank, lora_alpha, train_steps, batch_size, learning_rate,
474
  save_iterations, repo_id, progress
475
  )
476
 
477
  def handle_resume_training(
478
- self, preset, model_type, model_version, training_type,
479
  lora_rank, lora_alpha, train_steps, batch_size, learning_rate,
480
  save_iterations, repo_id, progress=gr.Progress()
481
  ):
@@ -490,7 +491,7 @@ class TrainTab(BaseTab):
490
 
491
  # Start training with the checkpoint
492
  return self.handle_training_start(
493
- preset, model_type, model_version, training_type,
494
  lora_rank, lora_alpha, train_steps, batch_size, learning_rate,
495
  save_iterations, repo_id, progress,
496
  resume_from_checkpoint="latest"
@@ -498,20 +499,34 @@ class TrainTab(BaseTab):
498
 
499
  def connect_events(self) -> None:
500
  """Connect event handlers to UI components"""
501
- # Model type change event - Update model version dropdown choices
502
  self.components["model_type"].change(
503
  fn=self.update_model_versions,
504
  inputs=[self.components["model_type"]],
505
  outputs=[self.components["model_version"]]
506
  ).then(
507
- fn=self.update_model_type_and_version, # Add this new function
508
  inputs=[self.components["model_type"], self.components["model_version"]],
509
  outputs=[]
510
  ).then(
511
- # Use get_model_info instead of update_model_info
512
- fn=self.get_model_info,
513
  inputs=[self.components["model_type"], self.components["training_type"]],
514
- outputs=[self.components["model_info"]]
515
  )
516
 
517
  # Model version change event
@@ -535,7 +550,14 @@ class TrainTab(BaseTab):
535
  self.components["batch_size"],
536
  self.components["learning_rate"],
537
  self.components["save_iterations"],
538
- self.components["lora_params_row"]
539
  ]
540
  )
541
 
@@ -632,50 +654,17 @@ class TrainTab(BaseTab):
632
  outputs=[]
633
  )
634
 
635
- # Training preset change event
636
- self.components["training_preset"].change(
637
- fn=lambda v: self.app.update_ui_state(training_preset=v),
638
- inputs=[self.components["training_preset"]],
639
  outputs=[]
640
- ).then(
641
- fn=self.update_training_params,
642
- inputs=[self.components["training_preset"]],
643
- outputs=[
644
- self.components["model_type"],
645
- self.components["training_type"],
646
- self.components["lora_rank"],
647
- self.components["lora_alpha"],
648
- self.components["train_steps"],
649
- self.components["batch_size"],
650
- self.components["learning_rate"],
651
- self.components["save_iterations"],
652
- self.components["preset_info"],
653
- self.components["lora_params_row"],
654
- self.components["lora_settings_row"],
655
- self.components["num_gpus"],
656
- self.components["precomputation_items"],
657
- self.components["lr_warmup_steps"],
658
- # Add model_version to the outputs
659
- self.components["model_version"],
660
- # Control parameters rows visibility
661
- self.components["control_params_row"],
662
- self.components["control_settings_row"],
663
- self.components["frame_conditioning_row"],
664
- self.components["control_options_row"],
665
- # Control parameter values
666
- self.components["control_type"],
667
- self.components["train_qk_norm"],
668
- self.components["frame_conditioning_type"],
669
- self.components["frame_conditioning_index"],
670
- self.components["frame_conditioning_concatenate_mask"],
671
- ]
672
  )
673
 
674
  # Training control events
675
  self.components["start_btn"].click(
676
  fn=self.handle_new_training_start,
677
  inputs=[
678
- self.components["training_preset"],
679
  self.components["model_type"],
680
  self.components["model_version"],
681
  self.components["training_type"],
@@ -696,7 +685,6 @@ class TrainTab(BaseTab):
696
  self.components["resume_btn"].click(
697
  fn=self.handle_resume_training,
698
  inputs=[
699
- self.components["training_preset"],
700
  self.components["model_type"],
701
  self.components["model_version"],
702
  self.components["training_type"],
@@ -761,23 +749,25 @@ class TrainTab(BaseTab):
761
  # Update UI state with proper model_type first
762
  self.app.update_ui_state(model_type=model_type)
763
 
764
- # Ensure model_versions is a simple list of strings
765
- model_versions = [str(version) for version in model_versions]
 
766
 
767
  # Create a new dropdown with the updated choices
768
- if not model_versions:
769
  logger.warning(f"No model versions available for {model_type}, using empty list")
770
  # Return empty dropdown to avoid errors
771
  return gr.Dropdown(choices=[], value=None)
772
 
773
  # Ensure default_version is in model_versions
774
- if default_version not in model_versions and model_versions:
775
- default_version = model_versions[0]
 
776
  logger.info(f"Default version not in choices, using first available: {default_version}")
777
 
778
  # Return the updated dropdown
779
- logger.info(f"Returning dropdown with {len(model_versions)} choices")
780
- return gr.Dropdown(choices=model_versions, value=default_version)
781
  except Exception as e:
782
  # Log any exceptions for debugging
783
  logger.error(f"Error in update_model_versions: {str(e)}")
@@ -785,7 +775,7 @@ class TrainTab(BaseTab):
785
  return gr.Dropdown(choices=[], value=None)
786
 
787
  def handle_training_start(
788
- self, preset, model_type, model_version, training_type,
789
  lora_rank, lora_alpha, train_steps, batch_size, learning_rate,
790
  save_iterations, repo_id,
791
  progress=gr.Progress(),
@@ -844,7 +834,6 @@ class TrainTab(BaseTab):
844
  learning_rate,
845
  save_iterations,
846
  repo_id,
847
- preset_name=preset,
848
  training_type=training_internal_type,
849
  model_version=model_version,
850
  resume_from_checkpoint=resume_from,
@@ -898,14 +887,14 @@ class TrainTab(BaseTab):
898
  # Add general information about the selected training type
899
  if training_type == "Full Finetune":
900
  finetune_info = """
901
- ## 🧠 Full Finetune Mode
902
 
903
- Full finetune mode trains all parameters of the model, requiring more VRAM but potentially enabling higher quality results.
904
 
905
- - Requires 20-50GB+ VRAM depending on model
906
- - Creates a complete standalone model (~8GB+ file size)
907
- - Recommended only for high-end GPUs (A100, H100, etc.)
908
- - Not recommended for the larger models like Hunyuan Video on consumer hardware
909
  """
910
  model_info = finetune_info + "\n\n" + model_info
911
 
@@ -925,6 +914,8 @@ class TrainTab(BaseTab):
925
  self.components["batch_size"]: params["batch_size"],
926
  self.components["learning_rate"]: params["learning_rate"],
927
  self.components["save_iterations"]: params["save_iterations"],
 
 
928
  self.components["lora_params_row"]: gr.Row(visible=show_lora_params),
929
  self.components["lora_settings_row"]: gr.Row(visible=show_lora_params),
930
  self.components["control_params_row"]: gr.Row(visible=show_control_params),
@@ -936,11 +927,11 @@ class TrainTab(BaseTab):
936
  def get_model_info(self, model_type: str, training_type: str) -> str:
937
  """Get information about the selected model type and training method"""
938
  if model_type == "HunyuanVideo":
939
- base_info = """### HunyuanVideo
940
- - Required VRAM: ~48GB minimum
941
- - Recommended batch size: 1-2
942
- - Typical training time: 2-4 hours
943
- - Default resolution: 49x512x768"""
944
 
945
  if training_type == "LoRA Finetune":
946
  return base_info + "\n- Required VRAM: ~18GB minimum\n- Default LoRA rank: 128 (~400 MB)"
@@ -952,10 +943,10 @@ class TrainTab(BaseTab):
952
  return base_info + "\n- Required VRAM: ~48GB minimum\n- **Full finetune not recommended due to VRAM requirements**"
953
 
954
  elif model_type == "LTX-Video":
955
- base_info = """### LTX-Video
956
- - Recommended batch size: 1-4
957
- - Typical training time: 1-3 hours
958
- - Default resolution: 49x512x768"""
959
 
960
  if training_type == "LoRA Finetune":
961
  return base_info + "\n- Required VRAM: ~18GB minimum\n- Default LoRA rank: 128 (~400 MB)"
@@ -967,10 +958,10 @@ class TrainTab(BaseTab):
967
  return base_info + "\n- Required VRAM: ~21GB minimum\n- Full model size: ~8GB"
968
 
969
  elif model_type == "Wan":
970
- base_info = """### Wan
971
- - Recommended batch size: 1-4
972
- - Typical training time: 1-3 hours
973
- - Default resolution: 49x512x768"""
974
 
975
  if training_type == "LoRA Finetune":
976
  return base_info + "\n- Required VRAM: ~16GB minimum\n- Default LoRA rank: 32 (~120 MB)"
@@ -986,168 +977,30 @@ class TrainTab(BaseTab):
986
 
987
  def get_default_params(self, model_type: str, training_type: str) -> Dict[str, Any]:
988
  """Get default training parameters for model type"""
989
- # Find preset that matches model type and training type
990
- matching_presets = [
991
- preset for preset_name, preset in TRAINING_PRESETS.items()
992
- if preset["model_type"] == model_type and preset["training_type"] == training_type
993
- ]
994
-
995
- if matching_presets:
996
- # Use the first matching preset
997
- preset = matching_presets[0]
998
- return {
999
- "train_steps": preset.get("train_steps", DEFAULT_NB_TRAINING_STEPS),
1000
- "batch_size": preset.get("batch_size", DEFAULT_BATCH_SIZE),
1001
- "learning_rate": preset.get("learning_rate", DEFAULT_LEARNING_RATE),
1002
- "save_iterations": preset.get("save_iterations", DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),
1003
- "lora_rank": preset.get("lora_rank", DEFAULT_LORA_RANK_STR),
1004
- "lora_alpha": preset.get("lora_alpha", DEFAULT_LORA_ALPHA_STR)
1005
- }
1006
-
1007
- # Default fallbacks
1008
- if model_type == "hunyuan_video":
1009
- return {
1010
- "train_steps": DEFAULT_NB_TRAINING_STEPS,
1011
- "batch_size": DEFAULT_BATCH_SIZE,
1012
- "learning_rate": 2e-5,
1013
- "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
1014
- "lora_rank": DEFAULT_LORA_RANK_STR,
1015
- "lora_alpha": DEFAULT_LORA_ALPHA_STR
1016
- }
1017
- elif model_type == "ltx_video":
1018
- return {
1019
- "train_steps": DEFAULT_NB_TRAINING_STEPS,
1020
- "batch_size": DEFAULT_BATCH_SIZE,
1021
- "learning_rate": DEFAULT_LEARNING_RATE,
1022
- "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
1023
- "lora_rank": DEFAULT_LORA_RANK_STR,
1024
- "lora_alpha": DEFAULT_LORA_ALPHA_STR
1025
- }
1026
- elif model_type == "wan":
1027
- return {
1028
- "train_steps": DEFAULT_NB_TRAINING_STEPS,
1029
- "batch_size": DEFAULT_BATCH_SIZE,
1030
- "learning_rate": 5e-5,
1031
- "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
1032
- "lora_rank": "32",
1033
- "lora_alpha": "32"
1034
- }
1035
- else:
1036
- # Generic defaults
1037
- return {
1038
- "train_steps": DEFAULT_NB_TRAINING_STEPS,
1039
- "batch_size": DEFAULT_BATCH_SIZE,
1040
- "learning_rate": DEFAULT_LEARNING_RATE,
1041
- "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
1042
- "lora_rank": DEFAULT_LORA_RANK_STR,
1043
- "lora_alpha": DEFAULT_LORA_ALPHA_STR
1044
- }
1045
-
1046
- def update_training_params(self, preset_name: str) -> Tuple:
1047
- """Update UI components based on selected preset while preserving custom settings"""
1048
- preset = TRAINING_PRESETS[preset_name]
1049
-
1050
- # Load current UI state to check if user has customized values
1051
- current_state = self.app.load_ui_values()
1052
-
1053
- # Find the display name that maps to our model type
1054
- model_display_name = next(
1055
- key for key, value in MODEL_TYPES.items()
1056
- if value == preset["model_type"]
1057
- )
1058
-
1059
- # Find the display name that maps to our training type
1060
- training_display_name = next(
1061
- key for key, value in TRAINING_TYPES.items()
1062
- if value == preset["training_type"]
1063
- )
1064
-
1065
- # Get preset description for display
1066
- description = preset.get("description", "")
1067
-
1068
- # Get max values from buckets
1069
- buckets = preset["training_buckets"]
1070
- max_frames = max(frames for frames, _, _ in buckets)
1071
- max_height = max(height for _, height, _ in buckets)
1072
- max_width = max(width for _, _, width in buckets)
1073
- bucket_info = f"\nMaximum video size: {max_frames} frames at {max_width}x{max_height} resolution"
1074
-
1075
- info_text = f"{description}{bucket_info}"
1076
 
1077
- # Check if LoRA params should be visible
1078
- training_type_internal = preset["training_type"]
1079
- show_lora_params = training_type_internal == "lora" or training_type_internal == "control-lora"
1080
-
1081
- # Check if Control params should be visible
1082
- show_control_params = training_type_internal == "control-lora" or training_type_internal == "control-full-finetune"
1083
 
1084
- # Use preset defaults but preserve user-modified values if they exist
1085
- lora_rank_val = current_state.get("lora_rank") if current_state.get("lora_rank") != preset.get("lora_rank", DEFAULT_LORA_RANK_STR) else preset.get("lora_rank", DEFAULT_LORA_RANK_STR)
1086
- lora_alpha_val = current_state.get("lora_alpha") if current_state.get("lora_alpha") != preset.get("lora_alpha", DEFAULT_LORA_ALPHA_STR) else preset.get("lora_alpha", DEFAULT_LORA_ALPHA_STR)
1087
- train_steps_val = current_state.get("train_steps") if current_state.get("train_steps") != preset.get("train_steps", DEFAULT_NB_TRAINING_STEPS) else preset.get("train_steps", DEFAULT_NB_TRAINING_STEPS)
1088
- batch_size_val = current_state.get("batch_size") if current_state.get("batch_size") != preset.get("batch_size", DEFAULT_BATCH_SIZE) else preset.get("batch_size", DEFAULT_BATCH_SIZE)
1089
- learning_rate_val = current_state.get("learning_rate") if current_state.get("learning_rate") != preset.get("learning_rate", DEFAULT_LEARNING_RATE) else preset.get("learning_rate", DEFAULT_LEARNING_RATE)
1090
- save_iterations_val = current_state.get("save_iterations") if current_state.get("save_iterations") != preset.get("save_iterations", DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS) else preset.get("save_iterations", DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS)
1091
- num_gpus_val = current_state.get("num_gpus") if current_state.get("num_gpus") != preset.get("num_gpus", DEFAULT_NUM_GPUS) else preset.get("num_gpus", DEFAULT_NUM_GPUS)
1092
- precomputation_items_val = current_state.get("precomputation_items") if current_state.get("precomputation_items") != preset.get("precomputation_items", DEFAULT_PRECOMPUTATION_ITEMS) else preset.get("precomputation_items", DEFAULT_PRECOMPUTATION_ITEMS)
1093
- lr_warmup_steps_val = current_state.get("lr_warmup_steps") if current_state.get("lr_warmup_steps") != preset.get("lr_warmup_steps", DEFAULT_NB_LR_WARMUP_STEPS) else preset.get("lr_warmup_steps", DEFAULT_NB_LR_WARMUP_STEPS)
1094
 
1095
- # Control parameters
1096
- control_type_val = current_state.get("control_type") if current_state.get("control_type") != preset.get("control_type", DEFAULT_CONTROL_TYPE) else preset.get("control_type", DEFAULT_CONTROL_TYPE)
1097
- train_qk_norm_val = current_state.get("train_qk_norm") if current_state.get("train_qk_norm") != preset.get("train_qk_norm", DEFAULT_TRAIN_QK_NORM) else preset.get("train_qk_norm", DEFAULT_TRAIN_QK_NORM)
1098
- frame_conditioning_type_val = current_state.get("frame_conditioning_type") if current_state.get("frame_conditioning_type") != preset.get("frame_conditioning_type", DEFAULT_FRAME_CONDITIONING_TYPE) else preset.get("frame_conditioning_type", DEFAULT_FRAME_CONDITIONING_TYPE)
1099
- frame_conditioning_index_val = current_state.get("frame_conditioning_index") if current_state.get("frame_conditioning_index") != preset.get("frame_conditioning_index", DEFAULT_FRAME_CONDITIONING_INDEX) else preset.get("frame_conditioning_index", DEFAULT_FRAME_CONDITIONING_INDEX)
1100
- frame_conditioning_concatenate_mask_val = current_state.get("frame_conditioning_concatenate_mask") if current_state.get("frame_conditioning_concatenate_mask") != preset.get("frame_conditioning_concatenate_mask", DEFAULT_FRAME_CONDITIONING_CONCATENATE_MASK) else preset.get("frame_conditioning_concatenate_mask", DEFAULT_FRAME_CONDITIONING_CONCATENATE_MASK)
1101
 
1102
- # Get the appropriate model version for the selected model type
1103
- model_versions = self.get_model_version_choices(model_display_name)
1104
- default_model_version = self.get_default_model_version(model_display_name)
1105
-
1106
- # Ensure we have valid choices and values
1107
- if not model_versions:
1108
- logger.warning(f"No versions found for {model_display_name}, using empty list")
1109
- model_versions = []
1110
- default_model_version = None
1111
- elif default_model_version not in model_versions and model_versions:
1112
- default_model_version = model_versions[0]
1113
- logger.info(f"Reset default version to first available: {default_model_version}")
1114
-
1115
- # Ensure model_versions is a simple list of strings
1116
- model_versions = [str(version) for version in model_versions]
1117
-
1118
- # Create the model version dropdown update
1119
- model_version_update = gr.Dropdown(choices=model_versions, value=default_model_version)
1120
-
1121
- # Return values in the same order as the output components listed in line 644
1122
- # Make sure we return exactly 24 values to match what's expected
1123
- return (
1124
- model_display_name, # model_type
1125
- training_display_name, # training_type
1126
- lora_rank_val, # lora_rank
1127
- lora_alpha_val, # lora_alpha
1128
- train_steps_val, # train_steps
1129
- batch_size_val, # batch_size
1130
- learning_rate_val, # learning_rate
1131
- save_iterations_val, # save_iterations
1132
- info_text, # preset_info
1133
- gr.Row(visible=show_lora_params), # lora_params_row
1134
- gr.Row(visible=show_lora_params), # lora_settings_row (added missing row)
1135
- num_gpus_val, # num_gpus
1136
- precomputation_items_val, # precomputation_items
1137
- lr_warmup_steps_val, # lr_warmup_steps
1138
- model_version_update, # model_version
1139
- # Control parameters rows visibility
1140
- gr.Row(visible=show_control_params), # control_params_row
1141
- gr.Row(visible=show_control_params), # control_settings_row
1142
- gr.Row(visible=show_control_params), # frame_conditioning_row
1143
- gr.Row(visible=show_control_params), # control_options_row
1144
- # Control parameter values
1145
- control_type_val, # control_type
1146
- train_qk_norm_val, # train_qk_norm
1147
- frame_conditioning_type_val, # frame_conditioning_type
1148
- frame_conditioning_index_val, # frame_conditioning_index
1149
- frame_conditioning_concatenate_mask_val, # frame_conditioning_concatenate_mask
1150
- )
1151
 
1152
 
1153
  def get_latest_status_message_and_logs(self) -> Tuple[str, str, str]:
 
13
  from vms.utils import BaseTab
14
  from vms.config import (
15
  ASK_USER_TO_DUPLICATE_SPACE,
16
+ SD_TRAINING_BUCKETS, MD_TRAINING_BUCKETS,
17
+ RESOLUTION_OPTIONS,
18
+ TRAINING_TYPES, MODEL_TYPES, MODEL_VERSIONS,
19
  DEFAULT_NB_TRAINING_STEPS, DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
20
  DEFAULT_BATCH_SIZE, DEFAULT_CAPTION_DROPOUT_P,
21
  DEFAULT_LEARNING_RATE,
 
30
  DEFAULT_AUTO_RESUME,
31
  DEFAULT_CONTROL_TYPE, DEFAULT_TRAIN_QK_NORM,
32
  DEFAULT_FRAME_CONDITIONING_TYPE, DEFAULT_FRAME_CONDITIONING_INDEX,
33
+ DEFAULT_FRAME_CONDITIONING_CONCATENATE_MASK,
34
+ HUNYUAN_VIDEO_DEFAULTS, LTX_VIDEO_DEFAULTS, WAN_DEFAULTS
35
  )
36
 
37
  logger = logging.getLogger(__name__)
 
52
  with gr.Row():
53
  self.components["train_title"] = gr.Markdown("## 0 files in the training dataset")
54
 
55
  with gr.Row():
56
  with gr.Column():
57
  # Get the default model type from the first preset
 
108
  self.components["model_info"] = gr.Markdown(
109
  value=self.get_model_info(list(MODEL_TYPES.keys())[0], list(TRAINING_TYPES.keys())[0])
110
  )
111
+
112
+ with gr.Row():
113
+ with gr.Column():
114
+ self.components["resolution"] = gr.Dropdown(
115
+ choices=list(RESOLUTION_OPTIONS.keys()),
116
+ label="Resolution",
117
+ value=list(RESOLUTION_OPTIONS.keys())[0],
118
+ info="Select the resolution for training videos"
119
+ )
120
 
121
  # LoRA specific parameters (will show/hide based on training type)
122
  with gr.Row(visible=True) as lora_params_row:
 
142
 
143
  with gr.Accordion("What is LoRA Rank?", open=False):
144
  gr.Markdown("""
145
+ **LoRA Rank** determines the complexity of the LoRA adapters:
146
+
147
+ - **Lower rank (16-32)**: Smaller file size, faster training, but less expressive
148
+ - **Medium rank (64-128)**: Good balance between quality and file size
149
+ - **Higher rank (256-1024)**: More expressive adapters, better quality but larger file size
150
+
151
+ Think of rank as the "capacity" of your adapter. Higher ranks can learn more complex modifications to the base model but require more VRAM during training and result in larger files.
152
+
153
+ **Quick guide:**
154
+ - For Wan models: Use 32-64 (Wan models work well with lower ranks)
155
+ - For LTX-Video: Use 128-256
156
+ - For Hunyuan Video: Use 128
157
  """)
158
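To see why rank drives adapter size, note that a LoRA adapter stores two low-rank matrices per targeted weight, so its parameter count grows linearly with rank; a back-of-the-envelope sketch with made-up layer sizes:

```python
def lora_param_count(rank: int, in_features: int, out_features: int) -> int:
    # A LoRA adapter for one weight matrix stores A (rank x in) and B (out x rank)
    return rank * in_features + out_features * rank

# Example: a single 3072x3072 attention projection (illustrative size only)
for rank in (32, 128, 256):
    n = lora_param_count(rank, 3072, 3072)
    # bf16 weights -> 2 bytes per parameter
    print(f"rank={rank}: {n:,} params (~{n * 2 / 1e6:.1f} MB per adapted layer)")
```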
 
159
  with gr.Column():
 
164
  type="value",
165
  info="Controls the effective learning rate scaling of LoRA adapters. Usually set to same value as rank"
166
  )
 
167
  with gr.Accordion("What is LoRA Alpha?", open=False):
168
  gr.Markdown("""
169
+ **LoRA Alpha** controls the effective scale of the LoRA updates:
170
+
171
+ - The actual scaling factor is calculated as `alpha ÷ rank`
172
+ - Usually set to match the rank value (alpha = rank)
173
+ - Higher alpha = stronger effect from the adapters
174
+ - Lower alpha = more subtle adapter influence
175
+
176
+ **Best practice:**
177
+ - For most cases, set alpha equal to rank
178
+ - For more aggressive training, set alpha higher than rank
179
+ - For more conservative training, set alpha lower than rank
180
  """)
181
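The `alpha ÷ rank` scaling described above shows up when the LoRA delta is merged into a frozen weight; a minimal sketch (not the trainer's actual code):

```python
import torch

def apply_lora(W: torch.Tensor, A: torch.Tensor, B: torch.Tensor,
               rank: int, alpha: float) -> torch.Tensor:
    """Return the effective weight W + (alpha / rank) * B @ A."""
    scaling = alpha / rank          # alpha == rank -> scaling of 1.0
    return W + scaling * (B @ A)

# Shapes: W is (out, in), A is (rank, in), B is (out, rank)
W = torch.zeros(64, 64)
A = torch.randn(8, 64) * 0.01
B = torch.randn(64, 8) * 0.01
W_eff = apply_lora(W, A, B, rank=8, alpha=8.0)
```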
+
182
 
183
  # Control specific parameters (will show/hide based on training type)
184
  with gr.Row(visible=False) as control_params_row:
185
  self.components["control_params_row"] = control_params_row
186
  with gr.Column():
187
  gr.Markdown("""
188
+ ## 🖼️ Control Training Settings
189
+
190
+ Control training enables **image-to-video generation** by teaching the model how to use an image as a guide for video creation.
191
+ This is ideal for turning still images into dynamic videos while preserving composition, style, and content.
192
  """)
193
 
194
  # Second row for control parameters
 
204
 
205
  with gr.Accordion("What is Control Conditioning?", open=False):
206
  gr.Markdown("""
207
+ **Control Conditioning** allows the model to be guided by an input image, adapting the video generation to the image content. This is used for image-to-video generation, where you want to turn a still image into a moving video while preserving its style, composition, and content.
208
+
209
+ - **canny**: Uses edge detection to extract outlines from images for structure-preserving video generation
210
+ - **custom**: Direct image conditioning without preprocessing, preserving more image details
211
  """)
212
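As an illustration of the `canny` option above, a conditioning image can be reduced to an edge map before it is fed to the model; a minimal sketch using OpenCV (the thresholds are arbitrary examples, not values taken from this project):

```python
import cv2
import numpy as np

def canny_control_image(image_path: str) -> np.ndarray:
    """Turn an input image into a 3-channel edge map usable as a control signal."""
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    edges = cv2.Canny(image, threshold1=100, threshold2=200)
    # Replicate the single edge channel to match the RGB layout of video frames
    return np.stack([edges, edges, edges], axis=-1)
```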
 
213
  with gr.Column():
 
219
 
220
  with gr.Accordion("What is QK Normalization?", open=False):
221
  gr.Markdown("""
222
+ **QK Normalization** refers to normalizing the query (Q) and key (K) projections in the attention mechanism of transformers.
223
+
224
+ - When enabled, allows the model to better integrate control signals with content generation
225
+ - Improves training stability for control models
226
+ - Generally recommended for control training, especially with image conditioning
227
  """)
228
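In practice this usually means applying a norm such as RMSNorm to the query and key projections before computing attention scores; a simplified sketch, not the model's exact code:

```python
import torch
import torch.nn.functional as F

def rms_norm(x: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    return x * torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + eps)

def attention_with_qk_norm(q, k, v, train_qk_norm: bool = True):
    # Normalizing Q and K keeps their dot products in a stable range,
    # which helps when extra control channels are injected during training.
    if train_qk_norm:
        q, k = rms_norm(q), rms_norm(k)
    return F.scaled_dot_product_attention(q, k, v)
```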
 
229
  with gr.Row(visible=False) as frame_conditioning_row:
 
238
 
239
  with gr.Accordion("Frame Conditioning Type Explanation", open=False):
240
  gr.Markdown("""
241
+ **Frame Conditioning Types** determine which frames in the video receive image conditioning:
242
+
243
+ - **index**: Only applies conditioning to a single frame at the specified index
244
+ - **prefix**: Applies conditioning to all frames before a certain point
245
+ - **random**: Randomly selects frames to receive conditioning during training
246
+ - **first_and_last**: Only applies conditioning to the first and last frames
247
+ - **full**: Applies conditioning to all frames in the video
248
+
249
+ For image-to-video tasks, 'index' (usually with index 0) is most common as it conditions only the first frame.
250
  """)
251
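To make the five modes above concrete, here is a hedged sketch of how each type could be mapped to a per-frame selection mask; it is illustrative only, not the trainer's actual implementation:

```python
import random

def conditioning_frame_mask(num_frames: int, mode: str, index: int = 0) -> list[int]:
    """Return a 0/1 flag per frame marking which frames receive image conditioning."""
    if mode == "index":
        return [1 if i == index else 0 for i in range(num_frames)]
    if mode == "prefix":
        return [1 if i <= index else 0 for i in range(num_frames)]
    if mode == "random":
        chosen = set(random.sample(range(num_frames), k=max(1, num_frames // 8)))
        return [1 if i in chosen else 0 for i in range(num_frames)]
    if mode == "first_and_last":
        return [1 if i in (0, num_frames - 1) else 0 for i in range(num_frames)]
    if mode == "full":
        return [1] * num_frames
    raise ValueError(f"Unknown frame conditioning type: {mode}")
```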
 
252
  with gr.Column():
 
268
 
269
  with gr.Accordion("What is Frame Mask Concatenation?", open=False):
270
  gr.Markdown("""
271
+ **Frame Mask Concatenation** adds an additional channel to the control signal that indicates which frames are being conditioned:
272
 
273
+ - Creates a binary mask (0/1) indicating which frames receive conditioning
274
+ - Helps the model distinguish between conditioned and unconditioned frames
275
+ - Particularly useful for 'index' conditioning where only select frames are conditioned
276
+ - Generally improves temporal consistency between conditioned and unconditioned frames
277
  """)
278
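Building on the mask idea, a minimal sketch (with illustrative tensor shapes) of concatenating the per-frame flag as an extra channel:

```python
import torch

def concatenate_frame_mask(control_latents: torch.Tensor, frame_mask: torch.Tensor) -> torch.Tensor:
    """control_latents: (frames, channels, height, width); frame_mask: (frames,) of 0/1."""
    f, _, h, w = control_latents.shape
    mask_channel = frame_mask.view(f, 1, 1, 1).expand(f, 1, h, w).to(control_latents.dtype)
    # The model now sees an explicit per-frame flag alongside the control channels
    return torch.cat([control_latents, mask_channel], dim=1)
```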
 
279
  with gr.Column():
 
449
  return None
450
 
451
  def handle_new_training_start(
452
+ self, model_type, model_version, training_type,
453
  lora_rank, lora_alpha, train_steps, batch_size, learning_rate,
454
  save_iterations, repo_id, progress=gr.Progress()
455
  ):
 
470
 
471
  # Start training normally
472
  return self.handle_training_start(
473
+ model_type, model_version, training_type,
474
  lora_rank, lora_alpha, train_steps, batch_size, learning_rate,
475
  save_iterations, repo_id, progress
476
  )
477
 
478
  def handle_resume_training(
479
+ self, model_type, model_version, training_type,
480
  lora_rank, lora_alpha, train_steps, batch_size, learning_rate,
481
  save_iterations, repo_id, progress=gr.Progress()
482
  ):
 
491
 
492
  # Start training with the checkpoint
493
  return self.handle_training_start(
494
+ model_type, model_version, training_type,
495
  lora_rank, lora_alpha, train_steps, batch_size, learning_rate,
496
  save_iterations, repo_id, progress,
497
  resume_from_checkpoint="latest"
 
499
 
500
  def connect_events(self) -> None:
501
  """Connect event handlers to UI components"""
502
+ # Model type change event - Update model version dropdown choices and default parameters
503
  self.components["model_type"].change(
504
  fn=self.update_model_versions,
505
  inputs=[self.components["model_type"]],
506
  outputs=[self.components["model_version"]]
507
  ).then(
508
+ fn=self.update_model_type_and_version,
509
  inputs=[self.components["model_type"], self.components["model_version"]],
510
  outputs=[]
511
  ).then(
512
+ # Update model info and recommended default values based on model and training type
513
+ fn=self.update_model_info,
514
  inputs=[self.components["model_type"], self.components["training_type"]],
515
+ outputs=[
516
+ self.components["model_info"],
517
+ self.components["train_steps"],
518
+ self.components["batch_size"],
519
+ self.components["learning_rate"],
520
+ self.components["save_iterations"],
521
+ self.components["lora_params_row"],
522
+ self.components["lora_settings_row"],
523
+ self.components["control_params_row"],
524
+ self.components["control_settings_row"],
525
+ self.components["frame_conditioning_row"],
526
+ self.components["control_options_row"],
527
+ self.components["lora_rank"],
528
+ self.components["lora_alpha"]
529
+ ]
530
  )
531
 
532
  # Model version change event
 
550
  self.components["batch_size"],
551
  self.components["learning_rate"],
552
  self.components["save_iterations"],
553
+ self.components["lora_params_row"],
554
+ self.components["lora_settings_row"],
555
+ self.components["control_params_row"],
556
+ self.components["control_settings_row"],
557
+ self.components["frame_conditioning_row"],
558
+ self.components["control_options_row"],
559
+ self.components["lora_rank"],
560
+ self.components["lora_alpha"]
561
  ]
562
  )
563
 
 
654
  outputs=[]
655
  )
656
 
657
+ # Resolution change event
658
+ self.components["resolution"].change(
659
+ fn=lambda v: self.app.update_ui_state(resolution=v),
660
+ inputs=[self.components["resolution"]],
661
  outputs=[]
662
  )
663
 
664
  # Training control events
665
  self.components["start_btn"].click(
666
  fn=self.handle_new_training_start,
667
  inputs=[
 
668
  self.components["model_type"],
669
  self.components["model_version"],
670
  self.components["training_type"],
 
685
  self.components["resume_btn"].click(
686
  fn=self.handle_resume_training,
687
  inputs=[
 
688
  self.components["model_type"],
689
  self.components["model_version"],
690
  self.components["training_type"],
 
749
  # Update UI state with proper model_type first
750
  self.app.update_ui_state(model_type=model_type)
751
 
752
+ # Create a list of tuples (label, value) for the dropdown choices
753
+ # This ensures compatibility with Gradio Dropdown component expectations
754
+ choices_tuples = [(str(version), str(version)) for version in model_versions]
755
 
756
  # Create a new dropdown with the updated choices
757
+ if not choices_tuples:
758
  logger.warning(f"No model versions available for {model_type}, using empty list")
759
  # Return empty dropdown to avoid errors
760
  return gr.Dropdown(choices=[], value=None)
761
 
762
  # Ensure default_version is in model_versions
763
+ string_versions = [str(v) for v in model_versions]
764
+ if default_version not in string_versions and string_versions:
765
+ default_version = string_versions[0]
766
  logger.info(f"Default version not in choices, using first available: {default_version}")
767
 
768
  # Return the updated dropdown
769
+ logger.info(f"Returning dropdown with {len(choices_tuples)} choices")
770
+ return gr.Dropdown(choices=choices_tuples, value=default_version)
771
  except Exception as e:
772
  # Log any exceptions for debugging
773
  logger.error(f"Error in update_model_versions: {str(e)}")
 
775
  return gr.Dropdown(choices=[], value=None)
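For context on the change above: Gradio's `Dropdown` accepts `(label, value)` tuples for `choices`, and returning a new `gr.Dropdown(...)` from a handler updates the component in place. A self-contained sketch of the same pattern (the version strings are placeholders):

```python
import gradio as gr

VERSIONS = {
    "LTX-Video": ["Lightricks/LTX-Video"],            # placeholder entries
    "Wan": ["Wan-AI/Wan2.1-T2V-1.3B-Diffusers"],
}

def update_versions(model_type: str):
    choices = [(v, v) for v in VERSIONS.get(model_type, [])]
    default = choices[0][1] if choices else None
    return gr.Dropdown(choices=choices, value=default)

with gr.Blocks() as demo:
    model_type = gr.Dropdown(choices=list(VERSIONS.keys()), value="LTX-Video", label="Model type")
    model_version = gr.Dropdown(choices=[], label="Model version")
    model_type.change(update_versions, inputs=[model_type], outputs=[model_version])

if __name__ == "__main__":
    demo.launch()
```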
776
 
777
  def handle_training_start(
778
+ self, model_type, model_version, training_type,
779
  lora_rank, lora_alpha, train_steps, batch_size, learning_rate,
780
  save_iterations, repo_id,
781
  progress=gr.Progress(),
 
834
  learning_rate,
835
  save_iterations,
836
  repo_id,
 
837
  training_type=training_internal_type,
838
  model_version=model_version,
839
  resume_from_checkpoint=resume_from,
 
887
  # Add general information about the selected training type
888
  if training_type == "Full Finetune":
889
  finetune_info = """
890
+ ## 🧠 Full Finetune Mode
891
 
892
+ Full finetune mode trains all parameters of the model, requiring more VRAM but potentially enabling higher quality results.
893
 
894
+ - Requires 20-50GB+ VRAM depending on model
895
+ - Creates a complete standalone model (~8GB+ file size)
896
+ - Recommended only for high-end GPUs (A100, H100, etc.)
897
+ - Not recommended for larger models such as Hunyuan Video on consumer hardware
898
  """
899
  model_info = finetune_info + "\n\n" + model_info
900
 
 
914
  self.components["batch_size"]: params["batch_size"],
915
  self.components["learning_rate"]: params["learning_rate"],
916
  self.components["save_iterations"]: params["save_iterations"],
917
+ self.components["lora_rank"]: params["lora_rank"],
918
+ self.components["lora_alpha"]: params["lora_alpha"],
919
  self.components["lora_params_row"]: gr.Row(visible=show_lora_params),
920
  self.components["lora_settings_row"]: gr.Row(visible=show_lora_params),
921
  self.components["control_params_row"]: gr.Row(visible=show_control_params),
 
927
  def get_model_info(self, model_type: str, training_type: str) -> str:
928
  """Get information about the selected model type and training method"""
929
  if model_type == "HunyuanVideo":
930
+ base_info = """## HunyuanVideo Training
931
+ - Required VRAM: ~48GB minimum
932
+ - Recommended batch size: 1-2
933
+ - Typical training time: 2-4 hours
934
+ - Default resolution: 49x512x768"""
935
 
936
  if training_type == "LoRA Finetune":
937
  return base_info + "\n- Required VRAM: ~18GB minimum\n- Default LoRA rank: 128 (~400 MB)"
 
943
  return base_info + "\n- Required VRAM: ~48GB minimum\n- **Full finetune not recommended due to VRAM requirements**"
944
 
945
  elif model_type == "LTX-Video":
946
+ base_info = """## LTX-Video Training
947
+ - Recommended batch size: 1-4
948
+ - Typical training time: 1-3 hours
949
+ - Default resolution: 49x512x768"""
950
 
951
  if training_type == "LoRA Finetune":
952
  return base_info + "\n- Required VRAM: ~18GB minimum\n- Default LoRA rank: 128 (~400 MB)"
 
958
  return base_info + "\n- Required VRAM: ~21GB minimum\n- Full model size: ~8GB"
959
 
960
  elif model_type == "Wan":
961
+ base_info = """## Wan2.1 Training
962
+ - Recommended batch size: 1-4
963
+ - Typical training time: 1-3 hours
964
+ - Default resolution: 49x512x768"""
965
 
966
  if training_type == "LoRA Finetune":
967
  return base_info + "\n- Required VRAM: ~16GB minimum\n- Default LoRA rank: 32 (~120 MB)"
 
977
 
978
  def get_default_params(self, model_type: str, training_type: str) -> Dict[str, Any]:
979
  """Get default training parameters for model type"""
980
+ # Use model-specific defaults based on model_type and training_type
981
982
 
983
+ if model_type == "hunyuan_video" and training_type in HUNYUAN_VIDEO_DEFAULTS:
984
+ model_defaults = HUNYUAN_VIDEO_DEFAULTS[training_type]
985
+ elif model_type == "ltx_video" and training_type in LTX_VIDEO_DEFAULTS:
986
+ model_defaults = LTX_VIDEO_DEFAULTS[training_type]
987
+ elif model_type == "wan" and training_type in WAN_DEFAULTS:
988
+ model_defaults = WAN_DEFAULTS[training_type]
989
 
990
+ # Build the complete params dict with defaults plus model-specific overrides
991
+ params = {
992
+ "train_steps": DEFAULT_NB_TRAINING_STEPS,
993
+ "batch_size": DEFAULT_BATCH_SIZE,
994
+ "learning_rate": DEFAULT_LEARNING_RATE,
995
+ "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
996
+ "lora_rank": DEFAULT_LORA_RANK_STR,
997
+ "lora_alpha": DEFAULT_LORA_ALPHA_STR
998
+ }
 
999
 
1000
+ # Override with model-specific values
1001
+ params.update(model_defaults)
 
 
 
 
1002
 
1003
+ return params
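With this structure, the per-model dictionaries only need to override the values that differ from the global defaults. A hypothetical sketch of the merge (the real `WAN_DEFAULTS` lives in vms/config.py and its keys and values may differ):

```python
# Hypothetical illustration of the merge performed above; placeholder values only.
DEFAULTS = {"train_steps": 1000, "learning_rate": 3e-5, "lora_rank": "128"}
WAN_DEFAULTS = {"lora": {"learning_rate": 5e-5, "lora_rank": "32"}}

params = dict(DEFAULTS)
params.update(WAN_DEFAULTS["lora"])  # model-specific values win where present
print(params)  # {'train_steps': 1000, 'learning_rate': 5e-05, 'lora_rank': '32'}
```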
1004
 
1005
 
1006
  def get_latest_status_message_and_logs(self) -> Tuple[str, str, str]: