> Not usable for inference; only a temp model for testing training scripts.

It was created with the script below, which shrinks the Thinker LLM of `Qwen/Qwen3-Omni-30B-A3B-Instruct` to a randomly initialized 4-layer MoE:

```python
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # set before torch initializes CUDA

from transformers import Qwen3OmniMoeForConditionalGeneration, Qwen3OmniMoeProcessor
from transformers.models.qwen3_omni_moe import Qwen3OmniMoeThinkerTextModel
from transformers.models.qwen3_omni_moe.configuration_qwen3_omni_moe import Qwen3OmniMoeTextConfig

MODEL_PATH = "/scratch/vladimir_albrekht/qwen3omni-fine-tuning/models/Qwen3-Omni-30B-A3B-Instruct"
# MODEL_PATH = "Qwen/Qwen3-Omni-30B-A3B-Thinking"

# 1. Load the original 30B model and processor
model = Qwen3OmniMoeForConditionalGeneration.from_pretrained(
    MODEL_PATH,
    dtype="auto",
    device_map="auto",
    attn_implementation="flash_attention_2",
)
processor = Qwen3OmniMoeProcessor.from_pretrained(MODEL_PATH)

# 2. Take the original Thinker text config and build a smaller version of it
original_text_config = model.config.thinker_config.text_config

shrunk_keys = [
    "hidden_size", "vocab_size", "num_hidden_layers", "num_attention_heads",
    "num_key_value_heads", "intermediate_size", "num_experts",
    "num_experts_per_tok", "moe_intermediate_size",
]
small_text_config = Qwen3OmniMoeTextConfig(
    hidden_size=original_text_config.hidden_size,  # kept the same
    vocab_size=original_text_config.vocab_size,    # kept the same
    num_hidden_layers=4,         # reduced from 48
    num_attention_heads=16,      # reduced from 32
    num_key_value_heads=2,       # reduced from 4
    intermediate_size=1024,      # reduced from 6144
    num_experts=8,               # reduced from 128
    num_experts_per_tok=2,       # reduced from 8
    moe_intermediate_size=256,   # reduced from 768
    # Carry over every other parameter unchanged
    **{k: v for k, v in original_text_config.to_dict().items() if k not in shrunk_keys},
)

# 3. Initialize the smaller Thinker LLM from scratch (randomly initialized weights),
#    casting it to the base model's dtype so the saved checkpoint is consistent
print("--- Initializing new, smaller Thinker LLM from scratch ---")
small_thinker_llm = Qwen3OmniMoeThinkerTextModel(small_text_config).to(model.dtype)

# 4. Swap it into the full model and keep the config in sync
model.thinker.model = small_thinker_llm
model.config.thinker_config.text_config = small_thinker_llm.config

# 5. Save the shrunken checkpoint
PATH = "./qwen3omni_5B_random_thinker_weights"
model.save_pretrained(PATH)
processor.save_pretrained(PATH)
```
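
A quick way to sanity-check the saved checkpoint is to reload it and count parameters. This is a minimal sketch, assuming the save path above and that the checkpoint fits on your device; outputs are meaningless since the Thinker weights are random:

```python
from transformers import Qwen3OmniMoeForConditionalGeneration

# Reload the shrunken checkpoint (sanity check only, not for inference)
small = Qwen3OmniMoeForConditionalGeneration.from_pretrained(
    "./qwen3omni_5B_random_thinker_weights",
    dtype="auto",
    device_map="auto",
)

# Verify the total parameter count dropped as expected
n_params = sum(p.numel() for p in small.parameters())
print(f"{n_params / 1e9:.2f}B parameters")
```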