{ "_name_or_path": "/leonardo_work/EUHPC_D19_097/mllm/checkpoints/whisper-large-v3_Llama-Krikri-8B-Base_injec=21_frozenWhisper=True_inputMasking=False_full=True/checkpoint-3000", "add_lora": true, "architectures": [ "SLM" ], "attention_bias": false, "attention_dropout": 0.0, "attn_implementation": null, "bos_token_id": 1, "eos_token_id": 2, "frozen_whisper": true, "head_dim": 128, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "injection_layer": 21, "intermediate_size": 11008, "llm_lora_config": { "alpha_pattern": {}, "auto_mapping": null, "base_model_name_or_path": "ilsp/Llama-Krikri-8B-Base", "bias": "none", "eva_config": null, "exclude_modules": null, "fan_in_fan_out": false, "inference_mode": false, "init_lora_weights": true, "layer_replication": null, "layers_pattern": null, "layers_to_transform": null, "loftq_config": {}, "lora_alpha": 16, "lora_bias": "none", "lora_dropout": 0.1, "megatron_config": null, "megatron_core": "megatron.core", "modules_to_save": null, "peft_type": "LORA", "r": 8, "rank_pattern": {}, "revision": null, "target_modules": "layers\\.\\d+\\.(self_attn\\.(q_proj|k_proj|v_proj|o_proj)|mlp\\.(gate_proj|up_proj|down_proj))", "task_type": null, "use_dora": false, "use_rslora": false }, "llm_path": "ilsp/Llama-Krikri-8B-Base", "max_position_embeddings": 2048, "mlp_bias": false, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 10000.0, "streaming": false, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.50.0.dev0", "use_cache": true, "vocab_size": 149248, "whisper_generation_config": null, "whisper_is_multilingual": false, "whisper_language": "el", "whisper_lora_config": { "alpha_pattern": {}, "auto_mapping": null, "base_model_name_or_path": null, "bias": "none", "eva_config": null, "exclude_modules": null, "fan_in_fan_out": false, "inference_mode": false, "init_lora_weights": true, "layer_replication": null, "layers_pattern": null, "layers_to_transform": null, "loftq_config": {}, "lora_alpha": 16, "lora_bias": "none", "lora_dropout": 0.1, "megatron_config": null, "megatron_core": "megatron.core", "modules_to_save": null, "peft_type": "LORA", "r": 8, "rank_pattern": {}, "revision": null, "target_modules": "model\\.decoder\\.layers\\.\\d+\\.(self_attn|encoder_attn)\\.(q_proj|k_proj|v_proj|out_proj)|model\\.decoder\\.layers\\.\\d+\\.(fc1|fc2)", "task_type": null, "use_dora": false, "use_rslora": false }, "whisper_path": "openai/whisper-large-v3", "whisper_task": "transcribe", "whisper_temperature": 0.0 }