yongjielv commited on
Commit
56ce1c4
·
verified ·
1 Parent(s): fc28fd9

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BailingMMNativeForConditionalGeneration"
4
+ ],
5
+ "audio_tokenizer_config": {
6
+ "_name_or_path": "",
7
+ "add_cross_attention": false,
8
+ "architectures": [
9
+ "AudioVAE"
10
+ ],
11
+ "bad_words_ids": null,
12
+ "begin_suppress_tokens": null,
13
+ "bos_token_id": null,
14
+ "chunk_size_feed_forward": 0,
15
+ "cross_attention_hidden_size": null,
16
+ "dec_kwargs": {
17
+ "backbone": {
18
+ "_attn_implementation": "flash_attention_2",
19
+ "attention_dropout": 0.0,
20
+ "attn_implementation": null,
21
+ "bos_token_id": 151643,
22
+ "eos_token_id": 151645,
23
+ "hidden_act": "silu",
24
+ "hidden_size": 896,
25
+ "initializer_range": 0.02,
26
+ "intermediate_size": 4864,
27
+ "is_causal": true,
28
+ "max_position_embeddings": 32768,
29
+ "max_window_layers": 0,
30
+ "model_type": "qwen2",
31
+ "num_attention_heads": 14,
32
+ "num_hidden_layers": 24,
33
+ "num_key_value_heads": 2,
34
+ "rms_norm_eps": 1e-06,
35
+ "rope_theta": 1000000.0,
36
+ "sliding_window": 32,
37
+ "tie_word_embeddings": true,
38
+ "torch_dtype": "bfloat16",
39
+ "transformers_version": "4.43.1",
40
+ "use_cache": false,
41
+ "use_sliding_window": true,
42
+ "vocab_size": 1
43
+ },
44
+ "latent_dim": 64,
45
+ "output_dim": 320
46
+ },
47
+ "decoder_start_token_id": null,
48
+ "diversity_penalty": 0.0,
49
+ "do_sample": false,
50
+ "early_stopping": false,
51
+ "enc_kwargs": {
52
+ "backbone": {
53
+ "_attn_implementation": "flash_attention_2",
54
+ "attention_dropout": 0.0,
55
+ "attn_implementation": null,
56
+ "bos_token_id": 151643,
57
+ "eos_token_id": 151645,
58
+ "hidden_act": "silu",
59
+ "hidden_size": 896,
60
+ "initializer_range": 0.02,
61
+ "intermediate_size": 4864,
62
+ "is_causal": true,
63
+ "max_position_embeddings": 32768,
64
+ "max_window_layers": 0,
65
+ "model_type": "qwen2",
66
+ "num_attention_heads": 14,
67
+ "num_hidden_layers": 24,
68
+ "num_key_value_heads": 2,
69
+ "rms_norm_eps": 1e-06,
70
+ "rope_theta": 1000000.0,
71
+ "sliding_window": 32,
72
+ "tie_word_embeddings": true,
73
+ "torch_dtype": "bfloat16",
74
+ "transformers_version": "4.43.1",
75
+ "use_cache": false,
76
+ "use_sliding_window": true,
77
+ "vocab_size": 1
78
+ },
79
+ "hop_size": 320,
80
+ "input_dim": 320,
81
+ "latent_dim": 64
82
+ },
83
+ "encoder_no_repeat_ngram_size": 0,
84
+ "eos_token_id": null,
85
+ "exponential_decay_length_penalty": null,
86
+ "finetuning_task": null,
87
+ "forced_bos_token_id": null,
88
+ "forced_eos_token_id": null,
89
+ "hifi_gan_disc_kwargs": null,
90
+ "id2label": {
91
+ "0": "LABEL_0",
92
+ "1": "LABEL_1"
93
+ },
94
+ "init_method": "kaiming",
95
+ "is_decoder": false,
96
+ "is_encoder_decoder": false,
97
+ "label2id": {
98
+ "LABEL_0": 0,
99
+ "LABEL_1": 1
100
+ },
101
+ "lambda_adv": 1.0,
102
+ "lambda_disc": 1.0,
103
+ "lambda_feat_match_loss": 1.0,
104
+ "lambda_mel_loss": 15,
105
+ "lambda_semantic": 5.0,
106
+ "length_penalty": 1.0,
107
+ "max_length": 20,
108
+ "min_length": 0,
109
+ "model_type": "",
110
+ "no_repeat_ngram_size": 0,
111
+ "num_beam_groups": 1,
112
+ "num_beams": 1,
113
+ "num_return_sequences": 1,
114
+ "output_attentions": false,
115
+ "output_hidden_states": false,
116
+ "output_scores": false,
117
+ "pad_token_id": null,
118
+ "patch_size": -1,
119
+ "prefix": null,
120
+ "problem_type": null,
121
+ "pruned_heads": {},
122
+ "remove_invalid_values": false,
123
+ "repetition_penalty": 1.0,
124
+ "return_dict": true,
125
+ "return_dict_in_generate": false,
126
+ "semantic_module_kwargs": {
127
+ "casual": true,
128
+ "whisper_encoder": {
129
+ "n_ctx": 1500,
130
+ "n_head": 20,
131
+ "n_layer": 32,
132
+ "n_mels": 128,
133
+ "n_state": 1280
134
+ }
135
+ },
136
+ "sep_token_id": null,
137
+ "spec_disc_kwargs": null,
138
+ "suppress_tokens": null,
139
+ "task_specific_params": null,
140
+ "temperature": 1.0,
141
+ "tf_legacy_loss": false,
142
+ "tie_encoder_decoder": false,
143
+ "tie_word_embeddings": true,
144
+ "tokenizer_class": null,
145
+ "top_k": 50,
146
+ "top_p": 1.0,
147
+ "torch_dtype": null,
148
+ "torchscript": false,
149
+ "typical_p": 1.0,
150
+ "use_bfloat16": false
151
+ },
152
+ "auto_map": {
153
+ "AutoConfig": "configuration_bailingmm.BailingMMConfig"
154
+ },
155
+ "ditar_config": {
156
+ "attn_backend": "torch",
157
+ "attn_mask_enabled": false,
158
+ "depth": 8,
159
+ "dropout": 0.0,
160
+ "hidden_size": 1024,
161
+ "mlp_ratio": 4,
162
+ "num_heads": 16,
163
+ "pe_attn_head": null,
164
+ "qk_norm": null
165
+ },
166
+ "llm_config": {
167
+ "_name_or_path": "",
168
+ "add_cross_attention": false,
169
+ "architectures": [
170
+ "BailingMoeForCausalLM"
171
+ ],
172
+ "attention_dropout": 0.0,
173
+ "auto_map": {
174
+ "AutoConfig": "configuration_bailing_moe.BailingMoeConfig",
175
+ "AutoModel": "modeling_bailing_moe.BailingMoeModel",
176
+ "AutoModelForCausalLM": "modeling_bailing_moe.BailingMoeForCausalLM",
177
+ "AutoModelForTokenClassification": "modeling_bailing_moe.BailingMoeForTokenClassification"
178
+ },
179
+ "bad_words_ids": null,
180
+ "begin_suppress_tokens": null,
181
+ "bos_token_id": null,
182
+ "chunk_size_feed_forward": 0,
183
+ "cross_attention_hidden_size": null,
184
+ "decoder_start_token_id": null,
185
+ "diversity_penalty": 0.0,
186
+ "do_sample": false,
187
+ "early_stopping": false,
188
+ "embedding_dropout": 0.0,
189
+ "encoder_no_repeat_ngram_size": 0,
190
+ "eos_token_id": 126081,
191
+ "exponential_decay_length_penalty": null,
192
+ "finetuning_task": null,
193
+ "first_k_dense_replace": 0,
194
+ "forced_bos_token_id": null,
195
+ "forced_eos_token_id": null,
196
+ "head_dim": 128,
197
+ "hidden_act": "silu",
198
+ "hidden_size": 2048,
199
+ "id2label": {
200
+ "0": "LABEL_0",
201
+ "1": "LABEL_1"
202
+ },
203
+ "image_patch_token": 126346,
204
+ "image_start_token": 126347,
205
+ "initializer_range": 0.006,
206
+ "intermediate_size": 5632,
207
+ "is_decoder": false,
208
+ "is_encoder_decoder": false,
209
+ "label2id": {
210
+ "LABEL_0": 0,
211
+ "LABEL_1": 1
212
+ },
213
+ "length_penalty": 1.0,
214
+ "max_length": 20,
215
+ "max_position_embeddings": 32768,
216
+ "max_window_layers": 28,
217
+ "min_length": 0,
218
+ "model_type": "bailing_moe",
219
+ "moe_intermediate_size": 1408,
220
+ "multi_gate": true,
221
+ "no_repeat_ngram_size": 0,
222
+ "norm_head": false,
223
+ "norm_softmax": false,
224
+ "norm_topk_prob": true,
225
+ "num_attention_heads": 16,
226
+ "num_beam_groups": 1,
227
+ "num_beams": 1,
228
+ "num_experts": 64,
229
+ "num_experts_per_tok": 6,
230
+ "num_hidden_layers": 28,
231
+ "num_key_value_heads": 4,
232
+ "num_return_sequences": 1,
233
+ "num_shared_experts": 2,
234
+ "output_attentions": false,
235
+ "output_dropout": 0.0,
236
+ "output_hidden_states": false,
237
+ "output_router_logits": false,
238
+ "output_scores": false,
239
+ "pad_token_id": 126081,
240
+ "prefix": null,
241
+ "pretraining_tp": 1,
242
+ "problem_type": null,
243
+ "pruned_heads": {},
244
+ "remove_invalid_values": false,
245
+ "repetition_penalty": 1.0,
246
+ "return_dict": true,
247
+ "return_dict_in_generate": false,
248
+ "rms_norm_eps": 1e-05,
249
+ "rope_scaling": {
250
+ "factor": null,
251
+ "type": "3D"
252
+ },
253
+ "rope_theta": 600000,
254
+ "sep_token_id": null,
255
+ "sliding_window": 4096,
256
+ "suppress_tokens": null,
257
+ "task_specific_params": null,
258
+ "temperature": 1.0,
259
+ "tf_legacy_loss": false,
260
+ "tie_encoder_decoder": false,
261
+ "tie_word_embeddings": false,
262
+ "tokenizer_class": null,
263
+ "top_k": 50,
264
+ "top_p": 1.0,
265
+ "torch_dtype": "bfloat16",
266
+ "torchscript": false,
267
+ "typical_p": 1.0,
268
+ "use_bfloat16": false,
269
+ "use_bias": false,
270
+ "use_cache": true,
271
+ "use_qkv_bias": false,
272
+ "use_sliding_window": false,
273
+ "video_start_token": 126349,
274
+ "vocab_size": 126464
275
+ },
276
+ "mlp_depth": 2,
277
+ "model_type": "bailingmm",
278
+ "torch_dtype": "bfloat16",
279
+ "transformers_version": "4.52.4"
280
+ }
model-00001-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf0878c52036fb377f710fa7e55e820a05d9bbad2b1a6826b57e1927a7a95465
3
+ size 4998949136
model-00002-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7731be15d86b3590925d1691a219ef02cfd2f7a378624f83c13cf4c302cde651
3
+ size 4999501536
model-00003-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3d993b14ce3bae503361dca556dd35ec904dc8ccaa1b3e39ab6112e9d6882f6
3
+ size 4995037352
model-00004-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8994018d97ac394b723288d715b8670131fffeab1808e97872ac57fa834f0e01
3
+ size 4995037712
model-00005-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77c4caf48788eca7139db74a381aa4d38eac6854ecaab524e7ce4c5c8c4ffceb
3
+ size 4999502432
model-00006-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:671032cd4061f11db8b453268a31cab0fe4176553a0e6067b79ea9dc279d121d
3
+ size 4995037688
model-00007-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dbf0bb0f1655022eaf6c607ab6d64aba31997bf3e09f91d04c58d98acfe46d6
3
+ size 4993293032
model-00008-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:314bf206abb346e3cf566335fa9d843e9564f292f3f5dfeb40afdb6cdc2c3063
3
+ size 1575810528
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff