{
"_name_or_path": "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
"architectures": [
"BertForSequenceClassification"
],
"attention_probs_dropout_prob": 0.1,
"classifier_dropout": null,
"gradient_checkpointing": false,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 384,
"id2label": {
"0": "ddc:400",
"1": "ddc:490",
"10": "ddc:470",
"11": "ddc:425",
"12": "ddc:423",
"13": "ddc:437",
"14": "ddc:435",
"15": "ddc:450",
"16": "ddc:491",
"17": "ddc:415",
"18": "ddc:418",
"19": "ddc:417",
"2": "ddc:440",
"20": "ddc:438",
"21": "ddc:433",
"22": "ddc:432",
"23": "ddc:431",
"24": "ddc:413",
"25": "ddc:412",
"26": "ddc:427",
"27": "ddc:404",
"28": "ddc:411",
"29": "ddc:407",
"3": "ddc:430",
"30": "ddc:447",
"31": "ddc:410",
"32": "ddc:495",
"33": "ddc:469",
"34": "ddc:409",
"35": "ddc:465",
"36": "ddc:414",
"37": "ddc:421",
"4": "ddc:401",
"5": "ddc:480",
"6": "ddc:492",
"7": "ddc:460",
"8": "ddc:439",
"9": "ddc:420"
},
"initializer_range": 0.02,
"intermediate_size": 1536,
"label2id": {
"ddc:400": 0,
"ddc:401": 4,
"ddc:404": 27,
"ddc:407": 29,
"ddc:409": 34,
"ddc:410": 31,
"ddc:411": 28,
"ddc:412": 25,
"ddc:413": 24,
"ddc:414": 36,
"ddc:415": 17,
"ddc:417": 19,
"ddc:418": 18,
"ddc:420": 9,
"ddc:421": 37,
"ddc:423": 12,
"ddc:425": 11,
"ddc:427": 26,
"ddc:430": 3,
"ddc:431": 23,
"ddc:432": 22,
"ddc:433": 21,
"ddc:435": 14,
"ddc:437": 13,
"ddc:438": 20,
"ddc:439": 8,
"ddc:440": 2,
"ddc:447": 30,
"ddc:450": 15,
"ddc:460": 7,
"ddc:465": 35,
"ddc:469": 33,
"ddc:470": 10,
"ddc:480": 5,
"ddc:490": 1,
"ddc:491": 16,
"ddc:492": 6,
"ddc:495": 32
},
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "bert",
"num_attention_heads": 12,
"num_hidden_layers": 6,
"pad_token_id": 0,
"position_embedding_type": "absolute",
"problem_type": "single_label_classification",
"torch_dtype": "float32",
"transformers_version": "4.39.3",
"type_vocab_size": 2,
"use_cache": true,
"vocab_size": 30522
}