fondress committed on
Commit
3aebc7c
·
verified ·
1 Parent(s): f92db2e

Upload PDeepPP_Antibacterial to Hugging Face Hub.

Browse files
config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "PDeepPPModel"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_pdeeppp.PDeepPPConfig",
7
+ "AutoModel": "modeling_PDeepPP.PDeepPPModel"
8
+ },
9
+ "dropout": 0.3,
10
+ "esm_ratio": 1.0,
11
+ "hidden_size": 256,
12
+ "input_size": 1280,
13
+ "lambda_": 0.96,
14
+ "model_type": "PDeepPP",
15
+ "num_heads": 8,
16
+ "num_transformer_layers": 4,
17
+ "output_size": 128,
18
+ "ptm_type": "ACE",
19
+ "task_type": "Antibacterial",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.35.2"
22
+ }
configuration_pdeeppp.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Dict, List, Optional, Union
3
+
4
+ from transformers.configuration_utils import PretrainedConfig
5
+ from transformers.utils import logging
6
+
7
+ logger = logging.get_logger(__name__)
8
+
9
class PDeepPPConfig(PretrainedConfig):
    """Hyper-parameter container for PDeepPP models.

    Each constructor argument is stored unchanged as an attribute of the same
    name; all remaining keyword arguments are handed to ``PretrainedConfig``.

    Args:
        input_size: Feature width of the incoming embeddings (used by the
            accompanying modeling code as attention ``embed_dim`` and as the
            convolution ``in_channels``).
        output_size: Feature width produced by each branch of the model.
        num_heads: Number of attention heads.
        hidden_size: Width of the intermediate projection; the modeling code
            also uses ``hidden_size * 2`` as the transformer feed-forward width.
        num_transformer_layers: Number of stacked transformer encoder layers.
        dropout: Dropout probability.
        ptm_type: Stored as-is; not read by the modeling code in this commit.
        esm_ratio: Stored as-is; not read by the modeling code in this commit.
        lambda_: Weight used by the modeling code to blend the BCE loss with
            its regularisation term.
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "PDeepPP"

    # NOTE(review): the config.json uploaded in the same commit carries
    # esm_ratio=1.0 and lambda_=0.96, i.e. the reverse of the defaults below.
    # Confirm which pairing is intended before relying on the defaults.
    def __init__(
        self,
        input_size=1280,
        output_size=128,
        num_heads=8,
        hidden_size=256,
        num_transformer_layers=4,
        dropout=0.3,
        ptm_type="ACE",
        esm_ratio=0.96,
        lambda_=1,
        **kwargs
    ):
        super().__init__(**kwargs)

        # Model-specific fields, assigned in declaration order.
        own_fields = {
            "input_size": input_size,
            "output_size": output_size,
            "num_heads": num_heads,
            "hidden_size": hidden_size,
            "num_transformer_layers": num_transformer_layers,
            "dropout": dropout,
            "ptm_type": ptm_type,
            "esm_ratio": esm_ratio,
            "lambda_": lambda_,
        }
        for field_name, field_value in own_fields.items():
            setattr(self, field_name, field_value)


# Make the config discoverable through the Auto* loading machinery.
PDeepPPConfig.register_for_auto_class()
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3fc50e68422370f5923c43f57d4ec0b6642138dc1186db23ba5b523557eedc6
3
+ size 33264668
modeling_PDeepPP.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from typing import Optional, Tuple, Union
4
+
5
+ from transformers.modeling_utils import PreTrainedModel
6
+ from transformers.utils import logging
7
+
8
+ from configuration_pdeeppp import PDeepPPConfig
9
+
10
+ logger = logging.get_logger(__name__)
11
+
12
class SelfAttentionGlobalFeatures(nn.Module):
    """Multi-head self-attention followed by a two-step linear projection.

    The input is attended against itself, added back to the input and
    layer-normalised, then projected ``input_size -> hidden_size ->
    output_size`` with dropout between the two linear layers (there is no
    non-linearity between them).

    Args:
        config: Object exposing ``input_size``, ``num_heads``, ``hidden_size``,
            ``output_size`` and ``dropout``.
    """

    def __init__(self, config):
        super().__init__()
        embed_dim = config.input_size
        self.self_attention = nn.MultiheadAttention(
            embed_dim=embed_dim,
            num_heads=config.num_heads,
            batch_first=True,
        )
        self.fc1 = nn.Linear(embed_dim, config.hidden_size)
        self.fc2 = nn.Linear(config.hidden_size, config.output_size)
        self.layer_norm = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(config.dropout)

    def forward(self, x):
        """Return the projected features for ``x``.

        ``x`` is used as query, key and value; the attention module is built
        with ``batch_first=True``.
        """
        # Only the attended values are needed; the attention weights are dropped.
        attended = self.self_attention(x, x, x)[0]
        normed = self.layer_norm(x + attended)
        hidden = self.dropout(self.fc1(normed))
        return self.fc2(hidden)
32
+
33
class TransConv1d(nn.Module):
    """Attention front-end, transformer encoder stack and a residual MLP head.

    Pipeline: ``SelfAttentionGlobalFeatures`` -> ``nn.TransformerEncoder`` ->
    ``fc1`` -> (``fc2`` with a skip connection around it) -> ``LayerNorm``.

    Args:
        config: Object exposing ``output_size``, ``num_heads``, ``hidden_size``,
            ``dropout`` and ``num_transformer_layers`` (plus whatever
            ``SelfAttentionGlobalFeatures`` reads).
    """

    def __init__(self, config):
        super().__init__()
        self.self_attention_global_features = SelfAttentionGlobalFeatures(config)
        # Kept as a module attribute (not a local): its parameters are part of
        # the state dict, so dropping it would change the checkpoint key set.
        self.transformer_encoder = nn.TransformerEncoderLayer(
            d_model=config.output_size,
            nhead=config.num_heads,
            dim_feedforward=config.hidden_size*2,
            dropout=config.dropout,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(
            self.transformer_encoder,
            num_layers=config.num_transformer_layers
        )
        self.fc1 = nn.Linear(config.output_size, config.output_size)
        self.fc2 = nn.Linear(config.output_size, config.output_size)
        self.layer_norm = nn.LayerNorm(config.output_size)

    def forward(self, x):
        """Encode ``x`` and return the layer-normalised features."""
        x = self.self_attention_global_features(x)
        x = self.transformer(x)
        x = self.fc1(x)
        # The skip connection spans fc2 only. An earlier `residual = x` taken
        # before the transformer was overwritten here before being read, so it
        # has been removed; the computed output is unchanged.
        residual = x
        x = self.fc2(x)
        x = self.layer_norm(x + residual)
        return x
61
+
62
class PosCNN(nn.Module):
    """1-D convolutional branch with an optional learned positional term.

    The input is permuted with ``(0, 2, 1)`` so the last axis becomes the
    convolution channel axis, convolved to 64 channels, optionally offset by a
    learned positional table, average-pooled to length 1 and projected to
    ``config.output_size``.

    Args:
        config: Object exposing ``input_size`` and ``output_size``.
        use_position_encoding: When true, a zero-initialised learnable table
            of shape ``(64, config.input_size)`` is created and added after
            the convolution.
    """

    def __init__(self, config, use_position_encoding=True):
        super().__init__()
        conv_channels = 64
        self.use_position_encoding = use_position_encoding
        self.conv1d = nn.Conv1d(
            in_channels=config.input_size,
            out_channels=conv_channels,
            kernel_size=3,
            padding=1,
        )
        self.relu = nn.ReLU()
        self.global_pooling = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(conv_channels, config.output_size)

        if self.use_position_encoding:
            # NOTE(review): the table's second axis is sized by input_size but
            # is sliced by sequence length in forward(); presumably sequences
            # are never longer than input_size - confirm.
            self.position_encoding = nn.Parameter(
                torch.zeros(conv_channels, config.input_size)
            )

    def forward(self, x):
        """Return one pooled feature vector per batch element."""
        features = self.relu(self.conv1d(x.permute(0, 2, 1)))

        if self.use_position_encoding:
            length = features.size(2)
            # Leading singleton axis lets the table broadcast over the batch.
            features = features + self.position_encoding[:, :length].unsqueeze(0)

        pooled = self.global_pooling(features).squeeze(-1)
        return self.fc(pooled)
93
+
94
class PDeepPPPreTrainedModel(PreTrainedModel):
    """
    Abstract base class holding the methods shared by all PDeepPP models.
    """

    config_class = PDeepPPConfig
    base_model_prefix = "PDeepPP"
    supports_gradient_checkpointing = True

    def _init_weights(self, module):
        """Initialise the weights of a single sub-module.

        ``nn.Linear`` weights are drawn from N(0, 0.02) and their biases (when
        present) zeroed; ``nn.LayerNorm`` is reset to weight 1 and bias 0.
        Every other module type is left untouched.
        """
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=0.02)
            if module.bias is not None:
                module.bias.data.zero_()
            return

        if isinstance(module, nn.LayerNorm):
            module.weight.data.fill_(1.0)
            module.bias.data.zero_()
111
+
112
class PDeepPPModel(PDeepPPPreTrainedModel):
    """Two-branch binary classifier over pre-computed sequence embeddings.

    A ``TransConv1d`` branch yields per-position features and a ``PosCNN``
    branch yields one pooled vector per sequence. The pooled vector is repeated
    along the sequence axis, concatenated with the per-position features, and
    the result is reduced to a single logit per sequence by ``cnn_layers``.

    Args:
        config: A ``PDeepPPConfig`` (reads ``output_size``, ``dropout`` and
            ``lambda_`` here; the sub-modules read further fields).
    """

    def __init__(self, config):
        super().__init__(config)
        self.config = config

        self.transformer = TransConv1d(config)
        self.cnn = PosCNN(config)
        # The first AdaptiveMaxPool1d(1) already collapses the sequence axis to
        # length 1, so the second convolution operates on length-1 inputs.
        self.cnn_layers = nn.Sequential(
            nn.Conv1d(config.output_size*2, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveMaxPool1d(1),
            nn.Dropout(config.dropout/2),
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveMaxPool1d(1),
            nn.Dropout(config.dropout/2),
            nn.Flatten(),
            nn.Linear(64, 1)
        )

        # Initialise weights.
        self.post_init()

    def forward(
        self,
        input_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        return_dict: Optional[bool] = None,
    ):
        r"""
        Args:
            input_embeds (`torch.Tensor`):
                Embeddings fed to both branches. Required, despite the `None`
                default kept for signature compatibility.
            labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
                Labels for computing the classification loss.
            return_dict (`bool`, *optional*):
                Overrides `config.use_return_dict` when given.

        Returns:
            dict or tuple: `{"loss": ..., "logits": ...}` when `return_dict` is
            true (`loss` is `None` without labels); otherwise `(loss, logits)`
            when labels are given, or `logits` alone.

        Raises:
            ValueError: If `input_embeds` is `None`.
        """
        # Fail early with a clear message instead of an opaque error raised
        # from inside the attention / convolution sub-modules.
        if input_embeds is None:
            raise ValueError("You have to specify input_embeds")

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        transformer_output = self.transformer(input_embeds)
        cnn_output = self.cnn(input_embeds)
        # Repeat the pooled CNN vector at every sequence position so it can be
        # concatenated with the per-position transformer features.
        cnn_output = cnn_output.unsqueeze(1).expand(-1, transformer_output.size(1), -1)
        combined = torch.cat([transformer_output, cnn_output], dim=2)
        # Conv1d expects channels on axis 1.
        combined = combined.permute(0, 2, 1)
        logits = self.cnn_layers(combined).squeeze(1)

        loss = None
        if labels is not None:
            loss_fct = nn.BCEWithLogitsLoss()
            loss = loss_fct(logits, labels.float())

            # Custom regulariser built from the predicted probabilities:
            # `ent` is the mean binary entropy and `cond_ent` the mean of
            # -p*log(p); 1e-12 guards against log(0).
            probs = torch.sigmoid(logits)
            ent = -(probs*torch.log(probs+1e-12) +
                    (1-probs)*torch.log(1-probs+1e-12)).mean()
            cond_ent = -(probs*torch.log(probs+1e-12)).mean()
            reg_loss = self.config.lambda_ * ent - self.config.lambda_ * cond_ent

            # Blend: lambda_ weights the BCE term, (1 - lambda_) the regulariser.
            loss = self.config.lambda_ * loss + (1 - self.config.lambda_) * reg_loss

        if return_dict:
            return {
                "loss": loss,
                "logits": logits,
            }
        else:
            return (loss, logits) if loss is not None else logits


# Make the model discoverable through AutoModel.
PDeepPPModel.register_for_auto_class("AutoModel")
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57ac16c58263eb5f0bada91fd7ecc0bb34a996e7b79f490e575281f2cf8538f2
3
+ size 33283123