yifan0sun committed on
Commit 67aa9c5 · verified · 1 Parent(s): 93adbb1

Update ROBERTAmodel.py

Files changed (1):
  1. ROBERTAmodel.py +15 -2
ROBERTAmodel.py CHANGED
@@ -6,6 +6,8 @@ from transformers import (
     RobertaForMaskedLM, RobertaForSequenceClassification
 )
 import os
+import time
+
 
 CACHE_DIR = "/data/hf_cache"
 
@@ -149,6 +151,7 @@ class RoBERTaVisualizer(TransformerVisualizer):
     def get_all_grad_attn_matrix(self, task, sentence, hypothesis='', maskID = None):
         print(task, sentence, hypothesis)
         print('Tokenize')
+        start = time.time()
         if task == 'mnli':
             inputs = self.tokenizer(sentence, hypothesis, return_tensors='pt', padding=False, truncation=True)
         elif task == 'mlm':
@@ -160,12 +163,16 @@ class RoBERTaVisualizer(TransformerVisualizer):
         tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
         print(tokens)
         inputs = {k: v.to(self.device) for k, v in inputs.items()}
+        print(1,time.time()-start)
+        start = time.time()
 
         print('Input embeddings with grad')
         embedding_layer = self.model.roberta.embeddings.word_embeddings
         inputs_embeds = embedding_layer(inputs["input_ids"])
         inputs_embeds.requires_grad_()
-
+
+        print(2,time.time()-start)
+        start = time.time()
         print('Forward pass')
         outputs = self.model.roberta(
             inputs_embeds=inputs_embeds,
@@ -173,9 +180,13 @@ class RoBERTaVisualizer(TransformerVisualizer):
             output_attentions=True
         )
         attentions = outputs.attentions # list of [1, heads, seq, seq]
-
+
+        print(3,time.time()-start)
+        start = time.time()
         print('Average attentions per layer')
         mean_attns = [a.squeeze(0).mean(dim=0).detach().cpu() for a in attentions]
+        print(4,time.time()-start)
+        start = time.time()
 
         attn_matrices_all = []
         grad_matrices_all = []
@@ -183,6 +194,8 @@ class RoBERTaVisualizer(TransformerVisualizer):
             grad_matrix, attn_matrix = self.get_grad_attn_matrix(inputs_embeds, attentions, mean_attns, target_layer)
             grad_matrices_all.append(grad_matrix.tolist())
             attn_matrices_all.append(attn_matrix.tolist())
+
+            print(5,time.time()-start)
         return grad_matrices_all, attn_matrices_all
 
     def get_grad_attn_matrix(self,inputs_embeds, attentions, mean_attns, target_layer):
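
The commit instruments each stage of get_all_grad_attn_matrix with paired start = time.time() and print(n, time.time() - start) calls. Below is a minimal sketch of the same timing pattern factored into a small context manager; section_timer and the stage labels are illustrative names only, not part of this repository.

import time
from contextlib import contextmanager

@contextmanager
def section_timer(label):
    # Hypothetical helper (not in ROBERTAmodel.py): times a block and prints
    # the elapsed wall-clock seconds, mirroring the start = time.time() /
    # print(n, time.time() - start) pairs added in this commit.
    start = time.time()
    try:
        yield
    finally:
        print(label, time.time() - start)

# Usage mirroring the instrumented stages of get_all_grad_attn_matrix:
with section_timer('tokenize'):
    pass  # stand-in for self.tokenizer(...)
with section_timer('forward pass'):
    pass  # stand-in for self.model.roberta(..., output_attentions=True)

A context manager keeps the timing and the printout in one place, so stages can be added or removed without keeping the numbered print(1) through print(5) labels in sync by hand.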