Update ROBERTAmodel.py

ROBERTAmodel.py  CHANGED  (+15 -2)
@@ -6,6 +6,8 @@ from transformers import (
     RobertaForMaskedLM, RobertaForSequenceClassification
 )
 import os
+import time
+
 
 CACHE_DIR = "/data/hf_cache"
 
@@ -149,6 +151,7 @@ class RoBERTaVisualizer(TransformerVisualizer):
     def get_all_grad_attn_matrix(self, task, sentence, hypothesis='', maskID = None):
         print(task, sentence, hypothesis)
         print('Tokenize')
+        start = time.time()
         if task == 'mnli':
             inputs = self.tokenizer(sentence, hypothesis, return_tensors='pt', padding=False, truncation=True)
         elif task == 'mlm':
@@ -160,12 +163,16 @@ class RoBERTaVisualizer(TransformerVisualizer):
         tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
         print(tokens)
         inputs = {k: v.to(self.device) for k, v in inputs.items()}
+        print(1,time.time()-start)
+        start = time.time()
 
         print('Input embeddings with grad')
         embedding_layer = self.model.roberta.embeddings.word_embeddings
         inputs_embeds = embedding_layer(inputs["input_ids"])
         inputs_embeds.requires_grad_()
-
+
+        print(2,time.time()-start)
+        start = time.time()
         print('Forward pass')
         outputs = self.model.roberta(
             inputs_embeds=inputs_embeds,
@@ -173,9 +180,13 @@ class RoBERTaVisualizer(TransformerVisualizer):
             output_attentions=True
         )
         attentions = outputs.attentions  # list of [1, heads, seq, seq]
-
+
+        print(3,time.time()-start)
+        start = time.time()
         print('Average attentions per layer')
         mean_attns = [a.squeeze(0).mean(dim=0).detach().cpu() for a in attentions]
+        print(4,time.time()-start)
+        start = time.time()
 
         attn_matrices_all = []
         grad_matrices_all = []
@@ -183,6 +194,8 @@ class RoBERTaVisualizer(TransformerVisualizer):
             grad_matrix, attn_matrix = self.get_grad_attn_matrix(inputs_embeds, attentions, mean_attns, target_layer)
             grad_matrices_all.append(grad_matrix.tolist())
             attn_matrices_all.append(attn_matrix.tolist())
+
+        print(5,time.time()-start)
         return grad_matrices_all, attn_matrices_all
 
     def get_grad_attn_matrix(self,inputs_embeds, attentions, mean_attns, target_layer):
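The change instruments get_all_grad_attn_matrix with simple wall-clock timers: time is imported, start = time.time() is reset before each stage (tokenization, embedding lookup, forward pass, attention averaging, and the per-layer gradient/attention loop), and print(n, time.time()-start) reports the elapsed seconds for stages 1 through 5. Below is a minimal sketch of the same measurement pattern as a reusable helper; the stage_timer name, its labels, and the sleep stand-ins are illustrative assumptions, not code from this repository.

# Illustrative sketch only (not part of this commit): the stage-timing
# pattern above rewritten as a reusable helper, standard library only.
import time
from contextlib import contextmanager

@contextmanager
def stage_timer(label):
    # Measure wall-clock time around the enclosed block and print the result.
    start = time.time()
    try:
        yield
    finally:
        print(f"{label}: {time.time() - start:.3f}s")

# Hypothetical usage mirroring the stages timed in get_all_grad_attn_matrix:
if __name__ == "__main__":
    with stage_timer("tokenize"):
        time.sleep(0.01)   # stand-in for self.tokenizer(...)
    with stage_timer("forward pass"):
        time.sleep(0.02)   # stand-in for self.model.roberta(...)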