Spaces:

yifan0sun
/

BERTGradGraph

Runtime error

App Files Files Community

yifan0sun committed on May 23

Commit

04ccab0

verified ·

1 Parent(s): 27ab4ec

Upload 5 files

Browse files

Files changed (5) hide show

BERTmodel.py +294 -289
DISTILLBERTmodel.py +257 -253
ROBERTAmodel.py +207 -199
models.py +15 -15
server.py +349 -370

BERTmodel.py CHANGED Viewed

@@ -1,290 +1,295 @@
-import torch
-import torch.nn as nn
-from transformers import BertTokenizer
-from models import TransformerVisualizer
-from transformers import (
-    BertTokenizer,
-    BertForMaskedLM,
-    BertForSequenceClassification,
-)
-import torch.nn.functional as F
-import os
-CACHE_DIR  = "/data/hf_cache"
-class BERTVisualizer(TransformerVisualizer):
-    def __init__(self,task):
-        super().__init__()
-        self.task = task
-        print(task,'BERTVIS START')
-        TOKENIZER = 'bert-base-uncased'
-        LOCAL_PATH = os.path.join(CACHE_DIR, "tokenizers",TOKENIZER.replace("/", "_"))
-        try:
-            self.tokenizer = BertTokenizer.from_pretrained(LOCAL_PATH, local_files_only=True)
-        except Exception as e:
-            self.tokenizer = BertTokenizer.from_pretrained(TOKENIZER)
-            self.tokenizer.save_pretrained(LOCAL_PATH)
-        print('finding model', self.task)
-        if self.task == 'mlm':
-            MODEL = 'bert-base-uncased'
-            LOCAL_PATH = os.path.join(CACHE_DIR, "models",MODEL.replace("/", "_"))
-            try:
-                self.model = BertForMaskedLM.from_pretrained(  LOCAL_PATH, local_files_only=True,   attn_implementation="eager" ).to(self.device)
-            except Exception as e:
-                self.model = BertForMaskedLM.from_pretrained(  MODEL,    attn_implementation="eager" ).to(self.device)
-                self.model.save_pretrained(LOCAL_PATH)
-        elif self.task == 'sst':
-            MODEL = "textattack/bert-base-uncased-SST-2"
-            LOCAL_PATH = os.path.join(CACHE_DIR, "models",MODEL.replace("/", "_"))
-            try:
-                self.model = BertForSequenceClassification.from_pretrained(  LOCAL_PATH, local_files_only=True,  device_map=None )
-            except Exception as e:
-                self.model = BertForSequenceClassification.from_pretrained(  MODEL,    device_map=None )
-                self.model.save_pretrained(LOCAL_PATH)
-        elif self.task == 'mnli':
-            MODEL = 'textattack/bert-base-uncased-MNLI'
-            LOCAL_PATH = os.path.join(CACHE_DIR, "models",MODEL.replace("/", "_"))
-            try:
-                self.model = BertForSequenceClassification.from_pretrained(  LOCAL_PATH, local_files_only=True,  device_map=None )
-            except Exception as e:
-                self.model = BertForSequenceClassification.from_pretrained(  MODEL,    device_map=None)
-                self.model.save_pretrained(LOCAL_PATH)
-        else:
-            raise ValueError(f"Unsupported task: {self.task}")
-        print('model found')
-        #self.model.to(self.device)
-        print('self device junk')
-        self.model.eval()
-        print('self model eval')
-        self.num_attention_layers = len(self.model.bert.encoder.layer)
-        print('init finished')
-    def tokenize(self, text, hypothesis = ''):
-        print('TTTokenize',text,'H:', hypothesis)
-        if len(hypothesis) == 0:
-            encoded = self.tokenizer(text, return_tensors='pt', return_attention_mask=True)
-        else:
-            encoded = self.tokenizer(text, hypothesis, return_tensors='pt', return_attention_mask=True)
-        input_ids = encoded['input_ids'].to(self.device)
-        attention_mask = encoded['attention_mask'].to(self.device)
-        tokens = self.tokenizer.convert_ids_to_tokens(input_ids[0])
-        return {
-            'input_ids': input_ids,
-            'attention_mask': attention_mask,
-            'tokens': tokens
-        }
-    def predict(self, task, text, hypothesis='', maskID = None):
-        print(task,text,hypothesis)
-        if task == 'mlm':
-            # Tokenize and find [MASK] position
-            print('Tokenize and find [MASK] position')
-            inputs = self.tokenizer(text, return_tensors='pt', padding=False, truncation=True)
-            if maskID is not None and 0 <= maskID < inputs['input_ids'].size(1):
-                inputs['input_ids'][0][maskID] = self.tokenizer.mask_token_id
-                mask_index = maskID
-            else:
-                raise ValueError(f"Invalid maskID {maskID} for input length {inputs['input_ids'].size(1)}")
-            # Move to device
-            inputs = {k: v.to(self.device) for k, v in inputs.items()}
-            # Get embeddings
-            embedding_layer = self.model.bert.embeddings.word_embeddings
-            inputs_embeds = embedding_layer(inputs['input_ids'])
-            # Forward through BERT encoder
-            hidden_states = self.model.bert(inputs_embeds=inputs_embeds,
-                                    attention_mask=inputs['attention_mask']).last_hidden_state
-            # Predict logits via MLM head
-            logits = self.model.cls(hidden_states)
-            mask_logits = logits[0, mask_index]
-            top_probs, top_indices = torch.topk(mask_logits, k=10, dim=-1)
-            top_probs = F.softmax(top_probs, dim=-1)
-            decoded = self.tokenizer.convert_ids_to_tokens(top_indices.tolist())
-            return decoded, top_probs
-        elif task == 'sst':
-            print('input')
-            inputs = self.tokenizer(text, return_tensors='pt', padding=False, truncation=True).to(self.device)
-            print('output')
-            with torch.no_grad():
-                outputs = self.model(**inputs)
-                logits = outputs.logits  # shape: [1, 2]
-                probs = F.softmax(logits, dim=1).squeeze()
-            labels = ["negative", "positive"]
-            print('ready to return')
-            return labels, probs
-        elif task == 'mnli':
-            inputs = self.tokenizer(text, hypothesis, return_tensors='pt', padding=True, truncation=True).to(self.device)
-            with torch.no_grad():
-                outputs = self.model(**inputs)
-                logits = outputs.logits
-                probs = F.softmax(logits, dim=1).squeeze()
-            labels = ["entailment", "neutral", "contradiction"]
-            return labels, probs
-    def get_all_grad_attn_matrix(self, task, sentence, hypothesis='', maskID = 0):
-        print('GET GRAD:', task,'sentence',sentence, 'hypothesis', hypothesis)
-        print('Tokenize')
-        if task == 'mnli':
-            inputs = self.tokenizer(sentence, hypothesis, return_tensors='pt', padding=False, truncation=True)
-        elif task == 'mlm':
-            inputs = self.tokenizer(sentence,  return_tensors='pt', padding=False, truncation=True)
-            if maskID is not None and 0 <= maskID < inputs['input_ids'].size(1):
-                inputs['input_ids'][0][maskID] = self.tokenizer.mask_token_id
-            else:
-                raise ValueError(f"Invalid maskID {maskID} for input length {inputs['input_ids'].size(1)}")
-        else:
-            inputs = self.tokenizer(sentence,  return_tensors='pt', padding=False, truncation=True)
-        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
-        inputs = {k: v.to(self.device) for k, v in inputs.items()}
-        print(inputs['input_ids'].shape)
-        print(tokens,len(tokens))
-        print('Input embeddings with grad')
-        embedding_layer = self.model.bert.embeddings.word_embeddings
-        inputs_embeds = embedding_layer(inputs["input_ids"])
-        inputs_embeds.requires_grad_()
-        print('Forward pass')
-        outputs = self.model.bert(
-            inputs_embeds=inputs_embeds,
-            attention_mask=inputs["attention_mask"],
-            output_attentions=True
-        )
-        attentions = outputs.attentions  # list of [1, heads, seq, seq]
-        print('Optional: store average attentions per layer')
-        mean_attns = [a.squeeze(0).mean(dim=0).detach().cpu() for a in attentions]
-        attn_matrices_all = []
-        grad_matrices_all = []
-        for target_layer in range(len(attentions)):
-            grad_matrix, attn_matrix = self.get_grad_attn_matrix(inputs_embeds, attentions, mean_attns, target_layer)
-            grad_matrices_all.append(grad_matrix.tolist())
-            attn_matrices_all.append(attn_matrix.tolist())
-        return grad_matrices_all, attn_matrices_all
-    def get_grad_attn_matrix(self,inputs_embeds, attentions, mean_attns, target_layer):
-        attn_matrix = mean_attns[target_layer]
-        seq_len = attn_matrix.shape[0]
-        attn_layer = attentions[target_layer].squeeze(0).mean(dim=0)  # [seq, seq]
-        print('computing gradnorms now')
-        grad_norms_list = []
-        for k in range(seq_len):
-            scalar = attn_layer[:, k].sum()  # ✅ total attention received by token k
-            # Compute gradient: d scalar / d inputs_embeds
-            grad = torch.autograd.grad(scalar, inputs_embeds, retain_graph=True)[0].squeeze(0)  # shape: [seq, hidden]
-            grad_norms = grad.norm(dim=1)  # shape: [seq]
-            grad_norms_list.append(grad_norms.unsqueeze(1))  # shape: [seq, 1]
-        grad_matrix = torch.cat(grad_norms_list, dim=1)  # shape: [seq, seq]
-        print('ready to send!')
-        grad_matrix = grad_matrix[:seq_len, :seq_len]
-        attn_matrix = attn_matrix[:seq_len, :seq_len]
-        #tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
-        return grad_matrix, attn_matrix
-if __name__ == "__main__":
-    import sys
-    MODEL_CLASSES = {
-        "bert": BERTVisualizer,
-        "roberta": RoBERTaVisualizer,
-        "distilbert": DistilBERTVisualizer,
-        "bart": BARTVisualizer,
-    }
-    # Parse command-line args or fallback to default
-    model_name = sys.argv[1] if len(sys.argv) > 1 else "bert"
-    text = " ".join(sys.argv[2:]) if len(sys.argv) > 2 else "The quick brown fox jumps over the lazy dog."
-    if model_name.lower() not in MODEL_CLASSES:
-        print(f"Supported models: {list(MODEL_CLASSES.keys())}")
-        sys.exit(1)
-    # Instantiate the visualizer
-    visualizer_class = MODEL_CLASSES[model_name.lower()]
-    visualizer = visualizer_class()
-    # Tokenize
-    token_info = visualizer.tokenize(text)
-    # Report
-    print(f"\nModel: {model_name}")
-    print(f"Num attention layers: {visualizer.num_attention_layers}")
-    print(f"Tokens: {token_info['tokens']}")
-    print(f"Input IDs: {token_info['input_ids'].tolist()}")
-    print(f"Attention mask: {token_info['attention_mask'].tolist()}")
-"""
-usage for debug:
-python your_file.py bert "The rain in Spain falls mainly on the plain."
 """

+import torch
+import torch.nn as nn
+from transformers import BertTokenizer
+from models import TransformerVisualizer
+from transformers import (
+    BertTokenizer,
+    BertForMaskedLM,
+    BertForSequenceClassification,
+)
+import torch.nn.functional as F
+import os
+CACHE_DIR  = "/data/hf_cache"
+class BERTVisualizer(TransformerVisualizer):
+    def __init__(self,task):
+        super().__init__()
+        self.task = task
+        print(task,'BERT VIS START')
+        TOKENIZER = 'bert-base-uncased'
+        LOCAL_PATH = os.path.join(CACHE_DIR, "tokenizers",TOKENIZER)
+        self.tokenizer = BertTokenizer.from_pretrained(LOCAL_PATH, local_files_only=True)
+        """
+        try:
+            self.tokenizer = BertTokenizer.from_pretrained(LOCAL_PATH, local_files_only=True)
+        except Exception as e:
+            self.tokenizer = BertTokenizer.from_pretrained(TOKENIZER)
+            self.tokenizer.save_pretrained(LOCAL_PATH)
+        """
+        print('finding model', self.task)
+        if self.task == 'mlm':
+            MODEL = 'bert-base-uncased'
+            LOCAL_PATH = os.path.join(CACHE_DIR, "models",MODEL)
+            self.model = BertForMaskedLM.from_pretrained(  LOCAL_PATH, local_files_only=True,   attn_implementation="eager" ).to(self.device)
+            """
+            try:
+                self.model = BertForMaskedLM.from_pretrained(  LOCAL_PATH, local_files_only=True,   attn_implementation="eager" ).to(self.device)
+            except Exception as e:
+                self.model = BertForMaskedLM.from_pretrained(  MODEL,    attn_implementation="eager" ).to(self.device)
+                self.model.save_pretrained(LOCAL_PATH)
+            """
+        elif self.task == 'sst':
+            MODEL = "textattack_bert-base-uncased-SST-2"
+            LOCAL_PATH = os.path.join(CACHE_DIR, "models",MODEL)
+            self.model = BertForSequenceClassification.from_pretrained(  LOCAL_PATH, local_files_only=True,  device_map=None )
+            """
+            try:
+                self.model = BertForSequenceClassification.from_pretrained(  LOCAL_PATH, local_files_only=True,  device_map=None )
+            except Exception as e:
+                self.model = BertForSequenceClassification.from_pretrained(  MODEL,    device_map=None )
+                self.model.save_pretrained(LOCAL_PATH)
+            """
+        elif self.task == 'mnli':
+            MODEL = 'textattack_bert-base-uncased-MNLI'
+            LOCAL_PATH = os.path.join(CACHE_DIR, "models",MODEL)
+            self.model = BertForSequenceClassification.from_pretrained(  LOCAL_PATH, local_files_only=True,  device_map=None )
+            """
+            try:
+                self.model = BertForSequenceClassification.from_pretrained(  LOCAL_PATH, local_files_only=True,  device_map=None )
+            except Exception as e:
+                self.model = BertForSequenceClassification.from_pretrained(  MODEL,    device_map=None)
+                self.model.save_pretrained(LOCAL_PATH)
+            """
+        else:
+            raise ValueError(f"Unsupported task: {self.task}")
+        print('model found')
+        #self.model.to(self.device)
+        print('self device junk')
+        self.model.eval()
+        print('self model eval')
+        self.num_attention_layers = len(self.model.bert.encoder.layer)
+        print('init finished')
+    def tokenize(self, text, hypothesis = ''):
+        print('TTTokenize',text,'H:', hypothesis)
+        if len(hypothesis) == 0:
+            encoded = self.tokenizer(text, return_tensors='pt', return_attention_mask=True)
+        else:
+            encoded = self.tokenizer(text, hypothesis, return_tensors='pt', return_attention_mask=True)
+        input_ids = encoded['input_ids'].to(self.device)
+        attention_mask = encoded['attention_mask'].to(self.device)
+        tokens = self.tokenizer.convert_ids_to_tokens(input_ids[0])
+        return {
+            'input_ids': input_ids,
+            'attention_mask': attention_mask,
+            'tokens': tokens
+        }
+    def predict(self, task, text, hypothesis='', maskID = None):
+        print(task,text,hypothesis)
+        if task == 'mlm':
+            # Tokenize and find [MASK] position
+            print('Tokenize and find [MASK] position')
+            inputs = self.tokenizer(text, return_tensors='pt', padding=False, truncation=True)
+            if maskID is not None and 0 <= maskID < inputs['input_ids'].size(1):
+                inputs['input_ids'][0][maskID] = self.tokenizer.mask_token_id
+                mask_index = maskID
+            else:
+                raise ValueError(f"Invalid maskID {maskID} for input length {inputs['input_ids'].size(1)}")
+            # Move to device
+            inputs = {k: v.to(self.device) for k, v in inputs.items()}
+            # Get embeddings
+            embedding_layer = self.model.bert.embeddings.word_embeddings
+            inputs_embeds = embedding_layer(inputs['input_ids'])
+            # Forward through BERT encoder
+            hidden_states = self.model.bert(inputs_embeds=inputs_embeds,
+                                    attention_mask=inputs['attention_mask']).last_hidden_state
+            # Predict logits via MLM head
+            logits = self.model.cls(hidden_states)
+            mask_logits = logits[0, mask_index]
+            top_probs, top_indices = torch.topk(mask_logits, k=10, dim=-1)
+            top_probs = F.softmax(top_probs, dim=-1)
+            decoded = self.tokenizer.convert_ids_to_tokens(top_indices.tolist())
+            return decoded, top_probs
+        elif task == 'sst':
+            print('input')
+            inputs = self.tokenizer(text, return_tensors='pt', padding=False, truncation=True).to(self.device)
+            print('output')
+            with torch.no_grad():
+                outputs = self.model(**inputs)
+                logits = outputs.logits  # shape: [1, 2]
+                probs = F.softmax(logits, dim=1).squeeze()
+            labels = ["negative", "positive"]
+            print('ready to return')
+            return labels, probs
+        elif task == 'mnli':
+            inputs = self.tokenizer(text, hypothesis, return_tensors='pt', padding=True, truncation=True).to(self.device)
+            with torch.no_grad():
+                outputs = self.model(**inputs)
+                logits = outputs.logits
+                probs = F.softmax(logits, dim=1).squeeze()
+            labels = ["entailment", "neutral", "contradiction"]
+            return labels, probs
+    def get_all_grad_attn_matrix(self, task, sentence, hypothesis='', maskID = 0):
+        print('GET GRAD:', task,'sentence',sentence, 'hypothesis', hypothesis)
+        print('Tokenize')
+        if task == 'mnli':
+            inputs = self.tokenizer(sentence, hypothesis, return_tensors='pt', padding=False, truncation=True)
+        elif task == 'mlm':
+            inputs = self.tokenizer(sentence,  return_tensors='pt', padding=False, truncation=True)
+            if maskID is not None and 0 <= maskID < inputs['input_ids'].size(1):
+                inputs['input_ids'][0][maskID] = self.tokenizer.mask_token_id
+            else:
+                raise ValueError(f"Invalid maskID {maskID} for input length {inputs['input_ids'].size(1)}")
+        else:
+            inputs = self.tokenizer(sentence,  return_tensors='pt', padding=False, truncation=True)
+        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+        inputs = {k: v.to(self.device) for k, v in inputs.items()}
+        print(inputs['input_ids'].shape)
+        print(tokens,len(tokens))
+        print('Input embeddings with grad')
+        embedding_layer = self.model.bert.embeddings.word_embeddings
+        inputs_embeds = embedding_layer(inputs["input_ids"])
+        inputs_embeds.requires_grad_()
+        print('Forward pass')
+        outputs = self.model.bert(
+            inputs_embeds=inputs_embeds,
+            attention_mask=inputs["attention_mask"],
+            output_attentions=True
+        )
+        attentions = outputs.attentions  # list of [1, heads, seq, seq]
+        print('Optional: store average attentions per layer')
+        mean_attns = [a.squeeze(0).mean(dim=0).detach().cpu() for a in attentions]
+        attn_matrices_all = []
+        grad_matrices_all = []
+        for target_layer in range(len(attentions)):
+            grad_matrix, attn_matrix = self.get_grad_attn_matrix(inputs_embeds, attentions, mean_attns, target_layer)
+            grad_matrices_all.append(grad_matrix.tolist())
+            attn_matrices_all.append(attn_matrix.tolist())
+        return grad_matrices_all, attn_matrices_all
+    def get_grad_attn_matrix(self,inputs_embeds, attentions, mean_attns, target_layer):
+        attn_matrix = mean_attns[target_layer]
+        seq_len = attn_matrix.shape[0]
+        attn_layer = attentions[target_layer].squeeze(0).mean(dim=0)  # [seq, seq]
+        print('computing gradnorms now')
+        grad_norms_list = []
+        for k in range(seq_len):
+            scalar = attn_layer[:, k].sum()  # ✅ total attention received by token k
+            # Compute gradient: d scalar / d inputs_embeds
+            grad = torch.autograd.grad(scalar, inputs_embeds, retain_graph=True)[0].squeeze(0)  # shape: [seq, hidden]
+            grad_norms = grad.norm(dim=1)  # shape: [seq]
+            grad_norms_list.append(grad_norms.unsqueeze(1))  # shape: [seq, 1]
+        grad_matrix = torch.cat(grad_norms_list, dim=1)  # shape: [seq, seq]
+        print('ready to send!')
+        grad_matrix = grad_matrix[:seq_len, :seq_len]
+        attn_matrix = attn_matrix[:seq_len, :seq_len]
+        #tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+        return grad_matrix, attn_matrix
+if __name__ == "__main__":
+    import sys
+    MODEL_CLASSES = {
+        "bert": BERTVisualizer,
+        "roberta": RoBERTaVisualizer,
+        "distilbert": DistilBERTVisualizer,
+        "bart": BARTVisualizer,
+    }
+    # Parse command-line args or fallback to default
+    model_name = sys.argv[1] if len(sys.argv) > 1 else "bert"
+    text = " ".join(sys.argv[2:]) if len(sys.argv) > 2 else "The quick brown fox jumps over the lazy dog."
+    if model_name.lower() not in MODEL_CLASSES:
+        print(f"Supported models: {list(MODEL_CLASSES.keys())}")
+        sys.exit(1)
+    # Instantiate the visualizer
+    visualizer_class = MODEL_CLASSES[model_name.lower()]
+    visualizer = visualizer_class()
+    # Tokenize
+    token_info = visualizer.tokenize(text)
+    # Report
+    print(f"\nModel: {model_name}")
+    print(f"Num attention layers: {visualizer.num_attention_layers}")
+    print(f"Tokens: {token_info['tokens']}")
+    print(f"Input IDs: {token_info['input_ids'].tolist()}")
+    print(f"Attention mask: {token_info['attention_mask'].tolist()}")
+"""
+usage for debug:
+python your_file.py bert "The rain in Spain falls mainly on the plain."
 """

DISTILLBERTmodel.py CHANGED Viewed

@@ -1,254 +1,258 @@
-import torch
-import torch.nn.functional as F
-import os
-from models import TransformerVisualizer
-from transformers import (
-     DistilBertTokenizer,
-    DistilBertForMaskedLM, DistilBertForSequenceClassification
-)
-CACHE_DIR  = "/data/hf_cache"
-class DistilBERTVisualizer(TransformerVisualizer):
-    def __init__(self, task):
-        super().__init__()
-        self.task = task
-        TOKENIZER = 'distilbert-base-uncased'
-        LOCAL_PATH = os.path.join(CACHE_DIR, "tokenizers",TOKENIZER.replace("/", "_"))
-        try:
-            self.tokenizer = DistilBertTokenizer.from_pretrained(LOCAL_PATH, local_files_only=True)
-        except Exception as e:
-            self.tokenizer = DistilBertTokenizer.from_pretrained(TOKENIZER)
-            self.tokenizer.save_pretrained(LOCAL_PATH)
-        print('finding model', self.task)
-        if self.task == 'mlm':
-            MODEL = 'distilbert-base-uncased'
-            LOCAL_PATH = os.path.join(CACHE_DIR, "models",MODEL.replace("/", "_"))
-            try:
-                self.model = DistilBertForMaskedLM.from_pretrained(  LOCAL_PATH, local_files_only=True )
-            except Exception as e:
-                self.model = DistilBertForMaskedLM.from_pretrained(  MODEL  )
-                self.model.save_pretrained(LOCAL_PATH)
-        elif self.task == 'sst':
-            MODEL = 'distilbert-base-uncased-finetuned-sst-2-english'
-            LOCAL_PATH = os.path.join(CACHE_DIR, "models",MODEL.replace("/", "_"))
-            try:
-                self.model = DistilBertForSequenceClassification.from_pretrained(  LOCAL_PATH, local_files_only=True )
-            except Exception as e:
-                self.model = DistilBertForSequenceClassification.from_pretrained(  MODEL )
-                self.model.save_pretrained(LOCAL_PATH)
-        elif self.task == 'mnli':
-            MODEL = "textattack/distilbert-base-uncased-MNLI"
-            LOCAL_PATH = os.path.join(CACHE_DIR, "models",MODEL.replace("/", "_"))
-            try:
-                self.model = DistilBertForSequenceClassification.from_pretrained(  LOCAL_PATH, local_files_only=True)
-            except Exception as e:
-                self.model = DistilBertForSequenceClassification.from_pretrained(  MODEL)
-                self.model.save_pretrained(LOCAL_PATH)
-        else:
-            raise ValueError(f"Unsupported task: {self.task}")
-        self.model.eval()
-        self.num_attention_layers = len(self.model.distilbert.transformer.layer)
-        self.model.to(self.device)
-    def tokenize(self, text, hypothesis = ''):
-        if len(hypothesis) == 0:
-            encoded = self.tokenizer(text, return_tensors='pt', return_attention_mask=True,padding=False, truncation=True)
-        else:
-            encoded = self.tokenizer(text, hypothesis, return_tensors='pt', return_attention_mask=True,padding=False, truncation=True)
-        input_ids = encoded['input_ids'].to(self.device)
-        attention_mask = encoded['attention_mask'].to(self.device)
-        tokens = self.tokenizer.convert_ids_to_tokens(input_ids[0])
-        return {
-            'input_ids': input_ids,
-            'attention_mask': attention_mask,
-            'tokens': tokens
-        }
-    def predict(self, task, text, hypothesis='', maskID = 0):
-        if task  == 'mlm':
-            inputs = self.tokenizer(text, return_tensors='pt', padding=False, truncation=True)
-            if maskID is not None and 0 <= maskID < inputs['input_ids'].size(1):
-                inputs['input_ids'][0][maskID] = self.tokenizer.mask_token_id
-                mask_index = maskID
-            else:
-                raise ValueError(f"Invalid maskID {maskID} for input of length {inputs['input_ids'].size(1)}")
-            inputs = {k: v.to(self.device) for k, v in inputs.items()}
-            with torch.no_grad():
-                outputs = self.model(**inputs)
-                logits = outputs.logits
-            mask_logits = logits[0, mask_index]
-            top_probs, top_indices = torch.topk(F.softmax(mask_logits, dim=-1), 10)
-            decoded = self.tokenizer.convert_ids_to_tokens(top_indices.tolist())
-            return decoded, top_probs
-        elif task == 'sst':
-            inputs = self.tokenizer(text, return_tensors='pt', padding=False, truncation=True).to(self.device)
-            with torch.no_grad():
-                outputs = self.model(**inputs)
-                logits = outputs.logits
-                probs = F.softmax(logits, dim=1).squeeze()
-            labels = ["negative", "positive"]
-            return labels, probs
-        elif task == 'mnli':
-            inputs = self.tokenizer(text, hypothesis, return_tensors='pt', padding=True, truncation=True).to(self.device)
-            with torch.no_grad():
-                outputs = self.model(**inputs)
-                logits = outputs.logits
-                probs = F.softmax(logits, dim=1).squeeze()
-            labels = ["entailment", "neutral", "contradiction"]
-            return labels, probs
-        else:
-            raise NotImplementedError(f"Task '{task}' not supported for DistilBERT")
-    def get_all_grad_attn_matrix(self, task, sentence, hypothesis='', maskID = 0):
-        print(task, sentence,hypothesis)
-        print('Tokenize')
-        if task == 'mnli':
-            inputs = self.tokenizer(sentence, hypothesis, return_tensors='pt', padding=False, truncation=True)
-        elif task == 'mlm':
-            inputs = self.tokenizer(sentence,  return_tensors='pt', padding=False, truncation=True)
-            if maskID is not None and 0 <= maskID < inputs['input_ids'].size(1):
-                inputs['input_ids'][0][maskID] = self.tokenizer.mask_token_id
-            else:
-                print(f"Invalid maskID {maskID} for input of length {inputs['input_ids'].size(1)}")
-                raise ValueError(f"Invalid maskID {maskID} for input of length {inputs['input_ids'].size(1)}")
-            inputs = {k: v.to(self.device) for k, v in inputs.items()}
-        else:
-            inputs = self.tokenizer(sentence,  return_tensors='pt', padding=False, truncation=True)
-        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
-        print(tokens)
-        inputs = {k: v.to(self.device) for k, v in inputs.items()}
-        print('Input embeddings with grad')
-        embedding_layer = self.model.distilbert.embeddings.word_embeddings
-        inputs_embeds = embedding_layer(inputs["input_ids"])
-        inputs_embeds.requires_grad_()
-        print('Forward pass')
-        outputs = self.model.distilbert(
-            inputs_embeds=inputs_embeds,
-            attention_mask=inputs["attention_mask"],
-            output_attentions=True,
-        )
-        attentions = outputs.attentions  # list of [1, heads, seq, seq]
-        print('Mean attentions per layer')
-        mean_attns = [a.squeeze(0).mean(dim=0).detach().cpu() for a in attentions]
-        attn_matrices_all = []
-        grad_matrices_all = []
-        for target_layer in range(len(attentions)):
-            grad_matrix, attn_matrix = self.get_grad_attn_matrix(inputs_embeds, attentions, mean_attns, target_layer)
-            grad_matrices_all.append(grad_matrix.tolist())
-            attn_matrices_all.append(attn_matrix.tolist())
-        return grad_matrices_all, attn_matrices_all
-    def get_grad_attn_matrix(self,inputs_embeds, attentions, mean_attns, target_layer):
-        attn_matrix = mean_attns[target_layer]
-        seq_len = attn_matrix.shape[0]
-        attn_layer = attentions[target_layer].squeeze(0).mean(dim=0)
-        print('Computing grad norms')
-        grad_norms_list = []
-        for k in range(seq_len):
-            scalar = attn_layer[:, k].sum()
-            grad = torch.autograd.grad(scalar, inputs_embeds, retain_graph=True)[0].squeeze(0)
-            grad_norms = grad.norm(dim=1)
-            grad_norms_list.append(grad_norms.unsqueeze(1))
-        grad_matrix = torch.cat(grad_norms_list, dim=1)
-        grad_matrix = grad_matrix[:seq_len, :seq_len]
-        attn_matrix = attn_matrix[:seq_len, :seq_len]
-        return grad_matrix, attn_matrix
-if __name__ == "__main__":
-    import sys
-    MODEL_CLASSES = {
-        "bert": BERTVisualizer,
-        "roberta": RoBERTaVisualizer,
-        "distilbert": DistilBERTVisualizer,
-        "bart": BARTVisualizer,
-    }
-    # Parse command-line args or fallback to default
-    model_name = sys.argv[1] if len(sys.argv) > 1 else "bert"
-    text = " ".join(sys.argv[2:]) if len(sys.argv) > 2 else "The quick brown fox jumps over the lazy dog."
-    if model_name.lower() not in MODEL_CLASSES:
-        print(f"Supported models: {list(MODEL_CLASSES.keys())}")
-        sys.exit(1)
-    # Instantiate the visualizer
-    visualizer_class = MODEL_CLASSES[model_name.lower()]
-    visualizer = visualizer_class()
-    # Tokenize
-    token_info = visualizer.tokenize(text)
-    # Report
-    print(f"\nModel: {model_name}")
-    print(f"Num attention layers: {visualizer.num_attention_layers}")
-    print(f"Tokens: {token_info['tokens']}")
-    print(f"Input IDs: {token_info['input_ids'].tolist()}")
-    print(f"Attention mask: {token_info['attention_mask'].tolist()}")
-"""
-usage for debug:
-python your_file.py bert "The rain in Spain falls mainly on the plain."
 """

+import torch
+import torch.nn.functional as F
+import os
+from models import TransformerVisualizer
+from transformers import (
+     DistilBertTokenizer,
+    DistilBertForMaskedLM, DistilBertForSequenceClassification
+)
+CACHE_DIR  = "/data/hf_cache"
+class DistilBERTVisualizer(TransformerVisualizer):
+    """Attention / input-gradient visualizer built on DistilBERT checkpoints.
+
+    The tokenizer and the task-specific weights are read from ``CACHE_DIR``
+    with ``local_files_only=True``, so the files must already exist on disk;
+    nothing is downloaded here.
+    """
+    # task name -> (model class, directory name under CACHE_DIR/models)
+    _TASK_MODELS = {
+        'mlm': (DistilBertForMaskedLM, 'distilbert-base-uncased'),
+        'sst': (DistilBertForSequenceClassification, 'distilbert-base-uncased-finetuned-sst-2-english'),
+        'mnli': (DistilBertForSequenceClassification, 'textattack_distilbert-base-uncased-MNLI'),
+    }
+    def __init__(self, task):
+        """Load the tokenizer and the model for ``task``.
+
+        Args:
+            task: one of ``'mlm'``, ``'sst'`` or ``'mnli'``.
+
+        Raises:
+            ValueError: if ``task`` is not supported.
+        """
+        super().__init__()
+        self.task = task
+        # Reject unknown tasks before touching the disk so the caller gets the
+        # real cause instead of a file-loading error.
+        if task not in self._TASK_MODELS:
+            raise ValueError(f"Unsupported task: {self.task}")
+        tokenizer_dir = os.path.join(CACHE_DIR, "tokenizers", 'distilbert-base-uncased')
+        self.tokenizer = DistilBertTokenizer.from_pretrained(tokenizer_dir, local_files_only=True)
+        print('finding model', self.task)
+        model_class, model_dir = self._TASK_MODELS[task]
+        self.model = model_class.from_pretrained(os.path.join(CACHE_DIR, "models", model_dir), local_files_only=True)
+        self.model.eval()
+        self.num_attention_layers = len(self.model.distilbert.transformer.layer)
+        self.model.to(self.device)
+    def tokenize(self, text, hypothesis = ''):
+        """Tokenize ``text`` (optionally paired with ``hypothesis``).
+
+        Returns:
+            dict with ``'input_ids'`` and ``'attention_mask'`` tensors on
+            ``self.device`` and ``'tokens'``, the token strings of the first
+            (only) sequence.
+        """
+        # Encode a sentence pair only when a non-empty hypothesis is supplied.
+        segments = (text, hypothesis) if hypothesis else (text,)
+        encoded = self.tokenizer(*segments, return_tensors='pt', return_attention_mask=True, padding=False, truncation=True)
+        input_ids = encoded['input_ids'].to(self.device)
+        attention_mask = encoded['attention_mask'].to(self.device)
+        tokens = self.tokenizer.convert_ids_to_tokens(input_ids[0])
+        return {
+            'input_ids': input_ids,
+            'attention_mask': attention_mask,
+            'tokens': tokens
+        }
+    def _encode_with_mask(self, text, maskID):
+        """Tokenize ``text`` and overwrite token position ``maskID`` with the mask token.
+
+        Raises:
+            ValueError: if ``maskID`` is None or outside the tokenized sequence.
+        """
+        inputs = self.tokenizer(text, return_tensors='pt', padding=False, truncation=True)
+        seq_len = inputs['input_ids'].size(1)
+        if maskID is None or not 0 <= maskID < seq_len:
+            raise ValueError(f"Invalid maskID {maskID} for input of length {seq_len}")
+        inputs['input_ids'][0][maskID] = self.tokenizer.mask_token_id
+        return inputs
+    def predict(self, task, text, hypothesis='', maskID = 0):
+        """Run the model on ``text`` and return ``(labels, probabilities)``.
+
+        For ``'mlm'`` the token at index ``maskID`` is masked and the ten most
+        probable replacement tokens are returned with their probabilities.
+        For ``'sst'`` / ``'mnli'`` the fixed class names are returned with the
+        softmax over the classification logits.
+
+        Raises:
+            ValueError: for ``'mlm'`` when ``maskID`` is invalid.
+            NotImplementedError: for any other task name.
+        """
+        if task == 'mlm':
+            inputs = self._encode_with_mask(text, maskID)
+            inputs = {k: v.to(self.device) for k, v in inputs.items()}
+            with torch.no_grad():
+                logits = self.model(**inputs).logits
+            top_probs, top_indices = torch.topk(F.softmax(logits[0, maskID], dim=-1), 10)
+            decoded = self.tokenizer.convert_ids_to_tokens(top_indices.tolist())
+            return decoded, top_probs
+        if task == 'sst':
+            inputs = self.tokenizer(text, return_tensors='pt', padding=False, truncation=True).to(self.device)
+            labels = ["negative", "positive"]
+        elif task == 'mnli':
+            inputs = self.tokenizer(text, hypothesis, return_tensors='pt', padding=True, truncation=True).to(self.device)
+            # NOTE(review): this order is assumed, not read from the checkpoint;
+            # confirm it against the label mapping of the MNLI model in use.
+            labels = ["entailment", "neutral", "contradiction"]
+        else:
+            raise NotImplementedError(f"Task '{task}' not supported for DistilBERT")
+        with torch.no_grad():
+            probs = F.softmax(self.model(**inputs).logits, dim=1).squeeze()
+        return labels, probs
+    def get_all_grad_attn_matrix(self, task, sentence, hypothesis='', maskID = 0):
+        """Compute, for every attention layer, a gradient matrix and the head-averaged attention.
+
+        Returns:
+            ``(grad_matrices_all, attn_matrices_all)``: two lists of nested
+            Python lists, one entry per layer.
+
+        Raises:
+            ValueError: for ``'mlm'`` when ``maskID`` is invalid.
+        """
+        print(task, sentence,hypothesis)
+        print('Tokenize')
+        if task == 'mnli':
+            inputs = self.tokenizer(sentence, hypothesis, return_tensors='pt', padding=False, truncation=True)
+        elif task == 'mlm':
+            inputs = self._encode_with_mask(sentence, maskID)
+        else:
+            inputs = self.tokenizer(sentence,  return_tensors='pt', padding=False, truncation=True)
+        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+        print(tokens)
+        inputs = {k: v.to(self.device) for k, v in inputs.items()}
+        print('Input embeddings with grad')
+        # Feed word embeddings directly so gradients can be taken w.r.t. them.
+        embedding_layer = self.model.distilbert.embeddings.word_embeddings
+        inputs_embeds = embedding_layer(inputs["input_ids"])
+        inputs_embeds.requires_grad_()
+        print('Forward pass')
+        outputs = self.model.distilbert(
+            inputs_embeds=inputs_embeds,
+            attention_mask=inputs["attention_mask"],
+            output_attentions=True,
+        )
+        attentions = outputs.attentions  # list of [1, heads, seq, seq]
+        print('Mean attentions per layer')
+        mean_attns = [a.squeeze(0).mean(dim=0).detach().cpu() for a in attentions]
+        attn_matrices_all = []
+        grad_matrices_all = []
+        for target_layer in range(len(attentions)):
+            grad_matrix, attn_matrix = self.get_grad_attn_matrix(inputs_embeds, attentions, mean_attns, target_layer)
+            grad_matrices_all.append(grad_matrix.tolist())
+            attn_matrices_all.append(attn_matrix.tolist())
+        return grad_matrices_all, attn_matrices_all
+    def get_grad_attn_matrix(self,inputs_embeds, attentions, mean_attns, target_layer):
+        """Return ``(grad_matrix, attn_matrix)`` for one layer.
+
+        Column ``k`` of ``grad_matrix`` holds, per input position, the norm of
+        the gradient of the summed head-averaged attention column ``k`` with
+        respect to ``inputs_embeds``. ``attn_matrix`` is
+        ``mean_attns[target_layer]`` unchanged.
+        """
+        attn_matrix = mean_attns[target_layer]
+        seq_len = attn_matrix.shape[0]
+        # Recomputed from the live tensors because mean_attns is detached.
+        attn_layer = attentions[target_layer].squeeze(0).mean(dim=0)
+        print('Computing grad norms')
+        grad_columns = []
+        for k in range(seq_len):
+            scalar = attn_layer[:, k].sum()
+            # retain_graph: the same graph is differentiated once per column and per layer.
+            grad = torch.autograd.grad(scalar, inputs_embeds, retain_graph=True)[0].squeeze(0)
+            grad_columns.append(grad.norm(dim=1))
+        grad_matrix = torch.stack(grad_columns, dim=1)
+        return grad_matrix, attn_matrix
+if __name__ == "__main__":
+    import sys
+    # Debug entry point. Only the visualizer defined in this module is
+    # registered: the other visualizer classes are not imported here.
+    MODEL_CLASSES = {
+        "distilbert": DistilBERTVisualizer,
+    }
+    # Parse command-line args or fallback to default
+    model_name = sys.argv[1] if len(sys.argv) > 1 else "distilbert"
+    text = " ".join(sys.argv[2:]) if len(sys.argv) > 2 else "The quick brown fox jumps over the lazy dog."
+    if model_name.lower() not in MODEL_CLASSES:
+        print(f"Supported models: {list(MODEL_CLASSES.keys())}")
+        sys.exit(1)
+    # Instantiate the visualizer; the constructor requires a task, and the
+    # masked-LM checkpoint is used for this tokenization smoke test.
+    visualizer_class = MODEL_CLASSES[model_name.lower()]
+    visualizer = visualizer_class(task="mlm")
+    # Tokenize
+    token_info = visualizer.tokenize(text)
+    # Report
+    print(f"\nModel: {model_name}")
+    print(f"Num attention layers: {visualizer.num_attention_layers}")
+    print(f"Tokens: {token_info['tokens']}")
+    print(f"Input IDs: {token_info['input_ids'].tolist()}")
+    print(f"Attention mask: {token_info['attention_mask'].tolist()}")
+"""
+usage for debug:
+python DISTILLBERTmodel.py distilbert "The rain in Spain falls mainly on the plain."
 """

ROBERTAmodel.py CHANGED Viewed

@@ -1,199 +1,207 @@
-from transformers import RobertaTokenizer, RobertaForMaskedLM
-import torch
-import torch.nn.functional as F
-from models import TransformerVisualizer
-from transformers import (
-    RobertaForMaskedLM, RobertaForSequenceClassification
-)
-import os
-CACHE_DIR  = "/data/hf_cache"
-class RoBERTaVisualizer(TransformerVisualizer):
-    def __init__(self, task):
-        super().__init__()
-        self.task = task
-        TOKENIZER = 'roberta-base'
-        LOCAL_PATH = os.path.join(CACHE_DIR, "tokenizers",TOKENIZER.replace("/", "_"))
-        try:
-            self.tokenizer = RobertaTokenizer.from_pretrained(LOCAL_PATH, local_files_only=True)
-        except Exception as e:
-            self.tokenizer = RobertaTokenizer.from_pretrained(TOKENIZER)
-            self.tokenizer.save_pretrained(LOCAL_PATH)
-        if self.task == 'mlm':
-            MODEL = "roberta-base"
-            LOCAL_PATH = os.path.join(CACHE_DIR, "models",MODEL.replace("/", "_"))
-            try:
-                self.model = RobertaForMaskedLM.from_pretrained(  LOCAL_PATH, local_files_only=True )
-            except Exception as e:
-                self.model = RobertaForMaskedLM.from_pretrained(  MODEL  )
-                self.model.save_pretrained(LOCAL_PATH)
-        elif self.task == 'sst':
-            MODEL = 'textattack/roberta-base-SST-2'
-            LOCAL_PATH = os.path.join(CACHE_DIR, "models",MODEL.replace("/", "_"))
-            try:
-                self.model = RobertaForSequenceClassification.from_pretrained(  LOCAL_PATH, local_files_only=True )
-            except Exception as e:
-                self.model = RobertaForSequenceClassification.from_pretrained(  MODEL )
-                self.model.save_pretrained(LOCAL_PATH)
-        elif self.task == 'mnli':
-            MODEL = "roberta-large-mnli"
-            LOCAL_PATH = os.path.join(CACHE_DIR, "models",MODEL.replace("/", "_"))
-            try:
-                self.model = RobertaForSequenceClassification.from_pretrained(  LOCAL_PATH, local_files_only=True)
-            except Exception as e:
-                self.model = RobertaForSequenceClassification.from_pretrained(  MODEL)
-                self.model.save_pretrained(LOCAL_PATH)
-        self.model.to(self.device)
-        self.model.eval()
-        self.num_attention_layers = self.model.config.num_hidden_layers
-    def tokenize(self, text, hypothesis = ''):
-        if len(hypothesis) == 0:
-            encoded = self.tokenizer(text, return_tensors='pt', return_attention_mask=True,padding=False, truncation=True)
-        else:
-            encoded = self.tokenizer(text, hypothesis, return_tensors='pt', return_attention_mask=True,padding=False, truncation=True)
-        input_ids = encoded['input_ids'].to(self.device)
-        attention_mask = encoded['attention_mask'].to(self.device)
-        tokens = self.tokenizer.convert_ids_to_tokens(input_ids[0])
-        print('First time tokenizing:', tokens, len(tokens))
-        response = {
-            'input_ids': input_ids,
-            'attention_mask': attention_mask,
-            'tokens': tokens
-        }
-        print(response)
-        return response
-    def predict(self, task, text, hypothesis='', maskID = None):
-        if task == 'mlm':
-            inputs = self.tokenizer(text, return_tensors='pt', padding=False, truncation=True)
-            if maskID is not None and 0 <= maskID < inputs['input_ids'].size(1):
-                inputs['input_ids'][0][maskID] = self.tokenizer.mask_token_id
-                mask_index = maskID
-            else:
-                raise ValueError(f"Invalid maskID {maskID} for input of length {inputs['input_ids'].size(1)}")
-            inputs = {k: v.to(self.device) for k, v in inputs.items()}
-            with torch.no_grad():
-                outputs = self.model(**inputs)
-                logits = outputs.logits
-            mask_logits = logits[0, mask_index]
-            top_probs, top_indices = torch.topk(F.softmax(mask_logits, dim=-1), 10)
-            decoded = self.tokenizer.convert_ids_to_tokens(top_indices.tolist())
-            return decoded, top_probs
-        elif task == 'sst':
-            inputs = self.tokenizer(text, return_tensors='pt', padding=False, truncation=True).to(self.device)
-            with torch.no_grad():
-                outputs = self.model(**inputs)
-                logits = outputs.logits
-                probs = F.softmax(logits, dim=1).squeeze()
-            labels = ["negative", "positive"]
-            return labels, probs
-        elif task == 'mnli':
-            inputs = self.tokenizer(text, hypothesis, return_tensors='pt', padding=True, truncation=True).to(self.device)
-            with torch.no_grad():
-                outputs = self.model(**inputs)
-                logits = outputs.logits
-                probs = F.softmax(logits, dim=1).squeeze()
-            labels = ["entailment", "neutral", "contradiction"]
-            return labels, probs
-        else:
-            raise NotImplementedError(f"Task '{task}' not supported for RoBERTa")
-    def get_all_grad_attn_matrix(self, task, sentence, hypothesis='', maskID = None):
-        print(task, sentence,  hypothesis)
-        print('Tokenize')
-        if task == 'mnli':
-            inputs = self.tokenizer(sentence, hypothesis, return_tensors='pt', padding=False, truncation=True)
-        elif task == 'mlm':
-            inputs = self.tokenizer(sentence,  return_tensors='pt', padding=False, truncation=True)
-            if maskID is not None and 0 <= maskID < inputs['input_ids'].size(1):
-                inputs['input_ids'][0][maskID] = self.tokenizer.mask_token_id
-        else:
-            inputs = self.tokenizer(sentence,  return_tensors='pt', padding=False, truncation=True)
-        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
-        print(tokens)
-        inputs = {k: v.to(self.device) for k, v in inputs.items()}
-        print('Input embeddings with grad')
-        embedding_layer = self.model.roberta.embeddings.word_embeddings
-        inputs_embeds = embedding_layer(inputs["input_ids"])
-        inputs_embeds.requires_grad_()
-        print('Forward pass')
-        outputs = self.model.roberta(
-            inputs_embeds=inputs_embeds,
-            attention_mask=inputs["attention_mask"],
-            output_attentions=True
-        )
-        attentions = outputs.attentions  # list of [1, heads, seq, seq]
-        print('Average attentions per layer')
-        mean_attns = [a.squeeze(0).mean(dim=0).detach().cpu() for a in attentions]
-        attn_matrices_all = []
-        grad_matrices_all = []
-        for target_layer in range(len(attentions)):
-            grad_matrix, attn_matrix = self.get_grad_attn_matrix(inputs_embeds, attentions, mean_attns, target_layer)
-            grad_matrices_all.append(grad_matrix.tolist())
-            attn_matrices_all.append(attn_matrix.tolist())
-        return grad_matrices_all, attn_matrices_all
-    def get_grad_attn_matrix(self,inputs_embeds, attentions, mean_attns, target_layer):
-        attn_matrix = mean_attns[target_layer]
-        seq_len = attn_matrix.shape[0]
-        attn_layer = attentions[target_layer].squeeze(0).mean(dim=0)  # [seq, seq]
-        print('Computing grad norms')
-        grad_norms_list = []
-        for k in range(seq_len):
-            scalar = attn_layer[:, k].sum()
-            grad = torch.autograd.grad(scalar, inputs_embeds, retain_graph=True)[0].squeeze(0)
-            grad_norms = grad.norm(dim=1)
-            grad_norms_list.append(grad_norms.unsqueeze(1))
-        grad_matrix = torch.cat(grad_norms_list, dim=1)
-        grad_matrix = grad_matrix[:seq_len, :seq_len]
-        attn_matrix = attn_matrix[:seq_len, :seq_len]
-        return grad_matrix, attn_matrix

+from transformers import RobertaTokenizer, RobertaForMaskedLM
+import torch
+import torch.nn.functional as F
+from models import TransformerVisualizer
+from transformers import (
+    RobertaForMaskedLM, RobertaForSequenceClassification
+)
+import os
+CACHE_DIR  = "/data/hf_cache"
+class RoBERTaVisualizer(TransformerVisualizer):
+    """Attention / input-gradient visualizer built on RoBERTa checkpoints.
+
+    The tokenizer and the task-specific weights are read from ``CACHE_DIR``
+    with ``local_files_only=True``, so the files must already exist on disk;
+    nothing is downloaded here.
+    """
+    # task name -> (model class, directory name under CACHE_DIR/models)
+    _TASK_MODELS = {
+        'mlm': (RobertaForMaskedLM, "roberta-base"),
+        'sst': (RobertaForSequenceClassification, 'textattack_roberta-base-SST-2'),
+        'mnli': (RobertaForSequenceClassification, "roberta-large-mnli"),
+    }
+    def __init__(self, task):
+        """Load the tokenizer and the model for ``task``.
+
+        Args:
+            task: one of ``'mlm'``, ``'sst'`` or ``'mnli'``.
+
+        Raises:
+            ValueError: if ``task`` is not supported.
+        """
+        super().__init__()
+        self.task = task
+        # Without this check an unknown task would leave self.model unset and
+        # fail later with an unrelated AttributeError.
+        if task not in self._TASK_MODELS:
+            raise ValueError(f"Unsupported task: {self.task}")
+        tokenizer_dir = os.path.join(CACHE_DIR, "tokenizers", 'roberta-base')
+        self.tokenizer = RobertaTokenizer.from_pretrained(tokenizer_dir, local_files_only=True)
+        model_class, model_dir = self._TASK_MODELS[task]
+        self.model = model_class.from_pretrained(os.path.join(CACHE_DIR, "models", model_dir), local_files_only=True)
+        self.model.to(self.device)
+        self.model.eval()
+        self.num_attention_layers = self.model.config.num_hidden_layers
+    def tokenize(self, text, hypothesis = ''):
+        """Tokenize ``text`` (optionally paired with ``hypothesis``).
+
+        Returns:
+            dict with ``'input_ids'`` and ``'attention_mask'`` tensors on
+            ``self.device`` and ``'tokens'``, the token strings of the first
+            (only) sequence.
+        """
+        # Encode a sentence pair only when a non-empty hypothesis is supplied.
+        segments = (text, hypothesis) if hypothesis else (text,)
+        encoded = self.tokenizer(*segments, return_tensors='pt', return_attention_mask=True, padding=False, truncation=True)
+        input_ids = encoded['input_ids'].to(self.device)
+        attention_mask = encoded['attention_mask'].to(self.device)
+        tokens = self.tokenizer.convert_ids_to_tokens(input_ids[0])
+        print('First time tokenizing:', tokens, len(tokens))
+        response = {
+            'input_ids': input_ids,
+            'attention_mask': attention_mask,
+            'tokens': tokens
+        }
+        print(response)
+        return response
+    def _mnli_labels(self):
+        """Return the MNLI class names ordered by logit index.
+
+        The names come from the checkpoint's ``config.id2label`` when it holds
+        three real names; generic placeholders such as ``LABEL_0`` or a missing
+        mapping fall back to the previous hard-coded order.
+        """
+        id2label = getattr(self.model.config, "id2label", None) or {}
+        names = [str(id2label.get(i, "")).lower() for i in range(len(id2label))]
+        if len(names) == 3 and all(n and not n.startswith("label_") for n in names):
+            return names
+        return ["entailment", "neutral", "contradiction"]
+    def predict(self, task, text, hypothesis='', maskID = None):
+        """Run the model on ``text`` and return ``(labels, probabilities)``.
+
+        For ``'mlm'`` the token at index ``maskID`` is masked and the ten most
+        probable replacement tokens are returned with their probabilities.
+        For ``'sst'`` / ``'mnli'`` the class names are returned with the
+        softmax over the classification logits.
+
+        Raises:
+            ValueError: for ``'mlm'`` when ``maskID`` is None or out of range.
+            NotImplementedError: for any other task name.
+        """
+        if task == 'mlm':
+            inputs = self.tokenizer(text, return_tensors='pt', padding=False, truncation=True)
+            seq_len = inputs['input_ids'].size(1)
+            if maskID is None or not 0 <= maskID < seq_len:
+                raise ValueError(f"Invalid maskID {maskID} for input of length {seq_len}")
+            inputs['input_ids'][0][maskID] = self.tokenizer.mask_token_id
+            inputs = {k: v.to(self.device) for k, v in inputs.items()}
+            with torch.no_grad():
+                logits = self.model(**inputs).logits
+            top_probs, top_indices = torch.topk(F.softmax(logits[0, maskID], dim=-1), 10)
+            decoded = self.tokenizer.convert_ids_to_tokens(top_indices.tolist())
+            return decoded, top_probs
+        if task == 'sst':
+            inputs = self.tokenizer(text, return_tensors='pt', padding=False, truncation=True).to(self.device)
+            labels = ["negative", "positive"]
+        elif task == 'mnli':
+            inputs = self.tokenizer(text, hypothesis, return_tensors='pt', padding=True, truncation=True).to(self.device)
+            # The label order is checkpoint-specific, so it is read from the
+            # model config instead of being assumed.
+            labels = self._mnli_labels()
+        else:
+            raise NotImplementedError(f"Task '{task}' not supported for RoBERTa")
+        with torch.no_grad():
+            probs = F.softmax(self.model(**inputs).logits, dim=1).squeeze()
+        return labels, probs
+    def get_all_grad_attn_matrix(self, task, sentence, hypothesis='', maskID = None):
+        """Compute, for every attention layer, a gradient matrix and the head-averaged attention.
+
+        For ``'mlm'``, ``maskID=None`` leaves the input unmasked; any other
+        value must be a valid token index.
+
+        Returns:
+            ``(grad_matrices_all, attn_matrices_all)``: two lists of nested
+            Python lists, one entry per layer.
+
+        Raises:
+            ValueError: for ``'mlm'`` when ``maskID`` is given but out of range.
+        """
+        print(task, sentence,  hypothesis)
+        print('Tokenize')
+        if task == 'mnli':
+            inputs = self.tokenizer(sentence, hypothesis, return_tensors='pt', padding=False, truncation=True)
+        elif task == 'mlm':
+            inputs = self.tokenizer(sentence,  return_tensors='pt', padding=False, truncation=True)
+            if maskID is not None:
+                seq_len = inputs['input_ids'].size(1)
+                # An out-of-range index used to be ignored, silently returning
+                # matrices for the unmasked sentence.
+                if not 0 <= maskID < seq_len:
+                    raise ValueError(f"Invalid maskID {maskID} for input of length {seq_len}")
+                inputs['input_ids'][0][maskID] = self.tokenizer.mask_token_id
+        else:
+            inputs = self.tokenizer(sentence,  return_tensors='pt', padding=False, truncation=True)
+        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+        print(tokens)
+        inputs = {k: v.to(self.device) for k, v in inputs.items()}
+        print('Input embeddings with grad')
+        # Feed word embeddings directly so gradients can be taken w.r.t. them.
+        embedding_layer = self.model.roberta.embeddings.word_embeddings
+        inputs_embeds = embedding_layer(inputs["input_ids"])
+        inputs_embeds.requires_grad_()
+        print('Forward pass')
+        outputs = self.model.roberta(
+            inputs_embeds=inputs_embeds,
+            attention_mask=inputs["attention_mask"],
+            output_attentions=True
+        )
+        attentions = outputs.attentions  # list of [1, heads, seq, seq]
+        print('Average attentions per layer')
+        mean_attns = [a.squeeze(0).mean(dim=0).detach().cpu() for a in attentions]
+        attn_matrices_all = []
+        grad_matrices_all = []
+        for target_layer in range(len(attentions)):
+            grad_matrix, attn_matrix = self.get_grad_attn_matrix(inputs_embeds, attentions, mean_attns, target_layer)
+            grad_matrices_all.append(grad_matrix.tolist())
+            attn_matrices_all.append(attn_matrix.tolist())
+        return grad_matrices_all, attn_matrices_all
+    def get_grad_attn_matrix(self,inputs_embeds, attentions, mean_attns, target_layer):
+        """Return ``(grad_matrix, attn_matrix)`` for one layer.
+
+        Column ``k`` of ``grad_matrix`` holds, per input position, the norm of
+        the gradient of the summed head-averaged attention column ``k`` with
+        respect to ``inputs_embeds``. ``attn_matrix`` is
+        ``mean_attns[target_layer]`` unchanged.
+        """
+        attn_matrix = mean_attns[target_layer]
+        seq_len = attn_matrix.shape[0]
+        # Recomputed from the live tensors because mean_attns is detached.
+        attn_layer = attentions[target_layer].squeeze(0).mean(dim=0)  # [seq, seq]
+        print('Computing grad norms')
+        grad_columns = []
+        for k in range(seq_len):
+            scalar = attn_layer[:, k].sum()
+            # retain_graph: the same graph is differentiated once per column and per layer.
+            grad = torch.autograd.grad(scalar, inputs_embeds, retain_graph=True)[0].squeeze(0)
+            grad_columns.append(grad.norm(dim=1))
+        grad_matrix = torch.stack(grad_columns, dim=1)
+        return grad_matrix, attn_matrix

models.py CHANGED Viewed

@@ -1,16 +1,16 @@
-import torch
-class TransformerVisualizer():
-    def __init__(self):
-        self.device = torch.device('cpu')
-    def predict(self, task, text):
-        return task, text,1
-    def get_attention_gradient_matrix(self, task, text, target_layer):
-        return task, text,target_layer,1

+import torch
+class TransformerVisualizer():
+    """Base class for the model-specific visualizers; it only fixes the torch device."""
+    def __init__(self):
+        # All computation is pinned to the CPU.
+        cpu = torch.device('cpu')
+        self.device = cpu
+    def predict(self, task, text):
+        """Placeholder: echo the arguments followed by the constant 1."""
+        placeholder = (task, text, 1)
+        return placeholder
+    def get_attention_gradient_matrix(self, task, text, target_layer):
+        """Placeholder: echo the arguments followed by the constant 1."""
+        placeholder = (task, text, target_layer, 1)
+        return placeholder

server.py CHANGED Viewed

@@ -1,370 +1,349 @@
-from fastapi import FastAPI, Request
-from pydantic import BaseModel
-from pathlib import Path
-import torch
-from fastapi import UploadFile, File
-import os
-from fastapi.middleware.cors import CORSMiddleware
-from ROBERTAmodel import *
-from BERTmodel import *
-from DISTILLBERTmodel import *
-import os
-import zipfile
-import shutil
-from fastapi import Form
-from fastapi import UploadFile, File, Form
-from pathlib import Path
-VISUALIZER_CLASSES = {
-    "BERT": BERTVisualizer,
-    "RoBERTa": RoBERTaVisualizer,
-    "DistilBERT": DistilBERTVisualizer,
-}
-VISUALIZER_CACHE = {}
-app = FastAPI()
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-MODEL_MAP = {
-    "BERT": "bert-base-uncased",
-    "RoBERTa": "roberta-base",
-    "DistilBERT": "distilbert-base-uncased",
-}
-class LoadModelRequest(BaseModel):
-    model: str
-    sentence: str
-    task:str
-    hypothesis:str
-class GradAttnModelRequest(BaseModel):
-    model: str
-    task: str
-    sentence: str
-    hypothesis:str
-    maskID: int | None = None
-class PredModelRequest(BaseModel):
-    model: str
-    sentence: str
-    task:str
-    hypothesis:str
-    maskID: int | None = None
-@app.get("/ping")
-def ping():
-    return {"message": "pong"}
-@app.post("/upload_to_path")
-async def upload_to_path(
-    file: UploadFile = File(...),
-    dest_path: str = Form(...)
-):
-    base_path = Path("/data")
-    target_path = base_path / dest_path
-    # If the path ends with "/", or is a directory, treat it as a folder
-    if str(dest_path).endswith("/") or target_path.is_dir():
-        target_path = target_path / file.filename
-    # Ensure parent directories exist
-    target_path.parent.mkdir(parents=True, exist_ok=True)
-    # Write file
-    with open(target_path, "wb") as f:
-        f.write(await file.read())
-    return {"status": "uploaded", "path": str(target_path)}
-@app.post("/make_dir")
-def make_directory(
-    dir_path: str = Form(...)  # e.g., "logs/test_run"
-):
-    full_dir = Path("/data") / dir_path
-    full_dir.mkdir(parents=True, exist_ok=True)
-    return {"status": "created", "directory": str(full_dir)}
-@app.get("/list_data")
-def list_data():
-    base_path = Path("/data")
-    all_items = []
-    for path in base_path.rglob("*"):  # recursive glob
-        all_items.append({
-            "path": str(path.relative_to(base_path)),
-            "type": "dir" if path.is_dir() else "file",
-            "size": path.stat().st_size if path.is_file() else None
-        })
-    return {"items": all_items}
-@app.post("/purge_data_123456789")
-def purge_data():
-    base_path = Path("/data")
-    if not base_path.exists():
-        return {"status": "error", "message": "/data does not exist"}
-    deleted = []
-    for child in base_path.iterdir():
-        try:
-            if child.is_file() or child.is_symlink():
-                child.unlink()
-            elif child.is_dir():
-                shutil.rmtree(child)
-            deleted.append(str(child.name))
-        except Exception as e:
-            deleted.append(f"FAILED: {child.name} ({e})")
-    return {
-        "status": "done",
-        "deleted": deleted,
-        "total": len(deleted)
-    }
-##############################################################
-@app.post("/load_model")
-def load_model(req: LoadModelRequest):
-    print(f"\n--- /load_model request received ---")
-    print(f"Model: {req.model}")
-    print(f"Sentence: {req.sentence}")
-    print(f"Task: {req.task}")
-    print(f"hypothesis: {req.hypothesis}")
-    if req.model in VISUALIZER_CACHE:
-        del VISUALIZER_CACHE[req.model]
-    torch.cuda.empty_cache()
-    vis_class = VISUALIZER_CLASSES.get(req.model)
-    if vis_class is None:
-        return {"error": f"Unknown model: {req.model}"}
-    print("instantiating visualizer")
-    try:
-        vis = vis_class(task=req.task.lower())
-        print(vis)
-        VISUALIZER_CACHE[req.model] = vis
-        print("Visualizer instantiated")
-    except Exception as e:
-        print("Visualizer init failed:", e)
-        return {"error": f"Instantiation failed: {str(e)}"}
-    print('tokenizing')
-    try:
-        if req.task.lower() == 'mnli':
-            token_output = vis.tokenize(req.sentence, hypothesis=req.hypothesis)
-        else:
-            token_output = vis.tokenize(req.sentence)
-        print("0 Tokenization successful:", token_output["tokens"])
-    except Exception as e:
-        print("Tokenization failed:", e)
-        return {"error": f"Tokenization failed: {str(e)}"}
-    print('response ready')
-    response = {
-        "model": req.model,
-        "tokens": token_output['tokens'],
-        "num_layers": vis.num_attention_layers,
-    }
-    print("load model successful")
-    print(response)
-    return response
-@app.post("/predict_model")
-def predict_model(req: PredModelRequest):
-    print(f"\n--- /predict_model request received ---")
-    print(f"predict: Model: {req.model}")
-    print(f"predict: Task: {req.task}")
-    print(f"predict: sentence: {req.sentence}")
-    print(f"predict: hypothesis: {req.hypothesis}")
-    print(f"predict: maskID: {req.maskID}")
-    print('predict: instantiating')
-    try:
-        vis_class = VISUALIZER_CLASSES.get(req.model)
-        if vis_class is None:
-            return {"error": f"Unknown model: {req.model}"}
-        #if any(p.device.type == 'meta' for p in vis.model.parameters()):
-        #    vis.model = torch.nn.Module.to_empty(vis.model, device=torch.device("cpu"))
-        vis = vis_class(task=req.task.lower())
-        VISUALIZER_CACHE[req.model] = vis
-        print("Model reloaded and cached.")
-    except Exception as e:
-        return {"error": f"Failed to reload model: {str(e)}"}
-    print('predict: meta stuff')
-    print('predict: Run prediction')
-    try:
-        if req.task.lower() == 'mnli':
-            decoded, top_probs = vis.predict(req.task.lower(), req.sentence, hypothesis=req.hypothesis)
-        elif req.task.lower() == 'mlm':
-            decoded, top_probs = vis.predict(req.task.lower(), req.sentence, maskID=req.maskID)
-        else:
-            decoded, top_probs = vis.predict(req.task.lower(), req.sentence)
-    except Exception as e:
-        decoded, top_probs = "error", e
-        print(e)
-    print('predict: response ready')
-    response = {
-        "decoded": decoded,
-        "top_probs": top_probs.tolist(),
-    }
-    print("predict: predict model successful")
-    if len(decoded) > 5:
-        print([(k,v[:5]) for k,v in response.items()])
-    else:
-        print(response)
-    return response
-@app.post("/get_grad_attn_matrix")
-def get_grad_attn_matrix(req: GradAttnModelRequest):
-    try:
-        print(f"\n--- /get_grad_matrix request received ---")
-        print(f"grad:Model: {req.model}")
-        print(f"grad:Task: {req.task}")
-        print(f"grad:sentence: {req.sentence}")
-        print(f"grad: hypothesis: {req.hypothesis}")
-        print(f"predict: maskID: {req.maskID}")
-        try:
-            vis_class = VISUALIZER_CLASSES.get(req.model)
-            if vis_class is None:
-                return {"error": f"Unknown model: {req.model}"}
-            #if any(p.device.type == 'meta' for p in vis.model.parameters()):
-            #    vis.model = torch.nn.Module.to_empty(vis.model, device=torch.device("cpu"))
-            vis = vis_class(task=req.task.lower())
-            VISUALIZER_CACHE[req.model] = vis
-            print("Model reloaded and cached.")
-        except Exception as e:
-            return {"error": f"Failed to reload model: {str(e)}"}
-        print("run function")
-        try:
-            if req.task.lower()=='mnli':
-                grad_matrix, attn_matrix = vis.get_all_grad_attn_matrix(req.task.lower(), req.sentence,hypothesis=req.hypothesis)
-            elif req.task.lower()=='mlm':
-                grad_matrix, attn_matrix = vis.get_all_grad_attn_matrix(req.task.lower(), req.sentence,maskID=req.maskID)
-            else:
-                grad_matrix, attn_matrix = vis.get_all_grad_attn_matrix(req.task.lower(), req.sentence)
-        except Exception as e:
-            print("Exception during grad/attn computation:", e)
-            grad_matrix, attn_matrix = e,e
-        response = {
-            "grad_matrix": grad_matrix,
-            "attn_matrix": attn_matrix,
-        }
-        print('grad attn successful')
-        return response
-    except Exception as e:
-        print("SERVER EXCEPTION:", e)
-        return {"error": str(e)}
-@app.post("/load_all_files")
-def load_all_files():
-    print('load BERTmlm ')
-    BERTVisualizer('mlm')
-    print('load BERTmnli ')
-    BERTVisualizer('mnli')
-    print('load BERTsst ')
-    BERTVisualizer('sst')
-    print('load roBERTmlm ')
-    RoBERTaVisualizer('mlm')
-    print('load roBERTmnli')
-    RoBERTaVisualizer('mnli')
-    print('load roBERTsst')
-    RoBERTaVisualizer('sst')
-    print('load distillBERTmlm ')
-    DistilBERTVisualizer('mlm')
-    print('load distillBERTmmli ')
-    DistilBERTVisualizer('mnli')
-    print('load distillBERTsst ')
-    DistilBERTVisualizer('sst')

+from fastapi import FastAPI, Request
+from pydantic import BaseModel
+from pathlib import Path
+import torch
+from fastapi import UploadFile, File
+import os
+from fastapi.middleware.cors import CORSMiddleware
+from ROBERTAmodel import *
+from BERTmodel import *
+from DISTILLBERTmodel import *
+import os
+import zipfile
+import shutil
+from fastapi import Form
+from fastapi import UploadFile, File, Form
+from pathlib import Path
+# Maps the model name sent by the client to the visualizer class that wraps it.
+VISUALIZER_CLASSES = {
+    "BERT": BERTVisualizer,
+    "RoBERTa": RoBERTaVisualizer,
+    "DistilBERT": DistilBERTVisualizer,
+}
+# Most recently built visualizer per model name. The endpoints visible in this
+# file store into it (and /load_model evicts from it), but none of them reads a
+# stored instance back.
+VISUALIZER_CACHE = {}
+app = FastAPI()
+# NOTE(review): wildcard origins together with allow_credentials=True is a very
+# permissive CORS policy -- confirm this is intended for the deployed Space.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Model name -> checkpoint identifier (presumably Hugging Face hub ids).
+# Not referenced by any of the endpoints visible in this file.
+MODEL_MAP = {
+    "BERT": "bert-base-uncased",
+    "RoBERTa": "roberta-base",
+    "DistilBERT": "distilbert-base-uncased",
+}
+# Request body for POST /load_model.
+class LoadModelRequest(BaseModel):
+    model: str  # looked up in VISUALIZER_CLASSES ("BERT", "RoBERTa", "DistilBERT")
+    sentence: str  # text handed to the visualizer's tokenize()
+    task:str  # lower-cased by the handler before use; 'mnli' enables the hypothesis
+    hypothesis:str  # required by the schema, but only forwarded when task is 'mnli'
+# Request body for POST /get_grad_attn_matrix.
+class GradAttnModelRequest(BaseModel):
+    model: str  # looked up in VISUALIZER_CLASSES
+    task: str  # lower-cased by the handler; selects which extra argument is forwarded
+    sentence: str  # input text passed to get_all_grad_attn_matrix()
+    hypothesis:str  # required by the schema, but only forwarded when task is 'mnli'
+    maskID: int | None = None  # only forwarded when task is 'mlm'
+# Request body for POST /predict_model.
+class PredModelRequest(BaseModel):
+    model: str  # looked up in VISUALIZER_CLASSES
+    sentence: str  # input text passed to the visualizer's predict()
+    task:str  # lower-cased by the handler; selects which extra argument is forwarded
+    hypothesis:str  # required by the schema, but only forwarded when task is 'mnli'
+    maskID: int | None = None  # only forwarded when task is 'mlm'
+@app.post("/upload_model")
+async def upload_model(file: UploadFile = File(...)):
+    """Save an uploaded file below /data/models and report where it landed.
+
+    The filename comes from the client and is untrusted, so the target path
+    is normalised first and must still be inside /data/models. Names such as
+    "../../etc/passwd", "." or an empty/missing name are rejected instead of
+    being written.
+
+    Returns:
+        {"status": "uploaded", "path": <saved path>} on success, or
+        {"error": <message>} when the filename is missing or escapes the
+        models folder.
+    """
+    models_root = "/data/models"  # or wherever your disk is mounted
+    # normpath collapses "..", "." and duplicate slashes lexically; it does not
+    # resolve symlinks, so a symlink placed inside the folder is not covered.
+    save_path = os.path.normpath(f"{models_root}/{file.filename or ''}")
+    if not save_path.startswith(models_root + "/"):
+        return {"error": f"Invalid filename: {file.filename!r}"}
+    os.makedirs(os.path.dirname(save_path), exist_ok=True)
+    with open(save_path, "wb") as f:
+        f.write(await file.read())
+    return {"status": "uploaded", "path": save_path}
+@app.post("/load_model")
+def load_model(req: LoadModelRequest):
+    # Evicts any cached visualizer for req.model, builds a new one for the
+    # requested task, tokenizes the sentence with it and returns the model
+    # name, the tokens and the visualizer's num_attention_layers.
+    # Every failure is reported as an {"error": ...} payload.
+    print("\n--- /load_model request received ---")
+    print(f"Model: {req.model}")
+    print(f"Sentence: {req.sentence}")
+    print(f"Task: {req.task}")
+    print(f"hypothesis: {req.hypothesis}")
+    task = req.task.lower()
+    # Drop the previous instance before asking CUDA to release cached memory.
+    VISUALIZER_CACHE.pop(req.model, None)
+    torch.cuda.empty_cache()
+    vis_class = VISUALIZER_CLASSES.get(req.model)
+    if vis_class is None:
+        return {"error": f"Unknown model: {req.model}"}
+    print("instantiating visualizer")
+    try:
+        vis = vis_class(task=task)
+        print(vis)
+        VISUALIZER_CACHE[req.model] = vis
+        print("Visualizer instantiated")
+    except Exception as init_err:
+        print("Visualizer init failed:", init_err)
+        return {"error": f"Instantiation failed: {init_err}"}
+    print('tokenizing')
+    # Only the MNLI task tokenizes a second sentence.
+    extra_kwargs = {"hypothesis": req.hypothesis} if task == 'mnli' else {}
+    try:
+        token_output = vis.tokenize(req.sentence, **extra_kwargs)
+        print("0 Tokenization successful:", token_output["tokens"])
+    except Exception as tok_err:
+        print("Tokenization failed:", tok_err)
+        return {"error": f"Tokenization failed: {tok_err}"}
+    print('response ready')
+    response = dict(
+        model=req.model,
+        tokens=token_output['tokens'],
+        num_layers=vis.num_attention_layers,
+    )
+    print("load model successful")
+    print(response)
+    return response
+@app.post("/predict_model")
+def predict_model(req: PredModelRequest):
+    """Run the requested model on the sentence and return its prediction.
+
+    A new visualizer is built on every call and stored in VISUALIZER_CACHE
+    under the model name. The task decides which extra argument is forwarded
+    to ``predict``: ``hypothesis`` for 'mnli', ``maskID`` for 'mlm', none
+    otherwise.
+
+    Returns:
+        {"decoded": ..., "top_probs": [...]} on success, where ``top_probs``
+        is the ``.tolist()`` of the second value returned by ``predict``.
+        {"error": <message>} when the model name is unknown, the visualizer
+        cannot be built, or the prediction itself fails.
+    """
+    print(f"\n--- /predict_model request received ---")
+    print(f"predict: Model: {req.model}")
+    print(f"predict: Task: {req.task}")
+    print(f"predict: sentence: {req.sentence}")
+    print(f"predict: hypothesis: {req.hypothesis}")
+    print(f"predict: maskID: {req.maskID}")
+    print('predict: instantiating')
+    task = req.task.lower()
+    vis_class = VISUALIZER_CLASSES.get(req.model)
+    if vis_class is None:
+        return {"error": f"Unknown model: {req.model}"}
+    try:
+        vis = vis_class(task=task)
+        VISUALIZER_CACHE[req.model] = vis
+        print("Model reloaded and cached.")
+    except Exception as e:
+        return {"error": f"Failed to reload model: {str(e)}"}
+    print('predict: meta stuff')
+    print('predict: Run prediction')
+    try:
+        if task == 'mnli':
+            decoded, top_probs = vis.predict(task, req.sentence, hypothesis=req.hypothesis)
+        elif task == 'mlm':
+            decoded, top_probs = vis.predict(task, req.sentence, maskID=req.maskID)
+        else:
+            decoded, top_probs = vis.predict(task, req.sentence)
+        # Built inside the try so a failing conversion is reported the same way
+        # as a failing prediction.
+        response = {
+            "decoded": decoded,
+            "top_probs": top_probs.tolist(),
+        }
+    except Exception as e:
+        # Previously the exception object was stored in top_probs and then
+        # crashed on .tolist(); report the failure to the client instead.
+        print("predict: prediction failed:", e)
+        return {"error": f"Prediction failed: {e}"}
+    print('predict: response ready')
+    print("predict: predict model successful")
+    if len(decoded) > 5:
+        # Keep the log short for long outputs.
+        print([(k,v[:5]) for k,v in response.items()])
+    else:
+        print(response)
+    return response
+@app.post("/get_grad_attn_matrix")
+def get_grad_attn_matrix(req: GradAttnModelRequest):
+    """Compute the gradient and attention matrices for the given sentence.
+
+    A new visualizer is built on every call and stored in VISUALIZER_CACHE
+    under the model name. The task decides which extra argument is forwarded
+    to ``get_all_grad_attn_matrix``: ``hypothesis`` for 'mnli', ``maskID``
+    for 'mlm', none otherwise.
+
+    Returns:
+        {"grad_matrix": ..., "attn_matrix": ...} on success, holding the two
+        values returned by the visualizer unchanged.
+        {"error": <message>} when the model name is unknown, the visualizer
+        cannot be built, or the computation fails.
+    """
+    try:
+        print(f"\n--- /get_grad_matrix request received ---")
+        print(f"grad:Model: {req.model}")
+        print(f"grad:Task: {req.task}")
+        print(f"grad:sentence: {req.sentence}")
+        print(f"grad: hypothesis: {req.hypothesis}")
+        print(f"predict: maskID: {req.maskID}")
+        task = req.task.lower()
+        vis_class = VISUALIZER_CLASSES.get(req.model)
+        if vis_class is None:
+            return {"error": f"Unknown model: {req.model}"}
+        try:
+            vis = vis_class(task=task)
+            VISUALIZER_CACHE[req.model] = vis
+            print("Model reloaded and cached.")
+        except Exception as e:
+            return {"error": f"Failed to reload model: {str(e)}"}
+        print("run function")
+        try:
+            if task == 'mnli':
+                grad_matrix, attn_matrix = vis.get_all_grad_attn_matrix(task, req.sentence, hypothesis=req.hypothesis)
+            elif task == 'mlm':
+                grad_matrix, attn_matrix = vis.get_all_grad_attn_matrix(task, req.sentence, maskID=req.maskID)
+            else:
+                grad_matrix, attn_matrix = vis.get_all_grad_attn_matrix(task, req.sentence)
+        except Exception as e:
+            # Previously the exception object itself was returned in both
+            # fields (and success was logged); send an error payload instead.
+            print("Exception during grad/attn computation:", e)
+            return {"error": f"Grad/attn computation failed: {e}"}
+        response = {
+            "grad_matrix": grad_matrix,
+            "attn_matrix": attn_matrix,
+        }
+        print('grad attn successful')
+        return response
+    except Exception as e:
+        print("SERVER EXCEPTION:", e)
+        return {"error": str(e)}
+##################################################
+@app.get("/ping")
+def ping():
+    # Liveness probe: always answers with the same fixed payload.
+    reply = dict(message="pong")
+    return reply
+@app.post("/upload_to_path")
+async def upload_to_path(
+    file: UploadFile = File(...),
+    dest_path: str = Form(...)  # e.g., "models/model.pt"
+):
+    """Save an uploaded file at ``dest_path`` relative to /data.
+
+    ``dest_path`` is client-supplied and untrusted. An absolute value would
+    replace the /data prefix entirely and ".." segments would climb out of
+    it, so the joined path is normalised and must be strictly below /data.
+    Missing parent directories are created.
+
+    Returns:
+        {"status": "uploaded", "path": <saved path>} on success, or
+        {"error": <message>} when ``dest_path`` does not name a location
+        inside /data.
+    """
+    data_root = Path("/data")
+    # Lexical normalisation only: symlinks that already exist inside /data are
+    # not resolved by this check.
+    full_path = Path(os.path.normpath(data_root / dest_path))
+    if data_root not in full_path.parents:
+        return {"error": f"dest_path must point inside /data: {dest_path!r}"}
+    full_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(full_path, "wb") as f:
+        f.write(await file.read())
+    return {"status": "uploaded", "path": str(full_path)}
+@app.post("/make_dir")
+def make_directory(
+    dir_path: str = Form(...)  # e.g., "logs/test_run"
+):
+    """Create ``dir_path`` (and any missing parents) relative to /data.
+
+    ``dir_path`` is client-supplied and untrusted. An absolute value would
+    replace the /data prefix entirely and ".." segments would climb out of
+    it, so the joined path is normalised and must be /data itself or a
+    location below it.
+
+    Returns:
+        {"status": "created", "directory": <path>} on success (also when the
+        directory already existed), or {"error": <message>} when ``dir_path``
+        leaves /data.
+    """
+    data_root = Path("/data")
+    # Lexical normalisation only: symlinks that already exist inside /data are
+    # not resolved by this check.
+    full_dir = Path(os.path.normpath(data_root / dir_path))
+    if full_dir != data_root and data_root not in full_dir.parents:
+        return {"error": f"dir_path must stay inside /data: {dir_path!r}"}
+    full_dir.mkdir(parents=True, exist_ok=True)
+    return {"status": "created", "directory": str(full_dir)}
+@app.get("/list_data")
+def list_data():
+    # Recursively lists everything under /data. Each item carries its path
+    # relative to /data, whether it is a "dir" or a "file", and its size in
+    # bytes (None for anything that is not a regular file).
+    root = Path("/data")
+    def describe(entry):
+        rel_path = str(entry.relative_to(root))
+        kind = "dir" if entry.is_dir() else "file"
+        size = entry.stat().st_size if entry.is_file() else None
+        return {"path": rel_path, "type": kind, "size": size}
+    return {"items": [describe(entry) for entry in root.rglob("*")]}
+@app.post("/purge_data_123456789")
+def purge_data():
+    # Removes every top-level entry of /data: files and symlinks are unlinked,
+    # directories are removed recursively. The reply lists each entry name, or
+    # a "FAILED: ..." string when its removal raised.
+    # NOTE(review): no authentication is visible here; the endpoint appears to
+    # be protected only by its hard-to-guess URL -- confirm this is acceptable.
+    root = Path("/data")
+    if not root.exists():
+        return {"status": "error", "message": "/data does not exist"}
+    outcomes = []
+    for entry in root.iterdir():
+        try:
+            if entry.is_file() or entry.is_symlink():
+                entry.unlink()
+            elif entry.is_dir():
+                shutil.rmtree(entry)
+        except Exception as err:
+            outcomes.append(f"FAILED: {entry.name} ({err})")
+        else:
+            # Reached for every entry that did not raise, including ones that
+            # matched neither branch above.
+            outcomes.append(str(entry.name))
+    return dict(status="done", deleted=outcomes, total=len(outcomes))
+"""
+if __name__ == "__main__":
+    print('rim ')
+    BERTVisualizer('mlm')
+    BERTVisualizer('mnli')
+    BERTVisualizer('sst')
+    RoBERTaVisualizer('mlm')
+    RoBERTaVisualizer('mnli')
+    RoBERTaVisualizer('sst')
+    DistilBERTVisualizer('mlm')
+    DistilBERTVisualizer('mnli')
+    DistilBERTVisualizer('sst')
+"""