"""
In-house explainability module.

Implements GradCAM, GradCAM++ and ScoreCAM so that no external CAM package is
required. Only ``torch`` and ``numpy`` are needed to compute heatmaps; OpenCV
and matplotlib are needed solely by the visualisation helpers.
"""
import contextlib
from typing import List, Tuple, Optional, Union

import numpy as np
import torch
import torch.nn.functional as F

# Visualisation-only dependencies. They are imported defensively so that the
# CAM classes stay usable in environments without OpenCV / matplotlib; the
# helpers that need them raise a clear ImportError when called.
try:
    import cv2
except ImportError:
    cv2 = None
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None
try:
    from PIL import Image
except ImportError:
    Image = None

# Default colormap for overlays. 2 is the numeric value of cv2.COLORMAP_JET,
# used only so the signature below can be built when OpenCV is missing.
_COLORMAP_JET = cv2.COLORMAP_JET if cv2 is not None else 2


def _require(module, package: str, feature: str) -> None:
    """Raise ImportError if an optional dependency failed to import."""
    if module is None:
        raise ImportError(f"{feature} 需要先安装 {package}")


@contextlib.contextmanager
def _eval_mode(model: torch.nn.Module):
    """Temporarily switch ``model`` to eval mode.

    The training flag of every sub-module is recorded and restored
    individually on exit, so modules that were deliberately frozen in eval
    mode are not flipped back to training.
    """
    previous = [(module, module.training) for module in model.modules()]
    model.eval()
    try:
        yield
    finally:
        for module, was_training in previous:
            module.training = was_training


def _normalize_cam(cam: torch.Tensor) -> np.ndarray:
    """Convert a 2-D CAM tensor to a NumPy array min-max scaled to [0, 1]."""
    heatmap = cam.detach().cpu().numpy()
    # The epsilon keeps a constant map from dividing by zero (result: zeros).
    return (heatmap - heatmap.min()) / (heatmap.max() - heatmap.min() + 1e-8)


class GradCAM:
    """In-house GradCAM implementation.

    Only the first sample of the batch and the first entry of
    ``target_layers`` are used to build the heatmap.
    """

    def __init__(self,
                 model: torch.nn.Module,
                 target_layers: List[torch.nn.Module],
                 use_cuda: bool = False,
                 model_forward=None):
        """
        Initialise the CAM generator.

        Args:
            model: PyTorch model to explain.
            target_layers: Layers whose output is explained (usually the last
                convolutional layer).
            use_cuda: Recorded in ``self.device``. The model itself is not
                moved; inputs are moved to the device of the model parameters
                at call time.
            model_forward: Optional callable used instead of ``model(x)`` to
                produce the class scores.
        """
        self.model = model
        self.model_forward = model_forward  # optional custom forward
        self.target_layers = target_layers
        self.use_cuda = use_cuda
        self.device = torch.device(
            'cuda' if use_cuda and torch.cuda.is_available() else 'cpu')

        # Filled per call; index i corresponds to target_layers[i].
        self.gradients = []
        self.activations = []

    def _register_hooks(self):
        """Attach forward hooks to the target layers and return their handles.

        Activations are stored by layer index rather than appended, so the
        slot for a layer never depends on execution order.
        """
        layers = list(self.target_layers)
        self.activations = [None] * len(layers)
        self.gradients = [None] * len(layers)

        def make_hook(index):
            def forward_hook(module, inputs, output):
                self.activations[index] = output
            return forward_hook

        return [layer.register_forward_hook(make_hook(index))
                for index, layer in enumerate(layers)]

    def _clear_hooks(self):
        """Drop the activations and gradients captured by the last call."""
        self.gradients = []
        self.activations = []

    def _model_device(self) -> torch.device:
        """Return the device of the model parameters (``self.device`` if none)."""
        first_param = next(self.model.parameters(), None)
        return self.device if first_param is None else first_param.device

    def _run_model(self, input_tensor: torch.Tensor) -> torch.Tensor:
        """Compute class scores through ``model_forward`` when provided."""
        if self.model_forward is not None:
            return self.model_forward(input_tensor)
        return self.model(input_tensor)

    def _forward_with_hooks(self, input_tensor: torch.Tensor):
        """Run one forward pass with hooks attached only for its duration.

        Returns:
            ``(model_output, activations)`` where ``activations`` is the
            output of the first target layer, or ``None`` if nothing was
            captured.
        """
        handles = self._register_hooks()
        try:
            model_output = self._run_model(input_tensor)
        finally:
            # Never leave hooks behind on the caller's model.
            for handle in handles:
                handle.remove()
        captured = self.activations[0] if self.activations else None
        return model_output, captured

    @staticmethod
    def _resolve_target_class(model_output: torch.Tensor,
                              target_class: Optional[int]) -> int:
        """Return ``target_class``, defaulting to the top class of sample 0."""
        if target_class is None:
            return int(model_output[0].argmax().item())
        return target_class

    def _channel_weights(self, activations: torch.Tensor,
                         gradients: torch.Tensor) -> torch.Tensor:
        """GradCAM weights: spatial mean of the gradients, shape (B, K)."""
        return gradients.mean(dim=(2, 3))

    @staticmethod
    def _render_cam(weights: torch.Tensor, activations: torch.Tensor,
                    size) -> np.ndarray:
        """Combine channel weights and activations into a normalised heatmap.

        Args:
            weights: Channel weights of shape (B, K).
            activations: Layer output of shape (B, K, h, w).
            size: Spatial size (H, W) of the returned heatmap.
        """
        cam = (weights[:1, :, None, None] * activations[:1]).sum(
            dim=1, keepdim=True)
        cam = F.relu(cam)
        cam = F.interpolate(cam.float(), size=tuple(size), mode='bilinear',
                            align_corners=False)
        # Explicit indexing instead of squeeze() keeps the map 2-D even when
        # H or W equals 1.
        return _normalize_cam(cam[0, 0])

    def forward(self, input_tensor: torch.Tensor,
                target_class: Optional[int] = None) -> np.ndarray:
        """
        Run the model and build the CAM heatmap.

        Args:
            input_tensor: Input of shape (B, C, H, W); only sample 0 is used.
            target_class: Class index to explain; ``None`` uses the predicted
                class.

        Returns:
            np.ndarray: Heatmap of shape (H, W) with values in [0, 1].

        Raises:
            ValueError: If no activation was captured from ``target_layers``.
        """
        input_tensor = input_tensor.to(self._model_device())

        # enable_grad makes this work even when the caller is inside no_grad.
        with _eval_mode(self.model), torch.enable_grad():
            if input_tensor.is_floating_point():
                # Make the input require grad so activations are
                # differentiable even when all parameters are frozen. The
                # clone() turns it into a non-leaf tensor, so in-place ops on
                # the input remain legal.
                input_tensor = input_tensor.detach().requires_grad_(True).clone()

            model_output, activations = self._forward_with_hooks(input_tensor)
            if activations is None:
                self._clear_hooks()
                raise ValueError(
                    "未捕获到目标层激活: target_layers 为空或目标层未参与前向传播")

            target_class = self._resolve_target_class(model_output,
                                                      target_class)
            # autograd.grad returns the gradient directly, so the parameters'
            # .grad buffers are neither zeroed nor accumulated into.
            gradients = torch.autograd.grad(
                model_output[0, target_class], activations)[0]

        # Keep only detached tensors so the autograd graph can be freed.
        self.activations = [a if a is None else a.detach()
                            for a in self.activations]
        self.gradients[0] = gradients
        activations = self.activations[0]

        weights = self._channel_weights(activations, gradients)
        return self._render_cam(weights, activations, input_tensor.shape[2:])


class GradCAMPlusPlus(GradCAM):
    """In-house GradCAM++ implementation.

    Shares the forward/backward machinery of :class:`GradCAM` (including
    support for ``model_forward``) and only changes the channel weights.
    """

    def _channel_weights(self, activations: torch.Tensor,
                         gradients: torch.Tensor) -> torch.Tensor:
        """GradCAM++ weights, following Eq. 19 of the Grad-CAM++ paper.

        ``alpha = g^2 / (2 g^2 + sum(A) * g^3)``: the spatial sum runs over
        the activations only, and the third-order term is taken at each
        position.
        """
        grads_pow2 = gradients.pow(2)
        grads_pow3 = grads_pow2 * gradients
        activation_sum = activations.sum(dim=(2, 3), keepdim=True)
        alpha = grads_pow2 / (2 * grads_pow2 + activation_sum * grads_pow3 + 1e-7)
        # Positions with zero gradient must not contribute.
        alpha = torch.where(gradients != 0, alpha, torch.zeros_like(alpha))
        return (alpha * F.relu(gradients)).sum(dim=(2, 3))

    def forward(self, input_tensor: torch.Tensor,
                target_class: Optional[int] = None) -> np.ndarray:
        """Build the GradCAM++ heatmap; same contract as ``GradCAM.forward``."""
        return super().forward(input_tensor, target_class)


class ScoreCAM(GradCAM):
    """In-house ScoreCAM implementation (gradient-free).

    Each activation channel is upsampled, scaled to [0, 1] and used to mask
    the input; the target-class score of the masked input, passed through a
    softmax over channels, becomes that channel's weight.
    """

    # Number of masked inputs scored per forward pass.
    batch_size = 16

    def __init__(self,
                 model: torch.nn.Module,
                 target_layers: List[torch.nn.Module],
                 use_cuda: bool = False,
                 model_forward=None):
        super().__init__(model, target_layers, use_cuda, model_forward)

    def _backbone_activations(self, input_tensor: torch.Tensor):
        """Fallback: output of ``model.backbone`` if that attribute exists."""
        if hasattr(self.model, 'backbone'):
            return self.model.backbone(input_tensor)
        return None

    def _get_activations(self, input_tensor: torch.Tensor) -> torch.Tensor:
        """Return the activations of the first target layer.

        Falls back to ``model.backbone(input_tensor)`` when no target layer
        produced an activation.

        Raises:
            ValueError: If no activation could be obtained.
        """
        _, activations = self._forward_with_hooks(input_tensor)
        if activations is None:
            activations = self._backbone_activations(input_tensor)
        if activations is None:
            raise ValueError(
                "未捕获到目标层激活: target_layers 为空或目标层未参与前向传播")
        return activations

    def forward(self, input_tensor: torch.Tensor,
                target_class: Optional[int] = None) -> np.ndarray:
        """
        Build the ScoreCAM heatmap.

        Args:
            input_tensor: Input of shape (B, C, H, W); only sample 0 is used.
            target_class: Class index to explain; ``None`` uses the predicted
                class.

        Returns:
            np.ndarray: Heatmap of shape (H, W) with values in [0, 1].

        Raises:
            ValueError: If no activation could be obtained.
        """
        input_tensor = input_tensor.to(self._model_device())
        spatial_size = tuple(input_tensor.shape[2:])

        with _eval_mode(self.model), torch.no_grad():
            model_output, activations = self._forward_with_hooks(input_tensor)
            if activations is None:
                activations = self._backbone_activations(input_tensor)
            if activations is None:
                self._clear_hooks()
                raise ValueError(
                    "未捕获到目标层激活: target_layers 为空或目标层未参与前向传播")
            target_class = self._resolve_target_class(model_output,
                                                      target_class)
            activations = activations[:1]

            # Upsample every channel to the input size and scale it to [0, 1]
            # so it can act as a soft mask on the input.
            masks = F.interpolate(activations.float(), size=spatial_size,
                                  mode='bilinear', align_corners=False)
            flat = masks.flatten(2)
            lowest = flat.min(dim=2).values[:, :, None, None]
            highest = flat.max(dim=2).values[:, :, None, None]
            masks = (masks - lowest) / (highest - lowest + 1e-8)
            masks = masks[0].unsqueeze(1).to(input_tensor.dtype)  # (K, 1, H, W)

            # Score the masked inputs in small batches rather than one
            # forward pass per channel.
            scores = []
            for start in range(0, masks.shape[0], self.batch_size):
                masked = input_tensor[:1] * masks[start:start + self.batch_size]
                scores.append(self._run_model(masked)[:, target_class])
            weights = F.softmax(torch.cat(scores).float(), dim=0)

        return self._render_cam(weights.unsqueeze(0), activations,
                                spatial_size)


def show_cam_on_image(img: np.ndarray,
                      mask: np.ndarray,
                      use_rgb: bool = True,
                      colormap: int = _COLORMAP_JET) -> np.ndarray:
    """
    Overlay a CAM heatmap on an image.

    Args:
        img: Original image with values in 0-255, (H, W, 3) or (H, W).
        mask: CAM mask with values in 0-1; resized to the image size when
            the two differ.
        use_rgb: Convert the heatmap from BGR to RGB before blending.
        colormap: OpenCV colormap id (defaults to JET).

    Returns:
        np.ndarray: Blended uint8 image.

    Raises:
        ImportError: If OpenCV is not installed.
    """
    _require(cv2, 'opencv-python (cv2)', 'show_cam_on_image')
    img = np.asarray(img)
    mask = np.asarray(mask)
    if img.ndim == 2:
        # Let a grayscale image broadcast against the 3-channel heatmap.
        img = img[:, :, None]
    if mask.shape[:2] != img.shape[:2]:
        # cv2.resize takes (width, height).
        mask = cv2.resize(mask, (img.shape[1], img.shape[0]))
    # Values outside [0, 1] would wrap around in the uint8 conversion.
    mask = np.clip(mask, 0.0, 1.0)

    heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap)
    if use_rgb:
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

    overlay = np.float32(heatmap) / 255 + np.float32(img) / 255
    peak = overlay.max()
    if peak > 0:  # an all-black overlay stays black instead of becoming NaN
        overlay = overlay / peak
    return np.uint8(255 * overlay)


def visualize_cam(image_path: str,
                  model: torch.nn.Module,
                  target_layers: List[torch.nn.Module],
                  target_class: Optional[int] = None,
                  method: str = 'gradcam',
                  save_path: Optional[str] = None) -> "plt.Figure":
    """
    Plot the original image, the CAM heatmap and their overlay side by side.

    Args:
        image_path: Path of the image to explain.
        model: Model to explain.
        target_layers: Layers whose output is explained.
        target_class: Class index; ``None`` uses the predicted class.
        method: One of 'gradcam', 'gradcam++', 'scorecam'.
        save_path: If given, the figure is also saved there.

    Returns:
        plt.Figure: The matplotlib figure.

    Raises:
        ValueError: If ``method`` is not supported.
        FileNotFoundError: If the image cannot be read.
        ImportError: If OpenCV or matplotlib is not installed.
    """
    _require(cv2, 'opencv-python (cv2)', 'visualize_cam')
    _require(plt, 'matplotlib', 'visualize_cam')

    cam_classes = {
        'gradcam': GradCAM,
        'gradcam++': GradCAMPlusPlus,
        'scorecam': ScoreCAM,
    }
    if method not in cam_classes:
        raise ValueError(f"不支持的方法: {method}")

    # Load the image; cv2.imread signals failure by returning None.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"无法读取图像: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    input_tensor = preprocess_image(image)
    cam = cam_classes[method](model, target_layers)
    mask = cam.forward(input_tensor, target_class)
    # The mask has the network input size; bring it back to the image size so
    # the three panels line up.
    mask = cv2.resize(mask, (image.shape[1], image.shape[0]))

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    # Original image
    axes[0].imshow(image)
    axes[0].set_title('原始图像')
    axes[0].axis('off')

    # CAM heatmap
    axes[1].imshow(mask, cmap='jet')
    axes[1].set_title(f'{method.upper()} 热力图')
    axes[1].axis('off')

    # Overlay
    cam_on_image = show_cam_on_image(image, mask)
    axes[2].imshow(cam_on_image)
    axes[2].set_title('叠加结果')
    axes[2].axis('off')

    fig.tight_layout()

    if save_path:
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"CAM 可视化已保存: {save_path}")

    return fig


def preprocess_image(image: np.ndarray,
                     size: Tuple[int, int] = (224, 224)) -> torch.Tensor:
    """
    Resize and normalise an image for the model.

    Args:
        image: Input image of shape (H, W, 3) with values in 0-255.
        size: Target size passed to ``cv2.resize``, i.e. (width, height).

    Returns:
        torch.Tensor: float32 tensor of shape (1, 3, height, width).

    Raises:
        ImportError: If OpenCV is not installed.
    """
    _require(cv2, 'opencv-python (cv2)', 'preprocess_image')
    image = cv2.resize(image, size)

    # Keep the statistics in float32: float64 constants would silently
    # promote the whole tensor to double, which float32 models reject.
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    image = (image.astype(np.float32) / 255.0 - mean) / std

    # HWC -> NCHW
    return torch.from_numpy(image).permute(2, 0, 1).unsqueeze(0).contiguous()


if __name__ == "__main__":
    # Smoke-test banner
    print("自研可解释性模块测试")
    print("包含: GradCAM, GradCAM++, ScoreCAM")
    print("使用方式: from utils.explainability import GradCAM, show_cam_on_image")