# aicomp_demo/utils/explainability.py
# ceasonen
# 我的视网膜检测网站 (My retina detection website)
# 04103fb
"""
自研的可解释性模块
实现 GradCAM、GradCAM++、ScoreCAM 等功能,替代外部依赖
"""
import torch
import torch.nn.functional as F
import numpy as np
import cv2
from typing import List, Tuple, Optional, Union
import matplotlib.pyplot as plt
from PIL import Image
class GradCAM:
    """Grad-CAM: activation maps weighted by spatially averaged gradients.

    A forward hook on every target layer records the layer output and
    attaches a tensor hook to it that records the gradient flowing into that
    output during the backward pass.  ``self.activations`` and
    ``self.gradients`` are kept index-aligned, so entry ``i`` of both lists
    refers to the same layer call.

    The hooks stay registered until :meth:`release` is called (the instance
    can also be used as a context manager); while registered, every forward
    pass of the model appends to ``self.activations``.
    """

    def __init__(self, model: torch.nn.Module, target_layers: List[torch.nn.Module],
                 use_cuda: bool = False, model_forward=None):
        """
        Set up the CAM extractor and register the hooks.

        Args:
            model: PyTorch model to explain.
            target_layers: Layers whose output is used for the CAM.  Only the
                first recorded layer output is turned into a heat map.
            use_cuda: Whether a GPU is requested.  This only determines
                ``self.device``; neither the model nor the input is moved.
            model_forward: Optional callable used instead of ``model(x)`` to
                obtain the class scores.
        """
        self.model = model
        self.model_forward = model_forward
        self.target_layers = target_layers
        self.use_cuda = use_cuda
        self.device = torch.device('cuda' if use_cuda and torch.cuda.is_available() else 'cpu')

        self.gradients = []
        self.activations = []
        self._hook_handles = []
        self._register_hooks()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
        return False

    def _store_gradient(self, grad: torch.Tensor) -> None:
        """Record a gradient; returns None so the gradient itself is unchanged."""
        # Backward visits layers in reverse order, so prepending keeps the
        # list aligned with ``self.activations`` (which is in forward order).
        self.gradients.insert(0, grad.detach())

    def _register_hooks(self):
        """Register the forward hooks that capture activations and gradients."""
        def forward_hook(module, inputs, output):
            if not isinstance(output, torch.Tensor):
                self.activations.append(output)
                return
            # Detach so the stored activation does not keep the graph alive.
            self.activations.append(output.detach())
            if output.requires_grad:
                # A tensor hook replaces the deprecated module backward hook.
                output.register_hook(self._store_gradient)

        for target_layer in self.target_layers:
            self._hook_handles.append(target_layer.register_forward_hook(forward_hook))

    def _clear_hooks(self):
        """Drop recorded activations/gradients (the hooks stay registered)."""
        self.gradients = []
        self.activations = []

    def release(self):
        """Remove every hook this instance registered on the model."""
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles = []

    def forward(self, input_tensor: torch.Tensor, target_class: Optional[int] = None) -> np.ndarray:
        """
        Run a forward/backward pass and build the CAM for the first sample.

        Args:
            input_tensor: Model input; its dimensions from index 2 onward are
                used as the output size of the heat map.
            target_class: Class index to explain; ``None`` uses the class with
                the highest score for the first sample.

        Returns:
            np.ndarray: Heat map min-max normalised to [0, 1], with the
            spatial size of ``input_tensor``.

        Raises:
            RuntimeError: If no activation or gradient was recorded for the
                target layers.
        """
        self._clear_hooks()

        # enable_grad makes this work even when the caller is in no_grad mode.
        with torch.enable_grad():
            if self.model_forward is not None:
                model_output = self.model_forward(input_tensor)
            else:
                model_output = self.model(input_tensor)

            if target_class is None:
                target_class = int(model_output[0].argmax().item())

            self.model.zero_grad()
            one_hot = torch.zeros_like(model_output)
            one_hot[0, target_class] = 1
            model_output.backward(gradient=one_hot)

        if not self.activations or not self.gradients:
            raise RuntimeError(
                "GradCAM captured no activations/gradients; check that the target "
                "layers are used in the forward pass and that gradients are enabled."
            )

        gradients = self.gradients[0]
        activations = self.activations[0]

        # Channel weights: spatial mean of the gradients, kept broadcastable.
        weights = gradients.mean(dim=(2, 3), keepdim=True)

        # Weighted sum over channels, computed on the activations' own device.
        cam = (weights[0] * activations[0]).sum(dim=0).float()
        cam = F.relu(cam)
        cam = F.interpolate(cam[None, None],
                            size=input_tensor.shape[2:],
                            mode='bilinear',
                            align_corners=False)
        cam = cam[0, 0].detach().cpu().numpy()

        # Min-max normalisation; the epsilon guards a constant map.
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
        return cam
class GradCAMPlusPlus(GradCAM):
    """Grad-CAM++ variant: per-pixel alpha coefficients weight the positive gradients."""

    def forward(self, input_tensor: torch.Tensor, target_class: Optional[int] = None) -> np.ndarray:
        """
        Run a forward/backward pass and build the Grad-CAM++ map for the first sample.

        Args:
            input_tensor: Model input; its dimensions from index 2 onward are
                used as the output size of the heat map.
            target_class: Class index to explain; ``None`` uses the class with
                the highest score for the first sample.

        Returns:
            np.ndarray: Heat map min-max normalised to [0, 1], with the
            spatial size of ``input_tensor``.

        Raises:
            RuntimeError: If the hooks recorded no activation or gradient.
        """
        self._clear_hooks()

        # enable_grad makes this work even when the caller is in no_grad mode.
        with torch.enable_grad():
            # Honour the optional custom forward, like the base class does.
            if self.model_forward is not None:
                model_output = self.model_forward(input_tensor)
            else:
                model_output = self.model(input_tensor)

            if target_class is None:
                target_class = int(model_output[0].argmax().item())

            self.model.zero_grad()
            one_hot = torch.zeros_like(model_output)
            one_hot[0, target_class] = 1
            model_output.backward(gradient=one_hot)

        if not self.gradients or not self.activations:
            raise RuntimeError(
                "GradCAM++ captured no activations/gradients; check that the target "
                "layers are used in the forward pass and that gradients are enabled."
            )

        # Detach: the recorded tensors may still be attached to the autograd
        # graph, and .numpy() refuses tensors that require grad.
        gradients = self.gradients[0].detach()
        activations = self.activations[0].detach()

        # NOTE(review): the second denominator term sums (activation * grad^3)
        # over space; the Grad-CAM++ paper appears to use
        # sum(activation) * grad^3 per pixel instead - confirm which is intended.
        alpha_num = gradients.pow(2)
        alpha_denom = alpha_num.mul(2) + \
            activations.mul(gradients.pow(3)).sum((2, 3), keepdim=True)
        alpha = alpha_num.div(alpha_denom + 1e-7)
        weights = (alpha * F.relu(gradients)).sum((2, 3), keepdim=True)

        # Weighted sum over channels, then upsample to the input size.
        cam = (weights * activations).sum(1, keepdim=True)
        cam = F.relu(cam).float()
        cam = F.interpolate(cam, size=input_tensor.shape[2:],
                            mode='bilinear', align_corners=False)
        cam = cam[0, 0].cpu().numpy()

        # Min-max normalisation; the epsilon guards a constant map.
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
        return cam
class ScoreCAM:
    """Score-CAM: gradient-free CAM that weights channels by masked-input scores.

    Each channel of the target layer's activation is upsampled to the input
    size, min-max normalised and used as a mask on the input.  The model's
    score for the target class on every masked input, passed through a softmax
    over channels, gives the channel weights.
    """

    def __init__(self, model: torch.nn.Module, target_layers: List[torch.nn.Module],
                 use_cuda: bool = False, model_forward=None):
        """
        Store the configuration.  No hooks are left registered on the model.

        Args:
            model: PyTorch model to explain.
            target_layers: Layers to read activations from; the first entry is
                used.  If the list is empty, ``model.backbone`` is used when
                the model has such an attribute.
            use_cuda: Whether a GPU is requested.  This only determines
                ``self.device``; neither the model nor the input is moved.
            model_forward: Optional callable used instead of ``model(x)`` to
                obtain the class scores.
        """
        # This class has no base class, so the attributes are set here directly.
        self.model = model
        self.model_forward = model_forward
        self.target_layers = target_layers
        self.use_cuda = use_cuda
        self.device = torch.device('cuda' if use_cuda and torch.cuda.is_available() else 'cpu')
        # Number of masked inputs scored per forward pass.
        self.batch_size = 16

    def _run_model(self, input_tensor: torch.Tensor) -> torch.Tensor:
        """Return the class scores via ``model_forward`` if given, else ``model``."""
        if self.model_forward is not None:
            return self.model_forward(input_tensor)
        return self.model(input_tensor)

    def _forward_with_activations(self, input_tensor: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Run one forward pass and return ``(class scores, target-layer output)``."""
        if not self.target_layers:
            if hasattr(self.model, 'backbone'):
                with torch.no_grad():
                    return self._run_model(input_tensor), self.model.backbone(input_tensor)
            raise ValueError("ScoreCAM needs at least one target layer")

        captured = []

        def forward_hook(module, inputs, output):
            captured.append(output.detach())

        # Temporary hook: removed right after the pass so nothing accumulates.
        handle = self.target_layers[0].register_forward_hook(forward_hook)
        try:
            with torch.no_grad():
                model_output = self._run_model(input_tensor)
        finally:
            handle.remove()

        if not captured:
            raise RuntimeError("ScoreCAM target layer was not used in the forward pass")
        return model_output, captured[0]

    def forward(self, input_tensor: torch.Tensor, target_class: Optional[int] = None) -> np.ndarray:
        """
        Build the Score-CAM heat map for the first sample of ``input_tensor``.

        Args:
            input_tensor: Model input; its dimensions from index 2 onward are
                used as the mask and heat-map size.
            target_class: Class index to explain; ``None`` uses the class with
                the highest score for the first sample.

        Returns:
            np.ndarray: Heat map min-max normalised to [0, 1], with the
            spatial size of ``input_tensor``.
        """
        spatial_size = input_tensor.shape[2:]

        with torch.no_grad():
            model_output, activations = self._forward_with_activations(input_tensor)
            if target_class is None:
                target_class = int(model_output[0].argmax().item())

            channel_maps = activations[0].float()  # first sample only
            step = max(1, int(self.batch_size))
            scores = []
            for start in range(0, channel_maps.shape[0], step):
                # Upsample per chunk so only `step` full-size masks exist at once.
                masks = F.interpolate(channel_maps[start:start + step].unsqueeze(1),
                                      size=spatial_size,
                                      mode='bilinear',
                                      align_corners=False)
                lowest = masks.amin(dim=(2, 3), keepdim=True)
                highest = masks.amax(dim=(2, 3), keepdim=True)
                masks = (masks - lowest) / (highest - lowest + 1e-8)

                masked_input = input_tensor[:1] * masks.to(input_tensor.dtype)
                scores.append(self._run_model(masked_input)[:, target_class].float())

            # Softmax over channels turns the raw scores into weights.
            weights = F.softmax(torch.cat(scores), dim=0)

            cam = (weights[:, None, None] * channel_maps).sum(dim=0)
            cam = F.relu(cam)
            cam = F.interpolate(cam[None, None],
                                size=spatial_size,
                                mode='bilinear',
                                align_corners=False)[0, 0]

        cam = cam.cpu().numpy()

        # Min-max normalisation; the epsilon guards a constant map.
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
        return cam

    def _get_activations(self, input_tensor: torch.Tensor) -> torch.Tensor:
        """Return the target layer's output for ``input_tensor``."""
        return self._forward_with_activations(input_tensor)[1]
def show_cam_on_image(img: np.ndarray, mask: np.ndarray,
                      use_rgb: bool = True, colormap: int = cv2.COLORMAP_JET) -> np.ndarray:
    """
    Overlay a CAM heat map on an image.

    Args:
        img: Original image with values in 0-255.
        mask: CAM mask with values in 0-1.  It is resized to the image's
            height and width when the two differ.
        use_rgb: Convert the OpenCV (BGR) heat map to RGB before blending.
        colormap: OpenCV colormap used to colour the mask.

    Returns:
        np.ndarray: uint8 image, the sum of heat map and image rescaled so its
        maximum is 255.
    """
    # Sanitise the mask: NaNs become 0 and values are limited to [0, 1] so the
    # uint8 conversion below cannot wrap around.
    mask = np.clip(np.nan_to_num(np.asarray(mask, dtype=np.float32)), 0.0, 1.0)

    # A CAM computed on a resized network input rarely matches the image size.
    if mask.shape[:2] != img.shape[:2]:
        mask = cv2.resize(mask, (img.shape[1], img.shape[0]),
                          interpolation=cv2.INTER_LINEAR)
        mask = np.clip(mask, 0.0, 1.0)

    heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap)
    if use_rgb:
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

    blended = np.float32(heatmap) / 255 + np.float32(img) / 255
    peak = np.max(blended)
    if peak > 0:  # avoid dividing by zero on an all-black result
        blended = blended / peak
    return np.uint8(255 * blended)
def visualize_cam(image_path: str, model: torch.nn.Module,
                  target_layers: List[torch.nn.Module],
                  target_class: Optional[int] = None,
                  method: str = 'gradcam',
                  save_path: Optional[str] = None) -> plt.Figure:
    """
    Compute a CAM for an image file and plot image, heat map and overlay.

    Args:
        image_path: Path of the image to explain.
        model: Model to explain.
        target_layers: Layers passed to the CAM implementation.
        target_class: Class index to explain; ``None`` lets the CAM
            implementation pick the predicted class.
        method: One of 'gradcam', 'gradcam++', 'scorecam'.
        save_path: If given, the figure is also saved to this path.

    Returns:
        plt.Figure: Figure with three panels (image, heat map, overlay).

    Raises:
        ValueError: If ``method`` is not supported.
        FileNotFoundError: If the image cannot be read.
    """
    cam_classes = {
        'gradcam': GradCAM,
        'gradcam++': GradCAMPlusPlus,
        'scorecam': ScoreCAM,
    }
    # Validate the method before doing any I/O.
    if method not in cam_classes:
        raise ValueError(f"不支持的方法: {method}")

    # cv2.imread returns None instead of raising when the file is unreadable.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"无法读取图像: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    input_tensor = preprocess_image(image)

    cam = cam_classes[method](model, target_layers)
    mask = cam.forward(input_tensor, target_class)

    # The mask has the network-input size; bring it to the image size so the
    # overlay below combines arrays of equal shape.
    height, width = image.shape[:2]
    if mask.shape[:2] != (height, width):
        mask = cv2.resize(mask, (width, height), interpolation=cv2.INTER_LINEAR)
        mask = np.clip(mask, 0.0, 1.0)

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    # Panel 1: original image.
    axes[0].imshow(image)
    axes[0].set_title('原始图像')
    axes[0].axis('off')

    # Panel 2: raw heat map.
    axes[1].imshow(mask, cmap='jet')
    axes[1].set_title(f'{method.upper()} 热力图')
    axes[1].axis('off')

    # Panel 3: heat map blended onto the image.
    cam_on_image = show_cam_on_image(image, mask)
    axes[2].imshow(cam_on_image)
    axes[2].set_title('叠加结果')
    axes[2].axis('off')

    fig.tight_layout()

    if save_path:
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"CAM 可视化已保存: {save_path}")

    return fig
def preprocess_image(image: np.ndarray, size: Tuple[int, int] = (224, 224)) -> torch.Tensor:
    """
    Resize, scale and normalise an image into a model input tensor.

    Args:
        image: Image array of shape (height, width, 3) with values in 0-255.
        size: Target size as (width, height), the order ``cv2.resize`` expects.

    Returns:
        torch.Tensor: float32 tensor of shape (1, 3, height, width).

    Raises:
        ValueError: If ``image`` is not a 3-channel, 3-dimensional array.
    """
    if image.ndim != 3 or image.shape[2] != 3:
        raise ValueError(
            f"expected an image of shape (H, W, 3), got {tuple(image.shape)}"
        )

    width, height = size
    # Skip the resize when the image already has the requested size.
    if image.shape[:2] != (height, width):
        image = cv2.resize(image, (width, height))

    # float32 constants keep the result float32; float64 arrays here would
    # promote the tensor to double, which float32 model weights reject.
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    image = (image.astype(np.float32) / 255.0 - mean) / std

    # HWC -> CHW, add the batch dimension, and make the memory contiguous.
    return torch.from_numpy(image).permute(2, 0, 1).unsqueeze(0).contiguous()
if __name__ == "__main__":
    # Running the module directly only prints a short usage banner.
    print(
        "自研可解释性模块测试",
        "包含: GradCAM, GradCAM++, ScoreCAM",
        "使用方式: from utils.explainability import GradCAM, show_cam_on_image",
        sep="\n",
    )