Spaces:
Running
Running
"""
In-house explainability module.

Implements GradCAM, GradCAM++, ScoreCAM and related helpers, replacing
external dependencies.
"""
import torch | |
import torch.nn.functional as F | |
import numpy as np | |
import cv2 | |
from typing import List, Tuple, Optional, Union | |
import matplotlib.pyplot as plt | |
from PIL import Image | |
class GradCAM:
    """In-house Grad-CAM implementation.

    Hooks are attached to every module in ``target_layers`` when the object
    is constructed. ``forward`` builds the heat map from the first entry of
    ``target_layers`` and explains the first sample of the batch only.
    """

    def __init__(self, model: torch.nn.Module, target_layers: List[torch.nn.Module],
                 use_cuda: bool = False, model_forward=None):
        """
        Initialise Grad-CAM and attach hooks to the target layers.

        Args:
            model: PyTorch model to explain.
            target_layers: Layers whose outputs are recorded (usually the
                last convolutional layer).
            use_cuda: Selects ``self.device`` (CUDA only when it is actually
                available). The model and the inputs are NOT moved by this
                class; the caller is responsible for placing them.
            model_forward: Optional callable used instead of ``model(x)`` to
                produce the class scores.
        """
        self.model = model
        self.model_forward = model_forward  # optional custom forward
        self.target_layers = target_layers
        self.use_cuda = use_cuda
        self.device = torch.device('cuda' if use_cuda and torch.cuda.is_available() else 'cpu')
        # Per-call buffers, both kept in forward order so that index i of one
        # list always belongs to the same layer as index i of the other.
        self.gradients = []
        self.activations = []
        # Handles are kept so the hooks can be removed again (see release()).
        self._hook_handles = []
        self._register_hooks()

    def _register_hooks(self):
        """Attach a forward hook to each target layer.

        The gradient is captured with a tensor hook on the layer output
        instead of the deprecated ``Module.register_backward_hook``.
        """
        def capture_gradient(grad):
            # Backward visits layers in reverse order; prepending restores
            # forward order so gradients[i] pairs with activations[i].
            self.gradients.insert(0, grad.detach())

        def capture_activation(module, inputs, output):
            # Detach the stored copy so the buffer does not keep the graph alive.
            self.activations.append(output.detach())
            # Under torch.no_grad() the output has no graph to hook into.
            if output.requires_grad:
                output.register_hook(capture_gradient)

        for target_layer in self.target_layers:
            self._hook_handles.append(
                target_layer.register_forward_hook(capture_activation))

    def release(self):
        """Remove every hook this object registered on the model."""
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles = []

    def _clear_hooks(self):
        """Drop the activations/gradients recorded by previous calls."""
        self.gradients = []
        self.activations = []

    def forward(self, input_tensor: torch.Tensor, target_class: Optional[int] = None) -> np.ndarray:
        """
        Run a forward/backward pass and build the Grad-CAM heat map.

        Args:
            input_tensor: Input batch; its trailing two dimensions are used
                as the output size of the heat map.
            target_class: Class index to explain. ``None`` uses the class
                predicted for the first sample.

        Returns:
            np.ndarray: 2-D heat map scaled to [0, 1].

        Raises:
            RuntimeError: If no activation or gradient was recorded for the
                target layers during the pass.
        """
        self._clear_hooks()

        run_model = self.model_forward if self.model_forward is not None else self.model
        model_output = run_model(input_tensor)
        if target_class is None:
            # Index the first sample explicitly so batches larger than one
            # do not break ``.item()``.
            target_class = model_output.argmax(dim=1)[0].item()

        # Back-propagate only the score of the chosen class of sample 0.
        self.model.zero_grad()
        one_hot = torch.zeros_like(model_output)
        one_hot[0, target_class] = 1
        model_output.backward(gradient=one_hot, retain_graph=True)

        if not self.activations or not self.gradients:
            raise RuntimeError(
                "No activations/gradients were captured; check that the target "
                "layers are used by the model and that gradients are enabled.")

        gradients = self.gradients[0]
        activations = self.activations[0]

        # Channel weights = spatial mean of the gradients.
        weights = gradients.mean(dim=(2, 3), keepdim=True)
        # Weighted channel sum, computed on whatever device the activations
        # live on (no CPU-side accumulator).
        cam = F.relu((weights[0] * activations[0]).sum(dim=0).float())
        cam = F.interpolate(cam[None, None],
                            size=input_tensor.shape[2:],
                            mode='bilinear',
                            align_corners=False)
        cam = cam[0, 0].detach().cpu().numpy()

        # Min-max normalisation; the epsilon guards a constant map.
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
        return cam
class GradCAMPlusPlus(GradCAM):
    """In-house Grad-CAM++ implementation.

    Reuses the hooks and buffers of ``GradCAM``; only the way the channel
    weights are derived from the gradients differs.
    """

    def forward(self, input_tensor: torch.Tensor, target_class: Optional[int] = None) -> np.ndarray:
        """
        Run a forward/backward pass and build the Grad-CAM++ heat map.

        Args:
            input_tensor: Input batch; its trailing two dimensions are used
                as the output size of the heat map.
            target_class: Class index to explain. ``None`` uses the class
                predicted for the first sample.

        Returns:
            np.ndarray: 2-D heat map scaled to [0, 1] for the first sample.
        """
        self._clear_hooks()

        # Honour the optional custom forward, consistent with GradCAM.forward.
        run_model = self.model_forward if self.model_forward is not None else self.model
        model_output = run_model(input_tensor)
        if target_class is None:
            target_class = model_output.argmax(dim=1)[0].item()

        # Back-propagate only the score of the chosen class of sample 0.
        self.model.zero_grad()
        one_hot = torch.zeros_like(model_output)
        one_hot[0, target_class] = 1
        model_output.backward(gradient=one_hot, retain_graph=True)

        # Detach explicitly: the recorded activations may still be attached
        # to the graph, and ``.numpy()`` refuses tensors that require grad.
        gradients = self.gradients[0].detach()
        activations = self.activations[0].detach()

        # Closed-form alpha of Grad-CAM++:
        #   alpha_ij = g_ij^2 / (2 * g_ij^2 + (sum_ab A_ab) * g_ij^3)
        # i.e. the spatial sum is taken over the activations only and then
        # multiplied by the per-location cubed gradient.
        grad_sq = gradients.pow(2)
        grad_cube = grad_sq * gradients
        activation_sum = activations.sum(dim=(2, 3), keepdim=True)
        alpha_denom = 2 * grad_sq + activation_sum * grad_cube
        # Replace exact zeros instead of adding an epsilon, which could
        # cancel a small negative denominator.
        alpha_denom = torch.where(alpha_denom != 0, alpha_denom,
                                  torch.ones_like(alpha_denom))
        alpha = grad_sq / alpha_denom
        weights = (alpha * F.relu(gradients)).sum(dim=(2, 3), keepdim=True)

        # Weighted channel sum, rectified and upsampled to the input size.
        cam = F.relu((weights * activations).sum(dim=1, keepdim=True)).float()
        cam = F.interpolate(cam, size=input_tensor.shape[2:],
                            mode='bilinear', align_corners=False)
        cam = cam[0, 0].cpu().numpy()

        # Min-max normalisation; the epsilon guards a constant map.
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
        return cam
class ScoreCAM:
    """In-house Score-CAM implementation (gradient-free).

    Each channel of the first target layer is upsampled to the input size,
    scaled to [0, 1] and used to mask the input; the class score obtained
    for each masked input becomes (after a softmax over channels) the weight
    of that channel. Only the first sample of the batch is explained.
    """

    # Number of masked inputs scored per forward pass.
    batch_size = 16

    def __init__(self, model: torch.nn.Module, target_layers: List[torch.nn.Module],
                 use_cuda: bool = False, model_forward=None):
        """
        Initialise Score-CAM and attach forward hooks to the target layers.

        Args:
            model: PyTorch model to explain.
            target_layers: Layers whose outputs are recorded; the first one
                provides the activation maps.
            use_cuda: Selects ``self.device`` (CUDA only when it is actually
                available). The model and inputs are not moved by this class.
            model_forward: Optional callable used instead of ``model(x)`` to
                produce the class scores.
        """
        self.model = model
        self.model_forward = model_forward
        self.target_layers = target_layers
        self.use_cuda = use_cuda
        self.device = torch.device('cuda' if use_cuda and torch.cuda.is_available() else 'cpu')
        self.activations = []
        # Recording is switched on only for the un-masked pass so the many
        # masked passes do not pile up activations in memory.
        self._recording = False
        self._hook_handles = [
            layer.register_forward_hook(self._store_activation)
            for layer in target_layers
        ]

    def _store_activation(self, module, inputs, output):
        """Forward hook: remember the layer output while recording is on."""
        if self._recording:
            self.activations.append(output.detach())

    def release(self):
        """Remove every hook this object registered on the model."""
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles = []

    def _run_model(self, batch: torch.Tensor) -> torch.Tensor:
        """Score a batch with the custom forward if given, else the model."""
        if self.model_forward is not None:
            return self.model_forward(batch)
        return self.model(batch)

    def _forward_with_activations(self, input_tensor: torch.Tensor):
        """Run one recorded pass; return (scores, first target layer output)."""
        self.activations = []
        self._recording = True
        try:
            model_output = self._run_model(input_tensor)
        finally:
            self._recording = False
        if not self.activations:
            raise RuntimeError(
                "No activations were captured; check that the target layers "
                "are used by the model.")
        return model_output, self.activations[0]

    def forward(self, input_tensor: torch.Tensor, target_class: Optional[int] = None) -> np.ndarray:
        """
        Build the Score-CAM heat map.

        Args:
            input_tensor: Input batch; its trailing two dimensions are used
                as the output size of the heat map.
            target_class: Class index to explain. ``None`` uses the class
                predicted for the first sample.

        Returns:
            np.ndarray: 2-D heat map scaled to [0, 1].

        Raises:
            RuntimeError: If the target layers produced no activation.
        """
        with torch.no_grad():
            model_output, activations = self._forward_with_activations(input_tensor)
            if target_class is None:
                target_class = model_output.argmax(dim=1)[0].item()

            activations = activations[:1].float()
            image = input_tensor[:1]

            # One mask per channel: upsample to the input resolution, then
            # min-max scale each channel independently.
            masks = F.interpolate(activations, size=input_tensor.shape[2:],
                                  mode='bilinear', align_corners=False)[0]
            flat = masks.flatten(start_dim=1)
            low = flat.min(dim=1).values.view(-1, 1, 1)
            high = flat.max(dim=1).values.view(-1, 1, 1)
            masks = (masks - low) / (high - low + 1e-8)

            # Score the masked inputs in chunks rather than one by one.
            scores = []
            for start in range(0, masks.shape[0], self.batch_size):
                chunk = masks[start:start + self.batch_size].unsqueeze(1)
                chunk = chunk.to(device=image.device, dtype=image.dtype)
                masked_output = self._run_model(image * chunk)
                scores.append(masked_output[:, target_class].float())
            weights = F.softmax(torch.cat(scores), dim=0).to(activations.device)

            # Weighted channel sum, rectified and upsampled to the input size.
            cam = F.relu((weights.view(-1, 1, 1) * activations[0]).sum(dim=0))
            cam = F.interpolate(cam[None, None],
                                size=input_tensor.shape[2:],
                                mode='bilinear',
                                align_corners=False)
        cam = cam[0, 0].cpu().numpy()

        # Min-max normalisation; the epsilon guards a constant map.
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
        return cam

    def _get_activations(self, input_tensor: torch.Tensor) -> torch.Tensor:
        """Return the first target layer's output for ``input_tensor``."""
        with torch.no_grad():
            return self._forward_with_activations(input_tensor)[1]
def show_cam_on_image(img: np.ndarray, mask: np.ndarray,
                      use_rgb: bool = True, colormap: int = cv2.COLORMAP_JET) -> np.ndarray:
    """
    Overlay a CAM heat map on an image.

    Args:
        img: Original image with values in 0-255; must have three channels
            so it can be added to the colour-mapped heat map.
        mask: CAM mask with values in 0-1. It is resized to the image
            resolution when the two differ.
        use_rgb: Convert the heat map from OpenCV's BGR order to RGB.
        colormap: OpenCV colour map applied to the mask.

    Returns:
        np.ndarray: uint8 image containing the blended result.
    """
    # Sanitise the mask: NaNs and out-of-range values would otherwise wrap
    # around in the uint8 conversion below.
    mask = np.nan_to_num(np.asarray(mask, dtype=np.float32))
    mask = np.clip(mask, 0.0, 1.0)

    # The CAM usually comes at the network input size, not the image size.
    height, width = img.shape[:2]
    if mask.shape[:2] != (height, width):
        mask = cv2.resize(mask, (width, height))  # cv2 expects (width, height)

    heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap)
    if use_rgb:
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

    blended = np.float32(heatmap) / 255 + np.float32(img) / 255
    peak = np.max(blended)
    if peak > 0:
        # Rescale so the brightest pixel maps to 255; skipped for an
        # all-black blend to avoid dividing by zero.
        blended = blended / peak
    return np.uint8(255 * blended)
def visualize_cam(image_path: str, model: torch.nn.Module,
                  target_layers: List[torch.nn.Module],
                  target_class: Optional[int] = None,
                  method: str = 'gradcam',
                  save_path: Optional[str] = None) -> plt.Figure:
    """
    Compute a CAM for an image file and plot it next to the original.

    Args:
        image_path: Path of the image to load.
        model: Model to explain.
        target_layers: Layers passed to the CAM implementation.
        target_class: Class index to explain; ``None`` lets the CAM
            implementation use the predicted class.
        method: One of 'gradcam', 'gradcam++', 'scorecam'.
        save_path: When given, the figure is also written to this path.

    Returns:
        plt.Figure: Figure with three panels (image, heat map, overlay).

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If ``method`` is not supported.
    """
    # cv2.imread signals failure by returning None rather than raising.
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        raise FileNotFoundError(f"Cannot read image: {image_path}")
    image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    # Models hold float32 weights by default, so make sure the input is
    # float32 as well, and place it on the same device as the model.
    input_tensor = preprocess_image(image).float()
    first_param = next(model.parameters(), None)
    if first_param is not None:
        input_tensor = input_tensor.to(first_param.device)

    # Pick the CAM implementation.
    if method == 'gradcam':
        cam = GradCAM(model, target_layers)
    elif method == 'gradcam++':
        cam = GradCAMPlusPlus(model, target_layers)
    elif method == 'scorecam':
        cam = ScoreCAM(model, target_layers)
    else:
        raise ValueError(f"不支持的方法: {method}")

    mask = cam.forward(input_tensor, target_class)
    # The mask has the network input size; bring it to the image size so
    # it can be blended with the original picture.
    if mask.shape[:2] != image.shape[:2]:
        mask = cv2.resize(mask, (image.shape[1], image.shape[0]))

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    # Original image.
    axes[0].imshow(image)
    axes[0].set_title('原始图像')
    axes[0].axis('off')

    # CAM heat map.
    axes[1].imshow(mask, cmap='jet')
    axes[1].set_title(f'{method.upper()} 热力图')
    axes[1].axis('off')

    # Overlay.
    cam_on_image = show_cam_on_image(image, mask)
    axes[2].imshow(cam_on_image)
    axes[2].set_title('叠加结果')
    axes[2].axis('off')

    fig.tight_layout()
    if save_path:
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"CAM 可视化已保存: {save_path}")
    return fig
def preprocess_image(image: np.ndarray, size: Tuple[int, int] = (224, 224)) -> torch.Tensor:
    """
    Resize and normalise an image and convert it to a model input tensor.

    Args:
        image: Input image with three channels in the last axis and values
            in 0-255 (the per-channel statistics below are applied along
            that axis).
        size: Target size handed to ``cv2.resize``, i.e. (width, height).

    Returns:
        torch.Tensor: float32 tensor of shape (1, 3, height, width).
    """
    resized = cv2.resize(image, size)
    scaled = resized.astype(np.float32) / 255.0

    # Keep the statistics in float32: default float64 arrays would promote
    # the result to float64 and yield a double tensor that float32 models
    # reject. (Values are the commonly used ImageNet mean/std.)
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    normalized = (scaled - mean) / std

    # HWC -> CHW, then add the batch dimension.
    tensor = torch.from_numpy(normalized).permute(2, 0, 1).unsqueeze(0)
    return tensor.contiguous()
if __name__ == "__main__":
    # Print a short banner describing the module when run as a script.
    banner_lines = (
        "自研可解释性模块测试",
        "包含: GradCAM, GradCAM++, ScoreCAM",
        "使用方式: from utils.explainability import GradCAM, show_cam_on_image",
    )
    for banner_line in banner_lines:
        print(banner_line)