from typing import Dict

import numpy as np
import torch
from PIL import Image
from torchmetrics.multimodal import CLIPImageQualityAssessment


class CLIPIQAMetric:
    """Score a single image with torchmetrics' CLIP-IQA metric.

    The metric is built once in ``__init__`` with the ``"quality"`` prompt
    and moved to the selected device (CUDA, then MPS, then CPU).
    """

    def __init__(self):
        """Select a device and instantiate the CLIP-IQA metric on it."""
        # Preference order: CUDA GPU, then Apple MPS, then CPU.
        self.device = torch.device(
            "cuda"
            if torch.cuda.is_available()
            else "mps"
            if torch.backends.mps.is_available()
            else "cpu"
        )

        # data_range=255.0 matches the un-normalised 0..255 pixel values
        # that compute_score feeds in.
        self.metric = CLIPImageQualityAssessment(
            model_name_or_path="clip_iqa",
            data_range=255.0,
            prompts=("quality",),
        )
        self.metric.to(self.device)

    @property
    def name(self) -> str:
        """Return the identifier of this metric (``"clip_iqa"``)."""
        return "clip_iqa"

    def compute_score(self, image: Image.Image, prompt: str) -> Dict[str, float]:
        """Compute the CLIP-IQA score of ``image``.

        Args:
            image: PIL image to score. It is converted to RGB first, so
                grayscale, palette and alpha-channel images are accepted.
            prompt: Unused by this metric; accepted so the signature stays
                the same for callers that pass a prompt.

        Returns:
            A dict with the single key ``"clip_iqa"`` mapped to the score
            as a Python float.
        """
        # Normalise to 3-channel RGB. Without this, 2-D arrays (mode "L",
        # "P", "1") make permute(2, 0, 1) raise, and 4-channel arrays
        # (mode "RGBA", "CMYK") yield a tensor with the wrong channel count.
        rgb_image = image.convert("RGB")

        # HWC uint8 array -> CHW float tensor -> add a batch dim (1, C, H, W).
        pixels = np.array(rgb_image)
        image_tensor = torch.from_numpy(pixels).permute(2, 0, 1).float()
        image_tensor = image_tensor.unsqueeze(0).to(self.device)

        scores = self.metric(image_tensor)
        # One image and one prompt: .item() extracts the single value.
        return {"clip_iqa": scores.item()}