Spaces:

X-iZhang
/

RadEval

Running

App Files Files Community

RadEval / factual /RadCliQv1 /semb_score.py

X-iZhang

initial

bad8293 verified 19 days ago

raw

history blame contribute delete

2.2 kB

	from typing import Sequence, Union

	import numpy as np
	import torch

	from factual.f1chexbert import F1CheXbert


	def semantic_embedding_scores(
	refs: Sequence[str],
	hyps: Sequence[str],
	*,
	device: Union[str, torch.device] = "cpu",
	) -> np.ndarray:
	"""Return per‑pair cosine similarities between `refs` and `hyps`.

	All heavy math is vectorised; no Python loops.

	Args:
	refs: Iterable of ground‑truth report strings.
	hyps: Iterable of predicted report strings (must match `refs` length).
	device: Computation device (e.g. "cpu", "cuda", "cuda:0").

	Returns
	-------
	np.ndarray
	Shape ``(N,)`` – cosine similarity for each pair, where
	``N == len(refs) == len(hyps)``.

	Raises
	------
	ValueError
	If `refs` and `hyps` are of different lengths.
	"""

	if len(refs) != len(hyps):
	raise ValueError(f"refs ({len(refs)}) and hyps ({len(hyps)}) differ in length")

	labeler = F1CheXbert(device=device)

	# Stack embeddings into (N, dim) matrices
	gt_embeds = np.vstack(labeler.get_embeddings(refs)) # (N, dim)
	pred_embeds = np.vstack(labeler.get_embeddings(hyps)) # (N, dim)

	# Cosine similarity – fully vectorised
	dot = np.einsum("nd,nd->n", gt_embeds, pred_embeds)
	norms = np.linalg.norm(gt_embeds, axis=1) * np.linalg.norm(pred_embeds, axis=1)
	with np.errstate(divide="ignore", invalid="ignore"):
	sims = np.where(norms > 0, dot / norms, 0.0)

	return sims


	def mean_semantic_score(scores: np.ndarray) -> float:
	"""Convenience helper: mean of an array of scores."""
	return float(scores.mean())


	if __name__ == "__main__":
	_refs = [
	"No evidence of pneumothorax following chest tube removal.",
	"There is a left pleural effusion.",
	"No evidence of pneumothorax following chest tube removal.",

	]
	_hyps = [
	"No pneumothorax detected.",
	"Left pleural effusion is present.",
	"Left pleural effusion is present.",
	]

	_scores = semantic_embedding_scores(_refs, _hyps, device="cpu")
	print("Per‑pair cosine:", _scores)
	print("Mean:", mean_semantic_score(_scores))