nifleisch committed
Commit 2c50826 · Parent(s): 61029d0
feat: add core logic for project
- api/__init__.py +45 -0
- api/baseline.py +53 -0
- api/fal.py +48 -0
- api/fireworks.py +53 -0
- api/flux.py +35 -0
- api/pruna.py +51 -0
- api/replicate.py +48 -0
- api/together.py +47 -0
- benchmark/__init__.py +39 -0
- benchmark/draw_bench.py +25 -0
- benchmark/genai_bench.py +39 -0
- benchmark/geneval.py +44 -0
- benchmark/hps.py +49 -0
- benchmark/metrics/__init__.py +42 -0
- benchmark/metrics/arniqa.py +26 -0
- benchmark/metrics/clip.py +23 -0
- benchmark/metrics/clip_iqa.py +28 -0
- benchmark/metrics/hps.py +77 -0
- benchmark/metrics/image_reward.py +26 -0
- benchmark/metrics/sharpness.py +24 -0
- benchmark/metrics/vqa.py +25 -0
- benchmark/parti.py +28 -0
- evaluate.py +106 -0
- evaluation_results/.gitkeep +0 -0
- images/.gitkeep +0 -0
- pyproject.toml +22 -0
- sample.py +110 -0
- uv.lock +0 -0
api/__init__.py
ADDED
@@ -0,0 +1,45 @@
+from typing import Type
+
+from api.baseline import BaselineAPI
+from api.fireworks import FireworksAPI
+from api.flux import FluxAPI
+from api.pruna import PrunaAPI
+from api.replicate import ReplicateAPI
+from api.together import TogetherAPI
+from api.fal import FalAPI
+
+def create_api(api_type: str) -> FluxAPI:
+    """
+    Factory function to create API instances.
+
+    Args:
+        api_type (str): The type of API to create. Must be one of:
+            - "baseline"
+            - "fireworks"
+            - "pruna_<speed_mode>" (where <speed_mode> is the desired speed mode)
+            - "replicate"
+            - "together"
+            - "fal"
+
+    Returns:
+        FluxAPI: An instance of the requested API implementation
+
+    Raises:
+        ValueError: If an invalid API type is provided
+    """
+    if api_type.startswith("pruna_"):
+        speed_mode = api_type[6:]  # Remove the "pruna_" prefix
+        return PrunaAPI(speed_mode)
+
+    api_map: dict[str, Type[FluxAPI]] = {
+        "baseline": BaselineAPI,
+        "fireworks": FireworksAPI,
+        "replicate": ReplicateAPI,
+        "together": TogetherAPI,
+        "fal": FalAPI,
+    }
+
+    if api_type not in api_map:
+        raise ValueError(f"Invalid API type: {api_type}. Must be one of {list(api_map.keys())} or start with 'pruna_'")
+
+    return api_map[api_type]()
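For orientation, a minimal sketch of how this factory is meant to be consumed; the API name, prompt, and output path below are illustrative placeholders, not part of this commit:

from pathlib import Path

from api import create_api

api = create_api("fal")  # any api_map key, or "pruna_<speed_mode>"
seconds = api.generate_image("a watercolor fox", Path("images/fal/demo.png"))
print(f"{api.name} took {seconds:.2f}s")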
api/baseline.py
ADDED
@@ -0,0 +1,53 @@
+import os
+import time
+from pathlib import Path
+from typing import Any
+
+import replicate
+from dotenv import load_dotenv
+
+from api.flux import FluxAPI
+
+
+class BaselineAPI(FluxAPI):
+    """
+    As our baseline, we use the Replicate API with go_fast=False.
+    """
+    def __init__(self):
+        load_dotenv()
+        self._api_key = os.getenv("REPLICATE_API_TOKEN")
+        if not self._api_key:
+            raise ValueError("REPLICATE_API_TOKEN not found in environment variables")
+
+    @property
+    def name(self) -> str:
+        return "baseline"
+
+    def generate_image(self, prompt: str, save_path: Path) -> float:
+        start_time = time.time()
+        result = replicate.run(
+            "black-forest-labs/flux-dev",
+            input={
+                "prompt": prompt,
+                "go_fast": False,
+                "guidance": 3.5,
+                "num_outputs": 1,
+                "aspect_ratio": "1:1",
+                "output_format": "png",
+                "num_inference_steps": 28,
+                "seed": 0,
+            },
+        )
+        end_time = time.time()
+
+        if result and len(result) > 0:
+            self._save_image_from_result(result[0], save_path)
+        else:
+            raise Exception("No result returned from Replicate API")
+
+        return end_time - start_time
+
+    def _save_image_from_result(self, result: Any, save_path: Path):
+        save_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(save_path, "wb") as f:
+            f.write(result.read())
api/fal.py
ADDED
@@ -0,0 +1,48 @@
+import time
+from io import BytesIO
+from pathlib import Path
+from typing import Any
+
+import fal_client
+import requests
+from PIL import Image
+
+from api.flux import FluxAPI
+
+
+class FalAPI(FluxAPI):
+    @property
+    def name(self) -> str:
+        return "fal"
+
+    def generate_image(self, prompt: str, save_path: Path) -> float:
+        start_time = time.time()
+        result = fal_client.subscribe(
+            "fal-ai/flux/dev",
+            arguments={
+                "seed": 0,
+                "prompt": prompt,
+                "image_size": "square_hd",  # 1024x1024 image
+                "num_images": 1,
+                "guidance_scale": 3.5,
+                "num_inference_steps": 28,
+                "enable_safety_checker": True,
+            },
+        )
+        end_time = time.time()
+
+        url = result["images"][0]["url"]
+        self._save_image_from_url(url, save_path)
+
+        return end_time - start_time
+
+    def _save_image_from_url(self, url: str, save_path: Path):
+        response = requests.get(url)
+        image = Image.open(BytesIO(response.content))
+        save_path.parent.mkdir(parents=True, exist_ok=True)
+        image.save(save_path)
+
+    def _save_image_from_result(self, result: Any, save_path: Path):
+        save_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(save_path, "wb") as f:
+            f.write(result.content)
api/fireworks.py
ADDED
@@ -0,0 +1,53 @@
+import os
+import time
+from pathlib import Path
+from typing import Any
+
+import requests
+from dotenv import load_dotenv
+
+from api.flux import FluxAPI
+
+
+class FireworksAPI(FluxAPI):
+    def __init__(self):
+        load_dotenv()
+        self._api_key = os.getenv("FIREWORKS_API_TOKEN")
+        if not self._api_key:
+            raise ValueError("FIREWORKS_API_TOKEN not found in environment variables")
+        self._url = "https://api.fireworks.ai/inference/v1/workflows/accounts/fireworks/models/flux-1-dev-fp8/text_to_image"
+
+    @property
+    def name(self) -> str:
+        return "fireworks_fp8"
+
+    def generate_image(self, prompt: str, save_path: Path) -> float:
+        start_time = time.time()
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "image/jpeg",
+            "Authorization": f"Bearer {self._api_key}",
+        }
+        data = {
+            "prompt": prompt,
+            "aspect_ratio": "1:1",
+            "guidance_scale": 3.5,
+            "num_inference_steps": 28,
+            "seed": 0,
+        }
+        result = requests.post(self._url, headers=headers, json=data)
+
+        end_time = time.time()
+
+        if result.status_code == 200:
+            self._save_image_from_result(result, save_path)
+        else:
+            raise Exception(f"Error: {result.status_code} {result.text}")
+
+        return end_time - start_time
+
+    def _save_image_from_result(self, result: Any, save_path: Path):
+        save_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(save_path, "wb") as f:
+            f.write(result.content)
api/flux.py
ADDED
@@ -0,0 +1,35 @@
+from abc import ABC, abstractmethod
+from pathlib import Path
+
+
+class FluxAPI(ABC):
+    """
+    Abstract base class for Flux API implementations.
+
+    This class defines the common interface for all Flux API implementations.
+    """
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """
+        The name of the API implementation.
+
+        Returns:
+            str: The name of the specific API implementation
+        """
+        pass
+
+    @abstractmethod
+    def generate_image(self, prompt: str, save_path: Path) -> float:
+        """
+        Generate an image based on the prompt and save it to the specified path.
+
+        Args:
+            prompt (str): The text prompt to generate the image from
+            save_path (Path): The path where the generated image should be saved
+
+        Returns:
+            float: The time taken for the API call in seconds
+        """
+        pass
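For reference, a minimal sketch of what a conforming implementation of this interface looks like; the DummyAPI class is hypothetical and not part of this commit:

import time
from pathlib import Path

from api.flux import FluxAPI


class DummyAPI(FluxAPI):  # hypothetical example, not part of this commit
    @property
    def name(self) -> str:
        return "dummy"

    def generate_image(self, prompt: str, save_path: Path) -> float:
        start_time = time.time()
        save_path.parent.mkdir(parents=True, exist_ok=True)
        save_path.write_bytes(b"")  # a real implementation writes the generated image here
        return time.time() - start_time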
api/pruna.py
ADDED
@@ -0,0 +1,51 @@
+import os
+import time
+from pathlib import Path
+from typing import Any
+
+from dotenv import load_dotenv
+import replicate
+
+from api.flux import FluxAPI
+
+
+class PrunaAPI(FluxAPI):
+    def __init__(self, speed_mode: str):
+        self._speed_mode = speed_mode
+        self._speed_mode_name = speed_mode.split(" ")[0].strip().lower().replace(" ", "_")
+        load_dotenv()
+        self._api_key = os.getenv("REPLICATE_API_TOKEN")
+        if not self._api_key:
+            raise ValueError("REPLICATE_API_TOKEN not found in environment variables")
+
+    @property
+    def name(self) -> str:
+        return f"pruna_{self._speed_mode_name}"
+
+    def generate_image(self, prompt: str, save_path: Path) -> float:
+        start_time = time.time()
+        result = replicate.run(
+            "prunaai/flux.1-juiced:58977759ff2870cc010597ae75f4d87866d169b248e02b6e86c4e1bf8afe2410",
+            input={
+                "seed": 0,
+                "prompt": prompt,
+                "guidance": 3.5,
+                "num_outputs": 1,
+                "aspect_ratio": "1:1",
+                "output_format": "png",
+                "speed_mode": self._speed_mode,
+                "num_inference_steps": 28,
+            },
+        )
+        end_time = time.time()
+
+        if result:
+            self._save_image_from_result(result, save_path)
+        else:
+            raise Exception("No result returned from Replicate API")
+        return end_time - start_time
+
+    def _save_image_from_result(self, result: Any, save_path: Path):
+        save_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(save_path, "wb") as f:
+            f.write(result.read())
api/replicate.py
ADDED
@@ -0,0 +1,48 @@
+import os
+import time
+from pathlib import Path
+from typing import Any
+
+from dotenv import load_dotenv
+import replicate
+
+from api.flux import FluxAPI
+
+
+class ReplicateAPI(FluxAPI):
+    def __init__(self):
+        load_dotenv()
+        self._api_key = os.getenv("REPLICATE_API_TOKEN")
+        if not self._api_key:
+            raise ValueError("REPLICATE_API_TOKEN not found in environment variables")
+
+    @property
+    def name(self) -> str:
+        return "replicate_go_fast"
+
+    def generate_image(self, prompt: str, save_path: Path) -> float:
+        start_time = time.time()
+        result = replicate.run(
+            "black-forest-labs/flux-dev",
+            input={
+                "seed": 0,
+                "prompt": prompt,
+                "go_fast": True,
+                "guidance": 3.5,
+                "num_outputs": 1,
+                "aspect_ratio": "1:1",
+                "output_format": "png",
+                "num_inference_steps": 28,
+            },
+        )
+        end_time = time.time()
+        if result and len(result) > 0:
+            self._save_image_from_result(result[0], save_path)
+        else:
+            raise Exception("No result returned from Replicate API")
+        return end_time - start_time
+
+    def _save_image_from_result(self, result: Any, save_path: Path):
+        save_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(save_path, "wb") as f:
+            f.write(result.read())
api/together.py
ADDED
@@ -0,0 +1,47 @@
+import base64
+import io
+import time
+from pathlib import Path
+from typing import Any
+
+from dotenv import load_dotenv
+from PIL import Image
+from together import Together
+
+from api.flux import FluxAPI
+
+
+class TogetherAPI(FluxAPI):
+    def __init__(self):
+        load_dotenv()
+        self._client = Together()
+
+    @property
+    def name(self) -> str:
+        return "together"
+
+    def generate_image(self, prompt: str, save_path: Path) -> float:
+        start_time = time.time()
+        result = self._client.images.generate(
+            prompt=prompt,
+            model="black-forest-labs/FLUX.1-dev",
+            width=1024,
+            height=1024,
+            steps=28,
+            n=1,
+            seed=0,
+            response_format="b64_json",
+        )
+        end_time = time.time()
+        if result and hasattr(result, 'data') and len(result.data) > 0:
+            self._save_image_from_result(result, save_path)
+        else:
+            raise Exception("No result returned from Together API")
+        return end_time - start_time
+
+    def _save_image_from_result(self, result: Any, save_path: Path):
+        save_path.parent.mkdir(parents=True, exist_ok=True)
+        b64_str = result.data[0].b64_json
+        image_data = base64.b64decode(b64_str)
+        image = Image.open(io.BytesIO(image_data))
+        image.save(save_path)
benchmark/__init__.py
ADDED
@@ -0,0 +1,39 @@
+from typing import Type
+
+from benchmark.draw_bench import DrawBenchPrompts
+from benchmark.genai_bench import GenAIBenchPrompts
+from benchmark.geneval import GenEvalPrompts
+from benchmark.hps import HPSPrompts
+from benchmark.parti import PartiPrompts
+
+
+def create_benchmark(benchmark_type: str) -> DrawBenchPrompts | GenAIBenchPrompts | GenEvalPrompts | HPSPrompts | PartiPrompts:
+    """
+    Factory function to create benchmark instances.
+
+    Args:
+        benchmark_type (str): The type of benchmark to create. Must be one of:
+            - "draw_bench"
+            - "genai_bench"
+            - "geneval"
+            - "hps"
+            - "parti"
+
+    Returns:
+        An instance of the requested benchmark implementation
+
+    Raises:
+        ValueError: If an invalid benchmark type is provided
+    """
+    benchmark_map: dict[str, Type] = {
+        "draw_bench": DrawBenchPrompts,
+        "genai_bench": GenAIBenchPrompts,
+        "geneval": GenEvalPrompts,
+        "hps": HPSPrompts,
+        "parti": PartiPrompts,
+    }
+
+    if benchmark_type not in benchmark_map:
+        raise ValueError(f"Invalid benchmark type: {benchmark_type}. Must be one of {list(benchmark_map.keys())}")
+
+    return benchmark_map[benchmark_type]()
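A short sketch of how a benchmark instance is consumed downstream; the benchmark name is chosen arbitrarily:

from benchmark import create_benchmark

benchmark = create_benchmark("draw_bench")
print(benchmark.name, benchmark.size, benchmark.metrics)
for prompt, image_path in benchmark:
    # most benchmarks yield (prompt, relative image path) pairs;
    # geneval instead yields (metadata dict, folder name) pairs
    print(prompt, image_path)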
benchmark/draw_bench.py
ADDED
@@ -0,0 +1,25 @@
+from pathlib import Path
+from typing import Iterator, List, Tuple
+
+from datasets import load_dataset
+
+
+class DrawBenchPrompts:
+    def __init__(self):
+        self.dataset = load_dataset("shunk031/DrawBench")["test"]
+
+    def __iter__(self) -> Iterator[Tuple[str, Path]]:
+        for i, row in enumerate(self.dataset):
+            yield row["prompts"], Path(f"{i}.png")
+
+    @property
+    def name(self) -> str:
+        return "draw_bench"
+
+    @property
+    def size(self) -> int:
+        return len(self.dataset)
+
+    @property
+    def metrics(self) -> List[str]:
+        return ["image_reward"]
benchmark/genai_bench.py
ADDED
@@ -0,0 +1,39 @@
+from pathlib import Path
+from typing import Iterator, List, Tuple
+
+import requests
+
+
+class GenAIBenchPrompts:
+    def __init__(self):
+        super().__init__()
+        self._download_genai_bench_files()
+        prompts_path = Path('downloads/genai_bench/prompts.txt')
+        with open(prompts_path, 'r') as f:
+            self.prompts = [line.strip() for line in f if line.strip()]
+
+    def __iter__(self) -> Iterator[Tuple[str, Path]]:
+        for i, prompt in enumerate(self.prompts):
+            yield prompt, Path(f"{i}.png")
+
+    def _download_genai_bench_files(self) -> None:
+        folder_name = Path('downloads/genai_bench')
+        folder_name.mkdir(parents=True, exist_ok=True)
+        prompts_url = "https://huggingface.co/datasets/zhiqiulin/GenAI-Bench-527/raw/main/prompts.txt"
+        prompts_path = folder_name / "prompts.txt"
+        if not prompts_path.exists():
+            response = requests.get(prompts_url)
+            with open(prompts_path, 'w') as f:
+                f.write(response.text)
+
+    @property
+    def name(self) -> str:
+        return "genai_bench"
+
+    @property
+    def size(self) -> int:
+        return len(self.prompts)
+
+    @property
+    def metrics(self) -> List[str]:
+        return ["vqa"]
benchmark/geneval.py
ADDED
@@ -0,0 +1,44 @@
+import json
+from pathlib import Path
+from typing import Any, Dict, Iterator, List, Tuple
+
+import requests
+
+
+class GenEvalPrompts:
+    def __init__(self):
+        super().__init__()
+        self._download_geneval_file()
+        metadata_path = Path('downloads/geneval/evaluation_metadata.jsonl')
+        self.entries: List[Dict[str, Any]] = []
+        with open(metadata_path, 'r') as f:
+            for line in f:
+                if line.strip():
+                    self.entries.append(json.loads(line))
+
+    def __iter__(self) -> Iterator[Tuple[Dict[str, Any], Path]]:
+        for i, entry in enumerate(self.entries):
+            folder_name = f"{i:05d}"
+            yield entry, Path(folder_name)
+
+    def _download_geneval_file(self) -> None:
+        folder_name = Path('downloads/geneval')
+        folder_name.mkdir(parents=True, exist_ok=True)
+        metadata_url = "https://raw.githubusercontent.com/djghosh13/geneval/main/prompts/evaluation_metadata.jsonl"
+        metadata_path = folder_name / "evaluation_metadata.jsonl"
+        if not metadata_path.exists():
+            response = requests.get(metadata_url)
+            with open(metadata_path, 'w') as f:
+                f.write(response.text)
+
+    @property
+    def name(self) -> str:
+        return "geneval"
+
+    @property
+    def size(self) -> int:
+        return len(self.entries)
+
+    @property
+    def metrics(self) -> List[str]:
+        raise NotImplementedError("GenEval requires custom evaluation, see README.md")
benchmark/hps.py
ADDED
@@ -0,0 +1,49 @@
+import json
+import os
+from pathlib import Path
+from typing import Dict, Iterator, List, Tuple
+
+import huggingface_hub
+
+
+class HPSPrompts:
+    def __init__(self):
+        super().__init__()
+        self.hps_prompt_files = ['anime.json', 'concept-art.json', 'paintings.json', 'photo.json']
+        self._download_benchmark_prompts()
+        self.prompts: Dict[str, str] = {}
+        self._size = 0
+        for file in self.hps_prompt_files:
+            category = file.replace('.json', '')
+            with open(os.path.join('downloads/hps', file), 'r') as f:
+                prompts = json.load(f)
+            for i, prompt in enumerate(prompts):
+                if i == 100:
+                    break
+                filename = f"{category}_{i:03d}.png"
+                self.prompts[filename] = prompt
+                self._size += 1
+
+    def __iter__(self) -> Iterator[Tuple[str, Path]]:
+        for filename, prompt in self.prompts.items():
+            yield prompt, Path(filename)
+
+    @property
+    def name(self) -> str:
+        return "hps"
+
+    @property
+    def size(self) -> int:
+        return self._size
+
+    def _download_benchmark_prompts(self) -> None:
+        folder_name = Path('downloads/hps')
+        folder_name.mkdir(parents=True, exist_ok=True)
+        for file in self.hps_prompt_files:
+            file_name = huggingface_hub.hf_hub_download("zhwang/HPDv2", file, subfolder="benchmark", repo_type="dataset")
+            if not os.path.exists(os.path.join(folder_name, file)):
+                os.symlink(file_name, os.path.join(folder_name, file))
+
+    @property
+    def metrics(self) -> List[str]:
+        return ["hps"]
benchmark/metrics/__init__.py
ADDED
@@ -0,0 +1,45 @@
+from typing import Type
+
+from benchmark.metrics.arniqa import ARNIQAMetric
+from benchmark.metrics.clip import CLIPMetric
+from benchmark.metrics.clip_iqa import CLIPIQAMetric
+from benchmark.metrics.hps import HPSMetric
+from benchmark.metrics.image_reward import ImageRewardMetric
+from benchmark.metrics.sharpness import SharpnessMetric
+from benchmark.metrics.vqa import VQAMetric
+
+
+def create_metric(metric_type: str) -> ARNIQAMetric | CLIPMetric | CLIPIQAMetric | HPSMetric | ImageRewardMetric | SharpnessMetric | VQAMetric:
+    """
+    Factory function to create metric instances.
+
+    Args:
+        metric_type (str): The type of metric to create. Must be one of:
+            - "arniqa"
+            - "clip"
+            - "clip_iqa"
+            - "hps"
+            - "image_reward"
+            - "sharpness"
+            - "vqa"
+
+    Returns:
+        An instance of the requested metric implementation
+
+    Raises:
+        ValueError: If an invalid metric type is provided
+    """
+    metric_map: dict[str, Type] = {
+        "arniqa": ARNIQAMetric,
+        "clip": CLIPMetric,
+        "clip_iqa": CLIPIQAMetric,
+        "hps": HPSMetric,
+        "image_reward": ImageRewardMetric,
+        "sharpness": SharpnessMetric,
+        "vqa": VQAMetric,
+    }
+
+    if metric_type not in metric_map:
+        raise ValueError(f"Invalid metric type: {metric_type}. Must be one of {list(metric_map.keys())}")
+
+    return metric_map[metric_type]()
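As a quick sketch, a metric instance is applied to an in-memory image; the file path, prompt, and printed value here are placeholders:

from PIL import Image

from benchmark.metrics import create_metric

metric = create_metric("sharpness")  # cheapest metric; no model download
image = Image.open("images/fal/parti/0.png")  # placeholder path
print(metric.compute_score(image, "a watercolor fox"))  # e.g. {"sharpness": 123.4}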
benchmark/metrics/arniqa.py
ADDED
@@ -0,0 +1,26 @@
+from typing import Dict
+
+import numpy as np
+import torch
+from PIL import Image
+from torchmetrics.image.arniqa import ARNIQA
+
+
+class ARNIQAMetric:
+    def __init__(self):
+        self.metric = ARNIQA(
+            regressor_dataset="koniq10k",
+            reduction="mean",
+            normalize=True,
+            autocast=False
+        )
+
+    @property
+    def name(self) -> str:
+        return "arniqa"
+
+    def compute_score(self, image: Image.Image, prompt: str) -> Dict[str, float]:
+        image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1).float() / 255.0
+        image_tensor = image_tensor.unsqueeze(0)
+        score = self.metric(image_tensor)
+        return {"arniqa": score.item()}
benchmark/metrics/clip.py
ADDED
@@ -0,0 +1,23 @@
+from typing import Dict
+
+import numpy as np
+import torch
+from PIL import Image
+from torchmetrics.multimodal.clip_score import CLIPScore
+
+
+class CLIPMetric:
+    def __init__(self, model_name_or_path: str = "openai/clip-vit-large-patch14"):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.metric = CLIPScore(model_name_or_path=model_name_or_path)
+        self.metric.to(self.device)
+
+    @property
+    def name(self) -> str:
+        return "clip"
+
+    def compute_score(self, image: Image.Image, prompt: str) -> Dict[str, float]:
+        image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1).float()
+        image_tensor = image_tensor.to(self.device)
+        score = self.metric(image_tensor, prompt)
+        return {"clip": score.item()}
benchmark/metrics/clip_iqa.py
ADDED
@@ -0,0 +1,28 @@
+from typing import Dict
+
+import numpy as np
+import torch
+from PIL import Image
+from torchmetrics.multimodal import CLIPImageQualityAssessment
+
+
+class CLIPIQAMetric:
+    def __init__(self):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.metric = CLIPImageQualityAssessment(
+            model_name_or_path="clip_iqa",
+            data_range=255.0,
+            prompts=["quality"]
+        )
+        self.metric.to(self.device)
+
+    @property
+    def name(self) -> str:
+        return "clip_iqa"
+
+    def compute_score(self, image: Image.Image, prompt: str) -> Dict[str, float]:
+        image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1).float()
+        image_tensor = image_tensor.unsqueeze(0)
+        image_tensor = image_tensor.to(self.device)
+        scores = self.metric(image_tensor)
+        return {"clip_iqa": scores.item()}
benchmark/metrics/hps.py
ADDED
@@ -0,0 +1,77 @@
+import os
+from typing import Dict
+
+import torch
+from PIL import Image
+from hpsv2.src.open_clip import create_model_and_transforms, get_tokenizer
+import huggingface_hub
+from hpsv2.utils import root_path, hps_version_map
+
+
+class HPSMetric:
+    def __init__(self):
+        self.hps_version = "v2.1"
+        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        self.model_dict = {}
+        self._initialize_model()
+
+    def _initialize_model(self):
+        if not self.model_dict:
+            model, preprocess_train, preprocess_val = create_model_and_transforms(
+                'ViT-H-14',
+                'laion2B-s32B-b79K',
+                precision='amp',
+                device=self.device,
+                jit=False,
+                force_quick_gelu=False,
+                force_custom_text=False,
+                force_patch_dropout=False,
+                force_image_size=None,
+                pretrained_image=False,
+                image_mean=None,
+                image_std=None,
+                light_augmentation=True,
+                aug_cfg={},
+                output_dict=True,
+                with_score_predictor=False,
+                with_region_predictor=False
+            )
+            self.model_dict['model'] = model
+            self.model_dict['preprocess_val'] = preprocess_val
+
+            # Load checkpoint
+            if not os.path.exists(root_path):
+                os.makedirs(root_path)
+            cp = huggingface_hub.hf_hub_download("xswu/HPSv2", hps_version_map[self.hps_version])
+
+            checkpoint = torch.load(cp, map_location=self.device)
+            model.load_state_dict(checkpoint['state_dict'])
+            self.tokenizer = get_tokenizer('ViT-H-14')
+            model = model.to(self.device)
+            model.eval()
+
+    @property
+    def name(self) -> str:
+        return "hps"
+
+    def compute_score(
+        self,
+        image: Image.Image,
+        prompt: str,
+    ) -> Dict[str, float]:
+        model = self.model_dict['model']
+        preprocess_val = self.model_dict['preprocess_val']
+
+        with torch.no_grad():
+            # Process the image
+            image_tensor = preprocess_val(image).unsqueeze(0).to(device=self.device, non_blocking=True)
+            # Process the prompt
+            text = self.tokenizer([prompt]).to(device=self.device, non_blocking=True)
+            # Calculate the HPS
+            with torch.cuda.amp.autocast():
+                outputs = model(image_tensor, text)
+                image_features, text_features = outputs["image_features"], outputs["text_features"]
+                logits_per_image = image_features @ text_features.T
+                hps_score = torch.diagonal(logits_per_image).cpu().numpy()
+
+        return {"hps": float(hps_score[0])}
benchmark/metrics/image_reward.py
ADDED
@@ -0,0 +1,26 @@
+import os
+import tempfile
+from typing import Dict
+
+import ImageReward as RM
+from PIL import Image
+
+
+class ImageRewardMetric:
+    def __init__(self):
+        self.model = RM.load("ImageReward-v1.0")
+
+    @property
+    def name(self) -> str:
+        return "image_reward"
+
+    def compute_score(
+        self,
+        image: Image.Image,
+        prompt: str,
+    ) -> Dict[str, float]:
+        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
+            image.save(tmp.name)
+        score = self.model.score(prompt, [tmp.name])
+        os.unlink(tmp.name)
+        return {"image_reward": score}
benchmark/metrics/sharpness.py
ADDED
@@ -0,0 +1,24 @@
+from typing import Dict
+
+import cv2
+import numpy as np
+from PIL import Image
+
+
+class SharpnessMetric:
+    def __init__(self):
+        self.kernel_size = 3
+
+    @property
+    def name(self) -> str:
+        return "sharpness"
+
+    def compute_score(
+        self,
+        image: Image.Image,
+        prompt: str,
+    ) -> Dict[str, float]:
+        img = np.array(image.convert('L'))
+        laplacian = cv2.Laplacian(img, cv2.CV_64F, ksize=self.kernel_size)
+        sharpness = laplacian.var()
+        return {"sharpness": float(sharpness)}
benchmark/metrics/vqa.py
ADDED
@@ -0,0 +1,25 @@
+import os
+import tempfile
+from typing import Dict
+
+import t2v_metrics
+from PIL import Image
+
+class VQAMetric:
+    def __init__(self):
+        self.metric = t2v_metrics.VQAScore(model="clip-flant5-xxl")
+
+    @property
+    def name(self) -> str:
+        return "vqa"
+
+    def compute_score(
+        self,
+        image: Image.Image,
+        prompt: str,
+    ) -> Dict[str, float]:
+        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
+            image.save(tmp.name)
+        score = self.metric(images=[tmp.name], texts=[prompt])
+        os.unlink(tmp.name)
+        return {"vqa": score[0][0].item()}
benchmark/parti.py
ADDED
@@ -0,0 +1,28 @@
+from pathlib import Path
+from typing import Iterator, List, Tuple
+
+from datasets import load_dataset
+
+
+class PartiPrompts:
+    def __init__(self):
+        dataset = load_dataset("nateraw/parti-prompts")["train"]
+        shuffled_dataset = dataset.shuffle(seed=42)
+        selected_dataset = shuffled_dataset.select(range(800))
+        self.prompts = [row["Prompt"] for row in selected_dataset]
+
+    def __iter__(self) -> Iterator[Tuple[str, Path]]:
+        for i, prompt in enumerate(self.prompts):
+            yield prompt, Path(f"{i}.png")
+
+    @property
+    def name(self) -> str:
+        return "parti"
+
+    @property
+    def size(self) -> int:
+        return len(self.prompts)
+
+    @property
+    def metrics(self) -> List[str]:
+        return ["arniqa", "clip", "clip_iqa", "sharpness"]
evaluate.py
ADDED
@@ -0,0 +1,106 @@
+import argparse
+import json
+from pathlib import Path
+from typing import Dict
+
+from benchmark import create_benchmark
+from benchmark.metrics import create_metric
+from PIL import Image
+
+
+def evaluate_benchmark(benchmark_type: str, api_type: str, images_dir: Path = Path("images")) -> Dict:
+    """
+    Evaluate a benchmark's images using its specific metrics.
+
+    Args:
+        benchmark_type (str): Type of benchmark to evaluate
+        api_type (str): Type of API used to generate images
+        images_dir (Path): Base directory containing generated images
+
+    Returns:
+        Dict containing evaluation results
+    """
+    benchmark = create_benchmark(benchmark_type)
+
+    benchmark_dir = images_dir / api_type / benchmark_type
+    metadata_file = benchmark_dir / "metadata.jsonl"
+
+    if not metadata_file.exists():
+        raise FileNotFoundError(f"No metadata file found for {api_type}/{benchmark_type}. Please run sample.py first.")
+
+    metadata = []
+    with open(metadata_file, "r") as f:
+        for line in f:
+            metadata.append(json.loads(line))
+
+    metrics = {metric_type: create_metric(metric_type) for metric_type in benchmark.metrics}
+
+    results = {
+        "api": api_type,
+        "benchmark": benchmark_type,
+        "metrics": {metric: 0.0 for metric in benchmark.metrics},
+        "avg_inference_time": 0.0,
+        "total_images": len(metadata)
+    }
+
+    for entry in metadata:
+        image_path = benchmark_dir / entry["filepath"]
+        if not image_path.exists():
+            continue
+
+        image = Image.open(image_path)
+
+        for metric_type, metric in metrics.items():
+            try:
+                score = metric.compute_score(image, entry["prompt"])
+                results["metrics"][metric_type] += score[metric_type]
+            except Exception as e:
+                print(f"Error computing {metric_type} for {image_path}: {str(e)}")
+
+        results["avg_inference_time"] += entry["inference_time"]
+
+    for metric in results["metrics"]:
+        results["metrics"][metric] /= len(metadata)
+    results["avg_inference_time"] /= len(metadata)
+
+    return results
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Evaluate generated images using benchmark-specific metrics")
+    parser.add_argument("api_type", help="Type of API to evaluate")
+    parser.add_argument("benchmarks", nargs="+", help="List of benchmark types to evaluate")
+
+    args = parser.parse_args()
+
+    results_dir = Path("evaluation_results")
+    results_dir.mkdir(exist_ok=True)
+
+    results_file = results_dir / f"{args.api_type}.jsonl"
+    existing_results = set()
+
+    if results_file.exists():
+        with open(results_file, "r") as f:
+            for line in f:
+                result = json.loads(line)
+                existing_results.add(result["benchmark"])
+
+    for benchmark_type in args.benchmarks:
+        if benchmark_type in existing_results:
+            print(f"Skipping {args.api_type}/{benchmark_type} - already evaluated")
+            continue
+
+        try:
+            print(f"Evaluating {args.api_type}/{benchmark_type}")
+            results = evaluate_benchmark(benchmark_type, args.api_type)
+
+            # Append results to file
+            with open(results_file, "a") as f:
+                f.write(json.dumps(results) + "\n")
+
+        except Exception as e:
+            print(f"Error evaluating {args.api_type}/{benchmark_type}: {str(e)}")
+
+
+if __name__ == "__main__":
+    main()
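The script is driven from the command line (API name first, then one or more benchmarks). The equivalent programmatic call, with placeholder arguments, would look like:

from evaluate import evaluate_benchmark

# Assumes sample.py has already produced images/fal/parti/metadata.jsonl
results = evaluate_benchmark("parti", "fal")
print(results["metrics"], results["avg_inference_time"])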
evaluation_results/.gitkeep
ADDED
File without changes
images/.gitkeep
ADDED
File without changes
pyproject.toml
ADDED
@@ -0,0 +1,22 @@
+[project]
+name = "inferbench"
+version = "0.1.0"
+requires-python = ">=3.12"
+dependencies = [
+    "datasets>=3.5.0",
+    "fal-client>=0.5.9",
+    "hpsv2>=1.2.0",
+    "huggingface-hub>=0.30.2",
+    "image-reward>=1.5",
+    "numpy>=2.2.5",
+    "opencv-python>=4.11.0.86",
+    "pillow>=11.2.1",
+    "python-dotenv>=1.1.0",
+    "replicate>=1.0.4",
+    "requests>=2.32.3",
+    "t2v-metrics>=1.2",
+    "together>=1.5.5",
+    "torch>=2.7.0",
+    "torchmetrics>=1.7.1",
+    "tqdm>=4.67.1",
+]
sample.py
ADDED
@@ -0,0 +1,110 @@
+import argparse
+import json
+from pathlib import Path
+from typing import List
+
+from tqdm import tqdm
+
+from api import create_api
+from benchmark import create_benchmark
+
+
+def generate_images(api_type: str, benchmarks: List[str]):
+    images_dir = Path("images")
+    api = create_api(api_type)
+
+    api_dir = images_dir / api_type
+    api_dir.mkdir(parents=True, exist_ok=True)
+
+    for benchmark_type in tqdm(benchmarks, desc="Processing benchmarks"):
+        print(f"\nProcessing benchmark: {benchmark_type}")
+
+        benchmark = create_benchmark(benchmark_type)
+
+        if benchmark_type == "geneval":
+            benchmark_dir = api_dir / benchmark_type
+            benchmark_dir.mkdir(parents=True, exist_ok=True)
+
+            metadata_file = benchmark_dir / "metadata.jsonl"
+            existing_metadata = {}
+            if metadata_file.exists():
+                with open(metadata_file, "r") as f:
+                    for line in f:
+                        entry = json.loads(line)
+                        existing_metadata[entry["filepath"]] = entry
+
+            for metadata, folder_name in tqdm(benchmark, desc=f"Generating images for {benchmark_type}", leave=False):
+                sample_path = benchmark_dir / folder_name
+                samples_path = sample_path / "samples"
+                samples_path.mkdir(parents=True, exist_ok=True)
+                image_path = samples_path / "0000.png"
+
+                if image_path.exists():
+                    continue
+
+                try:
+                    inference_time = api.generate_image(metadata["prompt"], image_path)
+
+                    metadata_entry = {
+                        "filepath": str(image_path),
+                        "prompt": metadata["prompt"],
+                        "inference_time": inference_time
+                    }
+
+                    existing_metadata[str(image_path)] = metadata_entry
+
+                except Exception as e:
+                    print(f"\nError generating image for prompt: {metadata['prompt']}")
+                    print(f"Error: {str(e)}")
+                    continue
+        else:
+            benchmark_dir = api_dir / benchmark_type
+            benchmark_dir.mkdir(parents=True, exist_ok=True)
+
+            metadata_file = benchmark_dir / "metadata.jsonl"
+            existing_metadata = {}
+            if metadata_file.exists():
+                with open(metadata_file, "r") as f:
+                    for line in f:
+                        entry = json.loads(line)
+                        existing_metadata[entry["filepath"]] = entry
+
+            for prompt, image_path in tqdm(benchmark, desc=f"Generating images for {benchmark_type}", leave=False):
+                full_image_path = benchmark_dir / image_path
+
+                if full_image_path.exists():
+                    continue
+
+                try:
+                    inference_time = api.generate_image(prompt, full_image_path)
+
+                    metadata_entry = {
+                        "filepath": str(image_path),
+                        "prompt": prompt,
+                        "inference_time": inference_time
+                    }
+
+                    existing_metadata[str(image_path)] = metadata_entry
+
+                except Exception as e:
+                    print(f"\nError generating image for prompt: {prompt}")
+                    print(f"Error: {str(e)}")
+                    continue
+
+        with open(metadata_file, "w") as f:
+            for entry in existing_metadata.values():
+                f.write(json.dumps(entry) + "\n")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Generate images for specified benchmarks using a given API")
+    parser.add_argument("api_type", help="Type of API to use for image generation")
+    parser.add_argument("benchmarks", nargs="+", help="List of benchmark types to run")
+
+    args = parser.parse_args()
+
+    generate_images(args.api_type, args.benchmarks)
+
+
+if __name__ == "__main__":
+    main()
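A minimal sketch of driving the generation step programmatically; the API and benchmark choices are placeholders (equivalent to running `python sample.py fal parti hps` from the shell):

from sample import generate_images

# Any api_map key and any benchmark_map keys should work here
generate_images("fal", ["parti", "hps"])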
uv.lock
ADDED
The diff for this file is too large to render.