Commit 199a7d9 · Parent(s): f5f9b38

chore: update .gitignore, environment.yml, and README; remove nils_installs.txt
Files changed:
- .gitignore +2 -1
- README.md +19 -1
- benchmark/draw_bench.py +4 -4
- benchmark/metrics/arniqa.py +13 -4
- benchmark/metrics/clip.py +9 -3
- benchmark/metrics/clip_iqa.py +9 -5
- benchmark/metrics/hps.py +39 -24
- benchmark/metrics/image_reward.py +12 -3
- benchmark/metrics/vqa.py +14 -3
- environment.yml +1 -1
- nils_installs.txt +0 -177
- sample.py +32 -26
.gitignore CHANGED

@@ -175,4 +175,5 @@ cython_debug/
 
 evaluation_results/
 images/
-hf_cache/
+hf_cache/
+*.lock
README.md CHANGED

@@ -1,10 +1,28 @@
 # InferBench
 Evaluate the quality and efficiency of image gen api's.
 
-
+## Installation
+
+### Install dependencies
 
+Install dependencies with conda like that:
+```
 conda env create -f environment.yml
+```
+
+### Install uv
+
+Install uv with pip like that:
+
+```
+uv venv --python 3.12
+```
+
+```
+uv sync --all-groups
+```
 
+## Usage
 
 Create .env file with all the credentials you will need.
 
benchmark/draw_bench.py CHANGED

@@ -6,16 +6,16 @@ from datasets import load_dataset
 
 class DrawBenchPrompts:
     def __init__(self):
-        self.dataset = load_dataset("shunk031/DrawBench")
-
+        self.dataset = load_dataset("shunk031/DrawBench", split="test[:5]")
+
     def __iter__(self) -> Iterator[Tuple[str, Path]]:
         for i, row in enumerate(self.dataset):
             yield row["prompts"], Path(f"{i}.png")
-
+
     @property
     def name(self) -> str:
         return "draw_bench"
-
+
     @property
     def size(self) -> int:
         return len(self.dataset)
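Note: the `split="test[:5]"` cap means the class now yields only the first five DrawBench prompts, which reads like a debugging limit. A minimal sketch of how a consumer iterates this class, the way sample.py's non-geneval branch does (output directory illustrative):

```python
from pathlib import Path

from benchmark.draw_bench import DrawBenchPrompts

prompts = DrawBenchPrompts()
print(prompts.name, prompts.size)  # "draw_bench", number of rows in the split

out_dir = Path("images") / prompts.name
out_dir.mkdir(parents=True, exist_ok=True)

for prompt, rel_path in prompts:  # yields (prompt text, Path("0.png")), ...
    print(f"would render {prompt!r} -> {out_dir / rel_path}")
```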
benchmark/metrics/arniqa.py CHANGED

@@ -8,20 +8,29 @@ from torchmetrics.image.arniqa import ARNIQA
 
 class ARNIQAMetric:
     def __init__(self):
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
+        self.device = torch.device(
+            "cuda"
+            if torch.cuda.is_available()
+            else "mps"
+            if torch.backends.mps.is_available()
+            else "cpu"
+        )
         self.metric = ARNIQA(
             regressor_dataset="koniq10k",
             reduction="mean",
             normalize=True,
-            autocast=False
+            autocast=False,
         )
         self.metric.to(self.device)
+
     @property
     def name(self) -> str:
         return "arniqa"
-
+
     def compute_score(self, image: Image.Image, prompt: str) -> Dict[str, float]:
-        image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1).float() / 255.0
+        image_tensor = (
+            torch.from_numpy(np.array(image)).permute(2, 0, 1).float() / 255.0
+        )
        image_tensor = image_tensor.unsqueeze(0).to(self.device)
         score = self.metric(image_tensor)
         return {"arniqa": score.item()}
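Note: the same CUDA → MPS → CPU chained conditional is now inlined in every metric class in this commit. A hypothetical helper (not in this repo) that factors out the pattern would read:

```python
import torch


def pick_device() -> torch.device:
    """Prefer CUDA, then Apple MPS, then CPU -- the same chained conditional."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    if torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")


print(pick_device())
```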
benchmark/metrics/clip.py CHANGED

@@ -8,14 +8,20 @@ from torchmetrics.multimodal.clip_score import CLIPScore
 
 class CLIPMetric:
     def __init__(self, model_name_or_path: str = "openai/clip-vit-large-patch14"):
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
+        self.device = torch.device(
+            "cuda"
+            if torch.cuda.is_available()
+            else "mps"
+            if torch.backends.mps.is_available()
+            else "cpu"
+        )
         self.metric = CLIPScore(model_name_or_path="openai/clip-vit-large-patch14")
         self.metric.to(self.device)
-
+
     @property
     def name(self) -> str:
         return "clip"
-
+
     def compute_score(self, image: Image.Image, prompt: str) -> Dict[str, float]:
         image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1).float()
         image_tensor = image_tensor.to(self.device)
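Note: the rest of compute_score is outside this hunk; assuming it returns a `{"clip": score}` dict in line with the `name` property, usage would look roughly like this sketch (the stand-in image is illustrative):

```python
from PIL import Image

from benchmark.metrics.clip import CLIPMetric

metric = CLIPMetric()
image = Image.new("RGB", (224, 224), "red")  # stand-in for a generated image
print(metric.compute_score(image, "a plain red square"))
```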
benchmark/metrics/clip_iqa.py CHANGED

@@ -8,18 +8,22 @@ from torchmetrics.multimodal import CLIPImageQualityAssessment
 
 class CLIPIQAMetric:
     def __init__(self):
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
+        self.device = torch.device(
+            "cuda"
+            if torch.cuda.is_available()
+            else "mps"
+            if torch.backends.mps.is_available()
+            else "cpu"
+        )
         self.metric = CLIPImageQualityAssessment(
-            model_name_or_path="clip_iqa",
-            data_range=255.0,
-            prompts=("quality",)
+            model_name_or_path="clip_iqa", data_range=255.0, prompts=("quality",)
         )
         self.metric.to(self.device)
 
     @property
     def name(self) -> str:
         return "clip_iqa"
-
+
     def compute_score(self, image: Image.Image, prompt: str) -> Dict[str, float]:
         image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1).float()
         image_tensor = image_tensor.unsqueeze(0)
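Note: `data_range=255.0` declares the input scale to torchmetrics, which is why compute_score passes an unnormalized 0-255 float tensor. A sketch of the underlying call with a dummy image (random pixels, for illustration only):

```python
import torch
from torchmetrics.multimodal import CLIPImageQualityAssessment

metric = CLIPImageQualityAssessment(
    model_name_or_path="clip_iqa", data_range=255.0, prompts=("quality",)
)
# One RGB image as a 0-255 float tensor, matching compute_score above.
dummy = torch.randint(0, 256, (1, 3, 224, 224)).float()
print(metric(dummy))
```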
benchmark/metrics/hps.py CHANGED

@@ -1,26 +1,32 @@
 import os
 from typing import Dict
 
+import huggingface_hub
 import torch
-from PIL import Image
 from hpsv2.src.open_clip import create_model_and_transforms, get_tokenizer
-import huggingface_hub
-from hpsv2.utils import root_path, hps_version_map
+from hpsv2.utils import hps_version_map, root_path
+from PIL import Image
 
 
 class HPSMetric:
     def __init__(self):
         self.hps_version = "v2.1"
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
+        self.device = torch.device(
+            "cuda"
+            if torch.cuda.is_available()
+            else "mps"
+            if torch.backends.mps.is_available()
+            else "cpu"
+        )
         self.model_dict = {}
         self._initialize_model()
-
+
     def _initialize_model(self):
         if not self.model_dict:
             model, preprocess_train, preprocess_val = create_model_and_transforms(
-                'ViT-H-14',
-                'laion2B-s32B-b79K',
-                precision='amp',
+                "ViT-H-14",
+                "laion2B-s32B-b79K",
+                precision="amp",
                 device=self.device,
                 jit=False,
                 force_quick_gelu=False,

@@ -34,44 +40,53 @@ class HPSMetric:
                 aug_cfg={},
                 output_dict=True,
                 with_score_predictor=False,
-                with_region_predictor=False
+                with_region_predictor=False,
             )
-            self.model_dict['model'] = model
-            self.model_dict['preprocess_val'] = preprocess_val
-
+            self.model_dict["model"] = model
+            self.model_dict["preprocess_val"] = preprocess_val
+
             # Load checkpoint
             if not os.path.exists(root_path):
                 os.makedirs(root_path)
-            cp = huggingface_hub.hf_hub_download(
-                'xswu/HPSv2', hps_version_map[self.hps_version])
+            cp = huggingface_hub.hf_hub_download(
+                "xswu/HPSv2", hps_version_map[self.hps_version]
+            )
+
             checkpoint = torch.load(cp, map_location=self.device)
-            model.load_state_dict(checkpoint['state_dict'])
-            self.tokenizer = get_tokenizer('ViT-H-14')
+            model.load_state_dict(checkpoint["state_dict"])
+            self.tokenizer = get_tokenizer("ViT-H-14")
             model = model.to(self.device)
             model.eval()
-
+
     @property
     def name(self) -> str:
         return "hps"
-
+
     def compute_score(
         self,
         image: Image.Image,
         prompt: str,
     ) -> Dict[str, float]:
-        model = self.model_dict['model']
-        preprocess_val = self.model_dict['preprocess_val']
-
+        model = self.model_dict["model"]
+        preprocess_val = self.model_dict["preprocess_val"]
+
         with torch.no_grad():
             # Process the image
-            image_tensor = preprocess_val(image).unsqueeze(0).to(device=self.device, non_blocking=True)
+            image_tensor = (
+                preprocess_val(image)
+                .unsqueeze(0)
+                .to(device=self.device, non_blocking=True)
+            )
             # Process the prompt
             text = self.tokenizer([prompt]).to(device=self.device, non_blocking=True)
             # Calculate the HPS
             with torch.cuda.amp.autocast():
                 outputs = model(image_tensor, text)
-                image_features, text_features = outputs["image_features"], outputs["text_features"]
+                image_features, text_features = (
+                    outputs["image_features"],
+                    outputs["text_features"],
+                )
                 logits_per_image = image_features @ text_features.T
                 hps_score = torch.diagonal(logits_per_image).cpu().numpy()
-
+
         return {"hps": float(hps_score[0])}
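Note on the `torch.diagonal` call: with one image and one prompt the logit matrix is 1x1, and the diagonal picks out each image's score against its own prompt. A toy sketch with stand-in embeddings (1024 dims assumed for ViT-H-14):

```python
import torch
import torch.nn.functional as F

# Toy stand-ins for the CLIP image/text embeddings.
image_features = F.normalize(torch.randn(1, 1024), dim=-1)
text_features = F.normalize(torch.randn(1, 1024), dim=-1)

logits_per_image = image_features @ text_features.T  # shape (1, 1)
hps_score = torch.diagonal(logits_per_image)         # image i vs. prompt i
print(float(hps_score[0]))
```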
benchmark/metrics/image_reward.py CHANGED

@@ -3,17 +3,26 @@ import tempfile
 from typing import Dict
 
 import ImageReward as RM
+import torch
 from PIL import Image
 
 
 class ImageRewardMetric:
     def __init__(self):
-        self.model = RM.load("ImageReward-v1.0")
-
+        self.device = torch.device(
+            "cuda"
+            if torch.cuda.is_available()
+            else "mps"
+            if torch.backends.mps.is_available()
+            else "cpu"
+        )
+
+        self.model = RM.load("ImageReward-v1.0", device=str(self.device))
+
     @property
     def name(self) -> str:
         return "image_reward"
-
+
     def compute_score(
         self,
         image: Image.Image,
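Note: a hedged usage sketch; per the ImageReward README the loaded model exposes `score(prompt, image)`, returning a scalar reward (the blank stand-in image is illustrative):

```python
import ImageReward as RM
from PIL import Image

model = RM.load("ImageReward-v1.0", device="cpu")
image = Image.new("RGB", (512, 512))  # stand-in for a generated image
print(model.score("a photo of a cat", image))
```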
benchmark/metrics/vqa.py CHANGED

@@ -2,15 +2,26 @@ from pathlib import Path
 from typing import Dict
 
 import t2v_metrics
+import torch
+
 
 class VQAMetric:
     def __init__(self):
-        self.metric = t2v_metrics.VQAScore(model="clip-flant5-xxl")
-
+        self.device = torch.device(
+            "cuda"
+            if torch.cuda.is_available()
+            else "mps"
+            if torch.backends.mps.is_available()
+            else "cpu"
+        )
+        self.metric = t2v_metrics.VQAScore(
+            model="clip-flant5-xxl", device=str(self.device)
+        )
+
     @property
     def name(self) -> str:
         return "vqa_score"
-
+
     def compute_score(
         self,
         image_path: Path,
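Note: a hedged usage sketch for the wrapped scorer; per the t2v_metrics README the metric object is called with lists of image paths and texts (paths and prompt illustrative):

```python
import t2v_metrics

metric = t2v_metrics.VQAScore(model="clip-flant5-xxl", device="cuda")
scores = metric(images=["images/0.png"], texts=["a red cube on a blue sphere"])
print(scores)  # one score per (image, text) pair
```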
environment.yml CHANGED

@@ -12,7 +12,7 @@ dependencies:
   - tqdm
   - pip
   - pip:
-    - datasets
+    - datasets==3.6.0
     - fal-client>=0.5.9
     - hpsv2>=1.2.0
     - huggingface-hub>=0.30.2
nils_installs.txt DELETED

@@ -1,177 +0,0 @@
-accelerate==1.7.0
-aiohappyeyeballs==2.6.1
-aiohttp==3.12.12
-aiosignal==1.3.2
-annotated-types==0.7.0
-antlr4-python3-runtime==4.9.3
-anyio==4.9.0
-args==0.1.0
-asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1733250440834/work
-attrs==25.3.0
-beautifulsoup4==4.13.4
-boto3==1.38.33
-botocore==1.38.33
-braceexpand==0.1.7
-Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1749229842835/work
-certifi @ file:///home/conda/feedstock_root/build_artifacts/certifi_1746569525376/work/certifi
-cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1725560558132/work
-charset-normalizer @ file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1746214863626/work
-click==8.1.8
-clint==0.5.1
-clip==0.2.0
-colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1733218098505/work
-comm @ file:///home/conda/feedstock_root/build_artifacts/comm_1733502965406/work
-contourpy==1.3.2
-cycler==0.12.1
-datasets==3.6.0
-debugpy @ file:///home/conda/feedstock_root/build_artifacts/debugpy_1744321241074/work
-decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1740384970518/work
-diffusers==0.31.0
-dill==0.3.8
-distro==1.9.0
-einops==0.8.1
-eval_type_backport==0.2.2
-exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1746947292760/work
-executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1745502089858/work
-fairscale==0.4.13
-fal_client==0.7.0
-filelock==3.18.0
-fire==0.4.0
-fonttools==4.58.2
-frozenlist==1.7.0
-fsspec==2025.3.0
-ftfy==6.3.1
-gdown==5.2.0
-h11==0.16.0
-h2 @ file:///home/conda/feedstock_root/build_artifacts/h2_1738578511449/work
-hf-xet==1.1.3
-hpack @ file:///home/conda/feedstock_root/build_artifacts/hpack_1737618293087/work
-hpsv2==1.2.0
-httpcore==1.0.9
-httpx==0.28.1
-httpx-sse==0.4.0
-huggingface-hub==0.32.5
-hyperframe @ file:///home/conda/feedstock_root/build_artifacts/hyperframe_1737618333194/work
-idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1733211830134/work
-image-reward==1.5
-importlib_metadata==8.7.0
-iniconfig==2.1.0
-iopath==0.1.10
-ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1719845459717/work
-ipython @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_ipython_1748713870/work
-ipython_pygments_lexers @ file:///home/conda/feedstock_root/build_artifacts/ipython_pygments_lexers_1737123620466/work
-jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1733300866624/work
-Jinja2==3.1.6
-jiter==0.10.0
-jmespath==1.0.1
-joblib==1.5.1
-jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1733440914442/work
-jupyter_core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1748333051527/work
-kiwisolver==1.4.8
-lightning-utilities==0.14.3
-markdown-it-py==3.0.0
-MarkupSafe==3.0.2
-matplotlib==3.10.3
-matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1733416936468/work
-mdurl==0.1.2
-mpmath==1.3.0
-multidict==6.4.4
-multiprocess==0.70.16
-nest_asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1733325553580/work
-networkx==3.5
-numpy @ file:///home/conda/feedstock_root/build_artifacts/numpy_1749430504934/work/dist/numpy-2.3.0-cp312-cp312-linux_x86_64.whl#sha256=3c4437a0cbe50dbae872ad4cd8dc5316009165bce459c4ffe2c46cd30aba13d4
-nvidia-cublas-cu12==12.6.4.1
-nvidia-cuda-cupti-cu12==12.6.80
-nvidia-cuda-nvrtc-cu12==12.6.77
-nvidia-cuda-runtime-cu12==12.6.77
-nvidia-cudnn-cu12==9.5.1.17
-nvidia-cufft-cu12==11.3.0.4
-nvidia-cufile-cu12==1.11.1.6
-nvidia-curand-cu12==10.3.7.77
-nvidia-cusolver-cu12==11.7.1.2
-nvidia-cusparse-cu12==12.5.4.2
-nvidia-cusparselt-cu12==0.6.3
-nvidia-nccl-cu12==2.26.2
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.6.77
-omegaconf==2.3.0
-open_clip_torch==2.32.0
-openai==1.85.0
-opencv-python==4.11.0
-opencv-python-headless==4.11.0
-packaging==25.0
-pandas==2.3.0
-parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1733271261340/work
-pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1733301927746/work
-pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1733327343728/work
-pillow @ file:///home/conda/feedstock_root/build_artifacts/pillow_1746646208260/work
-piq==0.8.0
-platformdirs @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_platformdirs_1746710438/work
-pluggy==1.6.0
-portalocker==3.1.1
-prompt_toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1744724089886/work
-propcache==0.3.2
-protobuf==3.20.3
-psutil==7.0.0
-ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1733302279685/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl#sha256=92c32ff62b5fd8cf325bec5ab90d7be3d2a8ca8c8a3813ff487a8d2002630d1f
-pure_eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1733569405015/work
-pyarrow==20.0.0
-pycocoevalcap==1.2
-pycocotools==2.0.10
-pycparser @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_pycparser_1733195786/work
-pydantic==2.11.5
-pydantic_core==2.33.2
-Pygments==2.19.1
-pyparsing==3.2.3
-PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1733217236728/work
-pytest==7.2.0
-pytest-split==0.8.0
-python-dateutil==2.9.0.post0
-python-dotenv @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_python-dotenv_1742948348/work
-pytz==2025.2
-PyYAML==6.0.2
-pyzmq @ file:///home/conda/feedstock_root/build_artifacts/pyzmq_1743831245863/work
-regex==2024.11.6
-replicate==1.0.7
-requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1749498106507/work
-rich==14.0.0
-s3transfer==0.13.0
-safetensors==0.5.3
-scikit-learn==1.7.0
-scipy==1.15.3
-sentencepiece==0.2.0
-setuptools==80.9.0
-shellingham==1.5.4
-six==1.17.0
-sniffio==1.3.1
-soupsieve==2.7
-stack_data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1733569443808/work
-sympy==1.14.0
-t2v_metrics==1.2
-tabulate==0.9.0
-termcolor==3.1.0
-threadpoolctl==3.6.0
-tiktoken==0.9.0
-timm==0.6.13
-together==1.5.11
-tokenizers==0.15.2
-torch==2.7.1
-torchmetrics==1.7.2
-torchvision==0.22.1
-tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1748003300911/work
-tqdm @ file:///home/conda/feedstock_root/build_artifacts/tqdm_1735661334605/work
-traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1733367359838/work
-transformers==4.36.1
-triton==3.3.1
-typer==0.15.4
-typing-inspection==0.4.1
-typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_typing_extensions_1748959427/work
-tzdata==2025.2
-urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1744323578849/work
-wcwidth==0.2.13
-webdataset==0.2.111
-wheel==0.45.1
-xxhash==3.5.0
-yarl==1.20.1
-zipp==3.23.0
-zstandard==0.23.0
sample.py CHANGED

@@ -12,19 +12,19 @@ from benchmark import create_benchmark
 def generate_images(api_type: str, benchmarks: List[str]):
     images_dir = Path("images")
     api = create_api(api_type)
-
+
     api_dir = images_dir / api_type
     api_dir.mkdir(parents=True, exist_ok=True)
-
+
     for benchmark_type in tqdm(benchmarks, desc="Processing benchmarks"):
         print(f"\nProcessing benchmark: {benchmark_type}")
-
+
         benchmark = create_benchmark(benchmark_type)
-
+
         if benchmark_type == "geneval":
             benchmark_dir = api_dir / benchmark_type
             benchmark_dir.mkdir(parents=True, exist_ok=True)
-
+
             metadata_file = benchmark_dir / "metadata.jsonl"
             existing_metadata = {}
             if metadata_file.exists():

@@ -32,27 +32,29 @@ def generate_images(api_type: str, benchmarks: List[str]):
                 for line in f:
                     entry = json.loads(line)
                     existing_metadata[entry["filepath"]] = entry
-
-            for metadata, folder_name in tqdm(benchmark, desc=f"Generating images for {benchmark_type}", leave=False):
+
+            for metadata, folder_name in tqdm(
+                benchmark, desc=f"Generating images for {benchmark_type}", leave=False
+            ):
                 sample_path = benchmark_dir / folder_name
                 samples_path = sample_path / "samples"
                 samples_path.mkdir(parents=True, exist_ok=True)
                 image_path = samples_path / "0000.png"
-
+
                 if image_path.exists():
                     continue
-
+
                 try:
                     inference_time = api.generate_image(metadata["prompt"], image_path)
-
+
                     metadata_entry = {
                         "filepath": str(image_path),
                         "prompt": metadata["prompt"],
-                        "inference_time": inference_time
+                        "inference_time": inference_time,
                     }
-
+
                     existing_metadata[str(image_path)] = metadata_entry
-
+
                 except Exception as e:
                     print(f"\nError generating image for prompt: {metadata['prompt']}")
                     print(f"Error: {str(e)}")

@@ -60,7 +62,7 @@ def generate_images(api_type: str, benchmarks: List[str]):
         else:
             benchmark_dir = api_dir / benchmark_type
             benchmark_dir.mkdir(parents=True, exist_ok=True)
-
+
             metadata_file = benchmark_dir / "metadata.jsonl"
             existing_metadata = {}
             if metadata_file.exists():

@@ -68,41 +70,45 @@ def generate_images(api_type: str, benchmarks: List[str]):
                 for line in f:
                     entry = json.loads(line)
                     existing_metadata[entry["filepath"]] = entry
-
-            for prompt, image_path in tqdm(benchmark, desc=f"Generating images for {benchmark_type}", leave=False):
+
+            for prompt, image_path in tqdm(
+                benchmark, desc=f"Generating images for {benchmark_type}", leave=False
+            ):
                 full_image_path = benchmark_dir / image_path
-
+
                 if full_image_path.exists():
                     continue
-
+
                 try:
                     inference_time = api.generate_image(prompt, full_image_path)
-
+
                     metadata_entry = {
                         "filepath": str(image_path),
                         "prompt": prompt,
-                        "inference_time": inference_time
+                        "inference_time": inference_time,
                     }
-
+
                     existing_metadata[str(image_path)] = metadata_entry
-
+
                 except Exception as e:
                     print(f"\nError generating image for prompt: {prompt}")
                     print(f"Error: {str(e)}")
                     continue
-
+
         with open(metadata_file, "w") as f:
             for entry in existing_metadata.values():
                 f.write(json.dumps(entry) + "\n")
 
 
 def main():
-    parser = argparse.ArgumentParser(description="Generate images for specified benchmarks using a given API")
+    parser = argparse.ArgumentParser(
+        description="Generate images for specified benchmarks using a given API"
+    )
     parser.add_argument("api_type", help="Type of API to use for image generation")
     parser.add_argument("benchmarks", nargs="+", help="List of benchmark types to run")
-
+
     args = parser.parse_args()
-
+
     generate_images(args.api_type, args.benchmarks)
 
 
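Note: sample.py's resume behavior hinges on the metadata.jsonl round-trip: entries are read into a dict keyed by filepath, updated, and rewritten, and images whose files already exist are skipped, so reruns only generate what is missing. A minimal sketch of that round-trip (paths and values illustrative):

```python
import json
from pathlib import Path

metadata_file = Path("images/example_api/draw_bench/metadata.jsonl")  # example path
metadata_file.parent.mkdir(parents=True, exist_ok=True)

existing = {}
if metadata_file.exists():
    with open(metadata_file) as f:
        for line in f:
            entry = json.loads(line)
            existing[entry["filepath"]] = entry

# New or re-generated entries overwrite by filepath, keeping reruns idempotent.
existing["0.png"] = {"filepath": "0.png", "prompt": "a red cube", "inference_time": 1.2}

with open(metadata_file, "w") as f:
    for entry in existing.values():
        f.write(json.dumps(entry) + "\n")
```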