# InferBench / benchmark / geneval.py
import json
from pathlib import Path
from typing import Any, Dict, Iterator, List, Tuple

import requests


class GenEvalPrompts:
    def __init__(self):
        super().__init__()
        # Fetch the GenEval prompt metadata (if not already cached) and load
        # one JSON entry per non-empty line of the JSONL file.
        self._download_geneval_file()
        metadata_path = Path("downloads/geneval/evaluation_metadata.jsonl")
        self.entries: List[Dict[str, Any]] = []
        with open(metadata_path, "r") as f:
            for line in f:
                if line.strip():
                    self.entries.append(json.loads(line))

    def __iter__(self) -> Iterator[Tuple[Dict[str, Any], str]]:
        # Yield each prompt entry together with its zero-padded folder name
        # (e.g. "00000"), one folder per prompt.
        for i, entry in enumerate(self.entries):
            folder_name = f"{i:05d}"
            yield entry, folder_name

    def _download_geneval_file(self) -> None:
        # Download GenEval's evaluation_metadata.jsonl once and cache it locally.
        folder_name = Path("downloads/geneval")
        folder_name.mkdir(parents=True, exist_ok=True)
        metadata_url = "https://raw.githubusercontent.com/djghosh13/geneval/main/prompts/evaluation_metadata.jsonl"
        metadata_path = folder_name / "evaluation_metadata.jsonl"
        if not metadata_path.exists():
            response = requests.get(metadata_url, timeout=60)
            response.raise_for_status()
            with open(metadata_path, "w") as f:
                f.write(response.text)

    @property
    def name(self) -> str:
        return "geneval"

    @property
    def size(self) -> int:
        return len(self.entries)

    @property
    def metrics(self) -> List[str]:
        raise NotImplementedError("GenEval requires custom evaluation, see README.md")
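

# Minimal usage sketch, not part of the original module: it assumes GenEval's
# one-folder-per-prompt layout with a "samples/" subdirectory, and it only
# prepares those folders; actual image generation is left to the caller.
if __name__ == "__main__":
    prompts = GenEvalPrompts()
    print(f"Loaded {prompts.size} prompts for benchmark '{prompts.name}'")
    for entry, folder_name in prompts:
        out_dir = Path("outputs/geneval") / folder_name / "samples"
        out_dir.mkdir(parents=True, exist_ok=True)
        # A text-to-image model would write its images into out_dir here
        # (e.g. 0000.png), using entry["prompt"] as the generation prompt.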