# TeddyYao's picture
# Upload 38 files
# 8474f02 verified
from .base_benchmark import BaseBenchmark, BenchmarkResult
from .mmlu_benchmark import MMLUBenchmark
from .gsm8k_benchmark import GSM8KBenchmark
from .humaneval_benchmark import HumanEvalBenchmark
from .gpqa_benchmark import GPQABenchmark
from .math_benchmark import MATHBenchmark
# Maps each lowercase benchmark name to the class that implements it.
# get_benchmark() lowercases the requested name before looking it up here,
# so every key must be lowercase.
BENCHMARK_REGISTRY = {
    'mmlu': MMLUBenchmark,
    'gsm8k': GSM8KBenchmark,
    'humaneval': HumanEvalBenchmark,
    'gpqa': GPQABenchmark,
    'math': MATHBenchmark,
}
def get_benchmark(name: str) -> BaseBenchmark:
    """Return a new instance of the benchmark registered under ``name``.

    The lookup is case-insensitive: ``name`` is lowercased before it is
    matched against the keys of ``BENCHMARK_REGISTRY``.

    Args:
        name: Registered benchmark name, in any letter case.

    Returns:
        The registered class, instantiated with no arguments.

    Raises:
        ValueError: If no benchmark is registered under the lowercased
            name. The message lists the available names.
    """
    key = name.lower()
    benchmark_cls = BENCHMARK_REGISTRY.get(key)
    if benchmark_cls is None:
        raise ValueError(f"Unknown benchmark: {name}. Available: {list(BENCHMARK_REGISTRY.keys())}")
    return benchmark_cls()