# Spaces: Running
from .base_benchmark import BaseBenchmark, BenchmarkResult | |
from .mmlu_benchmark import MMLUBenchmark | |
from .gsm8k_benchmark import GSM8KBenchmark | |
from .humaneval_benchmark import HumanEvalBenchmark | |
from .gpqa_benchmark import GPQABenchmark | |
from .math_benchmark import MATHBenchmark | |
# Maps each benchmark name to the class that implements it.
# get_benchmark() lowercases its argument before looking it up here, so every
# key must be lowercase to be reachable.
BENCHMARK_REGISTRY = {
    'mmlu': MMLUBenchmark,
    'gsm8k': GSM8KBenchmark,
    'humaneval': HumanEvalBenchmark,
    'gpqa': GPQABenchmark,
    'math': MATHBenchmark,
}
def get_benchmark(name: str) -> BaseBenchmark:
    """Return a new benchmark instance for the registered ``name``.

    The lookup is case-insensitive: ``name`` is lowercased before it is
    matched against the keys of ``BENCHMARK_REGISTRY``.

    Args:
        name: Registry key of the benchmark (for example ``"mmlu"``).

    Returns:
        A freshly constructed instance of the registered benchmark class,
        created by calling that class with no arguments.

    Raises:
        ValueError: If the lowercased ``name`` is not a registry key. The
            message echoes ``name`` as given and lists the available keys.
    """
    # Normalize once; the same key serves both the check and the lookup.
    key = name.lower()
    try:
        benchmark_cls = BENCHMARK_REGISTRY[key]
    except KeyError:
        # ``from None`` keeps the traceback free of the internal KeyError,
        # so callers see only the ValueError.
        raise ValueError(f"Unknown benchmark: {name}. Available: {list(BENCHMARK_REGISTRY.keys())}") from None
    return benchmark_cls()