Spaces:

valory
/

olas-prediction-leaderboard

Runtime error

olas-prediction-leaderboard / tabs /run_benchmark.py

cyberosa

disabling temporarily the run_benchmark tab

a9bd212 over 1 year ago

1.67 kB

	import os

	# from benchmark.run_benchmark import run_benchmark


	def run_benchmark_main(
	    tool_name,
	    model_name,
	    num_questions,
	    openai_api_key,
	    anthropic_api_key,
	    openrouter_api_key,
	):
	    """Assemble the benchmark parameters and run the benchmark.

	    NOTE: the actual ``run_benchmark`` call is currently commented out, so
	    on the success path this function only clears ``results/``, builds the
	    parameters and returns ``"completed"``.

	    Args:
	        tool_name: Name of the tool to benchmark; passed on as a
	            one-element list under ``"tools"``.
	        model_name: Model identifier. May be empty or ``None``; in that case
	            no ``"model"`` entry is set and the provider falls back to
	            ``"openrouter"``.
	        num_questions: Number of questions; any falsy value becomes 10.
	        openai_api_key: OpenAI key. Mandatory for the
	            ``prediction-request-reasoning`` and ``prediction-request-rag``
	            tools.
	        anthropic_api_key: Anthropic key, included only when truthy.
	        openrouter_api_key: OpenRouter key, included only when truthy.

	    Returns:
	        ``"completed"`` on success, otherwise a string starting with
	        ``"Error"`` that describes the problem.
	    """

	    print("Running benchmark for the provided api keys")
	    # Empty the results directory
	    os.system("rm -rf results/*")

	    # Set the benchmark parameters
	    kwargs = {}
	    if not num_questions:
	        num_questions = 10
	    kwargs["num_questions"] = num_questions
	    kwargs["tools"] = [tool_name]
	    if model_name:
	        kwargs["model"] = [model_name]

	    # Only forward the keys that were actually supplied.
	    kwargs["api_keys"] = {}
	    if openai_api_key:
	        kwargs["api_keys"]["openai"] = openai_api_key
	    if anthropic_api_key:
	        kwargs["api_keys"]["anthropic"] = anthropic_api_key
	    if openrouter_api_key:
	        kwargs["api_keys"]["openrouter"] = openrouter_api_key

	    # Normalise a missing model name so the substring tests below cannot
	    # raise TypeError on None; a missing name selects "openrouter", the
	    # same outcome an empty string has always produced.
	    provider_hint = model_name or ""
	    if "gpt" in provider_hint:
	        kwargs["llm_provider"] = "openai"
	    elif "claude" in provider_hint:
	        kwargs["llm_provider"] = "anthropic"
	    else:
	        kwargs["llm_provider"] = "openrouter"

	    if (
	        tool_name == "prediction-request-reasoning"
	        or tool_name == "prediction-request-rag"
	    ):
	        if not openai_api_key:
	            return "Error: Tools that use RAG also require an OpenAI API Key"

	    kwargs["num_urls"] = 3
	    kwargs["num_words"] = 300
	    kwargs["provide_source_links"] = True

	    print("Running benchmark")

	    # Run the benchmark
	    try:
	        # NOTE: benchmark execution is temporarily disabled; re-enable the
	        # call below to actually run it with the parameters built above.
	        # run_benchmark(kwargs=kwargs)
	        return "completed"
	    except Exception as e:
	        return f"Error running benchmark: {e}"