Spaces:
Runtime error
Runtime error
| import os | |
| # from benchmark.run_benchmark import run_benchmark | |
def run_benchmark_main(
    tool_name,
    model_name,
    num_questions,
    openai_api_key,
    anthropic_api_key,
    openrouter_api_key,
):
    """Build the benchmark configuration and run the benchmark.

    Args:
        tool_name: Name of the tool to benchmark. Stored as a one-element
            list under ``kwargs["tools"]``.
        model_name: Model identifier. May be empty or ``None``; in that case
            no ``"model"`` entry is set and the provider falls back to
            ``"openrouter"``.
        num_questions: Number of questions to run. Any falsy value
            (``None``, ``0``, ``""``) is replaced by 10.
        openai_api_key: Optional OpenAI key. Required when ``tool_name`` is
            ``"prediction-request-reasoning"`` or ``"prediction-request-rag"``.
        anthropic_api_key: Optional Anthropic key.
        openrouter_api_key: Optional OpenRouter key.

    Returns:
        ``"completed"`` on success, or a human-readable string starting with
        ``"Error"`` describing why the benchmark was not run.
    """
    print("Running benchmark for the provided api keys")

    # Empty the results directory
    os.system("rm -rf results/*")

    # Set the benchmark parameters
    kwargs = {}
    kwargs["num_questions"] = num_questions or 10
    kwargs["tools"] = [tool_name]
    if model_name:
        kwargs["model"] = [model_name]

    # Only forward the keys that were actually supplied.
    kwargs["api_keys"] = {}
    if openai_api_key:
        kwargs["api_keys"]["openai"] = openai_api_key
    if anthropic_api_key:
        kwargs["api_keys"]["anthropic"] = anthropic_api_key
    if openrouter_api_key:
        kwargs["api_keys"]["openrouter"] = openrouter_api_key

    # model_name is optional (see the check above), so normalise None to ""
    # before the substring tests; `"gpt" in None` would raise TypeError.
    # An absent model takes the same fallback as an empty one: openrouter.
    model_id = model_name or ""
    if "gpt" in model_id:
        kwargs["llm_provider"] = "openai"
    elif "claude" in model_id:
        kwargs["llm_provider"] = "anthropic"
    else:
        kwargs["llm_provider"] = "openrouter"

    if tool_name in ("prediction-request-reasoning", "prediction-request-rag"):
        if not openai_api_key:
            return "Error: Tools that use RAG also require an OpenAI API Key"

    kwargs["num_urls"] = 3
    kwargs["num_words"] = 300
    kwargs["provide_source_links"] = True

    print("Running benchmark")

    # Run the benchmark
    # NOTE: the actual run_benchmark call is currently disabled, so this
    # always returns "completed"; the except clause only becomes reachable
    # once the call is re-enabled.
    try:
        # run_benchmark(kwargs=kwargs)
        return "completed"
    except Exception as e:
        return f"Error running benchmark: {e}"