import os

from benchmark.run_benchmark import run_benchmark


def run_benchmark_main(
    tool_name,
    model_name,
    num_questions,
    openai_api_key,
    anthropic_api_key,
    openrouter_api_key,
):
    """Run the benchmark with the provided tool, model, and API keys."""
    # Empty the results directory before starting a fresh run
    os.system("rm -rf results/*")

    # Set the benchmark parameters
    kwargs = {}
    if not num_questions:
        num_questions = 10
    kwargs["num_questions"] = num_questions
    kwargs["tools"] = [tool_name]
    if model_name:
        kwargs["model"] = [model_name]

    # Collect whichever API keys were provided
    kwargs["api_keys"] = {}
    if openai_api_key:
        kwargs["api_keys"]["openai"] = openai_api_key
    if anthropic_api_key:
        kwargs["api_keys"]["anthropic"] = anthropic_api_key
    if openrouter_api_key:
        kwargs["api_keys"]["openrouter"] = openrouter_api_key

    # Infer the LLM provider from the model name; fall back to OpenRouter
    if model_name and "gpt" in model_name:
        kwargs["llm_provider"] = "openai"
    elif model_name and "claude" in model_name:
        kwargs["llm_provider"] = "anthropic"
    else:
        kwargs["llm_provider"] = "openrouter"

    # Tools that use RAG need extra retrieval parameters and an OpenAI API key
    if tool_name in ("prediction-request-reasoning", "prediction-request-rag"):
        if not openai_api_key:
            return "Error: Tools that use RAG also require an OpenAI API Key"
        kwargs["num_urls"] = 3
        kwargs["num_words"] = 300
        kwargs["provide_source_links"] = True

    print("Running benchmark")

    # Run the benchmark
    try:
        run_benchmark(kwargs=kwargs)
        return "completed"
    except Exception as e:
        return f"Error running benchmark: {e}"
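

# Example invocation (a minimal sketch, not part of the original module): the
# tool name is taken from the RAG branch above, while the model name and the
# environment-variable names are illustrative assumptions.
if __name__ == "__main__":
    result = run_benchmark_main(
        tool_name="prediction-request-rag",
        model_name="gpt-4",  # assumed model id; a "gpt" name selects the OpenAI provider
        num_questions=5,
        openai_api_key=os.environ.get("OPENAI_API_KEY"),
        anthropic_api_key=os.environ.get("ANTHROPIC_API_KEY"),
        openrouter_api_key=os.environ.get("OPENROUTER_API_KEY"),
    )
    print(result)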