# Source: Final_Assignment_GAIAAgent/src/gaia/utils/run_verification_tests.py
# Uploaded by JoachimVC: "Upload GAIA agent implementation files for assessment" (commit c922f8b)
"""
GAIA Assessment Verification Test Runner
This script loads environment variables from the .env file and then runs
the verification tests to ensure all components work correctly.
Usage:
python run_verification_tests.py [--verbose] [--show-all] [--metrics] [--components-only]
"""
import os
import sys
import subprocess
import argparse
def load_dotenv():
    """Load environment variables from the ``.env`` file next to this script.

    Every ``KEY=VALUE`` line is exported into ``os.environ``. Blank lines and
    ``#`` comment lines are ignored, an optional leading ``export `` is
    accepted, whitespace around the key and the value is trimmed, and one
    pair of matching surrounding quotes is removed from the value. Lines
    without ``=`` (or with an empty key) are skipped with a warning instead
    of aborting the whole load.

    A summary of the loaded variables is printed afterwards; values whose key
    looks like a credential are partially redacted.

    Returns:
        bool: ``True`` when the file was found and processed, ``False`` when
        no ``.env`` file exists next to this script.
    """
    env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '.env')
    if not os.path.exists(env_path):
        print(f"ERROR: .env file not found at {env_path}")
        return False

    print(f"Loading environment variables from {env_path}")
    env_vars = {}
    with open(env_path, 'r', encoding='utf-8') as file:
        for line_number, raw_line in enumerate(file, start=1):
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue
            # Accept shell-style "export KEY=VALUE" lines as well
            if line.startswith('export '):
                line = line[len('export '):].lstrip()
            key, separator, value = line.partition('=')
            key = key.strip()
            if not separator or not key:
                # Report only the line number: the content may be a secret
                print(f"WARNING: skipping malformed line {line_number} in {env_path}")
                continue
            value = value.strip()
            # Drop one pair of matching quotes: KEY="value" -> value
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]
            os.environ[key] = value
            env_vars[key] = value

    # Print the environment variables (redacted)
    known_secret_keys = {
        "OPENAI_API_KEY", "SUPABASE_KEY", "HF_TOKEN",
        "SERPER_API_KEY", "PERPLEXITY_API_KEY",
    }
    # Also redact anything whose name suggests a credential, so secrets that
    # are not in the explicit list above are never echoed in full.
    secret_markers = ("KEY", "TOKEN", "SECRET", "PASSWORD")
    print("\nEnvironment variables loaded:")
    for key, value in env_vars.items():
        looks_secret = key in known_secret_keys or any(
            marker in key.upper() for marker in secret_markers
        )
        if looks_secret:
            shown = value[:5] + "..." + value[-4:] if len(value) > 9 else "***"
        else:
            shown = value
        print(f" {key}: {shown}")
    return True
def run_verification_tests(args):
    """Run ``verify_gaia_assessment.py`` as a child process and relay its output.

    The script is looked up relative to the current working directory and is
    started with the same interpreter that runs this file. The child's stdout
    is echoed line by line while it runs; its stderr is collected
    concurrently and printed under an ``ERROR OUTPUT:`` heading afterwards.

    Args:
        args: Parsed command-line namespace. The boolean attributes
            ``verbose``, ``show_all`` and ``metrics`` are forwarded to the
            child as ``--verbose``, ``--show-all`` and ``--metrics``.

    Returns:
        int: Exit code of the child process.
    """
    # Local import keeps this function self-contained; only needed here.
    import threading

    # sys.executable guarantees the child uses the same interpreter (and
    # therefore the same installed packages); a bare "python" may be missing
    # or point at a different installation.
    cmd = [sys.executable or "python", "verify_gaia_assessment.py"]

    # Add any command-line arguments
    if args.verbose:
        cmd.append("--verbose")
    if args.show_all:
        cmd.append("--show-all")
    if args.metrics:
        cmd.append("--metrics")

    print(f"\nRunning verification tests: {' '.join(cmd)}")

    stderr_chunks = []
    with subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    ) as process:
        # Drain stderr on a helper thread. Reading it only after stdout hits
        # EOF deadlocks once the child fills the stderr pipe buffer.
        def _drain_stderr():
            stderr_chunks.append(process.stderr.read())

        drainer = threading.Thread(target=_drain_stderr, daemon=True)
        drainer.start()

        # Stream output in real-time; strip only the line ending so the
        # child's own indentation is preserved.
        for line in process.stdout:
            print(line.rstrip("\n"))

        exit_code = process.wait()
        drainer.join()

    # Report any error output
    stderr = "".join(stderr_chunks)
    if stderr:
        print("\nERROR OUTPUT:")
        print(stderr)

    return exit_code
def _run_component_test(label, script):
    """Run one minimal test script and return ``True`` if it exited with 0."""
    print(f"\n===== Testing {label} Integration =====")
    # Use the current interpreter so the child sees the same installed
    # packages; a bare "python" may not exist or may be another install.
    result = subprocess.run(
        [sys.executable or "python", script],
        capture_output=True,
        text=True,
    )
    succeeded = result.returncode == 0
    print(f"{label} Test Result: {'SUCCESS' if succeeded else 'FAILED'}")

    # Show at most the first 200 characters of the script's stdout
    stdout = result.stdout
    if len(stdout) > 200:
        print(stdout[:200] + "...")
    else:
        print(stdout)

    # On failure the cause (e.g. an import error traceback) is on stderr;
    # show its tail, where the actual exception message lives.
    if not succeeded and result.stderr:
        print(f"{label} stderr (last 500 chars):")
        print(result.stderr[-500:])
    return succeeded


def test_single_integration():
    """Test the OpenAI, Supabase and Serper integrations individually.

    Runs ``test_openai_minimal.py``, ``test_supabase_minimal.py`` and
    ``test_serper_minimal.py`` (looked up in the current working directory)
    one after another, printing a result line and a stdout excerpt for each.

    Returns:
        tuple[bool, bool, bool]: Success flags in the order
        ``(openai, supabase, serper)``; a flag is ``True`` when the
        corresponding script exited with code 0.
    """
    openai_ok = _run_component_test("OpenAI", "test_openai_minimal.py")
    supabase_ok = _run_component_test("Supabase", "test_supabase_minimal.py")
    serper_ok = _run_component_test("Serper", "test_serper_minimal.py")
    return (openai_ok, supabase_ok, serper_ok)
# Source of the generated OpenAI connectivity check.
_OPENAI_MINIMAL_TEST = """
import os
import sys
import openai
from openai import OpenAI

def test_openai_connection():
    print("Testing OpenAI connection...")
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        print("ERROR: OPENAI_API_KEY not set")
        return False
    try:
        client = OpenAI(api_key=api_key)
        models = client.models.list()
        print(f"Successfully connected to OpenAI API. Available models: {len(models.data)}")
        # Test a simple completion
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Hello, world!"}
            ]
        )
        print(f"Response: {response.choices[0].message.content[:50]}...")
        return True
    except Exception as e:
        print(f"Error connecting to OpenAI: {str(e)}")
        return False

if __name__ == "__main__":
    if test_openai_connection():
        print("OpenAI test passed!")
        sys.exit(0)
    else:
        print("OpenAI test failed!")
        sys.exit(1)
"""

# Source of the generated Supabase connectivity check.
# NOTE(review): the script treats a failing query on "postgres_version" as a
# connection failure; presumably that table/view exists in the target
# project - confirm against the database schema.
_SUPABASE_MINIMAL_TEST = """
import os
import sys
from supabase import create_client, Client

def test_supabase_connection():
    print("Testing Supabase connection...")
    supabase_url = os.getenv("SUPABASE_URL")
    supabase_key = os.getenv("SUPABASE_KEY")
    if not supabase_url or not supabase_key:
        print("ERROR: SUPABASE_URL or SUPABASE_KEY not set")
        return False
    try:
        supabase: Client = create_client(supabase_url, supabase_key)
        # Try a simple query
        version = supabase.table("postgres_version").select("*").execute()
        print(f"Successfully connected to Supabase. Version: {version}")
        # Check for the required tables
        tables = ["gaia_memory", "gaia_memory_working", "gaia_memory_conversation"]
        for table in tables:
            try:
                result = supabase.table(table).select("count(*)", count="exact").execute()
                count = result.count
                print(f"Table {table} exists with {count} rows")
            except Exception as e:
                print(f"Error accessing table {table}: {str(e)}")
                return False
        return True
    except Exception as e:
        print(f"Error connecting to Supabase: {str(e)}")
        return False

if __name__ == "__main__":
    if test_supabase_connection():
        print("Supabase test passed!")
        sys.exit(0)
    else:
        print("Supabase test failed!")
        sys.exit(1)
"""

# Source of the generated Serper connectivity check. The request carries a
# timeout so an unresponsive endpoint cannot hang the test run forever.
_SERPER_MINIMAL_TEST = """
import os
import sys
import json
import requests

def test_serper_connection():
    print("Testing Serper API connection...")
    api_key = os.getenv("SERPER_API_KEY")
    if not api_key:
        print("ERROR: SERPER_API_KEY not set")
        return False
    try:
        url = "https://google.serper.dev/search"
        payload = json.dumps({
            "q": "Who is the current president of the United States",
            "gl": "us",
            "hl": "en"
        })
        headers = {
            'X-API-KEY': api_key,
            'Content-Type': 'application/json'
        }
        response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
        if response.status_code == 200:
            print(f"Successfully connected to Serper API. Response: {response.text[:100]}...")
            return True
        else:
            print(f"Error from Serper API: {response.status_code} - {response.text}")
            return False
    except Exception as e:
        print(f"Error connecting to Serper API: {str(e)}")
        return False

if __name__ == "__main__":
    if test_serper_connection():
        print("Serper test passed!")
        sys.exit(0)
    else:
        print("Serper test failed!")
        sys.exit(1)
"""


def create_minimal_tests():
    """Create minimal test scripts for individual component testing.

    Writes ``test_openai_minimal.py``, ``test_supabase_minimal.py`` and
    ``test_serper_minimal.py`` into the current working directory,
    overwriting any existing files with those names. Each generated script
    reads its credentials from environment variables, exits with code 0 on
    success and with code 1 on failure.
    """
    scripts = {
        "test_openai_minimal.py": _OPENAI_MINIMAL_TEST,
        "test_supabase_minimal.py": _SUPABASE_MINIMAL_TEST,
        "test_serper_minimal.py": _SERPER_MINIMAL_TEST,
    }
    for filename, source in scripts.items():
        # Explicit encoding keeps the output independent of the platform's
        # default locale encoding.
        with open(filename, "w", encoding="utf-8") as f:
            f.write(source)
def parse_args(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When ``None`` (the default) argparse reads
            ``sys.argv[1:]``, which is what a plain ``parse_args()`` call
            does.

    Returns:
        argparse.Namespace: Namespace with the boolean attributes
        ``verbose``, ``show_all``, ``metrics`` and ``components_only``; each
        is ``False`` unless its flag was given.
    """
    parser = argparse.ArgumentParser(description="Run GAIA Assessment verification tests")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
    parser.add_argument("--show-all", action="store_true", help="Show all table entries")
    parser.add_argument("--metrics", action="store_true", help="Show detailed performance metrics")
    parser.add_argument("--components-only", action="store_true", help="Test individual components only")
    return parser.parse_args(argv)
def main():
    """Script entry point.

    Parses the command line, loads the ``.env`` file (exiting the process
    with status 1 if that fails) and writes the minimal component test
    scripts. With ``--components-only`` it runs just those component tests
    and returns 0 when all of them pass, 1 otherwise. Without the flag it
    returns the exit code of the full verification run.
    """
    cli_args = parse_args()

    # The environment must be in place before anything else can run
    env_loaded = load_dotenv()
    if not env_loaded:
        print("Failed to load environment variables from .env file")
        sys.exit(1)

    # The helper scripts are always (re)generated
    create_minimal_tests()

    # Default path: hand over to the full verification suite
    if not cli_args.components_only:
        return run_verification_tests(cli_args)

    # Component-only path: run each integration check and summarise
    openai_ok, supabase_ok, serper_ok = test_single_integration()
    summary = (
        ("OpenAI", openai_ok),
        ("Supabase", supabase_ok),
        ("Serper", serper_ok),
    )

    print("\n===== COMPONENT TEST RESULTS =====")
    for label, passed in summary:
        verdict = '✅ PASS' if passed else '❌ FAIL'
        print(f"{label} Integration: {verdict}")

    if all(passed for _, passed in summary):
        print("\nAll component tests passed successfully!")
        return 0

    print("\nSome component tests failed. Please check the output for details.")
    return 1
# Run only when executed as a script; the value returned by main() becomes
# the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)