#!/usr/bin/env python
"""
Script to remove duplicate test files.
This script backs up and removes duplicate test files based on a predefined list.
"""
import argparse
import logging
import os
import shutil

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger("remove_duplicates")

# Define duplicate test files to remove
DUPLICATES_TO_REMOVE = [
    # Keep test_supabase_tables_comprehensive.py, remove the others
    "tests/test_supabase_tables_final.py",
    "tests/test_supabase_tables_fix.py",
    "tests/test_supabase_tables_production.py",
    # Duplicate test files now consolidated
    "tests/test_gaia_agent.py",  # Consolidated into test_gaia_agent_consolidated.py
    "tests/test_fixed_agent.py",  # Consolidated into test_gaia_agent_consolidated.py
    "tests/test_duckduckgo_search.py",  # Consolidated into test_duckduckgo_consolidated.py
    "tests/test_duckduckgo_integration.py",  # Consolidated into test_duckduckgo_consolidated.py
    "tests/test_api_search.py",  # Consolidated into test_api_search_consolidated.py
    "tests/test_api_search_simple.py",  # Consolidated into test_api_search_consolidated.py
    "tests/test_api_search_tool.py",  # Consolidated into test_api_search_consolidated.py
    "tests/test_api_search_tool_comprehensive.py",  # Consolidated into test_api_search_consolidated.py
    "tests/test_api_search_standalone.py",  # Consolidated into test_api_search_consolidated.py
    # Other potential duplicates
    "tests/minimal_supabase_test.py",  # Likely a duplicate or test version
]

def backup_file(file_path):
    """Create a backup of a file before removing it."""
    backup_dir = "backup_tests"
    os.makedirs(backup_dir, exist_ok=True)
    # Copy the file into the backup directory under its bare filename
    filename = os.path.basename(file_path)
    backup_path = os.path.join(backup_dir, filename)
    shutil.copy2(file_path, backup_path)
    logger.info(f"Created backup of {file_path} at {backup_path}")

def remove_duplicate_files(file_paths, dry_run=False):
    """
    Remove duplicate test files.

    Args:
        file_paths: List of file paths to remove
        dry_run: If True, don't actually remove files

    Returns:
        The number of files removed (or that would be removed in dry-run mode).
    """
    removed_count = 0
    for file_path in file_paths:
        if os.path.exists(file_path):
            logger.info(f"Removing duplicate test file: {file_path}")
            if not dry_run:
                # Create a backup before removing
                backup_file(file_path)
                # Remove the file
                os.remove(file_path)
            removed_count += 1
        else:
            logger.warning(f"File not found: {file_path}")
    return removed_count
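# A minimal sketch of calling the helper directly (the path is hypothetical);
# with dry_run=True nothing is backed up or deleted:
#
#   removed = remove_duplicate_files(["tests/test_old_copy.py"], dry_run=True)
#   print(f"{removed} file(s) would be removed")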

def main():
    """Main function."""
    parser = argparse.ArgumentParser(description="Remove duplicate test files")
    parser.add_argument("--dry-run", action="store_true", help="Don't actually remove files")
    args = parser.parse_args()

    if args.dry_run:
        logger.info("Running in dry-run mode. No files will be removed.")

    # Remove duplicate files
    removed_count = remove_duplicate_files(DUPLICATES_TO_REMOVE, args.dry_run)

    # Print summary
    logger.info("\nCleanup Summary:")
    logger.info(f"- Duplicate test files removed: {removed_count}")
    if args.dry_run:
        logger.info("\nThis was a dry run. Run without --dry-run to apply changes.")

if __name__ == "__main__":
    # logging.basicConfig() above already attaches a console handler, so
    # adding a second StreamHandler here would print every log line twice.

    # Direct print statements for debugging
    print("Starting duplicate test removal script")
    print(f"Files to remove: {DUPLICATES_TO_REMOVE}")

    # Check which of the listed files currently exist
    for file_path in DUPLICATES_TO_REMOVE:
        if os.path.exists(file_path):
            print(f"File exists: {file_path}")
        else:
            print(f"File does not exist: {file_path}")

    main()
    print("Script completed")