# Source: Final_Assignment_GAIAAgent/src/gaia/utils/remove_debug_statements.py
# Uploaded by: JoachimVC
# Commit message: Upload GAIA agent implementation files for assessment
# Commit: c922f8b
"""
Script to remove debug statements and unnecessary commented code from Python files.
This script focuses on removing:
1. Debug logging statements (logger.debug)
2. Print statements used for debugging
3. Commented code that's not necessary for production
"""
import os
import re
import logging
from typing import List, Dict, Tuple
# Configure the root logger once at import time: INFO level, timestamped records.
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO)

# Logger used by every function in this script.
logger = logging.getLogger("remove_debug")
# Files that are cleaned by default. The paths are relative, so they are
# resolved against the directory the script is launched from.
CRITICAL_FILES = [
    # agent package
    'agent/agent.py',
    'agent/config.py',
    'agent/graph.py',
    'agent/tool_registry.py',
    # memory package
    'memory/supabase_memory.py',
    # tools package
    'tools/web_tools.py',
    'tools/reasoning_tools.py',
]
# Regexes (applied with re.match to one source line at a time) that identify
# a line as a debug statement. Each one only matches when the call's closing
# parenthesis appears on the same line as the call name.
DEBUG_PATTERNS = [
    r"^\s*logger\.debug\s*\(.*\)",  # logger.debug(...)
    r"^\s*print\s*\(.*\)",          # print(...)
    r"^\s*console\.log\s*\(.*\)",   # console.log(...)
]
# Regexes (applied with re.match to a whole comment line) describing comments
# that must never be stripped. Besides purely cosmetic headers, this includes
# comments that are functional: deleting a shebang, an encoding declaration or
# a tool directive changes how the cleaned file runs or is linted.
KEEP_COMMENT_PATTERNS = [
    r'^\s*# -+$',  # Separator lines
    r'^\s*# [A-Z][a-z]+ [A-Z][a-z]+',  # Section headers like "# Import Configuration"
    r'^\s*# Copyright',  # Copyright notices
    r'^\s*# License',  # License information
    r'^\s*# Author',  # Author information
    r'^#!',  # Shebang line
    r'^[ \t\f]*#.*?coding[:=][ \t]*[-_.a-zA-Z0-9]+',  # PEP 263 encoding declaration
    r'^\s*#\s*(?:type:|noqa\b|pragma:|pylint:|fmt:|mypy:|isort:|flake8:|ruff:)',  # Tool directives
]
def should_keep_comment(line: str) -> bool:
    """Return True when a comment line must survive the cleanup.

    A comment is preserved when it is a bare ``#``, when it matches one of
    the regexes in ``KEEP_COMMENT_PATTERNS``, or when it contains a
    triple-quote sequence.
    """
    return (
        line.strip() == "#"
        or any(re.match(keep_regex, line) for keep_regex in KEEP_COMMENT_PATTERNS)
        or '"""' in line
        or "'''" in line
    )
def _leading_whitespace(line: str) -> str:
    """Return the run of whitespace that starts *line*."""
    return line[:len(line) - len(line.lstrip())]


def _indent_width(line: str) -> int:
    """Return the indentation width of a code line, with tabs expanded."""
    return len(_leading_whitespace(line).expandtabs())


def _is_code_line(line: str) -> bool:
    """Return True for a line that is neither blank nor a ``#`` comment."""
    stripped = line.strip()
    return bool(stripped) and not stripped.startswith('#')


def _is_debug_statement(line: str) -> bool:
    """Return True when *line* matches one of ``DEBUG_PATTERNS``."""
    return any(re.match(pattern, line) for pattern in DEBUG_PATTERNS)


def _opens_block(line: str) -> bool:
    """Heuristically tell whether *line* ends with a block-opening colon."""
    text = line.rstrip()
    # Second test tolerates a trailing comment such as ``if x:  # why``.
    return text.endswith(':') or text.split('#', 1)[0].rstrip().endswith(':')


def _removal_empties_block(kept: List[str], lines: List[str], start: int, indent: int) -> bool:
    """Tell whether dropping a statement at *indent* leaves its block empty.

    *kept* holds the lines already emitted; ``lines[start:]`` are the lines
    that follow the statement in the original file.
    """
    header = next((prev for prev in reversed(kept) if _is_code_line(prev)), None)
    if header is None or _indent_width(header) >= indent or not _opens_block(header):
        # The statement is not the first one of an indented block.
        return False
    for position in range(start, len(lines)):
        following = lines[position]
        # Blank lines, comments and further debug statements do not count as
        # block content: they are either not statements or will be removed too.
        if not _is_code_line(following) or _is_debug_statement(following):
            continue
        return _indent_width(following) < indent
    return True


def _is_valid_python(source: str) -> bool:
    """Return True when *source* parses as Python (nothing is executed)."""
    import ast

    try:
        ast.parse(source)
    except (SyntaxError, ValueError):
        return False
    return True


def clean_file(file_path: str, dry_run: bool = False) -> Tuple[int, int]:
    """
    Clean debug statements and unnecessary comments from a file.

    Lines inside triple-quoted strings are always kept verbatim. When a
    removed debug statement was the only statement of its block, a ``pass``
    is written in its place so the block stays syntactically valid. If the
    original file parses as Python but the cleaned text would not, nothing
    is written and (0, 0) is returned.

    Args:
        file_path: Path to the file to clean
        dry_run: If True, don't actually modify the file
    Returns:
        Tuple of (debug_statements_removed, comment_lines_removed);
        (0, 0) if the file could not be processed
    """
    logger.info(f"Cleaning file: {file_path}")
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        debug_statements_removed = 0
        comment_lines_removed = 0
        cleaned_lines: List[str] = []
        in_docstring = False

        for index, line in enumerate(lines):
            # State *before* this line: decides how the line itself is treated.
            inside_string = in_docstring
            is_comment = not inside_string and line.strip().startswith('#')

            # Triple quotes inside an ordinary comment must not flip the
            # string state, otherwise the rest of the file is misclassified.
            if not is_comment:
                triple_quotes_count = line.count('"""') + line.count("'''")
                if triple_quotes_count % 2 == 1:
                    in_docstring = not in_docstring

            if inside_string:
                # Content (or the closing line) of a multi-line string.
                cleaned_lines.append(line)
                continue

            if _is_debug_statement(line):
                debug_statements_removed += 1
                if _removal_empties_block(cleaned_lines, lines, index + 1, _indent_width(line)):
                    newline = "\n" if line.endswith("\n") else ""
                    cleaned_lines.append(f"{_leading_whitespace(line)}pass{newline}")
                continue

            if is_comment and not should_keep_comment(line):
                comment_lines_removed += 1
                continue

            cleaned_lines.append(line)

        original_source = "".join(lines)
        cleaned_source = "".join(cleaned_lines)

        if cleaned_source != original_source:
            # Safety net for cases the line-based heuristics cannot handle
            # (e.g. a debug call spanning several lines): never replace a
            # file that parses with one that does not.
            if _is_valid_python(original_source) and not _is_valid_python(cleaned_source):
                logger.error(
                    f"Error cleaning file {file_path}: cleaned output would not be valid Python; file left unchanged"
                )
                return 0, 0
            if not dry_run:
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(cleaned_source)

        logger.info(f"Removed {debug_statements_removed} debug statements and {comment_lines_removed} comment lines")
        return debug_statements_removed, comment_lines_removed
    except Exception as e:
        # Deliberately broad: one unreadable file must not abort a batch run.
        logger.error(f"Error cleaning file {file_path}: {str(e)}")
        return 0, 0
def clean_files(file_paths: List[str], dry_run: bool = False) -> Dict[str, Tuple[int, int]]:
    """
    Run ``clean_file`` over several paths and collect the per-file counts.

    Paths that do not exist are reported with a warning and left out of the
    result.

    Args:
        file_paths: List of file paths to clean
        dry_run: If True, don't actually modify the files
    Returns:
        Dictionary mapping file paths to (debug_statements_removed, comment_lines_removed)
    """
    outcome: Dict[str, Tuple[int, int]] = {}
    for path in file_paths:
        if not os.path.exists(path):
            logger.warning(f"File not found: {path}")
            continue
        outcome[path] = clean_file(path, dry_run)
    return outcome
def main():
    """Parse the command line, clean the selected files and log a summary.

    Options:
        --dry-run: report what would be removed without modifying any file
        --all: also clean every ``.py`` file found below the current directory
    """
    import argparse

    parser = argparse.ArgumentParser(description="Remove debug statements and unnecessary comments")
    parser.add_argument("--dry-run", action="store_true", help="Don't actually modify files")
    parser.add_argument("--all", action="store_true", help="Clean all Python files, not just critical ones")
    args = parser.parse_args()

    if args.dry_run:
        logger.info("Running in dry-run mode. No files will be modified.")

    files_to_clean = CRITICAL_FILES.copy()
    if args.all:
        # Compare normalized paths: os.walk('.') yields './agent/agent.py',
        # which would otherwise never equal the 'agent/agent.py' entries above
        # and cause those files to be queued twice.
        seen = {os.path.normpath(path).replace('\\', '/') for path in files_to_clean}
        # Directories that hold third-party or generated code, not project sources.
        skipped_dirs = {"__pycache__", "venv", "site-packages"}
        for root, dirs, files in os.walk('.'):
            # Prune in place so os.walk does not descend into these directories.
            dirs[:] = [d for d in dirs if not d.startswith('.') and d not in skipped_dirs]
            for file in files:
                if not file.endswith('.py'):
                    continue
                file_path = os.path.normpath(os.path.join(root, file)).replace('\\', '/')
                if file_path not in seen:
                    seen.add(file_path)
                    files_to_clean.append(file_path)

    # Single pass over the files: cleaning them more than once would make the
    # summary of a real run report zero, because a second pass finds nothing.
    results = clean_files(files_to_clean, args.dry_run)

    total_debug_removed = sum(debug for debug, _ in results.values())
    total_comments_removed = sum(comments for _, comments in results.values())

    logger.info("\nCleanup Summary:")
    logger.info(f"- Files processed: {len(results)}")
    logger.info(f"- Debug statements removed: {total_debug_removed}")
    logger.info(f"- Comment lines removed: {total_comments_removed}")

    if args.dry_run:
        logger.info("\nThis was a dry run. Run without --dry-run to apply changes.")
if __name__ == "__main__":
    # No extra handler is attached here: logging.basicConfig() at the top of
    # this module already installs a root handler with the same format, and
    # records from `logger` propagate to it. A second StreamHandler on
    # `logger` would print every message twice.
    main()