""" Image Metadata Remover Module This module provides a comprehensive class-based metadata remover that strips ALL metadata from images while preserving original quality, format, and aspect ratio. Handles EXIF, IPTC, XMP, ICC profiles, and all other embedded metadata for security/privacy. """ from .image_base import ImageBase import os from custom_logger import logger_config from PIL import Image from PIL.ExifTags import TAGS, GPSTAGS from typing import Dict import base64 class RemoveMetadata(ImageBase): """ Comprehensive image metadata remover for security and privacy. Removes ALL metadata types including EXIF, IPTC, XMP, ICC profiles, thumbnails, GPS data, camera settings, and any other embedded information. """ def __init__(self): super().__init__("remove_metadata") def _extract_all_metadata(self, image: Image.Image): """ Extract ALL available metadata from the image for security analysis. Args: image: PIL Image object Returns: Dictionary containing comprehensive metadata extraction """ all_metadata = {} # 1. Extract EXIF data (most common and detailed) try: if hasattr(image, '_getexif') and image._getexif(): exif_dict = image._getexif() all_metadata['exif_raw'] = exif_dict # Decode EXIF tags to human-readable format exif_decoded = {} for tag_id, value in exif_dict.items(): tag = TAGS.get(tag_id, tag_id) # Special handling for GPS data if tag == 'GPSInfo': gps_data = {} for gps_tag_id, gps_value in value.items(): gps_tag = GPSTAGS.get(gps_tag_id, gps_tag_id) gps_data[gps_tag] = gps_value exif_decoded[tag] = gps_data else: exif_decoded[tag] = value all_metadata['exif_decoded'] = exif_decoded except Exception as e: all_metadata['exif_error'] = str(e) # 2. Extract all image.info metadata (includes IPTC, XMP, ICC, etc.) if hasattr(image, 'info') and image.info: info_data = {} for key, value in image.info.items(): # Handle binary data by encoding it if isinstance(value, bytes): try: # Try to decode as text first info_data[key] = value.decode('utf-8', errors='ignore') except: # If that fails, encode as base64 for viewing info_data[key] = f"{base64.b64encode(value[:100]).decode()}" else: info_data[key] = value all_metadata['info'] = info_data # 3. Extract ICC Profile (color management) try: if hasattr(image, 'info') and 'icc_profile' in image.info: icc_profile = image.info['icc_profile'] all_metadata['icc_profile_size'] = len(icc_profile) if icc_profile else 0 all_metadata['icc_profile_present'] = bool(icc_profile) except Exception as e: all_metadata['icc_error'] = str(e) # 4. Extract XMP metadata try: if hasattr(image, 'info') and 'xmp' in image.info: xmp_data = image.info['xmp'] if isinstance(xmp_data, bytes): all_metadata['xmp'] = xmp_data.decode('utf-8', errors='ignore') else: all_metadata['xmp'] = xmp_data except Exception as e: all_metadata['xmp_error'] = str(e) # 5. Extract IPTC metadata try: if hasattr(image, 'info') and any(key.startswith('iptc') for key in image.info.keys()): iptc_data = {k: v for k, v in image.info.items() if k.startswith('iptc')} all_metadata['iptc'] = iptc_data except Exception as e: all_metadata['iptc_error'] = str(e) # 6. Extract PIL tag and tag_v2 (alternative EXIF access) try: if hasattr(image, 'tag') and image.tag: all_metadata['pil_tag'] = dict(image.tag) except Exception as e: all_metadata['pil_tag_error'] = str(e) try: if hasattr(image, 'tag_v2') and image.tag_v2: all_metadata['pil_tag_v2'] = dict(image.tag_v2) except Exception as e: all_metadata['pil_tag_v2_error'] = str(e) # 7. Extract quantization tables (JPEG compression info) try: if hasattr(image, 'quantization') and image.quantization: all_metadata['quantization_tables'] = len(image.quantization) except Exception as e: all_metadata['quantization_error'] = str(e) # 8. Extract embedded thumbnails try: if hasattr(image, 'info') and 'thumbnail' in image.info: all_metadata['thumbnail_present'] = True except: pass # 9. Extract text chunks (PNG specific) try: if image.format == 'PNG' and hasattr(image, 'text'): all_metadata['png_text_chunks'] = dict(image.text) except Exception as e: all_metadata['png_text_error'] = str(e) # 10. Extract basic image properties that might contain metadata basic_props = { 'format': getattr(image, 'format', None), 'mode': getattr(image, 'mode', None), 'size': getattr(image, 'size', None), 'filename': getattr(image, 'filename', None), 'format_description': getattr(image, 'format_description', None) } all_metadata['basic_properties'] = basic_props return all_metadata def _get_quality_settings_for_format(self, format_name: str, original_mode: str) -> Dict: """ Get optimal save settings for maximum quality preservation per format. ALL metadata will be stripped regardless of format. Args: format_name: PIL format name original_mode: Original image color mode Returns: Dictionary of save parameters """ settings = {} if format_name == 'JPEG': settings = { 'quality': 100, # Maximum quality 'optimize': False, # Don't optimize to preserve quality 'progressive': False, # Standard baseline JPEG 'subsampling': 0, # No chroma subsampling 'exif': b'', # Explicitly remove EXIF 'icc_profile': None # Remove ICC profile } elif format_name == 'PNG': settings = { 'optimize': False, # Don't optimize to preserve quality 'compress_level': 1, # Minimal compression 'icc_profile': None, # Remove ICC profile 'pnginfo': None # Remove PNG info chunks } elif format_name == 'WEBP': settings = { 'lossless': True, # Lossless compression 'quality': 100, # Maximum quality 'method': 6, # Best compression method 'icc_profile': None, # Remove ICC profile 'exif': b'' # Remove EXIF } elif format_name == 'TIFF': settings = { 'compression': None, # No compression 'icc_profile': None # Remove ICC profile } elif format_name in ['BMP', 'GIF']: settings = {} # These formats have limited metadata anyway return settings def _clean_png_with_transparency(self, image: Image.Image, output_path: str) -> bool: """ Clean PNG image while preserving transparency and quality, removing ALL metadata. Args: image: PIL Image object output_path: Path for output file Returns: True if successful """ try: original_mode = image.mode # Create completely new clean image preserving only visual data if original_mode == 'P': # Palette mode - preserve only palette and pixel data clean_img = Image.new(original_mode, image.size) if image.getpalette(): clean_img.putpalette(image.getpalette()) clean_img.paste(image, (0, 0)) else: # RGBA or LA mode - preserve only pixel data clean_img = Image.new(original_mode, image.size, (0, 0, 0, 0)) clean_img.paste(image, (0, 0)) # Ensure no metadata is carried over clean_img.info = {} # Save with quality preservation and NO metadata save_settings = self._get_quality_settings_for_format('PNG', original_mode) clean_img.save(output_path, format='PNG', **save_settings) return True except Exception as e: raise Exception(f"PNG transparency cleaning failed: {e}") def _clean_jpeg_image(self, image: Image.Image, output_path: str) -> bool: """ Clean JPEG image while preserving maximum quality, removing ALL metadata. Args: image: PIL Image object output_path: Path for output file Returns: True if successful """ try: # Convert to RGB if necessary and create clean copy if image.mode != 'RGB': clean_img = image.convert('RGB') else: # Create new image to ensure no metadata transfer clean_img = Image.new('RGB', image.size) clean_img.paste(image, (0, 0)) # Ensure no metadata is carried over clean_img.info = {} # Save with maximum quality and absolutely NO metadata save_settings = self._get_quality_settings_for_format('JPEG', image.mode) clean_img.save(output_path, format='JPEG', **save_settings) return True except Exception as e: raise Exception(f"JPEG cleaning failed: {e}") def _clean_other_format(self, image: Image.Image, output_path: str, original_format: str) -> bool: """ Clean other image formats while preserving quality, removing ALL metadata. Args: image: PIL Image object output_path: Path for output file original_format: Original image format Returns: True if successful """ try: # Create completely new image without any metadata transfer clean_img = Image.new(image.mode, image.size) # Handle palette images (preserve only palette, not metadata) if image.mode == 'P' and image.getpalette(): clean_img.putpalette(image.getpalette()) # Paste only pixel data clean_img.paste(image, (0, 0)) # Ensure absolutely no metadata is carried over clean_img.info = {} # Save with format-specific quality settings and no metadata save_settings = self._get_quality_settings_for_format(original_format, image.mode) clean_img.save(output_path, format=original_format, **save_settings) return True except Exception as e: raise Exception(f"{original_format} cleaning failed: {e}") def _verify_metadata_removal(self, output_path) -> bool: """ Comprehensive verification that ALL metadata has been removed. Returns: True if ALL metadata was successfully removed """ try: with Image.open(output_path) as img: remaining_metadata = self._extract_all_metadata(img) # Check for any remaining metadata beyond basic properties sensitive_keys = ['exif_raw', 'exif_decoded', 'info', 'icc_profile_present', 'xmp', 'iptc', 'pil_tag', 'pil_tag_v2', 'quantization_tables', 'thumbnail_present', 'png_text_chunks'] remaining_sensitive = {k: v for k, v in remaining_metadata.items() if k in sensitive_keys and v} if remaining_sensitive: logger_config.warning(f"WARNING: Sensitive metadata still present: {list(remaining_sensitive.keys())}") logger_config.debug(f"Remaining metadata: {remaining_sensitive}") return False else: logger_config.success("ALL metadata successfully removed - image is clean") return True except Exception as e: raise Exception(f"Verification failed: {e}") def _verify_image_quality(self, original_path: str, cleaned_path: str) -> bool: """ Verify that image quality and dimensions are preserved. Args: original_path: Path to original image cleaned_path: Path to cleaned image Returns: True if quality is preserved """ try: with Image.open(original_path) as original, Image.open(cleaned_path) as cleaned: # Check dimensions if original.size != cleaned.size: raise Exception(f"Dimension mismatch! Original: {original.size}, Cleaned: {cleaned.size}") # Check if file exists and has reasonable size if not os.path.exists(cleaned_path): return False cleaned_size = os.path.getsize(cleaned_path) if cleaned_size == 0: raise Exception("Cleaned file is empty") logger_config.success(f"Quality preserved - Size: {cleaned.size}, File size: {cleaned_size:,} bytes") return True except Exception as e: raise Exception(f"Quality verification failed: {e}") def process(self, input_file_name: str): """ Remove ALL metadata from an image for complete security and privacy protection. This method removes: - EXIF data (camera settings, GPS coordinates, timestamps) - IPTC data (keywords, captions, copyright) - XMP data (Adobe metadata) - ICC color profiles - Embedded thumbnails - Quantization tables - PNG text chunks - Any other embedded metadata Args: input_file_name: Input image name Returns: Tuple containing (output_path, extracted_metadata) Raises: FileNotFoundError: If input file doesn't exist ValueError: If file format is not supported Exception: If metadata removal fails """ try: self.input_file_name = input_file_name self.input_file_path = f'{self.input_dir}/{self.input_file_name}' # Validate input self._validate_input_file() # Generate output path output_path = self._generate_output_path() logger_config.info(f"Processing: {self.input_file_path}") # Open and analyze image with Image.open(self.input_file_path) as image: original_format = image.format original_mode = image.mode # Extract ALL metadata for security analysis extracted_metadata = self._extract_all_metadata(image) # Print comprehensive metadata analysis logger_config.info("=== COMPREHENSIVE METADATA ANALYSIS ===") if extracted_metadata: for key, value in extracted_metadata.items(): if value: # Only show non-empty metadata logger_config.info(f"{key}: {value}") else: logger_config.info("No metadata found in original image") logger_config.info("=== END METADATA ANALYSIS ===") # Choose appropriate cleaning method based on format success = False if original_format == 'PNG' and original_mode in ('RGBA', 'LA', 'P'): success = self._clean_png_with_transparency(image, output_path) elif original_format == 'JPEG' or output_path.lower().endswith(('.jpg', '.jpeg')): success = self._clean_jpeg_image(image, output_path) else: success = self._clean_other_format(image, output_path, original_format) if not success: raise Exception("Metadata cleaning failed") # Verify results self._verify_image_quality(self.input_file_path, output_path) # Comprehensive metadata removal verification self._verify_metadata_removal(output_path) logger_config.success(f"Cleaned image saved: {output_path}") logger_config.success("ALL METADATA REMOVED - Image is secure for sharing") return output_path, extracted_metadata except Exception as e: logger_config.error(f"Failed to clean image: {str(e)}") return None, {} # Example usage if __name__ == "__main__": cleaner = RemoveMetadata() cleaned_file, metadata = cleaner.process("image/input/test.png") print(f"Output: {cleaned_file}") print(f"Extracted metadata: {metadata}") print("Image is now secure for sharing - all metadata removed!")