"""
Image Metadata Remover Module

This module provides a class-based metadata remover that strips embedded metadata
from images while preserving the original format, dimensions, and aspect ratio.
Handles EXIF, IPTC, XMP, ICC profiles, and other embedded metadata for security/privacy.
"""
|
|
|
import base64
import os
from typing import Any, Dict, Optional, Tuple

from custom_logger import logger_config
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS

from .image_base import ImageBase
|
|
|
class RemoveMetadata(ImageBase):
    """
    Image metadata remover for security and privacy.

    Each image is rebuilt from its pixel data only and re-saved with
    format-specific settings that write no EXIF block, ICC profile or PNG text
    chunks. The result is then re-opened to check which metadata is still
    reported by Pillow.

    NOTE(review): ``input_dir``, ``_validate_input_file`` and
    ``_generate_output_path`` are presumably provided by ``ImageBase`` - they
    are used here but not defined in this class.
    """

    # Per-format keyword arguments for ``Image.save``. Every entry explicitly
    # blanks the metadata channels the corresponding writer accepts.
    _SAVE_SETTINGS: Dict[str, Dict[str, Any]] = {
        'JPEG': {
            'quality': 100,
            'optimize': False,
            'progressive': False,
            'subsampling': 0,
            'exif': b'',
            'icc_profile': None,
        },
        'PNG': {
            'optimize': False,
            'compress_level': 1,
            'icc_profile': None,
            'pnginfo': None,
        },
        'WEBP': {
            'lossless': True,
            'quality': 100,
            'method': 6,
            'icc_profile': None,
            'exif': b'',
        },
        'TIFF': {
            'compression': None,
            'icc_profile': None,
        },
    }

    # Keys of the extraction report that count as residual metadata during
    # verification. Quantization tables are deliberately NOT listed: a JPEG
    # cannot be decoded without them, so they are present in every JPEG.
    _SENSITIVE_KEYS = (
        'exif_raw', 'exif_decoded', 'info', 'icc_profile_present',
        'xmp', 'iptc', 'pil_tag', 'pil_tag_v2',
        'thumbnail_present', 'png_text_chunks',
    )

    # ``Image.info`` entries that describe how the pixels are decoded or
    # rendered (container header fields, transparency, animation timing).
    # Pillow's format plugins populate these even for files written without any
    # metadata, so they are ignored when judging whether ``info`` is clean.
    _STRUCTURAL_INFO_KEYS = frozenset({
        'jfif', 'jfif_version', 'jfif_unit', 'jfif_density',
        'adobe', 'adobe_transform',
        'dpi', 'resolution', 'compression',
        'progressive', 'progression', 'interlace',
        'transparency', 'version', 'background',
        'duration', 'loop', 'timestamp',
    })

    def __init__(self):
        super().__init__("remove_metadata")

    def _extract_all_metadata(self, image: Image.Image) -> Dict[str, Any]:
        """
        Extract the metadata Pillow exposes for an image.

        Each probe is best-effort: a failure in one section is recorded under
        an ``*_error`` key and does not stop the remaining sections.

        Args:
            image: PIL Image object

        Returns:
            Dictionary with whichever of these keys apply: ``exif_raw``,
            ``exif_decoded``, ``info``, ``icc_profile_size``,
            ``icc_profile_present``, ``xmp``, ``iptc``, ``pil_tag``,
            ``pil_tag_v2``, ``quantization_tables``, ``thumbnail_present``,
            ``png_text_chunks``, plus ``basic_properties`` (always present).
        """
        all_metadata: Dict[str, Any] = {}

        # --- EXIF (only some image classes provide ``_getexif``) ---
        try:
            read_exif = getattr(image, '_getexif', None)
            # Parse once; the result is reused for both raw and decoded views.
            exif_dict = read_exif() if callable(read_exif) else None
            if exif_dict:
                all_metadata['exif_raw'] = exif_dict

                exif_decoded = {}
                for tag_id, value in exif_dict.items():
                    tag = TAGS.get(tag_id, tag_id)
                    # Only expand GPSInfo when it really is a mapping, so one
                    # malformed tag does not discard the whole decoded view.
                    if tag == 'GPSInfo' and isinstance(value, dict):
                        exif_decoded[tag] = {
                            GPSTAGS.get(gps_tag_id, gps_tag_id): gps_value
                            for gps_tag_id, gps_value in value.items()
                        }
                    else:
                        exif_decoded[tag] = value
                all_metadata['exif_decoded'] = exif_decoded
        except Exception as e:
            all_metadata['exif_error'] = str(e)

        info = getattr(image, 'info', None) or {}

        # --- Generic info dictionary ---
        if info:
            info_data = {}
            for key, value in info.items():
                if isinstance(value, bytes):
                    try:
                        # Strict decode so real binary payloads reach the
                        # base64 fallback instead of being lossily decoded.
                        info_data[key] = value.decode('utf-8')
                    except UnicodeDecodeError:
                        # Only a 100-byte preview is kept to bound the report size.
                        info_data[key] = f"<binary_data_base64>{base64.b64encode(value[:100]).decode()}</binary_data_base64>"
                else:
                    info_data[key] = value
            all_metadata['info'] = info_data

        # --- ICC colour profile ---
        try:
            if 'icc_profile' in info:
                icc_profile = info['icc_profile']
                all_metadata['icc_profile_size'] = len(icc_profile) if icc_profile else 0
                all_metadata['icc_profile_present'] = bool(icc_profile)
        except Exception as e:
            all_metadata['icc_error'] = str(e)

        # --- XMP packet ---
        try:
            if 'xmp' in info:
                xmp_data = info['xmp']
                if isinstance(xmp_data, bytes):
                    all_metadata['xmp'] = xmp_data.decode('utf-8', errors='ignore')
                else:
                    all_metadata['xmp'] = xmp_data
        except Exception as e:
            all_metadata['xmp_error'] = str(e)

        # --- IPTC: any info entry whose key starts with "iptc" ---
        iptc_data = {
            k: v for k, v in info.items()
            if isinstance(k, str) and k.startswith('iptc')
        }
        if iptc_data:
            all_metadata['iptc'] = iptc_data

        # --- Tag directories exposed as ``tag`` / ``tag_v2`` attributes ---
        for attr_name, report_key in (('tag', 'pil_tag'), ('tag_v2', 'pil_tag_v2')):
            try:
                tags = getattr(image, attr_name, None)
                if tags:
                    all_metadata[report_key] = dict(tags)
            except Exception as e:
                all_metadata[f'{report_key}_error'] = str(e)

        # --- Quantization tables (only the count is reported) ---
        try:
            quantization = getattr(image, 'quantization', None)
            if quantization:
                all_metadata['quantization_tables'] = len(quantization)
        except Exception as e:
            all_metadata['quantization_error'] = str(e)

        # --- Embedded thumbnail ---
        if 'thumbnail' in info:
            all_metadata['thumbnail_present'] = True

        # --- PNG text chunks ---
        try:
            if image.format == 'PNG' and hasattr(image, 'text'):
                all_metadata['png_text_chunks'] = dict(image.text)
        except Exception as e:
            all_metadata['png_text_error'] = str(e)

        # --- Basic properties (always reported) ---
        all_metadata['basic_properties'] = {
            'format': getattr(image, 'format', None),
            'mode': getattr(image, 'mode', None),
            'size': getattr(image, 'size', None),
            'filename': getattr(image, 'filename', None),
            'format_description': getattr(image, 'format_description', None),
        }

        return all_metadata

    def _get_quality_settings_for_format(self, format_name: str, original_mode: str) -> Dict:
        """
        Get the ``Image.save`` keyword arguments used for a given format.

        Args:
            format_name: PIL format name (e.g. ``'JPEG'``, ``'PNG'``)
            original_mode: Original image color mode (currently unused; kept
                for interface compatibility)

        Returns:
            A fresh dictionary of save parameters; empty for formats without a
            dedicated entry (including BMP and GIF).
        """
        # Copy so callers can never mutate the shared class-level table.
        return dict(self._SAVE_SETTINGS.get(format_name, {}))

    @staticmethod
    def _rebuild_pixels_only(image: Image.Image, keep_transparency: bool = True) -> Image.Image:
        """
        Build a new image holding only the pixel data of ``image``.

        Args:
            image: Source PIL Image object
            keep_transparency: Carry over ``info['transparency']`` when present.
                That entry controls how pixels are rendered (palette / tRNS
                transparency), so dropping it would change the picture.

        Returns:
            New image of the same mode and size whose ``info`` is empty apart
            from the optional transparency entry.
        """
        # No explicit fill colour: the whole canvas is overwritten by the paste
        # below, and a 4-tuple fill is not valid for two-band modes like 'LA'.
        clean_img = Image.new(image.mode, image.size)

        if image.mode == 'P':
            palette = image.getpalette()
            if palette:
                clean_img.putpalette(palette)

        # Pasting without a mask copies every band verbatim, alpha included.
        clean_img.paste(image, (0, 0))

        clean_info = {}
        if keep_transparency and 'transparency' in image.info:
            clean_info['transparency'] = image.info['transparency']
        clean_img.info = clean_info

        return clean_img

    def _clean_png_with_transparency(self, image: Image.Image, output_path: str) -> bool:
        """
        Re-save a PNG image without metadata while preserving transparency.

        Args:
            image: PIL Image object
            output_path: Path for output file

        Returns:
            True if successful

        Raises:
            Exception: If rebuilding or saving the image fails
        """
        try:
            clean_img = self._rebuild_pixels_only(image)
            save_settings = self._get_quality_settings_for_format('PNG', image.mode)
            clean_img.save(output_path, format='PNG', **save_settings)
            return True
        except Exception as e:
            raise Exception(f"PNG transparency cleaning failed: {e}") from e

    def _clean_jpeg_image(self, image: Image.Image, output_path: str) -> bool:
        """
        Re-save an image as JPEG without metadata.

        Any mode other than RGB is converted to RGB before saving.

        Args:
            image: PIL Image object
            output_path: Path for output file

        Returns:
            True if successful

        Raises:
            Exception: If converting or saving the image fails
        """
        try:
            if image.mode != 'RGB':
                clean_img = image.convert('RGB')
                # Discard whatever info the conversion carried over.
                clean_img.info = {}
            else:
                clean_img = self._rebuild_pixels_only(image, keep_transparency=False)

            save_settings = self._get_quality_settings_for_format('JPEG', image.mode)
            clean_img.save(output_path, format='JPEG', **save_settings)
            return True
        except Exception as e:
            raise Exception(f"JPEG cleaning failed: {e}") from e

    def _clean_other_format(self, image: Image.Image, output_path: str, original_format: str) -> bool:
        """
        Re-save an image in its original format without metadata.

        Args:
            image: PIL Image object
            output_path: Path for output file
            original_format: Original image format

        Returns:
            True if successful

        Raises:
            Exception: If rebuilding or saving the image fails
        """
        try:
            clean_img = self._rebuild_pixels_only(image)
            save_settings = self._get_quality_settings_for_format(original_format, image.mode)
            clean_img.save(output_path, format=original_format, **save_settings)
            return True
        except Exception as e:
            raise Exception(f"{original_format} cleaning failed: {e}") from e

    def _verify_metadata_removal(self, output_path) -> bool:
        """
        Re-open the cleaned file and check whether metadata is still reported.

        Structural ``info`` entries (see ``_STRUCTURAL_INFO_KEYS``) and JPEG
        quantization tables are not treated as metadata.

        NOTE(review): ``pil_tag`` / ``pil_tag_v2`` are still checked unfiltered.
        TIFF output presumably always carries baseline tags, so TIFF files may
        always be flagged here - confirm.

        Args:
            output_path: Path of the cleaned image

        Returns:
            True if no sensitive metadata was found, False otherwise

        Raises:
            Exception: If the file cannot be opened or inspected
        """
        try:
            with Image.open(output_path) as img:
                remaining_metadata = self._extract_all_metadata(img)

            remaining_sensitive = {}
            for key in self._SENSITIVE_KEYS:
                value = remaining_metadata.get(key)
                if key == 'info' and value:
                    value = {
                        k: v for k, v in value.items()
                        if k not in self._STRUCTURAL_INFO_KEYS
                    }
                if value:
                    remaining_sensitive[key] = value

            if remaining_sensitive:
                logger_config.warning(f"WARNING: Sensitive metadata still present: {list(remaining_sensitive.keys())}")
                logger_config.debug(f"Remaining metadata: {remaining_sensitive}")
                return False

            logger_config.success("ALL metadata successfully removed - image is clean")
            return True

        except Exception as e:
            raise Exception(f"Verification failed: {e}") from e

    def _verify_image_quality(self, original_path: str, cleaned_path: str) -> bool:
        """
        Verify that the cleaned file is non-empty and keeps the original dimensions.

        Args:
            original_path: Path to original image
            cleaned_path: Path to cleaned image

        Returns:
            True if the checks pass

        Raises:
            Exception: If the cleaned file is missing, empty, unreadable, or its
                dimensions differ from the original
        """
        try:
            # File-level checks come first: an empty or missing file would make
            # Image.open fail with a far less helpful message.
            cleaned_size = os.path.getsize(cleaned_path)
            if cleaned_size == 0:
                raise Exception("Cleaned file is empty")

            with Image.open(original_path) as original, Image.open(cleaned_path) as cleaned:
                if original.size != cleaned.size:
                    raise Exception(f"Dimension mismatch! Original: {original.size}, Cleaned: {cleaned.size}")
                dimensions = cleaned.size

            logger_config.success(f"Quality preserved - Size: {dimensions}, File size: {cleaned_size:,} bytes")
            return True

        except Exception as e:
            raise Exception(f"Quality verification failed: {e}") from e

    def _log_metadata_report(self, extracted_metadata: Dict[str, Any]) -> None:
        """Log every non-empty section of an extraction report."""
        logger_config.info("=== COMPREHENSIVE METADATA ANALYSIS ===")
        for key, value in extracted_metadata.items():
            if value:
                logger_config.info(f"{key}: {value}")
        # ``basic_properties`` is always present, so it does not count as
        # "metadata found".
        if not any(value for key, value in extracted_metadata.items() if key != 'basic_properties'):
            logger_config.info("No metadata found in original image")
        logger_config.info("=== END METADATA ANALYSIS ===")

    def process(self, input_file_name: str) -> Tuple[Optional[str], Dict[str, Any]]:
        """
        Remove metadata from an image and save the cleaned copy.

        The image is re-encoded from pixel data only, which drops:
        - EXIF data (camera settings, GPS coordinates, timestamps)
        - IPTC data (keywords, captions, copyright)
        - XMP data (Adobe metadata)
        - ICC color profiles
        - Embedded thumbnails
        - PNG text chunks

        Only the frame Pillow has loaded at open time is written; multi-frame
        inputs are not handled specially.

        Args:
            input_file_name: Input image name, resolved relative to ``self.input_dir``

        Returns:
            Tuple ``(output_path, extracted_metadata)`` on success. On any
            failure the error is logged and ``(None, {})`` is returned; no
            exception propagates to the caller.
        """
        try:
            self.input_file_name = input_file_name
            self.input_file_path = f'{self.input_dir}/{self.input_file_name}'

            self._validate_input_file()
            output_path = self._generate_output_path()

            logger_config.info(f"Processing: {self.input_file_path}")

            with Image.open(self.input_file_path) as image:
                original_format = image.format
                original_mode = image.mode

                # Capture the report before anything is rewritten.
                extracted_metadata = self._extract_all_metadata(image)
                self._log_metadata_report(extracted_metadata)

                if original_format == 'PNG' and original_mode in ('RGBA', 'LA', 'P'):
                    success = self._clean_png_with_transparency(image, output_path)
                elif original_format == 'JPEG' or output_path.lower().endswith(('.jpg', '.jpeg')):
                    success = self._clean_jpeg_image(image, output_path)
                else:
                    success = self._clean_other_format(image, output_path, original_format)

            if not success:
                raise Exception("Metadata cleaning failed")

            self._verify_image_quality(self.input_file_path, output_path)
            verified = self._verify_metadata_removal(output_path)

            logger_config.success(f"Cleaned image saved: {output_path}")
            if verified:
                logger_config.success("ALL METADATA REMOVED - Image is secure for sharing")
            else:
                # Do not claim success when verification found leftovers.
                logger_config.warning("Residual metadata detected in cleaned image - review before sharing")
            return output_path, extracted_metadata

        except Exception as e:
            logger_config.error(f"Failed to clean image: {str(e)}")
            return None, {}
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: clean one sample image and print the outcome.
    cleaner = RemoveMetadata()
    cleaned_file, metadata = cleaner.process("image/input/test.png")
    print(f"Output: {cleaned_file}")
    print(f"Extracted metadata: {metadata}")
    # process() returns (None, {}) on failure, so only claim success when an
    # output path actually came back.
    if cleaned_file:
        print("Image is now secure for sharing - all metadata removed!")
    else:
        print("Metadata removal failed - see log output for details.")