#!/usr/bin/env python3
"""Test the saved compressed models"""
import torch
import torch.nn as nn
import os

print("="*70)
print(" "*10 + "TESTING SAVED COMPRESSED MODELS")
print("="*70)

# Test MLP model
print("\n1. Testing MLP models:")
print("-"*40)
# Load original
original_mlp = torch.load("compressed_models/mlp_original_fp32.pth")
print(f"✅ Loaded original MLP: {os.path.getsize('compressed_models/mlp_original_fp32.pth')/1024:.1f} KB")

# Load compressed
compressed_mlp = torch.load("compressed_models/mlp_compressed_int8.pth")
print(f"✅ Loaded compressed MLP: {os.path.getsize('compressed_models/mlp_compressed_int8.pth')/1024:.1f} KB")
# Recreate model and test
model = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Linear(128, 10)
)
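# The architecture must match the one used when the checkpoint was saved,
# otherwise load_state_dict() will fail with missing/unexpected keys or shape errors.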
model.load_state_dict(original_mlp['model_state_dict'])

# Test inference
test_input = torch.randn(1, 784)
with torch.no_grad():
    output = model(test_input)
print(f" Original output shape: {output.shape}")
print(f" Prediction: {torch.argmax(output).item()}")
# For quantized model, we need to recreate and quantize
model_quant = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Linear(128, 10)
)
model_quant.eval()
model_quant = torch.quantization.quantize_dynamic(model_quant, {nn.Linear}, dtype=torch.qint8)
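# Dynamic quantization must be applied before loading, so the module's state dict
# has the packed INT8 parameter keys that the saved quantized checkpoint expects.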
model_quant.load_state_dict(compressed_mlp['model_state_dict'])
with torch.no_grad():
    output_quant = model_quant(test_input)
print(f" Compressed output shape: {output_quant.shape}")
print(f" Prediction: {torch.argmax(output_quant).item()}")
print("\n✅ Both models work and produce valid outputs!") | |