phase4-quantum-compression / src /utils /validate_acceptance_criteria.py
jmurray10's picture
Add test scripts, requirements, and setup guide for users
7c58f51 verified
#!/usr/bin/env python3
# validate_acceptance_criteria.py
"""
Script to validate that experimental results meet the acceptance criteria
specified in make_it_real.md
"""
import json
import csv
import argparse
from pathlib import Path
def validate_quantum_criteria(csv_file):
"""
Validate quantum acceptance criteria:
- Quantum (hardware): n=5, m=1 → p_success ≥ 0.55 at k=k* with ≥2000 shots
- Simulator: clear peak near k* with p_success ≥ 0.90
"""
results = {"passed": False, "details": {}}
try:
with open(csv_file, 'r') as f:
reader = csv.DictReader(f)
rows = list(reader)
# Find optimal k and max p_success
k_star = int(rows[0]['k_opt']) if rows else None
max_p = max(float(row['p_success']) for row in rows)
optimal_row = max(rows, key=lambda r: float(r['p_success']))
backend = rows[0]['backend'] if rows else None
shots = int(rows[0]['shots']) if rows else 0
results["details"] = {
"backend": backend,
"k_star": k_star,
"max_p_success": max_p,
"optimal_k": int(optimal_row['k']),
"shots": shots
}
if backend == "aer":
# Simulator criteria: p_success ≥ 0.90
results["passed"] = max_p >= 0.90
results["criteria"] = "Simulator: p_success ≥ 0.90"
else:
# Hardware criteria: p_success ≥ 0.55 with ≥2000 shots
results["passed"] = max_p >= 0.55 and shots >= 2000
results["criteria"] = "Hardware: p_success ≥ 0.55 with ≥2000 shots"
except Exception as e:
results["error"] = str(e)
return results
def validate_energy_criteria(baseline_file, quantized_file):
"""
Validate energy/compression criteria:
- ≥ 40% reduction in J per 1M tokens
- ≤ 3% quality drift (PPL/accuracy)
- P95 latency ≥ 20% better
- ≥ 4× storage reduction
"""
results = {"passed": False, "details": {}}
try:
with open(baseline_file, 'r') as f:
baseline = json.load(f)
with open(quantized_file, 'r') as f:
quantized = json.load(f)
# Calculate reductions
energy_reduction = (baseline["J_per_1M_tokens"] - quantized["J_per_1M_tokens"]) / baseline["J_per_1M_tokens"]
latency_improvement = (baseline["latency_ms_p95"] - quantized["latency_ms_p95"]) / baseline["latency_ms_p95"]
size_reduction = baseline["size_bytes"] / quantized["size_bytes"]
results["details"] = {
"energy_reduction_pct": energy_reduction * 100,
"latency_improvement_pct": latency_improvement * 100,
"size_reduction_factor": size_reduction,
"baseline_J_per_1M": baseline["J_per_1M_tokens"],
"quantized_J_per_1M": quantized["J_per_1M_tokens"],
"baseline_latency_p95": baseline["latency_ms_p95"],
"quantized_latency_p95": quantized["latency_ms_p95"]
}
# Check all criteria
energy_ok = energy_reduction >= 0.40 # ≥ 40% reduction
latency_ok = latency_improvement >= 0.20 # ≥ 20% improvement
size_ok = size_reduction >= 4.0 # ≥ 4× reduction
results["passed"] = energy_ok and latency_ok and size_ok
results["criteria_met"] = {
"energy_reduction_40pct": energy_ok,
"latency_improvement_20pct": latency_ok,
"size_reduction_4x": size_ok
}
except Exception as e:
results["error"] = str(e)
return results
def validate_training_criteria(sgd_evo_file):
"""
Validate training cost criteria:
- Publish cost-to-quality curves (kJ & time) for SGD vs Evolution
"""
results = {"passed": False, "details": {}}
try:
with open(sgd_evo_file, 'r') as f:
data = json.load(f)
sgd = data["sgd"]
evo = data["evo"]
# Check that both methods achieved similar accuracy
acc_diff = abs(sgd["acc"] - evo["acc"])
results["details"] = {
"sgd_accuracy": sgd["acc"],
"evo_accuracy": evo["acc"],
"accuracy_difference": acc_diff,
"sgd_energy_kJ": sgd.get("energy_J", 0) / 1000 if sgd.get("energy_J") else None,
"evo_energy_kJ": evo.get("energy_J", 0) / 1000 if evo.get("energy_J") else None,
"sgd_time_s": sgd["wall_s"],
"evo_time_s": evo["wall_s"]
}
# Pass if both methods have valid results
results["passed"] = sgd["acc"] > 0 and evo["acc"] > 0 and acc_diff < 0.1
except Exception as e:
results["error"] = str(e)
return results
def main():
parser = argparse.ArgumentParser(description='Validate Phase 4 acceptance criteria')
parser.add_argument('--quantum_csv', help='Path to quantum results CSV')
parser.add_argument('--baseline_json', help='Path to baseline energy JSON')
parser.add_argument('--quantized_json', help='Path to quantized energy JSON')
parser.add_argument('--sgd_evo_json', help='Path to SGD vs Evolution JSON')
parser.add_argument('--all', action='store_true', help='Test all criteria with default paths')
args = parser.parse_args()
results = {}
if args.all or args.quantum_csv:
csv_path = args.quantum_csv or "quantum/qiskit/results/sample_grover_qiskit_results.csv"
print(f"\n=== QUANTUM CRITERIA ===")
print(f"Testing: {csv_path}")
quantum_results = validate_quantum_criteria(csv_path)
results["quantum"] = quantum_results
print(f"PASSED: {quantum_results['passed']}")
print(f"Details: {json.dumps(quantum_results['details'], indent=2)}")
if args.all or (args.baseline_json and args.quantized_json):
baseline_path = args.baseline_json or "phase4_outputs/llm_eval_baseline.json"
quantized_path = args.quantized_json or "phase4_outputs/llm_eval_post_quant.json"
print(f"\n=== ENERGY/COMPRESSION CRITERIA ===")
print(f"Testing: {baseline_path} vs {quantized_path}")
energy_results = validate_energy_criteria(baseline_path, quantized_path)
results["energy"] = energy_results
print(f"PASSED: {energy_results['passed']}")
print(f"Details: {json.dumps(energy_results['details'], indent=2)}")
if 'criteria_met' in energy_results:
print(f"Criteria met: {json.dumps(energy_results['criteria_met'], indent=2)}")
if args.all or args.sgd_evo_json:
sgd_evo_path = args.sgd_evo_json or "phase4_outputs/sgd_vs_evo.json"
print(f"\n=== TRAINING COST CRITERIA ===")
print(f"Testing: {sgd_evo_path}")
training_results = validate_training_criteria(sgd_evo_path)
results["training"] = training_results
print(f"PASSED: {training_results['passed']}")
print(f"Details: {json.dumps(training_results['details'], indent=2)}")
# Overall summary
print(f"\n=== OVERALL SUMMARY ===")
passed_count = sum(1 for r in results.values() if r['passed'])
total_count = len(results)
print(f"Passed: {passed_count}/{total_count} criteria")
all_passed = all(r['passed'] for r in results.values())
print(f"ALL CRITERIA MET: {all_passed}")
return 0 if all_passed else 1
if __name__ == '__main__':
exit(main())