#!/usr/bin/env python3
# validate_acceptance_criteria.py
"""
Script to validate that experimental results meet the acceptance criteria
specified in make_it_real.md
"""

import argparse
import csv
import json
from pathlib import Path

# Input locations used when --all is given without an explicit path.
DEFAULT_QUANTUM_CSV = "quantum/qiskit/results/sample_grover_qiskit_results.csv"
DEFAULT_BASELINE_JSON = "phase4_outputs/llm_eval_baseline.json"
DEFAULT_QUANTIZED_JSON = "phase4_outputs/llm_eval_post_quant.json"
DEFAULT_SGD_EVO_JSON = "phase4_outputs/sgd_vs_evo.json"

# Acceptance thresholds.
SIMULATOR_MIN_P_SUCCESS = 0.90
HARDWARE_MIN_P_SUCCESS = 0.55
HARDWARE_MIN_SHOTS = 2000
MIN_ENERGY_REDUCTION = 0.40      # fraction of baseline J per 1M tokens
MIN_LATENCY_IMPROVEMENT = 0.20   # fraction of baseline P95 latency
MIN_SIZE_REDUCTION_FACTOR = 4.0  # baseline size / quantized size
MAX_ACCURACY_DIFFERENCE = 0.1    # |sgd acc - evo acc| must stay below this


def _describe_error(exc):
    """Render an exception as 'TypeName: message' for the results dict."""
    # Including the type keeps e.g. a KeyError readable ("KeyError: 'k_opt'")
    # instead of just the bare quoted key.
    return f"{type(exc).__name__}: {exc}"


def _load_json(path):
    """Read and parse a UTF-8 JSON file."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def validate_quantum_criteria(csv_file):
    """
    Validate quantum acceptance criteria:
    - Quantum (hardware): n=5, m=1 → p_success ≥ 0.55 at k=k* with ≥2000 shots
    - Simulator: clear peak near k* with p_success ≥ 0.90

    The CSV must provide the columns k, k_opt, p_success, backend and shots.
    backend, k_opt and shots are taken from the first data row; a backend
    value of "aer" selects the simulator criterion, anything else the
    hardware criterion.

    NOTE: the check uses the maximum p_success over all rows; it does not
    verify that this maximum occurs at k == k_opt.

    Returns a dict with "passed" (bool) and "details" (dict). On success it
    also contains "criteria" (str); on any failure to read or parse the
    input it contains "error" (str) and "passed" stays False.
    """
    results = {"passed": False, "details": {}}

    try:
        # newline='' is what the csv module expects for correct handling of
        # embedded newlines in quoted fields.
        with open(csv_file, "r", newline="", encoding="utf-8") as f:
            rows = list(csv.DictReader(f))

        if not rows:
            # Without this guard the failure would surface as an opaque
            # "max() arg is an empty sequence".
            raise ValueError(f"no data rows found in {csv_file}")

        first_row = rows[0]
        k_star = int(first_row["k_opt"])
        backend = first_row["backend"]
        shots = int(first_row["shots"])

        # Row with the highest success probability across all tested k.
        optimal_row = max(rows, key=lambda r: float(r["p_success"]))
        max_p = float(optimal_row["p_success"])

        results["details"] = {
            "backend": backend,
            "k_star": k_star,
            "max_p_success": max_p,
            "optimal_k": int(optimal_row["k"]),
            "shots": shots,
        }

        if backend == "aer":
            # Simulator criteria: p_success ≥ 0.90
            results["passed"] = max_p >= SIMULATOR_MIN_P_SUCCESS
            results["criteria"] = "Simulator: p_success ≥ 0.90"
        else:
            # Hardware criteria: p_success ≥ 0.55 with ≥2000 shots
            results["passed"] = (
                max_p >= HARDWARE_MIN_P_SUCCESS and shots >= HARDWARE_MIN_SHOTS
            )
            results["criteria"] = "Hardware: p_success ≥ 0.55 with ≥2000 shots"

    except Exception as e:
        # Reporting boundary: any failure means "criterion not met" and is
        # recorded in the result instead of aborting the whole run.
        results["error"] = _describe_error(e)

    return results


def validate_energy_criteria(baseline_file, quantized_file):
    """
    Validate energy/compression criteria:
    - ≥ 40% reduction in J per 1M tokens
    - P95 latency ≥ 20% better
    - ≥ 4× storage reduction

    Both files are JSON objects with the keys J_per_1M_tokens,
    latency_ms_p95 and size_bytes.

    NOTE: the acceptance criteria also list "≤ 3% quality drift
    (PPL/accuracy)"; that criterion is NOT evaluated by this function.

    Returns a dict with "passed" (bool) and "details" (dict). On success it
    also contains "criteria_met" (per-criterion booleans); on any failure
    (unreadable file, missing key, zero baseline, ...) it contains "error"
    (str) and "passed" stays False.
    """
    results = {"passed": False, "details": {}}

    try:
        baseline = _load_json(baseline_file)
        quantized = _load_json(quantized_file)

        base_energy = baseline["J_per_1M_tokens"]
        quant_energy = quantized["J_per_1M_tokens"]
        base_latency = baseline["latency_ms_p95"]
        quant_latency = quantized["latency_ms_p95"]

        # Relative reductions (fractions of the baseline) and the size ratio.
        energy_reduction = (base_energy - quant_energy) / base_energy
        latency_improvement = (base_latency - quant_latency) / base_latency
        size_reduction = baseline["size_bytes"] / quantized["size_bytes"]

        results["details"] = {
            "energy_reduction_pct": energy_reduction * 100,
            "latency_improvement_pct": latency_improvement * 100,
            "size_reduction_factor": size_reduction,
            "baseline_J_per_1M": base_energy,
            "quantized_J_per_1M": quant_energy,
            "baseline_latency_p95": base_latency,
            "quantized_latency_p95": quant_latency,
        }

        # Check all criteria
        energy_ok = energy_reduction >= MIN_ENERGY_REDUCTION
        latency_ok = latency_improvement >= MIN_LATENCY_IMPROVEMENT
        size_ok = size_reduction >= MIN_SIZE_REDUCTION_FACTOR

        results["passed"] = energy_ok and latency_ok and size_ok
        results["criteria_met"] = {
            "energy_reduction_40pct": energy_ok,
            "latency_improvement_20pct": latency_ok,
            "size_reduction_4x": size_ok,
        }

    except Exception as e:
        # Reporting boundary: see validate_quantum_criteria.
        results["error"] = _describe_error(e)

    return results


def _energy_kj(run):
    """Return run['energy_J'] in kJ, or None when it is missing or falsy."""
    energy_j = run.get("energy_J")
    return energy_j / 1000 if energy_j else None


def validate_training_criteria(sgd_evo_file):
    """
    Validate training cost criteria:
    - Publish cost-to-quality curves (kJ & time) for SGD vs Evolution

    The file is a JSON object with "sgd" and "evo" entries, each providing
    "acc" and "wall_s" and optionally "energy_J".

    The check passes when both accuracies are positive and differ by less
    than 0.1.

    Returns a dict with "passed" (bool) and "details" (dict); on any failure
    to read or parse the input it contains "error" (str) and "passed" stays
    False.
    """
    results = {"passed": False, "details": {}}

    try:
        data = _load_json(sgd_evo_file)
        sgd = data["sgd"]
        evo = data["evo"]

        # Check that both methods achieved similar accuracy
        acc_diff = abs(sgd["acc"] - evo["acc"])

        results["details"] = {
            "sgd_accuracy": sgd["acc"],
            "evo_accuracy": evo["acc"],
            "accuracy_difference": acc_diff,
            "sgd_energy_kJ": _energy_kj(sgd),
            "evo_energy_kJ": _energy_kj(evo),
            "sgd_time_s": sgd["wall_s"],
            "evo_time_s": evo["wall_s"],
        }

        # Pass if both methods have valid results
        results["passed"] = (
            sgd["acc"] > 0
            and evo["acc"] > 0
            and acc_diff < MAX_ACCURACY_DIFFERENCE
        )

    except Exception as e:
        # Reporting boundary: see validate_quantum_criteria.
        results["error"] = _describe_error(e)

    return results


def _print_result(result):
    """Print the pass flag, any recorded error, and the details of a result."""
    print(f"PASSED: {result['passed']}")
    if "error" in result:
        # Without this line a failed read shows only "PASSED: False" and
        # empty details, with no hint about what went wrong.
        print(f"Error: {result['error']}")
    print(f"Details: {json.dumps(result['details'], indent=2)}")
    if "criteria_met" in result:
        print(f"Criteria met: {json.dumps(result['criteria_met'], indent=2)}")


def main(argv=None):
    """
    Command-line entry point.

    argv is the list of arguments to parse; None (the default) makes
    argparse read sys.argv[1:].

    Returns 0 when every selected criterion passed, 1 otherwise. Invalid
    usage (nothing selected, or only one of the two energy files given)
    exits through argparse with status 2.
    """
    parser = argparse.ArgumentParser(description='Validate Phase 4 acceptance criteria')
    parser.add_argument('--quantum_csv', help='Path to quantum results CSV')
    parser.add_argument('--baseline_json', help='Path to baseline energy JSON')
    parser.add_argument('--quantized_json', help='Path to quantized energy JSON')
    parser.add_argument('--sgd_evo_json', help='Path to SGD vs Evolution JSON')
    parser.add_argument('--all', action='store_true', help='Test all criteria with default paths')

    args = parser.parse_args(argv)

    # The energy check compares two files; a lone one used to be ignored
    # silently, which hid the fact that nothing was validated.
    energy_paths = (args.baseline_json, args.quantized_json)
    if not args.all and any(energy_paths) and not all(energy_paths):
        parser.error("--baseline_json and --quantized_json must be given together")

    run_quantum = args.all or bool(args.quantum_csv)
    run_energy = args.all or all(energy_paths)
    run_training = args.all or bool(args.sgd_evo_json)

    # all() over an empty result set is True, so running with no inputs
    # would otherwise report success without checking anything.
    if not (run_quantum or run_energy or run_training):
        parser.error("nothing to validate: pass --all or at least one input path")

    results = {}

    if run_quantum:
        csv_path = args.quantum_csv or DEFAULT_QUANTUM_CSV
        print("\n=== QUANTUM CRITERIA ===")
        print(f"Testing: {csv_path}")
        results["quantum"] = validate_quantum_criteria(csv_path)
        _print_result(results["quantum"])

    if run_energy:
        baseline_path = args.baseline_json or DEFAULT_BASELINE_JSON
        quantized_path = args.quantized_json or DEFAULT_QUANTIZED_JSON
        print("\n=== ENERGY/COMPRESSION CRITERIA ===")
        print(f"Testing: {baseline_path} vs {quantized_path}")
        results["energy"] = validate_energy_criteria(baseline_path, quantized_path)
        _print_result(results["energy"])

    if run_training:
        sgd_evo_path = args.sgd_evo_json or DEFAULT_SGD_EVO_JSON
        print("\n=== TRAINING COST CRITERIA ===")
        print(f"Testing: {sgd_evo_path}")
        results["training"] = validate_training_criteria(sgd_evo_path)
        _print_result(results["training"])

    # Overall summary
    print("\n=== OVERALL SUMMARY ===")
    passed_count = sum(1 for r in results.values() if r['passed'])
    total_count = len(results)
    print(f"Passed: {passed_count}/{total_count} criteria")

    all_passed = passed_count == total_count
    print(f"ALL CRITERIA MET: {all_passed}")

    return 0 if all_passed else 1


if __name__ == '__main__':
    # SystemExit works without relying on the site-provided exit() helper.
    raise SystemExit(main())