# File size: 7,509 Bytes
# 7c58f51
#!/usr/bin/env python3
# validate_acceptance_criteria.py
"""
Script to validate that experimental results meet the acceptance criteria
specified in make_it_real.md
"""
import json
import csv
import argparse
from pathlib import Path
def validate_quantum_criteria(csv_file):
    """
    Validate quantum acceptance criteria against a results CSV:
    - Quantum (hardware): n=5, m=1 → p_success ≥ 0.55 at k=k* with ≥2000 shots
    - Simulator: clear peak near k* with p_success ≥ 0.90

    The CSV needs a header row with the columns ``k``, ``k_opt``,
    ``p_success``, ``backend`` and ``shots``. ``k_opt``, ``backend`` and
    ``shots`` are taken from the first data row only. A ``backend`` value of
    ``"aer"`` selects the simulator threshold; any other value selects the
    hardware threshold.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        A dict with ``passed`` (bool) and ``details`` (dict). On success it
        also carries ``criteria`` (description of the rule applied). If the
        file cannot be read or parsed, or has no data rows, the exception is
        caught and its message is stored under ``error`` instead.
    """
    results = {"passed": False, "details": {}}
    try:
        # newline="" is the mode the csv module documents for reader input.
        with open(csv_file, "r", newline="") as f:
            rows = list(csv.DictReader(f))

        # An empty file would otherwise fail inside max() with a message that
        # says nothing about the actual problem.
        if not rows:
            raise ValueError(f"no data rows found in {csv_file}")

        first_row = rows[0]
        k_star = int(first_row["k_opt"])
        # One scan yields both the best row and its success probability.
        optimal_row = max(rows, key=lambda r: float(r["p_success"]))
        max_p = float(optimal_row["p_success"])
        backend = first_row["backend"]
        shots = int(first_row["shots"])

        results["details"] = {
            "backend": backend,
            "k_star": k_star,
            "max_p_success": max_p,
            "optimal_k": int(optimal_row["k"]),
            "shots": shots,
        }

        # NOTE(review): the criteria are worded "at k=k*", but the checks
        # below use the best p_success over all rows, whatever its k is.
        # Confirm whether the row with k == k_opt should be used instead.
        if backend == "aer":
            # Simulator criteria: p_success ≥ 0.90
            results["passed"] = max_p >= 0.90
            results["criteria"] = "Simulator: p_success ≥ 0.90"
        else:
            # Hardware criteria: p_success ≥ 0.55 with ≥2000 shots
            results["passed"] = max_p >= 0.55 and shots >= 2000
            results["criteria"] = "Hardware: p_success ≥ 0.55 with ≥2000 shots"
    except Exception as e:
        # Deliberately broad: callers expect a result dict, never an exception.
        results["error"] = str(e)
    return results
def validate_energy_criteria(baseline_file, quantized_file):
    """
    Validate energy/compression criteria:
    - ≥ 40% reduction in J per 1M tokens
    - ≤ 3% quality drift (PPL/accuracy)
    - P95 latency ≥ 20% better
    - ≥ 4× storage reduction

    Both inputs are JSON objects that must provide the keys
    ``J_per_1M_tokens``, ``latency_ms_p95`` and ``size_bytes``.

    NOTE(review): the quality-drift criterion listed above is not evaluated
    here; this function reads no quality metric from either file. Confirm
    whether it is validated elsewhere.

    Args:
        baseline_file: Path of the baseline metrics JSON.
        quantized_file: Path of the post-quantization metrics JSON.

    Returns:
        A dict with ``passed`` (bool) and ``details`` (dict). On success it
        also carries ``criteria_met`` (one bool per checked criterion). If a
        file cannot be read or parsed, a key is missing, or a value used as a
        denominator is not positive, the exception is caught and its message
        is stored under ``error`` instead.
    """
    results = {"passed": False, "details": {}}
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(baseline_file, "r", encoding="utf-8") as f:
            baseline = json.load(f)
        with open(quantized_file, "r", encoding="utf-8") as f:
            quantized = json.load(f)

        # These values are used as denominators below. Reject non-positive
        # ones with a message naming the metric, rather than a bare
        # "division by zero" or a meaningless ratio.
        for label, metrics, key in (
            ("baseline", baseline, "J_per_1M_tokens"),
            ("baseline", baseline, "latency_ms_p95"),
            ("quantized", quantized, "size_bytes"),
        ):
            if metrics[key] <= 0:
                raise ValueError(
                    f"{label} {key} must be positive, got {metrics[key]!r}"
                )

        # Relative reductions (fractions of the baseline) and the size ratio.
        energy_reduction = (
            baseline["J_per_1M_tokens"] - quantized["J_per_1M_tokens"]
        ) / baseline["J_per_1M_tokens"]
        latency_improvement = (
            baseline["latency_ms_p95"] - quantized["latency_ms_p95"]
        ) / baseline["latency_ms_p95"]
        size_reduction = baseline["size_bytes"] / quantized["size_bytes"]

        results["details"] = {
            "energy_reduction_pct": energy_reduction * 100,
            "latency_improvement_pct": latency_improvement * 100,
            "size_reduction_factor": size_reduction,
            "baseline_J_per_1M": baseline["J_per_1M_tokens"],
            "quantized_J_per_1M": quantized["J_per_1M_tokens"],
            "baseline_latency_p95": baseline["latency_ms_p95"],
            "quantized_latency_p95": quantized["latency_ms_p95"],
        }

        # Check all criteria
        energy_ok = energy_reduction >= 0.40  # ≥ 40% reduction
        latency_ok = latency_improvement >= 0.20  # ≥ 20% improvement
        size_ok = size_reduction >= 4.0  # ≥ 4× reduction
        results["passed"] = energy_ok and latency_ok and size_ok
        results["criteria_met"] = {
            "energy_reduction_40pct": energy_ok,
            "latency_improvement_20pct": latency_ok,
            "size_reduction_4x": size_ok,
        }
    except Exception as e:
        # Deliberately broad: callers expect a result dict, never an exception.
        results["error"] = str(e)
    return results
def validate_training_criteria(sgd_evo_file):
    """
    Validate training cost criteria:
    - Publish cost-to-quality curves (kJ & time) for SGD vs Evolution

    Reads a JSON object with ``sgd`` and ``evo`` entries, each providing
    ``acc`` and ``wall_s`` and optionally ``energy_J``. The check passes when
    both accuracies are positive and differ by less than 0.1.

    Returns a dict with ``passed`` and ``details``; if anything raises while
    reading or evaluating, the message is stored under ``error`` instead.
    """

    def to_kilojoules(run):
        # A missing or falsy energy reading (including 0) is reported as None.
        joules = run.get("energy_J")
        return joules / 1000 if joules else None

    outcome = {"passed": False, "details": {}}
    try:
        with open(sgd_evo_file, 'r') as handle:
            payload = json.load(handle)
        sgd_run = payload["sgd"]
        evo_run = payload["evo"]

        # Gap between the two methods' final accuracies.
        gap = abs(sgd_run["acc"] - evo_run["acc"])

        summary = {
            "sgd_accuracy": sgd_run["acc"],
            "evo_accuracy": evo_run["acc"],
            "accuracy_difference": gap,
            "sgd_energy_kJ": to_kilojoules(sgd_run),
            "evo_energy_kJ": to_kilojoules(evo_run),
            "sgd_time_s": sgd_run["wall_s"],
            "evo_time_s": evo_run["wall_s"],
        }
        outcome["details"] = summary

        # Both runs must have produced a result, and the results must be close.
        if sgd_run["acc"] > 0 and evo_run["acc"] > 0:
            outcome["passed"] = gap < 0.1
        else:
            outcome["passed"] = False
    except Exception as exc:
        outcome["error"] = str(exc)
    return outcome
def _print_outcome(outcome):
    """Print the verdict, any recorded error, and the details of one result dict."""
    print(f"PASSED: {outcome['passed']}")
    # Validators record failures under "error" instead of raising; surface
    # them so a failed run explains itself.
    if "error" in outcome:
        print(f"Error: {outcome['error']}")
    print(f"Details: {json.dumps(outcome['details'], indent=2)}")
    if "criteria_met" in outcome:
        print(f"Criteria met: {json.dumps(outcome['criteria_met'], indent=2)}")


def main(argv=None):
    """
    Command-line entry point: run the requested validators and print a report.

    A criteria group runs when its input path(s) are given, or when ``--all``
    is set (missing paths then fall back to the built-in defaults). The
    energy/compression group needs both ``--baseline_json`` and
    ``--quantized_json`` unless ``--all`` is used.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        0 when at least one group was evaluated and all evaluated groups
        passed, otherwise 1.
    """
    parser = argparse.ArgumentParser(description='Validate Phase 4 acceptance criteria')
    parser.add_argument('--quantum_csv', help='Path to quantum results CSV')
    parser.add_argument('--baseline_json', help='Path to baseline energy JSON')
    parser.add_argument('--quantized_json', help='Path to quantized energy JSON')
    parser.add_argument('--sgd_evo_json', help='Path to SGD vs Evolution JSON')
    parser.add_argument('--all', action='store_true', help='Test all criteria with default paths')
    args = parser.parse_args(argv)

    results = {}

    if args.all or args.quantum_csv:
        csv_path = args.quantum_csv or "quantum/qiskit/results/sample_grover_qiskit_results.csv"
        print("\n=== QUANTUM CRITERIA ===")
        print(f"Testing: {csv_path}")
        results["quantum"] = validate_quantum_criteria(csv_path)
        _print_outcome(results["quantum"])

    if args.all or (args.baseline_json and args.quantized_json):
        baseline_path = args.baseline_json or "phase4_outputs/llm_eval_baseline.json"
        quantized_path = args.quantized_json or "phase4_outputs/llm_eval_post_quant.json"
        print("\n=== ENERGY/COMPRESSION CRITERIA ===")
        print(f"Testing: {baseline_path} vs {quantized_path}")
        results["energy"] = validate_energy_criteria(baseline_path, quantized_path)
        _print_outcome(results["energy"])
    elif args.baseline_json or args.quantized_json:
        # Only one half of the pair was supplied: say so instead of silently
        # dropping the check.
        print("\nSkipping energy/compression criteria: "
              "--baseline_json and --quantized_json must be given together")

    if args.all or args.sgd_evo_json:
        sgd_evo_path = args.sgd_evo_json or "phase4_outputs/sgd_vs_evo.json"
        print("\n=== TRAINING COST CRITERIA ===")
        print(f"Testing: {sgd_evo_path}")
        results["training"] = validate_training_criteria(sgd_evo_path)
        _print_outcome(results["training"])

    # Overall summary
    print("\n=== OVERALL SUMMARY ===")
    if not results:
        # all() over an empty set is True, which would report success even
        # though nothing was validated.
        print("No criteria were evaluated; pass --all or at least one input path")
        return 1

    passed_count = sum(1 for r in results.values() if r['passed'])
    total_count = len(results)
    print(f"Passed: {passed_count}/{total_count} criteria")
    all_passed = passed_count == total_count
    print(f"ALL CRITERIA MET: {all_passed}")
    return 0 if all_passed else 1
if __name__ == '__main__':
    # Hand main()'s return value to the interpreter as the process exit status.
    # SystemExit is raised directly because the bare exit() helper is installed
    # by the site module and is not guaranteed to exist.
    raise SystemExit(main())