"""
Script to validate that experimental results meet the acceptance criteria
specified in make_it_real.md
"""

import argparse
import csv
import json
import sys
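
# Example invocations (the filename below is a placeholder for whatever this
# script is saved as in the repo):
#   python validate_acceptance.py --all
#   python validate_acceptance.py --quantum_csv results.csv
#   python validate_acceptance.py --baseline_json baseline.json --quantized_json quantized.json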


def validate_quantum_criteria(csv_file):
    """
    Validate quantum acceptance criteria:
    - Quantum (hardware): n=5, m=1 → p_success ≥ 0.55 at k=k* with ≥2000 shots
    - Simulator: clear peak near k* with p_success ≥ 0.90
    """
    results = {"passed": False, "details": {}}

    try:
        with open(csv_file, 'r') as f:
            reader = csv.DictReader(f)
            rows = list(reader)

        if not rows:
            results["error"] = "CSV file contains no data rows"
            return results

        k_star = int(rows[0]['k_opt'])
        max_p = max(float(row['p_success']) for row in rows)
        optimal_row = max(rows, key=lambda r: float(r['p_success']))
        backend = rows[0]['backend']
        shots = int(rows[0]['shots'])

        results["details"] = {
            "backend": backend,
            "k_star": k_star,
            "max_p_success": max_p,
            "optimal_k": int(optimal_row['k']),
            "shots": shots
        }

        if backend == "aer":
            results["passed"] = max_p >= 0.90
            results["criteria"] = "Simulator: p_success ≥ 0.90"
        else:
            results["passed"] = max_p >= 0.55 and shots >= 2000
            results["criteria"] = "Hardware: p_success ≥ 0.55 with ≥2000 shots"

    except Exception as e:
        results["error"] = str(e)

    return results


def validate_energy_criteria(baseline_file, quantized_file):
    """
    Validate energy/compression criteria:
    - ≥ 40% reduction in J per 1M tokens
    - ≤ 3% quality drift (PPL/accuracy)
    - P95 latency ≥ 20% better
    - ≥ 4× storage reduction
    """
    results = {"passed": False, "details": {}}

    try:
        with open(baseline_file, 'r') as f:
            baseline = json.load(f)
        with open(quantized_file, 'r') as f:
            quantized = json.load(f)

        energy_reduction = (baseline["J_per_1M_tokens"] - quantized["J_per_1M_tokens"]) / baseline["J_per_1M_tokens"]
        latency_improvement = (baseline["latency_ms_p95"] - quantized["latency_ms_p95"]) / baseline["latency_ms_p95"]
        size_reduction = baseline["size_bytes"] / quantized["size_bytes"]

        results["details"] = {
            "energy_reduction_pct": energy_reduction * 100,
            "latency_improvement_pct": latency_improvement * 100,
            "size_reduction_factor": size_reduction,
            "baseline_J_per_1M": baseline["J_per_1M_tokens"],
            "quantized_J_per_1M": quantized["J_per_1M_tokens"],
            "baseline_latency_p95": baseline["latency_ms_p95"],
            "quantized_latency_p95": quantized["latency_ms_p95"]
        }

        energy_ok = energy_reduction >= 0.40
        latency_ok = latency_improvement >= 0.20
        size_ok = size_reduction >= 4.0

        results["passed"] = energy_ok and latency_ok and size_ok
        results["criteria_met"] = {
            "energy_reduction_40pct": energy_ok,
            "latency_improvement_20pct": latency_ok,
            "size_reduction_4x": size_ok
        }

    except Exception as e:
        results["error"] = str(e)

    return results


def validate_training_criteria(sgd_evo_file):
    """
    Validate training cost criteria:
    - Publish cost-to-quality curves (kJ & time) for SGD vs Evolution
    """
    results = {"passed": False, "details": {}}

    try:
        with open(sgd_evo_file, 'r') as f:
            data = json.load(f)

        sgd = data["sgd"]
        evo = data["evo"]

        acc_diff = abs(sgd["acc"] - evo["acc"])

        results["details"] = {
            "sgd_accuracy": sgd["acc"],
            "evo_accuracy": evo["acc"],
            "accuracy_difference": acc_diff,
            "sgd_energy_kJ": sgd.get("energy_J", 0) / 1000 if sgd.get("energy_J") else None,
            "evo_energy_kJ": evo.get("energy_J", 0) / 1000 if evo.get("energy_J") else None,
            "sgd_time_s": sgd["wall_s"],
            "evo_time_s": evo["wall_s"]
        }

        # Both runs must reach a nonzero accuracy and land within 0.1 of each
        # other for the cost-to-quality curves to be considered comparable.
        results["passed"] = sgd["acc"] > 0 and evo["acc"] > 0 and acc_diff < 0.1

    except Exception as e:
        results["error"] = str(e)

    return results


def main():
    parser = argparse.ArgumentParser(description='Validate Phase 4 acceptance criteria')
    parser.add_argument('--quantum_csv', help='Path to quantum results CSV')
    parser.add_argument('--baseline_json', help='Path to baseline energy JSON')
    parser.add_argument('--quantized_json', help='Path to quantized energy JSON')
    parser.add_argument('--sgd_evo_json', help='Path to SGD vs Evolution JSON')
    parser.add_argument('--all', action='store_true', help='Test all criteria with default paths')

    args = parser.parse_args()

    results = {}

    if args.all or args.quantum_csv:
        csv_path = args.quantum_csv or "quantum/qiskit/results/sample_grover_qiskit_results.csv"
        print("\n=== QUANTUM CRITERIA ===")
        print(f"Testing: {csv_path}")
        quantum_results = validate_quantum_criteria(csv_path)
        results["quantum"] = quantum_results
        print(f"PASSED: {quantum_results['passed']}")
        print(f"Details: {json.dumps(quantum_results['details'], indent=2)}")

    if args.all or (args.baseline_json and args.quantized_json):
        baseline_path = args.baseline_json or "phase4_outputs/llm_eval_baseline.json"
        quantized_path = args.quantized_json or "phase4_outputs/llm_eval_post_quant.json"
        print("\n=== ENERGY/COMPRESSION CRITERIA ===")
        print(f"Testing: {baseline_path} vs {quantized_path}")
        energy_results = validate_energy_criteria(baseline_path, quantized_path)
        results["energy"] = energy_results
        print(f"PASSED: {energy_results['passed']}")
        print(f"Details: {json.dumps(energy_results['details'], indent=2)}")
        if 'criteria_met' in energy_results:
            print(f"Criteria met: {json.dumps(energy_results['criteria_met'], indent=2)}")

    if args.all or args.sgd_evo_json:
        sgd_evo_path = args.sgd_evo_json or "phase4_outputs/sgd_vs_evo.json"
        print("\n=== TRAINING COST CRITERIA ===")
        print(f"Testing: {sgd_evo_path}")
        training_results = validate_training_criteria(sgd_evo_path)
        results["training"] = training_results
        print(f"PASSED: {training_results['passed']}")
        print(f"Details: {json.dumps(training_results['details'], indent=2)}")

    if not results:
        # Nothing was selected: without this guard the summary below would
        # report success over an empty result set.
        parser.print_help()
        return 1

    print("\n=== OVERALL SUMMARY ===")
    passed_count = sum(1 for r in results.values() if r['passed'])
    total_count = len(results)
    print(f"Passed: {passed_count}/{total_count} criteria")

    all_passed = all(r['passed'] for r in results.values())
    print(f"ALL CRITERIA MET: {all_passed}")

    return 0 if all_passed else 1


if __name__ == '__main__':
    sys.exit(main())