#!/usr/bin/env python3
# validate_acceptance_criteria.py
"""
Script to validate that experimental results meet the acceptance criteria 
specified in make_it_real.md
"""

import argparse
import csv
import json
import sys

def validate_quantum_criteria(csv_file):
    """
    Validate quantum acceptance criteria:
    - Quantum (hardware): n=5, m=1 → p_success ≥ 0.55 at k=k* with ≥2000 shots
    - Simulator: clear peak near k* with p_success ≥ 0.90
    """
    results = {"passed": False, "details": {}}
    
    try:
        with open(csv_file, 'r') as f:
            reader = csv.DictReader(f)
            rows = list(reader)
        
        if not rows:
            results["error"] = "no rows found in CSV"
            return results
        
        # Find the row with the highest success probability and the reported k*
        optimal_row = max(rows, key=lambda r: float(r['p_success']))
        max_p = float(optimal_row['p_success'])
        k_star = int(rows[0]['k_opt'])
        backend = rows[0]['backend']
        shots = int(rows[0]['shots'])
        
        results["details"] = {
            "backend": backend,
            "k_star": k_star,
            "max_p_success": max_p,
            "optimal_k": int(optimal_row['k']),
            "shots": shots
        }
        
        if backend == "aer":
            # Simulator criteria: p_success ≥ 0.90
            results["passed"] = max_p >= 0.90
            results["criteria"] = "Simulator: p_success ≥ 0.90"
        else:
            # Hardware criteria: p_success ≥ 0.55 with ≥2000 shots
            results["passed"] = max_p >= 0.55 and shots >= 2000
            results["criteria"] = "Hardware: p_success ≥ 0.55 with ≥2000 shots"
            
    except Exception as e:
        results["error"] = str(e)
    
    return results

def validate_energy_criteria(baseline_file, quantized_file):
    """
    Validate energy/compression criteria:
    - ≥ 40% reduction in J per 1M tokens
    - ≤ 3% quality drift (PPL/accuracy)
    - P95 latency ≥ 20% better
    - ≥ 4× storage reduction
    """
    results = {"passed": False, "details": {}}
    
    try:
        with open(baseline_file, 'r') as f:
            baseline = json.load(f)
        with open(quantized_file, 'r') as f:
            quantized = json.load(f)
        
        # Calculate reductions
        energy_reduction = (baseline["J_per_1M_tokens"] - quantized["J_per_1M_tokens"]) / baseline["J_per_1M_tokens"]
        latency_improvement = (baseline["latency_ms_p95"] - quantized["latency_ms_p95"]) / baseline["latency_ms_p95"]
        size_reduction = baseline["size_bytes"] / quantized["size_bytes"]
        
        results["details"] = {
            "energy_reduction_pct": energy_reduction * 100,
            "latency_improvement_pct": latency_improvement * 100,
            "size_reduction_factor": size_reduction,
            "baseline_J_per_1M": baseline["J_per_1M_tokens"],
            "quantized_J_per_1M": quantized["J_per_1M_tokens"],
            "baseline_latency_p95": baseline["latency_ms_p95"],
            "quantized_latency_p95": quantized["latency_ms_p95"]
        }
        
        # Check all criteria
        energy_ok = energy_reduction >= 0.40  # ≥ 40% reduction
        latency_ok = latency_improvement >= 0.20  # ≥ 20% improvement
        size_ok = size_reduction >= 4.0  # ≥ 4× reduction
        
        results["passed"] = energy_ok and latency_ok and size_ok
        results["criteria_met"] = {
            "energy_reduction_40pct": energy_ok,
            "latency_improvement_20pct": latency_ok,
            "size_reduction_4x": size_ok
        }
        
    except Exception as e:
        results["error"] = str(e)
    
    return results

def validate_training_criteria(sgd_evo_file):
    """
    Validate training cost criteria:
    - Publish cost-to-quality curves (kJ & time) for SGD vs Evolution
    """
    results = {"passed": False, "details": {}}
    
    try:
        with open(sgd_evo_file, 'r') as f:
            data = json.load(f)
        
        sgd = data["sgd"]
        evo = data["evo"]
        
        # Check that both methods achieved similar accuracy
        acc_diff = abs(sgd["acc"] - evo["acc"])
        
        results["details"] = {
            "sgd_accuracy": sgd["acc"],
            "evo_accuracy": evo["acc"],
            "accuracy_difference": acc_diff,
            "sgd_energy_kJ": sgd.get("energy_J", 0) / 1000 if sgd.get("energy_J") else None,
            "evo_energy_kJ": evo.get("energy_J", 0) / 1000 if evo.get("energy_J") else None,
            "sgd_time_s": sgd["wall_s"],
            "evo_time_s": evo["wall_s"]
        }
        
        # Pass if both methods have valid results
        results["passed"] = sgd["acc"] > 0 and evo["acc"] > 0 and acc_diff < 0.1
        
    except Exception as e:
        results["error"] = str(e)
    
    return results

def main():
    parser = argparse.ArgumentParser(description='Validate Phase 4 acceptance criteria')
    parser.add_argument('--quantum_csv', help='Path to quantum results CSV')
    parser.add_argument('--baseline_json', help='Path to baseline energy JSON')
    parser.add_argument('--quantized_json', help='Path to quantized energy JSON')
    parser.add_argument('--sgd_evo_json', help='Path to SGD vs Evolution JSON')
    parser.add_argument('--all', action='store_true', help='Test all criteria with default paths')
    
    args = parser.parse_args()
    
    results = {}
    
    if args.all or args.quantum_csv:
        csv_path = args.quantum_csv or "quantum/qiskit/results/sample_grover_qiskit_results.csv"
        print(f"\n=== QUANTUM CRITERIA ===")
        print(f"Testing: {csv_path}")
        quantum_results = validate_quantum_criteria(csv_path)
        results["quantum"] = quantum_results
        print(f"PASSED: {quantum_results['passed']}")
        print(f"Details: {json.dumps(quantum_results['details'], indent=2)}")
    
    if args.all or (args.baseline_json and args.quantized_json):
        baseline_path = args.baseline_json or "phase4_outputs/llm_eval_baseline.json"
        quantized_path = args.quantized_json or "phase4_outputs/llm_eval_post_quant.json"
        print(f"\n=== ENERGY/COMPRESSION CRITERIA ===")
        print(f"Testing: {baseline_path} vs {quantized_path}")
        energy_results = validate_energy_criteria(baseline_path, quantized_path)
        results["energy"] = energy_results
        print(f"PASSED: {energy_results['passed']}")
        print(f"Details: {json.dumps(energy_results['details'], indent=2)}")
        if 'criteria_met' in energy_results:
            print(f"Criteria met: {json.dumps(energy_results['criteria_met'], indent=2)}")
    
    if args.all or args.sgd_evo_json:
        sgd_evo_path = args.sgd_evo_json or "phase4_outputs/sgd_vs_evo.json"
        print(f"\n=== TRAINING COST CRITERIA ===")
        print(f"Testing: {sgd_evo_path}")
        training_results = validate_training_criteria(sgd_evo_path)
        results["training"] = training_results
        print(f"PASSED: {training_results['passed']}")
        print(f"Details: {json.dumps(training_results['details'], indent=2)}")
    
    # Overall summary (guard the no-input case: all() over an empty dict would
    # otherwise report success without having checked anything)
    if not results:
        parser.print_help()
        print("\nNo criteria selected; use --all or pass at least one input path.")
        return 1
    
    print(f"\n=== OVERALL SUMMARY ===")
    passed_count = sum(1 for r in results.values() if r['passed'])
    total_count = len(results)
    print(f"Passed: {passed_count}/{total_count} criteria")
    
    all_passed = all(r['passed'] for r in results.values())
    print(f"ALL CRITERIA MET: {all_passed}")
    
    return 0 if all_passed else 1

if __name__ == '__main__':
    sys.exit(main())