File size: 44,950 Bytes
2e237ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
#!/usr/bin/env python3
# update_docx_from_json.py
import sys, json, re
from pathlib import Path
from typing import Dict, List, Tuple, Optional
from docx import Document
from docx.shared import RGBColor, Pt  # add Pt
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
from copy import deepcopy
from docx.oxml.ns import qn
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P

# Colour constants shared by the helpers in this module:
#   BLACK - colour applied to runs written by the *_black helpers.
#   RED   - the exact RGB value is_red_run() compares against to spot placeholders.
BLACK = RGBColor(0, 0, 0)
RED = RGBColor(0xFF, 0x00, 0x00)

# ----------------------------- text helpers -----------------------------
def _find_table_with_headers(doc: Document, must_have: list[str]) -> Optional[Table]:
    """Return the first table whose first row mentions every label in `must_have`.

    Both the header row text and the wanted labels are normalised with
    `canon_label`, so punctuation, casing and spacing differences on either
    side cannot prevent a match.

    Args:
        doc: document whose `tables` are scanned in order.
        must_have: labels that must all occur (as substrings) in the
            normalised text of the first row.

    Returns:
        The first matching table, or None when no table matches.
    """
    # Normalise the needles once instead of once per table.
    wanted = [canon_label(label) for label in must_have]
    for table in doc.tables:
        if not table.rows:
            continue
        header = canon_label(" ".join(cell_text(c) for c in table.rows[0].cells))
        if all(label in header for label in wanted):
            return table
    return None

def ensure_auditor_decl_headers(doc: Document) -> bool:
    """
    Force the HEADER row of the auditor declaration table to read exactly:
      [ Print Name | NHVR or Exemplar Global Auditor Registration Number ]
    Never touch the bottom (values) row.

    The table is recognised by a first header cell reading "Print Name",
    at least two columns and at least two rows. A table whose second header
    already reads "Position Title" is skipped: that shape is the operator
    declaration table (see fill_operator_declaration) and must not have its
    header replaced by the auditor wording.

    Returns:
        True when at least one header cell was rewritten, otherwise False.
    """
    expected_left = "Print Name"
    expected_right = "NHVR or Exemplar Global Auditor Registration Number"

    def has_red(cell) -> bool:
        # A header cell still holding red (placeholder) runs must be rewritten.
        return any(is_red_run(r) for p in cell.paragraphs for r in p.runs)

    for t in doc.tables:
        if len(t.rows) < 2:
            continue
        header_cells = t.rows[0].cells
        if len(header_cells) < 2:
            continue
        if canon_label(cell_text(header_cells[0])) != canon_label(expected_left):
            continue

        right_now = canon_label(cell_text(header_cells[1]))
        if "position title" in right_now:
            # Operator declaration table, not the auditor one: keep looking.
            continue

        changed = False
        # The left text already matches; only red formatting needs fixing.
        if has_red(header_cells[0]):
            _set_cell_text_black(header_cells[0], expected_left)
            changed = True
        # The right header is rewritten when its text differs or is still red.
        if right_now != canon_label(expected_right) or has_red(header_cells[1]):
            _set_cell_text_black(header_cells[1], expected_right)
            changed = True
        # Only the first matching table is handled.
        return changed

    return False


def fill_operator_declaration(doc: Document, print_name: str, position_title: str) -> bool:
    """Write the operator's name and title into the second row of the declaration table.

    The table is the first one whose header row mentions both "Print Name"
    and "Position Title". A value cell is overwritten (in black) only when it
    still contains at least one red run, i.e. an unfilled placeholder.

    Args:
        doc: document to update in place.
        print_name: text for the first cell of the second row.
        position_title: text for the second cell of the second row.

    Returns:
        False when no suitable table exists (missing, fewer than two rows, or
        fewer than two cells in either of the first two rows); True otherwise,
        even if no cell needed replacing.
    """
    t = _find_table_with_headers(doc, ["Print Name", "Position Title"])
    if t is None or len(t.rows) < 2 or len(t.rows[0].cells) < 2:
        return False

    value_cells = t.rows[1].cells
    if len(value_cells) < 2:
        # Ragged/merged bottom row: nothing safe to write into.
        return False

    for cell, value in ((value_cells[0], print_name), (value_cells[1], position_title)):
        # only replace if that cell has a red placeholder
        if any(is_red_run(r) for p in cell.paragraphs for r in p.runs):
            _set_cell_text_black(cell, value)
    return True

def find_heading_index_from_end(doc: Document, heading: str) -> Optional[int]:
    """Return the index of the last paragraph whose text contains `heading`.

    Indices refer to the list returned by `iter_paragraphs(doc)`; both sides
    are compared after `canon` normalisation. Returns None when nothing matches.
    """
    needle = canon(heading)
    paragraphs = iter_paragraphs(doc)
    for position in reversed(range(len(paragraphs))):
        if needle in canon(para_text(paragraphs[position])):
            return position
    return None

def set_date_by_heading_from_end(doc: Document, heading: str, date_text: str, max_scan: int = 60) -> bool:
    """Write `date_text` over the first red run found after the LAST `heading`.

    At most `max_scan` paragraphs following the heading are examined. Returns
    True once a paragraph with red runs has been rewritten (in black), and
    False when `date_text` is empty, the heading is absent, or no red run is
    found in the scanned window.
    """
    if not date_text:
        return False
    paragraphs = iter_paragraphs(doc)
    heading_at = find_heading_index_from_end(doc, heading)
    if heading_at is None:
        return False
    first = heading_at + 1
    # Slicing clamps at the end of the list, so no explicit bound is needed.
    window = paragraphs[first : first + max_scan]
    return any(replace_red_in_paragraph(para, date_text) for para in window)

def set_date_by_paragraph_from_end(doc: Document, paragraph_text: str, date_text: str, max_scan: int = 60) -> bool:
    """Write `date_text` over the first red run after the LAST paragraph containing `paragraph_text`.

    Matching is done on `canon`-normalised text. Up to `max_scan` paragraphs
    after the anchor are inspected; the first one holding red runs is
    rewritten in black and True is returned. Returns False when `date_text`
    is empty, the anchor is not found, or no red run follows it.
    """
    if not date_text:
        return False
    needle = canon(paragraph_text)
    paragraphs = iter_paragraphs(doc)
    anchor = next(
        (pos for pos in reversed(range(len(paragraphs)))
         if needle in canon(para_text(paragraphs[pos]))),
        None,
    )
    if anchor is None:
        return False
    first = anchor + 1
    # The slice end is clamped by Python, matching an explicit min() bound.
    for candidate in paragraphs[first : first + max_scan]:
        if replace_red_in_paragraph(candidate, date_text):
            return True
    return False

def set_layer3_name_after_management_heading(doc: Document, mid_heading: str, allowed_prev_titles: List[str], name: str) -> bool:
    """Overwrite the line that follows a three-line title block with `name`.

    For every paragraph whose normalised text equals `mid_heading`:
      * the nearest non-empty paragraph above must be one of `allowed_prev_titles`;
      * the nearest non-empty paragraph below is cleared and rewritten with
        `name` in black, using the font size of the middle heading (16pt when
        that size cannot be determined).

    Returns True when at least one paragraph was rewritten; False otherwise,
    including when `name` is empty.
    """
    if not name:
        return False

    paragraphs = iter_paragraphs(doc)
    total = len(paragraphs)
    heading_key = canon(mid_heading)
    accepted_titles = {canon(title) for title in allowed_prev_titles}

    def nearest_non_empty(start: int, step: int) -> Optional[int]:
        # Walk from `start` in direction `step` until a paragraph with text is found.
        pos = start
        while 0 <= pos < total and not nz(para_text(paragraphs[pos])):
            pos += step
        return pos if 0 <= pos < total else None

    did_write = False
    for here, para in enumerate(paragraphs):
        if canon(para_text(para)) != heading_key:
            continue

        above = nearest_non_empty(here - 1, -1)
        if above is None or canon(para_text(paragraphs[above])) not in accepted_titles:
            continue

        below = nearest_non_empty(here + 1, 1)
        if below is None:
            continue

        # Size is taken from the middle heading before the target is rewritten.
        size = _para_effective_font_size(paragraphs[here]) or Pt(16)
        target = paragraphs[below]
        _clear_para_and_write_black(target, name)

        # Explicit run sizes take precedence over the paragraph style.
        for run in target.runs:
            run.font.size = size

        did_write = True

    return did_write

def _para_effective_font_size(p: Paragraph):
    """Return a font size for paragraph `p`, or None if none is set.

    The first run with an explicit (truthy) size wins; otherwise the size
    from the paragraph's style font is used when present.
    """
    explicit = next((run.font.size for run in p.runs if run.font.size), None)
    if explicit:
        return explicit
    style = p.style
    if style and style.font and style.font.size:
        return style.font.size
    return None

# --- helpers for summary tables ---
# --- helpers for summary overwrite ---
def _std_key(s: str) -> str:
    """
    Reduce a label to its 'std N' key when it starts with one.
    e.g. 'Std 7. Internal Review' -> 'std 7'
    Labels that do not start with 'std <digits>' are returned in their
    canon_label-normalised form.
    """
    normalized = canon_label(s)
    hit = re.match(r"(std\s+\d+)", normalized)
    if hit is None:
        return normalized
    return hit.group(1)

def _looks_like_summary_table(table: Table) -> Optional[Tuple[int, int]]:
    """
    Decide whether `table` is a Summary table with a DETAILS column.

    Returns (label_col_idx, details_col_idx) when the first row has at least
    two cells and one of them mentions "detail"; otherwise None. The label
    column is the first header cell naming one of the management sections;
    when none does, column 0 is assumed (or column 1 if DETAILS is column 0).
    """
    if not table.rows:
        return None
    header_cells = table.rows[0].cells
    if len(header_cells) < 2:
        return None

    titles = [canon(cell_text(cell)) for cell in header_cells]

    details_idx = next((j for j, title in enumerate(titles) if "detail" in title), None)
    if details_idx is None:
        return None

    section_words = ["maintenance management", "mass management", "fatigue management"]
    label_idx = next(
        (j for j, title in enumerate(titles) if any(word in title for word in section_words)),
        None,
    )
    if label_idx is None:
        # No section title in the header: take the first column that is not DETAILS.
        label_idx = 1 if details_idx == 0 else 0

    return (label_idx, details_idx)
def count_header_rows(table: Table, scan_up_to: int = 6) -> int:
    """Count header rows: the index of the first row whose first cell is a number like '1.'.

    Only the first `scan_up_to` rows are inspected; 1 is returned when no
    such row is found.

    NOTE: a function with the same name is defined again further down this
    module; that later definition is the one bound at import time.
    """
    numbered = re.compile(r"^\d+\.?$")
    for position, row in enumerate(table.rows[:scan_up_to]):
        if numbered.match(cell_text(row.cells[0]).strip()):
            return position
    return 1
def _header_col_texts(table: Table, scan_rows: int = 5) -> List[str]:
    """Return one normalised text per column, built from the top rows of `table`.

    The first `scan_rows` rows (or fewer if the table is shorter) are read.
    The column count is taken from the widest of those rows; for each column
    the cell texts of all scanned rows that have that column are joined with
    spaces and passed through `canon`. An empty table yields [].
    """
    depth = min(scan_rows, len(table.rows))
    if depth == 0:
        return []
    # Read each scanned row's cells once; the widest row fixes the column count.
    cells_by_row = [table.rows[i].cells for i in range(depth)]
    width = max(len(cells) for cells in cells_by_row)
    return [
        canon(" ".join(cell_text(cells[col]) for cells in cells_by_row if col < len(cells)))
        for col in range(width)
    ]

def count_header_rows(table: Table, scan_up_to: int = 6) -> int:
    """Return how many rows precede the first row whose 1st cell looks like '1' or '1.'.

    At most `scan_up_to` rows are examined. When no numbered row is found in
    that window, a single header row is assumed and 1 is returned.
    """
    window = min(scan_up_to, len(table.rows))
    return next(
        (
            position
            for position in range(window)
            if re.match(r"^\d+\.?$", cell_text(table.rows[position].cells[0]).strip())
        ),
        1,  # fallback to 1 header row
    )

def map_cols_mass_strict(table: Table) -> Dict[str, int]:
    """Map logical column keys of the Mass vehicle table to column indices.

    Header texts come from `_header_col_texts(table, 5)`. Each key is assigned
    the first column whose text contains all words of one of its needle sets;
    needle sets are tried in order. Keys with no matching column are omitted.

    Returns:
        Dict with any of the keys "no", "reg", "wv", "rfs", "susp", "trip",
        "frs" mapped to a 0-based column index.
    """
    cols = _header_col_texts(table, 5)

    def first_col(*needles):
        # Index of the first column containing every needle, else None.
        for j, text in enumerate(cols):
            if all(n in text for n in needles):
                return j
        return None

    def first_of(*needle_sets):
        # Try each needle set in turn. Compare against None explicitly:
        # column index 0 is a valid hit and must not be treated as "missing".
        for needles in needle_sets:
            j = first_col(*needles)
            if j is not None:
                return j
        return None

    idx = {
        "no":   first_of(("no",)),
        "reg":  first_of(("registration", "number"), ("registration",)),
        "wv":   first_of(("weight", "verification")),
        "rfs":  first_of(("rfs", "cert"), ("rfs", "certification")),
        "susp": first_of(("suspension", "maintenance")),
        "trip": first_of(("trip", "record")),
        "frs":  first_of(("fault", "suspension"), ("fault", "reporting", "suspension")),
    }
    return {k: v for k, v in idx.items() if v is not None}

def find_mass_vehicle_numbers_table(doc: Document) -> Optional[Table]:
    """Return the table that best matches the Mass vehicle-number column set.

    Each table is scored on how many of six expected column signatures appear
    in its header texts. Tables whose headers mention "details" are rejected
    (those are Summary tables), as are tables with fewer than four hits. A
    numbering column adds 0.5 and every row adds 0.01, so on equal hits the
    numbered / longer table wins. Returns None when no table qualifies.
    """
    # Each tuple lists words that must all occur in one header column.
    # ("rfs", "cert") also covers "certification", which contains "cert".
    signatures = (
        ("registration", "number"),
        ("weight", "verification"),
        ("rfs", "cert"),
        ("suspension", "maintenance"),
        ("trip", "record"),
        ("fault", "suspension"),
    )

    winner = None
    top_score = -1
    for table in iter_tables(doc):
        headers = _header_col_texts(table, 5)
        if "details" in " ".join(headers):
            continue
        hits = sum(
            1
            for words in signatures
            if any(all(word in header for word in words) for header in headers)
        )
        if hits < 4:
            continue
        has_numbering = any(header == "no" or header.startswith("no ") for header in headers)
        score = hits + (0.5 if has_numbering else 0) + (len(table.rows) / 100.0)
        if score > top_score:
            winner, top_score = table, score
    return winner

def update_operator_declaration(doc: Document, print_name: str, position_title: str) -> bool:
    """
    Fill the operator's 'Print Name' and 'Position Title'.

    1) Table label approach: locate each label cell (tolerating a few
       spelling variants of THAT label only) and write the value into the
       adjacent value cell.
    2) Fallback, used only when step 1 wrote nothing: take the first two red
       runs found in the 20 paragraphs after the 'Operator Declaration'
       heading and replace them with the name and the title respectively.

    Returns:
        True when at least one value was written, otherwise False.
    """
    # Variants are kept per label so that a search for one label can never
    # resolve to the other label's cell.
    label_variants = {
        "Print Name": ("Print Name", "PrintName", "Print  Name", "Print Name:"),
        "Position Title": ("Position Title", "PositionTitle", "Position  Title", "Position Title:"),
    }

    changed = False
    # 1) Table label approach
    for lbl, val in (("Print Name", print_name), ("Position Title", position_title)):
        if not val:
            continue
        loc = None
        for variant in label_variants[lbl]:
            loc = find_label_cell(doc, variant)
            if loc:
                break
        if not loc:
            continue
        t, r, c = loc
        cell = get_adjacent_value_cell(t, r, c)
        if not replace_red_in_cell(cell, val):
            _set_cell_text_black(cell, val)
        changed = True

    if changed:
        return True

    # 2) Fallback: heading-scoped red placeholders
    head = "OPERATOR DECLARATION"
    p = find_heading_paragraph(doc, head) or find_heading_paragraph(doc, head.title())
    if not p:
        return False
    allp = iter_paragraphs(doc)
    try:
        i = allp.index(p)
    except ValueError:
        # The heading may be a different proxy object for the same XML
        # paragraph; match on the underlying element before falling back
        # to the start of the document as the original code did.
        i = next((k for k, q in enumerate(allp) if q._element is p._element), 0)

    red_targets = []
    for q in allp[i + 1 : i + 1 + 20]:
        red_targets.extend(r for r in q.runs if is_red_run(r))
        if len(red_targets) >= 2:
            break

    wrote = False
    if print_name and red_targets:
        _set_text_and_black(red_targets[0], print_name)
        wrote = True
    if position_title and len(red_targets) >= 2:
        _set_text_and_black(red_targets[1], position_title)
        wrote = True
    return wrote


def fill_mass_vehicle_table_preserve_headers(table: Table, arrays: Dict[str, List[str]]):
    """Rebuild the data rows of the Mass vehicle table from `arrays`, keeping header rows.

    Does nothing when no registration column can be mapped. Otherwise every
    row after the detected header rows is removed, one row is created per
    entry of arrays["Registration Number"], and each mapped column is filled
    from its array (blank when that array is shorter).
    """
    colmap = map_cols_mass_strict(table)
    if "reg" not in colmap:
        return

    header_count = count_header_rows(table, 6)
    registrations = arrays.get("Registration Number", [])
    wanted_rows = len(registrations)

    # Drop existing data rows (from the bottom), leaving only the headers ...
    while len(table.rows) > header_count:
        table._tbl.remove(table.rows[-1]._tr)
    # ... then grow the table until there is one row per registration.
    while len(table.rows) < header_count + wanted_rows:
        table.add_row()

    # (column key in colmap, key in `arrays`), in the order cells are written.
    optional_columns = (
        ("wv",   "Weight Verification Records"),
        ("rfs",  "RFS Suspension Certification #"),
        ("susp", "Suspension System Maintenance"),
        ("trip", "Trip Records"),
        ("frs",  "Fault Recording/ Reporting on Suspension System"),
    )

    for offset in range(wanted_rows):
        row = table.rows[header_count + offset]
        replace_red_in_cell(row.cells[colmap["reg"]], nz(registrations[offset]))
        for col_key, array_key in optional_columns:
            if col_key not in colmap:
                continue
            values = arrays.get(array_key, [])
            text = nz(values[offset]) if offset < len(values) else ""
            replace_red_in_cell(row.cells[colmap[col_key]], text)

def overwrite_summary_details_cells(doc: Document, section_name: str, section_dict: Dict[str, List[str]]) -> int:
    """For a Summary table (Maintenance/Mass/Fatigue), replace the entire DETAILS cell
    for each Std N row with the JSON text (written in black).

    Args:
        doc: document to update in place.
        section_name: section title; only its first word is used to pick the
            tables whose header text mentions that section.
        section_dict: mapping of row label -> value(s); labels are reduced to
            their 'std N' key with `_std_key`, values joined with `join_value`.

    Returns:
        Number of DETAILS cells overwritten (0 when `section_name` is blank).
    """
    words = section_name.split()
    if not words:
        return 0
    wanted_prefix = canon_label(words[0])  # "maintenance" | "mass" | "fatigue"

    # build desired texts
    desired: Dict[str, str] = {_std_key(k): join_value(v) for k, v in section_dict.items()}

    updated = 0
    for t in doc.tables:
        cols = _looks_like_summary_table(t)
        if cols is None:
            continue
        label_col, details_col = cols

        head_txt = table_header_text(t, up_to_rows=2)
        if wanted_prefix not in head_txt:   # keep to the correct section
            continue

        # walk body rows
        for i in range(1, len(t.rows)):
            cells = t.rows[i].cells
            if label_col >= len(cells) or details_col >= len(cells):
                # Ragged row without the expected columns.
                continue
            key = _std_key(cell_text(cells[label_col]))

            cand = desired.get(key)
            if not cand:
                m = re.match(r"(std\s+\d+)", key)
                if m:
                    prefix = m.group(1)
                    # Match the whole standard number only: "std 1" must not
                    # pick up the text stored for "std 10", "std 11", ...
                    for k2, v2 in desired.items():
                        if v2 and (k2 == prefix or k2.startswith(prefix + " ")):
                            cand = v2
                            break
            if not cand:
                continue

            _set_cell_text_black(cells[details_col], cand)  # full overwrite, black
            updated += 1
    return updated

# Whitespace that directly follows '.', '?' or '!' (sentence boundary).
SPLIT_SENT_PAT = re.compile(r"(?<=\.|\?|!)\s+")
# Dates written like "3rd March 2024" (case-insensitive).
ORDINAL_DATE_PAT = re.compile(r"\b(\d{1,2}(?:st|nd|rd|th)\s+[A-Za-z]+\s+\d{4})\b", re.I)

def split_sentences_keep(text: str) -> List[str]:
    """Split `text` into sentences, keeping each sentence's end punctuation.

    Whitespace is collapsed to single spaces first. A falsy `text` (None or
    empty) or whitespace-only text yields an empty list; text without
    sentence punctuation comes back as a single item.
    """
    collapsed = " ".join(str(text or "").split())
    if not collapsed:
        return []
    pieces = [chunk.strip() for chunk in SPLIT_SENT_PAT.split(collapsed)]
    # Only the trailing piece is dropped when it turns out to be empty.
    if not pieces[-1]:
        pieces.pop()
    return pieces

# Sentence boundary: whitespace after '.', '!' or '?', or one or more newlines.
_sent_split = re.compile(r'(?<=[.!?])\s+|\n+')
# Dates as "3rd March 2024", "3/3/2024" (2- or 4-digit year) or "March 3, 2024".
_date_pat   = re.compile(r'\b(?:\d{1,2}(?:st|nd|rd|th)\s+[A-Za-z]+\s+\d{4}|\d{1,2}/\d{1,2}/\d{2,4}|[A-Za-z]+\s+\d{1,2},\s*\d{4})\b')

def extract_summary_snippets(desired_text: str):
    """Pull reusable snippets out of a summary text.

    Returns a dict with:
      sheet_sent   - first sentence mentioning "daily check" or "sheet", else None
      sheet_phrase - result of _extract_sheet_phrase_from_desired(desired_text)
      review       - first sentence mentioning "internal review", else None
      qcs          - first sentence with "quarterly ... compliance", else the
                     first with "quarterly", else None
      dates        - every date-like substring matched by _date_pat
      sents        - all sentences as produced by _sentences(desired_text)
    Sentence searches are case-insensitive.
    """
    sentences = _sentences(desired_text)
    found_dates = [hit.group(0) for hit in _date_pat.finditer(desired_text)]

    def first_matching(pattern):
        # First sentence in which `pattern` is found, or None.
        for sentence in sentences:
            if re.search(pattern, sentence, re.I):
                return sentence
        return None

    return {
        "sheet_sent": first_matching(r'\b(daily\s+check|sheet)\b'),
        "sheet_phrase": _extract_sheet_phrase_from_desired(desired_text),
        "review":  first_matching(r'\binternal\s+review\b'),
        "qcs":     first_matching(r'\bquarterly\b.*\bcompliance\b') or first_matching(r'\bquarterly\b'),
        "dates":   found_dates,
        "sents":   sentences,
    }

def fill_management_summary_tables(doc: Document, section_key: str, section_data: Dict[str, List[str]]):
    """
    Patch the DETAILS cells of every summary table belonging to `section_key`
    ('maintenance'|'mass'|'fatigue').

    For each data row (the first row is skipped as header) the normalised
    left-column label is compared with every normalised key of
    `section_data`; whenever a key is contained in the row label, the row's
    DETAILS cell is handed to patch_details_cell_from_json together with the
    joined value. A row may therefore be patched by more than one key.
    """
    matching = [entry for entry in find_all_summary_tables(doc) if entry[0] == section_key]
    if not matching:
        return

    # (normalised label, text to write); labels that normalise to "" are dropped
    wanted = []
    for label, vals in section_data.items():
        norm_label = canon_label(label)
        if norm_label:
            wanted.append((norm_label, join_value(vals)))

    for _section, table, label_col, details_col in matching:
        for row_idx in range(1, len(table.rows)):
            row_label = canon_label(cell_text(table.rows[row_idx].cells[label_col]))
            if not row_label:
                continue
            for norm_label, text in wanted:
                # substring match tolerates minor punctuation differences
                if norm_label in row_label:
                    patch_details_cell_from_json(table.rows[row_idx].cells[details_col], text)

def _set_text_and_black(run, new_text: str):
    """Overwrite `run`'s text and colour it black.

    None is written as an empty string. Clearing the theme colour afterwards
    is best-effort: any error raised while doing so is ignored.
    """
    run.text = "" if new_text is None else str(new_text)
    run.font.color.rgb = BLACK
    try:
        # a leftover theme colour could otherwise override the rgb value
        run.font.color.theme_color = None
    except Exception:
        pass

def update_business_summary_once(doc: Document, value) -> bool:
    """Write the business summary into the value cell next to its label.

    The label is looked up with and without a trailing colon. Inside the
    value cell only paragraphs containing red runs are treated as summary
    placeholders: the first receives the text (in black) and the others are
    emptied; paragraphs without red runs are left alone. When the cell has no
    red paragraph, the text goes into its first paragraph.

    Returns False when the label cell cannot be found, True otherwise.
    """
    loc = None
    for label in ("Nature of the Operators Business (Summary)",
                  "Nature of the Operators Business (Summary):"):
        loc = find_label_cell(doc, label)
        if loc:
            break
    if not loc:
        return False

    table, row_idx, col_idx = loc
    cell = get_adjacent_value_cell(table, row_idx, col_idx)
    if not cell.paragraphs:
        cell.add_paragraph("")

    summary = join_value(value)
    placeholders = [p for p in cell.paragraphs if any(is_red_run(run) for run in p.runs)]

    if not placeholders:
        # nothing marked red: use the first paragraph and keep the rest
        _clear_para_and_write_black(cell.paragraphs[0], summary)
        return True

    first, *extras = placeholders
    _clear_para_and_write_black(first, summary)
    for leftover in extras:
        _clear_para_and_write_black(leftover, "")
    return True


def _nuke_cell_paragraphs(cell: _Cell):
    """Delete every paragraph element of `cell` from its parent (not merely blank it)."""
    elements = [para._element for para in cell.paragraphs]
    for element in elements:
        element.getparent().remove(element)

def _clear_para_and_write_black(paragraph, text: str):
    """Blank all existing runs of `paragraph`, then append `text` as a new black run.

    A falsy `text` is written as an empty string. Existing runs are kept
    (with empty text) rather than removed.
    """
    for old_run in list(paragraph.runs):
        old_run.text = ""
    fresh = paragraph.add_run(str(text or ""))
    fresh.font.color.rgb = BLACK
    try:
        # best-effort: drop any theme colour on the new run
        fresh.font.color.theme_color = None
    except Exception:
        pass

def _set_cell_text_black(cell, text: str):
    """Blank every run in `cell`, then add `text` as a black run to its first paragraph.

    A paragraph is created when the cell has none. A falsy `text` is written
    as an empty string.
    """
    for run in (r for para in cell.paragraphs for r in para.runs):
        run.text = ""
    paragraphs = cell.paragraphs
    target = paragraphs[0] if paragraphs else cell.add_paragraph()
    fresh = target.add_run(str(text or ""))
    fresh.font.color.rgb = BLACK
    try:
        # best-effort: drop any theme colour on the new run
        fresh.font.color.theme_color = None
    except Exception:
        pass

def nz(x: Optional[str]) -> str:
    """Return `x` as a stripped string, mapping None/falsy values to "".

    Non-string truthy values (e.g. numbers coming from JSON) are converted
    with str() instead of raising AttributeError on `.strip()`.
    """
    return str(x or "").strip()

def canon(s: str) -> str:
    """Normalise text for comparison while keeping common punctuation.

    Whitespace runs become single spaces, the result is trimmed and
    lower-cased, en/em dashes become '-', and every character other than
    a-z, 0-9, space and / # ( ) + , . - is removed.
    """
    text = re.sub(r"\s+", " ", str(s))
    text = text.strip().lower()
    for dash in ("–", "—"):
        text = text.replace(dash, "-")
    return re.sub(r"[^a-z0-9/#()+,.\- ]+", "", text)

def canon_label(s: str) -> str:
    """Normalise a label down to lower-case letters, digits and single spaces.

    Punctuation (including dashes) is turned into spaces, so labels that
    differ only in punctuation, casing or spacing compare equal.
    """
    dashes = str.maketrans({"–": "-", "—": "-"})
    text = re.sub(r"\s+", " ", str(s)).strip().lower()
    text = text.translate(dashes)
    text = re.sub(r"[^a-z0-9 ]+", " ", text)
    return re.sub(r"\s+", " ", text).strip()

def join_value(value) -> str:
    """Turn a JSON value into the text to write into the document.

    * None -> "" (so the literal text "None" is never written).
    * list -> items joined with newlines; None, falsy and blank items are
      skipped, and non-string items are converted with str().
    * anything else -> str(value).
    """
    if value is None:
        return ""
    if isinstance(value, list):
        # Keep multi-line when list provided
        return "\n".join(str(v) for v in value if str(v or "").strip())
    return str(value)

def split_digits(s: str) -> List[str]:
    """Return every digit character of `s`, in order, as a list of 1-char strings."""
    return [hit.group(0) for hit in re.finditer(r"\d", s)]

def para_text(p: Paragraph) -> str:
    """Return the concatenated text of all runs in paragraph `p`."""
    pieces = [run.text for run in p.runs]
    return "".join(pieces)

def cell_text(c: _Cell) -> str:
    """Return the text of cell `c`, one line per paragraph (joined with newlines)."""
    lines = []
    for para in c.paragraphs:
        lines.append(para_text(para))
    return "\n".join(lines)

def is_red_run(run) -> bool:
    """Tell whether `run` is coloured with the explicit RGB value RED.

    Runs with no colour information, or whose colour has no rgb value
    (for instance theme colours), are not considered red.
    """
    color = run.font.color
    if not color or color.rgb is None:
        return False
    return color.rgb == RED

def replace_red_in_paragraph(p: Paragraph, new_text: str) -> bool:
    """Replace the red runs of paragraph `p` with `new_text` written in black.

    The first red run receives the text; any further red runs are emptied.
    Returns False (and changes nothing) when the paragraph has no red run.
    """
    placeholders = [run for run in p.runs if is_red_run(run)]
    if not placeholders:
        return False
    head, *tail = placeholders
    _set_text_and_black(head, new_text)
    for leftover in tail:
        leftover.text = ""
    return True

def replace_red_in_cell(cell: _Cell, new_text: str) -> bool:
    """Write `new_text` (in black) over the red placeholder text of `cell`.

    The value is written once, into the first paragraph that has red runs;
    red runs in later paragraphs are emptied so the value is not repeated on
    every placeholder line. When the cell has no red run at all, the whole
    cell is replaced with `new_text`.

    Returns:
        Always True (the cell is updated one way or the other).
    """
    wrote = False
    for p in cell.paragraphs:
        if not wrote:
            wrote = replace_red_in_paragraph(p, new_text)
        else:
            # Remaining placeholder lines are blanked rather than refilled.
            replace_red_in_paragraph(p, "")
    if not wrote:
        # fallback: clear cell, set single paragraph text in black
        _set_cell_text_black(cell, new_text)
    return True

def parse_attendance_lines(value) -> List[str]:
    """
    Split an attendance value into "Name - Title" entries, e.g.
      "Peter Sheppard - Compliance Greg Dyer - Auditor"
    becomes
      ["Peter Sheppard - Compliance", "Greg Dyer - Auditor"]

    Lists are joined with spaces first and all whitespace (newlines included)
    is collapsed to single spaces, so entries are separated by ';', '|' or by
    the capitalised-name pattern. A chunk with no such pattern is kept as a
    single "a - b" entry when it contains " - ", otherwise kept verbatim.
    Empty input yields [].
    """
    if isinstance(value, list):
        raw = " ".join(str(v) for v in value if v)
    else:
        raw = str(value or "")
    flat = re.sub(r"\s+", " ", raw).strip()
    if not flat:
        return []

    # "<Capitalised name, 1-4 words> - <title>", where the title runs up to
    # the next such name followed by a dash, or to the end of the chunk.
    pair_pat = re.compile(
        r"([A-Z][A-Za-z.'-]+(?:\s+[A-Z][A-Za-z.'-]+){0,3})\s*-\s*"
        r"([^-\n]+?)(?=\s+[A-Z][A-Za-z.'-]+(?:\s+[A-Z][A-Za-z.'-]+){0,3}\s*-\s*|$)"
    )

    entries: List[str] = []
    for piece in re.split(r"\s*[\n;|]\s*", flat):
        piece = piece.strip()
        if not piece:
            continue
        pairs = [
            f"{hit.group(1).strip()} - {hit.group(2).strip()}"
            for hit in pair_pat.finditer(piece)
        ]
        if pairs:
            entries.extend(pairs)
        elif " - " in piece:
            # Fallback: single "Name - Title"
            left, right = piece.split(" - ", 1)
            entries.append(f"{left.strip()} - {right.strip()}")
        else:
            entries.append(piece)

    return entries

def fill_attendance_block(doc: Document, value) -> bool:
    """Rewrite the attendance cell with parsed entries plus existing black lines.

    The value cell is the one directly below the "Attendance List (Names and
    Position Titles)" label when that position exists, otherwise whatever
    ``get_adjacent_value_cell`` returns. Only that single cell is read and
    rewritten.

    Output order:
        1. one parsed entry per red paragraph found in the cell,
        2. existing non-red paragraphs that look like ``"a - b"``,
        3. remaining parsed entries not already present (compared
           case-insensitively with whitespace collapsed).

    Returns:
        ``False`` when nothing could be parsed or the label is not found,
        ``True`` after the cell has been rewritten.
    """
    entries = parse_attendance_lines(value)
    if not entries:
        return False

    located = find_label_cell(doc, "Attendance List (Names and Position Titles)")
    if not located:
        return False
    table, row_i, col_i = located

    below = row_i + 1
    if below < len(table.rows) and col_i < len(table.rows[below].cells):
        target = table.rows[below].cells[col_i]
    else:
        target = get_adjacent_value_cell(table, row_i, col_i)

    def _has_red(par) -> bool:
        # A paragraph counts as a placeholder if any of its runs is red.
        for run in par.runs:
            if is_red_run(run):
                return True
        return False

    def _is_name_title(text: str) -> bool:
        left, sep, right = text.partition(" - ")
        return bool(sep) and bool(left.strip()) and bool(right.strip())

    def _dedupe_key(text: str) -> str:
        return re.sub(r"\s+", " ", text.strip().lower())

    # Inspect the current cell content before it is wiped.
    red_total = 0
    kept_black = []
    for par in list(target.paragraphs):
        if _has_red(par):
            red_total += 1
        elif _is_name_title(para_text(par)):
            kept_black.append(para_text(par).strip())

    lines = entries[:red_total] + kept_black
    known = {_dedupe_key(line) for line in lines}
    for candidate in entries[red_total:]:
        key = _dedupe_key(candidate)
        if key in known:
            continue
        lines.append(candidate)
        known.add(key)

    # Clear the cell and write one fresh paragraph per line; an empty result
    # still produces a single empty paragraph.
    _nuke_cell_paragraphs(target)
    for text in (lines or [""]):
        _clear_para_and_write_black(target.add_paragraph(), text)

    return True

# ----------------------------- document search -----------------------------
def iter_tables(doc: Document) -> List[Table]:
    """Return the tables exposed by ``doc.tables`` as a new list."""
    return [*doc.tables]

def iter_paragraphs(doc: Document) -> List[Paragraph]:
    """Return document-level paragraphs followed by all table-cell paragraphs.

    The result starts with ``doc.paragraphs`` and then appends the paragraphs
    of every cell of every row of every table in ``doc.tables``, in that
    nesting order.
    """
    collected = [*doc.paragraphs]
    collected += [
        par
        for table in doc.tables
        for row in table.rows
        for cell in row.cells
        for par in cell.paragraphs
    ]
    return collected

def find_heading_paragraph(doc: Document, heading_text: str, window: int = 60) -> Optional[Paragraph]:
    """Locate the paragraph that carries ``heading_text``.

    Paragraph texts are compared after ``canon`` normalisation. A paragraph
    whose text *starts with* the heading wins over one that merely *contains*
    it, regardless of which appears first in the document.

    Args:
        doc: Document to search (body paragraphs and table cells, as returned
            by ``iter_paragraphs``).
        heading_text: Heading to look for.
        window: Currently unused; kept so existing callers keep working.

    Returns:
        The matching paragraph, or ``None`` when nothing matches or when the
        heading normalises to an empty string.
    """
    key = canon(heading_text)
    if not key:
        # An empty key would "match" every paragraph (startswith("") is always
        # true) and return the first paragraph of the document.
        return None

    first_containing = None
    # Single traversal: return immediately on a prefix match, remember the
    # first substring match as the fallback.
    for p in iter_paragraphs(doc):
        text = canon(para_text(p))
        if text.startswith(key):
            return p
        if first_containing is None and key in text:
            first_containing = p
    return first_containing

def find_label_cell_in_table(table: Table, label: str) -> Optional[Tuple[int, int]]:
    """Find the cell of ``table`` whose text matches ``label``.

    Cell texts and the label are compared after ``canon_label``
    normalisation. An exact match anywhere in the table takes precedence over
    a substring match; within each kind the first cell in row-major order
    wins.

    Returns:
        ``(row_index, col_index)`` of the matching cell, or ``None`` when no
        cell matches or the label normalises to an empty string.
    """
    target = canon_label(label)
    if not target:
        # Without this guard the exact-match comparison would accept the first
        # empty cell of the table for an empty label.
        return None

    first_containing = None
    for r_i, row in enumerate(table.rows):
        for c_i, cell in enumerate(row.cells):
            text = canon_label(cell_text(cell))
            if text == target:
                return (r_i, c_i)
            # Remember the first "contains" hit, but keep scanning: a later
            # exact match must still win.
            if first_containing is None and target in text:
                first_containing = (r_i, c_i)
    return first_containing

def find_label_cell(doc: Document, label: str) -> Optional[Tuple[Table, int, int]]:
    """Search the tables from ``iter_tables`` in order for a cell matching ``label``.

    Returns:
        ``(table, row_index, col_index)`` for the first table in which
        ``find_label_cell_in_table`` reports a hit, otherwise ``None``.
    """
    for table in iter_tables(doc):
        hit = find_label_cell_in_table(table, label)
        if hit:
            row_i, col_i = hit[0], hit[1]
            return (table, row_i, col_i)
    return None

def get_adjacent_value_cell(table: Table, r: int, c: int) -> _Cell:
    """Return the cell that most likely holds the value for the label at ``(r, c)``.

    Preference order:
        1. the cell to the right in the same row,
        2. the cell in the same column of the next row,
        3. the label cell itself.

    Bounds are checked against the row actually being indexed rather than
    against the first row of the table, so rows with differing cell counts
    neither raise ``IndexError`` nor hide an existing right-hand neighbour.
    """
    row_cells = table.rows[r].cells
    if c + 1 < len(row_cells):
        return row_cells[c + 1]

    if r + 1 < len(table.rows):
        below_cells = table.rows[r + 1].cells
        # The next row may be shorter than this one; only use it when it
        # really has a cell in column c.
        if c < len(below_cells):
            return below_cells[c]

    return row_cells[c]

# ----------------------------- label/value updates -----------------------------
def update_label_value_in_tables(doc: Document, label: str, value) -> bool:
    """Write ``value`` into the cell adjacent to the table cell labelled ``label``.

    The value is flattened with ``join_value`` and written through
    ``replace_red_in_cell`` into the cell chosen by
    ``get_adjacent_value_cell``.

    Returns:
        ``False`` when no table contains the label, otherwise the result of
        ``replace_red_in_cell``.
    """
    located = find_label_cell(doc, label)
    text = join_value(value)
    if not located:
        return False
    table, row_i, col_i = located
    return replace_red_in_cell(get_adjacent_value_cell(table, row_i, col_i), text)

def update_heading_followed_red(doc: Document, heading: str, value, max_scan: int = 12) -> bool:
    """Replace the first red text found shortly after a heading paragraph.

    The heading is located with ``find_heading_paragraph``; the next
    ``max_scan`` paragraphs in ``iter_paragraphs`` order (body paragraphs
    first, then table cells) are offered to ``replace_red_in_paragraph`` until
    one reports a replacement.

    Args:
        doc: Document to modify.
        heading: Heading text to search for.
        value: New content; flattened with ``join_value``.
        max_scan: Number of paragraphs after the heading to inspect.

    Returns:
        ``True`` when a red run was replaced, ``False`` when the heading was
        not found or no red run lies within the scan window.
    """
    start = find_heading_paragraph(doc, heading)
    if not start:
        return False

    # Build a linear list of paragraphs across the whole doc to get an index.
    allp = iter_paragraphs(doc)

    # Paragraph proxies are rebuilt on every traversal, so an identity-based
    # ``allp.index(start)`` is not reliable: it can fail and previously fell
    # back to index 0, scanning the top of the document instead of the text
    # after the heading. Match on the underlying XML element instead.
    start_el = start._element
    idx = next(
        (i for i, p in enumerate(allp) if p is start or p._element is start_el),
        None,
    )
    if idx is None:
        # Never guess a position: writing into unrelated text is worse than
        # reporting "not updated".
        return False

    new_text = join_value(value)
    for p in allp[idx + 1: idx + 1 + max_scan]:
        if replace_red_in_paragraph(p, new_text):
            return True
    return False

# ----------------------------- ACN per-digit fill -----------------------------
def fill_acn_digits(doc: Document, acn_value: str) -> bool:
    """Write the ACN one digit per cell after the "Australian Company Number" label.

    Target cells are taken from the cells to the right of the label in its
    own row; if those are fewer than the number of digits, whole rows below
    are appended (left to right) until enough cells are available or the
    table ends. Surplus digits are dropped when the table runs out of cells.

    Returns:
        ``False`` when ``split_digits`` yields nothing, the label is missing,
        or no target cell exists; ``True`` after writing.
    """
    digits = split_digits(acn_value)
    if not digits:
        return False

    located = find_label_cell(doc, "Australian Company Number")
    if not located:
        return False
    table, row_i, col_i = located

    # Start with everything to the right of the label cell.
    slots: List[_Cell] = list(table.rows[row_i].cells[col_i + 1:])

    # Top up from the following rows while digits still outnumber cells.
    row_total = len(table.rows)
    next_row = row_i + 1
    while len(slots) < len(digits) and next_row < row_total:
        slots.extend(table.rows[next_row].cells)
        next_row += 1

    slots = slots[:len(digits)]
    if not slots:
        return False

    for digit, cell in zip(digits, slots):
        _set_cell_text_black(cell, digit)
    return True


# ----------------------------- vehicle tables -----------------------------
def table_header_text(table: Table, up_to_rows: int = 3) -> str:
    """Return the ``canon``-normalised text of the first ``up_to_rows`` rows.

    Cell texts are concatenated with single spaces in row-major order before
    normalisation.
    """
    texts = [
        cell_text(cell)
        for row in table.rows[:up_to_rows]
        for cell in row.cells
    ]
    return canon(" ".join(texts))

def find_vehicle_table(doc: Document, want: str) -> Optional[Table]:
    """Pick the vehicle table of the requested kind by its header keywords.

    Args:
        doc: Document to search.
        want: ``"maintenance"`` or ``"mass"``; any other value matches
            nothing.

    Returns:
        Among the tables whose header text (see ``table_header_text``)
        matches, the one with the most rows (the earliest on ties), or
        ``None`` when there is no match.
    """
    # Headers the templates are expected to carry, for orientation only:
    #   maintenance: registration number, maintenance records, daily checks,
    #                fault recording, fault repair
    #   mass:        registration number, weight verification, rfs suspension,
    #                suspension system maintenance, trip records,
    #                reporting on suspension

    def _is_wanted(header: str) -> bool:
        if want == "maintenance":
            has_all = all(k in header for k in ["registration", "maintenance", "fault"])
            return has_all and "suspension" not in header
        if want == "mass":
            return "suspension" in header and "weight" in header
        return False

    matching = [t for t in iter_tables(doc) if _is_wanted(table_header_text(t))]
    # Prefer the candidate with the most rows.
    return max(matching, key=lambda tb: len(tb.rows), default=None)

def map_cols(table: Table, want: str) -> Dict[str, int]:
    """Map logical vehicle-table column keys to column indexes.

    The text of the first two rows is combined per column (normalised with
    ``canon``); each key is assigned the first column whose combined text
    contains all of that key's keywords. The column count is taken from the
    first row.

    Args:
        table: Table whose header rows are inspected.
        want: ``"maintenance"`` selects the maintenance key set; any other
            value selects the mass key set.

    Returns:
        ``{key: column_index}`` containing only the keys that were found.
    """
    top_rows = table.rows[:2]
    width = len(table.rows[0].cells)
    headers = [
        canon(" ".join(cell_text(row.cells[col]) for row in top_rows if col < len(row.cells)))
        for col in range(width)
    ]

    def locate(*needles) -> Optional[int]:
        # First column whose header text contains every needle.
        return next(
            (col for col, text in enumerate(headers) if all(n in text for n in needles)),
            None,
        )

    if want == "maintenance":
        wanted = {
            "reg": ("registration",),
            "rw": ("roadworthiness",),
            "mr": ("maintenance", "records"),
            "daily": ("daily", "check"),
            "fr": ("fault", "recording"),
            "rep": ("fault", "repair"),
        }
    else:
        wanted = {
            "reg": ("registration",),
            "wv": ("weight", "verification"),
            "rfs": ("rfs", "cert"),
            "susp": ("suspension", "maintenance"),
            "trip": ("trip", "record"),
            "frs": ("fault", "suspension"),
        }

    located = {key: locate(*needles) for key, needles in wanted.items()}
    return {key: col for key, col in located.items() if col is not None}

def clear_data_rows_keep_headers(table: Table, header_rows: int = 1):
    """Delete rows from the bottom of ``table`` until ``header_rows`` remain."""
    surplus = len(table.rows) - header_rows
    while surplus > 0:
        # Always drop the current last row's <w:tr> element from the table XML.
        table._tbl.remove(table.rows[-1]._tr)
        surplus -= 1

def ensure_rows(table: Table, need_rows: int):
    """Append rows until the table has ``need_rows`` rows plus one header row.

    Nothing is added when the table is already large enough.
    """
    missing = (need_rows + 1) - len(table.rows)
    for _ in range(missing):
        table.add_row()

def fill_vehicle_table(table: Table, want: str, arrays: Dict[str, List[str]]):
    """Rebuild the data rows of a vehicle table from column arrays.

    All rows after the first are removed, one row per entry of
    ``arrays["Registration Number"]`` is created, and each mapped column is
    written through ``replace_red_in_cell``. Columns missing from the header
    mapping, and arrays shorter than the registration list, are skipped for
    the affected cells. Nothing happens when the registration column cannot
    be located.

    Args:
        table: Table to rewrite.
        want: ``"maintenance"`` for the maintenance layout; any other value
            selects the mass layout.
        arrays: Column label -> list of cell values.
    """
    colmap = map_cols(table, want)
    if "reg" not in colmap:
        return

    # (column key in colmap, label of the source array in `arrays`)
    if want == "maintenance":
        sources = (
            ("rw", "Roadworthiness Certificates"),
            ("mr", "Maintenance Records"),
            ("daily", "Daily Checks"),
            ("fr", "Fault Recording/ Reporting"),
            ("rep", "Fault Repair"),
        )
    else:
        sources = (
            ("wv", "Weight Verification Records"),
            ("rfs", "RFS Suspension Certification #"),
            ("susp", "Suspension System Maintenance"),
            ("trip", "Trip Records"),
            ("frs", "Fault Recording/ Reporting on Suspension System"),
        )

    regs = arrays.get("Registration Number", [])
    columns = [(key, arrays.get(label, [])) for key, label in sources]

    # Keep the header row, then make room for one row per registration.
    clear_data_rows_keep_headers(table, header_rows=1)
    ensure_rows(table, len(regs))

    for i, reg in enumerate(regs):
        row = table.rows[i + 1]
        replace_red_in_cell(row.cells[colmap["reg"]], nz(reg))
        for key, vals in columns:
            if key in colmap and i < len(vals):
                replace_red_in_cell(row.cells[colmap[key]], nz(vals[i]))

# ----------------------------- driver table -----------------------------
def find_driver_table(doc: Document) -> Optional[Table]:
    """Return the first table whose header text identifies the driver table.

    A table qualifies when its header text (see ``table_header_text``)
    contains "driver / scheduler" and either "fit for duty" or "work diary".
    """
    def _is_driver_header(header: str) -> bool:
        if "driver / scheduler" not in header:
            return False
        return "fit for duty" in header or "work diary" in header

    return next(
        (t for t in iter_tables(doc) if _is_driver_header(table_header_text(t))),
        None,
    )

def map_driver_cols(table: Table) -> Dict[str,int]:
    """Map logical driver-table column keys to column indexes.

    The text of the first two rows is combined per column (normalised with
    ``canon``); each key is assigned the first column whose combined text
    contains all of that key's keywords. The column count is taken from the
    first row.

    Returns:
        ``{key: column_index}`` for the keys ``"name"``, ``"roster"``,
        ``"fit"`` and ``"wd"`` that could be located.
    """
    header_rows = table.rows[:2]
    cols = len(table.rows[0].cells)
    col_texts = []
    for j in range(cols):
        txt = " ".join(cell_text(r.cells[j]) for r in header_rows if j < len(r.cells))
        col_texts.append(canon(txt))

    def first_col(*needles) -> Optional[int]:
        for j, t in enumerate(col_texts):
            if all(n in t for n in needles):
                return j
        return None

    idx = {}
    idx["name"]   = first_col("driver", "name")
    idx["roster"] = first_col("roster", "safe")
    idx["fit"]    = first_col("fit for duty")

    # Work diary might be split across two headers; match "work diary", then
    # "electronic work diary". The check must be `is None`, not truthiness:
    # column index 0 is a valid result and must not be treated as "not found".
    wd = first_col("work diary")
    if wd is None:
        wd = first_col("electronic work diary")
    idx["wd"] = wd

    return {k: v for k, v in idx.items() if v is not None}

def fill_driver_table(table: Table, arrays: Dict[str, List[str]]):
    """Rebuild the data rows of the driver/scheduler table from column arrays.

    All rows after the first are removed and as many rows as the longest
    array are created. Each mapped column is written through
    ``replace_red_in_cell``; positions beyond the end of a shorter array get
    an empty string. The name column is only written when at least one name
    is non-blank. Nothing happens when no column could be mapped.
    """
    colmap = map_driver_cols(table)
    if not colmap:
        return

    names = arrays.get("Driver / Scheduler Name", [])
    rosters = arrays.get("Roster / Schedule / Safe Driving Plan (Date Range)", [])
    fit = arrays.get("Fit for Duty Statement Completed (Yes/No)", [])
    wd = arrays.get("Work Diary Pages (Page Numbers) Electronic Work Diary Records (Date Range)", [])

    row_total = max(len(rosters), len(fit), len(wd), len(names))
    clear_data_rows_keep_headers(table, header_rows=1)
    ensure_rows(table, row_total)

    write_names = any(str(item).strip() for item in names)

    # Columns to write, in output order: name (optional), roster, fit, wd.
    columns = []
    if "name" in colmap and write_names:
        columns.append(("name", names))
    for key, vals in (("roster", rosters), ("fit", fit), ("wd", wd)):
        if key in colmap:
            columns.append((key, vals))

    for i in range(row_total):
        row = table.rows[i + 1]
        for key, vals in columns:
            text = vals[i] if i < len(vals) else ""
            replace_red_in_cell(row.cells[colmap[key]], text)



# ----------------------------- main mapping -----------------------------
def flatten_simple_sections(data: Dict) -> Dict[str, str]:
    """Flatten plain label/value sections into ``"section::label" -> text``.

    Sections listed in the skip set (handled by dedicated fillers elsewhere)
    and sections whose value is not a dict are ignored. Every value is
    converted with ``join_value``.
    """
    skipped = {
        "Vehicle Registration Numbers Maintenance",
        "Vehicle Registration Numbers Mass",
        "Driver / Scheduler Records Examined",
        "paragraphs",
        "Attendance List (Names and Position Titles)",
        "Nature of the Operators Business (Summary)",
        "Maintenance Management Summary",
        "Mass Management Summary",
        "Fatigue Management Summary",
    }
    return {
        f"{sec}::{label}": join_value(val)
        for sec, kv in data.items()
        if sec not in skipped and isinstance(kv, dict)
        for label, val in kv.items()
    }

def run(input_json: Path, template_docx: Path, output_docx: Path):
    """Fill the Word template from the JSON data and save the result.

    Args:
        input_json: Path of the UTF-8 JSON file with the extracted data.
        template_docx: Path of the .docx template to open.
        output_docx: Path the filled document is saved to.

    The steps run once each, in the numbered order below. The "paragraphs"
    step in particular must stay outside the label loop of step 1: running
    it once per label would re-apply the heading replacements again and
    again (each pass consuming the next red run), and it would be skipped
    entirely when there are no simple labels.
    """
    with open(input_json, "r", encoding="utf-8") as f:
        data = json.load(f)

    doc = Document(str(template_docx))

    # 1) simple label/value tables
    simple = flatten_simple_sections(data)

    # Keys are "section::label". We try: (a) find the label cell somewhere and
    # write into the adjacent cell; (b) if not found, search for the section
    # heading and replace the next red run below it.
    for k, v in simple.items():
        # use the part after '::' as the label
        label = k.split("::", 1)[1] if "::" in k else k

        # SPECIAL: skip ACN here; it is filled per digit in step 3
        if canon_label(label) == "australian company number":
            continue

        ok = update_label_value_in_tables(doc, label, v)
        if not ok:
            sec = k.split("::", 1)[0] if "::" in k else k
            update_heading_followed_red(doc, sec, v)

    # 2) paragraphs block
    paras = data.get("paragraphs", {})

    # 2a) management headings: replace the next red run after each heading
    for head in ("MAINTENANCE MANAGEMENT", "MASS MANAGEMENT", "FATIGUE MANAGEMENT"):
        name_val = join_value(paras.get(head, ""))
        if name_val:
            update_heading_followed_red(doc, head, name_val, max_scan=6)

    # 2b) second-last page: date under the auditor declaration heading
    aud_head = "NHVAS APPROVED AUDITOR DECLARATION"
    aud_date = join_value(paras.get(aud_head, ""))
    if aud_date:
        set_date_by_heading_from_end(doc, aud_head, aud_date, max_scan=40)

    # last page: date under the long acknowledgement paragraph
    ack_head = ("I hereby acknowledge and agree with the findings detailed in this NHVAS Audit Summary Report. "
                "I have read and understand the conditions applicable to the Scheme, including the NHVAS Business Rules and Standards.")
    ack_date = join_value(paras.get(ack_head, ""))
    if ack_date:
        set_date_by_paragraph_from_end(doc, ack_head, ack_date, max_scan=40)

    # 2c) third-line names between each management heading and its table
    maint_name = join_value(paras.get("MAINTENANCE MANAGEMENT", ""))
    if maint_name:
        set_layer3_name_after_management_heading(
            doc,
            "MAINTENANCE MANAGEMENT",
            ["Vehicle Registration Numbers of Records Examined"],
            maint_name,
        )

    mass_name = join_value(paras.get("MASS MANAGEMENT", ""))
    if mass_name:
        set_layer3_name_after_management_heading(
            doc,
            "MASS MANAGEMENT",
            ["Vehicle Registration Numbers of Records Examined"],
            mass_name,
        )

    fat_name = join_value(paras.get("FATIGUE MANAGEMENT", ""))
    if fat_name:
        set_layer3_name_after_management_heading(
            doc,
            "FATIGUE MANAGEMENT",
            ["Driver / Scheduler Records Examined"],
            fat_name,
        )

    # 3) ACN digits
    op_info = data.get("Operator Information", {})
    acn_val = join_value(op_info.get("Australian Company Number", ""))
    if acn_val:
        fill_acn_digits(doc, acn_val)

    # 4) Vehicle tables
    maint = data.get("Vehicle Registration Numbers Maintenance", {})
    mass  = data.get("Vehicle Registration Numbers Mass", {})
    t_m = find_vehicle_table(doc, "maintenance")
    if t_m and maint:
        fill_vehicle_table(t_m, "maintenance", maint)
    t_ms = find_mass_vehicle_numbers_table(doc)
    if t_ms and mass:
        fill_mass_vehicle_table_preserve_headers(t_ms, mass)

    # 5) Driver table
    drivers = data.get("Driver / Scheduler Records Examined", {})
    t_d = find_driver_table(doc)
    if t_d and drivers:
        fill_driver_table(t_d, drivers)

    # 6) Special: Audit Declaration dates via heading
    decl = data.get("Audit Declaration dates", {})
    if decl.get("Audit was conducted on"):
        update_heading_followed_red(doc, "Audit was conducted on", decl["Audit was conducted on"])

    # 7) Operator Declaration, then the auditor declaration header row
    op_decl = data.get("Operator Declaration", {})
    if op_decl:
        fill_operator_declaration(
            doc,
            join_value(op_decl.get("Print Name", "")),
            join_value(op_decl.get("Position Title", "")),
        )

    # make sure the "NHVAS APPROVED AUDITOR DECLARATION" header row holds labels
    ensure_auditor_decl_headers(doc)

    # 8) Attendance list
    atts = data.get("Attendance List (Names and Position Titles)", {})
    att_val = atts.get("Attendance List (Names and Position Titles)")
    if att_val:
        fill_attendance_block(doc, att_val)

    # 9) Nature of the Operators Business (Summary): write once (no duplicates)
    biz = data.get("Nature of the Operators Business (Summary)", {})
    if biz:
        val = biz.get("Nature of the Operators Business (Summary):") or next(iter(biz.values()), "")
        if val:
            update_business_summary_once(doc, val)

    # 10) Summary tables: full overwrite of the details cells from JSON
    mm_sum = data.get("Maintenance Management Summary", {})
    if mm_sum:
        overwrite_summary_details_cells(doc, "Maintenance Management Summary", mm_sum)

    mass_sum = data.get("Mass Management Summary", {})
    if mass_sum:
        overwrite_summary_details_cells(doc, "Mass Management Summary", mass_sum)

    fat_sum = data.get("Fatigue Management Summary", {})
    if fat_sum:
        overwrite_summary_details_cells(doc, "Fatigue Management Summary", fat_sum)

    doc.save(str(output_docx))

# ----------------------------- CLI -----------------------------
if __name__ == "__main__":
    import sys
    from pathlib import Path

    if len(sys.argv) != 4:
        print("Usage: python updated_word.py <json> <template.docx> <output.docx>")
        sys.exit(1)

    # Arguments are classified by extension, so their order does not matter.
    json_path = None
    docx_paths = []
    for candidate in (Path(arg) for arg in sys.argv[1:4]):
        ext = candidate.suffix.lower()
        if ext == ".json" and json_path is None:
            json_path = candidate
        if ext == ".docx":
            docx_paths.append(candidate)

    if json_path is None or len(docx_paths) < 2:
        print("Error: provide one .json and two .docx (template + output).")
        sys.exit(1)

    # Template = the first .docx that already exists on disk (the first one
    # given if neither exists); the other .docx is the output.
    first_docx, second_docx = docx_paths[0], docx_paths[1]
    if not first_docx.exists() and second_docx.exists():
        template_docx, output_docx = second_docx, first_docx
    else:
        template_docx, output_docx = first_docx, second_docx

    run(json_path, template_docx, output_docx)