"""Flatten an ICD-10 tabular XML file into a JSON mapping of code -> description."""

import json
import os
import sys
import xml.etree.ElementTree as ET


def _collect_codes(root: ET.Element) -> dict:
    """Return a ``{code: description}`` dict for every usable ``<diag>`` under *root*.

    Each ``<diag>`` element is expected to carry:
      * ``<name>`` - the ICD-10 code
      * ``<desc>`` - the human-readable description
      * zero or more nested ``<diag>`` children (sub-codes)

    Elements whose ``<name>`` or ``<desc>`` is missing, has no text, or is
    blank after stripping whitespace are skipped.  When the same code occurs
    more than once, the last occurrence in document order wins.
    """
    icd_to_description = {}
    # ``iter`` walks the whole tree recursively, so nested sub-codes are
    # visited as well as top-level ones.
    for diag in root.iter("diag"):
        # ``findtext`` yields the default for a missing child and "" for a
        # child without text, so both cases fall through to the emptiness
        # check below.
        code = diag.findtext("name", default="").strip()
        description = diag.findtext("desc", default="").strip()
        # Only store non-empty strings.
        if code and description:
            icd_to_description[code] = description
    return icd_to_description


def main(xml_path: str, out_path: str = "icd_to_description.json") -> None:
    """Parse the tabular XML at *xml_path* and write a flat JSON code map.

    Args:
        xml_path: Path to the tabular XML file to read.
        out_path: Destination of the JSON file.  Defaults to
            ``icd_to_description.json`` in the current working directory.

    Raises:
        SystemExit: With status 1 when *xml_path* is not an existing file.
        xml.etree.ElementTree.ParseError: When the file is not well-formed XML.
    """
    if not os.path.isfile(xml_path):
        # Diagnostics go to stderr so they never mix with regular output.
        print(f"ERROR: cannot find tabular XML at '{xml_path}'", file=sys.stderr)
        sys.exit(1)

    root = ET.parse(xml_path).getroot()
    icd_to_description = _collect_codes(root)

    # Write out a flat JSON mapping code -> description.  ``ensure_ascii=False``
    # keeps non-ASCII characters readable in the UTF-8 output.
    with open(out_path, "w", encoding="utf-8") as fp:
        json.dump(icd_to_description, fp, indent=2, ensure_ascii=False)

    print(f"Wrote {len(icd_to_description)} code entries to {out_path}")


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print(
            "Usage: python parse_tabular.py <path_to_tabular.xml>",
            file=sys.stderr,
        )
        sys.exit(1)
    main(sys.argv[1])