Spaces:
Running
Running
import xml.etree.ElementTree as ET | |
import json | |
import sys | |
import os | |
def main(xml_path, out_path="icd_to_description.json"):
    """Extract a flat code -> description mapping from a tabular XML file.

    Every ``<diag>`` element in the document (at any nesting depth) is
    inspected. Its direct ``<name>`` child supplies the code and its direct
    ``<desc>`` child supplies the description. Entries where either child is
    missing, has no text, or is blank after stripping whitespace are skipped.
    If the same code occurs more than once, the occurrence that comes last in
    document order wins.

    Args:
        xml_path: Path to the XML file to read.
        out_path: Path of the JSON file to write. Defaults to
            ``icd_to_description.json`` in the current working directory.

    Returns:
        The ``dict`` mapping each code to its description (the same data
        that was written to ``out_path``).

    Raises:
        SystemExit: With exit status 1 if ``xml_path`` is not an existing
            file or its contents are not well-formed XML.
    """
    if not os.path.isfile(xml_path):
        # Diagnostics go to stderr so they never mix with normal output.
        print(f"ERROR: cannot find tabular XML at '{xml_path}'", file=sys.stderr)
        sys.exit(1)

    try:
        root = ET.parse(xml_path).getroot()
    except ET.ParseError as exc:
        # Report malformed XML the same way as a missing file instead of
        # surfacing a raw traceback.
        print(f"ERROR: cannot parse XML at '{xml_path}': {exc}", file=sys.stderr)
        sys.exit(1)

    icd_to_description = {}
    # root.iter() walks the whole tree, so nested <diag> sub-codes are
    # visited as well as top-level ones.
    for diag in root.iter("diag"):
        # findtext() yields None for a missing child and "" for an empty
        # one; `or ""` folds both cases into a blank string.
        code = (diag.findtext("name") or "").strip()
        description = (diag.findtext("desc") or "").strip()
        # Only store non-empty strings.
        if code and description:
            icd_to_description[code] = description

    # Write out a flat JSON mapping code -> description.
    with open(out_path, "w", encoding="utf-8") as fp:
        json.dump(icd_to_description, fp, indent=2, ensure_ascii=False)

    print(f"Wrote {len(icd_to_description)} code entries to {out_path}")
    return icd_to_description
if __name__ == "__main__":
    # The script takes exactly one positional argument: the XML file path.
    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        main(cli_args[0])
    else:
        print("Usage: python parse_tabular.py <path/to/icd10cm_tabular_2025.xml>")
        sys.exit(1)