successfully scrapped

Browse files

Files changed (10) hide show

app.py +35 -9
hf-mcp-hackathon/app.py +0 -37
hf-mcp-hackathon/idea.md +1 -1
icd10cm_tabular_2025/icd10cm_index_2025.xml +0 -0
icd10cm_tabular_2025/icd10cm_tabular_2025.xml +0 -0
icd_to_description.json +0 -0
knowledge_base.json +0 -0
merge_kb.py +15 -0
parse_tabular.py +48 -0
symptom_to_icd.json +0 -0

app.py CHANGED Viewed

@@ -1,14 +1,40 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!"
-iface = gr.Interface(
-    fn=greet,
-    inputs=gr.Textbox(label="Input"),
-    outputs=gr.Textbox(label="Output"),
-    mcp_server=True
-)
 if __name__ == "__main__":
-    iface.launch(server_name="0.0.0.0", server_port=7860)

+# app.py
+import json
 import gradio as gr
+# Load the merged knowledge base once, at import time.
+# knowledge_base.json is the file merge_kb.py writes; it carries the two
+# top-level keys read below.
+with open("knowledge_base.json", encoding="utf-8") as f:
+    kb = json.load(f)
+# Symptom term -> iterable of ICD-10 codes (map_symptoms iterates the values).
+# NOTE(review): keys are presumably lower-case, since map_symptoms lower-cases
+# each term before the lookup -- confirm against symptom_to_icd.json.
+symptom_to_icd    = kb["symptom_to_icd"]
+# ICD-10 code -> human-readable description.
+icd_to_description = kb["icd_to_description"]
+def map_symptoms(raw_input: str) -> dict:
+    """Map comma-separated symptoms to candidate ICD-10 diagnoses.
+
+    Each distinct symptom term is stripped, lower-cased and looked up in
+    ``symptom_to_icd``; every ICD-10 code a term maps to receives one vote.
+
+    Args:
+        raw_input: Symptom terms separated by commas. Empty or missing
+            input is treated as "no symptoms".
+
+    Returns:
+        A dict with two parallel lists ordered by descending vote count:
+        "diagnoses" holds "<code>: <description>" strings ("Unknown" when
+        the code has no description) and "confidences" holds each code's
+        share of all votes, rounded to two decimals. Both lists are empty
+        when nothing matches.
+    """
+    if not raw_input:
+        # Nothing to look up (also guards against None from API callers).
+        return {"diagnoses": [], "confidences": []}
+    # dict.fromkeys de-duplicates while keeping first-seen order, so a
+    # symptom typed twice does not vote twice and skew the confidences.
+    terms = dict.fromkeys(
+        t.strip().lower() for t in raw_input.split(",") if t.strip()
+    )
+    icd_counts = {}
+    for term in terms:
+        for code in symptom_to_icd.get(term, []):
+            icd_counts[code] = icd_counts.get(code, 0) + 1
+    if not icd_counts:
+        return {"diagnoses": [], "confidences": []}
+    total = sum(icd_counts.values())
+    # sorted() is stable, so codes with equal votes keep first-seen order.
+    ranked = sorted(icd_counts.items(), key=lambda item: item[1], reverse=True)
+    diagnoses = []
+    confidences = []
+    for code, count in ranked:
+        desc = icd_to_description.get(code, "Unknown")
+        diagnoses.append(f"{code}: {desc}")
+        confidences.append(round(count / total, 2))
+    return {"diagnoses": diagnoses, "confidences": confidences}
+# Build the UI with Blocks. The MCP server is switched on in launch() below,
+# which is where Gradio takes the mcp_server flag.
+with gr.Blocks() as demo:
+    gr.Markdown("## Symptom to ICD‐10 Code Lookup")
+    inp = gr.Textbox(label="Enter symptoms (comma‐separated)")
+    out = gr.JSON(label="Result")
+    # Pressing Enter in the textbox runs map_symptoms and shows its dict.
+    inp.submit(fn=map_symptoms, inputs=inp, outputs=out)
 if __name__ == "__main__":
+    # mcp_server=True serves the app's functions as MCP tools next to the UI.
+    demo.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)

hf-mcp-hackathon/app.py DELETED Viewed

@@ -1,37 +0,0 @@
-import gradio as gr
-def analyze_text(text: str) -> dict:
-    """Analyze text and return various statistics.
-    Args:
-        text: The text to analyze
-    Returns:
-        A dictionary containing text statistics like word count, character count, etc.
-    """
-    stats = {
-        "word_count": len(text.split()),
-        "character_count": len(text),
-        "line_count": len(text.splitlines()) or 1,
-        "uppercase_count": sum(1 for c in text if c.isupper()),
-        "lowercase_count": sum(1 for c in text if c.islower()),
-        "digit_count": sum(1 for c in text if c.isdigit())
-    }
-    return stats
-# Create the Gradio interface
-demo = gr.Interface(
-    fn=analyze_text,
-    inputs=gr.Textbox(label="Input Text", placeholder="Enter some text to analyze..."),
-    outputs=gr.JSON(label="Text Statistics"),
-    title="Text Analysis Tool",
-    description="A simple tool that provides various statistics about input text.",
-    examples=[
-        ["Hello World! This is a test message 123."],
-        ["The quick brown fox jumps over the lazy dog."]
-    ]
-)
-if __name__ == "__main__":
-    # Launch with MCP server enabled
-    demo.launch(mcp_server=True)

hf-mcp-hackathon/idea.md CHANGED Viewed

	@@ -1 +1 @@
1	- Develop a medical-symptom MCP server that takes patient-entered symptoms, maps them to ICD-10 codes via a local knowledge base, and returns a JSON of probable diagnoses with confidence scores.~~knowledge_base.json~~


1	+ Develop a medical-symptom MCP server that takes patient-entered symptoms, maps them to ICD-10 codes via a local knowledge base, and returns a JSON of probable diagnoses with confidence scores.

icd10cm_tabular_2025/icd10cm_index_2025.xml ADDED Viewed

The diff for this file is too large to render. See raw diff

icd10cm_tabular_2025/icd10cm_tabular_2025.xml ADDED Viewed

The diff for this file is too large to render. See raw diff

icd_to_description.json ADDED Viewed

The diff for this file is too large to render. See raw diff

knowledge_base.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

merge_kb.py ADDED Viewed

	@@ -0,0 +1,15 @@

+# merge_kb.py
+import json
+# Read both inputs as UTF-8 explicitly. The merged file below is written as
+# UTF-8 with ensure_ascii=False, so falling back to the platform default
+# encoding on read could garble or reject non-ASCII text.
+with open("symptom_to_icd.json", encoding="utf-8") as f:
+    symptom_to_icd = json.load(f)
+with open("icd_to_description.json", encoding="utf-8") as f:
+    icd_to_description = json.load(f)
+# Bundle both mappings under the keys app.py reads from knowledge_base.json.
+kb = {
+    "symptom_to_icd": symptom_to_icd,
+    "icd_to_description": icd_to_description
+}
+with open("knowledge_base.json", "w", encoding="utf-8") as f:
+    json.dump(kb, f, indent=2, ensure_ascii=False)

parse_tabular.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import xml.etree.ElementTree as ET
+import json
+import sys
+import os
+def main(xml_path):
+    """Extract an ICD-10 code -> description map from a tabular XML file.
+
+    Visits every <diag> element in the document, at any depth, reads its
+    direct <name> (code) and <desc> (description) children, and writes the
+    resulting flat mapping to "icd_to_description.json" in the current
+    working directory. Entries whose code or description is missing or
+    blank are skipped.
+
+    Args:
+        xml_path: Path to the tabular XML file to parse.
+
+    Raises:
+        SystemExit: If ``xml_path`` is not an existing file; the message is
+            written to stderr and the process exit status is 1.
+    """
+    if not os.path.isfile(xml_path):
+        # sys.exit(str) prints to stderr and exits with status 1, keeping
+        # diagnostics out of stdout.
+        sys.exit(f"ERROR: cannot find tabular XML at '{xml_path}'")
+    root = ET.parse(xml_path).getroot()
+    icd_to_description = {}
+    # root.iter("diag") also yields nested <diag> children (sub-codes).
+    for diag in root.iter("diag"):
+        # findtext gives None for a missing child and "" for an empty one;
+        # both collapse to "" here and are filtered out below.
+        code = (diag.findtext("name") or "").strip()
+        description = (diag.findtext("desc") or "").strip()
+        # Only store non-empty strings:
+        if code and description:
+            icd_to_description[code] = description
+    # Write out a flat JSON mapping code → description
+    out_path = "icd_to_description.json"
+    with open(out_path, "w", encoding="utf-8") as fp:
+        json.dump(icd_to_description, fp, indent=2, ensure_ascii=False)
+    print(f"Wrote {len(icd_to_description)} code entries to {out_path}")
+if __name__ == "__main__":
+    # Exactly one positional argument is expected: the tabular XML path.
+    cli_args = sys.argv[1:]
+    if len(cli_args) == 1:
+        main(cli_args[0])
+    else:
+        print("Usage: python parse_tabular.py <path/to/icd10cm_tabular_2025.xml>")
+        sys.exit(1)

symptom_to_icd.json ADDED Viewed

The diff for this file is too large to render. See raw diff