Spaces:
Running
Running
successfully scrapped
Browse files- app.py +35 -9
- hf-mcp-hackathon/app.py +0 -37
- hf-mcp-hackathon/idea.md +1 -1
- icd10cm_tabular_2025/icd10cm_index_2025.xml +0 -0
- icd10cm_tabular_2025/icd10cm_tabular_2025.xml +0 -0
- icd_to_description.json +0 -0
- knowledge_base.json +0 -0
- merge_kb.py +15 -0
- parse_tabular.py +48 -0
- symptom_to_icd.json +0 -0
app.py
CHANGED
@@ -1,14 +1,40 @@
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
|
3 |
-
|
4 |
-
|
|
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
if __name__ == "__main__":
|
14 |
-
|
|
|
1 |
+
# app.py
|
2 |
+
import json
|
3 |
import gradio as gr
|
4 |
|
5 |
+
# Read the merged knowledge base from disk once, when the module is imported.
with open("knowledge_base.json", encoding="utf-8") as f:
    kb = json.loads(f.read())

# Pull the two lookup tables out of the knowledge base:
#   symptom_to_icd     - queried by symptom term in map_symptoms
#   icd_to_description - queried by ICD code in map_symptoms
symptom_to_icd, icd_to_description = (
    kb["symptom_to_icd"],
    kb["icd_to_description"],
)
|
11 |
+
|
12 |
+
def map_symptoms(raw_input: str) -> dict:
    """Map comma-separated symptoms to ICD-10 codes ranked by match count.

    Each distinct symptom term (trimmed and lower-cased) is looked up in the
    module-level ``symptom_to_icd`` table. Every code listed for a term earns
    one vote. Codes are returned most-voted first, and each confidence is that
    code's share of all votes, rounded to two decimals.

    Args:
        raw_input: Symptoms separated by commas, e.g. ``"fever, cough"``.
            ``None`` or a blank string yields an empty result.

    Returns:
        A dict with two parallel lists: ``"diagnoses"`` holds
        ``"<code>: <description>"`` strings (``"Unknown"`` when the code is
        missing from ``icd_to_description``) and ``"confidences"`` holds the
        matching vote shares.
    """
    # Treat a missing value like an empty string instead of failing on .split().
    pieces = (raw_input or "").split(",")
    # dict.fromkeys drops repeated terms while keeping first-seen order, so
    # typing the same symptom twice does not inflate its codes' confidence.
    terms = dict.fromkeys(p.strip().lower() for p in pieces if p.strip())

    icd_counts = {}
    for term in terms:
        for code in symptom_to_icd.get(term, []):
            icd_counts[code] = icd_counts.get(code, 0) + 1

    if not icd_counts:
        return {"diagnoses": [], "confidences": []}

    total = sum(icd_counts.values())
    # sorted() is stable, so codes with equal votes keep first-seen order.
    ranked = sorted(icd_counts.items(), key=lambda item: item[1], reverse=True)

    diagnoses = [
        f"{code}: {icd_to_description.get(code, 'Unknown')}" for code, _ in ranked
    ]
    confidences = [round(count / total, 2) for _, count in ranked]
    return {"diagnoses": diagnoses, "confidences": confidences}
|
30 |
+
|
31 |
+
# Build the UI with Blocks. The MCP server is enabled in launch() below,
# because mcp_server is a launch() option rather than a Blocks() argument.
with gr.Blocks() as demo:
    gr.Markdown("## Symptom to ICD‐10 Code Lookup")
    inp = gr.Textbox(label="Enter symptoms (comma‐separated)")
    out = gr.JSON(label="Result")
    # On the textbox's submit event, run map_symptoms on its text and show
    # the returned dict in the JSON component.
    inp.submit(fn=map_symptoms, inputs=inp, outputs=out)

if __name__ == "__main__":
    # 0.0.0.0 listens on all interfaces; mcp_server=True starts the MCP
    # server alongside the web UI.
    demo.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)
|
hf-mcp-hackathon/app.py
DELETED
@@ -1,37 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
|
3 |
-
def analyze_text(text: str) -> dict:
    """Compute simple counts describing ``text``.

    Args:
        text: The text to analyze.

    Returns:
        A dictionary with the keys ``word_count``, ``character_count``,
        ``line_count``, ``uppercase_count``, ``lowercase_count`` and
        ``digit_count``.
    """
    words = text.split()
    lines = text.splitlines()

    # Text without any line (e.g. the empty string) is still reported as 1 line.
    line_total = len(lines) if lines else 1

    return {
        "word_count": len(words),
        "character_count": len(text),
        "line_count": line_total,
        # Summing the boolean results counts the characters that match.
        "uppercase_count": sum(map(str.isupper, text)),
        "lowercase_count": sum(map(str.islower, text)),
        "digit_count": sum(map(str.isdigit, text)),
    }
|
21 |
-
|
22 |
-
# Single-function Gradio UI: one textbox in, analyze_text's dict out as JSON.
demo = gr.Interface(
    title="Text Analysis Tool",
    description="A simple tool that provides various statistics about input text.",
    fn=analyze_text,
    inputs=gr.Textbox(
        label="Input Text",
        placeholder="Enter some text to analyze...",
    ),
    outputs=gr.JSON(label="Text Statistics"),
    # Sample inputs offered in the UI, one single-element row per example.
    examples=[
        ["Hello World! This is a test message 123."],
        ["The quick brown fox jumps over the lazy dog."],
    ],
)

if __name__ == "__main__":
    # Start the app with the MCP server turned on.
    demo.launch(mcp_server=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hf-mcp-hackathon/idea.md
CHANGED
@@ -1 +1 @@
|
|
1 |
-
Develop a medical-symptom MCP server that takes patient-entered symptoms, maps them to ICD-10 codes via a local knowledge base, and returns a JSON of probable diagnoses with confidence scores.
|
|
|
1 |
+
Develop a medical-symptom MCP server that takes patient-entered symptoms, maps them to ICD-10 codes via a local knowledge base, and returns a JSON of probable diagnoses with confidence scores.
|
icd10cm_tabular_2025/icd10cm_index_2025.xml
ADDED
The diff for this file is too large to render.
See raw diff
|
|
icd10cm_tabular_2025/icd10cm_tabular_2025.xml
ADDED
The diff for this file is too large to render.
See raw diff
|
|
icd_to_description.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
knowledge_base.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
merge_kb.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# merge_kb.py
|
2 |
+
import json
|
3 |
+
|
4 |
+
# Read both inputs explicitly as UTF-8. Without an encoding argument open()
# uses the platform default, which can mis-decode or reject non-ASCII text
# in these JSON files on systems whose default is not UTF-8.
with open("symptom_to_icd.json", encoding="utf-8") as f:
    symptom_to_icd = json.load(f)
with open("icd_to_description.json", encoding="utf-8") as f:
    icd_to_description = json.load(f)

# Bundle the two mappings under the keys that app.py reads back.
kb = {
    "symptom_to_icd": symptom_to_icd,
    "icd_to_description": icd_to_description,
}

# ensure_ascii=False keeps non-ASCII characters readable in the output file,
# which is why the file is written as UTF-8 too.
with open("knowledge_base.json", "w", encoding="utf-8") as f:
    json.dump(kb, f, indent=2, ensure_ascii=False)
|
parse_tabular.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import xml.etree.ElementTree as ET
|
2 |
+
import json
|
3 |
+
import sys
|
4 |
+
import os
|
5 |
+
|
6 |
+
def main(xml_path, out_path="icd_to_description.json"):
    """Extract a flat code -> description mapping from a tabular XML file.

    Every ``<diag>`` element in the document is visited, at any nesting
    depth. Its ``<name>`` child supplies the ICD-10 code and its ``<desc>``
    child the human-readable description. Entries where either child is
    missing or blank are skipped. If a code occurs more than once, the last
    occurrence wins.

    Args:
        xml_path: Path to the tabular XML file to read.
        out_path: Where to write the JSON mapping. Defaults to
            ``icd_to_description.json`` in the current working directory.

    Raises:
        SystemExit: With exit code 1 when ``xml_path`` is not an existing file.
        xml.etree.ElementTree.ParseError: When the file is not well-formed XML.
    """
    if not os.path.isfile(xml_path):
        # Diagnostics go to stderr so stdout carries only the normal report.
        print(f"ERROR: cannot find tabular XML at '{xml_path}'", file=sys.stderr)
        sys.exit(1)

    root = ET.parse(xml_path).getroot()

    icd_to_description = {}
    for diag in root.iter("diag"):
        # findtext() yields None for a missing child and "" for an empty one;
        # both collapse to "" here and are filtered out below.
        code = (diag.findtext("name") or "").strip()
        description = (diag.findtext("desc") or "").strip()
        if code and description:
            icd_to_description[code] = description

    # Write out a flat JSON mapping of code -> description.
    with open(out_path, "w", encoding="utf-8") as fp:
        json.dump(icd_to_description, fp, indent=2, ensure_ascii=False)

    print(f"Wrote {len(icd_to_description)} code entries to {out_path}")


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print(
            "Usage: python parse_tabular.py <path/to/icd10cm_tabular_2025.xml>",
            file=sys.stderr,
        )
        sys.exit(1)
    main(sys.argv[1])
|
symptom_to_icd.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|