gpaasch committed on
Commit
0d38280
·
1 Parent(s): 23867a8

successfully scrapped

Browse files
app.py CHANGED
@@ -1,14 +1,40 @@
 
 
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!"
 
5
 
6
- iface = gr.Interface(
7
- fn=greet,
8
- inputs=gr.Textbox(label="Input"),
9
- outputs=gr.Textbox(label="Output"),
10
- mcp_server=True
11
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  if __name__ == "__main__":
14
- iface.launch(server_name="0.0.0.0", server_port=7860)
 
1
+ # app.py
2
+ import json
3
  import gradio as gr
4
 
5
# Read the merged knowledge base once, at import time.
with open("knowledge_base.json", encoding="utf-8") as f:
    kb = json.load(f)

# Split the bundle into the two lookup tables used by map_symptoms().
symptom_to_icd, icd_to_description = (
    kb["symptom_to_icd"],
    kb["icd_to_description"],
)
11
+
12
def map_symptoms(raw_input: str) -> dict:
    """Map comma-separated symptoms to candidate ICD-10 codes.

    Each symptom is stripped and lower-cased, then looked up in the
    module-level ``symptom_to_icd`` table. Every code listed for a symptom
    receives one vote; a code's confidence is its share of all votes.

    Args:
        raw_input: Symptoms separated by commas, e.g. ``"fever, cough"``.
            Empty or ``None`` input yields an empty result.

    Returns:
        A dict with two parallel lists ordered by descending vote count:
        ``"diagnoses"`` holds ``"<code>: <description>"`` strings (the
        description falls back to ``"Unknown"`` when the code is missing
        from ``icd_to_description``) and ``"confidences"`` holds the vote
        share of each code rounded to two decimals.
    """
    empty_result = {"diagnoses": [], "confidences": []}
    if not raw_input:
        # Guards against None / "" coming from API or MCP callers.
        return empty_result

    # De-duplicate while preserving order so that repeating a symptom
    # ("fever, fever") does not inflate the votes for its codes.
    # NOTE(review): lookup assumes the knowledge-base keys are lower-case.
    terms = list(
        dict.fromkeys(
            t.strip().lower() for t in raw_input.split(",") if t.strip()
        )
    )

    icd_counts = {}
    for term in terms:
        for code in symptom_to_icd.get(term, []):
            icd_counts[code] = icd_counts.get(code, 0) + 1
    if not icd_counts:
        return empty_result

    total = sum(icd_counts.values())
    # Stable sort: codes with equal votes keep their first-seen order.
    ranked = sorted(icd_counts.items(), key=lambda item: item[1], reverse=True)

    diagnoses = [
        f"{code}: {icd_to_description.get(code, 'Unknown')}"
        for code, _ in ranked
    ]
    confidences = [round(count / total, 2) for _, count in ranked]
    return {"diagnoses": diagnoses, "confidences": confidences}
30
+
31
# Build the UI with Blocks. The MCP server is switched on in launch() below:
# mcp_server is a launch() option rather than a Blocks() constructor option.
with gr.Blocks() as demo:
    gr.Markdown("## Symptom to ICD‐10 Code Lookup")
    inp = gr.Textbox(label="Enter symptoms (comma‐separated)")
    out = gr.JSON(label="Result")
    # Pressing Enter in the textbox runs the lookup and shows the JSON result.
    inp.submit(fn=map_symptoms, inputs=inp, outputs=out)

if __name__ == "__main__":
    # Bind to all interfaces on port 7860 and expose the app as an MCP server.
    demo.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)
hf-mcp-hackathon/app.py DELETED
@@ -1,37 +0,0 @@
1
- import gradio as gr
2
-
3
- def analyze_text(text: str) -> dict:
4
- """Analyze text and return various statistics.
5
-
6
- Args:
7
- text: The text to analyze
8
-
9
- Returns:
10
- A dictionary containing text statistics like word count, character count, etc.
11
- """
12
- stats = {
13
- "word_count": len(text.split()),
14
- "character_count": len(text),
15
- "line_count": len(text.splitlines()) or 1,
16
- "uppercase_count": sum(1 for c in text if c.isupper()),
17
- "lowercase_count": sum(1 for c in text if c.islower()),
18
- "digit_count": sum(1 for c in text if c.isdigit())
19
- }
20
- return stats
21
-
22
- # Create the Gradio interface
23
- demo = gr.Interface(
24
- fn=analyze_text,
25
- inputs=gr.Textbox(label="Input Text", placeholder="Enter some text to analyze..."),
26
- outputs=gr.JSON(label="Text Statistics"),
27
- title="Text Analysis Tool",
28
- description="A simple tool that provides various statistics about input text.",
29
- examples=[
30
- ["Hello World! This is a test message 123."],
31
- ["The quick brown fox jumps over the lazy dog."]
32
- ]
33
- )
34
-
35
- if __name__ == "__main__":
36
- # Launch with MCP server enabled
37
- demo.launch(mcp_server=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf-mcp-hackathon/idea.md CHANGED
@@ -1 +1 @@
1
- Develop a medical-symptom MCP server that takes patient-entered symptoms, maps them to ICD-10 codes via a local knowledge base, and returns a JSON of probable diagnoses with confidence scores.knowledge_base.json
 
1
+ Develop a medical-symptom MCP server that takes patient-entered symptoms, maps them to ICD-10 codes via a local knowledge base, and returns a JSON of probable diagnoses with confidence scores.
icd10cm_tabular_2025/icd10cm_index_2025.xml ADDED
The diff for this file is too large to render. See raw diff
 
icd10cm_tabular_2025/icd10cm_tabular_2025.xml ADDED
The diff for this file is too large to render. See raw diff
 
icd_to_description.json ADDED
The diff for this file is too large to render. See raw diff
 
knowledge_base.json CHANGED
The diff for this file is too large to render. See raw diff
 
merge_kb.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # merge_kb.py
2
+ import json
3
+
4
# Read both lookup tables explicitly as UTF-8: icd_to_description.json is
# written as UTF-8 with ensure_ascii=False, so relying on the platform's
# default encoding could mis-decode non-ASCII descriptions.
with open("symptom_to_icd.json", encoding="utf-8") as f:
    symptom_to_icd = json.load(f)
with open("icd_to_description.json", encoding="utf-8") as f:
    icd_to_description = json.load(f)

# Bundle the two tables under the keys that app.py reads back.
kb = {
    "symptom_to_icd": symptom_to_icd,
    "icd_to_description": icd_to_description,
}

with open("knowledge_base.json", "w", encoding="utf-8") as f:
    json.dump(kb, f, indent=2, ensure_ascii=False)
parse_tabular.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import xml.etree.ElementTree as ET
2
+ import json
3
+ import sys
4
+ import os
5
+
6
def main(xml_path, out_path="icd_to_description.json"):
    """Extract a flat code -> description mapping from an ICD-10-CM tabular XML.

    Every ``<diag>`` element in the document (at any nesting depth) is
    visited; its direct ``<name>`` child supplies the code and its direct
    ``<desc>`` child the description. Entries where either is missing or
    blank are skipped. If a code occurs more than once, the last one wins.

    Args:
        xml_path: Path to the tabular XML file.
        out_path: Where to write the JSON mapping. Defaults to
            ``icd_to_description.json`` in the current directory.

    Raises:
        SystemExit: With exit code 1 when ``xml_path`` is not an existing file.
    """
    if not os.path.isfile(xml_path):
        # Diagnostics belong on stderr so they are not mistaken for output.
        print(f"ERROR: cannot find tabular XML at '{xml_path}'", file=sys.stderr)
        sys.exit(1)

    root = ET.parse(xml_path).getroot()

    icd_to_description = {}
    for diag in root.iter("diag"):
        # findtext() returns None for a missing child and "" for an empty
        # one; both collapse to "" here and are skipped below.
        code = (diag.findtext("name") or "").strip()
        description = (diag.findtext("desc") or "").strip()
        if code and description:
            icd_to_description[code] = description

    # Write out a flat JSON mapping code -> description.
    with open(out_path, "w", encoding="utf-8") as fp:
        json.dump(icd_to_description, fp, indent=2, ensure_ascii=False)

    print(f"Wrote {len(icd_to_description)} code entries to {out_path}")
42
+
43
+
44
if __name__ == "__main__":
    # Exactly one argument is expected: the path to the tabular XML file.
    if len(sys.argv) != 2:
        # Usage errors go to stderr, alongside the non-zero exit status.
        print(
            "Usage: python parse_tabular.py <path/to/icd10cm_tabular_2025.xml>",
            file=sys.stderr,
        )
        sys.exit(1)
    main(sys.argv[1])
symptom_to_icd.json ADDED
The diff for this file is too large to render. See raw diff