gpaasch committed on
Commit 9d2bec8 · 1 Parent(s): 3b5fe24

Improved separation-of-concerns best practice in the code; added print statements for better understanding of what the code is doing.

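The commit moves app.py's startup wiring into small service modules. As a minimal sketch of the resulting startup flow (function names are taken from the diffs below; ordering and comments are editorial):

    # Sketch only: mirrors the new app.py startup shown in the app.py diff below.
    from utils.model_configuration_utils import select_best_model, ensure_model
    from services.llm import build_llm
    from services.embeddings import configure_embeddings
    from services.indexing import build_symptom_index

    MODEL_NAME, REPO_ID = select_best_model()   # pick a model that fits the hardware
    model_path = ensure_model()                 # download the weights if not already cached
    llm = build_llm(model_path)                 # builds LlamaCPP and sets Settings.llm
    configure_embeddings()                      # sets Settings.embed_model (MiniLM by default)
    symptom_index = build_symptom_index()       # wraps src.parse_tabular.create_symptom_index()
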
ai-plugin.json DELETED
@@ -1,16 +0,0 @@
- {
-   "schema_version": "v1",
-   "name_for_human": "MedCodeMCP",
-   "name_for_model": "MedCodeMCP",
-   "description_for_human": "Map natural-language symptom descriptions to ICD-10 codes with confidence scores and follow-up questions.",
-   "description_for_model": "Use MedCodeMCP to analyze patient symptom descriptions and return probable ICD-10 codes, confidence scores, and follow-up diagnostic questions if needed.",
-   "auth": {
-     "type": "none"
-   },
-   "api": {
-     "type": "gradio",
-     "url": "https://huggingface.co/spaces/Agents-MCP-Hackathon/MedCodeMCP"
-   },
-   "contact_email": "grahampaasch@gmail.com",
-   "legal_info_url": "https://huggingface.co/spaces/agents-mcp-hackathon/medcode-mcp"
- }

app.py CHANGED
@@ -1,47 +1,32 @@
- from huggingface_hub import hf_hub_download
  import gradio as gr
- from llama_index.core import Settings
- from llama_index.embeddings.huggingface import HuggingFaceEmbedding
- from llama_index.llms.llama_cpp import LlamaCPP
- from src.parse_tabular import create_symptom_index
- from utils import model_configuration_utils as mc
- from utils import voice_input_utils as viu
- import json
+ from utils.model_configuration_utils import select_best_model, ensure_model
+ from services.llm import build_llm
+ from services.embeddings import configure_embeddings
+ from services.indexing import build_symptom_index
+ from utils.voice_input_utils import enhanced_process_speech, format_response_for_user, get_asr_pipeline
  import torch
  import torchaudio.transforms as T
+ import json

- # Set up model paths
- MODEL_NAME, REPO_ID = mc.select_best_model()
-
- # Ensure model is downloaded
- model_path = mc.ensure_model()
-
- # Configure local LLM with LlamaCPP
- print("\nInitializing LLM...")
- llm = LlamaCPP(
-     model_path=model_path,
-     temperature=0.7,
-     max_new_tokens=256,
-     context_window=2048,
-     verbose=False # Reduce logging
-     # n_batch and n_threads are not valid parameters for LlamaCPP and should not be used.
-     # If you encounter segmentation faults, try reducing context_window or check your system resources.
- )
- print("LLM initialized successfully")
+ # 1) Model selection & download
+ MODEL_NAME, REPO_ID = select_best_model()
+ model_path = ensure_model()
+ print(f"Using model: {MODEL_NAME} from {REPO_ID}")
+ print(f"Model path: {model_path}")
+ print(f"Model size: {torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB")
+ print(f"Model requirements: {MODEL_NAME} requires at least 4GB VRAM and 8GB RAM.")
+ print(f"Model type: {'GPU' if torch.cuda.is_available() else 'CPU'}")

- # Configure global settings
- print("\nConfiguring settings...")
- Settings.llm = llm
- Settings.embed_model = HuggingFaceEmbedding(
-     model_name="sentence-transformers/all-MiniLM-L6-v2"
- )
- print("Settings configured")
+ # 2) LLM and embeddings config
+ llm = build_llm(model_path)
+ configure_embeddings()
+ print(f"LLM configured with model: {model_path}")
+ print("Embeddings configured successfully.")

- # Create the index at startup
- print("\nCreating symptom index...")
- symptom_index = create_symptom_index()
- print("Index created successfully")
- print("Loaded symptom_index:", type(symptom_index))
+ # 3) Index setup
+ symptom_index = build_symptom_index()
+ print("Symptom index built successfully.")
+ print("Ready for queries.")

  # --- System prompt ---
  SYSTEM_PROMPT = """

@@ -177,7 +162,7 @@ with gr.Blocks(theme="default") as demo:
  clear_btn.click(lambda: None, None, chatbot, queue=False)

  microphone.stream(
-     fn=viu.enhanced_process_speech,
+     fn=enhanced_process_speech,
      inputs=[microphone, chatbot, api_key, model_selector, temperature],
      outputs=chatbot,
      show_progress="hidden",

@@ -217,7 +202,7 @@ with gr.Blocks(theme="default") as demo:
  sample_rate, audio_array = audio
  features = process_audio(audio_array, sample_rate)

- asr = viu.get_asr_pipeline()
+ asr = get_asr_pipeline()
  result = asr(features)

  return result.get("text", "").strip() if isinstance(result, dict) else str(result).strip()

@@ -278,7 +263,7 @@ with gr.Blocks(theme="default") as demo:

  new_history = history + [
      {"role": "user", "content": text},
-     {"role": "assistant", "content": viu.format_response_for_user(result)}
+     {"role": "assistant", "content": format_response_for_user(result)}
  ]
  return new_history, "" # Return empty string to clear input

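Note that the new startup prints call torch.cuda.get_device_properties(0) unconditionally, even though the "Model type" line anticipates CPU-only runs. A guarded variant (a sketch, not part of this commit) could look like:

    import torch

    # Hypothetical guard: only query device properties when CUDA is actually available.
    if torch.cuda.is_available():
        gpu_gb = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
        print(f"GPU memory: {gpu_gb:.2f} GB")
    else:
        print("No GPU detected; running on CPU")
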
services/embeddings.py ADDED
@@ -0,0 +1,6 @@
+ from llama_index.core import Settings
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+
+
+ def configure_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
+     Settings.embed_model = HuggingFaceEmbedding(model_name=model_name)
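A minimal usage sketch for the new helper (the default model name is the one in the code above; its placement in app.py is shown in the diff earlier):

    from services.embeddings import configure_embeddings

    # Registers the HuggingFace embedding model on llama_index's global Settings.
    configure_embeddings()  # same as configure_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
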
services/indexing.py ADDED
@@ -0,0 +1,4 @@
+ from src.parse_tabular import create_symptom_index
+
+ def build_symptom_index():
+     return create_symptom_index()
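Usage sketch, assuming create_symptom_index() returns a llama_index index as in the previous app.py startup:

    from services.indexing import build_symptom_index

    # Thin wrapper around src.parse_tabular.create_symptom_index().
    symptom_index = build_symptom_index()
    print("Loaded symptom_index:", type(symptom_index))
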
services/llm.py ADDED
@@ -0,0 +1,13 @@
+ from llama_index.core import Settings
+ from llama_index.llms.llama_cpp import LlamaCPP
+
+ def build_llm(model_path, temperature=0.7, max_tokens=256, context_window=2048):
+     llm = LlamaCPP(
+         model_path=model_path,
+         temperature=temperature,
+         max_new_tokens=max_tokens,
+         context_window=context_window,
+         verbose=False
+     )
+     Settings.llm = llm
+     return llm
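Usage sketch for the new factory; the GGUF path below is a placeholder, and the keyword defaults mirror the signature above:

    from services.llm import build_llm

    # Builds a LlamaCPP model and registers it on llama_index's global Settings.
    llm = build_llm("models/your-model.gguf", temperature=0.7, max_tokens=256)
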
src/merge_kb.py DELETED
@@ -1,15 +0,0 @@
- # merge_kb.py
- import json
-
- with open("symptom_to_icd.json") as f:
-     symptom_to_icd = json.load(f)
- with open("icd_to_description.json") as f:
-     icd_to_description = json.load(f)
-
- kb = {
-     "symptom_to_icd": symptom_to_icd,
-     "icd_to_description": icd_to_description
- }
-
- with open("knowledge_base.json", "w", encoding="utf-8") as f:
-     json.dump(kb, f, indent=2, ensure_ascii=False)

utils/llama_index_utils.py DELETED
@@ -1,49 +0,0 @@
- import os
- import json
- from transformers import pipeline
- from llama_index import SimpleDirectoryReader, GPTVectorStoreIndex, LLMPredictor, OpenAI
-
- _index = None
-
- def query_symptoms_tool(prompt_json: str):
-     # parse “prompt_json” into Python dict and call your existing query_symptoms()
-     data = json.loads(prompt_json)
-     return query_symptoms(data["raw_input"])
-
- def get_llm_predictor():
-     """
-     Return an LLMPredictor configured for local GPU (transformers) if USE_LOCAL_GPU=1,
-     otherwise uses OpenAI.
-     """
-     if os.getenv("USE_LOCAL_GPU") == "1":
-         # Local GPU inference using GPT-2 as an example
-         local_pipe = pipeline("text-generation", model="gpt2", device=0)
-         return LLMPredictor(llm=local_pipe)
-     # Default to OpenAI provider
-     return LLMPredictor(llm=OpenAI(temperature=0))
-
-
- def build_index(data_path="data/icd10cm_tabular_2025"): # noqa: C901
-     """
-     Build (or retrieve cached) GPTVectorStoreIndex from ICD documents.
-     """
-     global _index
-     if _index is None:
-         # Load documents from the ICD data directory
-         docs = SimpleDirectoryReader(data_path).load_data()
-         # Initialize the index with chosen LLM predictor
-         predictor = get_llm_predictor()
-         _index = GPTVectorStoreIndex.from_documents(docs, llm_predictor=predictor)
-     return _index
-
-
- def query_symptoms(prompt: str, top_k: int = 5):
-     """
-     Query the index for the given symptom prompt and return the result.
-     """
-     idx = build_index()
-     # Create a query engine with the same predictor
-     predictor = get_llm_predictor()
-     query_engine = idx.as_query_engine(similarity_top_k=top_k, llm_predictor=predictor)
-     return query_engine.query(prompt)
-

utils/model_configuration_utils.py CHANGED
@@ -13,6 +13,7 @@ import torch
  import torchaudio.transforms as T
  from huggingface_hub import hf_hub_download
  from typing import Optional
+ from llama_index.llms.llama_cpp import LlamaCPP

  # Model options mapped to their requirements
  MODEL_OPTIONS = {