Spaces:

Agents-MCP-Hackathon
/

MedCodeMCP

Running

App Files Files Community

gpaasch committed on Jun 9

Commit

9d2bec8

1 Parent(s): 3b5fe24

Improved separation of concerns in the code (a best practice) and added print statements to make it clearer what the code is doing

Browse files

Files changed (8) hide show

ai-plugin.json +0 -16
app.py +26 -41
services/embeddings.py +6 -0
services/indexing.py +4 -0
services/llm.py +13 -0
src/merge_kb.py +0 -15
utils/llama_index_utils.py +0 -49
utils/model_configuration_utils.py +1 -0

ai-plugin.json DELETED Viewed

@@ -1,16 +0,0 @@
-{
-  "schema_version": "v1",
-  "name_for_human": "MedCodeMCP",
-  "name_for_model": "MedCodeMCP",
-  "description_for_human": "Map natural-language symptom descriptions to ICD-10 codes with confidence scores and follow-up questions.",
-  "description_for_model": "Use MedCodeMCP to analyze patient symptom descriptions and return probable ICD-10 codes, confidence scores, and follow-up diagnostic questions if needed.",
-  "auth": {
-    "type": "none"
-  },
-  "api": {
-    "type": "gradio",
-    "url": "https://huggingface.co/spaces/Agents-MCP-Hackathon/MedCodeMCP"
-  },
-  "contact_email": "grahampaasch@gmail.com",
-  "legal_info_url": "https://huggingface.co/spaces/agents-mcp-hackathon/medcode-mcp"
-}

app.py CHANGED Viewed

@@ -1,47 +1,32 @@
-from huggingface_hub import hf_hub_download
 import gradio as gr
-from llama_index.core import Settings
-from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from llama_index.llms.llama_cpp import LlamaCPP
-from src.parse_tabular import create_symptom_index
-from utils import model_configuration_utils as mc
-from utils import voice_input_utils as viu
-import json
 import torch
 import torchaudio.transforms as T
-# Set up model paths
-MODEL_NAME, REPO_ID = mc.select_best_model()
-# Ensure model is downloaded
-model_path = mc.ensure_model()
-# Configure local LLM with LlamaCPP
-print("\nInitializing LLM...")
-llm = LlamaCPP(
-    model_path=model_path,
-    temperature=0.7,
-    max_new_tokens=256,
-    context_window=2048,
-    verbose=False    # Reduce logging
-    # n_batch and n_threads are not valid parameters for LlamaCPP and should not be used.
-    # If you encounter segmentation faults, try reducing context_window or check your system resources.
-)
-print("LLM initialized successfully")
-# Configure global settings
-print("\nConfiguring settings...")
-Settings.llm = llm
-Settings.embed_model = HuggingFaceEmbedding(
-    model_name="sentence-transformers/all-MiniLM-L6-v2"
-)
-print("Settings configured")
-# Create the index at startup
-print("\nCreating symptom index...")
-symptom_index = create_symptom_index()
-print("Index created successfully")
-print("Loaded symptom_index:", type(symptom_index))
 # --- System prompt ---
 SYSTEM_PROMPT = """
@@ -177,7 +162,7 @@ with gr.Blocks(theme="default") as demo:
     clear_btn.click(lambda: None, None, chatbot, queue=False)
     microphone.stream(
-        fn=viu.enhanced_process_speech,
         inputs=[microphone, chatbot, api_key, model_selector, temperature],
         outputs=chatbot,
         show_progress="hidden",
@@ -217,7 +202,7 @@ with gr.Blocks(theme="default") as demo:
             sample_rate, audio_array = audio
             features = process_audio(audio_array, sample_rate)
-            asr = viu.get_asr_pipeline()
             result = asr(features)
             return result.get("text", "").strip() if isinstance(result, dict) else str(result).strip()
@@ -278,7 +263,7 @@ with gr.Blocks(theme="default") as demo:
         new_history = history + [
             {"role": "user", "content": text},
-            {"role": "assistant", "content": viu.format_response_for_user(result)}
         ]
         return new_history, ""  # Return empty string to clear input

 import gradio as gr
+from utils.model_configuration_utils import select_best_model, ensure_model
+from services.llm import build_llm
+from services.embeddings import configure_embeddings
+from services.indexing import build_symptom_index
+from utils.voice_input_utils import enhanced_process_speech, format_response_for_user, get_asr_pipeline
 import torch
 import torchaudio.transforms as T
+import json
+# 1) Model selection & download
+MODEL_NAME, REPO_ID = select_best_model()
+model_path = ensure_model()
+print(f"Using model: {MODEL_NAME} from {REPO_ID}")
+print(f"Model path: {model_path}")
+print(f"Model size: {torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB")
+print(f"Model requirements: {MODEL_NAME} requires at least 4GB VRAM and 8GB RAM.")
+print(f"Model type: {'GPU' if torch.cuda.is_available() else 'CPU'}")
+# 2) LLM and embeddings config
+llm = build_llm(model_path)
+configure_embeddings()
+print(f"LLM configured with model: {model_path}")
+print("Embeddings configured successfully.")
+# 3) Index setup
+symptom_index = build_symptom_index()
+print("Symptom index built successfully.")
+print("Ready for queries.")
 # --- System prompt ---
 SYSTEM_PROMPT = """
     clear_btn.click(lambda: None, None, chatbot, queue=False)
     microphone.stream(
+        fn=enhanced_process_speech,
         inputs=[microphone, chatbot, api_key, model_selector, temperature],
         outputs=chatbot,
         show_progress="hidden",
             sample_rate, audio_array = audio
             features = process_audio(audio_array, sample_rate)
+            asr = get_asr_pipeline()
             result = asr(features)
             return result.get("text", "").strip() if isinstance(result, dict) else str(result).strip()
         new_history = history + [
             {"role": "user", "content": text},
+            {"role": "assistant", "content": format_response_for_user(result)}
         ]
         return new_history, ""  # Return empty string to clear input

services/embeddings.py ADDED Viewed

	@@ -0,0 +1,6 @@

+from llama_index.core import Settings
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+def configure_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
+    Settings.embed_model = HuggingFaceEmbedding(model_name=model_name)

services/indexing.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from src.parse_tabular import create_symptom_index
+def build_symptom_index():
+    return create_symptom_index()

services/llm.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from llama_index.core import Settings
+from llama_index.llms.llama_cpp import LlamaCPP
+def build_llm(model_path, temperature=0.7, max_tokens=256, context_window=2048):
+    llm = LlamaCPP(
+        model_path=model_path,
+        temperature=temperature,
+        max_new_tokens=max_tokens,
+        context_window=context_window,
+        verbose=False
+    )
+    Settings.llm = llm
+    return llm

src/merge_kb.py DELETED Viewed

@@ -1,15 +0,0 @@
-# merge_kb.py
-import json
-with open("symptom_to_icd.json") as f:
-    symptom_to_icd = json.load(f)
-with open("icd_to_description.json") as f:
-    icd_to_description = json.load(f)
-kb = {
-    "symptom_to_icd": symptom_to_icd,
-    "icd_to_description": icd_to_description
-}
-with open("knowledge_base.json", "w", encoding="utf-8") as f:
-    json.dump(kb, f, indent=2, ensure_ascii=False)

utils/llama_index_utils.py DELETED Viewed

@@ -1,49 +0,0 @@
-import os
-import json
-from transformers import pipeline
-from llama_index import SimpleDirectoryReader, GPTVectorStoreIndex, LLMPredictor, OpenAI
-_index = None
-def query_symptoms_tool(prompt_json: str):
-    # parse “prompt_json” into Python dict and call your existing query_symptoms()
-    data = json.loads(prompt_json)
-    return query_symptoms(data["raw_input"])
-def get_llm_predictor():
-    """
-    Return an LLMPredictor configured for local GPU (transformers) if USE_LOCAL_GPU=1,
-    otherwise uses OpenAI.
-    """
-    if os.getenv("USE_LOCAL_GPU") == "1":
-        # Local GPU inference using GPT-2 as an example
-        local_pipe = pipeline("text-generation", model="gpt2", device=0)
-        return LLMPredictor(llm=local_pipe)
-    # Default to OpenAI provider
-    return LLMPredictor(llm=OpenAI(temperature=0))
-def build_index(data_path="data/icd10cm_tabular_2025"):  # noqa: C901
-    """
-    Build (or retrieve cached) GPTVectorStoreIndex from ICD documents.
-    """
-    global _index
-    if _index is None:
-        # Load documents from the ICD data directory
-        docs = SimpleDirectoryReader(data_path).load_data()
-        # Initialize the index with chosen LLM predictor
-        predictor = get_llm_predictor()
-        _index = GPTVectorStoreIndex.from_documents(docs, llm_predictor=predictor)
-    return _index
-def query_symptoms(prompt: str, top_k: int = 5):
-    """
-    Query the index for the given symptom prompt and return the result.
-    """
-    idx = build_index()
-    # Create a query engine with the same predictor
-    predictor = get_llm_predictor()
-    query_engine = idx.as_query_engine(similarity_top_k=top_k, llm_predictor=predictor)
-    return query_engine.query(prompt)

utils/model_configuration_utils.py CHANGED Viewed

@@ -13,6 +13,7 @@ import torch
 import torchaudio.transforms as T
 from huggingface_hub import hf_hub_download
 from typing import Optional
 # Model options mapped to their requirements
 MODEL_OPTIONS = {

 import torchaudio.transforms as T
 from huggingface_hub import hf_hub_download
 from typing import Optional
+from llama_index.llms.llama_cpp import LlamaCPP
 # Model options mapped to their requirements
 MODEL_OPTIONS = {