Spaces:

Agents-MCP-Hackathon
/

MedCodeMCP

Running

gpaasch committed on Jun 7

Commit

c024962

1 Parent(s): 932f0ad

feat: Add local LLM support with ctransformers

Files changed (2) hide show

requirements.txt CHANGED Viewed

@@ -3,20 +3,21 @@ gradio[mcp]
 gradio
 # core Llama-Index + HF model support
-openai
 torch
 transformers[torch]
 accelerate
-llama-index>=0.9.0  # Specify minimum version
 llama-index-embeddings-huggingface
 llama-index-llms-huggingface
 # optional extras
 langchain
 langchain-community
-sentence-transformers>=2.2.0
-# system requirement for audio I/O (ffmpeg must be installed):
-#   • Debian/Ubuntu: sudo apt install ffmpeg
-#   • macOS (homebrew): brew install ffmpeg
-#   • Windows: download from https://ffmpeg.org/download.html

 gradio
 # core Llama-Index + HF model support
 torch
 transformers[torch]
 accelerate
+llama-index>=0.9.0
 llama-index-embeddings-huggingface
 llama-index-llms-huggingface
+# Language models and embeddings
+sentence-transformers>=2.2.0
+ctransformers[cuda]>=0.2.24  # For local LLM support with CUDA
+huggingface-hub  # For model downloading
 # optional extras
 langchain
 langchain-community
+# system requirement for audio I/O
+ffmpeg-python

src/app.py CHANGED Viewed

@@ -1,8 +1,10 @@
 import os
 import gradio as gr
-from llama_index.core import Settings
-from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from parse_tabular import create_symptom_index  # Change this import
 import json
 # Configure embeddings globally
@@ -10,8 +12,29 @@ Settings.embed_model = HuggingFaceEmbedding(
     model_name="sentence-transformers/all-MiniLM-L6-v2"
 )
-# Create the index at startup
-symptom_index = create_symptom_index()
 # --- System prompt ---
 SYSTEM_PROMPT = """

 import os
 import gradio as gr
+from llama_index.core import Settings, ServiceContext
+from llama_index_embeddings_huggingface import HuggingFaceEmbedding
+from llama_index.llms import HuggingFaceLLM
+from ctransformers import AutoModelForCausalLM
+from parse_tabular import create_symptom_index
 import json
 # Configure embeddings globally
     model_name="sentence-transformers/all-MiniLM-L6-v2"
 )
+# Configure local LLM with ctransformers
+model = AutoModelForCausalLM.from_pretrained(
+    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
+    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
+    model_type="mistral",
+    gpu_layers=0  # Set > 0 if you have GPU support
+)
+llm = HuggingFaceLLM(
+    model=model,
+    context_window=2048,
+    max_new_tokens=256,
+    temperature=0.7
+)
+# Create service context with local LLM
+service_context = ServiceContext.from_defaults(
+    llm=llm,
+    embed_model=Settings.embed_model
+)
+# Create the index at startup with local service context
+symptom_index = create_symptom_index(service_context=service_context)
 # --- System prompt ---
 SYSTEM_PROMPT = """