gpaasch committed
Commit c024962 · 1 Parent(s): 932f0ad

feat: Add local LLM support with ctransformers

Files changed (2):
  1. requirements.txt +8 -7
  2. src/app.py +28 -5
requirements.txt CHANGED
@@ -3,20 +3,21 @@ gradio[mcp]
 gradio
 
 # core Llama-Index + HF model support
-openai
 torch
 transformers[torch]
 accelerate
-llama-index>=0.9.0 # Specify minimum version
+llama-index>=0.9.0
 llama-index-embeddings-huggingface
 llama-index-llms-huggingface
 
+# Language models and embeddings
+sentence-transformers>=2.2.0
+ctransformers[cuda]>=0.2.24 # For local LLM support with CUDA
+huggingface-hub # For model downloading
+
 # optional extras
 langchain
 langchain-community
-sentence-transformers>=2.2.0
 
-# system requirement for audio I/O (ffmpeg must be installed):
-# • Debian/Ubuntu: sudo apt install ffmpeg
-# • macOS (homebrew): brew install ffmpeg
-# • Windows: download from https://ffmpeg.org/download.html
+# system requirement for audio I/O
+ffmpeg-python
 
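The new huggingface-hub pin covers fetching the quantized weights. As a minimal sketch of that step, assuming the same model repo and GGUF file that src/app.py loads below (hf_hub_download is the standard huggingface_hub call; the pre-download step itself is an illustration, not part of this commit):

# Minimal sketch: pre-fetch the GGUF weights that src/app.py loads.
# Repo and filename are the ones referenced in the diff below.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    filename="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
)
print(path)  # local cache path of the quantized model (~4 GB)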
 
src/app.py CHANGED
@@ -1,8 +1,10 @@
 import os
 import gradio as gr
-from llama_index.core import Settings
-from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from parse_tabular import create_symptom_index # Change this import
+from llama_index.core import Settings, ServiceContext
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.llms import HuggingFaceLLM
+from ctransformers import AutoModelForCausalLM
+from parse_tabular import create_symptom_index
 import json
 
 # Configure embeddings globally
@@ -10,8 +12,29 @@ Settings.embed_model = HuggingFaceEmbedding(
     model_name="sentence-transformers/all-MiniLM-L6-v2"
 )
 
-# Create the index at startup
-symptom_index = create_symptom_index()
+# Configure local LLM with ctransformers
+model = AutoModelForCausalLM.from_pretrained(
+    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
+    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
+    model_type="mistral",
+    gpu_layers=0  # Set > 0 if you have GPU support
+)
+
+llm = HuggingFaceLLM(
+    model=model,
+    context_window=2048,
+    max_new_tokens=256,
+    temperature=0.7
+)
+
+# Create service context with local LLM
+service_context = ServiceContext.from_defaults(
+    llm=llm,
+    embed_model=Settings.embed_model
+)
+
+# Create the index at startup with local service context
+symptom_index = create_symptom_index(service_context=service_context)
 
 # --- System prompt ---
 SYSTEM_PROMPT = """
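For what the ctransformers piece does on its own, here is a minimal standalone sketch, assuming the same repo, file, and model type as the diff; the prompt and generation arguments are illustrative, and the [INST] wrapper is the usual Mistral-Instruct chat format rather than anything this commit specifies:

# Minimal sketch: load the GGUF model with ctransformers and generate.
# Repo/file/model_type match the diff; prompt and kwargs are illustrative.
from ctransformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    model_type="mistral",
    gpu_layers=0,  # raise above 0 to offload layers onto a CUDA GPU
)

prompt = "[INST] List three common symptoms of dehydration. [/INST]"
print(model(prompt, max_new_tokens=256, temperature=0.7))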