setup for local llm
src/app.py  +2 -5
@@ -2,7 +2,7 @@ import os
 import gradio as gr
 from llama_index.core import Settings, ServiceContext
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from
+from llama_index.llms.llama_cpp import LlamaCPP
 from parse_tabular import create_symptom_index
 import json
 
@@ -17,10 +17,7 @@ llm = LlamaCPP(
     model_path="models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
     temperature=0.7,
     max_new_tokens=256,
-    context_window=2048,
-    # GPU configuration
-    n_gpu_layers=0,  # Increase for GPU support
-    n_threads=8  # Adjust based on your CPU
+    context_window=2048
 )
 
 # Create service context with local LLM
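For reference, a minimal sketch of how the imports and constructor might fit together in src/app.py after this commit. The Settings wiring and the embedding model name (BAAI/bge-small-en-v1.5) are assumptions, not part of the diff; note that in llama-index, llama.cpp options such as n_gpu_layers and n_threads are passed through the model_kwargs dict rather than as top-level LlamaCPP arguments, which would explain why the commit drops them from the constructor.

    from llama_index.core import Settings
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
    from llama_index.llms.llama_cpp import LlamaCPP

    # Local Mistral model served through the llama.cpp bindings
    llm = LlamaCPP(
        model_path="models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        temperature=0.7,
        max_new_tokens=256,
        context_window=2048,
        # Assumption: CPU/GPU tuning belongs in model_kwargs, not top-level kwargs
        model_kwargs={"n_gpu_layers": 0, "n_threads": 8},
    )

    # Assumption: register the local LLM and a local embedding model globally;
    # Settings is the replacement for the deprecated ServiceContext
    Settings.llm = llm
    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

If the app still builds a ServiceContext (its import survives the diff), the same objects could instead be passed to ServiceContext.from_defaults(llm=llm, embed_model=embed_model) on older llama-index releases.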