Spaces:
Running
Running
preventing segmentation fault
Browse files- src/app.py +10 -5
src/app.py
CHANGED
@@ -207,11 +207,9 @@ llm = LlamaCPP(
|
|
207 |
temperature=0.7,
|
208 |
max_new_tokens=256,
|
209 |
context_window=2048,
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
n_threads=4, # Limit threads
|
214 |
-
last_n_tokens_size=256 # Limit token history
|
215 |
)
|
216 |
print("LLM initialized successfully")
|
217 |
|
@@ -560,6 +558,13 @@ with gr.Blocks(
|
|
560 |
queue=False
|
561 |
)
|
562 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
563 |
def process_text_input(text, history):
|
564 |
"""Process text input with memory management."""
|
565 |
if not text:
|
|
|
207 |
temperature=0.7,
|
208 |
max_new_tokens=256,
|
209 |
context_window=2048,
|
210 |
+
verbose=False # Reduce logging
|
211 |
+
# n_batch and n_threads are not direct constructor arguments of LlamaCPP; if they are needed, forward them to llama.cpp through model_kwargs instead.
|
212 |
+
# If you encounter segmentation faults, try reducing context_window or check your system resources.
|
|
|
|
|
213 |
)
|
214 |
print("LLM initialized successfully")
|
215 |
|
|
|
558 |
queue=False
|
559 |
)
|
560 |
|
561 |
+
def cleanup_memory():
    """Release unused memory held by Python and, when applicable, by CUDA.

    Runs a full garbage-collection pass, then empties the PyTorch CUDA
    cache if PyTorch has already been loaded by this process and a CUDA
    device is available.

    Returns:
        None.
    """
    import gc
    import sys

    gc.collect()

    # Look PyTorch up in sys.modules instead of relying on a module-level
    # ``torch`` name: this avoids a NameError when the file never imported
    # torch, and if torch was never loaded there is no CUDA cache to clear.
    torch = sys.modules.get("torch")
    if torch is not None and torch.cuda.is_available():
        torch.cuda.empty_cache()
|
567 |
+
|
568 |
def process_text_input(text, history):
|
569 |
"""Process text input with memory management."""
|
570 |
if not text:
|