gpaasch committed
Commit 7254b3e · 1 Parent(s): 75020f1

preventing segmentation fault

Files changed (1)
  1. src/app.py +10 -5
src/app.py CHANGED

@@ -207,11 +207,9 @@ llm = LlamaCPP(
     temperature=0.7,
     max_new_tokens=256,
     context_window=2048,
-    n_batch=512, # Added batch size limit
-    n_ctx=2048, # Explicit context window
-    verbose=False, # Reduce logging
-    n_threads=4, # Limit threads
-    last_n_tokens_size=256 # Limit token history
+    verbose=False # Reduce logging
+    # n_batch and n_threads are not valid parameters for LlamaCPP and should not be used.
+    # If you encounter segmentation faults, try reducing context_window or check your system resources.
 )
 print("LLM initialized successfully")
@@ -560,6 +558,13 @@ with gr.Blocks(
     queue=False
 )

+def cleanup_memory():
+    """Release unused memory (placeholder for future memory management)."""
+    import gc
+    gc.collect()
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+
 def process_text_input(text, history):
     """Process text input with memory management."""
     if not text:
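If a batch size or thread limit is still wanted after this change, llama_index's LlamaCPP wrapper can forward native llama.cpp options through its model_kwargs argument instead of as top-level parameters. A minimal sketch of that approach (the model path is a placeholder, and the import path assumes a recent llama_index release):

from llama_index.llms.llama_cpp import LlamaCPP

# Native llama.cpp options (n_batch, n_threads) are passed via
# model_kwargs; they are not top-level LlamaCPP arguments.
llm = LlamaCPP(
    model_path="./models/model.gguf",  # placeholder path
    temperature=0.7,
    max_new_tokens=256,
    context_window=2048,
    model_kwargs={
        "n_batch": 512,   # prompt-evaluation batch size
        "n_threads": 4,   # CPU threads used for inference
    },
    verbose=False,
)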
 
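Note that the new cleanup_memory() relies on torch being imported at module level; on a CPU-only install where torch may be absent, a guarded variant avoids a NameError. A defensive sketch under that assumption, not the committed code:

import gc

def cleanup_memory():
    """Release unused memory; safe when torch is missing or CPU-only."""
    gc.collect()
    try:
        import torch  # optional dependency in this sketch
    except ImportError:
        return  # torch not installed; garbage collection is enough
    if torch.cuda.is_available():
        torch.cuda.empty_cache()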