gpaasch committed
Commit 75020f1 · 1 Parent(s): 21d0c24

1. Added memory management

2. Limited input text length
3. Limited response length
4. Added batch size control
5. Added thread limiting

Files changed (1)
  1. src/app.py +17 -4
src/app.py CHANGED
@@ -206,7 +206,12 @@ llm = LlamaCPP(
     model_path=model_path,
     temperature=0.7,
     max_new_tokens=256,
-    context_window=2048
+    context_window=2048,
+    n_batch=512,              # Added batch size limit
+    n_ctx=2048,               # Explicit context window
+    verbose=False,            # Reduce logging
+    n_threads=4,              # Limit threads
+    last_n_tokens_size=256    # Limit token history
 )
 print("LLM initialized successfully")
 
@@ -556,31 +561,39 @@ with gr.Blocks(
     )
 
     def process_text_input(text, history):
-        """Process text input and generate response."""
+        """Process text input with memory management."""
         if not text:
             return history
 
         try:
+            # Limit input length
+            if len(text) > 500:
+                text = text[:500] + "..."
+
             # Process the symptoms
             diagnosis_query = f"""
             Based on these symptoms: '{text}'
             Provide relevant ICD-10 codes and diagnostic questions.
             Focus on clinical implications.
+            Limit response to 1000 characters.
             """
             response = symptom_index.as_query_engine().query(diagnosis_query)
+
+            # Clean up memory
+            cleanup_memory()
 
-            # Format and return chat messages
             return history + [
                 {"role": "user", "content": text},
                 {"role": "assistant", "content": format_response_for_user({
                     "diagnoses": [],
                     "confidences": [],
-                    "follow_up": str(response)
+                    "follow_up": str(response)[:1000]  # Limit response length
                 })}
             ]
 
         except Exception as e:
             print(f"Text processing error: {str(e)}")
+            cleanup_memory()
             return history
 
     submit_btn.click(
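
Note: the new handler calls cleanup_memory() after the query completes and again in the exception path, but the helper itself is defined elsewhere in src/app.py and is not part of this diff. A minimal sketch of what such a helper could look like, assuming it only needs to trigger Python's garbage collector and, in GPU deployments, release cached CUDA allocations (the torch handling below is an assumption, not taken from this commit):

import gc

def cleanup_memory():
    """Release Python-level garbage and, when available, cached GPU memory."""
    gc.collect()  # reclaim unreferenced objects left over from the query
    try:
        import torch  # assumption: only relevant if a CUDA-enabled torch is installed
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # drop cached CUDA allocations
    except ImportError:
        pass  # CPU-only deployment; garbage collection alone is sufficient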