Spaces:
Running
Running
1. Added memory management
Browse files
2. Limited input text length
3. Limited response length
4. Added batch size control
5. Added thread limiting
- src/app.py +17 -4
src/app.py
CHANGED
@@ -206,7 +206,12 @@ llm = LlamaCPP(
|
|
206 |
model_path=model_path,
|
207 |
temperature=0.7,
|
208 |
max_new_tokens=256,
|
209 |
-
context_window=2048
|
|
|
|
|
|
|
|
|
|
|
210 |
)
|
211 |
print("LLM initialized successfully")
|
212 |
|
@@ -556,31 +561,39 @@ with gr.Blocks(
|
|
556 |
)
|
557 |
|
558 |
def process_text_input(text, history):
|
559 |
-
"""Process text input
|
560 |
if not text:
|
561 |
return history
|
562 |
|
563 |
try:
|
|
|
|
|
|
|
|
|
564 |
# Process the symptoms
|
565 |
diagnosis_query = f"""
|
566 |
Based on these symptoms: '{text}'
|
567 |
Provide relevant ICD-10 codes and diagnostic questions.
|
568 |
Focus on clinical implications.
|
|
|
569 |
"""
|
570 |
response = symptom_index.as_query_engine().query(diagnosis_query)
|
|
|
|
|
|
|
571 |
|
572 |
-
# Format and return chat messages
|
573 |
return history + [
|
574 |
{"role": "user", "content": text},
|
575 |
{"role": "assistant", "content": format_response_for_user({
|
576 |
"diagnoses": [],
|
577 |
"confidences": [],
|
578 |
-
"follow_up": str(response)
|
579 |
})}
|
580 |
]
|
581 |
|
582 |
except Exception as e:
|
583 |
print(f"Text processing error: {str(e)}")
|
|
|
584 |
return history
|
585 |
|
586 |
submit_btn.click(
|
|
|
206 |
model_path=model_path,
|
207 |
temperature=0.7,
|
208 |
max_new_tokens=256,
|
209 |
+
context_window=2048,
|
210 |
+
n_batch=512, # Added batch size limit
|
211 |
+
n_ctx=2048, # Explicit context window
|
212 |
+
verbose=False, # Reduce logging
|
213 |
+
n_threads=4, # Limit threads
|
214 |
+
last_n_tokens_size=256 # Limit token history
|
215 |
)
|
216 |
print("LLM initialized successfully")
|
217 |
|
|
|
561 |
)
|
562 |
|
563 |
def process_text_input(text, history):
|
564 |
+
"""Process text input with memory management."""
|
565 |
if not text:
|
566 |
return history
|
567 |
|
568 |
try:
|
569 |
+
# Limit input length
|
570 |
+
if len(text) > 500:
|
571 |
+
text = text[:500] + "..."
|
572 |
+
|
573 |
# Process the symptoms
|
574 |
diagnosis_query = f"""
|
575 |
Based on these symptoms: '{text}'
|
576 |
Provide relevant ICD-10 codes and diagnostic questions.
|
577 |
Focus on clinical implications.
|
578 |
+
Limit response to 1000 characters.
|
579 |
"""
|
580 |
response = symptom_index.as_query_engine().query(diagnosis_query)
|
581 |
+
|
582 |
+
# Clean up memory
|
583 |
+
cleanup_memory()
|
584 |
|
|
|
585 |
return history + [
|
586 |
{"role": "user", "content": text},
|
587 |
{"role": "assistant", "content": format_response_for_user({
|
588 |
"diagnoses": [],
|
589 |
"confidences": [],
|
590 |
+
"follow_up": str(response)[:1000] # Limit response length
|
591 |
})}
|
592 |
]
|
593 |
|
594 |
except Exception as e:
|
595 |
print(f"Text processing error: {str(e)}")
|
596 |
+
cleanup_memory()
|
597 |
return history
|
598 |
|
599 |
submit_btn.click(
|