The updated app will:
1. Structure responses with both diagnoses and follow-up questions
2. Maintain context between interactions
3. Use the Mistral model (local or via API) when available
4. Format responses in a more interactive way
src/app.py  (+64, -55)
```diff
@@ -102,15 +102,17 @@ def get_system_specs() -> Dict[str, float]:
         "gpu_vram_gb": gpu_vram_gb
     }
 
-def select_best_model()
+def select_best_model():
     """Select the best model based on system specifications."""
     specs = get_system_specs()
-    print(f"\nSystem specifications:")
-    print(f"RAM: {specs['ram_gb']:.1f} GB")
-    print(f"GPU VRAM: {specs['gpu_vram_gb']:.1f} GB")
 
+    # Prioritize Mistral if we have API key or sufficient resources
+    if any(k.startswith("mk-") for k in [api_key.value]):  # Check for Mistral API key
+        return "mistral-7b-instruct-v0.1.Q4_K_M.gguf", "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
+    elif specs['gpu_vram_gb'] >= 6 or specs['ram_gb'] >= 16:
+        return MODEL_OPTIONS["medium"]["name"], MODEL_OPTIONS["medium"]["repo"]
     # Prioritize GPU if available
+    elif specs['gpu_vram_gb'] >= 4:  # You have 6GB, so this should work
         model_tier = "small"  # phi-2 should work well on RTX 2060
     elif specs['ram_gb'] >= 8:
         model_tier = "small"
```
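For reference, here is a minimal sketch (not part of the diff) of the kind of `MODEL_OPTIONS` table that `select_best_model()` indexes into, together with one way the returned `(filename, repo)` pair could be loaded via huggingface_hub and llama-cpp-python. The tier entries and the loading path are assumptions, not the app's actual code:

```python
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Hypothetical tier table; the diff only implies that "name"/"repo" keys exist.
MODEL_OPTIONS = {
    "small": {"name": "phi-2.Q4_K_M.gguf", "repo": "TheBloke/phi-2-GGUF"},
    "medium": {"name": "mistral-7b-instruct-v0.1.Q4_K_M.gguf",
               "repo": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"},
}

def load_gguf(filename: str, repo_id: str) -> Llama:
    """Download a GGUF checkpoint and load it with llama-cpp-python."""
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    return Llama(model_path=model_path, n_ctx=4096, n_gpu_layers=-1)

# e.g. llm = load_gguf(*select_best_model())
```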
```diff
@@ -212,12 +214,15 @@ symptom_index = create_symptom_index()
 print("Index created successfully")
 
 # --- System prompt ---
-SYSTEM_PROMPT = """
-"""
+SYSTEM_PROMPT = """You are a medical assistant using the Mistral model to analyze symptoms and determine ICD-10 codes.
+Your responses should ALWAYS be in this format:
+{
+    "diagnoses": ["ICD10 code - description"],
+    "confidences": [confidence score between 0-1],
+    "follow_up": "ONE specific follow-up question to refine the diagnosis",
+    "explanation": "Brief explanation of why you're asking this question"
+}
+Keep responses focused and clinical."""
 
 def process_speech(audio_data, history):
     """Process speech input and convert to text."""
```
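To make the contract concrete, here is a hypothetical reply that satisfies the new SYSTEM_PROMPT, plus a quick shape check; the ICD-10 code and wording are illustrative only:

```python
import json

example_reply = """{
    "diagnoses": ["J06.9 - Acute upper respiratory infection, unspecified"],
    "confidences": [0.72],
    "follow_up": "How long have the symptoms lasted?",
    "explanation": "Duration helps separate a self-limiting infection from something that needs escalation."
}"""

parsed = json.loads(example_reply)
assert set(parsed) == {"diagnoses", "confidences", "follow_up", "explanation"}
assert len(parsed["diagnoses"]) == len(parsed["confidences"])
```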
```diff
@@ -521,80 +526,84 @@ with gr.Blocks(theme="default") as demo:
             features = process_audio(audio_array, sample_rate)
 
             asr = get_asr_pipeline()
             result = asr(features)
             return result.get("text", "").strip() if isinstance(result, dict) else str(result).strip()
         except Exception as e:
             print(f"Transcription error: {str(e)}")
             return ""
 
     microphone.stream(
         fn=update_live_transcription,
         inputs=[microphone],
         outputs=transcript_box,
         show_progress="hidden",
         queue=True
     )
 
     clear_btn.click(
         fn=lambda: (None, "", ""),
         outputs=[chatbot, transcript_box, text_input],
         queue=False
     )
 
     def cleanup_memory():
         """Release unused memory (placeholder for future memory management)."""
-        import
+        import gc
         gc.collect()
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
 
     def process_text_input(text, history):
-        """Process text input with
+        """Process text input with interactive follow-up."""
         if not text:
+            return history, ""
 
-        prompt = f"""
-Format as JSON with diagnoses, confidences, and follow_up fields."""
+        try:
+            # Add context from history
+            context = "\n".join([m["content"] for m in history if m["role"] == "user"]) if history else ""
+
+            prompt = f"""{SYSTEM_PROMPT}
+Previous context: {context}
+Current symptoms: {text}
+Analyze and respond with likely diagnoses and ONE key follow-up question."""
+
+            response = llm.complete(prompt)
 
             try:
-                # Try to parse as JSON first
                 result = json.loads(response.text)
             except json.JSONDecodeError:
+                result = {
+                    "diagnoses": ["R69 - Illness, unspecified"],
+                    "confidences": [0.5],
+                    "follow_up": str(response.text)[:200],
+                    "explanation": "Could not parse response"
+                }
+
+            formatted_response = f"""Possible Diagnoses:
+{''.join(f'- {d} ({c*100:.0f}%)\n' for d, c in zip(result['diagnoses'], result['confidences']))}
+
+Follow-up Question: {result['follow_up']}
+({result['explanation']})"""
 
             new_history = history + [
                 {"role": "user", "content": text},
-                {"role": "assistant", "content":
+                {"role": "assistant", "content": formatted_response}
             ]
             return new_history, ""
         except Exception as e:
             print(f"Error processing text: {str(e)}")
             return history, text
 
     # Update the submit button handler
     submit_btn.click(
         fn=process_text_input,
         inputs=[text_input, chatbot],
         outputs=[chatbot, text_input],
-        queue=
+        queue=True
     ).success(  # Changed from .then to .success for better error handling
         fn=cleanup_memory,
         inputs=None,
         outputs=None,
         queue=False
     )
```
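The JSON-or-fallback logic in process_text_input can be exercised on its own. Below is a standalone sketch that mimics that path with a stubbed, non-JSON model output in place of `llm.complete()`; the stub text is made up:

```python
import json

fake_llm_output = "Sounds like a tension headache; how long has it lasted?"  # stub, not JSON

try:
    result = json.loads(fake_llm_output)
except json.JSONDecodeError:
    # Same fallback shape the diff uses when the model ignores the JSON format
    result = {
        "diagnoses": ["R69 - Illness, unspecified"],
        "confidences": [0.5],
        "follow_up": fake_llm_output[:200],
        "explanation": "Could not parse response",
    }

formatted = "Possible Diagnoses:\n" + "".join(
    f"- {d} ({c * 100:.0f}%)\n" for d, c in zip(result["diagnoses"], result["confidences"])
) + f"\nFollow-up Question: {result['follow_up']}\n({result['explanation']})"
print(formatted)
```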
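One assumption worth flagging: process_text_input appends OpenAI-style {"role", "content"} dicts to the history, which Gradio only renders correctly when the Chatbot is created in messages mode. Here is a minimal sketch of a matching UI declaration; the component names come from the diff, everything else (labels, layout) is assumed:

```python
import gradio as gr

with gr.Blocks(theme="default") as demo:
    chatbot = gr.Chatbot(type="messages")  # required for role/content history dicts
    transcript_box = gr.Textbox(label="Live transcription")
    text_input = gr.Textbox(label="Describe your symptoms")
    submit_btn = gr.Button("Submit")
    clear_btn = gr.Button("Clear")
```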