# health-simplify-tool / medical_simplifier.py
# faisalshah012003's picture
# Update medical_simplifier.py
# a13509e verified
import spacy
import re
import sys
from transformers import pipeline
class MedicalTextSimplifier:
    """Annotate biomedical terms in free text with plain-language explanations.

    Terms are detected with the SciSpaCy ``en_core_sci_sm`` pipeline and each
    one is explained by prompting the ``stanford-crfm/BioMedLM``
    text-generation model.
    """

    def __init__(self):
        """Load both models.

        On any loading error the error is printed and the process exits with
        status 1 (``SystemExit``), so no half-initialised instance escapes.
        """
        print("Loading models...")
        try:
            # Load SciSpaCy model
            self.nlp = spacy.load("en_core_sci_sm")
            # Load BioMedLM via Hugging Face pipeline
            self.pipe = pipeline("text-generation", model="stanford-crfm/BioMedLM", device=-1)
            print("Models loaded successfully!")
        except Exception as e:
            print(f"Error loading models: {e}")
            sys.exit(1)

    def identify_medical_terms(self, text):
        """Identify biomedical terms using SciSpaCy.

        Returns:
            A list with one dict per entity found in *text*, in the order the
            pipeline reports them. Each dict holds the entity text under
            ``'term'`` and its character offsets into *text* under
            ``'start'`` and ``'end'``.
        """
        return [
            {'term': ent.text, 'start': ent.start_char, 'end': ent.end_char}
            for ent in self.nlp(text).ents
        ]

    def generate_simplified_explanation(self, term, context):
        """Generate a plain-language explanation of *term* using BioMedLM.

        Args:
            term: The medical term to explain.
            context: Surrounding text, embedded in the prompt as context.

        Returns:
            The model's continuation of the prompt, stripped of surrounding
            whitespace. If generation raises, or the continuation is empty,
            the generic sentence ``"a medical term related to <term>"`` is
            returned instead, so callers always get a non-empty string.
        """
        fallback = f"a medical term related to {term}"
        try:
            prompt = f"Explain the medical term '{term}' in simple language for a patient. Context: {context}\nExplanation:"
            result = self.pipe(
                prompt,
                max_new_tokens=50,  # Bound the continuation only, not prompt + continuation
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                truncation=True,  # Explicitly enable truncation
                pad_token_id=self.pipe.tokenizer.eos_token_id,  # Set pad token
            )
            generated = result[0]['generated_text']
            if generated.startswith(prompt):
                # Drop exactly the prompt. Splitting on the "Explanation:"
                # marker would discard the real answer whenever the model
                # repeats that marker inside its own continuation.
                explanation = generated[len(prompt):].strip()
            else:
                # The prompt was not echoed verbatim; fall back to the marker.
                explanation = generated.split("Explanation:")[-1].strip()
        except Exception as e:
            # Best effort: a failed generation must not abort the whole text.
            print(f"Error generating explanation for '{term}': {e}")
            return fallback
        # An empty continuation would otherwise render as "term ()".
        return explanation or fallback

    def get_formatted_output(self, text):
        """Get the original text together with its unique medical terms explained.

        Returns:
            A dict with ``"original_text"`` (the input, unchanged) and
            ``"medical_terms_explained"``, which maps each lowercased term to
            its explanation. Terms differing only in case share one entry,
            explained using the first spelling encountered. The mapping is
            empty when no terms are found.
        """
        explanations = {}
        for item in self.identify_medical_terms(text):
            key = item['term'].lower()  # Case-insensitive de-duplication
            if key not in explanations:
                explanations[key] = self.generate_simplified_explanation(item['term'], text)
        return {
            "original_text": text,
            "medical_terms_explained": explanations,
        }

    def simplify_text(self, text):
        """Print and return *text* with every medical term annotated inline.

        Kept for backward compatibility. Each detected term is rewritten as
        ``"term (explanation)"``. Repeated terms (compared case-insensitively)
        reuse a single explanation, so the sampled model is queried once per
        distinct term and the annotations stay consistent within one text.

        Returns:
            The annotated text, or *text* unchanged when no terms are found.
        """
        print("\nOriginal text:")
        print(text)
        print("\nIdentifying medical terms using SciSpaCy...")
        medical_terms = self.identify_medical_terms(text)
        if not medical_terms:
            print("No medical terms found.")
            return text

        print("\nMedical terms and simplified explanations:")
        explanations = {}
        pieces = []
        cursor = 0
        # Single left-to-right pass; relies on the terms being ordered by
        # position and non-overlapping, as identify_medical_terms yields them.
        for item in medical_terms:
            term = item['term']
            key = term.lower()
            if key not in explanations:
                explanations[key] = self.generate_simplified_explanation(term, text)
            pieces.append(text[cursor:item['start']])
            pieces.append(f"{term} ({explanations[key]})")
            cursor = item['end']
        pieces.append(text[cursor:])
        simplified_text = "".join(pieces)

        print("\nSimplified text:")
        print(simplified_text)
        return simplified_text