File size: 3,738 Bytes
418a820 10dfafd 418a820 10dfafd 418a820 10dfafd 418a820 ce98406 418a820 10dfafd 418a820 10dfafd 418a820 ce98406 418a820 10dfafd 418a820 a13509e 10dfafd 418a820 10dfafd 418a820 ce98406 418a820 ce98406 418a820 ce98406 418a820 ce98406 418a820 ce98406 10dfafd ce98406 418a820 ce98406 a13509e 22029cc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import spacy
import re
import sys
from transformers import pipeline
class MedicalTextSimplifier:
    """Annotate biomedical terms in free text with plain-language explanations.

    Terms are detected with the SciSpaCy ``en_core_sci_sm`` model and each one
    is explained by prompting the ``stanford-crfm/BioMedLM`` text-generation
    pipeline.
    """

    # Marker that terminates the prompt; the model's answer follows it.
    _ANSWER_MARKER = "Explanation:"

    def __init__(self):
        """Load both models; the process exits with status 1 if loading fails."""
        print("Loading models...")
        try:
            # Load SciSpaCy model
            self.nlp = spacy.load("en_core_sci_sm")
            # Load BioMedLM via Hugging Face pipeline (device=-1 selects the CPU)
            self.pipe = pipeline(
                "text-generation",
                model="stanford-crfm/BioMedLM",
                device=-1,
            )
            print("Models loaded successfully!")
        except Exception as e:
            # Kept as a hard exit for backward compatibility with existing callers.
            print(f"Error loading models: {e}")
            sys.exit(1)

    def identify_medical_terms(self, text):
        """Identify biomedical terms using SciSpaCy.

        Args:
            text: The text to analyse. Empty or ``None`` input yields ``[]``
                without invoking the model.

        Returns:
            A list of dicts, one per detected entity, with keys ``'term'``
            (the entity text), ``'start'`` and ``'end'`` (character offsets
            into ``text``), in the order the model reports them.
        """
        if not text:
            return []
        return [
            {
                'term': ent.text,
                'start': ent.start_char,
                'end': ent.end_char,
            }
            for ent in self.nlp(text).ents
        ]

    @classmethod
    def _extract_explanation(cls, generated_text, prompt):
        """Return the model's first answer from ``generated_text``."""
        if generated_text.startswith(prompt):
            completion = generated_text[len(prompt):]
        else:
            # The prompt is not echoed verbatim (e.g. it was truncated), so
            # fall back to the text after the last marker.
            completion = generated_text.split(cls._ANSWER_MARKER)[-1]
        # If the model starts a second "Explanation:" section, keep only the
        # first answer rather than whatever happens to come last.
        first_answer = completion.split(cls._ANSWER_MARKER, 1)[0].strip()
        return first_answer or completion.strip()

    def generate_simplified_explanation(self, term, context):
        """Generate plain-language explanation using BioMedLM.

        Args:
            term: The medical term to explain.
            context: Surrounding text, included in the prompt.

        Returns:
            The generated explanation, or the generic sentence
            ``"a medical term related to <term>"`` when generation fails or
            produces no text. This method never raises.
        """
        fallback = f"a medical term related to {term}"
        try:
            prompt = f"Explain the medical term '{term}' in simple language for a patient. Context: {context}\nExplanation:"
            result = self.pipe(
                prompt,
                max_new_tokens=50,  # Use max_new_tokens instead of max_length
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                truncation=True,  # Explicitly enable truncation
                pad_token_id=self.pipe.tokenizer.eos_token_id,  # Set pad token
            )
            explanation = self._extract_explanation(
                result[0]['generated_text'], prompt
            )
        except Exception as e:
            # Deliberate best-effort: one failed generation must not abort the
            # annotation of the whole text.
            print(f"Error generating explanation for '{term}': {e}")
            return fallback
        # An empty generation would otherwise render as "term ()".
        return explanation or fallback

    def _explain_unique_terms(self, medical_terms, context):
        """Map each distinct term (lowercased) to a single explanation.

        The explanation is generated once, for the first spelling encountered,
        so repeated mentions share the same text and the sampling model is not
        called redundantly.
        """
        explanations = {}
        for item in medical_terms:
            key = item['term'].lower()  # Case-insensitive de-duplication
            if key not in explanations:
                explanations[key] = self.generate_simplified_explanation(
                    item['term'], context
                )
        return explanations

    def get_formatted_output(self, text):
        """Get formatted output with original text and unique medical terms explained.

        Args:
            text: The text to analyse.

        Returns:
            A dict with ``"original_text"`` (the input, unchanged) and
            ``"medical_terms_explained"``, a dict mapping each lowercased term
            to its explanation (empty when no terms are found).
        """
        medical_terms = self.identify_medical_terms(text)
        return {
            "original_text": text,
            "medical_terms_explained": self._explain_unique_terms(
                medical_terms, text
            ),
        }

    def simplify_text(self, text):
        """Print and return ``text`` with every medical term annotated inline.

        Kept for backward compatibility. Each detected term is followed by its
        explanation in parentheses; repeated mentions of a term (compared
        case-insensitively) reuse one explanation.

        Args:
            text: The text to annotate.

        Returns:
            The annotated text, or ``text`` unchanged when no terms are found.
        """
        print("\nOriginal text:")
        print(text)
        print("\nIdentifying medical terms using SciSpaCy...")
        medical_terms = self.identify_medical_terms(text)
        if not medical_terms:
            print("No medical terms found.")
            return text
        print("\nMedical terms and simplified explanations:")
        explanations = self._explain_unique_terms(medical_terms, text)
        # Build the result in one left-to-right pass over the original text,
        # which avoids offset bookkeeping and repeated string re-slicing.
        pieces = []
        cursor = 0
        for item in medical_terms:
            start, end = item['start'], item['end']
            if start < cursor:
                # Defensive: skip a span overlapping one already annotated
                # (not expected from the entity recogniser).
                continue
            explanation = explanations[item['term'].lower()]
            pieces.append(text[cursor:start])
            pieces.append(f"{item['term']} ({explanation})")
            cursor = end
        pieces.append(text[cursor:])
        simplified_text = "".join(pieces)
        print("\nSimplified text:")
        print(simplified_text)
        return simplified_text