|
import spacy |
|
import re |
|
import sys |
|
from transformers import pipeline |
|
|
|
class MedicalTextSimplifier:
    """Annotate biomedical terms in free text with plain-language explanations.

    Terms are detected with the SciSpaCy ``en_core_sci_sm`` pipeline and
    explained by prompting the ``stanford-crfm/BioMedLM`` text-generation
    model.
    """

    def __init__(self):
        """Load the SciSpaCy pipeline and the BioMedLM generator.

        Progress is printed to stdout. If either model fails to load, the
        error is printed and the process exits with status 1.
        """
        print("Loading models...")
        try:
            self.nlp = spacy.load("en_core_sci_sm")
            # device=-1 is passed through unchanged from the original setup.
            self.pipe = pipeline(
                "text-generation",
                model="stanford-crfm/BioMedLM",
                device=-1,
            )
            print("Models loaded successfully!")
        except Exception as e:
            print(f"Error loading models: {e}")
            # NOTE(review): exiting from a constructor is kept for backward
            # compatibility; callers embedding this class may prefer a raise.
            sys.exit(1)

    def identify_medical_terms(self, text):
        """Identify biomedical terms using SciSpaCy.

        Args:
            text: The text to analyse.

        Returns:
            A list with one dict per entity in ``doc.ents`` order, holding
            ``'term'`` (the entity text) and its ``'start'`` / ``'end'``
            character offsets in ``text``.
        """
        doc = self.nlp(text)
        return [
            {
                'term': ent.text,
                'start': ent.start_char,
                'end': ent.end_char,
            }
            for ent in doc.ents
        ]

    def generate_simplified_explanation(self, term, context):
        """Generate plain-language explanation using BioMedLM.

        Args:
            term: The medical term to explain.
            context: Surrounding text included in the prompt.

        Returns:
            The generated explanation with surrounding whitespace removed.
            If generation fails (the error is printed) or the model yields
            only whitespace, a generic fallback sentence naming ``term`` is
            returned instead. This method does not raise on generation errors.
        """
        fallback = f"a medical term related to {term}"
        try:
            prompt = f"Explain the medical term '{term}' in simple language for a patient. Context: {context}\nExplanation:"
            result = self.pipe(
                prompt,
                max_new_tokens=50,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                truncation=True,
                # Ask for the continuation only. Splitting the full text on
                # "Explanation:" and keeping the last piece loses content
                # whenever the model repeats that marker in its answer.
                return_full_text=False,
                pad_token_id=self.pipe.tokenizer.eos_token_id,
            )
            generated = result[0]['generated_text']
            # Defensive: drop the prompt if the pipeline echoed it anyway.
            if generated.startswith(prompt):
                generated = generated[len(prompt):]
            explanation = generated.strip()
        except Exception as e:
            print(f"Error generating explanation for '{term}': {e}")
            return fallback
        # An empty explanation would render as "term ()"; use the fallback.
        return explanation or fallback

    def _explain_unique_terms(self, medical_terms, text):
        """Map each distinct term (lower-cased) to a single explanation.

        The generator is invoked once per distinct term, using the first
        surface form encountered, so repeated mentions share one explanation.
        """
        explanations = {}
        for item in medical_terms:
            key = item['term'].lower()
            if key not in explanations:
                explanations[key] = self.generate_simplified_explanation(
                    item['term'], text
                )
        return explanations

    def get_formatted_output(self, text):
        """Get formatted output with original text and unique medical terms explained.

        Args:
            text: The text to analyse.

        Returns:
            A dict with ``"original_text"`` (the input) and
            ``"medical_terms_explained"``, a dict mapping each lower-cased
            term to its explanation (empty when no terms are found).
        """
        medical_terms = self.identify_medical_terms(text)
        return {
            "original_text": text,
            "medical_terms_explained": self._explain_unique_terms(
                medical_terms, text
            ),
        }

    def simplify_text(self, text):
        """Return ``text`` with every detected term followed by its explanation.

        Each occurrence of a term becomes ``"term (explanation)"``. Repeated
        terms (compared case-insensitively) reuse one explanation. Progress
        and results are printed to stdout.

        Args:
            text: The text to simplify.

        Returns:
            The annotated text, or ``text`` unchanged when no terms are found.
        """
        print("\nOriginal text:")
        print(text)
        print("\nIdentifying medical terms using SciSpaCy...")
        medical_terms = self.identify_medical_terms(text)

        if not medical_terms:
            print("No medical terms found.")
            return text

        print("\nMedical terms and simplified explanations:")
        explanations = self._explain_unique_terms(medical_terms, text)
        for key, explanation in explanations.items():
            print(f"- {key}: {explanation}")

        # Assemble the result in one left-to-right pass.
        # Assumes the spans are sorted and non-overlapping, as the original
        # running-offset logic also required — TODO confirm for custom
        # pipelines that replace doc.ents.
        pieces = []
        cursor = 0
        for item in medical_terms:
            explanation = explanations[item['term'].lower()]
            pieces.append(text[cursor:item['start']])
            pieces.append(f"{item['term']} ({explanation})")
            cursor = item['end']
        pieces.append(text[cursor:])
        simplified_text = "".join(pieces)

        print("\nSimplified text:")
        print(simplified_text)
        return simplified_text