File size: 3,738 Bytes
418a820
 
 
10dfafd
418a820
 
 
 
 
10dfafd
418a820
10dfafd
 
418a820
 
 
 
ce98406
418a820
10dfafd
418a820
 
 
10dfafd
 
 
 
 
418a820
ce98406
418a820
10dfafd
418a820
 
a13509e
 
 
 
 
 
 
 
 
10dfafd
418a820
 
10dfafd
418a820
ce98406
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418a820
ce98406
418a820
 
 
 
ce98406
418a820
 
 
ce98406
418a820
ce98406
10dfafd
ce98406
418a820
 
 
 
 
 
 
 
ce98406
a13509e
 
22029cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import spacy
import re
import sys
from transformers import pipeline

class MedicalTextSimplifier:
    """Annotate text with plain-language explanations of the medical terms in it.

    Terms are the entities reported by the SciSpaCy pipeline stored in
    ``self.nlp``; explanations are produced by the Hugging Face
    text-generation pipeline stored in ``self.pipe``.
    """

    def __init__(self):
        """Load both models, terminating the process if either fails to load."""
        print("Loading models...")
        try:
            # Load SciSpaCy model
            self.nlp = spacy.load("en_core_sci_sm")
            # Load BioMedLM via Hugging Face pipeline (device=-1 selects the CPU)
            self.pipe = pipeline("text-generation", model="stanford-crfm/BioMedLM", device=-1)
            print("Models loaded successfully!")
        except Exception as e:
            # NOTE(review): exiting from a constructor is unfriendly to library
            # callers, but existing callers may rely on it, so it is kept as is.
            print(f"Error loading models: {e}")
            sys.exit(1)

    def identify_medical_terms(self, text):
        """Identify biomedical terms using SciSpaCy.

        Args:
            text: The text to analyse.

        Returns:
            A list with one dict per entity, in the order the pipeline reports
            them, with keys ``'term'`` (entity text), ``'start'`` and ``'end'``
            (character offsets of the entity within ``text``).
        """
        doc = self.nlp(text)
        return [
            {
                'term': ent.text,
                'start': ent.start_char,
                'end': ent.end_char,
            }
            for ent in doc.ents
        ]

    def generate_simplified_explanation(self, term, context):
        """Generate a plain-language explanation of ``term`` using BioMedLM.

        Args:
            term: The medical term to explain.
            context: The text in which the term appears; it is embedded in the
                prompt.

        Returns:
            The generated explanation, or the generic sentence
            ``"a medical term related to <term>"`` when generation fails or
            yields no usable text.
        """
        fallback = f"a medical term related to {term}"
        marker = "Explanation:"
        try:
            prompt = f"Explain the medical term '{term}' in simple language for a patient. Context: {context}\nExplanation:"
            result = self.pipe(
                prompt,
                max_new_tokens=50,  # Use max_new_tokens instead of max_length
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                truncation=True,  # Explicitly enable truncation
                pad_token_id=self.pipe.tokenizer.eos_token_id,  # Set pad token
            )
            generated = result[0]['generated_text']
            if generated.startswith(prompt):
                # The output echoes the prompt: keep only the continuation and
                # stop at a repeated marker, so a model that starts a second
                # "Explanation:" turn does not replace the first answer.
                continuation = generated[len(prompt):]
                explanation = continuation.split(marker, 1)[0].strip()
            else:
                # The prompt is not a literal prefix (e.g. it was truncated):
                # fall back to the text after the last marker.
                explanation = generated.split(marker)[-1].strip()
        except Exception as e:
            # Best effort: a failed generation must not abort the annotation.
            print(f"Error generating explanation for '{term}': {e}")
            return fallback
        # An empty generation would otherwise be rendered as "term ()".
        return explanation or fallback

    def get_formatted_output(self, text):
        """Get formatted output with original text and unique medical terms explained.

        Args:
            text: The text to analyse.

        Returns:
            A dict with ``"original_text"`` (the input, unchanged) and
            ``"medical_terms_explained"``, mapping each distinct lower-cased
            term to its explanation (empty when no term is found).
        """
        unique_terms = {}
        for item in self.identify_medical_terms(text):
            key = item['term'].lower()  # Case-insensitive de-duplication
            if key not in unique_terms:
                # The original casing is what gets shown to the model.
                unique_terms[key] = self.generate_simplified_explanation(item['term'], text)

        return {
            "original_text": text,
            "medical_terms_explained": unique_terms,
        }

    def simplify_text(self, text):
        """Print and return ``text`` with an explanation after each medical term.

        Kept for backward compatibility. Each term occurrence is rewritten as
        ``"term (explanation)"``. A term is explained once per call
        (case-insensitively) and that explanation is reused for its later
        occurrences, so repeats neither cost an extra generation nor receive a
        different sampled answer.

        Args:
            text: The text to annotate.

        Returns:
            The annotated text, or ``text`` unchanged when no term is found.
        """
        print("\nOriginal text:")
        print(text)
        print("\nIdentifying medical terms using SciSpaCy...")
        medical_terms = self.identify_medical_terms(text)

        if not medical_terms:
            print("No medical terms found.")
            return text

        print("\nMedical terms and simplified explanations:")

        explanations = {}
        pieces = []
        cursor = 0  # End of the last span of ``text`` already copied
        # Assumes terms arrive in document order and do not overlap (the
        # original offset arithmetic relied on the same thing).
        for item in medical_terms:
            term = item['term']
            key = term.lower()
            if key not in explanations:
                explanations[key] = self.generate_simplified_explanation(term, text)
                print(f"- {term}: {explanations[key]}")
            pieces.append(text[cursor:item['start']])
            pieces.append(f"{term} ({explanations[key]})")
            cursor = item['end']
        pieces.append(text[cursor:])
        simplified_text = "".join(pieces)

        print("\nSimplified text:")
        print(simplified_text)
        return simplified_text