Spaces:

faisalshah012003
/

health-simplify-tool

Running

App Files Files Community

health-simplify-tool / medical_simplifier.py

faisalshah012003

Update medical_simplifier.py

a13509e verified 3 days ago

raw

history blame contribute delete

3.74 kB

	import spacy
	import re
	import sys
	from transformers import pipeline

	class MedicalTextSimplifier:
	    """Annotate medical terms in free text with plain-language explanations.

	    Terms are located with a SciSpaCy model (``en_core_sci_sm``) and each
	    explanation is generated by a BioMedLM text-generation pipeline.
	    """

	    def __init__(self):
	        """Load the SciSpaCy model and the BioMedLM pipeline.

	        If either model fails to load, the error is printed and the process
	        exits with status 1.
	        """
	        print("Loading models...")
	        try:
	            # Load SciSpaCy model
	            self.nlp = spacy.load("en_core_sci_sm")
	            # Load BioMedLM via Hugging Face pipeline
	            self.pipe = pipeline("text-generation", model="stanford-crfm/BioMedLM", device=-1)
	            print("Models loaded successfully!")
	        except Exception as e:
	            print(f"Error loading models: {e}")
	            sys.exit(1)

	    def identify_medical_terms(self, text):
	        """Identify biomedical terms using SciSpaCy.

	        Args:
	            text: The text to scan.

	        Returns:
	            A list of dicts, one per entity in ``doc.ents`` order, with the
	            keys ``'term'`` (entity text), ``'start'`` and ``'end'``
	            (character offsets into ``text``).
	        """
	        doc = self.nlp(text)
	        return [
	            {
	                'term': ent.text,
	                'start': ent.start_char,
	                'end': ent.end_char,
	            }
	            for ent in doc.ents
	        ]

	    def generate_simplified_explanation(self, term, context):
	        """Generate plain-language explanation using BioMedLM.

	        Args:
	            term: The medical term to explain.
	            context: Surrounding text, included in the prompt.

	        Returns:
	            The generated explanation with surrounding whitespace removed.
	            A generic fallback phrase is returned when generation raises or
	            yields only whitespace, so callers never get an empty string.
	        """
	        fallback = f"a medical term related to {term}"
	        try:
	            prompt = f"Explain the medical term '{term}' in simple language for a patient. Context: {context}\nExplanation:"
	            result = self.pipe(
	                prompt,
	                max_new_tokens=50,  # Use max_new_tokens instead of max_length
	                do_sample=True,
	                temperature=0.7,
	                top_p=0.9,
	                truncation=True,  # Explicitly enable truncation
	                pad_token_id=self.pipe.tokenizer.eos_token_id,  # Set pad token
	            )
	            generated = result[0]['generated_text']
	            if generated.startswith(prompt):
	                # Drop the echoed prompt instead of splitting on the marker:
	                # the continuation may itself contain "Explanation:", and
	                # splitting on the last marker would discard the real answer.
	                explanation = generated[len(prompt):].strip()
	            else:
	                # Output does not echo the prompt verbatim; fall back to the
	                # marker-based extraction.
	                explanation = generated.split("Explanation:")[-1].strip()
	            return explanation or fallback
	        except Exception as e:
	            # Best-effort: a failed generation must not abort the whole text.
	            print(f"Error generating explanation for '{term}': {e}")
	            return fallback

	    def get_formatted_output(self, text):
	        """Get formatted output with original text and unique medical terms explained.

	        Args:
	            text: The text to analyse.

	        Returns:
	            A dict with ``"original_text"`` (the input) and
	            ``"medical_terms_explained"``, mapping each lower-cased term to
	            its explanation. The mapping is empty when no terms are found.
	        """
	        unique_terms = {}
	        for item in self.identify_medical_terms(text):
	            key = item['term'].lower()  # Case-insensitive de-duplication
	            if key not in unique_terms:
	                unique_terms[key] = self.generate_simplified_explanation(item['term'], text)

	        return {
	            "original_text": text,
	            "medical_terms_explained": unique_terms,
	        }

	    def simplify_text(self, text):
	        """Print and return ``text`` with each medical term annotated inline.

	        Kept for backward compatibility. Every detected term is rewritten as
	        ``"term (explanation)"``. Repeated terms (compared case-insensitively)
	        reuse the first explanation, so the model runs once per distinct term
	        and repeated mentions are explained consistently.

	        Args:
	            text: The text to annotate.

	        Returns:
	            The annotated text, or ``text`` unchanged when no terms are found.
	        """
	        print("\nOriginal text:")
	        print(text)
	        print("\nIdentifying medical terms using SciSpaCy...")
	        medical_terms = self.identify_medical_terms(text)

	        if not medical_terms:
	            print("No medical terms found.")
	            return text

	        print("\nMedical terms and simplified explanations:")

	        explanations = {}
	        pieces = []
	        cursor = 0
	        # Assumes the term spans are in ascending order and do not overlap.
	        for item in medical_terms:
	            term = item['term']
	            key = term.lower()
	            if key not in explanations:
	                explanations[key] = self.generate_simplified_explanation(term, text)
	            pieces.append(text[cursor:item['start']])
	            pieces.append(f"{term} ({explanations[key]})")
	            cursor = item['end']
	        pieces.append(text[cursor:])
	        simplified_text = "".join(pieces)

	        print("\nSimplified text:")
	        print(simplified_text)
	        return simplified_text