# Import Libraries
import os
import gradio as gr
import google.generativeai as genai
import fitz # PyMuPDF
from PIL import Image, ImageEnhance
import io
import re
import json
import numpy as np
import pandas as pd
from datetime import datetime
import base64
# Blood Report Analyzer Implementation
# Configure Google Gemini API
def configure_genai(api_key):
    """Register ``api_key`` with the Gemini SDK and build the two models used here.

    Args:
        api_key: Google Gemini API key forwarded to ``genai.configure``.

    Returns:
        A ``(vision_model, text_model)`` tuple: the first is created from
        ``'gemini-pro-vision'`` (used for image input), the second from
        ``'gemini-pro'`` (used for plain-text input).
    """
    genai.configure(api_key=api_key)
    # NOTE(review): model ids are hard-coded; confirm they are still served.
    model_ids = ('gemini-pro-vision', 'gemini-pro')
    vision_model, text_model = [genai.GenerativeModel(model_id) for model_id in model_ids]
    return vision_model, text_model
# Image preprocessing to improve OCR
def preprocess_image(image):
    """Return a grayscale, contrast-boosted and sharpened copy of ``image``.

    Args:
        image: A PIL image.

    Returns:
        A new PIL image in mode ``'L'`` after a 2.0x contrast enhancement
        followed by a 2.0x sharpness enhancement.
    """
    result = image.convert('L')
    # Same pipeline order as before: contrast first, then sharpness.
    for enhancer_factory in (ImageEnhance.Contrast, ImageEnhance.Sharpness):
        result = enhancer_factory(result).enhance(2.0)
    return result
# Extract text from PDF with advanced techniques
def extract_text_from_pdf(pdf_file):
    """Extract text, candidate table rows and large embedded images from a PDF.

    Args:
        pdf_file: Raw PDF bytes (passed to ``fitz.open`` as ``stream``).

    Returns:
        A ``(complete_text, images, tables)`` tuple:

        * ``complete_text`` - text of every page; each text line ends with a
          newline and each page is followed by a blank-line separator.
        * ``images`` - preprocessed PIL images for embedded images larger
          than 200x200 pixels (smaller ones are treated as logos/decoration).
        * ``tables`` - substrings that look like table rows (two or more
          words followed by two or more numbers).
    """
    # Heuristic for grid-like rows: >=2 word tokens then >=2 numeric tokens.
    table_row = re.compile(r'(?:\w+[\t ]+){2,}(?:\d+\.?\d*[\t ]+){2,}')

    page_texts = []
    images = []
    tables = []

    # The context manager guarantees the document is closed even on errors.
    with fitz.open(stream=pdf_file, filetype="pdf") as doc:
        for page in doc:
            # "dict" output keeps the block/line/span layout of the page.
            blocks = page.get_text("dict").get("blocks", [])

            page_lines = []
            for block in blocks:
                if block.get("type") != 0:  # 0 == text block
                    continue
                for line in block.get("lines", []):
                    spans = line.get("spans", [])
                    page_lines.append(" ".join(span.get("text", "") for span in spans))

            page_text = "".join(line_text + "\n" for line_text in page_lines)
            page_texts.append(page_text + "\n\n")
            tables.extend(table_row.findall(page_text))

            for img in page.get_images(full=True):
                xref = img[0]
                try:
                    image_bytes = doc.extract_image(xref)["image"]
                    image = Image.open(io.BytesIO(image_bytes))
                    # Skip logos and decorative elements.
                    if image.width > 200 and image.height > 200:
                        images.append(preprocess_image(image))
                except (OSError, KeyError, ValueError, RuntimeError):
                    # One undecodable embedded image must not abort the
                    # extraction of the remaining text and images.
                    continue

    return "".join(page_texts), images, tables
# Blood markers dictionary for reference.
# Maps a canonical nutrient name to the alternative names under which it may
# appear in a report. extract_blood_markers() searches the report text for the
# canonical name and for every alias, case-insensitively.
# Several aliases are very short ("Ca", "Mg", "Zn", "Se", "I") and are
# therefore ambiguous inside ordinary text; treat hits on them with care.
BLOOD_MARKERS = {
    "Vitamin D": ["25-OH Vitamin D", "Vitamin D, 25-Hydroxy", "25(OH)D", "Calcidiol"],
    "Vitamin B12": ["Cobalamin", "Cyanocobalamin", "Methylcobalamin", "B-12"],
    "Folate": ["Vitamin B9", "Folic Acid"],
    "Vitamin A": ["Retinol", "Beta-carotene"],
    "Vitamin E": ["Tocopherol", "Alpha-tocopherol"],
    "Vitamin K": ["Phylloquinone", "Menaquinone"],
    "Vitamin C": ["Ascorbic Acid", "L-ascorbic acid"],
    "Vitamin B1": ["Thiamine", "Thiamin"],
    "Vitamin B2": ["Riboflavin"],
    "Vitamin B3": ["Niacin", "Nicotinic acid"],
    "Vitamin B5": ["Pantothenic acid"],
    "Vitamin B6": ["Pyridoxine", "Pyridoxal", "Pyridoxamine"],
    "Vitamin B7": ["Biotin"],
    # Iron status is reported through several related tests.
    "Iron": ["Ferritin", "Transferrin", "TIBC", "UIBC", "Serum Iron"],
    "Calcium": ["Ca", "Serum Calcium", "Ionized Calcium"],
    "Magnesium": ["Mg", "Serum Magnesium"],
    "Zinc": ["Zn", "Serum Zinc"],
    "Selenium": ["Se", "Serum Selenium"],
    "Iodine": ["I", "Urinary Iodine"]
}
# Normal ranges reference (based on Indian standards).
# NOTE(review): no source is cited for these figures - verify before relying
# on them clinically.
# Each entry holds a "unit" plus either a single "min"/"max" pair or
# sex-specific "min_male"/"max_male"/"min_female"/"max_female" values.
# "deficiency"/"insufficiency" thresholds are display strings ("<20",
# "20-29"), not numbers.
# Keys are test names and do not all match BLOOD_MARKERS keys: "Ferritin" is
# only an alias of "Iron" there, and "Hemoglobin" is absent from it.
# This table is not referenced by any function in the visible part of the file.
REFERENCE_RANGES = {
    "Vitamin D": {"unit": "ng/mL", "min": 30, "max": 100,
                  "deficiency": "<20", "insufficiency": "20-29"},
    "Vitamin B12": {"unit": "pg/mL", "min": 211, "max": 911,
                    "deficiency": "<200", "insufficiency": "200-300"},
    "Folate": {"unit": "ng/mL", "min": 5.9, "max": 24.8,
               "deficiency": "<5.9"},
    "Ferritin": {"unit": "ng/mL", "min_male": 30, "max_male": 400,
                 "min_female": 13, "max_female": 150,
                 "deficiency_male": "<30", "deficiency_female": "<13"},
    "Hemoglobin": {"unit": "g/dL",
                   "min_male": 13.5, "max_male": 17.5,
                   "min_female": 12.0, "max_female": 15.5,
                   "deficiency_male": "<13.5", "deficiency_female": "<12.0"},
    "Calcium": {"unit": "mg/dL", "min": 8.6, "max": 10.3,
                "deficiency": "<8.6"},
    "Magnesium": {"unit": "mg/dL", "min": 1.7, "max": 2.2,
                  "deficiency": "<1.7"},
    "Zinc": {"unit": "μg/dL", "min": 70, "max": 120,
             "deficiency": "<70"}
}
# Extract blood markers and values from text
def extract_blood_markers(text, markers=None):
    """Find numeric values for known blood markers in free-form report text.

    A marker is recognised when its canonical name or one of its aliases
    appears as a whole token (not embedded in a longer word or number),
    optionally followed by ``:``, ``=`` or ``-``, and then a number, e.g.
    ``"Vitamin B12: 350"`` or ``"Serum Ca = 9.4"``. Matching is
    case-insensitive.

    Args:
        text: Report text to scan. Empty or ``None`` yields an empty result.
        markers: Optional mapping of canonical name -> list of aliases.
            Defaults to the module-level ``BLOOD_MARKERS`` table.

    Returns:
        Dict mapping canonical marker name to the first value found, as a
        ``float``. The canonical name is tried before the aliases, and the
        search for a marker stops at the first term that matches.
    """
    if markers is None:
        markers = BLOOD_MARKERS

    extracted_markers = {}
    if not text:
        return extracted_markers

    for vitamin, aliases in markers.items():
        for term in [vitamin, *aliases]:
            # The look-arounds stop a term from matching inside a longer
            # token: without them "Vitamin B1" matches "Vitamin B12" (value
            # 2.0) and "Ca" matches "CA125".
            pattern = re.compile(
                r'(?<![A-Za-z0-9])' + re.escape(term)
                + r'(?![A-Za-z0-9])\s*[:=-]?\s*(\d+\.?\d*)',
                re.IGNORECASE,
            )
            match = pattern.search(text)
            if match:
                extracted_markers[vitamin] = float(match.group(1))
                break  # value found for this marker; move on to the next one
    return extracted_markers
# Analyze report with Gemini using structured approach
def _extract_json_text(response_text):
    """Return the JSON payload embedded in a model reply, or ``None`` if absent."""
    # Preferred form: a ```json fenced block.
    fenced = re.search(r'```json\s*([\s\S]*?)\s*```', response_text)
    if fenced:
        return fenced.group(1)
    # Fallback: everything from the first "{" to the last "}".
    bare = re.search(r'({[\s\S]*})', response_text)
    return bare.group(1) if bare else None


def analyze_report(vision_model, text_model, content, extracted_markers, is_text=False):
    """Ask Gemini for a structured nutritional analysis of a blood report.

    Args:
        vision_model: Model used when ``is_text`` is false; must provide
            ``generate_content(parts)`` returning an object with ``.text``.
        text_model: Model used when ``is_text`` is true (same protocol).
        content: Report text when ``is_text`` is true. Otherwise a single
            image/part, or a list/tuple of parts (for example
            ``[report_text, image]``).
        extracted_markers: Dict of marker name -> value already parsed from
            the report; embedded in the prompt as JSON.
        is_text: Selects the text model and the text-only request shape.

    Returns:
        The parsed JSON object (a dict following the schema in the prompt),
        or ``{"error": ...}`` - with ``"raw_response"`` when a reply was
        received but could not be parsed. Exceptions are never propagated.
    """
    # Prompt text is runtime data: it is kept flush-left exactly as authored.
    analysis_prompt = f"""
I need a detailed analysis of this blood test report. Focus specifically on vitamin, mineral and nutritional deficiencies.
The report is from India, so provide recommendations relevant to Indian context, diet, and healthcare practices.
For each identified deficiency:
1. Specify the exact deficiency (vitamin/mineral name)
2. Current level from report and normal reference range
3. Severity (mild/moderate/severe)
4. Recommended daily dosage in appropriate units (mg, mcg, IU) for supplementation
5. Duration of recommended supplementation
6. Specific health impacts this deficiency is causing or may cause
7. Recommended foods available in India that address this deficiency (include both vegetarian and non-vegetarian options)
8. Any additional blood tests that should be considered for confirmation
Also provide:
- A comprehensive summary of all nutritional findings
- Lifestyle modifications specific to Indian context
- Any concerning values that require immediate medical attention
- Follow-up testing recommendations with timeline
If you cannot confidently determine specific deficiencies, explain why and suggest further tests.
The extracted markers I've identified include: {json.dumps(extracted_markers)}
Format your response as structured JSON with the following schema:
{{
"deficiencies": [
{{
"nutrient": "string",
"current_level": "string",
"reference_range": "string",
"severity": "string",
"recommended_dosage": "string",
"supplementation_duration": "string",
"health_impacts": ["string"],
"recommended_foods": {{
"vegetarian": ["string"],
"non_vegetarian": ["string"]
}},
"confirmation_tests": ["string"]
}}
],
"summary": "string",
"lifestyle_modifications": ["string"],
"urgent_concerns": ["string"] or null,
"followup_recommendations": {{
"tests": ["string"],
"timeline": "string"
}}
}}
"""
    try:
        if is_text:
            full_content = content + "\n\nExtracted markers: " + json.dumps(extracted_markers)
            response = text_model.generate_content([analysis_prompt, full_content])
        else:
            # Callers may pass several parts at once (text context + image).
            # They must be spliced into one flat parts list; nesting the list
            # as a single element is not a valid request.
            if isinstance(content, (list, tuple)):
                # Blank text parts carry no information, so they are dropped.
                extra_parts = [
                    part for part in content
                    if not (isinstance(part, str) and not part.strip())
                ]
            else:
                extra_parts = [content]
            response = vision_model.generate_content([analysis_prompt, *extra_parts])

        response_text = response.text
        json_str = _extract_json_text(response_text)
        if json_str is None:
            return {"error": "Failed to parse JSON response", "raw_response": response_text}

        try:
            result = json.loads(json_str)
        except json.JSONDecodeError:
            return {"error": "Invalid JSON response", "raw_response": response_text}

        # Downstream code treats the result as a dict; reject arrays/scalars.
        if not isinstance(result, dict):
            return {"error": "Invalid JSON response", "raw_response": response_text}
        return result
    except Exception as e:
        # Boundary handler: the UI shows the message instead of crashing.
        return {"error": f"Analysis failed: {str(e)}"}
# Generate personalized recommendation report
def _esc(value):
    """Return ``value`` converted to text and HTML-escaped."""
    # Function-scope import: ``html`` is not part of the file's import block.
    from html import escape

    return escape(str(value))


def _as_list(value, default=()):
    """Coerce a JSON field to a list of non-null items, or ``default`` if empty."""
    if value is None:
        value = []
    elif not isinstance(value, (list, tuple)):
        value = [value]  # a lone string/number becomes a one-item list
    items = [item for item in value if item is not None]
    return items or list(default)


def _bullets(items):
    """Render ``items`` as an escaped HTML unordered list."""
    rows = "".join(f"<li>{_esc(item)}</li>\n" for item in items)
    return f"<ul>\n{rows}</ul>\n"


def generate_recommendation_html(analysis_result, patient_info=None):
    """Render the analysis result as an HTML report.

    All text coming from the model or from the patient form is HTML-escaped
    before being embedded. Missing, ``null`` or wrongly-typed fields fall
    back to placeholders instead of raising.

    Args:
        analysis_result: Dict following the schema requested in the analysis
            prompt, or a dict containing ``"error"``.
        patient_info: Optional dict with ``"name"``, ``"age"`` and
            ``"gender"``; when falsy the patient line is omitted.

    Returns:
        An HTML string: an error box if ``analysis_result`` contains
        ``"error"``, otherwise the full report ending with a disclaimer.
    """
    if "error" in analysis_result:
        return (
            '<div style="color: #c0392b;">\n'
            f"Error in analysis: {_esc(analysis_result['error'])}\n"
            "</div>"
        )

    # Current date for the report
    current_date = datetime.now().strftime("%d %B, %Y")

    parts = [
        "<div>\n",
        "<h1>Nutritional Analysis Report</h1>\n",
        f"<p>Generated on: {current_date}</p>\n",
    ]
    if patient_info:
        parts.append(
            f"<p>Patient: {_esc(patient_info.get('name', ''))}"
            f" | Age: {_esc(patient_info.get('age', ''))}"
            f" | Gender: {_esc(patient_info.get('gender', ''))}</p>\n"
        )
    summary = analysis_result.get('summary') or 'No summary available'
    parts.append("<h2>Summary</h2>\n")
    parts.append(f"<p>{_esc(summary)}</p>\n")

    # Deficiencies section
    severity_colors = {
        "mild": "#f39c12",
        "moderate": "#e67e22",
        "severe": "#c0392b",
    }
    deficiencies = [
        entry for entry in _as_list(analysis_result.get('deficiencies'))
        if isinstance(entry, dict)
    ]
    if deficiencies:
        parts.append("<h2>Detected Deficiencies</h2>\n")
        for deficiency in deficiencies:
            severity = deficiency.get('severity') or 'Unknown'
            severity_color = severity_colors.get(str(severity).lower(), "#7f8c8d")
            foods = deficiency.get('recommended_foods')
            if not isinstance(foods, dict):
                foods = {}

            parts.append(
                f'<div style="border-left: 4px solid {severity_color};'
                ' padding-left: 12px; margin-bottom: 16px;">\n'
            )
            parts.append(f"<h3>{_esc(deficiency.get('nutrient') or 'Unknown')}</h3>\n")
            parts.append(
                f'<p style="color: {severity_color};">{_esc(severity)} deficiency</p>\n'
            )
            parts.append(f"<p>Current Level: {_esc(deficiency.get('current_level') or 'N/A')}</p>\n")
            parts.append(f"<p>Reference Range: {_esc(deficiency.get('reference_range') or 'N/A')}</p>\n")
            parts.append(f"<p>Recommended Dosage: {_esc(deficiency.get('recommended_dosage') or 'N/A')}</p>\n")
            parts.append(f"<p>Duration: {_esc(deficiency.get('supplementation_duration') or 'N/A')}</p>\n")

            parts.append("<p>Health Impacts:</p>\n")
            parts.append(_bullets(_as_list(deficiency.get('health_impacts'), ['N/A'])))

            parts.append("<h4>Recommended Foods</h4>\n")
            parts.append("<p>Vegetarian Options</p>\n")
            parts.append(_bullets(_as_list(foods.get('vegetarian'), ['N/A'])))
            parts.append("<p>Non-Vegetarian Options</p>\n")
            parts.append(_bullets(_as_list(foods.get('non_vegetarian'), ['N/A'])))

            parts.append("<h4>Additional Tests</h4>\n")
            parts.append(_bullets(_as_list(deficiency.get('confirmation_tests'), ['None recommended'])))
            parts.append("</div>\n")
    else:
        parts.append("<p>No specific deficiencies detected.</p>\n")

    # Lifestyle modifications
    lifestyle = _as_list(analysis_result.get('lifestyle_modifications'))
    if lifestyle:
        parts.append("<h2>Lifestyle Recommendations</h2>\n")
        parts.append(_bullets(lifestyle))

    # Urgent concerns (the schema allows null, and models sometimes send [null])
    urgent = _as_list(analysis_result.get('urgent_concerns'))
    if urgent:
        parts.append('<div style="border: 1px solid #c0392b; padding: 8px;">\n')
        parts.append("<h2>⚠️ Urgent Considerations</h2>\n")
        parts.append(_bullets(urgent))
        parts.append(
            "<p>Please consult with a healthcare provider promptly regarding these concerns.</p>\n"
        )
        parts.append("</div>\n")

    # Follow-up recommendations
    followup = analysis_result.get('followup_recommendations')
    if not isinstance(followup, dict):
        followup = {}
    followup_tests = _as_list(followup.get('tests'))
    if followup_tests:
        timeline = followup.get('timeline') or 'As advised by your healthcare provider'
        parts.append("<h2>Follow-up Recommendations</h2>\n")
        parts.append(f"<p>Timeline: {_esc(timeline)}</p>\n")
        parts.append("<p>Recommended Tests:</p>\n")
        parts.append(_bullets(followup_tests))

    # Disclaimer
    parts.append(
        "<p><em>Disclaimer: This analysis is generated by an AI system and should not"
        " replace professional medical advice.\n"
        "Always consult with a healthcare provider before making any changes to your"
        " diet, lifestyle, or supplementation regimen.</em></p>\n"
    )
    parts.append("</div>")
    return "".join(parts)
# Calculate nutritional recommendations based on deficiencies
def _parse_dosage(text):
    """Split a dosage string into ``(amount, unit)``; ``None`` if not found.

    Thousands separators are accepted, so ``"60,000 IU weekly"`` yields
    ``(60000.0, "IU")`` rather than the trailing ``"000 IU"`` group.
    """
    match = re.search(r'(\d+(?:,\d+)*(?:\.\d+)?)\s*([a-zA-Zµμ]+)', str(text))
    if not match:
        return None
    return float(match.group(1).replace(",", "")), match.group(2)


def calculate_recommendations(analysis_result, weight_kg=70, height_cm=165,
                              activity_level="moderate", age_years=30):
    """Build a supplement plan from the analysis result and body measurements.

    Args:
        analysis_result: Dict produced by the report analysis; must contain
            ``"deficiencies"`` for a plan to be produced.
        weight_kg: Body weight in kilograms.
        height_cm: Height in centimetres.
        activity_level: One of ``sedentary``, ``light``, ``moderate``,
            ``active``, ``very active`` (case-insensitive); anything else is
            treated as ``moderate``.
        age_years: Age used in the energy estimate (default 30).

    Returns:
        ``None`` when there is no usable analysis. Otherwise a dict with
        ``"anthropometrics"`` (BMI, BMI category, estimated daily energy
        needs - ``None``/``"Unknown"`` when weight or height is missing or
        not positive) and ``"supplements"``, one entry per deficiency whose
        recommended dosage contains a number followed by a unit. The dosage
        is scaled +20% for ``severe`` and -10% for ``mild`` deficiencies.
    """
    if not isinstance(analysis_result, dict) or "deficiencies" not in analysis_result:
        return None

    # Activity level multipliers
    activity_multipliers = {
        "sedentary": 1.2,
        "light": 1.375,
        "moderate": 1.55,
        "active": 1.725,
        "very active": 1.9
    }

    # Number inputs arrive as None when the UI field is cleared.
    try:
        weight = float(weight_kg)
        height = float(height_cm)
    except (TypeError, ValueError):
        weight = height = 0.0

    if weight > 0 and height > 0:
        bmi = weight / ((height / 100) ** 2)
        # Mifflin-St Jeor basal metabolic rate.
        # NOTE(review): the trailing +5 is the male constant of that
        # equation; no sex input reaches this function.
        bmr = 10 * weight + 6.25 * height - 5 * age_years + 5
        multiplier = activity_multipliers.get(str(activity_level or "moderate").lower(), 1.55)
        anthropometrics = {
            "bmi": round(bmi, 1),
            "bmi_category": get_bmi_category(bmi),
            "estimated_energy_needs": round(bmr * multiplier)
        }
    else:
        anthropometrics = {
            "bmi": None,
            "bmi_category": "Unknown",
            "estimated_energy_needs": None
        }

    recommendations = {
        "anthropometrics": anthropometrics,
        "supplements": []
    }

    # Process each deficiency; the entries come from model output, so every
    # field is read defensively.
    for deficiency in analysis_result.get("deficiencies") or []:
        if not isinstance(deficiency, dict):
            continue
        parsed = _parse_dosage(deficiency.get("recommended_dosage") or "")
        if parsed is None:
            continue
        amount, unit = parsed
        nutrient = str(deficiency.get("nutrient") or "Unknown")
        severity = str(deficiency.get("severity") or "").lower()

        # Adjust based on severity
        if severity == "severe":
            adjusted_amount = amount * 1.2  # 20% higher for severe
        elif severity == "mild":
            adjusted_amount = amount * 0.9  # 10% lower for mild
        else:
            adjusted_amount = amount

        recommendations["supplements"].append({
            "nutrient": nutrient,
            "dosage": f"{round(adjusted_amount, 2)} {unit}",
            "original_dosage": f"{amount} {unit}",
            "severity": severity,
            "duration": deficiency.get("supplementation_duration") or "N/A",
            "frequency": "Daily",
            "best_time": get_best_time_for_supplement(nutrient),
            "interactions": get_supplement_interactions(nutrient)
        })
    return recommendations
# Helper functions for recommendations
def get_bmi_category(bmi):
    """Map a BMI value to its category label.

    Below 18.5 is "Underweight", below 25 "Normal weight", below 30
    "Overweight", and anything else "Obese".
    """
    # Upper bounds are exclusive and checked in ascending order.
    upper_bounds = (
        (18.5, "Underweight"),
        (25, "Normal weight"),
        (30, "Overweight"),
    )
    for limit, label in upper_bounds:
        if bmi < limit:
            return label
    return "Obese"
def get_best_time_for_supplement(nutrient):
    """Return advice on when to take a supplement for ``nutrient``.

    Args:
        nutrient: Nutrient name as reported by the analysis, e.g.
            ``"Vitamin D"`` or ``"Iron"``.

    Returns:
        A timing recommendation string; a generic "as directed" message when
        the nutrient is not recognised.
    """
    # Time recommendations based on Indian context
    nutrient_lower = str(nutrient).lower()

    # Vitamins A, D, E and K must be matched as names. Testing for the bare
    # letters "d"/"a"/"e"/"k" matches almost every nutrient ("vitamin b12",
    # "calcium", ...) and makes the branches below unreachable.
    fat_soluble_aliases = (
        "retinol", "carotene", "tocopherol", "phylloquinone",
        "menaquinone", "calciferol", "calcidiol", "25(oh)d",
    )
    is_fat_soluble = bool(
        re.search(r'\bvitamin[\s-]*[adek]\d*\b', nutrient_lower)
    ) or any(alias in nutrient_lower for alias in fat_soluble_aliases)

    if is_fat_soluble:
        return "With meals containing some fat (lunch or dinner)"
    elif "b12" in nutrient_lower:
        return "Morning, with breakfast"
    elif "iron" in nutrient_lower:
        return "On empty stomach, 1 hour before meals with Vitamin C"
    elif "calcium" in nutrient_lower:
        return "Between meals, avoid taking with iron supplements"
    elif "zinc" in nutrient_lower:
        return "1-2 hours after meals, not with calcium supplements"
    else:
        return "As directed by healthcare provider"
def get_supplement_interactions(nutrient):
    """List substances known to interact with a supplement for ``nutrient``.

    Args:
        nutrient: Nutrient name as reported by the analysis.

    Returns:
        A list of interaction descriptions; empty when the nutrient is not
        one of iron, calcium, vitamin B12 or vitamin D.
    """
    # Common interactions for Indian medications and supplements
    nutrient_lower = str(nutrient).lower()

    # Vitamin D is matched by name. A bare ``"d" in name`` test also hits
    # unrelated nutrients such as "iodine", "pyridoxine" or "folic acid".
    is_vitamin_d = bool(
        re.search(r'\bvitamin[\s-]*d\d*\b', nutrient_lower)
    ) or any(alias in nutrient_lower for alias in ("calciferol", "calcidiol", "25(oh)d"))

    if "iron" in nutrient_lower:
        return ["Calcium supplements", "Tea/coffee", "Antacids", "Certain antibiotics"]
    elif "calcium" in nutrient_lower:
        return ["Iron supplements", "Certain antibiotics", "Thyroid medications"]
    elif "b12" in nutrient_lower:
        return ["Metformin", "Acid-reducing medications", "Colchicine"]
    elif is_vitamin_d:
        return ["Steroids", "Weight loss medications", "Certain cholesterol medications"]
    else:
        return []
# File upload handler for Hugging Face
def upload_and_process_file(file, api_key, name, age, gender):
    """Handle an uploaded report: extract its content, analyze it, render HTML.

    Args:
        file: Upload from the Gradio file component - either a filesystem
            path string or an object whose ``name`` attribute is that path.
        api_key: Google Gemini API key.
        name: Optional patient name.
        age: Optional patient age.
        gender: Optional patient gender.

    Returns:
        ``(html_or_message, analysis_result)``. On any failure the first
        item is a plain message and the second is ``None``.
    """
    if not api_key:
        return "Please enter a valid Google API key", None
    try:
        if file is None:
            return "No file was uploaded", None

        # Depending on the Gradio version/configuration the component hands
        # over a path string or a temp-file wrapper. Resolving the path and
        # opening it ourselves works for both.
        file_path = file if isinstance(file, str) else file.name
        file_extension = os.path.splitext(file_path)[1].lstrip('.').lower()

        # Reject unsupported formats before touching the file.
        if file_extension not in ('pdf', 'jpg', 'jpeg', 'png'):
            return f"Unsupported file format: {file_extension}. Please upload a PDF or image (JPG, PNG).", None

        with open(file_path, 'rb') as handle:
            file_content = handle.read()

        # Process based on file type
        if file_extension == 'pdf':
            report_text, extracted_images, tables = extract_text_from_pdf(file_content)
            extracted_markers = extract_blood_markers(report_text)
            vision_model, text_model = configure_genai(api_key)
            if extracted_markers:
                # Text extraction worked well and markers were found.
                analysis_result = analyze_report(
                    vision_model, text_model, report_text, extracted_markers, is_text=True
                )
            elif extracted_images:
                # Text was not informative: use the first image as primary
                # input, with the text as context.
                analysis_result = analyze_report(
                    vision_model, text_model,
                    [report_text, extracted_images[0]],
                    extracted_markers
                )
            else:
                return "Could not extract sufficient data from the PDF. Please try uploading a clearer document.", None
        else:
            img = Image.open(io.BytesIO(file_content))
            processed_img = preprocess_image(img)
            vision_model, text_model = configure_genai(api_key)
            analysis_result = analyze_report(vision_model, text_model, processed_img, {})

        # Create patient info dictionary if provided
        patient_info = None
        if name or age or gender:
            patient_info = {
                "name": name,
                "age": age,
                "gender": gender
            }

        # Generate HTML report
        html_report = generate_recommendation_html(analysis_result, patient_info)
        return html_report, analysis_result
    except Exception as e:
        # UI boundary: surface the problem as a message instead of a traceback.
        return f"An error occurred: {str(e)}", None
# Create the Gradio Interface for Hugging Face
def create_interface():
    """Build the Gradio Blocks application and return it (without launching).

    Layout: an intro header, a "Report Analysis" tab (API key, instructions,
    patient form, file upload, analysis output, supplement-plan controls) and
    an "About" tab. Two buttons are wired: the analyze button calls
    ``upload_and_process_file`` and the supplement button calls
    ``calculate_recommendations``.

    NOTE(review): the nesting of the ``with`` blocks below was reconstructed
    from statement order - confirm it against the intended layout.
    Multi-line Markdown strings are runtime text and are kept flush-left.
    """
    with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo")) as app:
        # Page header.
        gr.Markdown(
            """
# 🩸 Blood Report Analyzer
## Analyze blood test reports for vitamin deficiencies and get personalized recommendations
This application uses Gemini AI to analyze your blood test results and provide detailed insights
on nutritional deficiencies with recommendations tailored to Indian health needs.
"""
        )
        with gr.Tab("📊 Report Analysis"):
            # Row 1: API key (masked input) and collapsible usage instructions.
            with gr.Row():
                with gr.Column(scale=1):
                    api_key = gr.Textbox(
                        label="Google Gemini API Key",
                        placeholder="Enter your Gemini API key",
                        type="password"
                    )
                    with gr.Accordion("Instructions for Using This Tool", open=False):
                        gr.Markdown(
                            """
## How to Use This Tool
### 1. Prepare Your Report
- Ensure your blood report is clear and readable
- PDF format is preferred
- If using images, ensure good lighting and focus
### 2. Get a Gemini API Key
- Visit [Google AI Studio](https://ai.google.dev/)
- Create an account or sign in
- Navigate to API keys and create a new key
### 3. Upload and Analyze
- Enter your API key in the designated field
- (Optional) Enter patient information for personalized results
- Upload your blood report file
- Click "Analyze Report"
### 4. Review Results
- The analysis will display deficiencies found, their severity, and recommendations
- For personalized supplementation, enter weight, height, and activity level
- Click "Generate Supplement Plan" for customized dosage recommendations
### 5. Share Results
- You can save the HTML report by right-clicking and selecting "Save as"
- Share the results with your healthcare provider
### Important Notes
- This tool is for informational purposes only
- Always consult with healthcare professionals before making health decisions
- Your data is not stored and is only used for analysis
"""
                        )
            # Row 2: patient form + upload on the left, results on the right.
            with gr.Row():
                with gr.Column(scale=1):
                    with gr.Group():
                        gr.Markdown("### Patient Information (Optional)")
                        name = gr.Textbox(label="Name", placeholder="Enter patient name")
                        with gr.Row():
                            age = gr.Textbox(label="Age", placeholder="e.g., 35")
                            gender = gr.Dropdown(label="Gender", choices=["Male", "Female", "Other"], value="Male")
                    upload_file = gr.File(label="Upload Blood Report")
                    analyze_button = gr.Button("📊 Analyze Report", variant="primary")
                with gr.Column(scale=2):
                    output = gr.HTML(label="Analysis Results")
                    # Hidden holder for the raw analysis dict; it is the
                    # input of the supplement-plan step below.
                    raw_output = gr.JSON(label="Raw Analysis Data", visible=False)
            # Row 3: inputs and output of the supplement plan.
            with gr.Row():
                with gr.Column():
                    with gr.Group():
                        gr.Markdown("### Supplement Recommendations")
                        with gr.Row():
                            weight = gr.Number(label="Weight (kg)", value=70)
                            height = gr.Number(label="Height (cm)", value=165)
                            activity = gr.Dropdown(
                                label="Activity Level",
                                choices=["Sedentary", "Light", "Moderate", "Active", "Very Active"],
                                value="Moderate"
                            )
                        supplement_button = gr.Button("💊 Generate Supplement Plan")
                        supplement_output = gr.JSON(label="Personalized Supplement Plan")
            # Connect the buttons to functions
            # Input order must match the parameter order of the handlers.
            analyze_button.click(
                fn=upload_and_process_file,
                inputs=[upload_file, api_key, name, age, gender],
                outputs=[output, raw_output]
            )
            supplement_button.click(
                fn=calculate_recommendations,
                inputs=[raw_output, weight, height, activity],
                outputs=[supplement_output]
            )
        with gr.Tab("📋 About"):
            gr.Markdown(
                """
## About Blood Report Analyzer
This tool was developed to help people in India better understand their blood test results,
with a focus on identifying nutritional deficiencies that are common in the Indian population.
### How it Works
1. The tool uses advanced OCR and AI to extract relevant information from your blood report
2. Google's Gemini AI models analyze the data to identify deficiencies
3. Recommendations are tailored to the Indian context, including:
- Locally available foods
- Cultural dietary considerations
- Regional supplementation guidelines
### Privacy & Security
- Your data remains private and is not stored
- Analysis happens in real-time
- API keys are only used for processing and are not saved
### Limitations
- This tool is for informational purposes only
- It does not replace medical advice from healthcare professionals
- Accuracy depends on the quality of the uploaded report
- Some rare deficiencies may not be detected
### Acknowledgements
This application uses Google's Gemini AI models and is built with Gradio for Hugging Face Spaces.
"""
            )
    return app
# Export the interface
# Module-level handle so hosting platforms can import ``app`` directly.
app = create_interface()

# Launch the app
if __name__ == "__main__":
    # Launch exactly once. The previous second ``app.launch(share=True)``
    # call started the server again after the first launch returned.
    # NOTE(review): if a public share link is required outside a hosted
    # Space, pass ``share=True`` here instead.
    app.launch()