"""Blood Report Analyzer.

Gradio application that extracts text/images from an uploaded blood report
(PDF or image), asks Google Gemini for a structured nutritional-deficiency
analysis, renders the result as HTML and derives a simple supplement plan.

NOTE(review): this file was restored from a copy in which line breaks and the
HTML markup inside the report strings had been stripped. The markup below is a
reconstruction that keeps every visible text label; adjust styling as needed.
"""

# Import Libraries
import os
import io
import re
import json
import base64
from datetime import datetime
from html import escape

import numpy as np
import pandas as pd
import gradio as gr
import google.generativeai as genai
import fitz  # PyMuPDF
from PIL import Image, ImageEnhance


# Blood Report Analyzer Implementation

# Configure Google Gemini API
def configure_genai(api_key):
    """Configure the Gemini client and return ``(vision_model, text_model)``.

    Args:
        api_key: Google Gemini API key supplied by the user.

    Returns:
        A tuple of two ``genai.GenerativeModel`` objects: the first is used
        for image input, the second for plain-text input.
    """
    genai.configure(api_key=api_key)
    # NOTE(review): these model names are kept from the original code; verify
    # that they are still served by the API version in use.
    # Use Gemini Pro Vision for image analysis
    vision_model = genai.GenerativeModel('gemini-pro-vision')
    # Use Gemini Pro for text analysis (better for structured text)
    text_model = genai.GenerativeModel('gemini-pro')
    return vision_model, text_model


# Image preprocessing to improve OCR
def preprocess_image(image):
    """Return a grayscale, contrast- and sharpness-enhanced copy of *image*.

    Args:
        image: A ``PIL.Image.Image``.

    Returns:
        A new PIL image in mode ``'L'`` with contrast and sharpness doubled.
    """
    # Convert to grayscale
    img_gray = image.convert('L')
    # Enhance contrast
    img_enhanced = ImageEnhance.Contrast(img_gray).enhance(2.0)
    # Increase sharpness
    return ImageEnhance.Sharpness(img_enhanced).enhance(2.0)


# Extract text from PDF with advanced techniques
def extract_text_from_pdf(pdf_file):
    """Extract text, large embedded images and table-like rows from a PDF.

    Args:
        pdf_file: The raw PDF content as ``bytes``.

    Returns:
        ``(complete_text, images, tables)`` where ``complete_text`` is the
        concatenated text of all pages, ``images`` is a list of preprocessed
        PIL images larger than 200x200 pixels, and ``tables`` is a list of
        text fragments that look like rows of a numeric table.
    """
    page_texts = []
    images = []
    tables = []

    # The context manager guarantees the document is closed even when a page
    # fails to parse (the original never closed it).
    with fitz.open(stream=pdf_file, filetype="pdf") as doc:
        for page in doc:
            # Get text with improved layout preservation
            blocks = page.get_text("dict").get("blocks", [])

            # Process text blocks to preserve table-like structures
            lines = []
            for block in blocks:
                if block.get("type") != 0:  # 0 == text block
                    continue
                for line in block.get("lines", []):
                    lines.append(
                        " ".join(span.get("text", "") for span in line.get("spans", []))
                    )
            page_text = "".join(line + "\n" for line in lines)
            page_texts.append(page_text + "\n\n")

            # Extract tables using heuristics:
            # look for grid-like structures in the text
            tables.extend(
                re.findall(r'(?:\w+[\t ]+){2,}(?:\d+\.?\d*[\t ]+){2,}', page_text)
            )

            # Extract images for visual analysis
            for img in page.get_images(full=True):
                xref = img[0]
                base_image = doc.extract_image(xref)
                if not base_image or "image" not in base_image:
                    continue
                try:
                    image = Image.open(io.BytesIO(base_image["image"]))
                    # Only keep images that might be charts or reports
                    # (filter out logos and decorative elements)
                    if image.width > 200 and image.height > 200:
                        # Preprocess image to improve readability
                        images.append(preprocess_image(image))
                except (OSError, ValueError):
                    # Best effort: one undecodable embedded image must not
                    # abort extraction of the whole report.
                    continue

    return "".join(page_texts), images, tables


# Blood markers dictionary for reference
BLOOD_MARKERS = {
    "Vitamin D": ["25-OH Vitamin D", "Vitamin D, 25-Hydroxy", "25(OH)D", "Calcidiol"],
    "Vitamin B12": ["Cobalamin", "Cyanocobalamin", "Methylcobalamin", "B-12"],
    "Folate": ["Vitamin B9", "Folic Acid"],
    "Vitamin A": ["Retinol", "Beta-carotene"],
    "Vitamin E": ["Tocopherol", "Alpha-tocopherol"],
    "Vitamin K": ["Phylloquinone", "Menaquinone"],
    "Vitamin C": ["Ascorbic Acid", "L-ascorbic acid"],
    "Vitamin B1": ["Thiamine", "Thiamin"],
    "Vitamin B2": ["Riboflavin"],
    "Vitamin B3": ["Niacin", "Nicotinic acid"],
    "Vitamin B5": ["Pantothenic acid"],
    "Vitamin B6": ["Pyridoxine", "Pyridoxal", "Pyridoxamine"],
    "Vitamin B7": ["Biotin"],
    "Iron": ["Ferritin", "Transferrin", "TIBC", "UIBC", "Serum Iron"],
    "Calcium": ["Ca", "Serum Calcium", "Ionized Calcium"],
    "Magnesium": ["Mg", "Serum Magnesium"],
    "Zinc": ["Zn", "Serum Zinc"],
    "Selenium": ["Se", "Serum Selenium"],
    "Iodine": ["I", "Urinary Iodine"]
}

# Normal ranges reference (based on Indian standards)
REFERENCE_RANGES = {
    "Vitamin D": {"unit": "ng/mL", "min": 30, "max": 100, "deficiency": "<20", "insufficiency": "20-29"},
    "Vitamin B12": {"unit": "pg/mL", "min": 211, "max": 911, "deficiency": "<200", "insufficiency": "200-300"},
    "Folate": {"unit": "ng/mL", "min": 5.9, "max": 24.8, "deficiency": "<5.9"},
    "Ferritin": {"unit": "ng/mL", "min_male": 30, "max_male": 400, "min_female": 13, "max_female": 150,
                 "deficiency_male": "<30", "deficiency_female": "<13"},
    "Hemoglobin": {"unit": "g/dL", "min_male": 13.5, "max_male": 17.5, "min_female": 12.0, "max_female": 15.5,
                   "deficiency_male": "<13.5", "deficiency_female": "<12.0"},
    "Calcium": {"unit": "mg/dL", "min": 8.6, "max": 10.3, "deficiency": "<8.6"},
    "Magnesium": {"unit": "mg/dL", "min": 1.7, "max": 2.2, "deficiency": "<1.7"},
    "Zinc": {"unit": "μg/dL", "min": 70, "max": 120, "deficiency": "<70"}
}


# Extract blood markers and values from text
def extract_blood_markers(text):
    """Find numeric values for the markers listed in ``BLOOD_MARKERS``.

    For every marker the canonical name is tried first, then its aliases in
    list order; the first term that is followed by a number wins and the
    remaining terms for that marker are skipped.

    Args:
        text: Report text to scan.

    Returns:
        Dict mapping canonical marker name to the value found, as ``float``.
        Markers without a match are absent.
    """
    extracted_markers = {}
    if not text:
        return extracted_markers

    for vitamin, aliases in BLOOD_MARKERS.items():
        for term in [vitamin] + aliases:
            # Pattern matches "Marker name: value" or "Marker name value".
            # The look-arounds stop a term from matching inside a longer
            # token, e.g. "Vitamin B1" inside "Vitamin B12: 300" (which
            # previously produced the bogus value 2.0).
            pattern = (
                r'(?i)(?<![A-Za-z0-9])%s(?![A-Za-z0-9])\s*[:=-]?\s*(\d+\.?\d*)'
                % re.escape(term)
            )
            match = re.search(pattern, text)
            if match:
                extracted_markers[vitamin] = float(match.group(1))
                break  # Found a value for this marker, move to the next one

    return extracted_markers


# Analyze report with Gemini using structured approach
def analyze_report(vision_model, text_model, content, extracted_markers, is_text=False):
    """Ask Gemini for a structured deficiency analysis of a blood report.

    Args:
        vision_model: Model used when *content* contains an image.
        text_model: Model used when ``is_text`` is true.
        content: Report text (``is_text=True``), a single image, or a
            list/tuple of parts (text and/or images) for the vision model.
        extracted_markers: Dict of markers already parsed from the text;
            it is embedded in the prompt as JSON.
        is_text: Whether *content* is plain text.

    Returns:
        The parsed JSON object from the model, or a dict with an ``"error"``
        key (and ``"raw_response"`` when the reply could not be parsed).
        Exceptions from the API call are caught and reported the same way.
    """
    # Create structured input for better analysis
    analysis_prompt = f"""
    I need a detailed analysis of this blood test report. Focus specifically on vitamin, mineral and nutritional deficiencies.
    The report is from India, so provide recommendations relevant to Indian context, diet, and healthcare practices.

    For each identified deficiency:
    1. Specify the exact deficiency (vitamin/mineral name)
    2. Current level from report and normal reference range
    3. Severity (mild/moderate/severe)
    4. Recommended daily dosage in appropriate units (mg, mcg, IU) for supplementation
    5. Duration of recommended supplementation
    6. Specific health impacts this deficiency is causing or may cause
    7. Recommended foods available in India that address this deficiency (include both vegetarian and non-vegetarian options)
    8. Any additional blood tests that should be considered for confirmation

    Also provide:
    - A comprehensive summary of all nutritional findings
    - Lifestyle modifications specific to Indian context
    - Any concerning values that require immediate medical attention
    - Follow-up testing recommendations with timeline

    If you cannot confidently determine specific deficiencies, explain why and suggest further tests.

    The extracted markers I've identified include: {json.dumps(extracted_markers)}

    Format your response as structured JSON with the following schema:
    {{
        "deficiencies": [
            {{
                "nutrient": "string",
                "current_level": "string",
                "reference_range": "string",
                "severity": "string",
                "recommended_dosage": "string",
                "supplementation_duration": "string",
                "health_impacts": ["string"],
                "recommended_foods": {{
                    "vegetarian": ["string"],
                    "non_vegetarian": ["string"]
                }},
                "confirmation_tests": ["string"]
            }}
        ],
        "summary": "string",
        "lifestyle_modifications": ["string"],
        "urgent_concerns": ["string"] or null,
        "followup_recommendations": {{
            "tests": ["string"],
            "timeline": "string"
        }}
    }}
    """

    try:
        if is_text:
            full_content = content + "\n\nExtracted markers: " + json.dumps(extracted_markers)
            response = text_model.generate_content([analysis_prompt, full_content])
        else:
            # The caller may pass one image or a list such as [text, image].
            # Flatten it so the request is a flat list of parts instead of a
            # nested list, and drop empty text parts.
            parts = list(content) if isinstance(content, (list, tuple)) else [content]
            parts = [p for p in parts if not (isinstance(p, str) and not p.strip())]
            response = vision_model.generate_content([analysis_prompt, *parts])

        # Extract JSON from response
        response_text = response.text

        # Prefer a fenced ```json block, otherwise take the outermost braces
        json_match = re.search(r'```json\s*([\s\S]*?)\s*```', response_text)
        if not json_match:
            json_match = re.search(r'({[\s\S]*})', response_text)
        if not json_match:
            return {"error": "Failed to parse JSON response", "raw_response": response_text}

        # Parse JSON
        try:
            return json.loads(json_match.group(1))
        except json.JSONDecodeError:
            return {"error": "Invalid JSON response", "raw_response": response_text}

    except Exception as e:  # boundary to an external service: report, don't crash the UI
        return {"error": f"Analysis failed: {str(e)}"}


# Colours used for the severity badge in the HTML report
_SEVERITY_COLORS = {
    "mild": "#f39c12",
    "moderate": "#e67e22",
    "severe": "#c0392b",
}
_DEFAULT_SEVERITY_COLOR = "#7f8c8d"


def _esc(value):
    """Return *value* as HTML-escaped text (model output is untrusted)."""
    return escape(str(value))


def _html_list(items, fallback):
    """Render *items* as a ``<ul>``; use *fallback* when *items* is empty/None."""
    if isinstance(items, str):
        items = [items]
    items = [item for item in (items or []) if item is not None] or [fallback]
    return "<ul>" + "".join(f"<li>{_esc(item)}</li>" for item in items) + "</ul>"


# Generate personalized recommendation report
def generate_recommendation_html(analysis_result, patient_info=None):
    """Render the analysis result as an HTML report.

    Args:
        analysis_result: Dict returned by ``analyze_report``. If it contains
            an ``"error"`` key only an error message is rendered.
        patient_info: Optional dict with ``name``, ``age`` and ``gender``.

    Returns:
        An HTML string. All values taken from *analysis_result* and
        *patient_info* are HTML-escaped.
    """
    if not isinstance(analysis_result, dict):
        analysis_result = {"error": "Unexpected analysis result format"}

    if "error" in analysis_result:
        return (
            '<div style="color: #c0392b;">'
            f"<h3>Error in analysis: {_esc(analysis_result['error'])}</h3>"
            "</div>"
        )

    # Current date for the report
    current_date = datetime.now().strftime("%d %B, %Y")

    # Start building HTML
    parts = [
        '<div style="font-family: Arial, sans-serif; max-width: 900px; margin: 0 auto;">',
        "<h1>Nutritional Analysis Report</h1>",
        f"<p>Generated on: {current_date}</p>",
    ]
    if patient_info:
        parts.append(
            "<p>"
            f"Patient: {_esc(patient_info.get('name') or 'N/A')} | "
            f"Age: {_esc(patient_info.get('age') or 'N/A')} | "
            f"Gender: {_esc(patient_info.get('gender') or 'N/A')}"
            "</p>"
        )
    parts.append("<h2>Summary</h2>")
    parts.append(f"<p>{_esc(analysis_result.get('summary') or 'No summary available')}</p>")

    # Add deficiencies section
    deficiencies = analysis_result.get('deficiencies') or []
    if deficiencies:
        parts.append("<h2>Detected Deficiencies</h2>")
        for deficiency in deficiencies:
            if not isinstance(deficiency, dict):
                continue
            severity = str(deficiency.get('severity') or 'Unknown')
            severity_color = _SEVERITY_COLORS.get(severity.lower(), _DEFAULT_SEVERITY_COLOR)
            foods = deficiency.get('recommended_foods') or {}

            parts.append(
                f'<div style="border-left: 5px solid {severity_color}; '
                'padding: 10px 15px; margin-bottom: 20px;">'
            )
            parts.append(
                f"<h3>{_esc(deficiency.get('nutrient') or 'Unknown')} "
                f'<span style="color: {severity_color};">{_esc(severity)} deficiency</span></h3>'
            )
            parts.append(f"<p><strong>Current Level:</strong> {_esc(deficiency.get('current_level') or 'N/A')}</p>")
            parts.append(f"<p><strong>Reference Range:</strong> {_esc(deficiency.get('reference_range') or 'N/A')}</p>")
            parts.append(f"<p><strong>Recommended Dosage:</strong> {_esc(deficiency.get('recommended_dosage') or 'N/A')}</p>")
            parts.append(f"<p><strong>Duration:</strong> {_esc(deficiency.get('supplementation_duration') or 'N/A')}</p>")

            # Add health impacts
            parts.append("<h4>Health Impacts:</h4>")
            parts.append(_html_list(deficiency.get('health_impacts'), 'N/A'))

            parts.append("<h4>Recommended Foods</h4>")
            # Add vegetarian foods
            parts.append("<h5>Vegetarian Options</h5>")
            parts.append(_html_list(foods.get('vegetarian'), 'N/A'))
            # Add non-vegetarian foods
            parts.append("<h5>Non-Vegetarian Options</h5>")
            parts.append(_html_list(foods.get('non_vegetarian'), 'N/A'))

            # Add confirmation tests
            parts.append("<h4>Additional Tests</h4>")
            parts.append(_html_list(deficiency.get('confirmation_tests'), 'None recommended'))
            parts.append("</div>")
    else:
        parts.append("<p>No specific deficiencies detected.</p>")

    # Add lifestyle modifications
    lifestyle = analysis_result.get('lifestyle_modifications') or []
    if lifestyle:
        parts.append("<h2>Lifestyle Recommendations</h2>")
        parts.append(_html_list(lifestyle, 'N/A'))

    # Add urgent concerns
    urgent = analysis_result.get('urgent_concerns') or []
    if urgent and urgent != [None]:
        parts.append('<div style="border: 2px solid #c0392b; padding: 10px 15px; margin: 20px 0;">')
        parts.append("<h2>⚠️ Urgent Considerations</h2>")
        parts.append(_html_list(urgent, 'N/A'))
        parts.append("<p>Please consult with a healthcare provider promptly regarding these concerns.</p>")
        parts.append("</div>")

    # Add follow-up recommendations
    followup = analysis_result.get('followup_recommendations') or {}
    if followup and followup.get('tests'):
        parts.append("<h2>Follow-up Recommendations</h2>")
        parts.append(
            "<p><strong>Timeline:</strong> "
            f"{_esc(followup.get('timeline') or 'As advised by your healthcare provider')}</p>"
        )
        parts.append("<h4>Recommended Tests:</h4>")
        parts.append(_html_list(followup.get('tests'), 'N/A'))

    # Disclaimer
    parts.append(
        '<p style="font-size: 0.9em; color: #7f8c8d;"><strong>Disclaimer:</strong> '
        "This analysis is generated by an AI system and should not replace professional medical advice. "
        "Always consult with a healthcare provider before making any changes to your diet, lifestyle, "
        "or supplementation regimen.</p>"
    )
    parts.append("</div>")

    return "\n".join(parts)


# Calculate nutritional recommendations based on deficiencies
def calculate_recommendations(analysis_result, weight_kg=70, height_cm=165,
                              activity_level="moderate", age_years=30):
    """Derive a supplement plan and basic anthropometrics from an analysis.

    Args:
        analysis_result: Dict returned by ``analyze_report``.
        weight_kg: Body weight in kilograms.
        height_cm: Height in centimetres.
        activity_level: One of "sedentary", "light", "moderate", "active",
            "very active" (case-insensitive); unknown values use 1.55.
        age_years: Age used in the BMR formula (defaults to the previously
            hard-coded 30).

    Returns:
        ``None`` when *analysis_result* is empty or has no ``"deficiencies"``
        key; a dict with an ``"error"`` key when weight/height are missing or
        not positive; otherwise a dict with ``"anthropometrics"`` and
        ``"supplements"``.
    """
    if not analysis_result or "deficiencies" not in analysis_result:
        return None

    # Cleared Gradio number fields arrive as None; zero height would divide by zero.
    if not weight_kg or not height_cm or weight_kg <= 0 or height_cm <= 0:
        return {"error": "Weight and height must be positive numbers"}

    # Basic calculations
    bmi = weight_kg / ((height_cm / 100) ** 2)

    # Activity level multipliers
    activity_multipliers = {
        "sedentary": 1.2,
        "light": 1.375,
        "moderate": 1.55,
        "active": 1.725,
        "very active": 1.9
    }

    # Calculate basal metabolic rate (BMR) using Mifflin-St Jeor equation.
    # NOTE(review): the trailing "+ 5" is the male constant of that equation;
    # gender is not available to this function.
    bmr = 10 * weight_kg + 6.25 * height_cm - 5 * age_years + 5

    # Calculate total daily energy expenditure
    tdee = bmr * activity_multipliers.get(str(activity_level or "moderate").lower(), 1.55)

    # Create recommendation dictionary
    recommendations = {
        "anthropometrics": {
            "bmi": round(bmi, 1),
            "bmi_category": get_bmi_category(bmi),
            "estimated_energy_needs": round(tdee)
        },
        "supplements": []
    }

    # Process each deficiency
    for deficiency in analysis_result["deficiencies"] or []:
        if not isinstance(deficiency, dict):
            continue
        nutrient = str(deficiency.get("nutrient") or "Unknown")
        severity = str(deficiency.get("severity") or "").lower()

        # Extract dosage value and unit; thousands separators ("1,000 IU")
        # are accepted and at most one decimal point is consumed.
        dosage_match = re.search(
            r'(\d[\d,]*(?:\.\d+)?)\s*([a-zA-Zμµ]+)',
            str(deficiency.get("recommended_dosage") or ""),
        )
        if not dosage_match:
            continue
        amount = float(dosage_match.group(1).replace(",", ""))
        unit = dosage_match.group(2)

        # Adjust based on severity
        if severity == "severe":
            adjusted_amount = amount * 1.2  # 20% higher for severe
        elif severity == "mild":
            adjusted_amount = amount * 0.9  # 10% lower for mild
        else:
            adjusted_amount = amount

        recommendations["supplements"].append({
            "nutrient": nutrient,
            "dosage": f"{round(adjusted_amount, 2)} {unit}",
            "original_dosage": f"{amount} {unit}",
            "severity": severity,
            "duration": deficiency.get("supplementation_duration", "N/A"),
            "frequency": "Daily",
            "best_time": get_best_time_for_supplement(nutrient),
            "interactions": get_supplement_interactions(nutrient)
        })

    return recommendations


# Helper functions for recommendations
def get_bmi_category(bmi):
    """Return the BMI category label for a numeric *bmi*."""
    if bmi < 18.5:
        return "Underweight"
    elif bmi < 25:
        return "Normal weight"
    elif bmi < 30:
        return "Overweight"
    else:
        return "Obese"


def get_best_time_for_supplement(nutrient):
    """Return a timing hint for taking the supplement named *nutrient*.

    Vitamins A, D, E and K (including forms such as "Vitamin D3") get the
    with-fat hint. Whole-name matching is used because a plain substring test
    for "a"/"d"/"e"/"k" matched almost every nutrient name.
    """
    # Time recommendations based on Indian context
    nutrient_lower = str(nutrient or "").lower()

    if re.search(r'\bvitamin\s*[adek]\d*\b', nutrient_lower):
        return "With meals containing some fat (lunch or dinner)"
    elif "b12" in nutrient_lower:
        return "Morning, with breakfast"
    elif "iron" in nutrient_lower:
        return "On empty stomach, 1 hour before meals with Vitamin C"
    elif "calcium" in nutrient_lower:
        return "Between meals, avoid taking with iron supplements"
    elif "zinc" in nutrient_lower:
        return "1-2 hours after meals, not with calcium supplements"
    else:
        return "As directed by healthcare provider"


def get_supplement_interactions(nutrient):
    """Return a list of common interaction notes for the named *nutrient*.

    An empty list is returned for nutrients without an entry.
    """
    # Common interactions for Indian medications and supplements
    nutrient_lower = str(nutrient or "").lower()

    if "iron" in nutrient_lower:
        return ["Calcium supplements", "Tea/coffee", "Antacids", "Certain antibiotics"]
    elif "calcium" in nutrient_lower:
        return ["Iron supplements", "Certain antibiotics", "Thyroid medications"]
    elif "b12" in nutrient_lower:
        return ["Metformin", "Acid-reducing medications", "Colchicine"]
    elif re.search(r'\bvitamin\s*d\d*\b', nutrient_lower):
        # Whole-name match: a bare "d" substring also hit names like "iodine".
        return ["Steroids", "Weight loss medications", "Certain cholesterol medications"]
    else:
        return []


# File upload handler for Hugging Face
def upload_and_process_file(file, api_key, name, age, gender):
    """Gradio callback: analyse an uploaded report and build the HTML report.

    Args:
        file: Upload from ``gr.File``; either a path string or an object
            whose ``name`` attribute is the path of the uploaded file.
        api_key: Google Gemini API key.
        name, age, gender: Optional patient details shown in the report.

    Returns:
        ``(html_or_message, analysis_result)``. On any failure the first
        element is a plain error message and the second is ``None``.
    """
    if not api_key:
        return "Please enter a valid Google API key", None

    try:
        if file is None:
            return "No file was uploaded", None

        # Read from the path instead of calling file.read(): depending on the
        # Gradio version the upload is a path string or a temp-file wrapper.
        file_path = file if isinstance(file, str) else file.name
        file_extension = os.path.splitext(file_path)[1].lstrip('.').lower()
        with open(file_path, "rb") as handle:
            file_content = handle.read()

        # Process based on file type
        if file_extension == 'pdf':
            report_text, extracted_images, tables = extract_text_from_pdf(file_content)
            extracted_markers = extract_blood_markers(report_text)
            vision_model, text_model = configure_genai(api_key)

            if extracted_markers:
                # Text extraction worked well and we found markers
                analysis_result = analyze_report(
                    vision_model, text_model, report_text, extracted_markers, is_text=True
                )
            elif extracted_images:
                # Text extraction didn't yield much: use the first image as
                # primary input, but include the text as context
                analysis_result = analyze_report(
                    vision_model, text_model,
                    [report_text, extracted_images[0]], extracted_markers
                )
            else:
                return ("Could not extract sufficient data from the PDF. "
                        "Please try uploading a clearer document."), None

        elif file_extension in ['jpg', 'jpeg', 'png']:
            img = Image.open(io.BytesIO(file_content))
            processed_img = preprocess_image(img)
            vision_model, text_model = configure_genai(api_key)
            analysis_result = analyze_report(vision_model, text_model, processed_img, {})
        else:
            return (f"Unsupported file format: {file_extension}. "
                    "Please upload a PDF or image (JPG, PNG)."), None

        # Create patient info dictionary if provided
        patient_info = None
        if name or age or gender:
            patient_info = {
                "name": name,
                "age": age,
                "gender": gender
            }

        # Generate HTML report
        html_report = generate_recommendation_html(analysis_result, patient_info)

        return html_report, analysis_result

    except Exception as e:  # UI boundary: show the message instead of a stack trace
        return f"An error occurred: {str(e)}", None


# Markdown shown in the interface. Kept at column 0 so that indentation cannot
# turn headings and lists into code blocks.
_INTRO_MD = """
# 🩸 Blood Report Analyzer
## Analyze blood test reports for vitamin deficiencies and get personalized recommendations

This application uses Gemini AI to analyze your blood test results and provide detailed insights on nutritional deficiencies with recommendations tailored to Indian health needs.
"""

_INSTRUCTIONS_MD = """
## How to Use This Tool

### 1. Prepare Your Report
- Ensure your blood report is clear and readable
- PDF format is preferred
- If using images, ensure good lighting and focus

### 2. Get a Gemini API Key
- Visit [Google AI Studio](https://ai.google.dev/)
- Create an account or sign in
- Navigate to API keys and create a new key

### 3. Upload and Analyze
- Enter your API key in the designated field
- (Optional) Enter patient information for personalized results
- Upload your blood report file
- Click "Analyze Report"

### 4. Review Results
- The analysis will display deficiencies found, their severity, and recommendations
- For personalized supplementation, enter weight, height, and activity level
- Click "Generate Supplement Plan" for customized dosage recommendations

### 5. Share Results
- You can save the HTML report by right-clicking and selecting "Save as"
- Share the results with your healthcare provider

### Important Notes
- This tool is for informational purposes only
- Always consult with healthcare professionals before making health decisions
- Your data is not stored and is only used for analysis
"""

_ABOUT_MD = """
## About Blood Report Analyzer

This tool was developed to help people in India better understand their blood test results, with a focus on identifying nutritional deficiencies that are common in the Indian population.

### How it Works
1. The tool uses advanced OCR and AI to extract relevant information from your blood report
2. Google's Gemini AI models analyze the data to identify deficiencies
3. Recommendations are tailored to the Indian context, including:
   - Locally available foods
   - Cultural dietary considerations
   - Regional supplementation guidelines

### Privacy & Security
- Your data remains private and is not stored
- Analysis happens in real-time
- API keys are only used for processing and are not saved

### Limitations
- This tool is for informational purposes only
- It does not replace medical advice from healthcare professionals
- Accuracy depends on the quality of the uploaded report
- Some rare deficiencies may not be detected

### Acknowledgements
This application uses Google's Gemini AI models and is built with Gradio for Hugging Face Spaces.
"""


# Create the Gradio Interface for Hugging Face
def create_interface():
    """Build and return the Gradio ``Blocks`` application."""
    with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo")) as app:
        gr.Markdown(_INTRO_MD)

        with gr.Tab("📊 Report Analysis"):
            with gr.Row():
                with gr.Column(scale=1):
                    api_key = gr.Textbox(
                        label="Google Gemini API Key",
                        placeholder="Enter your Gemini API key",
                        type="password"
                    )
                    with gr.Accordion("Instructions for Using This Tool", open=False):
                        gr.Markdown(_INSTRUCTIONS_MD)

            with gr.Row():
                with gr.Column(scale=1):
                    with gr.Group():
                        gr.Markdown("### Patient Information (Optional)")
                        name = gr.Textbox(label="Name", placeholder="Enter patient name")
                        with gr.Row():
                            age = gr.Textbox(label="Age", placeholder="e.g., 35")
                            gender = gr.Dropdown(
                                label="Gender",
                                choices=["Male", "Female", "Other"],
                                value="Male"
                            )
                    upload_file = gr.File(label="Upload Blood Report")
                    analyze_button = gr.Button("📊 Analyze Report", variant="primary")

                with gr.Column(scale=2):
                    output = gr.HTML(label="Analysis Results")
                    # Hidden holder that feeds the raw analysis to the supplement plan
                    raw_output = gr.JSON(label="Raw Analysis Data", visible=False)

            with gr.Row():
                with gr.Column():
                    with gr.Group():
                        gr.Markdown("### Supplement Recommendations")
                        with gr.Row():
                            weight = gr.Number(label="Weight (kg)", value=70)
                            height = gr.Number(label="Height (cm)", value=165)
                            activity = gr.Dropdown(
                                label="Activity Level",
                                choices=["Sedentary", "Light", "Moderate", "Active", "Very Active"],
                                value="Moderate"
                            )
                        supplement_button = gr.Button("💊 Generate Supplement Plan")
                        supplement_output = gr.JSON(label="Personalized Supplement Plan")

            # Connect the buttons to functions
            analyze_button.click(
                fn=upload_and_process_file,
                inputs=[upload_file, api_key, name, age, gender],
                outputs=[output, raw_output]
            )

            supplement_button.click(
                fn=calculate_recommendations,
                inputs=[raw_output, weight, height, activity],
                outputs=[supplement_output]
            )

        with gr.Tab("📋 About"):
            gr.Markdown(_ABOUT_MD)

    return app


# Export the interface
app = create_interface()

# Launch the app
if __name__ == "__main__":
    # A single launch: the original called app.launch() and then
    # app.launch(share=True), starting the server a second time.
    app.launch()