# app.py
from flask import Flask, render_template, request, jsonify, Response
import requests
import re
import time
import os
from bs4 import BeautifulSoup
from huggingface_hub import InferenceClient
from typing import Optional, Tuple

# Flask application object; the route handlers below register themselves on it.
app = Flask(__name__)

# Startup diagnostic. Note that this prints the HUGGINGFACE_HUB_VERSION
# environment variable (or "Not set"), not the version of the installed
# huggingface_hub package.
print("Hugging Face Hub version:", os.environ.get("HUGGINGFACE_HUB_VERSION", "Not set"))

# Set Hugging Face cache directory explicitly (optional for InferenceClient, but useful for consistency)
# NOTE(review): this assignment runs after huggingface_hub has already been
# imported at the top of the file; confirm the library still honours HF_HOME
# when it is set this late.
os.environ["HF_HOME"] = "/home/user/.cache/huggingface"
print(f"Hugging Face cache directory: {os.environ['HF_HOME']}")

# Use Hugging Face InferenceClient for Qwen/Qwen2-1.5B-Instruct (public model, no token needed)
# You can switch to other public models by changing the model name
model_name = "Qwen/Qwen2-1.5B-Instruct"
# Shared client used by process_query() for every text-generation request.
client = InferenceClient(model=model_name)

# System prompt for better medical responses.
# process_query() places this text in front of every user query before the
# combined prompt is sent to the model.
SYSTEM_PROMPT = """
You are MedChat, an educational medical assistant designed to provide detailed, accurate information about medications, side effects, drug interactions, alternative treatments (including natural options), and recent medical studies for any condition, including diabetes, heart disease, and more. Your responses are for educational purposes only, not medical advice—always include the disclaimer: "This is an educational tool, not medical advice. Consult a healthcare professional." For any query, identify the medical condition or drug mentioned, and provide relevant medications, side effects, alternatives, or studies. If no specific drug or condition is mentioned, suggest common medications or treatments for the implied condition (e.g., for diabetes, suggest metformin, insulin). Use simple, clear language, and cite sources like DailyMed, PubMed, or reliable websites (e.g., WebMD, Mayo Clinic) for all information. Respond quickly and concisely, streaming the output character by character.
"""

# DailyMed and PubMed API endpoints. No timeout is encoded in these URLs; each
# request made by the lookup helpers passes its own timeout.
DAILYMED_BASE_URL = "https://dailymed.nlm.nih.gov/dailymed/services/v2/drugnames"
# get_recent_studies() queries esearch.fcgi for matching ids and then
# esummary.fcgi for the titles of those ids.
PUBMED_BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
PUBMED_SUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"

# Web scraping sources for additional data. These are search-page URL
# templates; the {query} placeholder is filled in with str.format().
WEBMD_URL = "https://www.webmd.com/search/search_results/default.aspx?query={query}"
MAYO_CLINIC_URL = "https://www.mayoclinic.org/search/search-results?q={query}"

# Drug names and condition keywords the response parser recognises.
_KNOWN_DRUGS = (
    "ibuprofen",
    "aspirin",
    "metformin",
    "tylenol",
    "metoprolol",
    "lisinopril",
    "insulin",
)
_CONDITION_KEYWORDS = ("diabetes", "heart", "headache", "pain")


def _extract_drug(query_lower: str, response_text: str) -> Optional[str]:
    """Return the drug the exchange is about, or None if none is recognised."""
    response_lower = response_text.lower()
    if any(name in response_lower for name in _KNOWN_DRUGS):
        # Prefer an explicit "Drug: <name>" label when the model emits one.
        labelled = re.search(r"Drug:\s*([^\n,]+)", response_text, re.IGNORECASE)
        if labelled:
            return labelled.group(1).strip()
    # The prompt never asks the model for a "Drug:" label, so fall back to the
    # first known drug the user actually named. Only the query is consulted
    # here: a drug that merely appears in the model's answer to a condition
    # question must not shadow the condition.
    return next((name for name in _KNOWN_DRUGS if name in query_lower), None)


def _extract_condition(query_lower: str, response_text: str) -> Optional[str]:
    """Return the condition named in the response or query, or None."""
    response_lower = response_text.lower()
    mentioned = any(
        keyword in query_lower or keyword in response_lower
        for keyword in _CONDITION_KEYWORDS
    )
    if not mentioned:
        return None
    # An explicit "condition: <name>" label wins; otherwise take whatever
    # follows the word "for" in the query (e.g. "medications for diabetes").
    # \b stops "for" from matching at the end of a longer word.
    found = re.search(
        r"condition:\s*([^\n,]+)", response_text, re.IGNORECASE
    ) or re.search(r"\bfor\s+([^\n,]+)", query_lower)
    return found.group(1).strip() if found else None


def _classify_intent(query_lower: str, response_lower: str) -> str:
    """Map the exchange onto one of the intents the /chat handler understands."""
    if "side effects" in response_lower:
        return "side effects"
    if "interaction" in response_lower:
        return "interaction"
    if "alternatives" in response_lower or "natural" in query_lower:
        return "alternatives"
    if "studies" in response_lower or "research" in query_lower:
        return "studies"
    return "general"


def process_query(query: str) -> Tuple[Optional[str], Optional[str], str, str]:
    """Send the query to the model and extract drug, condition and intent.

    Args:
        query: The user's message.

    Returns:
        A ``(drug, condition, intent, response_text)`` tuple. ``drug`` and
        ``condition`` are ``None`` when nothing was recognised. ``intent`` is
        one of ``"general"``, ``"side effects"``, ``"interaction"``,
        ``"alternatives"`` or ``"studies"``. If anything fails, the tuple is
        ``(None, None, "error", <message>)`` and no exception is raised.
    """
    try:
        # Format prompt with system instruction and user query
        prompt = f"{SYSTEM_PROMPT}\n\nUser: {query}\nMedChat:"
        print(f"Processing query: {query}")  # Debug query

        # With stream=True the client yields the generated text in pieces.
        stream = client.text_generation(
            prompt,
            max_new_tokens=300,  # Increased for detailed responses
            temperature=0.7,
            top_p=0.9,
            return_full_text=False,  # Only return generated text after prompt
            stream=True,  # Enable streaming
        )
        response_text = "".join(stream).strip()

        query_lower = query.lower()
        drug = _extract_drug(query_lower, response_text)
        condition = _extract_condition(query_lower, response_text)
        intent = _classify_intent(query_lower, response_text.lower())

        return drug, condition, intent, response_text
    except Exception as e:
        # Boundary handler: the caller expects an "error" tuple, never a raise.
        print(f"Error processing query: {e}")
        return None, None, "error", f"Error processing your request: {str(e)}"

def get_drug_info(drug_name: str) -> str:
    """Fetch drug info from DailyMed and scrape WebMD for additional data.

    The two sources are queried independently, so a failure in one no longer
    discards what the other returned.

    Args:
        drug_name: Name of the drug to look up.

    Returns:
        The collected text. If nothing was collected, returns
        ``"Error fetching drug info: ..."`` when a lookup failed, or
        ``"No drug info found on DailyMed or WebMD."`` otherwise.
    """
    # Function-scope import keeps this block self-contained (stdlib only).
    from urllib.parse import quote_plus

    if not drug_name or not drug_name.strip():
        return "No drug info found on DailyMed or WebMD."

    info = ""
    last_error = None

    # DailyMed API. Passing the name via params lets requests URL-encode it.
    # NOTE(review): confirm this endpoint returns JSON without a ".json"
    # suffix; if it does not, .json() raises and this source is skipped.
    try:
        daily_response = requests.get(
            DAILYMED_BASE_URL, params={"drug_name": drug_name}, timeout=5
        )
        if daily_response.status_code == 200 and daily_response.json().get("data"):
            info = (
                f"Drug: {drug_name.capitalize()} - Found in DailyMed. "
                "Details: https://dailymed.nlm.nih.gov/dailymed/"
            )
    except Exception as e:
        print(f"Error fetching drug info: {e}")
        last_error = e

    # Web scraping for additional info (WebMD)
    try:
        # quote_plus turns spaces into "+" and escapes characters such as
        # "&" or "#" that would otherwise corrupt the query string.
        webmd_query = quote_plus(f"{drug_name} medication")
        webmd_response = requests.get(WEBMD_URL.format(query=webmd_query), timeout=5)
        if webmd_response.status_code == 200:
            soup = BeautifulSoup(webmd_response.text, 'html.parser')
            content = soup.find('div', class_='search-results-content')
            if content:
                webmd_info = content.get_text(strip=True)[:200] + "..."  # Limit length
                info += f"\nAdditional info from WebMD: {webmd_info} (Source: https://www.webmd.com)"
    except Exception as e:
        print(f"Error fetching drug info: {e}")
        last_error = e

    if info:
        return info
    if last_error is not None:
        return f"Error fetching drug info: {str(last_error)}"
    return "No drug info found on DailyMed or WebMD."

def get_condition_info(condition: str) -> str:
    """Provide common medications or treatments for a condition, with web scraping.

    The built-in text for diabetes, heart conditions and headache/pain is
    chosen first. A Mayo Clinic search page is then scraped for extra context.
    If that scrape fails, the built-in text is still returned instead of being
    replaced by an error message.

    Args:
        condition: Free-text condition name, matched case-insensitively.

    Returns:
        The collected text. If nothing was collected, returns
        ``"Error fetching condition info: ..."`` when the scrape failed, or
        ``"No condition-specific information found."`` otherwise.
    """
    # Function-scope import keeps this block self-contained (stdlib only).
    from urllib.parse import quote_plus

    if not condition or not condition.strip():
        return "No condition-specific information found."

    condition_lower = condition.lower()
    medications = ""
    if "diabetes" in condition_lower:
        medications = "Common medications for diabetes include metformin, insulin (e.g., insulin glargine), and sulfonylureas (e.g., glipizide). Consult a healthcare professional."
    elif "heart" in condition_lower:
        medications = "Common heart medications include beta-blockers like metoprolol, ACE inhibitors like lisinopril, and statins like atorvastatin. Consult a healthcare professional."
    elif "headache" in condition_lower or "pain" in condition_lower:
        medications = "Common medications for headaches or pain include ibuprofen, acetaminophen (Tylenol), or aspirin. Natural alternatives might include turmeric or ginger. Consult a healthcare professional."

    # Web scraping for additional info (Mayo Clinic). Only this network step
    # is guarded, so a failure here cannot discard the text chosen above.
    scrape_error = None
    try:
        # quote_plus escapes spaces and reserved characters in the free text.
        mayo_query = quote_plus(f"{condition} medications")
        mayo_response = requests.get(MAYO_CLINIC_URL.format(query=mayo_query), timeout=5)
        if mayo_response.status_code == 200:
            soup = BeautifulSoup(mayo_response.text, 'html.parser')
            content = soup.find('div', class_='content')
            if content:
                mayo_info = content.get_text(strip=True)[:200] + "..."  # Limit length
                medications += f"\nAdditional info from Mayo Clinic: {mayo_info} (Source: https://www.mayoclinic.org)"
    except Exception as e:
        print(f"Error fetching condition info: {e}")
        scrape_error = e

    if medications:
        return medications
    if scrape_error is not None:
        return f"Error fetching condition info: {str(scrape_error)}"
    return "No condition-specific information found."

def get_recent_studies(query: str, max_results: int = 3, min_year: int = 2020) -> str:
    """List recent PubMed studies matching a search term.

    The search covers publication dates from ``min_year`` up to the current
    calendar year. The upper bound used to be fixed at 2025, which excluded
    newer studies.

    Args:
        query: PubMed search term (for example a drug or condition name).
        max_results: Maximum number of studies to list.
        min_year: Earliest publication year to include.

    Returns:
        A newline-terminated bullet list of titles with PMIDs and links, or a
        short message when nothing was found or a request failed. No exception
        is raised to the caller.
    """
    if not query or not query.strip():
        return "No recent studies found."

    try:
        params = {
            "db": "pubmed",
            "term": query,
            "retmax": max_results,
            "datetype": "pdat",
            "mindate": str(min_year),
            "maxdate": str(time.localtime().tm_year),
            "retmode": "json",
        }
        response = requests.get(PUBMED_BASE_URL, params=params, timeout=5)
        if response.status_code != 200:
            return "No recent studies found."

        # .get() so that a response without the expected keys reads as
        # "nothing found" rather than surfacing a KeyError message.
        id_list = response.json().get("esearchresult", {}).get("idlist", [])
        if not id_list:
            return "No recent studies found on PubMed."

        summary_params = {
            "db": "pubmed",
            "id": ",".join(id_list),
            "retmode": "json",
        }
        summary_response = requests.get(PUBMED_SUMMARY_URL, params=summary_params, timeout=5)
        if summary_response.status_code != 200:
            return "Found studies, but couldn’t retrieve details."

        summaries = summary_response.json().get("result", {})
        lines = ["Recent studies from PubMed:"]
        for uid in id_list:
            # A uid missing from the summary still gets a line with its link.
            title = summaries.get(uid, {}).get("title", "No title")
            lines.append(f"- {title} (PMID: {uid}, https://pubmed.ncbi.nlm.nih.gov/{uid}/)")
        return "\n".join(lines) + "\n"
    except Exception as e:
        print(f"Error fetching studies: {e}")
        return f"Error fetching studies: {str(e)}"

@app.route("/")
def index():
    """Serve the chat front-end by rendering the index.html template."""
    page = render_template("index.html")
    return page

@app.route("/chat", methods=["POST"])
def chat():
    """Answer a chat message as a ``text/event-stream`` response.

    Expects a JSON object with a ``"message"`` string. A missing, invalid or
    empty message gets a plain JSON prompt to enter one. Otherwise the reply
    is streamed one character per ``data:`` event and terminated by a
    ``data: [END]`` event.
    """
    print(f"Received request method: {request.method}")  # Debug request method

    # Tolerate a missing/invalid JSON body, a non-object body, or a non-string
    # "message" instead of failing with an unhandled error.
    payload = request.get_json(silent=True)
    raw_message = payload.get("message", "") if isinstance(payload, dict) else ""
    user_message = raw_message.strip().lower() if isinstance(raw_message, str) else ""
    if not user_message:
        return jsonify({"response": "Please enter a message."})

    drug, condition, intent, full_response = process_query(user_message)

    def build_reply() -> str:
        """Assemble the full reply text from the lookup helpers."""
        if intent == "error":
            return full_response  # Return the error message

        # NOTE(review): on success the model's generated text is only parsed
        # by process_query and is not included in the reply; confirm intended.
        reply = "This is an educational tool, not medical advice. Consult a healthcare professional.\n\n"
        wants_alternatives = "alternatives" in intent or "natural" in user_message
        # Fetch studies when the intent says so or the user asked for them.
        # The previous check required the intent NOT to be "studies", so a
        # studies intent never reached PubMed.
        wants_studies = (
            intent == "studies"
            or "studies" in user_message
            or "research" in user_message
        )

        if drug or condition:
            reply += get_drug_info(drug) if drug else get_condition_info(condition)
            if wants_alternatives:
                reply += "\nNatural alternatives might include dietary changes or herbal options (consult a professional)."
            if wants_studies:
                reply += "\n\n" + get_recent_studies(drug or condition)
            return reply

        reply += "Please specify a drug, condition, or question for more details (e.g., 'medications for diabetes' or 'side effects of metformin')."

        # Web scrape for additional context if no specific data found
        if "medications" in user_message:
            topic = user_message.replace("medications", "").strip() or "general health"
            web_response = get_condition_info(topic)
            if web_response and "Error" not in web_response:
                reply += f"\n\nAdditional info: {web_response}"
        return reply

    def event_stream():
        try:
            # Stream the response character by character.
            # NOTE(review): a newline character in the reply is sent as an
            # empty "data: " field; confirm the client copes with that.
            for char in build_reply():
                yield f"data: {char}\n\n"
                time.sleep(0.005)  # Very fast streaming for better performance
            yield "data: [END]\n\n"
        except Exception as e:
            print(f"Error in event stream: {e}")
            failure = f"Error: Cannot connect to server. Details: {str(e)}"
            for char in failure:
                yield f"data: {char}\n\n"
            yield "data: [END]\n\n"

    return Response(event_stream(), mimetype="text/event-stream")

if __name__ == "__main__":
    # The server binds to every interface (0.0.0.0). Flask's debug mode enables
    # the interactive Werkzeug debugger, which lets anyone who can reach the
    # port execute code, so it is now opt-in via FLASK_DEBUG rather than always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)