# Hugging Face Space (status when this page was captured: Sleeping)
# app.py
import importlib.metadata
import os
import re
import time
from datetime import date
from typing import Optional, Tuple
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
from flask import Flask, render_template, request, jsonify, Response
from huggingface_hub import InferenceClient
app = Flask(__name__)

# Debug versions
# Report the version of the installed huggingface_hub distribution. The
# HUGGINGFACE_HUB_VERSION environment variable is only kept as a fallback:
# nothing in this file sets it, so reading it alone would normally print
# "Not set".
try:
    _hub_version = importlib.metadata.version("huggingface_hub")
except importlib.metadata.PackageNotFoundError:
    _hub_version = os.environ.get("HUGGINGFACE_HUB_VERSION", "Not set")
print("Hugging Face Hub version:", _hub_version)

# Set Hugging Face cache directory explicitly (optional for InferenceClient, but useful for consistency)
# NOTE(review): this assignment runs after huggingface_hub has already been
# imported -- confirm the library still honours HF_HOME when set this late.
os.environ["HF_HOME"] = "/home/user/.cache/huggingface"
print(f"Hugging Face cache directory: {os.environ['HF_HOME']}")

# Use Hugging Face InferenceClient for Qwen/Qwen2-1.5B-Instruct (public model, no token needed)
# You can switch to other public models by changing the model name
model_name = "Qwen/Qwen2-1.5B-Instruct"
client = InferenceClient(model=model_name)

# System prompt for better medical responses; it is prepended to every user
# query before the text is sent to the model.
SYSTEM_PROMPT = """
You are MedChat, an educational medical assistant designed to provide detailed, accurate information about medications, side effects, drug interactions, alternative treatments (including natural options), and recent medical studies for any condition, including diabetes, heart disease, and more. Your responses are for educational purposes only, not medical advice—always include the disclaimer: "This is an educational tool, not medical advice. Consult a healthcare professional." For any query, identify the medical condition or drug mentioned, and provide relevant medications, side effects, alternatives, or studies. If no specific drug or condition is mentioned, suggest common medications or treatments for the implied condition (e.g., for diabetes, suggest metformin, insulin). Use simple, clear language, and cite sources like DailyMed, PubMed, or reliable websites (e.g., WebMD, Mayo Clinic) for all information. Respond quickly and concisely, streaming the output character by character.
"""

# DailyMed and PubMed API base URLs (request timeouts are passed at each call site)
DAILYMED_BASE_URL = "https://dailymed.nlm.nih.gov/dailymed/services/v2/drugnames"
PUBMED_BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
PUBMED_SUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"

# Web scraping sources for additional data; "{query}" is filled in with the
# search terms by the lookup helpers.
WEBMD_URL = "https://www.webmd.com/search/search_results/default.aspx?query={query}"
MAYO_CLINIC_URL = "https://www.mayoclinic.org/search/search-results?q={query}"
# Lower-case keywords scanned for in the user's query and the model's reply.
_KNOWN_DRUGS = (
    "ibuprofen",
    "aspirin",
    "metformin",
    "tylenol",
    "metoprolol",
    "lisinopril",
    "insulin",
)
_KNOWN_CONDITIONS = ("diabetes", "heart", "headache", "pain")


def _extract_entities(query: str, response_text: str) -> Tuple[Optional[str], Optional[str], str]:
    """Derive ``(drug, condition, intent)`` from the query and the model reply."""
    query_lower = query.lower()
    reply_lower = response_text.lower()

    # Drug: prefer an explicit "Drug: <name>" label in the reply (any casing);
    # if the model did not emit one, fall back to a known drug the user named.
    drug = None
    if any(name in reply_lower for name in _KNOWN_DRUGS):
        labelled = re.search(r"drug:\s*([^\n,]+)", response_text, re.IGNORECASE)
        if labelled:
            drug = labelled.group(1).strip() or None
    if drug is None:
        drug = next((name for name in _KNOWN_DRUGS if name in query_lower), None)

    # Condition: a "condition: <name>" label in the reply, else the text after
    # the word "for" in the query, else the first known keyword in the query.
    condition = None
    if any(word in query_lower or word in reply_lower for word in _KNOWN_CONDITIONS):
        found = (
            re.search(r"condition:\s*([^\n,]+)", response_text, re.IGNORECASE)
            # \b keeps "for" from matching the tail of a longer word.
            or re.search(r"\bfor\s+([^\n,]+)", query_lower)
        )
        if found:
            condition = found.group(1).strip() or None
        if condition is None:
            condition = next(
                (word for word in _KNOWN_CONDITIONS if word in query_lower), None
            )

    # Intent: the first matching rule wins, so the order below is significant.
    if "side effects" in reply_lower:
        intent = "side effects"
    elif "interaction" in reply_lower:
        intent = "interaction"
    elif "alternatives" in reply_lower or "natural" in query_lower:
        intent = "alternatives"
    elif "studies" in reply_lower or "research" in query_lower:
        intent = "studies"
    else:
        intent = "general"

    return drug, condition, intent


def process_query(query: str) -> Tuple[Optional[str], Optional[str], str, str]:
    """Send the query to the model and classify the exchange.

    Args:
        query: The user's message.

    Returns:
        A ``(drug, condition, intent, response_text)`` tuple. ``drug`` and
        ``condition`` are ``None`` when nothing was recognised. ``intent`` is
        one of "side effects", "interaction", "alternatives", "studies",
        "general" or "error". On any failure the tuple is
        ``(None, None, "error", <error message>)``; nothing is raised.
    """
    try:
        # Format prompt with system instruction and user query
        prompt = f"{SYSTEM_PROMPT}\n\nUser: {query}\nMedChat:"
        print(f"Processing query: {query}")  # Debug query

        # The reply is requested as a stream and the pieces are joined here,
        # so callers still receive the complete text in one string.
        token_stream = client.text_generation(
            prompt,
            max_new_tokens=300,  # Increased for detailed responses
            temperature=0.7,
            top_p=0.9,
            return_full_text=False,  # Only return generated text after prompt
            stream=True,
        )
        response_text = "".join(token_stream).strip()

        drug, condition, intent = _extract_entities(query, response_text)
        return drug, condition, intent, response_text
    except Exception as e:
        # Top-level boundary for the model call: report the failure to the
        # caller through the "error" intent instead of raising.
        print(f"Error processing query: {e}")
        return None, None, "error", f"Error processing your request: {str(e)}"
def get_drug_info(drug_name: str) -> str:
    """Look up a drug on DailyMed and add a short excerpt scraped from WebMD.

    Args:
        drug_name: Drug name as extracted from the conversation.

    Returns:
        The text found, one source per line; a "not found" message when
        neither source produced anything; or an "Error fetching drug info"
        message when a request fails. Nothing is raised.
    """
    try:
        sections = []

        # DailyMed API. Passing the name through ``params`` lets requests
        # URL-encode it instead of splicing raw text into the query string.
        daily_response = requests.get(
            DAILYMED_BASE_URL, params={"drug_name": drug_name}, timeout=5
        )
        if daily_response.status_code == 200:
            try:
                data = daily_response.json()
            except ValueError as e:
                # A non-JSON body should not prevent the WebMD lookup below.
                print(f"DailyMed returned a non-JSON body: {e}")
                data = {}
            if isinstance(data, dict) and data.get("data"):
                sections.append(
                    f"Drug: {drug_name.capitalize()} - Found in DailyMed. Details: https://dailymed.nlm.nih.gov/dailymed/"
                )

        # Web scraping for additional info (WebMD). quote_plus encodes spaces
        # as "+" and escapes reserved characters in the name.
        webmd_query = quote_plus(f"{drug_name} medication")
        webmd_response = requests.get(WEBMD_URL.format(query=webmd_query), timeout=5)
        if webmd_response.status_code == 200:
            soup = BeautifulSoup(webmd_response.text, 'html.parser')
            content = soup.find('div', class_='search-results-content')
            if content:
                webmd_info = content.get_text(strip=True)[:200] + "..."  # Limit length
                sections.append(
                    f"Additional info from WebMD: {webmd_info} (Source: https://www.webmd.com)"
                )

        if not sections:
            return "No drug info found on DailyMed or WebMD."
        return "\n".join(sections)
    except Exception as e:
        # Best-effort lookup: surface the failure as text rather than raising.
        print(f"Error fetching drug info: {e}")
        return f"Error fetching drug info: {str(e)}"
def get_condition_info(condition: str) -> str:
    """Provide common medications for a condition, plus a Mayo Clinic excerpt.

    Args:
        condition: Condition text; matched case-insensitively against the
            keywords "diabetes", "heart", "headache" and "pain".

    Returns:
        The canned medication summary (if a keyword matched) and the scraped
        excerpt (if one was found), one per line; a "not found" message when
        there is neither; or an "Error fetching condition info" message when
        the request fails. Nothing is raised.
    """
    try:
        condition_lower = condition.lower()
        sections = []

        if "diabetes" in condition_lower:
            sections.append(
                "Common medications for diabetes include metformin, insulin (e.g., insulin glargine), and sulfonylureas (e.g., glipizide). Consult a healthcare professional."
            )
        elif "heart" in condition_lower:
            sections.append(
                "Common heart medications include beta-blockers like metoprolol, ACE inhibitors like lisinopril, and statins like atorvastatin. Consult a healthcare professional."
            )
        elif "headache" in condition_lower or "pain" in condition_lower:
            sections.append(
                "Common medications for headaches or pain include ibuprofen, acetaminophen (Tylenol), or aspirin. Natural alternatives might include turmeric or ginger. Consult a healthcare professional."
            )

        # Web scraping for additional info (Mayo Clinic). The condition may be
        # free text from the user, so it is URL-encoded before being placed in
        # the search URL.
        mayo_query = quote_plus(f"{condition} medications")
        mayo_response = requests.get(MAYO_CLINIC_URL.format(query=mayo_query), timeout=5)
        if mayo_response.status_code == 200:
            soup = BeautifulSoup(mayo_response.text, 'html.parser')
            content = soup.find('div', class_='content')
            if content:
                mayo_info = content.get_text(strip=True)[:200] + "..."  # Limit length
                sections.append(
                    f"Additional info from Mayo Clinic: {mayo_info} (Source: https://www.mayoclinic.org)"
                )

        if not sections:
            return "No condition-specific information found."
        return "\n".join(sections)
    except Exception as e:
        # Best-effort lookup: surface the failure as text rather than raising.
        print(f"Error fetching condition info: {e}")
        return f"Error fetching condition info: {str(e)}"
def get_recent_studies(query: str) -> str:
    """Search PubMed for up to three studies published since 2020.

    Args:
        query: Search term sent to PubMed (a drug or condition name).

    Returns:
        A newline-terminated list of titles with PMIDs and links, or a short
        message explaining that nothing was found or that a request failed.
        Nothing is raised.
    """
    try:
        params = {
            "db": "pubmed",
            "term": query,
            "retmax": 3,
            "datetype": "pdat",
            "mindate": "2020",
            # Follow the calendar so the window does not silently go stale.
            "maxdate": str(date.today().year),
            "retmode": "json",
        }
        response = requests.get(PUBMED_BASE_URL, params=params, timeout=5)
        if response.status_code != 200:
            return "No recent studies found."

        id_list = response.json().get("esearchresult", {}).get("idlist", [])
        if not id_list:
            return "No recent studies found on PubMed."

        summary_params = {
            "db": "pubmed",
            "id": ",".join(id_list),
            "retmode": "json",
        }
        summary_response = requests.get(PUBMED_SUMMARY_URL, params=summary_params, timeout=5)
        if summary_response.status_code != 200:
            return "Found studies, but couldn’t retrieve details."

        summaries = summary_response.json().get("result", {})
        lines = ["Recent studies from PubMed:"]
        for uid in id_list:
            # A uid missing from the summary payload still gets listed, with
            # a placeholder title, instead of failing the whole lookup.
            title = summaries.get(uid, {}).get("title", "No title")
            lines.append(f"- {title} (PMID: {uid}, https://pubmed.ncbi.nlm.nih.gov/{uid}/)")
        return "\n".join(lines) + "\n"
    except Exception as e:
        # Best-effort lookup: surface the failure as text rather than raising.
        print(f"Error fetching studies: {e}")
        return f"Error fetching studies: {str(e)}"
# NOTE(review): no route decorator was visible on this view, so it was never
# registered with the app. "/" is assumed to be the intended path -- confirm.
@app.route("/")
def index():
    """Render the ``index.html`` template (the chat page)."""
    return render_template("index.html")
# NOTE(review): no route decorator was visible on this view, so it was never
# registered with the app. The path "/chat" and the POST method are assumed
# from the JSON body it reads -- confirm against the front-end code.
@app.route("/chat", methods=["POST"])
def chat():
    """Answer one chat message as a character-by-character event stream.

    Expects a JSON body of the form ``{"message": "<text>"}``.

    Returns:
        A JSON prompt to enter a message when the body carries no usable
        text; otherwise a ``text/event-stream`` response that emits one
        ``data:`` event per character followed by ``data: [END]``.
    """
    print(f"Received request method: {request.method}")  # Debug request method

    # Tolerate a missing body, a non-JSON body, a non-object payload or a
    # non-string "message" instead of failing with an unhandled exception.
    payload = request.get_json(silent=True)
    raw_message = payload.get("message", "") if isinstance(payload, dict) else ""
    user_message = raw_message.strip().lower() if isinstance(raw_message, str) else ""
    if not user_message:
        return jsonify({"response": "Please enter a message."})

    drug, condition, intent, full_response = process_query(user_message)

    wants_alternatives = "alternatives" in intent or "natural" in user_message
    # Studies are fetched when the detected intent is "studies" or the user
    # asked for studies/research explicitly.
    wants_studies = (
        intent == "studies"
        or "studies" in user_message
        or "research" in user_message
    )

    def event_stream():
        try:
            if intent == "error":
                response = full_response  # Return the error message
            else:
                response = "This is an educational tool, not medical advice. Consult a healthcare professional.\n\n"
                if drug:
                    response += get_drug_info(drug)
                elif condition:
                    response += get_condition_info(condition)
                else:
                    response += "Please specify a drug, condition, or question for more details (e.g., 'medications for diabetes' or 'side effects of metformin')."

                if (drug or condition) and wants_alternatives:
                    response += "\nNatural alternatives might include dietary changes or herbal options (consult a professional)."

                if (drug or condition) and wants_studies:
                    response += "\n\n" + get_recent_studies(drug or condition)

                # Web scrape for additional context if no specific data found
                if not (drug or condition) and "medications" in user_message:
                    web_response = get_condition_info(
                        user_message.replace("medications", "").strip() or "general health"
                    )
                    if web_response and "Error" not in web_response:
                        response += f"\n\nAdditional info: {web_response}"

            # Stream the response character by character.
            # NOTE(review): a "\n" character is emitted as "data: \n\n\n" --
            # confirm the client reconstructs line breaks from that.
            for char in response:
                yield f"data: {char}\n\n"
                time.sleep(0.005)  # Very fast streaming for better performance
            yield "data: [END]\n\n"
        except Exception as e:
            # Anything that fails while building or sending the reply is
            # reported in-band so the client still receives the [END] marker.
            print(f"Error in event stream: {e}")
            response = f"Error: Cannot connect to server. Details: {str(e)}"
            for char in response:
                yield f"data: {char}\n\n"
            yield "data: [END]\n\n"

    return Response(event_stream(), mimetype="text/event-stream")
if __name__ == "__main__":
    # The server listens on every interface, so the interactive debugger
    # (which can execute arbitrary code) must not be on by default. Set
    # FLASK_DEBUG=1 (or "true"/"yes") to enable it for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes"}
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)