ADE-Explorer-MCP / openfda_client.py
Jonas
final fixes to add top_n. fix presentation of about section. add relative even frequency.
c2b5b0b
import requests
from cachetools import TTLCache, cached
import time
from typing import Optional, Tuple
API_BASE_URL = "https://api.fda.gov/drug/event.json"
# Cache with a TTL of 10 minutes (600 seconds)
cache = TTLCache(maxsize=256, ttl=600)
# 240 requests per minute per IP. A 0.25s delay is a simple way to stay under.
REQUEST_DELAY_SECONDS = 0.25
DRUG_SYNONYM_MAPPING = {
"tylenol": "acetaminophen",
"advil": "ibuprofen",
"motrin": "ibuprofen",
"aleve": "naproxen",
"benadryl": "diphenhydramine",
"claritin": "loratadine",
"zyrtec": "cetirizine",
"allegra": "fexofenadine",
"zantac": "ranitidine",
"pepcid": "famotidine",
"prilosec": "omeprazole",
"lipitor": "atorvastatin",
"zocor": "simvastatin",
"norvasc": "amlodipine",
"hydrochlorothiazide": "hctz",
"glucophage": "metformin",
"synthroid": "levothyroxine",
"ambien": "zolpidem",
"xanax": "alprazolam",
"prozac": "fluoxetine",
"zoloft": "sertraline",
"paxil": "paroxetine",
"lexapro": "escitalopram",
"cymbalta": "duloxetine",
"wellbutrin": "bupropion",
"desyrel": "trazodone",
"eliquis": "apixaban",
"xarelto": "rivaroxaban",
"pradaxa": "dabigatran",
"coumadin": "warfarin",
"januvia": "sitagliptin",
"tradjenta": "linagliptin",
"jardiance": "empagliflozin",
"farxiga": "dapagliflozin",
"invokana": "canagliflozin",
"ozempic": "semaglutide",
"victoza": "liraglutide",
"trulicity": "dulaglutide",
"humira": "adalimumab",
"enbrel": "etanercept",
"remicade": "infliximab",
"stelara": "ustekinumab",
"keytruda": "pembrolizumab",
"opdivo": "nivolumab",
"revlimid": "lenalidomide",
"rituxan": "rituximab",
"herceptin": "trastuzumab",
"avastin": "bevacizumab",
"spiriva": "tiotropium",
"advair": "fluticasone/salmeterol",
"symbicort": "budesonide/formoterol",
"singulair": "montelukast",
"lyrica": "pregabalin",
"neurontin": "gabapentin",
"topamax": "topiramate",
"lamictal": "lamotrigine",
"keppra": "levetiracetam",
"dilantin": "phenytoin",
"tegretol": "carbamazepine",
"depakote": "divalproex",
"vyvanse": "lisdexamfetamine",
"adderall": "amphetamine/dextroamphetamine",
"ritalin": "methylphenidate",
"concerta": "methylphenidate",
"focalin": "dexmethylphenidate",
"strattera": "atomoxetine",
"viagra": "sildenafil",
"cialis": "tadalafil",
"levitra": "vardenafil",
"bactrim": "sulfamethoxazole/trimethoprim",
"keflex": "cephalexin",
"augmentin": "amoxicillin/clavulanate",
"zithromax": "azithromycin",
"levaquin": "levofloxacin",
"cipro": "ciprofloxacin",
"diflucan": "fluconazole",
"tamiflu": "oseltamivir",
"valtrex": "valacyclovir",
"zofran": "ondansetron",
"phenergan": "promethazine",
"imitrex": "sumatriptan",
"flexeril": "cyclobenzaprine",
"soma": "carisoprodol",
"valium": "diazepam",
"ativan": "lorazepam",
"klonopin": "clonazepam",
"restoril": "temazepam",
"ultram": "tramadol",
"percocet": "oxycodone/acetaminophen",
"vicodin": "hydrocodone/acetaminophen",
"oxycontin": "oxycodone",
"dilaudid": "hydromorphone",
"morphine": "ms contin",
"fentanyl": "duragesic"
}
OUTCOME_MAPPING = {
"1": "Recovered/Resolved",
"2": "Recovering/Resolving",
"3": "Not Recovered/Not Resolved",
"4": "Recovered/Resolved with Sequelae",
"5": "Fatal",
"6": "Unknown",
}
QUALIFICATION_MAPPING = {
"1": "Physician",
"2": "Pharmacist",
"3": "Other Health Professional",
"4": "Lawyer",
"5": "Consumer or Non-Health Professional",
}
SERIOUS_OUTCOME_FIELDS = [
"seriousnessdeath",
"seriousnesslifethreatening",
"seriousnesshospitalization",
"seriousnessdisabling",
"seriousnesscongenitalanomali",
"seriousnessother",
]
def get_top_adverse_events(drug_name: str, limit: int = 10, patient_sex: Optional[str] = None, age_range: Optional[Tuple[int, int]] = None) -> dict:
"""
Query OpenFDA to get the top adverse events for a given drug.
Args:
drug_name (str): The name of the drug to search for (brand or generic).
limit (int): The maximum number of adverse events to return.
patient_sex (str): The patient's sex to filter by ('1' for Male, '2' for Female).
age_range (tuple): A tuple containing min and max age, e.g., (20, 50).
Returns:
dict: The JSON response from the API, or an error dictionary.
"""
if not drug_name:
return {"error": "Drug name cannot be empty."}
drug_name_processed = drug_name.lower().strip()
drug_name_processed = DRUG_SYNONYM_MAPPING.get(drug_name_processed, drug_name_processed)
# Build the search query
search_terms = [f'patient.drug.medicinalproduct:"{drug_name_processed}"']
if patient_sex and patient_sex in ["1", "2"]:
search_terms.append(f'patient.patientsex:"{patient_sex}"')
if age_range and len(age_range) == 2:
min_age, max_age = age_range
search_terms.append(f'patient.patientonsetage:[{min_age} TO {max_age}]')
search_query = "+AND+".join(search_terms)
# Using a simple cache key that includes filters
cache_key = f"top_events_{drug_name_processed}_{limit}_{patient_sex}_{age_range}"
if cache_key in cache:
return cache[cache_key]
# Query for top events by count
count_query_url = (
f'{API_BASE_URL}?search={search_query}'
f'&count=patient.reaction.reactionmeddrapt.exact&limit={limit}'
)
try:
# Respect rate limits
time.sleep(REQUEST_DELAY_SECONDS)
response = requests.get(count_query_url)
response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
data = response.json()
# Query for total reports matching the filters
total_query_url = f'{API_BASE_URL}?search={search_query}'
time.sleep(REQUEST_DELAY_SECONDS)
total_response = requests.get(total_query_url)
total_response.raise_for_status()
total_data = total_response.json()
total_reports = total_data.get("meta", {},).get("results", {}).get("total", 0)
# Add total to the main data object
if 'meta' not in data:
data['meta'] = {}
data['meta']['total_reports_for_query'] = total_reports
cache[cache_key] = data
return data
except requests.exceptions.HTTPError as http_err:
if response.status_code == 404:
return {"error": f"No data found for '{drug_name}' with the specified filters. The drug may not be in the database, or there may be no reports matching the filter criteria."}
return {"error": f"HTTP error occurred: {http_err}"}
except requests.exceptions.RequestException as req_err:
return {"error": f"A network request error occurred: {req_err}"}
except Exception as e:
return {"error": f"An unexpected error occurred: {e}"}
def get_drug_event_pair_frequency(drug_name: str, event_name: str) -> dict:
"""
Query OpenFDA to get the total number of reports for a specific
drug-adverse event pair, and the total reports for the drug alone.
Args:
drug_name (str): The name of the drug.
event_name (str): The name of the adverse event.
Returns:
dict: A dictionary containing the results or an error message.
Includes `total` (for the pair) and `total_for_drug`.
"""
if not drug_name or not event_name:
return {"error": "Drug name and event name cannot be empty."}
drug_name_processed = drug_name.lower().strip()
drug_name_processed = DRUG_SYNONYM_MAPPING.get(drug_name_processed, drug_name_processed)
event_name_processed = event_name.lower().strip()
cache_key = f"pair_freq_{drug_name_processed}_{event_name_processed}"
if cache_key in cache:
return cache[cache_key]
try:
# First, get total reports for the drug to see if it exists
drug_query = f'search=patient.drug.medicinalproduct:"{drug_name_processed}"'
time.sleep(REQUEST_DELAY_SECONDS)
drug_response = requests.get(f"{API_BASE_URL}?{drug_query}")
# This is a critical failure if the drug isn't found
drug_response.raise_for_status()
drug_data = drug_response.json()
total_for_drug = drug_data.get("meta", {}).get("results", {}).get("total", 0)
# Second, get reports for the specific drug-event pair
pair_query = (
f'search=patient.drug.medicinalproduct:"{drug_name_processed}"'
f'+AND+patient.reaction.reactionmeddrapt:"{event_name_processed}"'
)
time.sleep(REQUEST_DELAY_SECONDS)
pair_response = requests.get(f"{API_BASE_URL}?{pair_query}")
total_for_pair = 0
if pair_response.status_code == 200:
pair_data = pair_response.json()
total_for_pair = pair_data.get("meta", {}).get("results", {}).get("total", 0)
# We don't raise for 404 on the pair, as it just means 0 results
elif pair_response.status_code != 404:
pair_response.raise_for_status()
# Construct a consistent response structure
data = {
"meta": {
"results": {
"total": total_for_pair,
"total_for_drug": total_for_drug
}
}
}
cache[cache_key] = data
return data
except requests.exceptions.HTTPError as http_err:
if http_err.response.status_code == 404:
return {"error": f"No data found for drug '{drug_name}'. It may be misspelled or not in the database."}
return {"error": f"HTTP error occurred: {http_err}"}
except requests.exceptions.RequestException as req_err:
return {"error": f"A network request error occurred: {req_err}"}
except Exception as e:
return {"error": f"An unexpected error occurred: {e}"}
def get_serious_outcomes(drug_name: str, limit: int = 6) -> dict:
"""
Query OpenFDA to get the most frequent serious outcomes for a given drug.
This function makes multiple API calls to count different outcome fields.
Args:
drug_name (str): The name of the drug to search for.
limit (int): The maximum number of outcomes to return.
Returns:
dict: A dictionary containing aggregated results or an error.
"""
if not drug_name:
return {"error": "Drug name cannot be empty."}
drug_name_processed = drug_name.lower().strip()
drug_name_processed = DRUG_SYNONYM_MAPPING.get(drug_name_processed, drug_name_processed)
# Use a cache key for the aggregated result
cache_key = f"serious_outcomes_aggregated_{drug_name_processed}_{limit}"
if cache_key in cache:
return cache[cache_key]
aggregated_results = {}
# Base search for all serious reports
base_search_query = f'patient.drug.medicinalproduct:"{drug_name_processed}"+AND+serious:1'
# Get total number of serious reports
total_serious_reports = 0
try:
total_query_url = f"{API_BASE_URL}?search={base_search_query}"
time.sleep(REQUEST_DELAY_SECONDS)
response = requests.get(total_query_url)
if response.status_code == 200:
total_data = response.json()
total_serious_reports = total_data.get("meta", {}).get("results", {}).get("total", 0)
elif response.status_code != 404:
# If this call fails, we can still proceed, the total will just be 0.
pass
except requests.exceptions.RequestException:
# If fetching total fails, proceed without it.
pass
for field in SERIOUS_OUTCOME_FIELDS:
try:
# Each query counts reports where the specific seriousness field exists
query = f"search={base_search_query}+AND+_exists_:{field}"
time.sleep(REQUEST_DELAY_SECONDS)
response = requests.get(f"{API_BASE_URL}?{query}")
if response.status_code == 200:
data = response.json()
total_count = data.get("meta", {}).get("results", {}).get("total", 0)
if total_count > 0:
# Use a more readable name for the outcome
outcome_name = field.replace("seriousness", "").replace("anomali", "anomaly").title()
aggregated_results[outcome_name] = total_count
# Ignore 404s, as they just mean no results for that specific outcome
elif response.status_code != 404:
response.raise_for_status()
except requests.exceptions.RequestException as e:
return {"error": f"A network request error occurred for field {field}: {e}"}
if not aggregated_results:
return {"error": f"No serious outcome data found for drug: '{drug_name}'."}
# Format the results to match the expected structure for plotting
final_data = {
"results": [{"term": k, "count": v} for k, v in aggregated_results.items()],
"meta": {"total_reports_for_query": total_serious_reports}
}
# Sort results by count, descending, and then limit
final_data["results"] = sorted(final_data["results"], key=lambda x: x['count'], reverse=True)
if limit:
final_data["results"] = final_data["results"][:limit]
cache[cache_key] = final_data
return final_data
def get_time_series_data(drug_name: str, event_name: str) -> dict:
"""
Query OpenFDA to get the time series data for a drug-event pair.
Args:
drug_name (str): The name of the drug.
event_name (str): The name of the adverse event.
Returns:
dict: The JSON response from the API, or an error dictionary.
"""
if not drug_name or not event_name:
return {"error": "Drug name and event name cannot be empty."}
drug_name_processed = drug_name.lower().strip()
drug_name_processed = DRUG_SYNONYM_MAPPING.get(drug_name_processed, drug_name_processed)
event_name_processed = event_name.lower().strip()
cache_key = f"time_series_{drug_name_processed}_{event_name_processed}"
if cache_key in cache:
return cache[cache_key]
query = (
f'search=patient.drug.medicinalproduct:"{drug_name_processed}"'
f'+AND+patient.reaction.reactionmeddrapt:"{event_name_processed}"'
f'&count=receiptdate'
)
try:
time.sleep(REQUEST_DELAY_SECONDS)
response = requests.get(f"{API_BASE_URL}?{query}")
response.raise_for_status()
data = response.json()
cache[cache_key] = data
return data
except requests.exceptions.HTTPError as http_err:
if response.status_code == 404:
return {"error": f"No data found for drug '{drug_name}' and event '{event_name}'. They may be misspelled or not in the database."}
return {"error": f"HTTP error occurred: {http_err}"}
except requests.exceptions.RequestException as req_err:
return {"error": f"A network request error occurred: {req_err}"}
except Exception as e:
return {"error": f"An unexpected error occurred: {e}"}
def get_report_source_data(drug_name: str, limit: int = 5) -> dict:
"""
Query OpenFDA to get the breakdown of report sources for a given drug.
Args:
drug_name (str): The name of the drug to search for.
limit (int): The maximum number of sources to return.
Returns:
dict: The JSON response from the API, or an error dictionary.
"""
if not drug_name:
return {"error": "Drug name cannot be empty."}
drug_name_processed = drug_name.lower().strip()
drug_name_processed = DRUG_SYNONYM_MAPPING.get(drug_name_processed, drug_name_processed)
cache_key = f"report_source_{drug_name_processed}_{limit}"
if cache_key in cache:
return cache[cache_key]
query = (
f'search=patient.drug.medicinalproduct:"{drug_name_processed}"'
f'&count=primarysource.qualification'
)
try:
time.sleep(REQUEST_DELAY_SECONDS)
response = requests.get(f"{API_BASE_URL}?{query}")
response.raise_for_status()
data = response.json()
# Translate the qualification codes and calculate total before limiting
if "results" in data:
# Sort by count first
data['results'] = sorted(data['results'], key=lambda x: x['count'], reverse=True)
# Calculate total from all results before limiting
total_with_source = sum(item['count'] for item in data['results'])
if 'meta' not in data:
data['meta'] = {}
data['meta']['total_reports_for_query'] = total_with_source
# Translate codes after processing
for item in data["results"]:
term_str = str(item["term"])
item["term"] = QUALIFICATION_MAPPING.get(term_str, f"Unknown ({term_str})")
# Apply limit
if limit:
data['results'] = data['results'][:limit]
cache[cache_key] = data
return data
except requests.exceptions.HTTPError as http_err:
if response.status_code == 404:
return {"error": f"No data found for drug: '{drug_name}'."}
return {"error": f"HTTP error occurred: {http_err}"}
except requests.exceptions.RequestException as req_err:
return {"error": f"A network request error occurred: {req_err}"}
except Exception as e:
return {"error": f"An unexpected error occurred: {e}"}