# prescription_validation/fuzzy_match.py
"""Fuzzy lookup of free-text prescription tokens against the ``drugs`` table."""

import re
import sqlite3

from rapidfuzz.distance import Levenshtein

from config import DB_PATH, LEV_THRESH

# A number followed by a dosage unit or form-factor word ("500mg", "10 tablets").
# Longer alternatives are listed first so that "tablets" / "capsules" are
# consumed whole instead of leaving "lets" / "sules" glued to the drug name.
_DOSAGE_RE = re.compile(r'(\d+)\s*(mcg|mg|ml|tablets?|tabs?|capsules?|caps?|#)')
_NON_ALPHA_RE = re.compile(r'[^a-z]')


class RxLookup:
    """Match prescription tokens to rows of the ``drugs`` table by edit distance.

    All ``(name, cui)`` rows are loaded once at construction time and kept in
    ``self.drugs``. The instance owns an open SQLite connection (``self.conn``);
    release it with :meth:`close` or by using the instance as a context
    manager::

        with RxLookup() as lookup:
            name, cui = lookup.match("amoxicilin 500mg")
    """

    def __init__(self, db_path: str = DB_PATH):
        """Open *db_path* and load every ``name, cui`` row from ``drugs``.

        Raises:
            sqlite3.Error: if the database cannot be opened or queried. The
                connection is closed before the error propagates.
        """
        self.conn = sqlite3.connect(db_path)
        self.conn.row_factory = sqlite3.Row
        try:
            self.drugs = self.conn.execute("SELECT name, cui FROM drugs").fetchall()
        except Exception:
            # Do not leak the connection when the initial load fails.
            self.conn.close()
            raise

    def close(self) -> None:
        """Close the underlying SQLite connection (safe to call repeatedly)."""
        self.conn.close()

    def __enter__(self) -> "RxLookup":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def _clean_token(self, token: str) -> str:
        """Return *token* lowercased with dosage, form factor and non-letters removed.

        A run of digits followed by a unit or form-factor word (mg, ml, mcg,
        tab(s), tablet(s), cap(s), capsule(s), '#') is dropped first; every
        remaining character outside ``a-z`` is then stripped, so the result
        contains only lowercase ASCII letters and may be empty.
        """
        lowered = token.lower()
        without_dosage = _DOSAGE_RE.sub('', lowered)
        return _NON_ALPHA_RE.sub('', without_dosage)

    def _cleaned_drugs(self) -> list[tuple[str, object, str]]:
        """Return ``(name, cui, cleaned_name)`` for every usable drug row.

        Cleaning the database names is independent of the query, so the result
        is cached on the instance instead of being recomputed on every
        :meth:`match` call. The cache is rebuilt when ``self.drugs`` is
        replaced or changes length. Rows whose name is NULL/empty, or cleans to
        the empty string, are skipped: they cannot be meaningfully compared.
        """
        source = self.drugs
        cache = getattr(self, "_cleaned_cache", None)
        if cache is None or cache[0] is not source or cache[1] != len(source):
            cleaned_rows = []
            for row in source:
                name = row["name"]
                if not name:
                    continue
                cleaned_name = self._clean_token(name)
                if cleaned_name:
                    cleaned_rows.append((name, row["cui"], cleaned_name))
            cache = (source, len(source), cleaned_rows)
            self._cleaned_cache = cache
        return cache[2]

    def match(self, token: str) -> tuple[str | None, str | None]:
        """Return the ``(name, cui)`` of the drug closest to *token*.

        Both *token* and the database names are normalised with
        :meth:`_clean_token` and compared by Levenshtein distance. The closest
        row is accepted only if ``distance / len(cleaned token)`` is strictly
        below ``LEV_THRESH``; on ties the earliest row wins.

        Returns:
            The original (uncleaned) ``(name, cui)`` pair of the best row, or
            ``(None, None)`` when *token* is empty, cleans to nothing, or no
            row is close enough.
        """
        if not token:
            return (None, None)

        cleaned_token = self._clean_token(token)
        if not cleaned_token:
            return (None, None)

        best_match = None
        min_distance = float('inf')
        for name, cui, cleaned_name in self._cleaned_drugs():
            distance = Levenshtein.distance(cleaned_token, cleaned_name)
            if distance < min_distance:
                min_distance = distance
                best_match = (name, cui)
                if distance == 0:
                    # Exact match: nothing later can be strictly closer.
                    break

        # NOTE(review): the distance is normalised by the query length only,
        # not by the longer of the two strings - confirm this is intended.
        if best_match is not None and min_distance / len(cleaned_token) < LEV_THRESH:
            return best_match
        return (None, None)