Spaces:
Paused
Paused
File size: 1,475 Bytes
12f2295 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# prescription_validation/fuzzy_match.py
import sqlite3
import re
from rapidfuzz.distance import Levenshtein
from config import DB_PATH, LEV_THRESH
class RxLookup:
    """Fuzzy lookup of a free-text token against the ``drugs`` table.

    On construction every ``(name, cui)`` row is read from the SQLite
    database at ``db_path`` and kept in ``self.drugs``.  ``match`` then
    compares a normalized token with each normalized drug name using
    Levenshtein edit distance.

    The connection stays open on ``self.conn`` (as before); call ``close()``
    or use the instance as a context manager to release it.
    """

    # Compiled once at class creation instead of being looked up per call.
    # NOTE(review): the unit alternatives are matched as plain prefixes, so
    # "10 tablets" loses "10 tab" and keeps "lets" — confirm this is intended.
    _DOSAGE_RE = re.compile(r'(\d+)\s*(mg|ml|mcg|tab|cap|#)')
    _NON_ALPHA_RE = re.compile(r'[^a-z]')

    def __init__(self, db_path: str = DB_PATH):
        """Open ``db_path`` and load all drug rows into memory.

        Raises:
            sqlite3.Error: if the database cannot be opened or queried.  The
                connection is closed before the error propagates.
        """
        self.conn = sqlite3.connect(db_path)
        self.conn.row_factory = sqlite3.Row
        try:
            self.drugs = self.conn.execute(
                "SELECT name, cui FROM drugs"
            ).fetchall()
        except sqlite3.Error:
            # Do not leak the handle when the initial load fails.
            self.conn.close()
            raise
        # Memo of raw drug name -> normalized name, filled by match() so the
        # regex work for each database name is done once, not on every call.
        self._clean_cache: dict[str, str] = {}

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

    def __enter__(self) -> "RxLookup":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def _clean_token(self, token: str) -> str:
        """Return ``token`` lower-cased with dosage fragments and every
        character outside ``a-z`` removed.

        A dosage fragment is a run of digits, optional whitespace, and one of
        ``mg``, ``ml``, ``mcg``, ``tab``, ``cap`` or ``#``.
        """
        lowered = token.lower()
        without_dosage = self._DOSAGE_RE.sub('', lowered)
        return self._NON_ALPHA_RE.sub('', without_dosage)

    def match(self, token: str) -> tuple[str | None, str | None]:
        """Return the ``(name, cui)`` of the closest drug, or ``(None, None)``.

        The closest drug is the one with the smallest Levenshtein distance
        between the normalized token and the normalized drug name; on ties
        the earliest row in ``self.drugs`` wins.  The match is accepted only
        when ``distance / len(normalized token)`` is strictly below
        ``LEV_THRESH``.  An empty token, or one that normalizes to an empty
        string, yields ``(None, None)``.
        """
        if not token:
            return (None, None)
        cleaned_token = self._clean_token(token)
        if not cleaned_token:
            return (None, None)

        cache = self._clean_cache
        best_match = None
        min_distance = float('inf')
        for row in self.drugs:
            name, cui = row["name"], row["cui"]
            if name is None:
                # A NULL name cannot be normalized or matched; skip it rather
                # than fail the whole lookup.
                continue
            cleaned_db_name = cache.get(name)
            if cleaned_db_name is None:
                cleaned_db_name = cache[name] = self._clean_token(name)
            distance = Levenshtein.distance(cleaned_token, cleaned_db_name)
            if distance < min_distance:
                min_distance = distance
                best_match = (name, cui)
                if distance == 0:
                    # Nothing can beat an exact match, and later ties never
                    # replace the first hit, so stop scanning.
                    break

        if best_match is not None and min_distance / len(cleaned_token) < LEV_THRESH:
            return best_match
        return (None, None)
|