File size: 1,475 Bytes
12f2295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# prescription_validation/fuzzy_match.py
import sqlite3
import re
from rapidfuzz.distance import Levenshtein
from config import DB_PATH, LEV_THRESH

class RxLookup:
    """Fuzzy lookup of drug names stored in the ``drugs`` table of a SQLite DB.

    On construction every ``(name, cui)`` row is loaded into ``self.drugs``.
    ``match`` compares a cleaned input token against each cleaned drug name
    using Levenshtein edit distance and returns the closest row when it is
    close enough relative to the token length.

    The SQLite connection stays open as ``self.conn``; call ``close()`` or use
    the instance as a context manager (``with RxLookup() as rx: ...``) to
    release it.
    """

    # A run of digits followed by a unit / form-factor marker. The long forms
    # ("tablets", "capsules") are covered so that "10 tablets" is removed as a
    # whole instead of leaving the fragment "lets" glued to the drug name.
    _DOSAGE_RE = re.compile(r'\d+\s*(?:mg|ml|mcg|tab(?:lets?)?|cap(?:sules?)?|#)')
    # Anything that is not a lowercase ASCII letter (applied after lowercasing).
    _NON_ALPHA_RE = re.compile(r'[^a-z]')

    def __init__(self, db_path: str = DB_PATH):
        """Open ``db_path`` and load all ``(name, cui)`` rows from ``drugs``.

        Raises:
            sqlite3.Error: if the query fails; the connection is closed
                before the error propagates so it does not leak.
        """
        self.conn = sqlite3.connect(db_path)
        try:
            self.conn.row_factory = sqlite3.Row
            self.drugs = self.conn.execute("SELECT name, cui FROM drugs").fetchall()
        except sqlite3.Error:
            self.conn.close()
            raise

    def close(self) -> None:
        """Close the underlying SQLite connection.

        ``match`` only reads the rows already held in ``self.drugs`` and does
        not touch ``self.conn``.
        """
        self.conn.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def _clean_token(self, token: str) -> str:
        """Removes dosage, form factor, and non-alpha characters."""
        lowered = token.lower()
        without_dose = self._DOSAGE_RE.sub('', lowered)
        return self._NON_ALPHA_RE.sub('', without_dose)

    def _cleaned_drugs(self) -> list:
        """Return ``(cleaned_name, name, cui)`` triples for ``self.drugs``.

        Cleaning every database name is independent of the token being
        matched, so the result is built once and reused. The cache is keyed on
        the identity of ``self.drugs`` and is rebuilt if that attribute is
        reassigned (in-place mutation of the list is not detected).
        """
        source = self.drugs
        cached = getattr(self, "_cleaned_cache", None)
        if cached is None or cached[0] is not source:
            triples = [
                (self._clean_token(row["name"]), row["name"], row["cui"])
                for row in source
            ]
            cached = (source, triples)
            self._cleaned_cache = cached
        return cached[1]

    def match(self, token: str) -> tuple[str | None, str | None]:
        """Return the ``(name, cui)`` of the drug closest to ``token``.

        The token and every drug name are normalised with ``_clean_token``
        before comparison. The closest row is returned only when
        ``distance / len(cleaned_token)`` is strictly below ``LEV_THRESH``;
        otherwise, or when the token is empty before or after cleaning, the
        result is ``(None, None)``. On ties the first row in ``self.drugs``
        with the minimal distance wins.
        """
        if not token:
            return (None, None)

        cleaned_token = self._clean_token(token)
        if not cleaned_token:
            return (None, None)

        best_match = None
        min_distance = float('inf')

        for cleaned_name, name, cui in self._cleaned_drugs():
            distance = Levenshtein.distance(cleaned_token, cleaned_name)
            if distance < min_distance:
                min_distance = distance
                best_match = (name, cui)
                if distance == 0:
                    # Exact match: no later row can be strictly closer.
                    break

        if best_match is not None and min_distance / len(cleaned_token) < LEV_THRESH:
            return best_match

        return (None, None)