File size: 1,965 Bytes
8983b2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import re

from slotmatch.schema import SchemaValidator
from slotmatch.utils import extract_value_by_regex, fuzzy_match_key, compute_confidence

class SlotExtractor:
    """Extract the slots named by a schema from free-form text.

    The schema is passed through ``SchemaValidator``; the mapping returned by
    its ``get_schema()`` is used as ``slot name -> callable``, where the
    callable (typically a type such as ``int`` or ``bool``) is applied to the
    raw extracted value to coerce it.
    """

    # Optionally quoted key made of word characters / hyphens, followed by
    # optional whitespace and a ":" or "=" separator.
    _KEY_PATTERN = re.compile(r'["\']?([\w-]+)["\']?\s*[:=]')

    # Lower-cased strings that coerce to True when the expected type is bool.
    _TRUE_STRINGS = frozenset({"true", "yes", "1"})

    def __init__(self, schema: dict):
        """Validate *schema* and cache the resulting mapping and its keys."""
        self.validator = SchemaValidator(schema)
        self.schema = self.validator.get_schema()
        self.schema_keys = list(self.schema.keys())

    def extract(self, text: str) -> dict:
        """Return ``{slot: {"value": ..., "confidence": ...}}`` for every schema key.

        For each expected key, in order:

        1. Look the key up directly with ``extract_value_by_regex``.
        2. Otherwise fuzzy-match the key against the key-like tokens found in
           *text* and retry the lookup with the matched spelling; the
           confidence is scaled by the fuzzy score.
        3. Otherwise record ``value=None`` with ``confidence=0.0``.
        """
        result = {}
        # Scanning the text for candidate keys does not depend on the schema
        # key being processed, so do it lazily and at most once per call.
        candidate_keys = None

        for expected_key in self.schema_keys:
            expected_type = self.schema[expected_key]

            # 1. Try regex directly
            raw_value = extract_value_by_regex(text, expected_key)
            if raw_value is not None:
                result[expected_key] = {
                    "value": self._coerce_type(raw_value, expected_type),
                    "confidence": compute_confidence("regex"),
                }
                continue

            # 2. Try fuzzy match
            if candidate_keys is None:
                candidate_keys = self._get_all_keys_from_text(text)
            fuzzy_key, score = fuzzy_match_key(expected_key, candidate_keys)
            if fuzzy_key:
                raw_value = extract_value_by_regex(text, fuzzy_key)
                if raw_value is not None:
                    result[expected_key] = {
                        "value": self._coerce_type(raw_value, expected_type),
                        "confidence": compute_confidence("fuzzy") * score,
                    }
                    continue

            # 3. Fallback
            result[expected_key] = {
                "value": None,
                "confidence": 0.0,
            }

        return result

    def _get_all_keys_from_text(self, text: str) -> list:
        """Return the distinct key-like tokens in *text*, in first-seen order.

        A token counts as a key when it is followed by ``:`` or ``=``
        (optionally quoted, optionally separated by whitespace).
        """
        # dict.fromkeys de-duplicates while keeping insertion order, so the
        # result is reproducible across runs (a set's order is not).
        return list(dict.fromkeys(self._KEY_PATTERN.findall(text)))

    def _coerce_type(self, value, expected_type):
        """Best-effort conversion of *value* using *expected_type*.

        ``bool`` is special-cased: the value is lower-cased and compared with
        ``"true"``, ``"yes"`` and ``"1"``. Any other *expected_type* is simply
        called with *value*. If the conversion raises, the original *value* is
        returned unchanged.
        """
        try:
            if expected_type is bool:
                return value.lower() in self._TRUE_STRINGS
            return expected_type(value)
        except Exception:
            # expected_type is an arbitrary callable, so any ordinary failure
            # falls back to the raw value. KeyboardInterrupt / SystemExit are
            # deliberately not caught.
            return value  # fallback to original