import re

from slotmatch.schema import SchemaValidator
from slotmatch.utils import extract_value_by_regex, fuzzy_match_key, compute_confidence

# Matches an optionally quoted key (word characters and hyphens) that is
# followed by ':' or '='; the single capture group is the bare key name.
_KEY_PATTERN = re.compile(r'["\']?([\w-]+)["\']?\s*[:=]')


class SlotExtractor:
    """Extract the values of schema-declared keys ("slots") from free text.

    For every key in the schema the extractor first looks the key up
    directly, then falls back to a fuzzy match against the key-like tokens
    found in the text, and finally reports the slot as missing.
    """

    def __init__(self, schema: dict):
        """Validate *schema* and cache its keys.

        Args:
            schema: Mapping of expected key name to the type used to coerce
                the extracted value. It is passed through ``SchemaValidator``
                and the schema returned by ``get_schema()`` is what the
                extractor actually uses.
        """
        self.validator = SchemaValidator(schema)
        self.schema = self.validator.get_schema()
        self.schema_keys = list(self.schema.keys())

    def extract(self, text: str) -> dict:
        """Extract one entry per schema key from *text*.

        Args:
            text: The raw text to search.

        Returns:
            A dict mapping every schema key to
            ``{"value": ..., "confidence": ...}``. A direct hit carries
            ``compute_confidence("regex")``; a fuzzy hit carries
            ``compute_confidence("fuzzy")`` multiplied by the match score;
            a miss is ``{"value": None, "confidence": 0.0}``.
        """
        result = {}
        # The key-like tokens depend only on ``text``, so they are collected
        # at most once per call, and only if some key needs the fuzzy path.
        candidate_keys = None

        for expected_key in self.schema_keys:
            expected_type = self.schema[expected_key]

            # 1. Try the expected key directly.
            raw_value = extract_value_by_regex(text, expected_key)
            if raw_value is not None:
                result[expected_key] = {
                    "value": self._coerce_type(raw_value, expected_type),
                    "confidence": compute_confidence("regex"),
                }
                continue

            # 2. Try the closest key-like token that appears in the text.
            if candidate_keys is None:
                candidate_keys = self._get_all_keys_from_text(text)
            fuzzy_key, score = fuzzy_match_key(expected_key, candidate_keys)
            if fuzzy_key:
                raw_value = extract_value_by_regex(text, fuzzy_key)
                if raw_value is not None:
                    result[expected_key] = {
                        "value": self._coerce_type(raw_value, expected_type),
                        "confidence": compute_confidence("fuzzy") * score,
                    }
                    continue

            # 3. Fallback: nothing usable was found for this key.
            result[expected_key] = {"value": None, "confidence": 0.0}

        return result

    def _get_all_keys_from_text(self, text: str) -> list:
        """Return the unique key-like tokens in *text*, in order of appearance.

        A token counts as a key when it consists of word characters or
        hyphens, is optionally wrapped in quotes, and is followed by ``:``
        or ``=``. Order is kept stable (first occurrence wins) so that the
        candidate list handed to the fuzzy matcher is the same on every run.
        """
        return list(dict.fromkeys(_KEY_PATTERN.findall(text)))

    def _coerce_type(self, value, expected_type):
        """Best-effort conversion of *value* to *expected_type*.

        Args:
            value: The raw extracted value.
            expected_type: A callable (typically a type) used to convert
                *value*. ``bool`` is special-cased: the strings ``true``,
                ``yes`` and ``1`` (case-insensitive, surrounding whitespace
                ignored) are truthy and everything else is falsy.

        Returns:
            The converted value, or *value* unchanged when conversion fails.
        """
        try:
            if expected_type is bool:
                # bool("false") would be True, so compare against an
                # explicit list of truthy spellings instead.
                return value.strip().lower() in ("true", "yes", "1")
            return expected_type(value)
        except Exception:
            # Deliberate best-effort: any conversion failure falls back to
            # the original value, but KeyboardInterrupt/SystemExit propagate.
            return value