Spaces:
Running
Running
import re

from slotmatch.schema import SchemaValidator
from slotmatch.utils import extract_value_by_regex, fuzzy_match_key, compute_confidence
class SlotExtractor:
    """Pull the values of schema-defined keys ("slots") out of free text.

    The schema is passed through ``SchemaValidator`` and the mapping it
    returns from ``get_schema()`` is used as ``{key: expected_type}``;
    each expected type is called on the raw extracted value to coerce it.
    """

    # An optionally quoted run of word characters / hyphens that is
    # followed (after optional whitespace) by ':' or '='.  Compiled once
    # at class creation instead of on every call.
    _KEY_PATTERN = re.compile(r'["\']?([\w-]+)["\']?\s*[:=]')

    def __init__(self, schema: dict):
        """Validate *schema* and cache the normalized schema and its keys."""
        self.validator = SchemaValidator(schema)
        self.schema = self.validator.get_schema()
        self.schema_keys = list(self.schema.keys())

    def extract(self, text: str) -> dict:
        """Extract one entry per schema key from *text*.

        For every key the following strategies are tried in order:

        1. ``extract_value_by_regex`` with the schema key itself.
        2. ``fuzzy_match_key`` against the key-like tokens found in
           *text*, then ``extract_value_by_regex`` with the matched key.
        3. Fallback: value ``None`` with confidence ``0.0``.

        Returns:
            A dict mapping every schema key to
            ``{"value": ..., "confidence": ...}``.  For a fuzzy hit the
            confidence is ``compute_confidence("fuzzy")`` multiplied by
            the score returned by ``fuzzy_match_key``.
        """
        result = {}
        # The candidate keys depend only on ``text``, so scan for them at
        # most once per call, and only if some key misses the direct
        # lookup.
        candidate_keys = None

        for expected_key in self.schema_keys:
            # 1. Try the schema key verbatim.
            raw_value = extract_value_by_regex(text, expected_key)
            if raw_value is not None:
                result[expected_key] = {
                    "value": self._coerce_type(raw_value, self.schema[expected_key]),
                    "confidence": compute_confidence("regex"),
                }
                continue

            # 2. Try the closest key-like token that appears in the text.
            if candidate_keys is None:
                candidate_keys = self._get_all_keys_from_text(text)
            fuzzy_key, score = fuzzy_match_key(expected_key, candidate_keys)
            if fuzzy_key:
                raw_value = extract_value_by_regex(text, fuzzy_key)
                if raw_value is not None:
                    result[expected_key] = {
                        "value": self._coerce_type(raw_value, self.schema[expected_key]),
                        "confidence": compute_confidence("fuzzy") * score,
                    }
                    continue

            # 3. Nothing usable was found for this key.
            result[expected_key] = {
                "value": None,
                "confidence": 0.0,
            }
        return result

    def _get_all_keys_from_text(self, text: str) -> list:
        """Return the distinct key-like tokens in *text*.

        A token is a run of word characters or hyphens, optionally
        quoted, that is followed by ``:`` or ``=``.  Duplicates are
        dropped while first-occurrence order is kept, so the result is
        the same on every run (a ``set`` round-trip would reorder strings
        unpredictably under hash randomization).
        """
        return list(dict.fromkeys(self._KEY_PATTERN.findall(text)))

    def _coerce_type(self, value, expected_type):
        """Convert *value* with *expected_type*, best effort.

        ``bool`` is special-cased: the result is ``True`` only when the
        lower-cased value is ``"true"``, ``"yes"`` or ``"1"``.  Any other
        type is simply called on the value.  If the conversion raises,
        the original value is returned unchanged.
        """
        try:
            if expected_type is bool:
                return value.lower() in ("true", "yes", "1")
            return expected_type(value)
        except Exception:
            # Deliberate best-effort fallback.  ``Exception`` rather than
            # a bare ``except`` so KeyboardInterrupt / SystemExit still
            # propagate.
            return value