Spaces:
Running
Running
File size: 1,965 Bytes
8983b2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
from slotmatch.schema import SchemaValidator
from slotmatch.utils import extract_value_by_regex, fuzzy_match_key, compute_confidence
class SlotExtractor:
    """Pull the values of schema-defined slots out of a piece of text.

    For every key in the schema the extractor first looks the key up in the
    text directly; if that yields nothing it retries with the best fuzzy
    match among the key-like tokens found in the text; otherwise the slot is
    reported as missing.
    """

    def __init__(self, schema: dict):
        """Validate *schema* and cache it together with its key list.

        Args:
            schema: Mapping of slot name to the expected type of its value
                (the type is called as a converter in ``_coerce_type``).
        """
        self.validator = SchemaValidator(schema)
        self.schema = self.validator.get_schema()
        self.schema_keys = list(self.schema.keys())

    def extract(self, text: str) -> dict:
        """Extract every schema slot from *text*.

        Args:
            text: The raw text to search.

        Returns:
            A dict with one entry per schema key, each of the form
            ``{"value": ..., "confidence": ...}``. Slots that could not be
            found have ``value`` ``None`` and ``confidence`` ``0.0``.
        """
        result = {}
        # The key-like tokens depend only on ``text``, so they are computed
        # at most once per call, and only if some key needs the fuzzy path.
        candidate_keys = None

        for expected_key in self.schema_keys:
            # 1. Try the schema key verbatim.
            raw_value = extract_value_by_regex(text, expected_key)
            if raw_value is not None:
                result[expected_key] = {
                    "value": self._coerce_type(raw_value, self.schema[expected_key]),
                    "confidence": compute_confidence("regex"),
                }
                continue

            # 2. Try the closest key-like token that appears in the text.
            if candidate_keys is None:
                candidate_keys = self._get_all_keys_from_text(text)
            fuzzy_key, score = fuzzy_match_key(expected_key, candidate_keys)
            if fuzzy_key:
                raw_value = extract_value_by_regex(text, fuzzy_key)
                if raw_value is not None:
                    result[expected_key] = {
                        "value": self._coerce_type(raw_value, self.schema[expected_key]),
                        # Scale the fuzzy confidence by the match score.
                        "confidence": compute_confidence("fuzzy") * score,
                    }
                    continue

            # 3. Fallback: nothing usable was found for this key.
            result[expected_key] = {
                "value": None,
                "confidence": 0.0,
            }
        return result

    def _get_all_keys_from_text(self, text: str) -> list:
        """Return the distinct key-like tokens found in *text*.

        A token counts as a key when it is a run of word characters or
        hyphens, optionally wrapped in quotes, followed by optional
        whitespace and a ``:`` or ``=``. The order of the returned list is
        unspecified because duplicates are removed through a set.
        """
        import re

        pattern = r'["\']?([\w-]+)["\']?\s*[:=]'
        return list(set(re.findall(pattern, text)))

    def _coerce_type(self, value, expected_type):
        """Convert *value* with *expected_type*, best effort.

        Args:
            value: The raw extracted value.
            expected_type: Callable used as the converter. ``bool`` is
                special-cased: the strings ``true``, ``yes`` and ``1``
                (case-insensitive) map to ``True``, any other string to
                ``False``.

        Returns:
            The converted value, or *value* unchanged when the conversion
            fails (including a non-string *value* with ``bool``).
        """
        try:
            if expected_type == bool:
                return value.lower() in ['true', 'yes', '1']
            return expected_type(value)
        except Exception:
            # Deliberate best-effort fallback. ``Exception`` rather than a
            # bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
            return value