import difflib
import re
from typing import Optional, Tuple

# Confidence score assigned to each matching strategy. Built once at import
# time rather than on every compute_confidence() call.
_METHOD_CONFIDENCE = {
    "regex": 1.0,
    "fuzzy": 0.7,
    "semantic": 0.5,
}


def extract_value_by_regex(text: str, key: str) -> Optional[str]:
    """Return the quoted value assigned to *key* in *text*, or ``None``.

    Looks for the first occurrence of ``key`` (optionally wrapped in single
    or double quotes), followed by ``:`` or ``=``, followed by a non-empty
    value wrapped in single or double quotes, e.g. ``name: "Alice"`` or
    ``'name' = 'Alice'``.

    Args:
        text: The text to search.
        key: The literal key name to look for. It is matched verbatim;
            regex metacharacters in it have no special meaning.

    Returns:
        The value between the quotes (without the quotes), or ``None`` if
        no such assignment is found.
    """
    # Escape the key so characters like '.', '(' or '$' are matched literally
    # instead of being interpreted as regex syntax (or raising re.error).
    escaped_key = re.escape(key)
    # (?<!\w) stops the key from matching as the tail of a longer identifier,
    # e.g. key "id" must not pick up the value of "user_id".
    pattern = rf'(?<!\w)["\']?{escaped_key}["\']?\s*[:=]\s*["\']([^"\']+)["\']'
    match = re.search(pattern, text)
    if match is None:
        return None
    return match.group(1)


def fuzzy_match_key(
    input_key: str, schema_keys: list, cutoff: float = 0.75
) -> Tuple[Optional[str], float]:
    """Find the entry of *schema_keys* that most closely resembles *input_key*.

    Args:
        input_key: The key to look up.
        schema_keys: Candidate keys to compare against.
        cutoff: Minimum similarity passed to ``difflib.get_close_matches``;
            candidates scoring below it are ignored.

    Returns:
        ``(best_key, ratio)`` where ``ratio`` is the ``SequenceMatcher``
        similarity between *input_key* and ``best_key``, or ``(None, 0.0)``
        when no candidate reaches *cutoff*.
    """
    candidates = difflib.get_close_matches(
        input_key, schema_keys, n=1, cutoff=cutoff
    )
    if not candidates:
        return None, 0.0

    best = candidates[0]
    # NOTE: the score is recomputed with input_key as the first sequence.
    # SequenceMatcher.ratio() can depend on argument order, so this value may
    # differ marginally from the one get_close_matches used for ranking.
    score = difflib.SequenceMatcher(None, input_key, best).ratio()
    return best, score


def compute_confidence(method: str) -> float:
    """Return the confidence score for a matching *method*.

    Known methods are ``"regex"`` (1.0), ``"fuzzy"`` (0.7) and
    ``"semantic"`` (0.5); any other value yields ``0.0``.
    """
    return _METHOD_CONFIDENCE.get(method, 0.0)