Spaces:
Running
Running
File size: 678 Bytes
8983b2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
import re
import difflib
def extract_value_by_regex(text: str, key: str):
    """Return the first quoted value assigned to *key* in *text*.

    The search looks for ``key`` (optionally wrapped in single or double
    quotes), followed by ``:`` or ``=``, followed by a value wrapped in
    single or double quotes. The value must be non-empty and may not
    contain quote characters itself.

    Args:
        text: The text to scan.
        key: The literal key name to look for. It is matched verbatim;
            regex metacharacters in it have no special meaning.

    Returns:
        The value with its surrounding quotes removed, or ``None`` when no
        ``key``/value pair of that shape is found.
    """
    # Escape the key so characters such as ".", "+", "(" or "$" are matched
    # literally. Unescaped, they could match unrelated text, raise re.error,
    # or introduce an extra capture group that shifts group(1) off the value.
    pattern = rf'["\']?{re.escape(key)}["\']?\s*[:=]\s*["\']([^"\']+)["\']'
    match = re.search(pattern, text)
    if match:
        return match.group(1)
    return None
def fuzzy_match_key(input_key: str, schema_keys: list, cutoff: float = 0.75):
    """Find the schema key that most closely resembles *input_key*.

    Args:
        input_key: The key to look up.
        schema_keys: Candidate keys to compare against.
        cutoff: Minimum similarity passed to ``difflib.get_close_matches``;
            candidates scoring below it are ignored.

    Returns:
        A ``(key, score)`` tuple, where ``score`` is the
        ``difflib.SequenceMatcher`` ratio between *input_key* and the chosen
        key. When no candidate passes the cutoff, ``(None, 0.0)``.
    """
    candidates = difflib.get_close_matches(
        input_key, schema_keys, n=1, cutoff=cutoff
    )
    if not candidates:
        return None, 0.0

    winner = candidates[0]
    similarity = difflib.SequenceMatcher(None, input_key, winner).ratio()
    return winner, similarity
def compute_confidence(method: str):
    """Return the fixed confidence score associated with *method*.

    Args:
        method: Name of the extraction method: ``"regex"``, ``"fuzzy"`` or
            ``"semantic"``.

    Returns:
        ``1.0`` for ``"regex"``, ``0.7`` for ``"fuzzy"``, ``0.5`` for
        ``"semantic"``, and ``0.0`` for any other value.
    """
    scores = {
        "regex": 1.0,
        "fuzzy": 0.7,
        "semantic": 0.5,
    }
    try:
        return scores[method]
    except KeyError:
        # Unrecognised methods carry no confidence.
        return 0.0