File size: 678 Bytes
8983b2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import re
import difflib

def extract_value_by_regex(text: str, key: str):
    """Return the quoted value assigned to ``key`` in ``text``, or ``None``.

    Searches for the first occurrence of ``key`` (optionally wrapped in
    single or double quotes), followed by ``:`` or ``=`` and then a quoted
    value, e.g. ``"name": "Alice"`` or ``name = 'Alice'``.

    Args:
        text: The text to search.
        key: The key name to look for. It is matched literally, so regex
            metacharacters in it (``.``, ``+``, ``(`` ...) carry no special
            meaning.

    Returns:
        The text between the value's quotes, or ``None`` when nothing
        matches. The value must be non-empty and cannot itself contain a
        single or double quote character.
    """
    # Escape the key so characters such as "." or "(" match themselves
    # instead of being read as regex syntax (which could match the wrong
    # key, shift the capture group, or raise re.error).
    pattern = rf'["\']?{re.escape(key)}["\']?\s*[:=]\s*["\']([^"\']+)["\']'
    match = re.search(pattern, text)
    return match.group(1) if match else None

def fuzzy_match_key(input_key: str, schema_keys: list, cutoff: float = 0.75):
    """Find the schema key that most closely resembles ``input_key``.

    Args:
        input_key: The key to look up.
        schema_keys: Candidate keys to compare against.
        cutoff: Similarity threshold passed to
            ``difflib.get_close_matches``.

    Returns:
        A ``(key, score)`` tuple, where ``score`` is the
        ``difflib.SequenceMatcher`` ratio between ``input_key`` and the
        chosen key. ``(None, 0.0)`` when no candidate passes the cutoff.
    """
    candidates = difflib.get_close_matches(
        input_key, schema_keys, n=1, cutoff=cutoff
    )
    if not candidates:
        return None, 0.0

    winner = candidates[0]
    # The score is recomputed here because get_close_matches only returns
    # the matching strings, not their ratios.
    similarity = difflib.SequenceMatcher(None, input_key, winner).ratio()
    return winner, similarity

def compute_confidence(method: str):
    """Return the fixed confidence score associated with a match method.

    Args:
        method: Name of the matching method: ``"regex"``, ``"fuzzy"`` or
            ``"semantic"``.

    Returns:
        ``1.0`` for ``"regex"``, ``0.7`` for ``"fuzzy"``, ``0.5`` for
        ``"semantic"``, and ``0.0`` for any other value.
    """
    scores_by_method = dict(regex=1.0, fuzzy=0.7, semantic=0.5)
    if method in scores_by_method:
        return scores_by_method[method]
    return 0.0