import os
import platform
import re
import shutil

import cv2
import pytesseract

from .file_utils import convert_image_to_word

# Hard-coded install locations used as the first choice on each platform.
_WINDOWS_TESSERACT_CMD = r"C:\Users\hp\AppData\Local\Programs\Tesseract-OCR\tesseract.exe"
_POSIX_TESSERACT_CMD = "/usr/bin/tesseract"

# The field values are taken from the OCR text found between these two labels.
_VALUES_BLOCK_RE = re.compile(r'Berlaku Hingga\n(.*?)\nGol\. Darah', re.DOTALL)

# Strips one leading ':' or a leading run of digits, then any whitespace.
# NOTE(review): a value that starts with digits and has no ':' prefix loses
# those digits as well (e.g. a bare NIK becomes '') -- confirm this is intended.
_LEADING_NOISE_RE = re.compile(r'^(:|\d+)?\s*')

# (result key, index into the cleaned value lines), in output order.
# NOTE(review): "provinsi" is read from index 13 while indexes 11 and 12 are
# unused -- verify against real OCR output.
_FIELD_INDEXES = (
    ("nik", 0),
    ("nama", 1),
    ("tempat_tgl_lahir", 2),
    ("jenis_kelamin", 3),
    ("alamat", 4),
    ("rt_rw", 5),
    ("kel/desa", 6),
    ("kecamatan", 7),
    ("provinsi", 13),
    ("agama", 8),
    ("kewarganegaraan", 10),
    ("pekerjaan", 9),
)


def configure_tesseract():
    """Point pytesseract at the Tesseract executable for this platform.

    The hard-coded per-platform path is used when it exists. If it does not,
    the executable is looked up on ``PATH``; when that lookup also fails the
    hard-coded path is kept, so pytesseract reports the missing binary when
    OCR is attempted.
    """
    if platform.system() == "Windows":
        tesseract_cmd = _WINDOWS_TESSERACT_CMD
    else:
        tesseract_cmd = _POSIX_TESSERACT_CMD

    if not os.path.isfile(tesseract_cmd):
        # Fall back to whatever is on PATH instead of a path that is known
        # to be missing on this machine.
        tesseract_cmd = shutil.which("tesseract") or tesseract_cmd

    pytesseract.pytesseract.tesseract_cmd = tesseract_cmd


def _parse_ktp_text(text):
    """Build the KTP field dict from raw OCR text.

    Args:
        text: OCR output as a single string.

    Returns:
        dict mapping each key in ``_FIELD_INDEXES`` to its cleaned value line.

    Raises:
        ValueError: the 'Berlaku Hingga' ... 'Gol. Darah' section is absent.
        IndexError: the section has fewer value lines than the layout needs.
    """
    # Collapse every run of blank lines; the section regex expects the labels
    # to be separated from the values by exactly one newline.
    text = re.sub(r'\n{2,}', '\n', text)

    match = _VALUES_BLOCK_RE.search(text)
    if match is None:
        raise ValueError(
            "OCR text does not contain the expected "
            "'Berlaku Hingga' ... 'Gol. Darah' section"
        )

    lines = [
        _LEADING_NOISE_RE.sub('', line.strip())
        for line in match.group(1).strip().split('\n')
        if line.strip()
    ]
    print("Hasil List Bersih:")
    print(lines)

    required = max(index for _, index in _FIELD_INDEXES) + 1
    if len(lines) < required:
        # Same exception type plain indexing would raise, with context added.
        raise IndexError(
            f"expected at least {required} value lines in OCR text, "
            f"got {len(lines)}"
        )

    return {field: lines[index] for field, index in _FIELD_INDEXES}


def extract_ktp_info(image_path, filename):
    """Run OCR on a KTP image and return its fields as a dict.

    Args:
        image_path: path of the image file, passed to ``cv2.imread``.
        filename: currently unused; it was only consumed by the disabled
            Word export (``convert_image_to_word``).

    Returns:
        dict with the keys ``nik``, ``nama``, ``tempat_tgl_lahir``,
        ``jenis_kelamin``, ``alamat``, ``rt_rw``, ``kel/desa``,
        ``kecamatan``, ``provinsi``, ``agama``, ``kewarganegaraan`` and
        ``pekerjaan``.

    Raises:
        ValueError: the image cannot be read, or the OCR text lacks the
            expected section.
        IndexError: the OCR text has too few value lines.
    """
    configure_tesseract()

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise ValueError(f"Could not read image: {image_path!r}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    text = pytesseract.image_to_string(gray, lang='ind')
    return _parse_ktp_text(text)