Spaces:
Sleeping
Sleeping
import platform
import re
import shutil

import cv2
import pytesseract

from .file_utils import convert_image_to_word
def configure_tesseract():
    """Point pytesseract at a Tesseract executable.

    The executable is looked up on ``PATH`` first, so a standard installation
    works without editing this file. If nothing is found there, the previous
    per-platform default location is used as a fallback.

    Side effects:
        Sets ``pytesseract.pytesseract.tesseract_cmd``.
    """
    found_on_path = shutil.which("tesseract")
    if found_on_path:
        pytesseract.pytesseract.tesseract_cmd = found_on_path
        return

    if platform.system() == "Windows":
        # Legacy default: a per-user install path from one specific machine.
        # Kept only as a last resort for setups that relied on it.
        pytesseract.pytesseract.tesseract_cmd = r"C:\Users\hp\AppData\Local\Programs\Tesseract-OCR\tesseract.exe"
    else:
        pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
def extract_ktp_info(image_path, filename):
    """Run OCR on a KTP card image and return the recognised fields.

    The image is converted to grayscale and passed to Tesseract with the
    ``ind`` language model; the resulting text is mapped to fields by
    ``_parse_ktp_text``.

    Args:
        image_path: Path of the image file to read.
        filename: Name intended for the Word export. The export call is
            currently commented out, so this argument is unused.

    Returns:
        dict: Keys ``nik``, ``nama``, ``tempat_tgl_lahir``, ``jenis_kelamin``,
        ``alamat``, ``rt_rw``, ``kel/desa``, ``kecamatan``, ``provinsi``,
        ``agama``, ``kewarganegaraan`` and ``pekerjaan``. A field whose line
        is missing from the OCR output is an empty string.

    Raises:
        ValueError: If the image at ``image_path`` cannot be read.
    """
    configure_tesseract()

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an opaque error inside cvtColor.
        raise ValueError(f"Could not read image: {image_path!r}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    text = pytesseract.image_to_string(gray, lang='ind')

    result = _parse_ktp_text(text)
    # convert_image_to_word(result, f'KTP {filename}')
    return result


def _parse_ktp_text(text):
    """Map raw KTP OCR text to the field dict; missing fields become ''."""
    # Collapse every run of blank lines so the anchor regex below sees the
    # markers and the values separated by single newlines.
    text = re.sub(r'\n{2,}', '\n', text)

    lines = []
    match = re.search(r'Berlaku Hingga\n(.*?)\nGol\. Darah', text, re.DOTALL)
    if match:
        # NOTE(review): the cleaning pattern also strips a leading run of
        # digits, so a value line that starts with digits and has no leading
        # ':' (e.g. a bare NIK) loses them -- confirm this is intended.
        lines = [
            re.sub(r'^(:|\d+)?\s*', '', line.strip())
            for line in match.group(1).strip().split('\n')
            if line.strip()
        ]
        # NOTE(review): this prints the extracted personal data to stdout;
        # consider removing it or gating it behind a debug flag.
        print("Hasil List Bersih:")
        print(lines)

    def line_at(index):
        # OCR output is often shorter than expected; a missing line yields ''
        # instead of aborting the whole extraction.
        return lines[index] if index < len(lines) else ""

    provinsi = line_at(13)
    if not provinsi:
        # Fall back to the text following the "PROVINSI" header when the
        # positional line is absent.
        header = re.search(r'PROVINSI\s+(.+?)\n(.+?)\n', text)
        if header:
            provinsi = header.group(1).strip()

    return {
        "nik": line_at(0),
        "nama": line_at(1),
        "tempat_tgl_lahir": line_at(2),
        "jenis_kelamin": line_at(3),
        "alamat": line_at(4),
        "rt_rw": line_at(5),
        "kel/desa": line_at(6),
        "kecamatan": line_at(7),
        "provinsi": provinsi,
        "agama": line_at(8),
        "kewarganegaraan": line_at(10),
        "pekerjaan": line_at(9),
    }