Spaces:
Build error
Build error
import os
import re
import shutil
import subprocess

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytesseract
import seaborn as sns
import skimage.io as io
import streamlit as st
from PIL import Image
from pytesseract import Output
from skimage.filters import threshold_local
# Best-effort install of the Tesseract binary that pytesseract shells out to.
# Streamlit re-executes this script on every interaction, so the install is
# attempted only when `tesseract` is not already on PATH. `-y` answers the
# apt-get confirmation prompt, which cannot be answered without an interactive
# stdin. The argv list avoids going through a shell, and check=False keeps a
# failed install from crashing the app (os.system never raised either).
if shutil.which("tesseract") is None and shutil.which("apt-get") is not None:
    subprocess.run(["apt-get", "install", "-y", "tesseract-ocr"], check=False)
def plot_gray(image):
    """Show *image* on a new 16x10 matplotlib figure using the 'Greys_r' colormap.

    Returns whatever ``plt.imshow`` returns for the drawn image.
    """
    figure_size = (16, 10)
    plt.figure(figsize=figure_size)
    rendered = plt.imshow(image, cmap='Greys_r')
    return rendered
def plot_rgb(image):
    """Show a BGR *image* on a new 16x10 matplotlib figure.

    The channels are reordered with ``cv2.COLOR_BGR2RGB`` before display.
    Returns whatever ``plt.imshow`` returns for the drawn image.
    """
    figure_size = (16, 10)
    plt.figure(figsize=figure_size)
    rgb_view = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return plt.imshow(rgb_view)
def bw_scanner(image):
    """Binarise a BGR *image* with a local (adaptive) Gaussian threshold.

    The image is converted to grayscale, then each pixel is compared with the
    threshold ``threshold_local`` computes for it (block size 21, offset 5).
    Returns a uint8 array holding 255 where the pixel exceeds its local
    threshold and 0 elsewhere.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    local_threshold = threshold_local(grayscale, 21, offset=5, method="gaussian")
    above_threshold = grayscale > local_threshold
    binary = above_threshold.astype("uint8")
    return binary * 255
def text_box_detection(image):
    """Draw a green rectangle around every box Tesseract reports for *image*.

    Args:
        image: NumPy image array, either 3/4-channel BGR or single-channel
            grayscale (for example the output of ``bw_scanner``).

    Returns:
        An RGB copy of *image* with one 2-px green rectangle per entry
        returned by ``pytesseract.image_to_data``. The input is not modified.
    """
    d = pytesseract.image_to_data(image, output_type=Output.DICT)

    # COLOR_BGR2RGB rejects single-channel input, so a grayscale/binarised
    # image has to be expanded with COLOR_GRAY2RGB instead. cvtColor returns
    # a new array either way, so no explicit copy is needed.
    is_single_channel = image.ndim == 2 or image.shape[2] == 1
    conversion = cv2.COLOR_GRAY2RGB if is_single_channel else cv2.COLOR_BGR2RGB
    boxes = cv2.cvtColor(image, conversion)

    # The DICT output holds parallel lists, one entry per detected box.
    for x, y, w, h in zip(d['left'], d['top'], d['width'], d['height']):
        boxes = cv2.rectangle(boxes, (x, y), (x + w, y + h), (0, 255, 0), 2)
    return boxes
def ui():
    """Render the Streamlit page: upload an image, show text boxes and OCR text.

    Once a file is uploaded, the image is binarised with ``bw_scanner``, the
    boxes found by ``text_box_detection`` are displayed, and the text returned
    by ``pytesseract.image_to_string`` is written below. Returns ``None``.
    """
    st.markdown("# Text Extraction")
    uploaded_file = st.file_uploader("Upload an Image", type=['png', 'jpeg', 'jpg'])
    if uploaded_file is None:
        return

    # Normalise every upload (grayscale, palette, RGBA, ...) to 3-channel RGB
    # so the OpenCV conversions below always receive the layout they expect.
    image = Image.open(uploaded_file).convert("RGB")
    img_array = np.array(image)

    # PIL delivers RGB, while bw_scanner converts with COLOR_BGR2GRAY; swap
    # the channel order first so the grayscale weights hit the right channels.
    bgr_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
    gray_image = bw_scanner(bgr_array)

    # text_box_detection applies a BGR->RGB conversion to its input, which
    # needs three channels, so expand the binarised image before passing it.
    boxes = text_box_detection(cv2.cvtColor(gray_image, cv2.COLOR_GRAY2BGR))
    st.image(boxes, width=500, channels='RGB')

    # OCR still runs on the original-colour RGB array, as before.
    extracted_text = pytesseract.image_to_string(img_array)
    st.markdown(f"Predicted Text {extracted_text}")
# Script entry point: build the page only when this file is executed directly.
if '__main__' == __name__:
    ui()