Spaces:
Build error
Build error
File size: 1,682 Bytes
2ea1a93 311c406 2ea1a93 c422e81 311c406 2ea1a93 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import pandas as pd
import streamlit as st
import skimage.io as io
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from skimage.filters import threshold_local
import pytesseract
import re
import os
from pytesseract import Output
def _install_tesseract():
    """Install the Tesseract OCR binary via apt-get unless it is already on PATH.

    Best effort: the exit status of the install command is ignored, so a
    failure here surfaces later when pytesseract tries to run tesseract.
    """
    import shutil  # local import: only needed for this bootstrap check

    # The script may be executed many times (Streamlit re-runs it on each
    # interaction), so skip the install once the executable is available.
    if shutil.which('tesseract') is not None:
        return
    # -y answers apt-get's confirmation prompt; without it the install aborts
    # when there is no terminal to confirm on.
    os.system('apt-get install -y tesseract-ocr')


_install_tesseract()
def plot_gray(image):
    """Show *image* in a new 16x10 matplotlib figure using the 'Greys_r' colormap.

    Returns:
        The object returned by ``plt.imshow``.
    """
    figure_size = (16, 10)
    plt.figure(figsize=figure_size)
    rendered = plt.imshow(image, cmap='Greys_r')
    return rendered
def plot_rgb(image):
    """Show a BGR *image* in a new 16x10 matplotlib figure.

    The channels are converted from BGR to RGB before display.

    Returns:
        The object returned by ``plt.imshow``.
    """
    figure_size = (16, 10)
    rgb_view = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=figure_size)
    return plt.imshow(rgb_view)
def bw_scanner(image):
    """Binarise a BGR *image* with a local (adaptive) gaussian threshold.

    The image is converted to grayscale, a per-pixel threshold is computed
    with ``threshold_local`` (block size 21, offset 5, gaussian method), and
    every pixel brighter than its threshold becomes 255, all others 0.

    Returns:
        A ``uint8`` array containing only the values 0 and 255.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    local_threshold = threshold_local(grayscale, 21, offset=5, method="gaussian")
    above_threshold = grayscale > local_threshold
    return above_threshold.astype("uint8") * 255
def text_box_detection(image):
    """Draw a green rectangle around every region Tesseract reports for *image*.

    Args:
        image: NumPy image array, either single-channel (2-D, or 3-D with one
            channel) or multi-channel in BGR order.

    Returns:
        A new RGB array based on *image* with one 2-pixel-wide green
        rectangle per entry in the ``pytesseract.image_to_data`` result.
    """
    d = pytesseract.image_to_data(image, output_type=Output.DICT)

    # COLOR_BGR2RGB rejects single-channel input, so grayscale / binarised
    # images (such as the output of bw_scanner) are expanded with GRAY2RGB.
    single_channel = image.ndim == 2 or image.shape[2] == 1
    conversion = cv2.COLOR_GRAY2RGB if single_channel else cv2.COLOR_BGR2RGB
    # cvtColor allocates a fresh output array, so the input is never drawn on.
    boxes = cv2.cvtColor(image, conversion)

    for x, y, w, h in zip(d['left'], d['top'], d['width'], d['height']):
        boxes = cv2.rectangle(boxes, (x, y), (x + w, y + h), (0, 255, 0), 2)
    return boxes
def ui():
    """Render the Streamlit page: upload an image, show text boxes, print OCR text.

    Once a file is uploaded, the image is binarised with ``bw_scanner``, the
    regions found by ``text_box_detection`` are displayed, and the text
    returned by ``pytesseract.image_to_string`` is written to the page.
    Nothing beyond the title and uploader is rendered until a file is chosen.
    """
    st.markdown("# Text Extraction")
    uploaded_file = st.file_uploader("Upload an Image", type=['png', 'jpeg', 'jpg'])
    if uploaded_file is None:
        return

    image = Image.open(uploaded_file)
    img_array = np.array(image)

    # PIL decodes to RGB (or RGBA / palette / grayscale), while bw_scanner
    # converts with COLOR_BGR2GRAY. Normalise to 3-channel RGB, then reorder
    # to BGR so the grayscale weights are applied to the right channels.
    bgr_array = cv2.cvtColor(np.array(image.convert('RGB')), cv2.COLOR_RGB2BGR)
    gray_image = bw_scanner(bgr_array)

    # bw_scanner returns a single-channel image, but text_box_detection
    # performs a BGR->RGB conversion, so give it three channels.
    boxes = text_box_detection(cv2.cvtColor(gray_image, cv2.COLOR_GRAY2BGR))
    st.image(boxes, width=500, channels='RGB')

    # OCR still runs on the array as decoded by PIL, unchanged.
    extracted_text = pytesseract.image_to_string(img_array)
    st.markdown(f"Predicted Text {extracted_text}")
# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    ui()
|