Spaces:

Vageesh1
/

Text_Recogniser

Build error

Text_Recogniser / app.py

Update app.py

c422e81 about 2 years ago

1.68 kB

	import pandas as pd
	import streamlit as st
	import skimage.io as io
	from PIL import Image
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import cv2
	from skimage.filters import threshold_local
	import pytesseract
	import re
	import os
	from pytesseract import Output

	# Install the Tesseract OCR system package at import time (presumably the
	# engine that pytesseract relies on -- confirm against deployment setup).
	# `-y` answers apt-get's confirmation prompt automatically; without it the
	# install aborts when no terminal is attached to answer the prompt.
	# NOTE: the exit status is not checked, so a failed install does not stop
	# the app from starting.
	os.system('apt-get install -y tesseract-ocr')

	def plot_gray(image):
	    """Show ``image`` on a new 16x10 figure using the ``Greys_r`` colormap.

	    Returns whatever ``plt.imshow`` returns for the drawn image.
	    """
	    plt.figure(figsize=(16, 10))
	    rendered = plt.imshow(image, cmap='Greys_r')
	    return rendered

	def plot_rgb(image):
	    """Show a BGR ``image`` on a new 16x10 figure.

	    The channels are reordered from BGR to RGB before plotting, and the
	    object returned by ``plt.imshow`` is handed back to the caller.
	    """
	    plt.figure(figsize=(16, 10))
	    rgb_view = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
	    return plt.imshow(rgb_view)
	def bw_scanner(image):
	    """Binarise a BGR ``image`` with a local (adaptive) threshold.

	    The image is converted to grayscale, a per-pixel threshold is computed
	    with ``threshold_local`` (block size 21, offset 5, gaussian method),
	    and pixels brighter than their threshold become 255 while all others
	    become 0.

	    Returns:
	        A ``uint8`` array holding only the values 0 and 255.
	    """
	    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    local_threshold = threshold_local(
	        grayscale, 21, offset=5, method="gaussian"
	    )
	    above_threshold = grayscale > local_threshold
	    return above_threshold.astype("uint8") * 255

	def text_box_detection(image):
	    """Draw a green rectangle around every region Tesseract reports.

	    Args:
	        image: Array accepted by ``pytesseract.image_to_data``; either a
	            single-channel image (2-D, or 3-D with one channel) or a
	            multi-channel image in BGR order.

	    Returns:
	        An RGB array with a 2-pixel green box drawn for each entry of
	        the ``left``/``top``/``width``/``height`` lists returned by
	        Tesseract.
	    """
	    data = pytesseract.image_to_data(image, output_type=Output.DICT)

	    # COLOR_BGR2RGB rejects single-channel input (such as the binary mask
	    # produced by bw_scanner), so expand gray to RGB in that case.
	    single_channel = image.ndim == 2 or image.shape[2] == 1
	    conversion = cv2.COLOR_GRAY2RGB if single_channel else cv2.COLOR_BGR2RGB
	    # cvtColor returns a new array, so the caller's image is never drawn on.
	    boxes = cv2.cvtColor(image, conversion)

	    corners = zip(data['left'], data['top'], data['width'], data['height'])
	    for x, y, w, h in corners:
	        boxes = cv2.rectangle(boxes, (x, y), (x + w, y + h), (0, 255, 0), 2)

	    return boxes

	def ui():
	    """Render the Streamlit page for uploading an image and reading its text.

	    Shows a title and a file uploader (png/jpeg/jpg). Once a file is
	    uploaded, the image is binarised with ``bw_scanner``, the regions
	    found by ``text_box_detection`` are displayed at width 500, and the
	    text recognised by ``pytesseract.image_to_string`` is written below.
	    Nothing beyond the title and uploader is rendered until a file is
	    provided.
	    """
	    st.markdown("# Text Extraction")
	    uploaded_file = st.file_uploader("Upload an Image", type=['png', 'jpeg', 'jpg'])
	    if uploaded_file is None:
	        return

	    # Uploads may be RGBA, palette or grayscale; normalise to 3-channel RGB
	    # so the OpenCV conversions below always get the layout they expect.
	    image = Image.open(uploaded_file).convert('RGB')
	    img_array = np.array(image)

	    # PIL yields RGB, but bw_scanner converts with COLOR_BGR2GRAY, so hand
	    # it the channels in BGR order.
	    bgr_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
	    gray_image = bw_scanner(bgr_array)

	    # text_box_detection applies a BGR->RGB conversion to its input, which
	    # needs a multi-channel image; expand the single-channel mask first.
	    boxes = text_box_detection(cv2.cvtColor(gray_image, cv2.COLOR_GRAY2BGR))
	    st.image(boxes, width=500, channels='RGB')

	    extracted_text = pytesseract.image_to_string(img_array)
	    st.markdown(f"Predicted Text {extracted_text}")

	# Script entry point: build the page when this file is executed directly.
	if __name__ == "__main__":
	    ui()