import os
import io
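
# Keras 3 selects its backend from KERAS_BACKEND at import time, so it is set
# before `import keras`.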
os.environ["KERAS_BACKEND"] = "tensorflow"
import keras
import numpy as np
from PIL import Image
import gradio as gr
import tensorflow as tf
from keras import layers


def ctc_batch_cost(y_true, y_pred, input_length, label_length):
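    """Runs CTC (Connectionist Temporal Classification) loss on each batch
    element. A stand-in for the old `keras.backend.ctc_batch_cost`, built on
    `tf.compat.v1.nn.ctc_loss` so it works with Keras 3 on the TensorFlow
    backend.
    """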
    label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32)
    input_length = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32)
    sparse_labels = tf.cast(ctc_label_dense_to_sparse(y_true, label_length), tf.int32)

    y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon())

    return tf.expand_dims(
        tf.compat.v1.nn.ctc_loss(
            inputs=y_pred, labels=sparse_labels, sequence_length=input_length
        ),
        1,
    )

def ctc_label_dense_to_sparse(labels, label_lengths):
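    """Converts dense, padded labels to the `tf.SparseTensor` form that
    `tf.compat.v1.nn.ctc_loss` expects.
    """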
    label_shape = tf.shape(labels)
    num_batches_tns = tf.stack([label_shape[0]])
    max_num_labels_tns = tf.stack([label_shape[1]])

    def range_less_than(old_input, current_input):
        return tf.expand_dims(tf.range(tf.shape(old_input)[1]), 0) < tf.fill(
            max_num_labels_tns, current_input
        )

    init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool)
    dense_mask = tf.compat.v1.scan(
        range_less_than, label_lengths, initializer=init, parallel_iterations=1
    )
    dense_mask = dense_mask[:, 0, :]

    label_array = tf.reshape(
        tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape
    )
    label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask)

    batch_array = tf.transpose(
        tf.reshape(
            tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
            tf.reverse(label_shape, [0]),
        )
    )
    batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask)
    indices = tf.transpose(
        tf.reshape(tf.concat([batch_ind, label_ind], axis=0), [2, -1])
    )

    vals_sparse = tf.compat.v1.gather_nd(labels, indices)

    return tf.SparseTensor(
        tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64)
    )
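

# `keras.backend.ctc_decode` is not available in Keras 3 (the old backend ops
# were removed), so a minimal greedy-only port of it is sketched here for use
# by `decode_batch_predictions` below.
def ctc_decode(y_pred, input_length, greedy=True):
    input_shape = tf.shape(y_pred)
    num_samples, num_steps = input_shape[0], input_shape[1]
    y_pred = tf.math.log(
        tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon()
    )
    input_length = tf.cast(input_length, tf.int32)

    # Only greedy (best-path) decoding is implemented in this sketch.
    decoded, log_prob = tf.nn.ctc_greedy_decoder(
        inputs=y_pred, sequence_length=input_length
    )

    # Densify with -1 padding so callers can strip the padding easily.
    decoded_dense = []
    for st in decoded:
        st = tf.SparseTensor(st.indices, st.values, (num_samples, num_steps))
        decoded_dense.append(tf.sparse.to_dense(sp_input=st, default_value=-1))
    return (decoded_dense, log_prob)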
class CTCLayer(layers.Layer):
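    """Computes the CTC loss and registers it via `self.add_loss()` during
    training; at inference it passes the predictions through unchanged.
    """
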
    def __init__(self, name=None):
        super().__init__(name=name)
        self.loss_fn = ctc_batch_cost

    def call(self, y_true, y_pred):
        # Compute the training-time loss value and add it to the layer
        # using `self.add_loss()`.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # At test time, just return the computed predictions.
        return y_pred
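

# Rebuild the trained prediction model; `CTCLayer` must be passed via
# `custom_objects` so Keras can deserialize the saved architecture.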
loaded_model = keras.models.load_model(
    "ocr_model_pred.keras", custom_objects={"CTCLayer": CTCLayer}
)
loaded_model.load_weights("ocr_model_pred_weights.h5")
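
# Decoded outputs are truncated to `max_len` characters; `characters` lists the
# label vocabulary (note that '0' and lowercase 'o' are absent).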
max_len = 5
characters = ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
# Mapping characters to integers
char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)
# Mapping integers back to original characters
num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)
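
# Target geometry for the model's input images.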
image_width = 128
image_height = 32
def distortion_free_resize(image, img_size):
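    """Resizes while preserving the aspect ratio, pads to `img_size`, then
    transposes and flips the image so the width becomes the sequence (time)
    axis the CTC model reads.
    """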
    w, h = img_size
    image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True)
    image = tf.image.resize_with_pad(image, target_height=h, target_width=w)
    image = tf.transpose(image, perm=[1, 0, 2])
    image = tf.image.flip_left_right(image)
    return image

def decode_batch_predictions(input_image, img_size=(image_width, image_height)):
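    """Gradio handler: preprocesses a PIL image, runs the OCR model, and
    CTC-decodes the prediction into a string.
    """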
    # Re-encode the PIL image to bytes, then decode it as a single-channel
    # (grayscale) uint8 tensor; any format `tf.io.decode_image` understands works.
    img_byte_array = io.BytesIO()
    input_image.save(img_byte_array, format='JPEG')
    input_image = img_byte_array.getvalue()
    input_image = tf.io.decode_image(input_image, channels=1, dtype=tf.dtypes.uint8)

    input_image = distortion_free_resize(input_image, img_size)
    input_image = tf.expand_dims(input_image, axis=0)  # add a batch dimension
    # Scale pixel values to [0, 1].
    input_image = tf.cast(input_image, tf.float32) / 255.0

    pred = loaded_model.predict(input_image)

    # Greedy CTC decoding, truncated to at most `max_len` characters.
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    results = ctc_decode(pred, input_length=input_len, greedy=True)[0][0][:, :max_len]

    # Map the decoded integer sequences back to text, dropping the -1 padding.
    output_text = []
    for res in results:
        res = tf.gather(res, tf.where(tf.math.not_equal(res, -1)))
        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        output_text.append(res)

    # The interface passes a single image, so return the single decoded string.
    return output_text[0]
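
# Expose the predictor through a simple Gradio UI.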
interface = gr.Interface(
    fn=decode_batch_predictions,
    inputs=gr.Image(label="Input image", type="pil"),
    outputs='text',
    title='Captcha Recognition',
    theme='darkhuggingface',
)
interface.launch(inline=False)