import io
import os

# The Keras backend must be selected before keras is imported.
os.environ["KERAS_BACKEND"] = "tensorflow"

import keras
import numpy as np
import tensorflow as tf
import gradio as gr
from PIL import Image
from keras import layers
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    """CTC loss for a batch: converts the dense labels to the sparse form
    expected by `tf.compat.v1.nn.ctc_loss` and returns per-sample losses."""
    label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32)
    input_length = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32)
    sparse_labels = tf.cast(ctc_label_dense_to_sparse(y_true, label_length), tf.int32)

    # `ctc_loss` expects time-major log-probabilities.
    y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon())

    return tf.expand_dims(
        tf.compat.v1.nn.ctc_loss(
            inputs=y_pred, labels=sparse_labels, sequence_length=input_length
        ),
        1,
    )
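
# Shape contract (illustrative): with batch size B, T decoder time steps,
# C output classes, and labels padded to width L,
#   y_true: (B, L) int labels, y_pred: (B, T, C) softmax outputs,
#   input_length: (B, 1), label_length: (B, 1),
# and the returned loss has shape (B, 1).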
def ctc_label_dense_to_sparse(labels, label_lengths):
    """Convert a dense, padded label matrix into the tf.SparseTensor that
    `tf.compat.v1.nn.ctc_loss` expects, keeping only the first
    `label_lengths[i]` entries of each row."""
    label_shape = tf.shape(labels)
    num_batches_tns = tf.stack([label_shape[0]])
    max_num_labels_tns = tf.stack([label_shape[1]])

    def range_less_than(old_input, current_input):
        return tf.expand_dims(tf.range(tf.shape(old_input)[1]), 0) < tf.fill(
            max_num_labels_tns, current_input
        )

    # Boolean mask that is True at the valid (non-padding) label positions.
    init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool)
    dense_mask = tf.compat.v1.scan(
        range_less_than, label_lengths, initializer=init, parallel_iterations=1
    )
    dense_mask = dense_mask[:, 0, :]

    label_array = tf.reshape(
        tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape
    )
    label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask)

    batch_array = tf.transpose(
        tf.reshape(
            tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
            tf.reverse(label_shape, [0]),
        )
    )
    batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask)

    indices = tf.transpose(
        tf.reshape(tf.concat([batch_ind, label_ind], axis=0), [2, -1])
    )
    vals_sparse = tf.compat.v1.gather_nd(labels, indices)

    return tf.SparseTensor(
        tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64)
    )
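
# Worked example of the dense-to-sparse conversion (values assumed, for
# illustration only):
#   labels        = [[3, 7, 2, 0, 0],
#                    [5, 1, 0, 0, 0]]
#   label_lengths = [3, 2]
# yields a SparseTensor with
#   indices     = [[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]]
#   values      = [3, 7, 2, 5, 1]
#   dense_shape = [2, 5]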
class CTCLayer(layers.Layer):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.loss_fn = ctc_batch_cost

    def call(self, y_true, y_pred):
        # Compute the training-time loss value and add it to the layer
        # using `self.add_loss()`.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # At test time, just return the computed predictions.
        return y_pred
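
# For context (a sketch based on the Keras OCR examples this app appears to
# follow, not code that runs here): during training the layer is wired as
#   labels = keras.Input(name="label", shape=(None,), dtype="float32")
#   output = CTCLayer(name="ctc_loss")(labels, softmax_predictions)
# The prediction model loaded below omits the label input; the class is only
# needed so the saved file can be deserialized.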
loaded_model = keras.models.load_model(
    "ocr_model_pred.keras", custom_objects={"CTCLayer": CTCLayer}
)
loaded_model.load_weights("ocr_model_pred_weights.h5")
max_len = 5  # Maximum number of characters in a captcha.
characters = ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

# Mapping characters to integers.
char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)

# Mapping integers back to original characters.
num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)
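
# Round-trip sanity check (illustrative; index 0 is reserved for the OOV
# token "[UNK]", so '1' maps to 1, ..., 'A' to 10, and so on):
#   char_to_num(tf.strings.unicode_split("A1", "UTF-8"))  # -> [10, 1]
#   tf.strings.reduce_join(num_to_char([10, 1]))          # -> b"A1"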
image_width = 128
image_height = 32


def distortion_free_resize(image, img_size):
    """Resize without distortion: fit within (h, w) preserving the aspect
    ratio, pad to the exact target size, then transpose so that image width
    becomes the first axis and flip to match the training pipeline."""
    w, h = img_size
    image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True)
    image = tf.image.resize_with_pad(image, target_height=h, target_width=w)
    image = tf.transpose(image, perm=[1, 0, 2])
    image = tf.image.flip_left_right(image)
    return image
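
# Illustrative shape walk-through for the default size (w=128, h=32) and a
# single-channel input:
#   resize (aspect preserved) + pad -> (32, 128, 1)
#   transpose([1, 0, 2])            -> (128, 32, 1), width becomes the CTC time axis
#   flip_left_right keeps the orientation consistent with the training data.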
def decode_batch_predictions(input_image, img_size=(image_width, image_height)):
    # Re-encode the PIL image to bytes so it can go through TF's image decoder.
    # PNG is used to avoid lossy re-compression of the captcha.
    img_byte_array = io.BytesIO()
    input_image.save(img_byte_array, format="PNG")
    input_image = img_byte_array.getvalue()

    input_image = tf.io.decode_image(input_image, channels=1, dtype=tf.dtypes.uint8)
    input_image = distortion_free_resize(input_image, img_size)
    input_image = tf.expand_dims(input_image, axis=0)
    # Scale pixel values to [0, 1]. (The original combined
    # `tf.image.convert_image_dtype`, which already rescales uint8 inputs,
    # with a further division by 255, squashing the input to [0, 1/255].)
    input_image = tf.cast(input_image, tf.float32) / 255.0

    pred = loaded_model.predict(input_image)

    # Greedy CTC decoding; every output frame counts as a valid input step.
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
        :, :max_len
    ]

    # Iterate over the results and get back the text, dropping the -1 padding
    # emitted by the decoder.
    output_text = []
    for res in results:
        res = tf.gather(res, tf.where(tf.math.not_equal(res, -1)))
        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        output_text.append(res)
    return output_text
interface = gr.Interface(
    fn=decode_batch_predictions,
    inputs=gr.Image(label="Input image", type="pil"),
    outputs="text",
    title="Captcha Recognition",
    theme="darkhuggingface",
)
interface.launch(inline=False)
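
# Local smoke test (hypothetical file name, for illustration only; not
# reached on Spaces, since launch() blocks):
#   print(decode_batch_predictions(Image.open("sample_captcha.png")))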