Spaces:
Sleeping
Sleeping
File size: 4,291 Bytes
9ee6029 24a73c9 6896c3d 655f2f4 56cce69 24a73c9 47ae1d3 b5adec5 24a73c9 47ae1d3 24a73c9 6896c3d 24a73c9 b5adec5 24a73c9 b5adec5 24a73c9 56cce69 320ceaa 56cce69 320ceaa 2844ae3 24a73c9 9ee6029 24a73c9 b5adec5 24a73c9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import gradio as gr
from PIL import Image
import pytesseract
import re
import os
import cv2
import numpy as np
# Point pytesseract at the Tesseract binary explicitly instead of relying on
# a PATH lookup. In container environments the system PATH might not be fully
# inherited or visible to pytesseract in the way its default lookup expects.
# /usr/bin/tesseract is the standard location of the Tesseract executable on
# Debian/Ubuntu-based systems; change this value if the binary lives elsewhere.
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'
def preprocess_image(image: Image.Image) -> Image.Image:
    """
    Binarize an image so that OCR has cleaner input to work with.

    The image is reduced to a single grayscale channel and then adaptively
    thresholded to black and white. No separate contrast-enhancement step is
    performed.

    Args:
        image: The PIL image to preprocess.

    Returns:
        A new single-channel PIL image holding the thresholded result.
    """
    # cv2.adaptiveThreshold needs 8-bit single-channel input. PIL modes such
    # as "P" (palette indices), "1" (boolean pixels), "LA" (two channels) or
    # "CMYK" (four channels that are not RGBA) would reach OpenCV with the
    # wrong dtype, channel count or meaning, so normalise them to RGB first.
    # "L", "RGB" and "RGBA" are passed through unchanged.
    if image.mode not in ("L", "RGB", "RGBA"):
        image = image.convert("RGB")

    # Hand the pixels to OpenCV as a NumPy array.
    pixels = np.array(image)
    if pixels.ndim == 3:  # colour image: collapse to one grayscale channel
        pixels = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

    # Adaptive thresholding picks a cut-off per neighbourhood (block size 11,
    # constant 2), which often copes with varying lighting better than a
    # single global threshold.
    binarized = cv2.adaptiveThreshold(
        pixels, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )

    # Back to a PIL image for the caller.
    return Image.fromarray(binarized)
def detect_weight_and_units(text: str) -> list[str]:
    """
    Find weight readings (a number followed by a weight unit) in text.

    Matching is case-insensitive and the space between number and unit is
    optional. Recognised units are kg/kgs, kilogram(s), g, gram(s), lb/lbs
    and pound(s). The unit must end on a word boundary, so "5 going" is not
    read as "5 g".

    Examples: 70kg, 150 lbs, 50.5 g, 20.3kilograms, 1 pound

    Args:
        text: The text to scan, typically raw OCR output.

    Returns:
        One "<value> <unit>" string per match, in order of appearance, with
        the unit lowercased. Empty list when nothing matches.
    """
    # The number is an integer or a proper decimal. A stray dot between the
    # number and the unit ("70. kg") is consumed outside the capture group so
    # it does not end up in the reported value.
    matches = re.findall(
        r'(\d+(?:\.\d+)?)\.?\s*(kilograms?|kgs?|pounds?|lbs?|grams?|g)\b',
        text,
        re.IGNORECASE,
    )
    # Standardize the unit to lowercase; the value is kept as written.
    return [f"{value} {unit.lower()}" for value, unit in matches]
def process_image_for_weight(image: Image.Image) -> str:
    """
    Run OCR on an uploaded image and report any weights found in the text.

    Args:
        image: The uploaded PIL image, or None when nothing was uploaded.

    Returns:
        A message for display: the detected weights (one per line), the raw
        OCR text when no weight matched, a prompt to upload an image, or a
        description of the error. Exceptions raised during processing are
        caught and returned as text rather than propagated.
    """
    # Nothing uploaded yet: ask for an image instead of attempting OCR.
    if image is None:
        return "Please upload an image."

    try:
        # Clean the image up, then let Tesseract read it.
        ocr_text = pytesseract.image_to_string(preprocess_image(image))

        # Scan the OCR output for number + unit pairs.
        found = detect_weight_and_units(ocr_text)
        if not found:
            # Show the raw text so the user can see what the OCR produced.
            return "No weight information detected. Extracted text:\n" + ocr_text

        # One detected weight per line beneath the heading.
        return "Detected Weights:\n" + "\n".join(found)
    except pytesseract.TesseractNotFoundError:
        # The binary is not where tesseract_cmd points (/usr/bin/tesseract),
        # or there is an issue with the language data.
        return (
            "Tesseract OCR engine was not found at the specified path (/usr/bin/tesseract). "
            "This indicates a critical issue with the Docker environment setup. "
            "Please ensure the `Dockerfile` correctly installs 'tesseract-ocr' and 'tesseract-ocr-eng', "
            "and that the Tesseract executable is indeed located at `/usr/bin/tesseract` within the container."
        )
    except Exception as err:
        # Any other failure is reported in the output box.
        return f"An error occurred: {err}"
# Gradio UI definition: a single image upload feeding process_image_for_weight,
# with the result shown in a text box.
iface = gr.Interface(
    title="Auto Weight Logger",
    description="Upload an image containing weight logs, and I will try to detect the weight values and their units.",
    fn=process_image_for_weight,
    inputs=gr.Image(type="pil", label="Upload Weight Log Image"),
    outputs=gr.Textbox(label="Detected Weight and Units"),
    # Lets users flag inputs/outputs for debugging or improvement.
    allow_flagging="auto",
    # No bundled examples yet. To add some, list paths to real image files,
    # e.g. ["./examples/weight_log_1.png", "./examples/weight_log_2.jpg"].
    examples=[],
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()
|