# PRMSChallenge / util.py
# vineelagampa's picture
# Upload 16 files
# d3a44ea verified
from PIL import Image
import io
import fitz
import re
import pytesseract
import google.generativeai as genai
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from fastapi.middleware.cors import CORSMiddleware
import platform
def extract_images_from_pdf_bytes(pdf_bytes: bytes) -> list:
    """Render every page of an in-memory PDF to PNG-encoded bytes.

    Args:
        pdf_bytes: Raw contents of a PDF file.

    Returns:
        A list holding one PNG ``bytes`` object per page, in page order.
    """
    # Open the document as a context manager so it is closed deterministically,
    # even when rendering one of the pages raises.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        # Pixmap.tobytes() already yields bytes, so no BytesIO buffer is needed.
        return [page.get_pixmap().tobytes("png") for page in doc]
def clean_ocr_text(text: str) -> str:
    """Normalise raw OCR output into a single-line, whitespace-collapsed string.

    Form feeds and non-breaking spaces become plain spaces, decimals that were
    split around the dot (e.g. ``3 . 14``) are rejoined, and every run of
    whitespace is reduced to one space before trimming both ends.
    """
    # Map form feed (U+000C) and NBSP (U+00A0) to an ordinary space in one pass.
    normalized = text.translate({0x0C: " ", 0xA0: " "})
    # Rejoin "digit . digit" sequences that OCR split apart.
    rejoined = re.sub(r'(\d)\s*\.\s*(\d)', r'\1.\2', normalized)
    # Collapse any whitespace run (spaces, tabs, newlines) to a single space.
    collapsed = re.sub(r'\s+', ' ', rejoined)
    return collapsed.strip()
def ocr_text_from_image(image_bytes: bytes) -> str:
    """Run Tesseract OCR over an encoded image and return the recognised text.

    The bytes are decoded with PIL, converted to RGB, and handed to
    ``pytesseract.image_to_string``; its result is returned unchanged.
    """
    stream = io.BytesIO(image_bytes)
    rgb_image = Image.open(stream).convert("RGB")
    recognised = pytesseract.image_to_string(rgb_image)
    return recognised
def load_pytesseract():
    """Point pytesseract at the Tesseract binary on macOS and Windows.

    On macOS both Homebrew prefixes are probed, so the same code works on
    Apple Silicon and Intel machines without editing the source. When no
    candidate exists on disk, the first (original) path is still assigned,
    so the eventual pytesseract error names the expected location. On any
    other platform nothing is changed and pytesseract keeps its own default.
    """
    import os

    candidates_by_os = {
        "Darwin": (
            "/opt/homebrew/bin/tesseract",  # Homebrew prefix on Apple Silicon
            "/usr/local/bin/tesseract",  # Homebrew prefix on Intel Macs
        ),
        "Windows": (r'C:\Program Files\Tesseract-OCR\tesseract.exe',),
    }
    candidates = candidates_by_os.get(platform.system())
    if not candidates:
        return

    # First candidate that exists wins; otherwise keep the historical default.
    chosen = next(
        (path for path in candidates if os.path.isfile(path)),
        candidates[0],
    )
    pytesseract.pytesseract.tesseract_cmd = chosen
def load_genai(genai_api_key: str):
    """Configure the ``google.generativeai`` client with the given API key.

    Args:
        genai_api_key: Key forwarded to ``genai.configure``.

    Raises:
        RuntimeError: If ``genai.configure`` raises. The original exception is
            attached as ``__cause__`` so the root failure stays in the traceback.
    """
    try:
        genai.configure(api_key=genai_api_key)
    except Exception as e:
        # Chain explicitly so callers can inspect the underlying error.
        raise RuntimeError(f"Failed to configure Gemini API: {e}") from e
def setupFastAPI()-> FastAPI:
    """Create a FastAPI application with CORS enabled for three localhost origins.

    Returns:
        A new ``FastAPI`` instance with ``CORSMiddleware`` installed, allowing
        credentials and all methods and headers for the listed origins.
    """
    app = FastAPI()
    app.add_middleware(
        CORSMiddleware,
        # Each origin must be its own list element. Without the separating
        # commas Python joins adjacent string literals into one bogus origin
        # and none of the intended origins match.
        allow_origins=[
            "http://localhost:8002",
            "http://localhost:9000",
            "http://localhost:5501",
        ],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    return app