# kyc_karo/utils/pan_ocr.py
# manthanbachu's picture
# Update Aadhaar and PAN extraction prompts to return null for missing details
# 678bc7e
import os
import json
import numpy as np
import openai
from dotenv import load_dotenv
from paddleocr import PaddleOCR
from PIL import Image
# Populate the process environment from a local .env file, if one exists.
load_dotenv()

# The OpenAI key is mandatory: importing this module fails when it is unset
# or empty.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY is missing. Please set it in your .env file.")

# Hand the key to the OpenAI client library.
openai.api_key = OPENAI_API_KEY

# Single PaddleOCR engine (English, angle classification enabled), built once
# at import time and shared by the functions in this module.
ocr = PaddleOCR(use_angle_cls=True, lang='en')
def extract_text(image_file):
    """Extract text from a PAN card image using PaddleOCR.

    The image is converted to RGB, resized to 1024x768 and handed to the
    module-level ``ocr`` engine. The recognised text fragments are joined
    with single spaces.

    Args:
        image_file: Whatever ``PIL.Image.open`` accepts (this function passes
            it through unchanged).

    Returns:
        str: The recognised text, or ``""`` when the image cannot be
        processed or no text is detected. Errors are printed, never raised.
    """
    try:
        # Use the image as a context manager so the handle Pillow opened for
        # a path is released once the pixel data has been copied out.
        with Image.open(image_file) as source:
            # Convert first, then resize: palette / bilevel images are
            # otherwise resampled with nearest-neighbour, which hurts OCR.
            image = source.convert("RGB").resize((1024, 768))

        result = ocr.ocr(np.array(image), cls=True)
        # Only the first entry of the result is read; it may be empty/None
        # when nothing was detected.
        if result and result[0]:
            # Each line's recognised string is taken from line[1][0].
            return " ".join(line[1][0] for line in result[0])
    except Exception as e:  # deliberate best-effort: callers rely on "" on failure
        print(f"❌ Error processing PAN image: {e}")
    return ""  # Return empty string if OCR fails or finds no text
# Keys the caller can expect in a successful result.
_PAN_FIELDS = ("name", "father_name", "dob", "pan_number")


def _parse_json_object(text):
    """Return the JSON object embedded in *text* as a dict, or None if absent/invalid."""
    if not isinstance(text, str):
        return None
    # Models frequently wrap the JSON in code fences or add a sentence around
    # it, so parse only the span from the first "{" to the last "}".
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end <= start:
        return None
    try:
        parsed = json.loads(text[start:end + 1])
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


def extract_pan_details(image):
    """Extract PAN card details using PaddleOCR & structure using GPT-3.5 Turbo.

    The image is OCR'd with ``extract_text`` and the resulting text is sent
    to the ``gpt-3.5-turbo`` chat model, which is asked to return the fields
    as JSON.

    Args:
        image: The PAN card image; passed unchanged to ``extract_text``.

    Returns:
        dict: On success, the parsed model reply with at least the keys
        ``name``, ``father_name``, ``dob`` and ``pan_number`` (``None`` for
        any the model omitted). On failure, ``{"error": "<message>"}``.

    NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 interface of the
    ``openai`` package -- confirm the pinned version before upgrading.
    """
    pan_text = extract_text(image)
    if not pan_text:
        return {"error": "OCR failed to extract text from the PAN card."}

    # NOTE(review): this prints the OCR'd card text (personal data) to
    # stdout; confirm that is acceptable outside of debugging.
    print("🔍 Extracted PAN Text:\n", pan_text)

    # The example in the prompt is valid JSON so the model is not tempted to
    # echo pseudo-code back; the null rule is stated in words instead.
    prompt = f"""
Extract the following details from the PAN card text. If any detail is not found, return null for that field:
- Name
- Father's Name
- Date of Birth
- PAN Number

Input Text:
{pan_text}

Respond with only a JSON object in exactly this shape (the values shown are examples; use null for any field that is not found):
{{
    "name": "John Doe",
    "father_name": "Robert Doe",
    "dob": "01-01-1990",
    "pan_number": "ABCDE1234F"
}}
"""

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.2,
            max_tokens=250,
        )
        content = response["choices"][0]["message"]["content"]
    except Exception as e:  # any API/transport failure maps to the same error dict
        print(f"❌ OpenAI API Error: {e}")
        return {"error": "Failed to process PAN details."}

    details = _parse_json_object(content)
    if details is None:
        print("❌ OpenAI API Error: response did not contain a valid JSON object")
        return {"error": "Failed to process PAN details."}

    # Guarantee every expected key exists so callers can index without checks.
    for field in _PAN_FIELDS:
        details.setdefault(field, None)
    return details