Spaces:
Sleeping
Sleeping
import os | |
import json | |
import numpy as np | |
import openai | |
from dotenv import load_dotenv | |
from paddleocr import PaddleOCR | |
from PIL import Image | |
# Read settings from the environment (after giving python-dotenv a chance to
# populate it) and fail fast at import time if the OpenAI key is unavailable.
load_dotenv()

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
    raise ValueError("OPENAI_API_KEY is missing. Please set it in your .env file.")

# Hand the key to the openai module so later API calls are authenticated.
openai.api_key = OPENAI_API_KEY

# One OCR engine for the whole module, built once at import time.
ocr = PaddleOCR(lang='en', use_angle_cls=True)
def extract_text(image_file):
    """Extract text from a PAN card image using PaddleOCR.

    Args:
        image_file: Whatever ``PIL.Image.open`` accepts for the image to read.

    Returns:
        The recognized text fragments joined by single spaces, or an empty
        string when nothing was recognized or any step raised an exception.
    """
    try:
        # Open the image as a context manager so the underlying file is
        # released as soon as the resized copy has been produced, instead of
        # staying open until garbage collection.
        with Image.open(image_file) as source:
            # The OCR input is always a 1024x768 RGB image (the resize does
            # not preserve the source aspect ratio).
            image = source.resize((1024, 768)).convert("RGB")

        result = ocr.ocr(np.array(image), cls=True)
        if result and result[0]:
            # Each entry of the first result page carries its text at [1][0].
            return " ".join(line[1][0] for line in result[0])
    except Exception as e:
        # Best-effort by design: report the problem and fall through to the
        # empty-string result rather than propagating to the caller.
        print(f"β Error processing PAN image: {e}")

    return ""  # Nothing recognized, or OCR failed
def _parse_json_object(raw):
    """Return the JSON object contained in ``raw``, or ``None`` if there is none.

    The whole string is tried first. If that fails, the text between the first
    ``{`` and the last ``}`` is tried, which covers replies that wrap the JSON
    in Markdown code fences or surround it with extra prose.
    """
    if not isinstance(raw, str):
        return None

    text = raw.strip()
    candidates = [text]
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        candidates.append(text[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        if isinstance(parsed, dict):
            return parsed
    return None


def extract_pan_details(image):
    """Extract PAN card details using PaddleOCR & structure using GPT-3.5 Turbo.

    Args:
        image: The PAN card image, passed through unchanged to
            ``extract_text``.

    Returns:
        The JSON object parsed from the model reply (the prompt asks for the
        keys ``name``, ``father_name``, ``dob`` and ``pan_number``), or a dict
        with a single ``"error"`` key when OCR yields no text, the API call
        fails, or the reply does not contain a JSON object.
    """
    pan_text = extract_text(image)
    if not pan_text:
        return {"error": "OCR failed to extract text from the PAN card."}

    print("π Extracted PAN Text:\n", pan_text)

    # NOTE(review): the example in the prompt is not strictly valid JSON
    # ("null if not found else ..."); the wording is kept as-is here, and the
    # reply is parsed defensively below.
    prompt = (
        "\n"
        "Extract the following details from the PAN card text. "
        "If any detail is not found, return null for that field:\n"
        "- Name\n"
        "- Father's Name\n"
        "- Date of Birth\n"
        "- PAN Number\n"
        "Input Text:\n"
        f"{pan_text}\n"
        "Respond in JSON format like:\n"
        "{\n"
        '"name": null if not found else "John Doe",\n'
        '"father_name": null if not found else "Robert Doe",\n'
        '"dob": null if not found else "01-01-1990",\n'
        '"pan_number": null if not found else "ABCDE1234F"\n'
        "}\n"
    )

    # Only the remote call and the response indexing live in the try block, so
    # an API failure is not confused with a parsing failure.
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.2,
            max_tokens=250,
        )
        content = response["choices"][0]["message"]["content"]
    except Exception as e:
        print(f"β OpenAI API Error: {e}")
        return {"error": "Failed to process PAN details."}

    details = _parse_json_object(content)
    if details is None:
        # Same error payload as before so existing callers keep working.
        print("Could not parse PAN details from the model response.")
        return {"error": "Failed to process PAN details."}
    return details