Spaces:
Sleeping
Sleeping
File size: 2,359 Bytes
1290ec4 678bc7e 1290ec4 678bc7e 1290ec4 678bc7e 1290ec4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import os
import json
import numpy as np
import openai
from dotenv import load_dotenv
from paddleocr import PaddleOCR
from PIL import Image
# Pull settings from a local .env file (when present) before reading the environment.
load_dotenv()

# The OpenAI key is mandatory: fail at import time instead of on the first API call.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY is missing. Please set it in your .env file.")

# Hand the key to the OpenAI client library.
openai.api_key = OPENAI_API_KEY

# One module-level OCR engine reused by every extract_text() call,
# configured with lang="en" and use_angle_cls=True.
ocr = PaddleOCR(use_angle_cls=True, lang="en")
def extract_text(image_file):
    """Run OCR on one Aadhaar card image and return the recognised text.

    The image is scaled to 1024x768, converted to RGB and handed to the
    module-level PaddleOCR engine (``ocr``). The recognised fragments are
    joined with single spaces.

    Args:
        image_file: Whatever ``PIL.Image.open`` accepts -- presumably a path
            or a binary file-like object; confirm against the callers.

    Returns:
        The recognised text, or ``""`` when nothing was recognised or any
        step failed. Failures are printed to stdout and never raised.
    """
    try:
        # The context manager releases the image's file handle even when
        # resizing or conversion fails part-way through.
        with Image.open(image_file) as source:
            # NOTE(review): the fixed 1024x768 target ignores the source
            # aspect ratio -- confirm this is intended for portrait scans.
            pixels = np.array(source.resize((1024, 768)).convert("RGB"))

        result = ocr.ocr(pixels, cls=True)
        if result and result[0]:
            # Index [1][0] of each entry is used as the text; presumably the
            # entries are (box, (text, confidence)) -- verify against the
            # installed PaddleOCR version.
            return " ".join(line[1][0] for line in result[0])
    except Exception as e:  # deliberate best-effort: report and fall through
        print(f"β Error processing Aadhaar image: {e}")
    return ""  # Nothing recognised, or OCR failed
def _parse_json_object(raw):
    """Return the JSON object contained in a chat reply, tolerating fences or prose around it."""
    if not isinstance(raw, str):
        raise ValueError("model reply has no text content")
    text = raw.strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Chat models often wrap JSON in ```json fences or add a sentence
        # around it; retry on the outermost {...} span before giving up.
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise
        parsed = json.loads(text[start:end + 1])
    if not isinstance(parsed, dict):
        raise ValueError("model reply is not a JSON object")
    return parsed


def extract_aadhaar_details(front, back):
    """Extract Aadhaar details using PaddleOCR & structure using GPT-3.5 Turbo.

    Both card sides are OCR'd with ``extract_text``; the combined text is sent
    to the OpenAI chat completion endpoint with a prompt asking for a JSON
    object.

    Args:
        front: Image of the card's front side, passed to ``extract_text``.
        back: Image of the card's back side, passed to ``extract_text``.

    Returns:
        dict: The JSON object parsed from the model's reply. The prompt asks
        for the keys ``name``, ``dob``, ``aadhaar_number``, ``gender`` and
        ``address`` (null when not found), but the reply is not validated
        against that list. On failure a dict with a single ``"error"`` key is
        returned instead: when OCR yields no text for both sides, when the
        API call fails, or when the reply cannot be parsed as a JSON object.
    """
    front_text = extract_text(front)
    back_text = extract_text(back)
    if not front_text and not back_text:
        return {"error": "OCR failed to extract text from both Aadhaar front and back."}

    full_text = f"Front: {front_text}\nBack: {back_text}"
    # NOTE(review): this writes the raw OCR text (personal data) to stdout --
    # confirm that is acceptable outside of local debugging.
    print("π Extracted Aadhaar Text:\n", full_text)

    # NOTE(review): the example object in the prompt is not valid JSON
    # ("null if not found else ..."); left untouched here, worth revisiting.
    prompt = f"""
Extract the following details from the Aadhaar text. If any detail is not found, return null for that field:
- Name
- Date of Birth
- Aadhaar Number
- Gender
- Address
Input Text:
{full_text}
Respond in JSON format like:
{{
"name": null if not found else "John Doe",
"dob": null if not found else "01-01-1990",
"aadhaar_number": null if not found else "1234 5678 9101",
"gender": null if not found else "Male",
"address": null if not found else "Street Name, City, State, PIN Code"
}}
"""

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "system", "content": prompt}],
            max_tokens=300,
            # Extraction should not vary between runs on the same text.
            temperature=0,
        )
        content = response["choices"][0]["message"]["content"]
    except Exception as e:  # network, auth, quota or malformed-response errors
        print(f"β OpenAI API Error: {e}")
        return {"error": "Failed to process Aadhaar details."}

    # Parsing is kept separate so a bad reply is not reported as an API error.
    try:
        return _parse_json_object(content)
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        print(f"Failed to parse OpenAI response as JSON: {e}")
        return {"error": "Failed to process Aadhaar details."}
|