Spaces:
Sleeping
Sleeping
import os | |
import json | |
import numpy as np | |
import openai | |
from dotenv import load_dotenv | |
from paddleocr import PaddleOCR | |
from PIL import Image | |
# Import-time configuration: read environment settings (python-dotenv merges
# a .env file into the process environment), then wire up the OpenAI client
# and the OCR engine that the functions below share.
load_dotenv()

# The API key is mandatory: importing this module fails fast with ValueError
# when the variable is unset or empty.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if OPENAI_API_KEY:
    openai.api_key = OPENAI_API_KEY
else:
    raise ValueError("OPENAI_API_KEY is missing. Please set it in your .env file.")

# A single PaddleOCR instance is built once here and reused for every image
# (English language model, angle classifier enabled).
ocr = PaddleOCR(lang="en", use_angle_cls=True)
def extract_text(image_file, max_side: int = 1024) -> str:
    """Extract text from an Aadhaar image using PaddleOCR.

    The image is converted to RGB and scaled so that its longest side equals
    ``max_side`` pixels while keeping the original aspect ratio. Forcing every
    image into a fixed 1024x768 frame stretches the glyphs of any scan that is
    not 4:3, which hurts recognition.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts (a path or a binary
            file-like object).
        max_side: Target length in pixels of the longest image side.

    Returns:
        The recognised text fragments joined by single spaces, or an empty
        string when nothing was recognised or any step failed.
    """
    try:
        with Image.open(image_file) as source:
            # Convert before resizing: Pillow resamples palette ("P") and
            # bilevel ("1") images with nearest-neighbour, which degrades text.
            rgb = source.convert("RGB")

        width, height = rgb.size
        scale = max_side / max(width, height)
        target_size = (
            max(1, round(width * scale)),
            max(1, round(height * scale)),
        )
        result = ocr.ocr(np.array(rgb.resize(target_size)), cls=True)

        # result[0] holds the detections for the single image passed in; each
        # entry is expected to look like [box, (text, confidence)].
        if result and result[0]:
            return " ".join(line[1][0] for line in result[0])
    except Exception as e:
        # Best-effort boundary: callers treat "" as "OCR produced nothing".
        print(f"β Error processing Aadhaar image: {e}")
    return ""  # Return empty string if OCR fails
def _build_aadhaar_prompt(full_text):
    """Return the extraction instructions with the OCR text embedded."""
    return "\n".join([
        "",
        "Extract the following details from the Aadhaar text. If any detail is not found, return null for that field:",
        "- Name",
        "- Date of Birth",
        "- Aadhaar Number",
        "- Gender",
        "- Address",
        "Input Text:",
        full_text,
        "Respond in JSON format like:",
        "{",
        '"name": null if not found else "John Doe",',
        '"dob": null if not found else "01-01-1990",',
        '"aadhaar_number": null if not found else "1234 5678 9101",',
        '"gender": null if not found else "Male",',
        '"address": null if not found else "Street Name, City, State, PIN Code"',
        "}",
        "",
    ])


def _parse_json_object(raw):
    """Parse the outermost JSON object in *raw*, ignoring fences or prose around it."""
    start = raw.find("{")
    end = raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError("no JSON object found in model reply")
    # json.JSONDecodeError subclasses ValueError, so callers catch one type.
    return json.loads(raw[start:end + 1])


def extract_aadhaar_details(front, back):
    """Extract Aadhaar details using PaddleOCR & structure using GPT-3.5 Turbo.

    Both card sides are run through ``extract_text``; the combined text is
    sent to the chat model, whose reply is parsed as a JSON object.

    Args:
        front: Image of the card front, in any form ``extract_text`` accepts.
        back: Image of the card back, in any form ``extract_text`` accepts.

    Returns:
        The dict decoded from the model reply (the prompt asks for the keys
        ``name``, ``dob``, ``aadhaar_number``, ``gender`` and ``address``),
        or ``{"error": <message>}`` when OCR yields no text for both sides,
        the API call fails, or the reply contains no parseable JSON object.
    """
    front_text = extract_text(front)
    back_text = extract_text(back)
    if not front_text and not back_text:
        return {"error": "OCR failed to extract text from both Aadhaar front and back."}

    full_text = f"Front: {front_text}\nBack: {back_text}"
    # NOTE(review): this writes raw identity data (name, Aadhaar number,
    # address) to stdout; confirm that is acceptable outside local debugging.
    print("π Extracted Aadhaar Text:\n", full_text)

    try:
        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
        # confirm the pinned openai version still provides it.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "system", "content": _build_aadhaar_prompt(full_text)}],
            max_tokens=300,
            temperature=0,  # extraction should be repeatable, not creative
        )
        reply = response["choices"][0]["message"]["content"]
    except Exception as e:
        print(f"β OpenAI API Error: {e}")
        return {"error": "Failed to process Aadhaar details."}

    # Parsing is handled separately from the API call so that a malformed
    # reply is not misreported as an API failure.
    try:
        return _parse_json_object(reply or "")
    except ValueError as e:
        print(f"OpenAI reply was not valid JSON: {e}")
        return {"error": "Failed to process Aadhaar details."}