File size: 2,359 Bytes
1290ec4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
678bc7e
1290ec4
 
 
 
 
 
 
 
 
 
 
678bc7e
 
 
 
 
1290ec4
 
 
678bc7e
1290ec4
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import json
import numpy as np
import openai
from dotenv import load_dotenv
from paddleocr import PaddleOCR
from PIL import Image

# Populate the process environment via python-dotenv before reading settings.
load_dotenv()

# The OpenAI key is mandatory: fail at import time when it is unset or empty.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
    raise ValueError("OPENAI_API_KEY is missing. Please set it in your .env file.")

# Hand the key to the OpenAI client library.
openai.api_key = OPENAI_API_KEY

# Module-wide PaddleOCR engine (English, angle classifier enabled), created
# once here and reused by every extract_text() call.
ocr = PaddleOCR(lang='en', use_angle_cls=True)

def extract_text(image_file):
    """Run PaddleOCR on an Aadhaar image and return the recognized text.

    The image is opened with PIL, resized to 1024x768, converted to RGB and
    handed to the module-level ``ocr`` engine as a NumPy array.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts.

    Returns:
        The recognized fragments joined by single spaces, or ``""`` when the
        OCR result is empty or any step raises (the error is printed, not
        propagated).
    """
    recognized = ""
    try:
        picture = Image.open(image_file)
        picture = picture.resize((1024, 768))
        pixels = np.array(picture.convert("RGB"))
        pages = ocr.ocr(pixels, cls=True)

        # Only the first element of the result is consumed; each entry's
        # [1][0] is taken as its text (presumably (box, (text, score)) —
        # confirm against the installed PaddleOCR version).
        if pages and pages[0]:
            recognized = " ".join(entry[1][0] for entry in pages[0])
    except Exception as e:
        print(f"❌ Error processing Aadhaar image: {e}")

    return recognized

def _parse_json_object(raw):
    """Return the JSON object embedded in *raw* model output as a dict.

    Chat models often wrap JSON in Markdown fences or surround it with prose,
    so only the span from the first ``{`` to the last ``}`` is parsed.

    Raises:
        ValueError: If no ``{...}`` span exists or it is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    text = (raw or "").strip()
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end < start:
        raise ValueError("no JSON object found in model response")
    return json.loads(text[start:end + 1])


def extract_aadhaar_details(front, back):
    """Extract structured Aadhaar details from front and back card images.

    Both images are OCR'd with ``extract_text``; the combined text is sent to
    GPT-3.5 Turbo with a prompt asking for name, date of birth, Aadhaar
    number, gender and address as JSON.

    Args:
        front: Image of the card's front side, passed to ``extract_text``.
        back: Image of the card's back side, passed to ``extract_text``.

    Returns:
        The dict parsed from the model's JSON reply, or a dict with a single
        ``"error"`` key when OCR yields no text for both sides, the API call
        fails, or the reply contains no parseable JSON object.
    """
    front_text = extract_text(front)
    back_text = extract_text(back)

    if not front_text and not back_text:
        return {"error": "OCR failed to extract text from both Aadhaar front and back."}

    full_text = f"Front: {front_text}\nBack: {back_text}"
    # NOTE(review): this prints raw Aadhaar PII to stdout; consider masking or
    # removing it outside of local debugging.
    print("🔍 Extracted Aadhaar Text:\n", full_text)

    prompt = f"""
    Extract the following details from the Aadhaar text. If any detail is not found, return null for that field:
    - Name
    - Date of Birth
    - Aadhaar Number
    - Gender
    - Address

    Input Text:
    {full_text}

    Respond in JSON format like:
    {{
      "name": null if not found else "John Doe",
      "dob": null if not found else "01-01-1990",
      "aadhaar_number": null if not found else "1234 5678 9101",
      "gender": null if not found else "Male",
      "address": null if not found else "Street Name, City, State, PIN Code"
    }}
    """

    # Keep the network call and the parsing in separate try blocks so that a
    # malformed reply is not misreported as an API failure.
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "system", "content": prompt}],
            max_tokens=300,
            temperature=0,  # deterministic output suits field extraction
        )
        content = response["choices"][0]["message"]["content"]
    except Exception as e:
        print(f"❌ OpenAI API Error: {e}")
        return {"error": "Failed to process Aadhaar details."}

    try:
        return _parse_json_object(content)
    except ValueError as e:
        print(f"❌ Could not parse model response as JSON: {e}")
        return {"error": "Failed to process Aadhaar details."}