File size: 3,308 Bytes
0c0a4f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python3
import os
from pathlib import Path
from dotenv import load_dotenv
from rag.ocr_mistral import parse_passport

def main():
    """Run a manual smoke test of ``parse_passport`` on the sample passport.

    Loads ``.env`` from the project root (the parent of this script's
    directory), checks that ``MISTRAL_OCR_KEY`` is set, then passes the bytes
    of ``data/sample_passport.jpg`` to ``parse_passport`` and prints every
    item of the returned profile except ``_raw_text``. The raw text is
    printed as well when the ``DEBUG`` environment variable is non-empty.

    Returns:
        int: 0 when the passport was processed; 1 when the key is not set,
        the sample image is missing, or processing raised an exception.
        The value is used as the process exit status so that shells and CI
        jobs can detect a failed run.
    """
    project_root = Path(__file__).parent.parent
    env_path = project_root / ".env"

    print(f"Looking for .env file at: {env_path}")
    print(f"File exists: {env_path.exists()}")

    load_dotenv(env_path)

    # Report only whether the key is present and how long it is; the key
    # value itself is never printed.
    print("\nEnvironment variables:")
    print("-" * 50)
    mistral_key = os.getenv("MISTRAL_OCR_KEY")
    print(f"MISTRAL_OCR_KEY is {'set' if mistral_key else 'not set'}")

    if not mistral_key:
        print("\nError: MISTRAL_OCR_KEY not set in environment")
        print("Please ensure your .env file contains:")
        print("MISTRAL_OCR_KEY=your_key_here")
        return 1
    print(f"MISTRAL_OCR_KEY length: {len(mistral_key)}")

    # Test with sample passport
    passport_path = project_root / "data" / "sample_passport.jpg"
    if not passport_path.exists():
        print(f"Error: Sample passport not found at {passport_path}")
        return 1

    try:
        profile = parse_passport(passport_path.read_bytes())

        print("\nExtracted Profile:")
        print("-" * 50)
        for key, value in profile.items():
            if key != "_raw_text":  # Raw text is only shown in DEBUG mode
                print(f"{key}: {value}")

        if os.getenv("DEBUG"):
            print("\nRaw OCR Text:")
            print("-" * 50)
            print(profile.get("_raw_text", "No raw text available"))
    except Exception as e:
        # Top-level boundary of a diagnostic script: report any failure from
        # reading the image or from the OCR call, then signal it through the
        # exit status instead of finishing as if the run had succeeded.
        print(f"Error processing passport: {e}")
        return 1

    return 0


if __name__ == "__main__":
    # Propagate main()'s result as the process exit status.
    raise SystemExit(main())


# #!/usr/bin/env python3
# import os, sys, requests, base64
# from dotenv import load_dotenv
# load_dotenv()   # make sure this picks up your MISTRAL_OCR_KEY & URL

# KEY = os.getenv("MISTRAL_OCR_KEY")
# URL = os.getenv("MISTRAL_OCR_URL")
# print("MISTRAL_OCR_KEY:", KEY)
# print("MISTRAL_OCR_URL:", URL)

# # read your sample passport (PNG, PDF, whatever)
# path = os.path.join(os.path.dirname(__file__), "..", "data", "sample_passport.png")
# with open(path, "rb") as f:
#     data = f.read()

# # --- 1. Call Mistral raw to see status & text ---
# resp = requests.post(
#     URL,
#     headers={"Authorization": f"Bearer {KEY}"},
#     files={"file": ("sample_passport.png", data, "image/png")},
#     timeout=30,
# )
# print("Status code:", resp.status_code)
# print("Response text:", resp.text[:500])

# # --- 2. If 200, show the JSON keys and full dict ---
# if resp.status_code == 200:
#     raw = resp.json()
#     print("Raw JSON:", raw)

#     # now run through your extractor
#     from rag.ocr_mistral import _extract_fields, mistral_ocr_tool, parse_passport
#     fields = _extract_fields(raw)
#     print("Extracted fields via _extract_fields:", fields)

#     # and via the agent wrapper
#     via_agent = parse_passport(data)
#     print("parse_passport(...) returned:", via_agent)
# else:
#     print("Mistral call failed; check key/URL")