#!/usr/bin/env python3
"""Smoke-test the passport OCR parser against the bundled sample image.

Loads ``MISTRAL_OCR_KEY`` from the project's ``.env`` file, passes the bytes
of ``data/sample_passport.jpg`` to ``parse_passport`` and prints the returned
fields.  Set the ``DEBUG`` environment variable to also print the raw OCR
text stored under the ``_raw_text`` key.
"""
import os
import sys
from pathlib import Path

from dotenv import load_dotenv

from rag.ocr_mistral import parse_passport

# Resolve before walking up: for a bare relative ``__file__`` such as
# ``test.py``, ``Path("test.py").parent.parent`` is ``.`` rather than the
# directory above the script's folder.
PROJECT_ROOT = Path(__file__).resolve().parent.parent


def main() -> int:
    """Run the passport OCR smoke test and report the outcome on stdout.

    Returns:
        ``0`` when the sample passport was parsed and its fields printed;
        ``1`` when ``MISTRAL_OCR_KEY`` is not set, the sample image is
        missing, or parsing/printing raised an exception.
    """
    env_path = PROJECT_ROOT / ".env"
    print(f"Looking for .env file at: {env_path}")
    print(f"File exists: {env_path.exists()}")

    # Try to load the .env file
    load_dotenv(env_path)

    # Debug output: report only whether the key is present and how long it
    # is, never the secret value itself.
    print("\nEnvironment variables:")
    print("-" * 50)
    mistral_key = os.getenv("MISTRAL_OCR_KEY")
    print(f"MISTRAL_OCR_KEY is {'set' if mistral_key else 'not set'}")

    # The key is required; bail out with setup instructions when it is absent.
    if not mistral_key:
        print("\nError: MISTRAL_OCR_KEY not set in environment")
        print("Please ensure your .env file contains:")
        print("MISTRAL_OCR_KEY=your_key_here")
        return 1
    print(f"MISTRAL_OCR_KEY length: {len(mistral_key)}")

    # Test with sample passport
    passport_path = PROJECT_ROOT / "data" / "sample_passport.jpg"
    if not passport_path.exists():
        print(f"Error: Sample passport not found at {passport_path}")
        return 1

    try:
        profile = parse_passport(passport_path.read_bytes())

        print("\nExtracted Profile:")
        print("-" * 50)
        for key, value in profile.items():
            if key != "_raw_text":  # Don't print raw text by default
                print(f"{key}: {value}")

        # Print raw text if needed for debugging
        if os.getenv("DEBUG"):
            print("\nRaw OCR Text:")
            print("-" * 50)
            print(profile.get("_raw_text", "No raw text available"))
    except Exception as e:
        # Deliberately broad: this is the script's top-level boundary, so any
        # failure is reported to the user and turned into a non-zero status.
        print(f"Error processing passport: {e}")
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())


# ---------------------------------------------------------------------------
# Legacy manual debugging script, kept commented out for reference.
# NOTE(review): it prints MISTRAL_OCR_KEY in clear text -- mask that before
# re-enabling any of it.
# ---------------------------------------------------------------------------
# #!/usr/bin/env python3
# import os, sys, requests, base64
# from dotenv import load_dotenv
#
# load_dotenv()  # make sure this picks up your MISTRAL_OCR_KEY & URL
#
# KEY = os.getenv("MISTRAL_OCR_KEY")
# URL = os.getenv("MISTRAL_OCR_URL")
# print("MISTRAL_OCR_KEY:", KEY)
# print("MISTRAL_OCR_URL:", URL)
#
# # read your sample passport (PNG, PDF, whatever)
# path = os.path.join(os.path.dirname(__file__), "..", "data", "sample_passport.png")
# with open(path, "rb") as f:
#     data = f.read()
#
# # --- 1. Call Mistral raw to see status & text ---
# resp = requests.post(
#     URL,
#     headers={"Authorization": f"Bearer {KEY}"},
#     files={"file": ("sample_passport.png", data, "image/png")},
#     timeout=30,
# )
# print("Status code:", resp.status_code)
# print("Response text:", resp.text[:500])
#
# # --- 2. If 200, show the JSON keys and full dict ---
# if resp.status_code == 200:
#     raw = resp.json()
#     print("Raw JSON:", raw)
#
#     # now run through your extractor
#     from rag.ocr_mistral import _extract_fields, mistral_ocr_tool, parse_passport
#     fields = _extract_fields(raw)
#     print("Extracted fields via _extract_fields:", fields)
#
#     # and via the agent wrapper
#     via_agent = parse_passport(data)
#     print("parse_passport(...) returned:", via_agent)
# else:
#     print("Mistral call failed; check key/URL")