formpilot-demo / tests /quick_ocr_test.py
afulara's picture
Auto‑deploy from GitHub
0c0a4f7 verified
#!/usr/bin/env python3
import os
from pathlib import Path
from dotenv import load_dotenv
from rag.ocr_mistral import parse_passport
def main() -> int:
    """Run a one-off OCR smoke test against the sample passport image.

    Loads ``.env`` from the parent of this file's directory, checks that
    ``MISTRAL_OCR_KEY`` is set, passes the bytes of
    ``data/sample_passport.jpg`` to ``parse_passport`` and prints every
    returned field except ``_raw_text``. When the ``DEBUG`` environment
    variable is non-empty, the ``_raw_text`` entry is printed as well.

    Returns:
        0 when the profile was extracted and printed; 1 when the key is
        missing, the sample image does not exist, or processing raised.
    """
    # resolve() makes the path absolute before walking upwards. With a bare
    # relative __file__ (e.g. "quick_ocr_test.py" when launched from inside
    # tests/ on older interpreters) .parent.parent would collapse to "."
    # and every path below would point into the wrong directory.
    project_root = Path(__file__).resolve().parent.parent
    env_path = project_root / ".env"
    print(f"Looking for .env file at: {env_path}")
    print(f"File exists: {env_path.exists()}")

    # Try to load the .env file
    load_dotenv(env_path)

    # Report only whether the key is present and how long it is; the value
    # itself is never printed.
    print("\nEnvironment variables:")
    print("-" * 50)
    mistral_key = os.getenv("MISTRAL_OCR_KEY")
    if not mistral_key:
        print("MISTRAL_OCR_KEY is not set")
        print("\nError: MISTRAL_OCR_KEY not set in environment")
        print("Please ensure your .env file contains:")
        print("MISTRAL_OCR_KEY=your_key_here")
        return 1
    print("MISTRAL_OCR_KEY is set")
    print(f"MISTRAL_OCR_KEY length: {len(mistral_key)}")

    # Test with sample passport
    passport_path = project_root / "data" / "sample_passport.jpg"
    if not passport_path.exists():
        print(f"Error: Sample passport not found at {passport_path}")
        return 1

    # This is the top-level boundary of a diagnostic script, so any failure
    # while reading the image, running parse_passport or printing the result
    # is reported as a one-line message rather than a traceback.
    try:
        profile = parse_passport(passport_path.read_bytes())

        print("\nExtracted Profile:")
        print("-" * 50)
        for key, value in profile.items():
            if key != "_raw_text":  # Don't print raw text by default
                print(f"{key}: {value}")

        # Print raw text if needed for debugging
        if os.getenv("DEBUG"):
            print("\nRaw OCR Text:")
            print("-" * 50)
            print(profile.get("_raw_text", "No raw text available"))
    except Exception as e:
        print(f"Error processing passport: {e}")
        return 1

    return 0


if __name__ == "__main__":
    # Propagate main()'s status so a shell or CI job can detect a failed run.
    raise SystemExit(main())
# #!/usr/bin/env python3
# import os, sys, requests, base64
# from dotenv import load_dotenv
# load_dotenv() # make sure this picks up your MISTRAL_OCR_KEY & URL
# KEY = os.getenv("MISTRAL_OCR_KEY")
# URL = os.getenv("MISTRAL_OCR_URL")
# print("MISTRAL_OCR_KEY:", KEY)
# print("MISTRAL_OCR_URL:", URL)
# # read your sample passport (PNG, PDF, whatever)
# path = os.path.join(os.path.dirname(__file__), "..", "data", "sample_passport.png")
# with open(path, "rb") as f:
# data = f.read()
# # --- 1. Call Mistral raw to see status & text ---
# resp = requests.post(
# URL,
# headers={"Authorization": f"Bearer {KEY}"},
# files={"file": ("sample_passport.png", data, "image/png")},
# timeout=30,
# )
# print("Status code:", resp.status_code)
# print("Response text:", resp.text[:500])
# # --- 2. If 200, show the JSON keys and full dict ---
# if resp.status_code == 200:
# raw = resp.json()
# print("Raw JSON:", raw)
# # now run through your extractor
# from rag.ocr_mistral import _extract_fields, mistral_ocr_tool, parse_passport
# fields = _extract_fields(raw)
# print("Extracted fields via _extract_fields:", fields)
# # and via the agent wrapper
# via_agent = parse_passport(data)
# print("parse_passport(...) returned:", via_agent)
# else:
# print("Mistral call failed; check key/URL")