Spaces:
Running
Running
#!/usr/bin/env python3 | |
import os | |
from pathlib import Path | |
from dotenv import load_dotenv | |
from rag.ocr_mistral import parse_passport | |
def main():
    """Smoke-test passport parsing against the bundled sample image.

    Steps, in order:

    1. Load ``.env`` from the project root, taken to be the parent of the
       directory that contains this file.
    2. Report whether ``MISTRAL_OCR_KEY`` is set (and its length, never its
       value). If it is missing, print setup instructions and stop.
    3. Read ``data/sample_passport.jpg`` under the project root and hand its
       bytes to ``parse_passport``.
    4. Print every entry of the returned mapping except ``_raw_text``; the
       raw text is printed too when the ``DEBUG`` environment variable is
       set to a non-empty value.

    Returns ``None`` on every path. Missing prerequisites and any
    ``Exception`` raised while reading or parsing the image are reported
    with ``print`` instead of being propagated.
    """
    divider = "-" * 50

    # Locate and load the .env file that sits in the project root.
    root_dir = Path(__file__).parent.parent
    dotenv_file = root_dir / ".env"
    print(f"Looking for .env file at: {dotenv_file}")
    print(f"File exists: {dotenv_file.exists()}")
    load_dotenv(dotenv_file)

    # Only MISTRAL_OCR_KEY is inspected, and only its presence/length is
    # shown so the secret itself never reaches the console.
    print("\nEnvironment variables:")
    print(divider)
    api_key = os.getenv("MISTRAL_OCR_KEY")
    key_state = "set" if api_key else "not set"
    print(f"MISTRAL_OCR_KEY is {key_state}")

    # Guard clause: the key is treated as required, so bail out early.
    if not api_key:
        print("\nError: MISTRAL_OCR_KEY not set in environment")
        print("Please ensure your .env file contains:")
        print("MISTRAL_OCR_KEY=your_key_here")
        return
    print(f"MISTRAL_OCR_KEY length: {len(api_key)}")

    # Guard clause: the sample image must exist before we try to read it.
    sample_image = root_dir / "data" / "sample_passport.jpg"
    if not sample_image.exists():
        print(f"Error: Sample passport not found at {sample_image}")
        return

    try:
        with open(sample_image, "rb") as image_file:
            image_bytes = image_file.read()
            profile = parse_passport(image_bytes)

        print("\nExtracted Profile:")
        print(divider)
        for field_name, field_value in profile.items():
            if field_name == "_raw_text":
                # Raw OCR text is shown only in the DEBUG section below.
                continue
            print(f"{field_name}: {field_value}")

        if os.getenv("DEBUG"):
            print("\nRaw OCR Text:")
            print(divider)
            print(profile.get("_raw_text", "No raw text available"))
    except Exception as exc:
        # Deliberately broad: this is a diagnostic script, so any failure in
        # reading or parsing is summarised on the console.
        print(f"Error processing passport: {str(exc)}")
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# #!/usr/bin/env python3 | |
# import os, sys, requests, base64 | |
# from dotenv import load_dotenv | |
# load_dotenv() # make sure this picks up your MISTRAL_OCR_KEY & URL | |
# KEY = os.getenv("MISTRAL_OCR_KEY") | |
# URL = os.getenv("MISTRAL_OCR_URL") | |
# print("MISTRAL_OCR_KEY:", KEY) | |
# print("MISTRAL_OCR_URL:", URL) | |
# # read your sample passport (PNG, PDF, whatever) | |
# path = os.path.join(os.path.dirname(__file__), "..", "data", "sample_passport.png") | |
# with open(path, "rb") as f: | |
# data = f.read() | |
# # --- 1. Call Mistral raw to see status & text --- | |
# resp = requests.post( | |
# URL, | |
# headers={"Authorization": f"Bearer {KEY}"}, | |
# files={"file": ("sample_passport.png", data, "image/png")}, | |
# timeout=30, | |
# ) | |
# print("Status code:", resp.status_code) | |
# print("Response text:", resp.text[:500]) | |
# # --- 2. If 200, show the JSON keys and full dict --- | |
# if resp.status_code == 200: | |
# raw = resp.json() | |
# print("Raw JSON:", raw) | |
# # now run through your extractor | |
# from rag.ocr_mistral import _extract_fields, mistral_ocr_tool, parse_passport | |
# fields = _extract_fields(raw) | |
# print("Extracted fields via _extract_fields:", fields) | |
# # and via the agent wrapper | |
# via_agent = parse_passport(data) | |
# print("parse_passport(...) returned:", via_agent) | |
# else: | |
# print("Mistral call failed; check key/URL") | |