| #!/usr/bin/env python3 | |
| # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |
| # SPDX-License-Identifier: Apache-2.0 | |
| import argparse | |
| from nemotron_ocr.inference.pipeline import NemotronOCR | |
def main(image_path, merge_level, no_visualize, model_dir):
    """Run the OCR pipeline on a single image and report the number of results.

    Args:
        image_path: Input handed to the pipeline as its first argument.
        merge_level: Forwarded to the pipeline as the ``merge_level`` keyword.
        no_visualize: When true, the pipeline is invoked with ``visualize=False``;
            otherwise with ``visualize=True``.
        model_dir: Accepted so the CLI can pass it, but not used in this function.
    """
    # NOTE(review): model_dir is never forwarded to NemotronOCR() -- confirm
    # whether the pipeline is meant to be constructed with it.
    pipeline = NemotronOCR()

    # The CLI flag is phrased negatively; the pipeline keyword is positive.
    should_visualize = not no_visualize
    regions = pipeline(image_path, merge_level=merge_level, visualize=should_visualize)

    region_count = len(regions)
    print(f"Found {region_count} text regions.")
if __name__ == "__main__":
    # Command-line interface: one positional image path plus three options.
    cli = argparse.ArgumentParser(description="Run OCR inference and annotate image.")

    cli.add_argument("image_path", help="Path to the input image.", type=str)

    # Granularity of the OCR output; restricted to the three listed values.
    cli.add_argument(
        "--merge-level",
        default="paragraph",
        choices=["word", "sentence", "paragraph"],
        help="Merge level for OCR output (word, sentence, paragraph).",
        type=str,
    )

    # Off by default, so visualization is requested unless this flag is given.
    cli.add_argument(
        "--no-visualize",
        help="Do not save the annotated image.",
        action="store_true",
    )

    cli.add_argument(
        "--model-dir",
        default="./checkpoints",
        help="Path to the model checkpoints.",
        type=str,
    )

    options = cli.parse_args()

    # Hand every parsed value to main() under its matching parameter name.
    main(
        image_path=options.image_path,
        merge_level=options.merge_level,
        no_visualize=options.no_visualize,
        model_dir=options.model_dir,
    )