---
# Metadata describing the JPQD-quantized DocumentClassifier ONNX model:
# provenance, tensor specifications, performance figures and usage notes.
#
# NOTE(review): this file was recovered from a flattened copy in which every
# line ended in a stray " |" and all indentation was lost. The nesting below
# is reconstructed from the key names and the blank-line grouping of the
# flattened copy; confirm it against whatever consumes this file.

name: DocumentClassifier_jpqd
description: >-
  DocumentClassifier deep learning model for document type classification,
  optimized with JPQD quantization
framework: ONNX
task: image-classification
domain: computer-vision
subdomain: document-analysis

# Provenance of the model and the optimization applied to it.
model_info:
  architecture: Convolutional Neural Network
  paper: "Docling Technical Report"
  paper_url: "https://arxiv.org/abs/2408.09869"
  original_source: DS4SD DocumentClassifier
  original_repo: "https://huggingface.co/ds4sd/DocumentClassifier"
  optimization: JPQD quantization

# Input/output tensor description.
# NOTE(review): output_shape is a 4-D feature map, while usage.postprocessing
# describes pooling and mapping to category probabilities. Presumably the
# classification head runs outside the exported graph; confirm.
specifications:
  input_shape: [1, 3, 224, 224]
  input_type: float32
  input_format: "RGB images, normalized [0, 1]"
  output_shape: [1, 1280, 7, 7]
  output_type: float32
  feature_dimensions: 1280
  spatial_size: [7, 7]
  batch_size: dynamic

# Approximate figures are kept as quoted strings so they are never re-typed
# by the parser.
# NOTE(review): original_size_gb "~50+" does not agree with compression_ratio
# "~6x" and optimized_size_mb 8.2 (8.2 MB x 6 is roughly 50 MB). The unit in
# the key name looks wrong; confirm before relying on it.
performance:
  original_size_gb: "~50+"
  optimized_size_mb: 8.2
  compression_ratio: "~6x"
  inference_time_cpu_ms: 28.1
  throughput_fps: "~35.6"
  accuracy_retention: ">95%"

deployment:
  runtime: onnxruntime
  hardware: CPU-optimized
  precision: Mixed precision (INT8/FP32)
  memory_usage_mb: "~150"

# Ordered steps to run before and after inference.
usage:
  preprocessing:
    - Load document image (any format)
    - Resize to 224x224 pixels
    - Normalize to [0, 1] range
    - Convert to CHW format
  postprocessing:
    - Global average pooling on feature maps
    - Map to document category probabilities
    - Apply softmax for confidence scores
    - Return top-K predictions

capabilities:
  # Each entry is a single-key mapping: category label -> short description.
  document_types:
    - Article: News articles, blog posts
    - Form: Application forms, surveys
    - Letter: Business correspondence
    - Memo: Internal communications
    - News: Press releases, news content
    - Presentation: Slides, presentations
    - Resume: CVs, professional profiles
    - Scientific: Research papers, academic docs
    - Specification: Technical documentation
    - Table: Data tables, spreadsheets
    - Other: Miscellaneous documents

# NOTE(review): placed at top level because a blank-line run preceded it in
# the flattened copy, as it did for every other top-level key. It may instead
# belong under `capabilities`; confirm.
supported_formats:
  input:
    - JPEG, PNG, PDF, TIFF
    - Any PIL-supported image format
    - Numpy arrays (RGB/BGR)
  output:
    - Category predictions with confidence
    - Feature embeddings [1280-dim]
    - Spatial feature maps [7x7]

applications:
  - Document workflow automation
  - Content management systems
  - Digital archive organization
  - Automated document routing
  - Content classification pipelines
  - Business process optimization

benchmarks:
  accuracy: ">90% on document classification"
  speed: "35.6 FPS on modern CPUs"
  memory: "Efficient 150MB memory usage"

training_data:
  type: "Mixed document corpus"
  categories: "11 document types"
  resolution: "Variable, processed to 224x224"
  diversity: "Multi-domain document collection"

license: mit
tags:
  - document-classification
  - computer-vision
  - onnx
  - deep-learning
  - document-analysis
  - jpqd
  - quantized
  - production-ready