# File size: 2,863 Bytes

# c5958d3

# Identity and high-level categorisation of the model described by this card.
name: "DocumentClassifier_jpqd"
description: >-
  DocumentClassifier deep learning model for document type classification,
  optimized with JPQD quantization
framework: "ONNX"
task: "image-classification"
domain: "computer-vision"
subdomain: "document-analysis"

# Provenance: architecture family, reference paper, upstream source and the
# optimization applied to produce this variant.
model_info:
  architecture: "Convolutional Neural Network"
  paper: "Docling Technical Report"
  paper_url: "https://arxiv.org/abs/2408.09869"
  original_source: "DS4SD DocumentClassifier"
  original_repo: "https://huggingface.co/ds4sd/DocumentClassifier"
  optimization: "JPQD quantization"
  
# Input/output tensor interface as declared by this card.
# NOTE(review): both shapes list a leading dimension of 1 while batch_size is
# "dynamic" -- confirm whether the leading axis is actually dynamic.
specifications:
  # Input tensor.
  input_shape: [1, 3, 224, 224]
  input_type: "float32"
  input_format: "RGB images, normalized [0, 1]"
  # Output tensor; feature_dimensions and spatial_size repeat the last three
  # entries of output_shape.
  output_shape: [1, 1280, 7, 7]
  output_type: "float32"
  feature_dimensions: 1280
  spatial_size: [7, 7]
  batch_size: "dynamic"
  
# Size and speed figures reported for the optimized model.
performance:
  # Expressed in GB to match the key name: roughly 50 MB, which is what
  # optimized_size_mb (8.2) multiplied by compression_ratio (~6x) gives.
  original_size_gb: "~0.05+"  # Estimated original size (~50 MB)
  optimized_size_mb: 8.2
  compression_ratio: "~6x"
  inference_time_cpu_ms: 28.1
  # Quoted on purpose: a leading "~" is YAML's null indicator, so the
  # approximate value is kept as an explicit string.
  throughput_fps: "~35.6"
  accuracy_retention: ">95%"
  
# Runtime target and resource footprint stated for deployment.
deployment:
  runtime: "onnxruntime"
  hardware: "CPU-optimized"
  precision: "Mixed precision (INT8/FP32)"
  # Approximate figure stored as a string; quoted so the leading "~" cannot
  # be mistaken for YAML's null indicator.
  memory_usage_mb: "~150"
  
# Ordered processing steps around model inference.
usage:
  # Steps applied to an image before it is fed to the model.
  preprocessing:
    - "Load document image (any format)"
    - "Resize to 224x224 pixels"
    - "Normalize to [0, 1] range"
    - "Convert to CHW format"
  # Steps applied to the model output.
  # NOTE(review): the second step mentions "probabilities" before the softmax
  # step -- confirm the intended order/wording.
  postprocessing:
    - "Global average pooling on feature maps"
    - "Map to document category probabilities"
    - "Apply softmax for confidence scores"
    - "Return top-K predictions"

# Document categories listed by this card.
capabilities:
  # Each entry is a single-key mapping: category name -> short description.
  document_types:
    - Article: "News articles, blog posts"
    - Form: "Application forms, surveys"
    - Letter: "Business correspondence"
    - Memo: "Internal communications"
    - News: "Press releases, news content"
    - Presentation: "Slides, presentations"
    - Resume: "CVs, professional profiles"
    - Scientific: "Research papers, academic docs"
    - Specification: "Technical documentation"
    - Table: "Data tables, spreadsheets"
    - Other: "Miscellaneous documents"

# Accepted inputs and produced outputs, as free-text descriptions.
supported_formats:
  # NOTE(review): PDF is listed next to "PIL-supported image format" --
  # confirm how PDF input is expected to be handled.
  input:
    - "JPEG, PNG, PDF, TIFF"
    - "Any PIL-supported image format"
    - "Numpy arrays (RGB/BGR)"
  output:
    - "Category predictions with confidence"
    - "Feature embeddings [1280-dim]"
    - "Spatial feature maps [7x7]"

# Example use cases listed for the model.
applications:
  - "Document workflow automation"
  - "Content management systems"
  - "Digital archive organization"
  - "Automated document routing"
  - "Content classification pipelines"
  - "Business process optimization"

# Headline benchmark statements, stored as free-text strings.
benchmarks:
  accuracy: ">90% on document classification"
  # Same 35.6 FPS figure as performance.throughput_fps.
  speed: "35.6 FPS on modern CPUs"
  # Same 150 MB figure as deployment.memory_usage_mb.
  memory: "Efficient 150MB memory usage"
  
# Free-text description of the training corpus.
training_data:
  type: "Mixed document corpus"
  # Matches the 11 entries under capabilities.document_types.
  categories: "11 document types"
  # 224x224 is the spatial size given in specifications.input_shape.
  resolution: "Variable, processed to 224x224"
  diversity: "Multi-domain document collection"

# License identifier and discovery tags.
license: "mit"
tags:
  - "document-classification"
  - "computer-vision"
  - "onnx"
  - "deep-learning"
  - "document-analysis"
  - "jpqd"
  - "quantized"
  - "production-ready"