---
# Metadata card for the CodeFormula_jpqd model (ONNX, JPQD-quantized).
# Values prefixed with "~" or ">" are approximate and kept as quoted strings
# so that they are never re-typed by a YAML loader.

name: CodeFormula_jpqd
description: >-
  CodeFormula vision-language model for code and formula recognition,
  optimized with JPQD quantization
framework: ONNX
task: image-to-text
domain: multimodal
subdomain: vision-language

# Provenance of the model and the optimization applied to it.
model_info:
  architecture: Vision-Language Transformer
  paper: "Docling Technical Report"
  paper_url: "https://arxiv.org/abs/2408.09869"
  original_source: DS4SD CodeFormula
  original_repo: "https://huggingface.co/ds4sd/CodeFormula"
  optimization: JPQD quantization

# Tensor interface of the model.
specifications:
  # NOTE(review): input_shape/output_shape fix the leading dimension at 1
  # while batch_size below is "dynamic" - confirm which one is authoritative.
  input_shape: [1, 10]
  input_type: int64
  input_format: Token sequences
  output_shape: [1, 10, 50827]
  output_type: float32
  vocabulary_size: 50827
  sequence_length: 10
  batch_size: dynamic

# Size and speed figures; approximate values are strings, exact ones numbers.
performance:
  original_size_gb: "~2+"
  optimized_size_mb: 526.19
  compression_ratio: "~4x"
  inference_time_cpu_ms: 6.6
  throughput_fps: "~150"
  accuracy_retention: ">95%"

deployment:
  runtime: onnxruntime
  hardware: CPU-optimized
  precision: INT8 weights, FP32 activations
  memory_usage_gb: "~1"

# Ordered steps around inference.
usage:
  preprocessing:
    - Load image at 120 DPI resolution
    - Resize and enhance image quality
    - Convert to token sequence input
  postprocessing:
    - Decode logits to token IDs
    - Convert tokens to text
    - Apply language-specific formatting

capabilities:
  code_recognition:
    - Multi-language programming code
    - Indentation preservation
    - Syntax highlighting support
    # NOTE(review): because of the unquoted ": ", this item loads as a
    # single-key mapping, unlike its plain-string siblings. Quote the whole
    # item if a plain string is intended.
    - Output format: "<_language_> code_content"
  formula_recognition:
    - Mathematical expressions
    - Scientific notation
    - Chemical formulas
    # NOTE(review): loads as a single-key mapping (see the note on the
    # code_recognition "Output format" item); confirm the intended type.
    - Output format: LaTeX code

supported_languages:
  programming:
    - Python
    - Java
    - JavaScript
    - C/C++
    - Go
    - Rust
    - And many more
  markup:
    - LaTeX (mathematical formulas)
    - Chemical notation
    - Scientific expressions

applications:
  - Document digitization
  - Educational content processing
  - Code plagiarism detection
  - Mathematical problem solving
  - Technical documentation conversion
  - Research paper processing

# Human-readable summary of the figures listed under performance/deployment.
benchmarks:
  accuracy: ">95% code recognition accuracy"
  speed: "150 FPS on modern CPUs"
  memory: "Efficient 1GB memory usage"

training_data:
  type: "Code snippets and mathematical formulas"
  resolution: "120 DPI images"
  diversity: "Multiple programming languages and notation systems"

license: mit
tags:
  - code-recognition
  - formula-recognition
  - vision-language
  - multimodal
  - ocr
  - latex
  - onnx
  - quantized
  - jpqd
  - programming-languages