Search.setIndex({"alltitles": {"<a href='https://pdf-extract-kit.readthedocs.io/en/latest/'>English</a>": [[22, null]], "<a href='https://pdf-extract-kit.readthedocs.io/zh_CN/latest/'>\u7b80\u4f53\u4e2d\u6587</a>": [[22, "id1"]], "HuggingFace": [[14, "huggingface"]], "ModelScope": [[14, "modelscope"]], "OCR\u7b97\u6cd5\u8bc4\u6d4b": [[9, null]], "PDF\u5185\u5bb9\u63d0\u53d6\u8bc4\u6d4b\u3010\u7aef\u5230\u7aef\u3011": [[10, null]], "PDF\u9879\u76ee": [[16, null]], "[\u63a8\u8350] \u65b9\u6cd5 1\uff1asnapshot_download": [[14, "snapshot-download"]], "v0.1.0 (2024.07.01)": [[18, "v0-1-0-2024-07-01"]], "v0.2.0 (2024.09.30)": [[18, "v0-2-0-2024-09-30"]], "\u4eae\u70b9": [[18, "id2"]], "\u4ee3\u7801\u5b9e\u73b0": [[23, null]], "\u4efb\u52a1\u5b9a\u4e49\u53ca\u6ce8\u518c": [[23, "id2"]], "\u5149\u5b66\u5b57\u7b26\u8bc6\u522b(OCR)\u7b97\u6cd5": [[3, null]], "\u516c\u5f0f\u68c0\u6d4b\u793a\u4f8b": [[15, "id3"]], "\u516c\u5f0f\u68c0\u6d4b\u7b97\u6cd5": [[0, null]], "\u516c\u5f0f\u68c0\u6d4b\u7b97\u6cd5\u8bc4\u6d4b": [[6, null]], "\u516c\u5f0f\u8bc6\u522b\u7b97\u6cd5": [[1, null]], "\u516c\u5f0f\u8bc6\u522b\u7b97\u6cd5\u8bc4\u6d4b": [[7, null]], "\u53d8\u66f4\u65e5\u5fd7": [[18, null]], "\u53ef\u89c6\u5316\u7ed3\u679c\u67e5\u770b": [[0, "id6"], [1, "id6"], [2, "id8"], [3, "id5"]], "\u57fa\u7840\u7b97\u6cd5\u6a21\u5757": [[16, null]], "\u591a\u6837\u5316\u8f93\u5165\u652f\u6301": [[0, "id5"], [1, "id5"], [2, "id7"], [3, "id4"], [20, "id5"]], "\u5b89\u88c5": [[13, null]], "\u5df2\u652f\u6301\u7684\u6a21\u578b": [[17, null]], "\u5e03\u5c40\u68c0\u6d4b\u793a\u4f8b": [[15, "id2"]], "\u5e03\u5c40\u68c0\u6d4b\u7b97\u6cd5": [[2, null]], "\u5e03\u5c40\u68c0\u6d4b\u7b97\u6cd5\u8bc4\u6d4b": [[8, null]], "\u5feb\u901f\u4e0a\u624b": [[16, null]], "\u5feb\u901f\u5f00\u59cb": [[15, null]], "\u6279\u5904\u7406\u62d3\u5c55": [[23, "id6"]], "\u652f\u6301\u7684\u6a21\u578b\u5217\u8868": [[16, null]], "\u652f\u6301\u7c7b\u578b\u62d3\u5c55": [[23, "id5"]], "\u6587\u6863": [[16, "id1"]], "\u6587\u6863\u5185\u5bb9\u63d0\u53d6\u9879\u76ee": [[20, null]], "\u6587\u6863\u7ffb\u8bd1\u9879\u76ee": [[19, null]], "\u6587\u6863\u8865\u5145": [[24, null]], "\u65b0\u4efb\u52a1\u62d3\u5c55": [[16, null]], "\u65b9\u6cd5 2\uff1a Git LFS": [[14, "git-lfs"]], "\u6700\u4f73\u5b9e\u8df5": [[13, "id2"]], "\u6a21\u578b\u4f7f\u7528": [[0, "id3"], [1, "id3"], [2, "id3"], [3, "id2"]], "\u6a21\u578b\u52a0\u901f\u9879\u76ee": [[21, null]], "\u6a21\u578b\u5b9a\u4e49\u53ca\u6ce8\u518c": [[23, "id3"]], "\u6a21\u578b\u6027\u80fd\u8bc4\u6d4b": [[16, null]], "\u6a21\u578b\u6743\u91cd\u4e0b\u8f7d": [[14, null]], "\u6a21\u578b\u8bc4\u6d4b": [[25, null]], "\u6a21\u578b\u914d\u7f6e": [[0, "id4"], [1, "id4"], [2, "id4"], [3, "id3"]], "\u6b22\u8fce\u6765\u5230 PDF-Extract-Kit \u7684\u4e2d\u6587\u6587\u6863": [[16, null]], "\u793a\u4f8b\u811a\u672c": [[23, "id4"]], "\u7b80\u4ecb": [[0, "id2"], [1, "id2"], [2, "id2"], [3, "id1"], [20, "id2"]], "\u8868\u683c\u8bc6\u522b\u7b97\u6cd5": [[5, null]], "\u8868\u683c\u8bc6\u522b\u7b97\u6cd5\u8bc4\u6d4b": [[12, null]], "\u8f93\u51fa\u7ed3\u679c": [[20, "id6"]], "\u9605\u8bfb\u987a\u5e8f\u7b97\u6cd5": [[4, null]], "\u9605\u8bfb\u987a\u5e8f\u7b97\u6cd5\u8bc4\u6d4b": [[11, null]], "\u9879\u76ee\u4f7f\u7528": [[20, "id3"]], "\u9879\u76ee\u914d\u7f6e": [[20, "id4"]]}, "docnames": ["algorithm/formula_detection", "algorithm/formula_recognition", "algorithm/layout_detection", "algorithm/ocr", "algorithm/reading_order", "algorithm/table_recognition", "evaluation/formula_detection", "evaluation/formula_recognition", "evaluation/layout_detection", "evaluation/ocr", "evaluation/pdf_extract", "evaluation/reading_order", "evaluation/table_recognition", "get_started/installation", "get_started/pretrained_model", "get_started/quickstart", "index", "models/supported", "notes/changelog", "project/doc_translate", "project/pdf_extract", "project/speed_up", "switch_language", "task_extend/code", "task_extend/doc", "task_extend/evaluation"], "envversion": {"sphinx": 63, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1}, "filenames": ["algorithm/formula_detection.rst", "algorithm/formula_recognition.rst", "algorithm/layout_detection.rst", "algorithm/ocr.rst", "algorithm/reading_order.rst", "algorithm/table_recognition.rst", "evaluation/formula_detection.rst", "evaluation/formula_recognition.rst", "evaluation/layout_detection.rst", "evaluation/ocr.rst", "evaluation/pdf_extract.rst", "evaluation/reading_order.rst", "evaluation/table_recognition.rst", "get_started/installation.rst", "get_started/pretrained_model.rst", "get_started/quickstart.rst", "index.rst", "models/supported.md", "notes/changelog.md", "project/doc_translate.rst", "project/pdf_extract.rst", "project/speed_up.rst", "switch_language.md", "task_extend/code.rst", "task_extend/doc.rst", "task_extend/evaluation.rst"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"10": 13, "1024": 2, "128": 20, "1280": [0, 20, 23], "1419999999998": 20, "159": 20, "1654": 20, "192": 20, "20": 14, "200": 23, "2339": 20, "25": [0, 2, 20, 23], "380": 20, "45": [0, 2, 20, 23], "51073013642917": 20, "6792698635707": 20, "765": 20, "85058512958923": 20, "97": 20, "__all__": [2, 23], "__dict__": 23, "__file__": 23, "__init__": 23, "__main__": 23, "__name__": 23, "_mfd": 23, "abandon": 23, "abspath": 23, "activ": 13, "add_argu": 23, "allow_pattern": 14, "an": 20, "and": 23, "append": 23, "arg": 23, "argpars": 23, "argumentpars": 23, "as": 23, "asset": [0, 1, 2, 3, 20], "at": 23, "base": 23, "base_nam": 23, "base_task": 23, "basenam": 23, "basetask": 23, "batch": [], "batch_siz": [0, 20, 23], "be": 23, "box": 23, "cach": 14, "cache_dir": 14, "can": 23, "category_typ": 20, "cfg_path": [1, 20], "ch": [3, 20], "ch_pp": [3, 20], "class": 23, "clone": 14, "cls": 23, "cn": 14, "co": 14, "com": [2, 4, 5, 7, 8, 9, 10, 12, 14, 17, 19, 21, 25], "conda": 13, "conf": 23, "conf_thr": [0, 2, 20, 23], "config": [0, 1, 2, 3, 15, 20, 23], "config_load": 23, "config_path": 23, "configur": 23, "contain": 23, "correspond": 23, "cp310": 2, "cpu": [13, 23], "creat": 13, "cuda": 23, "cv2": 23, "data": 23, "dataload": 23, "dataset": 23, "def": 23, "demo": [0, 1, 2, 3, 20], "descript": 23, "det": [3, 20], "det_db_box_thresh": [3, 20], "det_model_dir": [3, 20], "detect": [0, 2, 23], "detection_result": [0, 2, 23], "detectron2": 2, "determin": 23, "devic": 23, "dict": 23, "dictionari": 23, "directori": 23, "dirnam": 23, "doclayout": [2, 13, 15, 18], "doclayout_yolo_ft": 2, "doclayout_yolo_model": 2, "els": 23, "enumer": 23, "evalu": 23, "exampl": 20, "exist": 23, "export": 14, "extract": [0, 1, 2, 3, 13, 14, 15, 18, 23], "fals": [0, 1, 2, 3, 23], "figur": 23, "figure_capt": 23, "file": 23, "for": [0, 2, 23], "fork": 16, "formula_capt": 23, "formula_detect": [0, 15, 20], "formula_detection_yolo": [0, 20], "formula_recognit": [1, 20], "formula_recognition_unimernet": [1, 20], "found": 23, "from": [2, 14, 23], "get": 23, "git": 16, "github": 2, "given": 23, "gpu": 13, "height": 20, "help": 23, "hf_home": 14, "hf_hub_enable_hf_transf": 14, "https": [2, 14], "hub": 14, "huggingface_hub": 14, "id_to_nam": 23, "ids": 23, "idx": 23, "if": 23, "imag": [0, 2, 23], "image_id": 23, "imagedataset": 23, "img_siz": [0, 2, 20, 23], "imgsz": 23, "import": [2, 14, 23], "imwrit": 23, "in": 23, "initi": 23, "initialize_tasks_and_model": 23, "input": [0, 1, 2, 3, 20, 23], "input_data": [0, 2, 23], "instal": [2, 13, 14], "iou": [0, 2, 23], "iou_thr": [0, 2, 20, 23], "is": 20, "is_avail": 23, "isolate_formula": 23, "join": 23, "json": 20, "key": 23, "kit": [0, 1, 2, 3, 13, 14, 15, 18, 23], "lang": [3, 20], "latex": 1, "layout": [20, 23], "layout_det": 20, "layout_detect": [2, 15, 20, 23], "layout_detection_layoutlmv3": 2, "layout_detection_task": 23, "layout_detection_yolo": [2, 20, 23], "layoutdetectionlayoutlmv3": 2, "layoutdetectiontask": 23, "layoutdetectionyolo": [2, 23], "layoutlmv3": [2, 13, 15], "layoutlmv3_ft": 2, "layoutlmv3_model": 2, "lfs": 16, "linux": 2, "linux_x86_64": 2, "list": 23, "load": 23, "load_config": 23, "load_imag": 23, "load_pdf_imag": 23, "local_dir": 14, "maco": 2, "macosx_10_9_universal2": 2, "main": [2, 23], "makedir": 23, "map": 23, "markdown": 20, "max_work": 14, "merge2markdown": 20, "mfd": [0, 14, 20], "mfr": [1, 20], "mineru": 20, "model": [0, 1, 2, 3, 14, 20, 23], "model_config": [0, 1, 2, 3, 20, 23], "model_formula_detect": 0, "model_id": 14, "model_layout_detect": [2, 23], "model_path": [0, 1, 2, 20, 23], "model_registri": [2, 23], "modelscope_cach": 14, "name": 23, "none": 23, "not": 23, "ocr": [2, 13, 16, 20], "ocr_ppocr": [3, 20], "ocrv4_det": [3, 20], "ocrv4_rec": [3, 20], "of": 23, "opendatalab": [2, 14], "option": 23, "or": 23, "os": 23, "osp": 23, "outpus": 15, "output": [0, 1, 2, 3, 20, 23], "pad": [], "paddleocr": [3, 13, 20], "page_info": 20, "page_no": 20, "paramet": 23, "parse_arg": 23, "parser": 23, "path": [0, 2, 23], "pdf": [0, 1, 2, 3, 13, 14, 15, 18, 20, 23], "pdf2markdown": 20, "pdf_dpi": 23, "pdf_extract_kit": [1, 2, 20, 23], "pdf_imag": 23, "pdfs": [0, 2], "perform": 23, "pil": 23, "pip": [2, 13], "plain": 23, "png": 23, "poli": 20, "predict": 23, "predict_imag": [0, 2, 23], "predict_pdf": [0, 2, 23], "print": 23, "project": 20, "pt": [0, 2, 20], "py": [0, 1, 2, 3, 15, 20, 23], "python": [0, 1, 2, 3, 13, 15, 20], "raw": 2, "rec": [3, 20], "rec_model_dir": [3, 20], "rect": [], "rectangular": [], "regist": 23, "registri": [2, 23], "repo_id": 14, "repo_root": 23, "requir": [13, 23], "resiz": [], "result": 23, "result_nam": 23, "result_path": [0, 2, 23], "return": 23, "run": 23, "run_project": 20, "save": 23, "score": 20, "script": [0, 1, 2, 3, 15, 20, 23], "self": 23, "set": 23, "show_log": [3, 20], "singl": 23, "snapshot_download": 16, "soon": [4, 5, 7, 8, 9, 10, 12, 14, 17, 19, 21, 25], "specifi": 23, "star": 16, "str": 23, "super": 23, "sys": 23, "tabl": 23, "table_capt": 23, "table_footnot": 23, "task": [0, 1, 2, 3, 20, 23], "task_inst": 23, "task_nam": 23, "task_registri": 23, "text": [20, 23], "the": 23, "this": 20, "titl": 23, "to": [0, 2, 23], "torch": 23, "torchvis": 23, "transform": 23, "true": [0, 1, 2, 3, 20, 23], "txt": 13, "type": 23, "ultralyt": 23, "unimernet": [1, 13, 20, 23], "unimernet_tini": [1, 20], "util": 23, "v10": 2, "valu": 23, "verbos": 23, "vis_result": 23, "visual": [0, 1, 2, 3, 20, 23], "visualize_bbox": 23, "watch": 16, "weight": 0, "whl": 2, "width": 20, "win_amd64": 2, "window": 2, "with": 23, "www": 14, "xxx": [6, 11], "xxxx": 14, "xyxi": 23, "yaml": [0, 1, 2, 3, 15, 20, 23], "yolo": [0, 2, 13, 14, 15, 18, 20, 23], "yolo_v8_ft": 20, "yolov10": [13, 15], "yolov10_model": [], "yolov10l_ft": [2, 20], "yolov8": [0, 13, 18]}, "titles": ["\u516c\u5f0f\u68c0\u6d4b\u7b97\u6cd5", "\u516c\u5f0f\u8bc6\u522b\u7b97\u6cd5", "\u5e03\u5c40\u68c0\u6d4b\u7b97\u6cd5", "\u5149\u5b66\u5b57\u7b26\u8bc6\u522b(OCR)\u7b97\u6cd5", "\u9605\u8bfb\u987a\u5e8f\u7b97\u6cd5", "\u8868\u683c\u8bc6\u522b\u7b97\u6cd5", "\u516c\u5f0f\u68c0\u6d4b\u7b97\u6cd5\u8bc4\u6d4b", "\u516c\u5f0f\u8bc6\u522b\u7b97\u6cd5\u8bc4\u6d4b", "\u5e03\u5c40\u68c0\u6d4b\u7b97\u6cd5\u8bc4\u6d4b", "OCR\u7b97\u6cd5\u8bc4\u6d4b", "PDF\u5185\u5bb9\u63d0\u53d6\u8bc4\u6d4b\u3010\u7aef\u5230\u7aef\u3011", "\u9605\u8bfb\u987a\u5e8f\u7b97\u6cd5\u8bc4\u6d4b", "\u8868\u683c\u8bc6\u522b\u7b97\u6cd5\u8bc4\u6d4b", "\u5b89\u88c5", "\u6a21\u578b\u6743\u91cd\u4e0b\u8f7d", "\u5feb\u901f\u5f00\u59cb", "\u6b22\u8fce\u6765\u5230 PDF-Extract-Kit \u7684\u4e2d\u6587\u6587\u6863", "\u5df2\u652f\u6301\u7684\u6a21\u578b", "\u53d8\u66f4\u65e5\u5fd7", "\u6587\u6863\u7ffb\u8bd1\u9879\u76ee", "\u6587\u6863\u5185\u5bb9\u63d0\u53d6\u9879\u76ee", "\u6a21\u578b\u52a0\u901f\u9879\u76ee", "<a href='https://pdf-extract-kit.readthedocs.io/en/latest/'>English</a>", "\u4ee3\u7801\u5b9e\u73b0", "\u6587\u6863\u8865\u5145", "\u6a21\u578b\u8bc4\u6d4b"], "titleterms": {"01": 18, "07": 18, "09": 18, "2024": 18, "30": 18, "en": 22, "english": 22, "extract": [16, 22], "git": 14, "href": 22, "https": 22, "huggingfac": 14, "io": 22, "kit": [16, 22], "latest": 22, "lfs": 14, "modelscop": 14, "ocr": [3, 9], "pdf": [10, 16, 22], "readthedoc": 22, "snapshot_download": 14, "v0": 18, "zh_cn": 22}}) |