File size: 2,331 Bytes
81f6231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
605c260
 
81f6231
 
605c260
81f6231
 
 
 
 
 
 
 
 
 
 
 
 
605c260
81f6231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import time
import os
from pdf2docx import Converter
from docx import Document

def convert_pdf_to_word(pdf_path, docx_path):
    """Convert the PDF at ``pdf_path`` into a Word document at ``docx_path``.

    Args:
        pdf_path: Path of the source PDF, passed to ``pdf2docx.Converter``.
        docx_path: Destination path passed to ``Converter.convert``.

    Raises:
        Whatever ``Converter`` or ``Converter.convert`` raises; the converter
        is closed before the exception propagates.
    """
    cv = Converter(pdf_path)
    try:
        cv.convert(docx_path)
    finally:
        # Close the converter even when the conversion fails, so a bad PDF
        # does not leave it open.
        cv.close()

def convert_image_to_word(text, filename):
    """Save ``text`` as a new Word document under ``/tmp``.

    The document contains ``filename`` as a level-1 heading, ``text`` as a
    paragraph, and a dashed separator paragraph.

    If ``/tmp/<filename>`` already exists, ``(n)`` is appended to the name,
    with ``n`` counting up from 1 until an unused path is found, so earlier
    documents are never overwritten.

    Args:
        text: Body text for the document (presumably text extracted from an
            image -- confirm against the caller).
        filename: Name used for the heading and for the file inside ``/tmp``.

    Returns:
        The path the document was saved to.
    """
    doc = Document()
    doc.add_heading(f"{filename}", level=1)
    doc.add_paragraph(text)
    doc.add_paragraph("\n" + "-"*50 + "\n")

    filepath = os.path.join('/tmp', filename)
    copy_num = 0
    # Keep bumping the counter until the candidate path is free; a single
    # increment would reuse "<filename>(1)" on every later collision.
    while os.path.exists(filepath):
        copy_num += 1
        filepath = os.path.join('/tmp', f'{filename}({copy_num})')

    doc.save(filepath)
    return filepath

def wait_for_file_release(file_path, timeout=5):
    """Poll until ``file_path`` can be opened for reading or ``timeout`` elapses.

    The file is opened in binary read mode; a ``PermissionError`` is treated
    as "still locked" and retried after a 0.5 second pause.

    Args:
        file_path: Path of the file to probe.
        timeout: Maximum number of seconds to keep retrying.

    Returns:
        ``True`` as soon as the file opens successfully, ``False`` if the
        timeout expires first (including when ``timeout`` is 0 or negative,
        in which case no attempt is made).

    Raises:
        OSError: Any error from ``open`` other than ``PermissionError``
            (for example ``FileNotFoundError``) propagates unchanged.
    """
    # Use the monotonic clock so that system clock adjustments cannot
    # shorten or extend the wait.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with open(file_path, 'rb'):
                return True
        except PermissionError:
            time.sleep(0.5)
    return False

def delete_temp_folder(temp_path="/tmp"):
    """Delete every regular file directly inside ``temp_path``.

    After an initial 0.5 second pause, each entry is probed with
    ``wait_for_file_release`` and removed once it can be opened. Progress and
    failures are reported with ``print``; a failure on one entry does not
    stop the remaining entries from being processed.

    Args:
        temp_path: Directory whose files are removed.

    Raises:
        OSError: If ``temp_path`` itself cannot be listed.
    """
    # NOTE(review): the pause presumably gives other code time to finish
    # with the files -- confirm against the caller.
    time.sleep(0.5)
    for filename in os.listdir(temp_path):
        file_path = os.path.join(temp_path, filename)

        # Sub-directories and other non-regular entries cannot be opened as
        # files nor removed with os.remove, so skip them rather than crash.
        if not os.path.isfile(file_path):
            continue

        try:
            released = wait_for_file_release(file_path)
        except OSError as e:
            # e.g. the file disappeared between listdir() and open().
            print(f"Gagal hapus {file_path}: {e}")
            continue

        if not released:
            print(f"File terkunci terlalu lama: {file_path}")
            continue

        try:
            os.remove(file_path)
        except OSError as e:
            print(f"Gagal hapus {file_path}: {e}")
        else:
            print(f"Hapus: {file_path}")

def extract_tables_from_docx(docx_path):
    """Read every table of a Word document into plain Python lists.

    Args:
        docx_path: Path of the document, passed to ``Document``.

    Returns:
        A list with one dict per table in ``doc.tables``. Each dict has:

        * ``"table_data"`` -- rows of cell strings; a cell string is the
          stripped text of each run in the cell joined by single spaces and
          stripped again.
        * ``"bold_map"`` -- rows of booleans, parallel to ``"table_data"``;
          ``True`` when at least one run in the cell has a truthy ``bold``.
    """
    document = Document(docx_path)
    extracted = []

    for tbl in document.tables:
        text_grid = []
        bold_grid = []

        for tbl_row in tbl.rows:
            text_row = []
            bold_row = []

            for cell in tbl_row.cells:
                # Flatten the cell's paragraphs into one sequence of runs.
                cell_runs = [
                    run
                    for para in cell.paragraphs
                    for run in para.runs
                ]
                pieces = [run.text.strip() for run in cell_runs]
                text_row.append(" ".join(pieces).strip())
                bold_row.append(any(run.bold for run in cell_runs))

            text_grid.append(text_row)
            bold_grid.append(bold_row)

        extracted.append({"table_data": text_grid, "bold_map": bold_grid})

    return extracted