File size: 1,569 Bytes
ca5b08e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import os
import subprocess
import io
from typing import List, Union
from PIL import Image


def get_page_image(pdf_path, page_number, target_longest_image_dim=None, image_rotation=0):
    """Load one page of a PDF, or a plain image file, as a PIL image.

    Paths ending in ``.pdf`` (case-insensitive) are rasterized by running the
    external ``pdftoppm`` tool and reading the PNG it writes to stdout. Any
    other path is opened directly with ``Image.open``; ``page_number`` is
    ignored in that case.

    Args:
        pdf_path: Path to the PDF or image file.
        page_number: Page to render, passed to ``pdftoppm`` as both the
            first (``-f``) and last (``-l``) page.
        target_longest_image_dim: If not None, the image is resized so that
            its longest side equals this value, keeping the aspect ratio.
        image_rotation: Angle in degrees. When non-zero the image is rotated
            by ``-image_rotation`` with ``expand=True`` so nothing is cropped.
            Pillow rotates counter-clockwise for positive angles, so a
            positive value here turns the image clockwise.

    Returns:
        The loaded (and optionally rotated / resized) PIL image.

    Raises:
        AssertionError: If ``pdftoppm`` exits with a non-zero status; the
            exception argument is the tool's stderr output.
        subprocess.TimeoutExpired: If ``pdftoppm`` runs longer than 120 s.
    """
    if pdf_path.lower().endswith(".pdf"):
        # Convert the requested PDF page to PNG using pdftoppm (PNG on stdout).
        pdftoppm_result = subprocess.run(
            [
                "pdftoppm",
                "-png",
                "-f",
                str(page_number),
                "-l",
                str(page_number),
                "-r",
                "72",  # 72 DPI: one pixel per PDF point (1 pt = 1/72 inch)
                pdf_path,
            ],
            timeout=120,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # Raise explicitly rather than `assert`, which is stripped under
        # `python -O`. AssertionError is kept so existing callers that catch
        # it continue to work.
        if pdftoppm_result.returncode != 0:
            raise AssertionError(pdftoppm_result.stderr)
        image = Image.open(io.BytesIO(pdftoppm_result.stdout))
    else:
        image = Image.open(pdf_path)

    if image_rotation != 0:
        image = image.rotate(-image_rotation, expand=True)

    if target_longest_image_dim is not None:
        width, height = image.size
        # Multiply before dividing: `side * (target / other)` can land just
        # below the exact value (e.g. 49 * (1 / 49) == 0.9999999999999999)
        # and truncate one pixel short. The max(1, ...) clamp keeps very
        # elongated images from collapsing to a zero-pixel side, which
        # resize() rejects.
        if width > height:
            new_width = target_longest_image_dim
            new_height = max(1, int(height * target_longest_image_dim / width))
        else:
            new_height = target_longest_image_dim
            new_width = max(1, int(width * target_longest_image_dim / height))
        image = image.resize((new_width, new_height))
    return image
    

def is_image(file_path):
    """Return True if ``Image.open`` can open *file_path*, else False.

    This is a best-effort probe: any ordinary error raised while opening
    (missing file, unrecognized format, ...) is reported as False.

    Args:
        file_path: Path (or file object) handed to ``Image.open``.

    Returns:
        True when the file opens as an image, False otherwise.
    """
    try:
        # Open inside a context manager so the underlying file handle is
        # released immediately instead of lingering until garbage collection.
        with Image.open(file_path):
            return True
    except Exception:
        # `Exception` rather than a bare `except:` so KeyboardInterrupt and
        # SystemExit still propagate.
        return False