import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
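# The browser-like User-Agent above helps with hosts that reject requests' default UA.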


def get_main_folder_name(url):
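    """Return the title slug, i.e. the path segment that follows 'read-scan'.

    For the sample URL in __main__ this is 'boarding-diary-uncensored-fan-edition'.
    """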
    path = urlparse(url).path.strip("/")  # Extract and clean the path
    parts = path.split("/")  # Split the path by "/"

    # Ensure "read-scan" exists and get the next part as the main folder
    if "read-scan" in parts:
        index = parts.index("read-scan")  # Find "read-scan" position
        if index + 1 < len(parts):  # Ensure there's a next part
            return parts[index + 1]  # Main folder name

    return None  # Return None if not found


def get_base_url(url):
    """Extracts the base url up to 'chapter-' from the given url."""
    base = url.split("chapter-")[0]  # Get everything before 'chapter-'
    return base + "chapter-"  # Ensure 'chapter-' is included at the end



def get_image_links(url):
    """Fetches all image urls from a given chapter page."""
    response = requests.get(url, headers=HEADERS)
    if response.status_code != 200:
        print(f"Failed to fetch page {url}: HTTP {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")
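    # "wp-manga-chapter-img" is the class this site's WordPress manga reader puts on page images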
    images = soup.find_all("img", class_="wp-manga-chapter-img")
    # strip() guards against stray whitespace/newlines sometimes padded into src attributes
    return [img["src"].strip() for img in images if "src" in img.attrs]

def get_chapter_name(url):
    """Extracts and formats the chapter name from the url."""
    parts = url.rstrip("/").split("/")
    chapter_number = parts[-1].replace("chapter-", "")
    return f"chapter-{int(chapter_number):03d}"  # Format as chapter-001, chapter-002, etc.

def download_images(image_links, folder_name, main_folder):
    """Downloads all images into the designated chapter folder."""
    chapter_folder = os.path.join(main_folder, folder_name)
    os.makedirs(chapter_folder, exist_ok=True)

    for idx, img_url in enumerate(image_links):
        try:
            # Reuse the browser-like headers; a timeout keeps one stalled image from hanging the run
            response = requests.get(img_url, headers=HEADERS, stream=True, timeout=30)
            if response.status_code == 200:
                img_path = os.path.join(chapter_folder, f"{idx + 1}.jpg")
                with open(img_path, "wb") as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)
                print(f"Downloaded: {img_path}")
            else:
                print(f"Failed to download {img_url}: HTTP {response.status_code}")
        except Exception as e:
            print(f"Error downloading {img_url}: {e}")

def get_latest_chapter(base_url):
    """Fetches the latest available chapter number from the manga's main page."""
    # Drop the trailing "/chapter-" to get the manga's main page URL
    response = requests.get(base_url[: -len("/chapter-")], headers=HEADERS)
    if response.status_code != 200:
        print("Failed to fetch the main manga page.")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    links = soup.find_all("a", href=True)

    chapter_numbers = []
    for link in links:
        if "chapter-" in link["href"]:
            try:
                chapter_num = int(link["href"].split("chapter-")[-1].rstrip("/"))
                chapter_numbers.append(chapter_num)
            except ValueError:
                continue

    return max(chapter_numbers) if chapter_numbers else None

def temp_main(chapter_url, main_folder):
    """Downloads an entire manga chapter."""
    chapter_name = get_chapter_name(chapter_url)
    image_links = get_image_links(chapter_url)

    if image_links:
        download_images(image_links, chapter_name, main_folder)
        print(f"All images saved in folder: {chapter_name}")
    else:
        print(f"No images found for {chapter_name}.")


def main(url):
    # Main folder to store all chapters
    main_folder = get_main_folder_name(url)
    if main_folder is None:
        print("Could not determine the main folder name; expected a '/read-scan/<title>/...' URL.")
        return None

    base_url = get_base_url(url)

    os.makedirs(main_folder, exist_ok=True)

    latest_chapter = get_latest_chapter(base_url)
    if latest_chapter is None:
        print("Could not determine the latest chapter.")
        return None

    # Track consecutive empty chapters
    empty_chapter_count = 0
    max_empty_chapters = 5  # Stop after 5 empty chapters

    for index in range(0, latest_chapter + 1):
        chapter_url = f"{base_url}{index}/"
        print(f"Downloading {chapter_url}...")

        image_links = get_image_links(chapter_url)
        if not image_links:
            print(f"No images found for chapter-{index}.")
            empty_chapter_count += 1
            if empty_chapter_count >= max_empty_chapters:
                print("No images found in the last 5 chapters. Stopping download.")
                break
        else:
            empty_chapter_count = 0  # Reset if a chapter contains images
            temp_main(chapter_url, main_folder)
            print(f"Chapter {index} downloaded!")

    return main_folder


if __name__ == "__main__":
    url = "https://mangadistrict.com/read-scan/boarding-diary-uncensored-fan-edition/v1-high-quality-optimized-for-moderate-data-usage/chapter-1"
    main(url)