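"""Downloads manga chapters from a mangadistrict.com "read-scan" url.

Given a chapter url, the script derives the manga's folder name and its base
chapter url, discovers the latest chapter number from the manga's main page,
and then downloads every chapter's images into per-chapter folders.
"""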
import os
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

def get_main_folder_name(url):
    """Extracts the manga title segment that follows 'read-scan' in the url path."""
    path = urlparse(url).path.strip("/")  # Extract and clean the path
    parts = path.split("/")  # Split the path by "/"
    # Ensure "read-scan" exists and get the next part as the main folder
    if "read-scan" in parts:
        index = parts.index("read-scan")  # Find "read-scan" position
        if index + 1 < len(parts):  # Ensure there's a next part
            return parts[index + 1]  # Main folder name
    return None  # Return None if not found
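
# A quick sanity check of the path parsing (hypothetical title, same url
# structure as the one used under __main__):
#   get_main_folder_name("https://mangadistrict.com/read-scan/some-title/chapter-1")
#   -> "some-title"
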
def get_base_url(url):
    """Extracts the base url up to 'chapter-' from the given url."""
    base = url.split("chapter-")[0]  # Get everything before 'chapter-'
    return base + "chapter-"  # Ensure 'chapter-' is included at the end
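
# For example (hypothetical url):
#   get_base_url("https://example.com/read-scan/some-title/chapter-12")
#   -> "https://example.com/read-scan/some-title/chapter-"
# Note this assumes "chapter-" occurs only once in the url; a title that
# itself contains "chapter-" would be split at the wrong spot.
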
def get_image_links(url):
    """Fetches all image urls from a given chapter page."""
    response = requests.get(url, headers=HEADERS, timeout=30)
    if response.status_code != 200:
        print(f"Failed to fetch page {url}: HTTP {response.status_code}")
        return []
    soup = BeautifulSoup(response.text, "html.parser")
    images = soup.find_all("img", class_="wp-manga-chapter-img")
    # strip() guards against whitespace some page templates pad around src values
    return [img["src"].strip() for img in images if "src" in img.attrs]
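
# The "wp-manga-chapter-img" class matches the markup of the site this script
# targets; scraping a different reader site would likely need another selector.
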
def get_chapter_name(url):
    """Extracts and formats the chapter name from the url."""
    parts = url.rstrip("/").split("/")
    chapter_number = parts[-1].replace("chapter-", "")
    return f"chapter-{int(chapter_number):03d}"  # Format as chapter-001, chapter-002, etc.
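
# e.g. get_chapter_name("https://example.com/read-scan/some-title/chapter-7/")
# -> "chapter-007"; the zero-padding keeps chapter folders in natural order
# when sorted by name.
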
def download_images(image_links, folder_name, main_folder):
    """Downloads all images into the designated chapter folder."""
    chapter_folder = os.path.join(main_folder, folder_name)
    os.makedirs(chapter_folder, exist_ok=True)
    for idx, img_url in enumerate(image_links):
        try:
            # Send the same headers as the page requests; some hosts reject
            # image requests that lack a browser User-Agent.
            response = requests.get(img_url, headers=HEADERS, stream=True, timeout=30)
            if response.status_code == 200:
                img_path = os.path.join(chapter_folder, f"{idx + 1}.jpg")
                with open(img_path, "wb") as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)
                print(f"Downloaded: {img_path}")
            else:
                print(f"Failed to download {img_url}: HTTP {response.status_code}")
        except Exception as e:
            print(f"Error downloading {img_url}: {e}")
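
# Images are saved as 1.jpg, 2.jpg, ... in page order. The .jpg extension is
# assumed here regardless of the content type the server actually returns.
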
def get_latest_chapter(base_url):
    """Fetches the latest available chapter number from the manga's main page."""
    # Strip the trailing "/chapter-" to get the manga's main page
    response = requests.get(base_url[:-len("/chapter-")], headers=HEADERS, timeout=30)
    if response.status_code != 200:
        print("Failed to fetch the main manga page.")
        return None
    soup = BeautifulSoup(response.text, "html.parser")
    links = soup.find_all("a", href=True)
    chapter_numbers = []
    for link in links:
        if "chapter-" in link["href"]:
            try:
                chapter_num = int(link["href"].split("chapter-")[-1].rstrip("/"))
                chapter_numbers.append(chapter_num)
            except ValueError:
                continue  # Skip links whose chapter suffix is not a plain integer
    return max(chapter_numbers) if chapter_numbers else None
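
# The scan keeps the highest integer suffix found among all "chapter-" links
# on the page, so e.g. links ending in chapter-3/ and chapter-12/ yield 12.
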
def temp_main(chapter_url, main_folder, image_links=None):
    """Downloads an entire manga chapter.

    image_links can be supplied by the caller to avoid fetching the chapter
    page a second time; if omitted, the links are fetched here.
    """
    chapter_name = get_chapter_name(chapter_url)
    if image_links is None:
        image_links = get_image_links(chapter_url)
    if image_links:
        download_images(image_links, chapter_name, main_folder)
        print(f"All images saved in folder: {chapter_name}")
    else:
        print(f"No images found for {chapter_name}.")
def main(url):
    # Main folder to store all chapters
    main_folder = get_main_folder_name(url)
    if main_folder is None:
        print("Could not determine the main folder name from the url.")
        return None
    base_url = get_base_url(url)
    os.makedirs(main_folder, exist_ok=True)
    latest_chapter = get_latest_chapter(base_url)
    if latest_chapter is None:
        print("Could not determine the latest chapter.")
        return None
    # Track consecutive empty chapters
    empty_chapter_count = 0
    max_empty_chapters = 5  # Stop after 5 consecutive empty chapters
    for index in range(0, latest_chapter + 1):
        chapter_url = f"{base_url}{index}/"
        print(f"Downloading {chapter_url}...")
        image_links = get_image_links(chapter_url)
        if not image_links:
            print(f"No images found for chapter-{index}.")
            empty_chapter_count += 1
            if empty_chapter_count >= max_empty_chapters:
                print(f"No images found in the last {max_empty_chapters} chapters. Stopping download.")
                break
        else:
            empty_chapter_count = 0  # Reset if a chapter contains images
            # Reuse the links fetched above instead of requesting the page again
            temp_main(chapter_url, main_folder, image_links)
            print(f"Chapter {index} downloaded!")
    return main_folder
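
# Stopping only after several consecutive empty chapters tolerates gaps in the
# numbering (a missing chapter-0, say) without aborting the whole run.
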
if __name__ == "__main__":
    url = "https://mangadistrict.com/read-scan/boarding-diary-uncensored-fan-edition/v1-high-quality-optimized-for-moderate-data-usage/chapter-1"
    main(url)
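
# To reuse the downloader from another module (hypothetical file name and url;
# adjust both to your setup):
#
#   from manga_downloader import main  # assuming this file is manga_downloader.py
#   folder = main("https://mangadistrict.com/read-scan/some-title/chapter-1")
#   print(f"Chapters saved under {folder}/")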