# Manga_download / download_manga.py
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse
HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
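# A browser-like User-Agent is sent with every request, since many manga sites
# reject clients that identify themselves as plain Python scripts.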
def get_main_folder_name(url):
    """Extracts the manga title segment (the part after "read-scan") from the url."""
    path = urlparse(url).path.strip("/")  # Extract and clean the path
    parts = path.split("/")  # Split the path by "/"
    # Ensure "read-scan" exists and get the next part as the main folder
    if "read-scan" in parts:
        index = parts.index("read-scan")  # Find "read-scan" position
        if index + 1 < len(parts):  # Ensure there's a next part
            return parts[index + 1]  # Main folder name
    return None  # Return None if not found
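# Example (hypothetical URL):
#   get_main_folder_name("https://example.com/read-scan/some-title/chapter-1")
#   returns "some-title"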
def get_base_url(url):
"""Extracts the base url up to 'chapter-' from the given url."""
base = url.split("chapter-")[0] # Get everything before 'chapter-'
return base + "chapter-" # Ensure 'chapter-' is included at the end
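# Example (hypothetical URL):
#   get_base_url("https://example.com/read-scan/some-title/chapter-12/")
#   returns "https://example.com/read-scan/some-title/chapter-"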
def get_image_links(url):
    """Fetches all image urls from a given chapter page."""
    response = requests.get(url, headers=HEADERS, timeout=30)
    if response.status_code != 200:
        print(f"Failed to fetch page {url}: HTTP {response.status_code}")
        return []
    soup = BeautifulSoup(response.text, "html.parser")
    images = soup.find_all("img", class_="wp-manga-chapter-img")
    # strip() guards against the stray whitespace these pages often leave in src attributes
    return [img["src"].strip() for img in images if "src" in img.attrs]
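# Note: the "wp-manga-chapter-img" class is used by WordPress manga themes
# (e.g. Madara-based sites); pages built on other themes will yield no matches.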
def get_chapter_name(url):
    """Extracts and formats the chapter name from the url."""
    parts = url.rstrip("/").split("/")
    chapter_number = parts[-1].replace("chapter-", "")
    try:
        return f"chapter-{int(chapter_number):03d}"  # Format as chapter-001, chapter-002, etc.
    except ValueError:
        return f"chapter-{chapter_number}"  # Keep non-numeric chapter numbers as-is
def download_images(image_links, folder_name, main_folder):
"""Downloads all images into the designated chapter folder."""
chapter_folder = os.path.join(main_folder, folder_name)
os.makedirs(chapter_folder, exist_ok=True)
for idx, img_url in enumerate(image_links):
try:
            response = requests.get(img_url, headers=HEADERS, stream=True, timeout=30)
if response.status_code == 200:
                img_path = os.path.join(chapter_folder, f"{idx + 1:03d}.jpg")  # Zero-pad so files sort in page order; .jpg is assumed
with open(img_path, "wb") as f:
for chunk in response.iter_content(1024):
f.write(chunk)
print(f"Downloaded: {img_path}")
else:
print(f"Failed to download {img_url}: HTTP {response.status_code}")
except Exception as e:
print(f"Error downloading {img_url}: {e}")
def get_latest_chapter(base_url):
"""Fetches the latest available chapter number from the manga's main page."""
    series_url = base_url[: -len("chapter-")]  # Drop the trailing "chapter-" to get the series page
    response = requests.get(series_url, headers=HEADERS, timeout=30)
if response.status_code != 200:
print("Failed to fetch the main manga page.")
return None
soup = BeautifulSoup(response.text, "html.parser")
links = soup.find_all("a", href=True)
chapter_numbers = []
for link in links:
if "chapter-" in link["href"]:
try:
chapter_num = int(link["href"].split("chapter-")[-1].rstrip("/"))
chapter_numbers.append(chapter_num)
except ValueError:
continue
return max(chapter_numbers) if chapter_numbers else None
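# Note: only hrefs ending in a plain integer after "chapter-" are counted;
# decimal or special chapters (e.g. "chapter-10-5") are skipped by the ValueError guard.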
def download_chapter(chapter_url, main_folder):
"""Downloads an entire manga chapter."""
chapter_name = get_chapter_name(chapter_url)
image_links = get_image_links(chapter_url)
if image_links:
download_images(image_links, chapter_name, main_folder)
print(f"All images saved in folder: {chapter_name}")
else:
print(f"No images found for {chapter_name}.")
def main(url):
    # Main folder to store all chapters
    main_folder = get_main_folder_name(url)
    if main_folder is None:
        print(f"Could not determine the manga folder name from {url}.")
        return None
    base_url = get_base_url(url)
    os.makedirs(main_folder, exist_ok=True)
latest_chapter = get_latest_chapter(base_url)
    if latest_chapter is None:
        print("Could not determine the latest chapter.")
        return None
# Track consecutive empty chapters
empty_chapter_count = 0
max_empty_chapters = 5 # Stop after 5 empty chapters
    for index in range(0, latest_chapter + 1):  # chapter-0 is tried too, in case the series has one
chapter_url = f"{base_url}{index}/"
print(f"Downloading {chapter_url}...")
image_links = get_image_links(chapter_url)
if not image_links:
print(f"No images found for chapter-{index}.")
empty_chapter_count += 1
if empty_chapter_count >= max_empty_chapters:
print("No images found in the last 5 chapters. Stopping download.")
break
else:
empty_chapter_count = 0 # Reset if a chapter contains images
            download_chapter(chapter_url, main_folder)
print(f"Chapter {index} downloaded!")
return main_folder
if __name__ == "__main__":
url="https://mangadistrict.com/read-scan/boarding-diary-uncensored-fan-edition/v1-high-quality-optimized-for-moderate-data-usage/chapter-1"
# url="hi"
main(url)
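# Running this file directly downloads every chapter of the sample series into
# a folder named after its title (here: "boarding-diary-uncensored-fan-edition").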