import asyncio
import json
import os

import requests
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright


def create_design_directory(design_id):
    """Create a directory for the design if it doesn't exist."""
    directory = f"designs/{design_id}"
    os.makedirs(directory, exist_ok=True)
    return directory


def save_css(url, directory):
    """Download and save the design's CSS file."""
    response = requests.get(url)
    response.raise_for_status()  # avoid saving an error page as style.css
    css_path = f"{directory}/style.css"
    with open(css_path, "w", encoding="utf-8") as f:
        f.write(response.text)


def save_metadata(metadata, directory):
    """Save design metadata as JSON."""
    metadata_path = f"{directory}/metadata.json"
    with open(metadata_path, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=4)


async def take_screenshot(url, directory):
    """Take screenshots of the design at desktop and mobile widths."""
    async with async_playwright() as p:
        browser = await p.chromium.launch()

        # Desktop screenshot (1600px width)
        page = await browser.new_page(viewport={"width": 1600, "height": 1080})
        await page.goto(url)
        # Wait for the network to be idle (no requests for at least 500ms)
        await page.wait_for_load_state("networkidle")
        # Extra delay so lazily loaded background images finish rendering
        await page.wait_for_timeout(3000)
        # Match the viewport to the full page height so viewport-sized
        # backgrounds cover the whole capture
        height = await page.evaluate("document.body.scrollHeight")
        await page.set_viewport_size({"width": 1600, "height": int(height)})
        await page.screenshot(path=f"{directory}/screenshot_desktop.png", full_page=True)

        # Mobile screenshot (480px width)
        page = await browser.new_page(viewport={"width": 480, "height": 1080})
        await page.goto(url)
        await page.wait_for_load_state("networkidle")
        await page.wait_for_timeout(2000)
        height = await page.evaluate("document.body.scrollHeight")
        await page.set_viewport_size({"width": 480, "height": int(height)})
        await page.screenshot(path=f"{directory}/screenshot_mobile.png", full_page=True)

        await browser.close()


async def scrape_design(design_id):
    """Scrape a single design: its page, CSS, metadata, and screenshots."""
    design_url = f"https://www.csszengarden.com/{design_id}"
    css_url = f"https://www.csszengarden.com/{design_id}/{design_id}.css"

    directory = create_design_directory(design_id)

    # Fetch and parse the design page; the soup is kept so richer metadata
    # (title, author, etc.) can be extracted here later
    response = requests.get(design_url)
    print(f"{design_id}: Response status: {response.status_code}")
    soup = BeautifulSoup(response.text, "html.parser")

    metadata = {
        "id": design_id,
        "url": design_url,
        "css_url": css_url,
    }

    save_css(css_url, directory)
    save_metadata(metadata, directory)
    await take_screenshot(design_url, directory)


async def main():
    """Scrape multiple designs, continuing past per-design failures."""
    os.makedirs("designs", exist_ok=True)

    design_ids = ["221", "220", "219"]  # Add more IDs as needed

    for design_id in design_ids:
        try:
            print(f"Scraping design {design_id}...")
            await scrape_design(design_id)
            print(f"Successfully scraped design {design_id}")
        except Exception as e:
            print(f"Error scraping design {design_id}: {e}")


if __name__ == "__main__":
    asyncio.run(main())
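
# Usage sketch (the filename scrape_zengarden.py is illustrative, not taken
# from the original script). Requires the HTTP/parsing dependencies plus a
# Playwright browser build:
#
#   pip install requests beautifulsoup4 playwright
#   playwright install chromium
#   python scrape_zengarden.py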