# Zen Garden Design Analysis

## 1. Scrape

To collect our design data, we scrape csszengarden.com for design screenshots and associated styles. With over 200 designs, this should give us a good training set on how apply different styles and techniques.

In [2]:
from data_collection.scraper import scrape_design
import asyncio

async def test_scraper(ids, batch_size=5):
 """
 Asynchronously scrape designs in batches to avoid overwhelming resources.
 
 Args:
 ids (list): List of design IDs to scrape
 batch_size (int): Number of designs to process concurrently
 """
 print(f"Starting scrape of {len(ids)} designs...")
 
 successful = 0
 failed = 0
 
 # Process in batches
 for i in range(0, len(ids), batch_size):
 batch = ids[i:i + batch_size]
 print(f"\nProcessing batch {i//batch_size + 1} ({len(batch)} designs)...")
 
 # Create tasks for current batch
 tasks = [scrape_design(design_id) for design_id in batch]
 
 # Run batch tasks concurrently
 results = await asyncio.gather(*tasks, return_exceptions=True)
 
 # Process batch results
 for design_id, result in zip(batch, results):
 if isinstance(result, Exception):
 print(f"Error scraping design {design_id}: {str(result)}")
 failed += 1
 else:
 print(f"Successfully scraped design {design_id}")
 successful += 1
 
 # Optional: Add delay between batches
 # await asyncio.sleep(1)
 
 print(f"\nScraping complete:")
 print(f"Successful: {successful}")
 print(f"Failed: {failed}")
 print(f"Total: {len(ids)}")

# Example usage with batch processing:
test_set = [f"{i:03d}" for i in range(1, 222)]
await test_scraper(test_set, batch_size=5)

Starting scrape of 221 designs...

Processing batch 1 (5 designs)...
001: Response status: 200
002: Response status: 200
003: Response status: 200
004: Response status: 200
005: Response status: 200
Successfully scraped design 001
Successfully scraped design 002
Successfully scraped design 003
Successfully scraped design 004
Successfully scraped design 005

Processing batch 2 (5 designs)...
006: Response status: 200
007: Response status: 200
008: Response status: 200
009: Response status: 200
010: Response status: 200
Successfully scraped design 006
Successfully scraped design 007
Successfully scraped design 008
Successfully scraped design 009
Successfully scraped design 010

Processing batch 3 (5 designs)...
011: Response status: 200
012: Response status: 200
013: Response status: 200
014: Response status: 200
015: Response status: 200
Successfully scraped design 011
Successfully scraped design 012
Successfully scraped design 013
Successfully scraped design 014
Successfully scraped de

Now for a valuable additional step, we can extract the name of the title and its author from the CSS comments. This helps us give credit to the creative people whose work we are using and appreciating.

In [1]:
from data_collection.analyze_designs import attribute_designs
await attribute_designs()

Found 221 designs to check
Skipping design 135 - already attributed
Skipping design 132 - already attributed
Skipping design 104 - already attributed
Skipping design 103 - already attributed
Skipping design 168 - already attributed
Skipping design 157 - already attributed
Skipping design 150 - already attributed
Skipping design 159 - already attributed
Skipping design 166 - already attributed
Skipping design 192 - already attributed
Skipping design 195 - already attributed
Skipping design 161 - already attributed
Skipping design 102 - already attributed
Skipping design 105 - already attributed
Skipping design 133 - already attributed
Skipping design 134 - already attributed
Skipping design 160 - already attributed
Skipping design 194 - already attributed
Skipping design 158 - already attributed
Skipping design 193 - already attributed
Skipping design 167 - already attributed
Skipping design 151 - already attributed
Skipping design 169 - already attributed
Skipping design 156 - already 

## 2. Analyze

Now, using the screenshots and styles we downloaded, we analyze the design for characteristics that will be useful for retrieval. Our analyzer can perform a basic analysis and a detailed analysis, which will be used to test and illustrate results on the data set later.

In [3]:
from data_collection.analyze_designs import analyze_screenshot
from pathlib import Path
import asyncio

async def test_analyzer(design_ids, batch_size=5, detailed=True, output_path=None):
 """
 Asynchronously analyze designs in batches.
 
 Args:
 design_ids (list): List of design IDs to analyze
 batch_size (int): Number of designs to process concurrently
 detailed (bool): Whether to use detailed analysis
 output_path (Path): Where to save analysis results
 """
 print(f"Starting analysis of {len(design_ids)} designs...")
 
 successful = 0
 failed = 0
 
 # Process in batches
 for i in range(0, len(design_ids), batch_size):
 batch = design_ids[i:i + batch_size]
 print(f"\nProcessing batch {i//batch_size + 1} ({len(batch)} designs)...")
 
 # Create tasks for current batch
 tasks = [
 analyze_screenshot(
 design_id=design_id,
 design_path=Path(f"scraped_designs/{design_id}"),
 detailed=detailed,
 output_path=output_path
 ) for design_id in batch
 ]
 
 # Run batch tasks concurrently
 results = await asyncio.gather(*tasks, return_exceptions=True)
 
 # Process batch results
 for design_id, result in zip(batch, results):
 if isinstance(result, Exception):
 print(f"Error analyzing design {design_id}: {str(result)}")
 failed += 1
 elif result[1] is not None: # Check if analysis was successful
 successful += 1
 print(f"\nAnalysis for design {design_id}:")
 if detailed:
 print(f"Description: {result[1]}") # summary from description object
 else:
 print(f"Description: {result[1]}") # direct description string
 print(f"Categories: {', '.join(result[2])}")
 print(f"Visual Characteristics: {', '.join(result[3])}")
 else:
 print(f"Failed to analyze design {design_id}")
 failed += 1
 
 print(f"\nAnalysis complete:")
 print(f"Successful: {successful}")
 print(f"Failed: {failed}")
 print(f"Total: {len(design_ids)}")


Now we can run the analysis on a range of our choosing.

In [5]:

# Test with detailed analysis
print("Running detailed analysis...")
analysis_test_set = [f"{i:03d}" for i in range(1, 200)]
await test_analyzer(
 design_ids=analysis_test_set,
 batch_size=5,
 detailed=True,
 output_path=Path("analyses/detailed")
)

# Test with basic analysis
# print("\nRunning basic analysis...")
# await test_analyzer(
# design_ids=analysis_test_set,
# batch_size=5,
# detailed=False,
# output_path=Path("analyses/basic")
# )

Running detailed analysis...
Starting analysis of 199 designs...

Processing batch 1 (5 designs)...
Analyzing design 001...
Analyzing design 002...
Analyzing design 003...
Analyzing design 004...
Analyzing design 005...
Successfully analyzed design 004
Successfully analyzed design 002
Successfully analyzed design 001
Successfully analyzed design 005
Successfully analyzed design 003

Analysis for design 001:
Description: A serene web design that combines Eastern aesthetic elements with minimalist layout principles, featuring soft colors and symbolic imagery like lotus flowers and a traditional torii gate.
Categories: minimalist, eastern-inspired, elegant, structured, harmonious, zen-aesthetic
Visual Characteristics: pastel-palette, symbolic-imagery, balanced-composition, negative-space, typographic-hierarchy, watercolor-effects

Analysis for design 002:
Description: A serene, minimalist web design featuring a soft peach color palette with an Eastern aesthetic influence, centered around 

In [7]:
from pathlib import Path
import json

# Define paths
analyses_dir = Path("analyses/detailed")
scraped_dir = Path("scraped_designs")

# Iterate through all design folders in analyses
for design_dir in analyses_dir.iterdir():
 if not design_dir.is_dir():
 continue
 
 design_id = design_dir.name
 analysis_path = design_dir / "metadata.json"
 scraped_path = scraped_dir / design_id / "metadata.json"
 
 # Check if both files exist
 if not all(p.exists() for p in [analysis_path, scraped_path]):
 print(f"Missing metadata file for design {design_id}")
 continue
 
 try:
 # Load both JSON files
 with open(analysis_path) as f:
 analysis_data = json.load(f)
 with open(scraped_path) as f:
 scraped_data = json.load(f)
 
 # Merge the data (scraped data will not overwrite analysis data if keys conflict)
 merged_data = {**scraped_data, **analysis_data}
 
 # Write back to analysis file
 with open(analysis_path, 'w') as f:
 json.dump(merged_data, f, indent=2)
 
 print(f"Successfully merged metadata for design {design_id}")
 
 except Exception as e:
 print(f"Error processing design {design_id}: {str(e)}")

print("Merge complete!")

Successfully merged metadata for design 135
Successfully merged metadata for design 132
Successfully merged metadata for design 104
Successfully merged metadata for design 103
Successfully merged metadata for design 168
Successfully merged metadata for design 157
Successfully merged metadata for design 150
Successfully merged metadata for design 159
Successfully merged metadata for design 166
Successfully merged metadata for design 192
Successfully merged metadata for design 195
Successfully merged metadata for design 161
Successfully merged metadata for design 105
Successfully merged metadata for design 133
Successfully merged metadata for design 134
Successfully merged metadata for design 160
Successfully merged metadata for design 158
Successfully merged metadata for design 193
Successfully merged metadata for design 167
Successfully merged metadata for design 151
Successfully merged metadata for design 169
Successfully merged metadata for design 156
Successfully merged metadata for