# imagineui/data_collection/analyze_designs.py
# Author: Technologic101
# Commit f93c694: update analysis function to accept list
import os
import json
from pathlib import Path
import asyncio
import base64
from openai import AsyncOpenAI
from dotenv import load_dotenv
from anthropic import AsyncAnthropic
from .prompts import get_prompt
# Load environment variables (e.g. ANTHROPIC_API_KEY) from a local .env file.
load_dotenv()
# Shared async Anthropic client used by every function in this module.
client = AsyncAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
# Claude model used for the vision-based screenshot analysis.
VISION_MODEL = "claude-3-7-sonnet-20250219"
async def analyze_screenshot(design_id: str, design_path: Path, detailed: bool = True, output_path: Path | None = None):
    """
    Analyze a design's desktop and mobile screenshots with the vision model.

    Sends both screenshots to Claude, parses the JSON analysis from the
    response, saves it to ``<output_path>/<design_id>/metadata.json``, and
    returns the key analysis fields.

    Args:
        design_id (str): ID of the design to analyze
        design_path (Path): Path to the design's source files
        detailed (bool): Whether to use detailed or core analysis prompt
        output_path (Path): Path to save analysis results. If None, uses analyses/default

    Returns:
        tuple: (design_id, description, categories, visual_characteristics).
            The last three elements are None on any failure.
    """
    failure = (design_id, None, None, None)
    try:
        # Use output_path if provided, otherwise use default analyses path
        save_path = output_path or Path("analyses/default")
        # Ensure output directory exists (mkdir is a no-op when it does)
        save_path.mkdir(parents=True, exist_ok=True)

        # All three source files must exist before we spend an API call.
        metadata_path = design_path / "metadata.json"
        desktop_img = design_path / "screenshot_desktop.png"
        mobile_img = design_path / "screenshot_mobile.png"
        if not all(f.exists() for f in [metadata_path, desktop_img, mobile_img]):
            print(f"Missing required files for design {design_id}")
            return failure

        # Read and base64-encode both screenshots for the API payload.
        # (The original also loaded metadata.json here, but never used it.)
        try:
            desktop_base64 = base64.b64encode(desktop_img.read_bytes()).decode('utf-8')
            mobile_base64 = base64.b64encode(mobile_img.read_bytes()).decode('utf-8')
        except Exception as e:
            print(f"Error reading images for design {design_id}: {str(e)}")
            return failure

        print(f"Analyzing design {design_id}...")
        # Get response using specified detail level
        response = await client.messages.create(
            model=VISION_MODEL,
            max_tokens=8000 if detailed else 4000,  # More tokens for detailed analysis
            system=get_prompt(detailed=detailed),
            messages=[{
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Analyze this visual design. Output only the JSON object."
                    },
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": desktop_base64
                        }
                    },
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": mobile_base64
                        }
                    }
                ]
            }]
        )
        response_content = response.content[0].text
        if not response_content:
            print(f"Empty response for design {design_id}")
            return failure

        # Strip markdown code fences if the model wrapped the JSON in one.
        # Handles both ```json and bare ``` fences, consistent with
        # attribute_designs().
        if "```json" in response_content:
            response_content = response_content.split("```json")[1].split("```")[0].strip()
        elif "```" in response_content:
            response_content = response_content.split("```")[1].strip()

        try:
            analysis = json.loads(response_content)

            # Save the analysis to <save_path>/<design_id>/metadata.json
            design_output_path = save_path / design_id
            design_output_path.mkdir(parents=True, exist_ok=True)
            output_metadata_path = design_output_path / "metadata.json"
            with open(output_metadata_path, "w") as f:
                json.dump(analysis, f, indent=2)
            print(f"Successfully analyzed design {design_id}")

            # The detailed prompt nests the summary under "description";
            # the core prompt returns a flat description value.
            description = analysis["description"]["summary"] if detailed else analysis["description"]
            return (
                design_id,
                description,
                analysis["categories"],
                analysis["visual_characteristics"]
            )
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON response for design {design_id}: {str(e)}")
            return failure
    except Exception as e:
        print(f"Error processing design {design_id}: {str(e)}")
        return failure
async def attribute_designs(design_ids: list[str] | None = None):
    """
    Process scraped designs to extract title and author from CSS comments.
    Adds these attributes to the existing metadata.json files.
    Skips designs that already have both title and author.

    Args:
        design_ids (list[str], optional): List of design IDs to process. If None, processes all designs.
    """
    designs_dir = Path("scraped_designs")
    if not designs_dir.exists():
        print("Scraped designs directory not found!")
        return

    # Get design directories based on provided IDs or all directories
    if design_ids:
        design_dirs = [designs_dir / design_id for design_id in design_ids]
        print(f"Processing {len(design_dirs)} specified designs")
    else:
        design_dirs = [d for d in designs_dir.iterdir() if d.is_dir()]
        print(f"Found {len(design_dirs)} designs to check")
    if not design_dirs:
        print("No design directories found!")
        return

    processed = 0
    skipped = 0
    failed = 0
    for design_dir in design_dirs:
        try:
            # Both the CSS source and the metadata file are required.
            css_path = design_dir / "style.css"
            metadata_path = design_dir / "metadata.json"
            if not all(f.exists() for f in [css_path, metadata_path]):
                print(f"Missing required files for design {design_dir.name}")
                failed += 1
                continue

            # Check existing metadata
            with open(metadata_path, "r") as f:
                metadata = json.load(f)
            # Skip if both title and author already exist and aren't default values
            if (metadata.get("title") and metadata.get("author") and
                metadata["title"] != "Untitled" and metadata["author"] != "Unknown"):
                print(f"Skipping design {design_dir.name} - already attributed")
                skipped += 1
                continue

            # Read CSS file
            with open(css_path, "r", encoding="utf-8") as f:
                css_content = f.read()

            # Extract title and author using Claude
            response = await client.messages.create(
                model="claude-3-haiku-20240307",
                max_tokens=100,
                system="You are a helpful assistant that extracts title and author information from CSS comments. Return ONLY a JSON object with 'title' and 'author' fields, nothing else.",
                messages=[{
                    "role": "user",
                    "content": f"Extract the title and author from these CSS comments. Return only the JSON object, no markdown:\n\n{css_content}"
                }]
            )

            # Get response text and strip markdown code fences if present
            response_text = response.content[0].text.strip()
            if "```json" in response_text:
                response_text = response_text.split("```json")[1].split("```")[0].strip()
            elif "```" in response_text:
                response_text = response_text.split("```")[1].strip()

            try:
                attribution = json.loads(response_text)
            except json.JSONDecodeError:
                # Bug fix: previously fell through and used the unbound
                # `attribution`, raising NameError. Count as failed and move on.
                print(f"Failed to parse JSON for design {design_dir.name}. Response was:")
                print(response_text)
                failed += 1
                continue

            # Update metadata with extracted (or default) attribution
            metadata.update({
                "title": attribution.get("title", "Untitled"),
                "author": attribution.get("author", "Unknown")
            })
            # Save updated metadata
            with open(metadata_path, "w") as f:
                json.dump(metadata, f, indent=2)
            print(f"Successfully attributed design {design_dir.name}")
            print(f"Title: {attribution.get('title', 'Untitled')}")
            print(f"Author: {attribution.get('author', 'Unknown')}\n")
            processed += 1
        except Exception as e:
            print(f"Error processing design {design_dir.name}: {str(e)}")
            failed += 1

    print("\nAttribution complete!")
    print(f"Processed: {processed}")
    print(f"Skipped: {skipped}")
    print(f"Failed: {failed}")
    print(f"Total: {len(design_dirs)}")
async def main():
    """
    Analyze every design directory under ./designs concurrently and print
    a per-design summary plus an overall success count.
    """
    designs_dir = Path("designs")
    if not designs_dir.exists():
        print("Designs directory not found!")
        return

    # Get all design directories
    design_dirs = [d for d in designs_dir.iterdir() if d.is_dir()]
    if not design_dirs:
        print("No design directories found!")
        return
    print(f"Found {len(design_dirs)} designs to analyze")

    # Analyze all designs concurrently; one task per design directory.
    # (Removed an unused hard-coded design_ids list for IDs 001-050.)
    tasks = [analyze_screenshot(d.name, d) for d in design_dirs]
    results = await asyncio.gather(*tasks)

    # Print summary; a None description marks a failed analysis.
    successful = 0
    for design_id, desc, cats, _ in results:
        if desc is not None:
            successful += 1
            print(f"\nDesign {design_id}:")
            print(f"Description: {desc}")
            print(f"Categories: {', '.join(cats)}")
    print(f"\nSuccessfully analyzed {successful} out of {len(design_dirs)} designs")
if __name__ == "__main__":
    asyncio.run(main())