# Art-Historian-AI
#
# Sleeping
#
# File size: 24,939 Bytes

import gradio as gr
from smolagents import CodeAgent, InferenceClientModel, tool
from PIL import Image
import sys
import traceback
import torch

# Pick the compute device once at import time and log the runtime environment.
_cuda_available = torch.cuda.is_available()
device = "cuda" if _cuda_available else "cpu"
print(f"Python version: {sys.version}")
print(f"Device: {device}")
print(f"CUDA available: {_cuda_available}")

@tool
def art_style_detector(visual_description: str) -> str:
    """
    Identifies the artistic style of an artwork from keywords in its description.

    Every known style owns a list of keywords (visual traits and artist names).
    A style scores one point per keyword that occurs as a case-insensitive
    substring of the description. The best-scoring style is reported as the
    primary style and the next two as secondary influences; styles with equal
    scores keep the order in which they are listed in the table below.

    Args:
        visual_description: Visual description of the artwork including colors, composition, and technique

    Returns:
        A Markdown string naming the primary style (plus up to two secondary styles with their scores), or a fallback message when no keyword matches.
    """
    styles = {
        "Renaissance": ["perspective", "realistic", "classical", "religious", "balanced", "proportion", "leonardo", "michelangelo"],
        "Baroque": ["dramatic", "chiaroscuro", "movement", "emotional", "ornate", "theatrical", "caravaggio", "rubens"],
        "Impressionism": ["light", "color", "brushstrokes", "outdoor", "spontaneous", "fleeting", "monet", "renoir"],
        "Expressionism": ["emotion", "distorted", "subjective", "bold colors", "psychological", "kandinsky", "munch"],
        "Cubism": ["geometric", "fragmented", "multiple perspectives", "abstract", "analytical", "picasso", "braque"],
        "Surrealism": ["dreamlike", "unconscious", "bizarre", "symbolic", "fantastic", "dali", "magritte"],
        # "pollock" was previously misspelled "pollack", so the artist never matched.
        "Abstract": ["non-representational", "geometric", "color field", "experimental", "pollock", "rothko"],
        "Pop Art": ["commercial", "popular culture", "bright colors", "mass production", "warhol", "lichtenstein"],
        "Minimalism": ["simple", "clean", "geometric", "reduced", "essential", "minimal"],
    }

    # Treat a missing description like an empty one instead of raising.
    desc = (visual_description or "").lower()

    scored = [
        (style, sum(1 for kw in keywords if kw in desc))
        for style, keywords in styles.items()
    ]
    # sorted() is stable, so ties stay in table order even with reverse=True.
    matches = sorted(
        (pair for pair in scored if pair[1] > 0),
        key=lambda pair: pair[1],
        reverse=True,
    )

    if not matches:
        return "Style analysis requires more specific visual details"

    result = f"**Primary Style: {matches[0][0]}**\n"
    if len(matches) > 1:
        others = [f"{style} ({score})" for style, score in matches[1:3]]
        result += f"Secondary influences: {', '.join(others)}"
    return result

@tool
def historical_context_provider(art_period: str) -> str:
    """
    Provides historical context for an artwork period.

    The lookup is a case-insensitive substring test: the first known period
    name (in table order) that occurs inside the given text wins, so inputs
    such as "Baroque painting" resolve to the Baroque entry. Unknown periods
    get a generic sentence that echoes the input.

    Args:
        art_period: The name of the art period or artistic movement to analyze

    Returns:
        A Markdown paragraph with dates, key artists and characteristics of the matched period, or a generic fallback sentence.
    """
    contexts = {
        "renaissance": "**Renaissance (14th-17th century)**: Humanism, scientific revolution, revival of classical antiquity. Key artists: Leonardo da Vinci, Michelangelo, Raphael. Characteristics: Linear perspective, anatomical accuracy, balanced compositions.",
        "baroque": "**Baroque (17th-18th century)**: Counter-Reformation, dramatic storytelling, chiaroscuro technique. Key artists: Caravaggio, Bernini, Rubens. Characteristics: Dynamic movement, emotional intensity, theatrical lighting.",
        "impressionism": "**Impressionism (1860s-1880s)**: Plein air painting, light and color focus, brushwork visible. Key artists: Monet, Renoir, Degas. Characteristics: Capturing fleeting moments, natural light, loose brushstrokes.",
        "expressionism": "**Expressionism (early 20th century)**: Emotional expression over realism, psychological exploration. Key artists: Kandinsky, Munch, Kirchner. Characteristics: Bold colors, distorted forms, subjective perspective.",
        "cubism": "**Cubism (1907-1920s)**: Multiple perspectives, geometric forms, fragmented reality. Key artists: Picasso, Braque. Characteristics: Analytical and synthetic phases, collage elements.",
        "surrealism": "**Surrealism (1920s-1940s)**: Unconscious mind, dreams, automatic drawing. Key artists: Dalí, Magritte, Ernst. Characteristics: Bizarre imagery, psychological exploration, fantastic elements.",
        "abstract": "**Abstract Art (20th century)**: Non-representational, emphasis on color, form, line. Key artists: Kandinsky, Mondrian, Pollock. Characteristics: Pure visual elements, emotional expression through abstraction.",
        "pop art": "**Pop Art (1950s-1960s)**: Popular culture, mass media, commercial aesthetics. Key artists: Warhol, Lichtenstein. Characteristics: Bright colors, repetition, everyday objects as art.",
        # Added so the "Minimalism" style reported by the style detector has a matching entry.
        "minimalism": "**Minimalism (1960s-1970s)**: Reaction against Abstract Expressionism, industrial materials, reduction to essentials. Key artists: Donald Judd, Agnes Martin, Frank Stella. Characteristics: Geometric forms, limited palette, absence of overt symbolism.",
    }

    # Treat a missing period like an empty one instead of raising.
    period_lower = (art_period or "").lower()
    matched = next(
        (context for period, context in contexts.items() if period in period_lower),
        None,
    )
    if matched is not None:
        return matched

    return f"**{art_period}**: A significant artistic movement with unique cultural and historical importance. Analysis would benefit from more specific period identification."

@tool
def symbolism_interpreter(visual_elements: str) -> str:
    """
    Interprets symbolic meanings and iconography in artwork.

    Each known symbol is searched for as a whole word (case-insensitive, with
    an optional plural "s"/"es"), so "red" is found in "a red cloak" but not in
    "rendered", and "cross" is not triggered by "across". Matches are listed in
    the order of the symbol table below.

    Args:
        visual_elements: Description of symbolic visual elements in the artwork

    Returns:
        A Markdown bullet list of the symbols found with their traditional meanings, or a generic note when no known symbol is mentioned.
    """
    # Imported locally so this block does not rely on a file-level import.
    import re

    symbols = {
        "light": "divine presence, enlightenment, truth, hope, spiritual awakening",
        "darkness": "mystery, evil, unconscious, melancholy, death, ignorance",
        "red": "passion, blood, love, war, power, sacrifice, life force",
        "blue": "divinity, tranquility, melancholy, infinity, spirituality, wisdom",
        "white": "purity, innocence, peace, divinity, rebirth, spiritual perfection",
        "gold": "divine light, wealth, immortality, sacred, royal power",
        "crown": "authority, divine right, royal power, achievement, victory",
        "flowers": "beauty, mortality, seasons, femininity, love, fleeting life",
        "skull": "memento mori, mortality, wisdom, vanitas, death's inevitability",
        "cross": "Christianity, sacrifice, redemption, intersection of earthly and divine",
        "water": "life, purification, emotion, unconscious mind, baptism, renewal",
        "dove": "peace, Holy Spirit, purity, love, divine messenger",
        "serpent": "temptation, evil, wisdom, medicine, transformation, rebirth",
        "lion": "courage, strength, royalty, Christ, divine power",
        "lamb": "innocence, sacrifice, Christ, purity, gentleness",
        "apple": "temptation, knowledge, sin, earth, sensuality",
        "mirror": "vanity, truth, self-knowledge, reflection, soul",
        "candle": "life, enlightenment, spirituality, time",
        "black": "death, mystery, elegance, unknown, mourning",
    }

    # Treat missing input like an empty description instead of raising.
    elements_lower = (visual_elements or "").lower()

    # \b…\b keeps short symbols from matching inside unrelated words;
    # (?:e?s)? still accepts simple plurals such as "doves" or "crosses".
    found_symbols = [
        f"• **{symbol.title()}**: {meaning}"
        for symbol, meaning in symbols.items()
        if re.search(rf"\b{re.escape(symbol)}(?:e?s)?\b", elements_lower)
    ]

    if found_symbols:
        return "**Symbolic Interpretations:**\n" + "\n".join(found_symbols)

    return "**Symbolic Analysis**: The artwork may contain personal, cultural, or period-specific symbols requiring deeper contextual analysis."

@tool
def technical_analysis_tool(composition_details: str) -> str:
    """
    Analyzes technical and compositional aspects of artwork.

    Media/techniques are matched as case-insensitive substrings of the input;
    compositional terms are matched after removing every space from both the
    term and the input. The report always ends with the raw input echoed as
    observational notes and a fixed recommendation line.

    Args:
        composition_details: Description of technical composition, color usage, and artistic techniques
    """
    techniques = {
        "oil": "Rich color saturation, smooth blending, detailed work, layered application",
        "watercolor": "Transparent layers, luminous effects, spontaneous flow, delicate washes",
        "acrylic": "Vibrant colors, quick drying, versatile techniques, modern medium",
        "tempera": "Precise details, bright colors, quick drying, pre-oil painting era",
        "fresco": "Wall painting, wet plaster application, permanent integration, monumental scale",
        "pastels": "Soft texture, direct color application, atmospheric effects",
        "chiaroscuro": "dramatic light-dark contrast, three-dimensional modeling, emotional intensity",
        "sfumato": "subtle gradations, atmospheric perspective, Leonardo's technique",
        "impasto": "thick paint application, textural effects, visible brushstrokes",
    }

    composition_elements = {
        "triangular": "stable, harmonious, classical composition",
        "diagonal": "dynamic, movement, baroque influence",
        "circular": "unity, completeness, divine perfection",
        "golden ratio": "mathematical harmony, natural proportions, aesthetic perfection",
        "rule of thirds": "balanced composition, visual interest, modern technique",
    }

    lowered = composition_details.lower()
    # Space-free copy used for the compositional terms.
    squeezed = lowered.replace(" ", "")

    technique_lines = [
        f"• **{name.title()}**: {note}"
        for name, note in techniques.items()
        if name in lowered
    ]
    composition_lines = [
        f"• **{name.title()}**: {note}"
        for name, note in composition_elements.items()
        if name.replace(" ", "") in squeezed
    ]

    parts = ["**Technical Analysis:**\n"]
    if technique_lines:
        parts.append("**Techniques Identified:**\n" + "\n".join(technique_lines) + "\n\n")
    if composition_lines:
        parts.append("**Compositional Elements:**\n" + "\n".join(composition_lines) + "\n\n")
    parts.append(f"**Observational Notes**: {composition_details}\n")
    parts.append("**Recommendation**: Consider analyzing brushwork, color harmony, spatial relationships, and overall execution quality.")

    return "".join(parts)

# Build the SmolAgent once at import time. Any failure is reported and recorded
# in agent_ready so the rest of the module can check it before using art_agent.
try:
    print("🎨 Creating SmolAgent for artwork analysis...")
    model = InferenceClientModel(model_id="meta-llama/Llama-3.2-3B-Instruct")

    art_agent = CodeAgent(
        model=model,
        tools=[
            art_style_detector,
            historical_context_provider,
            symbolism_interpreter,
            technical_analysis_tool,
        ],
        max_steps=3,
        add_base_tools=False,
    )

    print("✅ SmolAgent created successfully!")
    agent_ready = True

except Exception as exc:
    agent_ready = False
    print(f"❌ Agent creation failed: {exc}")

# Optional GPU-based vision analysis (if available)
def _load_vision_model():
    """Return the (model, processor) pair for Phi-3.5-vision, loading it only once.

    The pair is stored on the function object after the first successful load,
    so later calls reuse the already-loaded weights instead of reloading them.
    """
    cached = getattr(_load_vision_model, "_cached", None)
    if cached is None:
        # Imported lazily: this is only reached on the CUDA path.
        from transformers import AutoModelForCausalLM, AutoProcessor

        vision_model = AutoModelForCausalLM.from_pretrained(
            "microsoft/Phi-3.5-vision-instruct",
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
            _attn_implementation="eager",
            device_map="auto"
        )
        processor = AutoProcessor.from_pretrained(
            "microsoft/Phi-3.5-vision-instruct",
            trust_remote_code=True
        )
        cached = (vision_model, processor)
        _load_vision_model._cached = cached
    return cached


def _coerce_to_pil(image):
    """Turn a file path, a PIL-like image, or an array into a PIL image (mode unchanged)."""
    if isinstance(image, str):
        return Image.open(image)
    if hasattr(image, "convert"):
        return image
    return Image.fromarray(image)


def analyze_image_with_vision_model(image, query):
    """Describe an artwork image, using the GPU vision model when CUDA is available.

    Args:
        image: A PIL image, a path to an image file, or an array accepted by
            ``Image.fromarray``.
        query: Optional extra instruction appended to the vision prompt and
            forwarded to the CPU fallback.

    Returns:
        The generated description (stripped) from the vision model, or the
        result of ``get_cpu_image_description`` when CUDA is unavailable or
        the GPU path fails for any reason.
    """
    # Normalise the input up front so the CPU fallback also receives a PIL image
    # for path/array inputs. If that fails, hand the raw value to the fallback,
    # which has its own error handling.
    try:
        pil_image = _coerce_to_pil(image)
    except Exception as e:
        print(f"🔄 Could not read image input, using CPU fallback: {e}")
        return get_cpu_image_description(image, query)

    if device == "cuda":
        try:
            print("🔥 Attempting GPU-based vision analysis...")
            vision_model, processor = _load_vision_model()

            art_prompt = f"""<|user|>
Describe this artwork in detail focusing on:
- Visual elements (colors, composition, subjects)
- Style and technique
- Period indicators
- Mood and atmosphere
{query if query else ""}
<|image_1|>
<|end|>
<|assistant|>"""

            # Always hand the model an RGB image, including images opened from a path.
            inputs = processor(art_prompt, pil_image.convert("RGB"), return_tensors="pt")
            inputs = {k: v.to(vision_model.device) for k, v in inputs.items()}

            with torch.no_grad():
                generate_ids = vision_model.generate(
                    **inputs,
                    max_new_tokens=400,
                    eos_token_id=processor.tokenizer.eos_token_id,
                    pad_token_id=processor.tokenizer.eos_token_id,
                    do_sample=False,
                    use_cache=False
                )

            # Drop the prompt tokens so only the newly generated text is decoded.
            generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
            description = processor.batch_decode(
                generate_ids,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=False
            )[0]

            return description.strip()

        except Exception as e:
            # Best-effort: any GPU/model failure degrades to the CPU description.
            print(f"🔄 GPU vision analysis failed, using CPU fallback: {str(e)}")

    # CPU fallback - basic image analysis
    return get_cpu_image_description(pil_image, query)

def get_cpu_image_description(image, query=""):
    """Build a template description of an image from its size, mode and colors.

    Args:
        image: Object exposing ``size`` (width, height), ``mode`` and
            ``getcolors(maxcolors=...)`` — e.g. a PIL image.
        query: Optional user question; echoed into the description when set.

    Returns:
        A multi-line description mentioning orientation, a coarse color
        summary and the pixel dimensions. If the image cannot be inspected
        (for example ``image`` is None), a short generic sentence is returned
        instead of raising.
    """
    try:
        width, height = image.size
        mode = image.mode

        # Coarse color summary: is any single color covering >10% of the pixels?
        try:
            colors = image.getcolors(maxcolors=256*256*256)
            if colors:
                # Only the most frequent color matters for the threshold test,
                # so take the max count instead of sorting the whole histogram.
                top_count = max(count for count, _ in colors)
                if top_count > width*height*0.1:
                    color_desc = "dominated by strong, bold colors"
                else:
                    color_desc = "featuring a varied, complex color palette"
            else:
                color_desc = "with rich, complex coloration"
        except Exception:
            # Color statistics are optional; keep going with a neutral phrase.
            # (Was a bare ``except:``, which also swallowed SystemExit/KeyboardInterrupt.)
            color_desc = "with artistic color composition"

        aspect_ratio = width / height
        if aspect_ratio > 1.3:
            orientation = "landscape orientation"
        elif aspect_ratio < 0.7:
            orientation = "portrait orientation"
        else:
            orientation = "square composition"

        # An empty line is kept in the output when no query is given.
        interest_line = f"User interest: {query}" if query else ""
        return "\n".join([
            f"This artwork presents a {orientation} {color_desc}. ",
            "The image shows classical artistic elements with traditional composition techniques. ",
            "The piece appears to demonstrate careful attention to visual balance and artistic principles.",
            f"Dimensions: {width}x{height} pixels in {mode} color mode.",
            interest_line,
            "The work suggests careful artistic execution with attention to both technical and aesthetic considerations.",
        ])

    except Exception:
        # Best-effort boundary: unreadable input still yields a usable sentence.
        return f"Artwork with traditional composition and artistic styling. {f'User query: {query}' if query else ''}"

def analyze_with_tools_direct(description, query):
    """Run the four analysis tools directly on a description, without the agent.

    Args:
        description: Text description of the artwork; passed to every tool.
        query: Accepted for signature parity with the agent path; not used here.

    Returns:
        A Markdown report with Style, Historical Context, Symbolism and
        Technical sections joined by blank lines, or an "Analysis Error"
        message if any step raises.
    """
    try:
        results = []

        # Style analysis
        style_result = art_style_detector(description)
        results.append(f"## 🎨 Style Analysis\n{style_result}")

        # Historical context: use the first period named verbatim in the text.
        # The period name must appear in full; matching its individual words
        # made "art" (from "pop art") match nearly every description.
        periods = ["renaissance", "baroque", "impressionism", "expressionism", "cubism", "surrealism", "abstract", "pop art"]
        desc_lower = description.lower()
        detected_period = next((period for period in periods if period in desc_lower), None)

        # No explicit period mention: ask for the generic "classical art" context.
        context_result = historical_context_provider(detected_period or "classical art")
        results.append(f"## 📚 Historical Context\n{context_result}")

        # Symbolism analysis
        symbolism_result = symbolism_interpreter(description)
        results.append(f"## 🔍 Symbolism\n{symbolism_result}")

        # Technical analysis
        technical_result = technical_analysis_tool(description)
        results.append(f"## 🎭 Technical Analysis\n{technical_result}")

        return "\n\n".join(results)

    except Exception as e:
        # Best-effort boundary: report the failure in the UI instead of raising.
        return f"**Analysis Error**: {str(e)}\n\nPlease provide more details about the artwork for manual analysis."

def analyze_artwork_complete(image, query):
    """Run the full pipeline: describe the image, analyse it, format the report.

    The image is first turned into a text description. That description is
    analysed by the SmolAgent when it is available; if the agent is missing or
    raises, the tools are invoked directly instead. Returns a Markdown report,
    a prompt to upload an image when ``image`` is None, or an error message
    with troubleshooting hints if anything unexpected fails.
    """
    if image is None:
        return "📸 **Please upload an image to analyze.**"

    try:
        print(f"🔍 Analyzing image on {device}...")

        # Text description of the image (GPU model or CPU fallback).
        visual_description = analyze_image_with_vision_model(image, query)

        print("🎨 Running art analysis...")

        device_label = device.upper()
        agent_succeeded = False

        # Preferred path: let the agent drive the tools.
        if agent_ready:
            try:
                analysis_prompt = f"""You are an expert art historian. Analyze this artwork: {visual_description}
{query or "Provide comprehensive analysis covering style, historical context, symbolism, and technique."}
Use the available tools to provide detailed analysis."""

                expert_analysis = art_agent.run(analysis_prompt)
                analysis_method = f"🤖 SmolAgent Analysis ({device_label})"
                agent_succeeded = True

            except Exception as agent_error:
                print(f"Agent failed, using direct tools: {agent_error}")

        # Fallback path: agent unavailable or it just failed.
        if not agent_succeeded:
            expert_analysis = analyze_with_tools_direct(visual_description, query)
            analysis_method = f"🔧 Direct Tool Analysis ({device_label})"

        return f"""# 🎨 **ARTWORK ANALYSIS**
## 👁️ **Visual Description**
{visual_description}
---
{expert_analysis}
---
*Analysis Method: {analysis_method}*
*Device: {device_label} | Query: {query or "General analysis"}*
"""

    except Exception as exc:
        error_msg = f"Analysis error: {exc}"
        print(error_msg)
        traceback.print_exc()
        return f"""❌ **Analysis Error**
{error_msg}
**Please try:**
1. Uploading a clear JPG or PNG image
2. Describing the artwork manually in the question box
3. Being more specific about what you'd like to know
*Running on: {device.upper()}*"""

# Gradio Interface - CPU/GPU Compatible
def create_interface():
    """Build and return the Gradio Blocks UI for the art historian app.

    Layout, top to bottom: a header banner, a device/agent status box, a grid
    of feature cards, a two-column row (image upload, question box and analyze
    button on the left; Markdown results on the right), a row of example
    questions, and a footer. The analyze button is wired to
    ``analyze_artwork_complete`` with the image and question as inputs and the
    results panel as output.

    Reads the module-level ``device`` and ``agent_ready`` values to fill in the
    status texts at build time. The returned object is not launched here.
    """
    
    # Responsive theme that works on both CPU and GPU
    theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
        neutral_hue="gray",
        font=gr.themes.GoogleFont("Inter")
    )
    
    # Custom CSS classes referenced by the gr.HTML snippets below.
    css = """
    .main-header {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 2rem;
        border-radius: 15px;
        margin-bottom: 2rem;
        text-align: center;
        color: white;
    }
    .device-info {
        padding: 1rem;
        border-radius: 8px;
        margin: 1rem 0;
        text-align: center;
        background: rgba(59, 130, 246, 0.1);
        border: 1px solid rgba(59, 130, 246, 0.3);
    }
    .feature-grid {
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
        gap: 1rem;
        margin: 1rem 0;
    }
    .feature-card {
        padding: 1.5rem;
        border-radius: 10px;
        text-align: center;
        background: rgba(255,255,255,0.05);
        border: 1px solid rgba(255,255,255,0.1);
    }
    """
    
    with gr.Blocks(title="🎨 AI Art Historian", theme=theme, css=css) as demo:
        
        # Header
        gr.HTML("""
        <div class="main-header">
            <h1>🎨 AI Art Historian</h1>
            <h3>Powered by SmolAgent Framework + Vision AI</h3>
            <p>Upload any artwork and discover its secrets through expert AI analysis</p>
            <p style="margin-top: 1rem;">
                <a href="https://youtu.be/xyNKr05Vvls?si=OmHjtOfBez2FjOTv" target="_blank" style="color: white; text-decoration: none; font-size: 1.1em;">
                    ▶️ Watch Demo Video
                </a>
            </p>
        </div>
        """)
        
        # Device info (computed once here and reused in the status box and results placeholder)
        device_emoji = "🔥" if device == "cuda" else "💻"
        agent_status_text = "✅ All systems ready!" if agent_ready else "⚠️ Using backup tools"
        
        gr.HTML(f"""
        <div class="device-info">
            <strong>{device_emoji} Running on: {device.upper()}</strong><br>
            <strong>Agent Status:</strong> {agent_status_text}<br>
            <em>{"GPU acceleration enabled" if device == "cuda" else "CPU processing mode"}</em>
        </div>
        """)
        
        # Features
        gr.HTML("""
        <div class="feature-grid">
            <div class="feature-card">
                <h3>🎨 Style Detection</h3>
                <p>Renaissance to Modern movements</p>
            </div>
            <div class="feature-card">
                <h3>📚 Historical Context</h3>
                <p>Periods, influences, key artists</p>
            </div>
            <div class="feature-card">
                <h3>🔍 Symbol Analysis</h3>
                <p>Hidden meanings, iconography</p>
            </div>
            <div class="feature-card">
                <h3>🎭 Technical Assessment</h3>
                <p>Composition, technique, style</p>
            </div>
        </div>
        """)
        
        # Main interface: inputs in the left column, results in the wider right column
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📤 **Upload Your Artwork**")
                
                # type="pil" asks Gradio to hand the upload to the handler as a PIL image
                image_input = gr.Image(
                    label="🖼️ Select Image", 
                    type="pil",
                    height=350
                )
                
                query_input = gr.Textbox(
                    label="💭 Ask Specific Questions (Optional)",
                    placeholder="e.g., 'What artistic movement does this represent?' or 'What do the colors symbolize?'",
                    lines=3
                )
                
                analyze_btn = gr.Button(
                    "🔍 **Analyze Artwork**", 
                    variant="primary",
                    size="lg"
                )
                
                gr.Markdown(f"""
                ### 💡 Pro Tips
                • Upload clear, high-quality images
                • Try paintings, sculptures, drawings
                • Ask specific questions for detailed analysis
                • {f"GPU acceleration active!" if device == "cuda" else "CPU mode - still powerful!"}
                """)
                
            with gr.Column(scale=2):
                gr.Markdown("### 📊 **Analysis Results**")
                
                # Placeholder text shown until the first analysis replaces it
                analysis_output = gr.Markdown(
                    value=f"""
                    🎯 **Ready for Analysis!**
                    
                    Upload an artwork image to get started. The AI will analyze:
                    
                    🎨 **Artistic Style** - Movement and period identification  
                    📚 **Historical Context** - Cultural background and influences  
                    🔍 **Symbolism** - Hidden meanings and iconography  
                    🎭 **Technique** - Compositional and technical analysis
                    
                    *Running on {device.upper()} • {agent_status_text}*
                    """,
                    container=True
                )
        
        # Examples: each entry is [image, question]; the image slot is None, so only the question is supplied
        with gr.Row():
            gr.Examples(
                examples=[
                    [None, "What artistic movement does this painting belong to?"],
                    [None, "Analyze the use of color and symbolism in this artwork."],
                    [None, "What can you tell me about the historical context of this piece?"],
                    [None, "Explain the composition and artistic technique used."],
                    [None, "What emotions or themes does this artwork convey?"]
                ],
                inputs=[image_input, query_input],
                label="🎯 **Example Questions**"
            )
        
        # Event handler: run the full analysis pipeline when the button is clicked
        analyze_btn.click(
            fn=analyze_artwork_complete,
            inputs=[image_input, query_input],
            outputs=[analysis_output],
            show_progress=True
        )
        
        # Footer
        gr.Markdown(f"""
        ---
        <div style="text-align: center; color: #666; margin-top: 2rem;">
            <strong>🎨 AI Art Historian</strong> | Built with ❤️ Gradio<br>
            <em>Device: {device.upper()} • Agent: {"Ready" if agent_ready else "Backup mode"} • Discover art through AI</em>
        </div>
        """)
    
    return demo

# Script entry point: build the UI and serve it (all interfaces, port 7860, no share link).
if __name__ == "__main__":
    print("🌟 Launching AI Art Historian...")
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo = create_interface()
    demo.launch(**launch_options)