import sys
import traceback
from functools import lru_cache

import gradio as gr
import torch
from PIL import Image
from smolagents import CodeAgent, InferenceClientModel, tool

# Check device availability
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Python version: {sys.version}")
print(f"Device: {device}")
print(f"CUDA available: {torch.cuda.is_available()}")

# Hugging Face model id used for the optional GPU vision description.
VISION_MODEL_ID = "microsoft/Phi-3.5-vision-instruct"


# NOTE: the docstrings of the @tool functions below are consumed by smolagents
# to build the tool description and argument schema, so their wording and
# "Args:" layout are intentionally left as they were.
@tool
def art_style_detector(visual_description: str) -> str:
    """
    Identifies the artistic style of an artwork.

    Args:
        visual_description: Visual description of the artwork including colors, composition, and technique
    """
    styles = {
        "Renaissance": ["perspective", "realistic", "classical", "religious", "balanced", "proportion", "leonardo", "michelangelo"],
        "Baroque": ["dramatic", "chiaroscuro", "movement", "emotional", "ornate", "theatrical", "caravaggio", "rubens"],
        "Impressionism": ["light", "color", "brushstrokes", "outdoor", "spontaneous", "fleeting", "monet", "renoir"],
        "Expressionism": ["emotion", "distorted", "subjective", "bold colors", "psychological", "kandinsky", "munch"],
        "Cubism": ["geometric", "fragmented", "multiple perspectives", "abstract", "analytical", "picasso", "braque"],
        "Surrealism": ["dreamlike", "unconscious", "bizarre", "symbolic", "fantastic", "dali", "magritte"],
        # "pollock" was misspelled "pollack", so the keyword could never match.
        "Abstract": ["non-representational", "geometric", "color field", "experimental", "pollock", "rothko"],
        "Pop Art": ["commercial", "popular culture", "bright colors", "mass production", "warhol", "lichtenstein"],
        "Minimalism": ["simple", "clean", "geometric", "reduced", "essential", "minimal"],
    }

    desc = visual_description.lower()

    # Score each style by the number of its keywords found as substrings.
    matches = []
    for style, keywords in styles.items():
        score = sum(1 for kw in keywords if kw in desc)
        if score > 0:
            matches.append((style, score))

    if not matches:
        return "Style analysis requires more specific visual details"

    # Stable sort: styles with equal scores keep their dictionary order.
    matches.sort(key=lambda item: item[1], reverse=True)
    result = f"**Primary Style: {matches[0][0]}**\n"
    if len(matches) > 1:
        others = [f"{style} ({score})" for style, score in matches[1:3]]
        result += f"Secondary influences: {', '.join(others)}"
    return result


@tool
def historical_context_provider(art_period: str) -> str:
    """
    Provides historical context for an artwork period.

    Args:
        art_period: The name of the art period or artistic movement to analyze
    """
    contexts = {
        "renaissance": "**Renaissance (14th-17th century)**: Humanism, scientific revolution, revival of classical antiquity. Key artists: Leonardo da Vinci, Michelangelo, Raphael. Characteristics: Linear perspective, anatomical accuracy, balanced compositions.",
        "baroque": "**Baroque (17th-18th century)**: Counter-Reformation, dramatic storytelling, chiaroscuro technique. Key artists: Caravaggio, Bernini, Rubens. Characteristics: Dynamic movement, emotional intensity, theatrical lighting.",
        "impressionism": "**Impressionism (1860s-1880s)**: Plein air painting, light and color focus, brushwork visible. Key artists: Monet, Renoir, Degas. Characteristics: Capturing fleeting moments, natural light, loose brushstrokes.",
        "expressionism": "**Expressionism (early 20th century)**: Emotional expression over realism, psychological exploration. Key artists: Kandinsky, Munch, Kirchner. Characteristics: Bold colors, distorted forms, subjective perspective.",
        "cubism": "**Cubism (1907-1920s)**: Multiple perspectives, geometric forms, fragmented reality. Key artists: Picasso, Braque. Characteristics: Analytical and synthetic phases, collage elements.",
        "surrealism": "**Surrealism (1920s-1940s)**: Unconscious mind, dreams, automatic drawing. Key artists: Dalí, Magritte, Ernst. Characteristics: Bizarre imagery, psychological exploration, fantastic elements.",
        "abstract": "**Abstract Art (20th century)**: Non-representational, emphasis on color, form, line. Key artists: Kandinsky, Mondrian, Pollock. Characteristics: Pure visual elements, emotional expression through abstraction.",
        "pop art": "**Pop Art (1950s-1960s)**: Popular culture, mass media, commercial aesthetics. Key artists: Warhol, Lichtenstein. Characteristics: Bright colors, repetition, everyday objects as art.",
    }

    period_lower = art_period.lower()
    # The first known period name contained in the input wins.
    for period, context in contexts.items():
        if period in period_lower:
            return context

    return f"**{art_period}**: A significant artistic movement with unique cultural and historical importance. Analysis would benefit from more specific period identification."


@tool
def symbolism_interpreter(visual_elements: str) -> str:
    """
    Interprets symbolic meanings and iconography in artwork.

    Args:
        visual_elements: Description of symbolic visual elements in the artwork
    """
    symbols = {
        "light": "divine presence, enlightenment, truth, hope, spiritual awakening",
        "darkness": "mystery, evil, unconscious, melancholy, death, ignorance",
        "red": "passion, blood, love, war, power, sacrifice, life force",
        "blue": "divinity, tranquility, melancholy, infinity, spirituality, wisdom",
        "white": "purity, innocence, peace, divinity, rebirth, spiritual perfection",
        "gold": "divine light, wealth, immortality, sacred, royal power",
        "crown": "authority, divine right, royal power, achievement, victory",
        "flowers": "beauty, mortality, seasons, femininity, love, fleeting life",
        "skull": "memento mori, mortality, wisdom, vanitas, death's inevitability",
        "cross": "Christianity, sacrifice, redemption, intersection of earthly and divine",
        "water": "life, purification, emotion, unconscious mind, baptism, renewal",
        "dove": "peace, Holy Spirit, purity, love, divine messenger",
        "serpent": "temptation, evil, wisdom, medicine, transformation, rebirth",
        "lion": "courage, strength, royalty, Christ, divine power",
        "lamb": "innocence, sacrifice, Christ, purity, gentleness",
        "apple": "temptation, knowledge, sin, earth, sensuality",
        "mirror": "vanity, truth, self-knowledge, reflection, soul",
        "candle": "life, enlightenment, spirituality, time",
        "black": "death, mystery, elegance, unknown, mourning",
    }

    elements_lower = visual_elements.lower()
    found_symbols = [
        f"• **{symbol.title()}**: {meaning}"
        for symbol, meaning in symbols.items()
        if symbol in elements_lower
    ]

    if found_symbols:
        return "**Symbolic Interpretations:**\n" + "\n".join(found_symbols)

    return "**Symbolic Analysis**: The artwork may contain personal, cultural, or period-specific symbols requiring deeper contextual analysis."


@tool
def technical_analysis_tool(composition_details: str) -> str:
    """
    Analyzes technical and compositional aspects of artwork.

    Args:
        composition_details: Description of technical composition, color usage, and artistic techniques
    """
    techniques = {
        "oil": "Rich color saturation, smooth blending, detailed work, layered application",
        "watercolor": "Transparent layers, luminous effects, spontaneous flow, delicate washes",
        "acrylic": "Vibrant colors, quick drying, versatile techniques, modern medium",
        "tempera": "Precise details, bright colors, quick drying, pre-oil painting era",
        "fresco": "Wall painting, wet plaster application, permanent integration, monumental scale",
        "pastels": "Soft texture, direct color application, atmospheric effects",
        "chiaroscuro": "dramatic light-dark contrast, three-dimensional modeling, emotional intensity",
        "sfumato": "subtle gradations, atmospheric perspective, Leonardo's technique",
        "impasto": "thick paint application, textural effects, visible brushstrokes",
    }

    composition_elements = {
        "triangular": "stable, harmonious, classical composition",
        "diagonal": "dynamic, movement, baroque influence",
        "circular": "unity, completeness, divine perfection",
        "golden ratio": "mathematical harmony, natural proportions, aesthetic perfection",
        "rule of thirds": "balanced composition, visual interest, modern technique",
    }

    analysis = "**Technical Analysis:**\n"
    desc_lower = composition_details.lower()
    # Compositional terms are compared with spaces removed so that e.g.
    # "goldenratio" and "golden ratio" both match; computed once, not per term.
    desc_compact = desc_lower.replace(" ", "")

    # Check for techniques
    found_techniques = [
        f"• **{technique.title()}**: {description}"
        for technique, description in techniques.items()
        if technique in desc_lower
    ]

    # Check for compositional elements
    found_composition = [
        f"• **{comp.title()}**: {description}"
        for comp, description in composition_elements.items()
        if comp.replace(" ", "") in desc_compact
    ]

    if found_techniques:
        analysis += "**Techniques Identified:**\n" + "\n".join(found_techniques) + "\n\n"

    if found_composition:
        analysis += "**Compositional Elements:**\n" + "\n".join(found_composition) + "\n\n"

    analysis += f"**Observational Notes**: {composition_details}\n"
    analysis += "**Recommendation**: Consider analyzing brushwork, color harmony, spatial relationships, and overall execution quality."

    return analysis


# Initialize SmolAgent for CPU/GPU flexibility
# art_agent is pre-bound so the name always exists, even if construction fails.
art_agent = None
try:
    print("🎨 Creating SmolAgent for artwork analysis...")

    model = InferenceClientModel(model_id="meta-llama/Llama-3.2-3B-Instruct")

    art_agent = CodeAgent(
        tools=[art_style_detector, historical_context_provider, symbolism_interpreter, technical_analysis_tool],
        model=model,
        add_base_tools=False,
        max_steps=3,
    )

    print("✅ SmolAgent created successfully!")
    agent_ready = True

except Exception as e:
    # Best-effort: the app keeps working through the direct-tool fallback.
    print(f"❌ Agent creation failed: {str(e)}")
    agent_ready = False


def _ensure_pil_image(image):
    """Return *image* as a PIL image.

    Accepts a file path (opened with ``Image.open``), an object that already
    exposes ``convert`` (returned unchanged), or anything else, which is
    handed to ``Image.fromarray``.
    """
    if isinstance(image, str):
        return Image.open(image)
    if hasattr(image, "convert"):
        return image
    return Image.fromarray(image)


@lru_cache(maxsize=1)
def _load_vision_model():
    """Load the vision model and its processor once and reuse them afterwards.

    Previously both were re-created on every request. A failed load raises
    and is not cached, so the next request retries.
    """
    # Imported lazily so the CPU-only path never needs transformers.
    from transformers import AutoModelForCausalLM, AutoProcessor

    vision_model = AutoModelForCausalLM.from_pretrained(
        VISION_MODEL_ID,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
        _attn_implementation="eager",
        device_map="auto",
    )
    processor = AutoProcessor.from_pretrained(
        VISION_MODEL_ID,
        trust_remote_code=True,
    )
    return vision_model, processor


# Optional GPU-based vision analysis (if available)
def analyze_image_with_vision_model(image, query):
    """Describe *image* with the vision model on GPU, else fall back to CPU.

    Args:
        image: File path, PIL image, or array-like accepted by
            ``Image.fromarray``.
        query: Optional user question appended to the prompt; may be empty.

    Returns:
        The generated description, or the result of
        ``get_cpu_image_description`` when CUDA is unavailable or the GPU
        path raises.
    """
    try:
        if device == "cuda":
            print("🔥 Attempting GPU-based vision analysis...")
            vision_model, processor = _load_vision_model()

            art_prompt = (
                "<|user|>\n"
                "Describe this artwork in detail focusing on:\n"
                "- Visual elements (colors, composition, subjects)\n"
                "- Style and technique\n"
                "- Period indicators\n"
                "- Mood and atmosphere\n"
                f"{query if query else ''}\n"
                "<|image_1|>\n"
                "<|end|>\n"
                "<|assistant|>"
            )

            # Every input kind is converted to RGB (a path used to skip this).
            rgb_image = _ensure_pil_image(image).convert("RGB")

            inputs = processor(art_prompt, rgb_image, return_tensors="pt")
            inputs = {k: v.to(vision_model.device) for k, v in inputs.items()}

            with torch.no_grad():
                generate_ids = vision_model.generate(
                    **inputs,
                    max_new_tokens=400,
                    eos_token_id=processor.tokenizer.eos_token_id,
                    pad_token_id=processor.tokenizer.eos_token_id,
                    do_sample=False,
                    use_cache=False,
                )

            # Drop the prompt tokens so only the newly generated text is decoded.
            generate_ids = generate_ids[:, inputs["input_ids"].shape[1]:]
            description = processor.batch_decode(
                generate_ids,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=False,
            )[0]

            return description.strip()

    except Exception as e:
        print(f"🔄 GPU vision analysis failed, using CPU fallback: {str(e)}")

    # CPU fallback - basic image analysis
    return get_cpu_image_description(image, query)


def get_cpu_image_description(image, query=""):
    """Build a generic textual description from basic image properties.

    Args:
        image: File path, PIL image, or array-like accepted by
            ``Image.fromarray``.
        query: Optional user question echoed into the description.

    Returns:
        A description mentioning orientation, a coarse colour summary, the
        pixel dimensions and the colour mode. If the image cannot be
        inspected at all, a fixed generic sentence is returned instead.
    """
    try:
        image = _ensure_pil_image(image)
        width, height = image.size
        mode = image.mode

        # Basic color analysis
        try:
            colors = image.getcolors(maxcolors=256 * 256 * 256)
            if colors:
                # getcolors() yields (count, color) pairs; keep the five most frequent.
                dominant_colors = sorted(colors, key=lambda item: item[0], reverse=True)[:5]
                # "Bold" when any single colour covers more than 10% of the pixels.
                if any(count > width * height * 0.1 for count, _ in dominant_colors):
                    color_desc = "dominated by strong, bold colors"
                else:
                    color_desc = "featuring a varied, complex color palette"
            else:
                color_desc = "with rich, complex coloration"
        except Exception:
            # Colour statistics are optional; keep going with a neutral phrase.
            color_desc = "with artistic color composition"

        aspect_ratio = width / height
        if aspect_ratio > 1.3:
            orientation = "landscape orientation"
        elif aspect_ratio < 0.7:
            orientation = "portrait orientation"
        else:
            orientation = "square composition"

        user_interest = f"User interest: {query}" if query else ""

        # Generate artistic description
        description = (
            f"This artwork presents a {orientation} {color_desc}.\n"
            "The image shows classical artistic elements with traditional composition techniques.\n"
            "The piece appears to demonstrate careful attention to visual balance and artistic principles.\n"
            f"Dimensions: {width}x{height} pixels in {mode} color mode.\n"
            f"{user_interest}\n"
            "The work suggests careful artistic execution with attention to both technical and aesthetic considerations."
        )

        return description

    except Exception:
        user_query = f"User query: {query}" if query else ""
        return f"Artwork with traditional composition and artistic styling. {user_query}"


def analyze_with_tools_direct(description, query):
    """Run the four analysis tools directly, without going through the agent.

    Args:
        description: Textual description of the artwork.
        query: Accepted for signature parity with the agent path; not used.

    Returns:
        Markdown with style, historical context, symbolism and technical
        sections, or an error message if any tool raises.
    """
    try:
        results = []

        # Style analysis
        style_result = art_style_detector(description)
        results.append(f"## 🎨 Style Analysis\n{style_result}")

        # Historical context
        periods = ["renaissance", "baroque", "impressionism", "expressionism", "cubism", "surrealism", "abstract", "pop art"]
        desc_lower = description.lower()
        # Only the full period name counts. Matching the individual words of
        # "pop art" made any text containing "art" (e.g. "artwork") look like Pop Art.
        detected_period = next((period for period in periods if period in desc_lower), None)

        context_result = historical_context_provider(detected_period or "classical art")
        results.append(f"## 📚 Historical Context\n{context_result}")

        # Symbolism analysis
        symbolism_result = symbolism_interpreter(description)
        results.append(f"## 🔍 Symbolism\n{symbolism_result}")

        # Technical analysis
        technical_result = technical_analysis_tool(description)
        results.append(f"## 🎭 Technical Analysis\n{technical_result}")

        return "\n\n".join(results)

    except Exception as e:
        return f"**Analysis Error**: {str(e)}\n\nPlease provide more details about the artwork for manual analysis."
def analyze_artwork_complete(image, query):
    """Complete artwork analysis pipeline - CPU/GPU flexible.

    Args:
        image: The uploaded image, or None when nothing was uploaded.
        query: Optional free-text question from the user; may be empty.

    Returns:
        A markdown report (visual description plus expert analysis), a
        prompt to upload an image when *image* is None, or a markdown error
        message if the pipeline raises.
    """
    if image is None:
        return "📸 **Please upload an image to analyze.**"

    try:
        print(f"🔍 Analyzing image on {device}...")

        # Get image description (GPU or CPU)
        visual_description = analyze_image_with_vision_model(image, query)

        print("🎨 Running art analysis...")

        expert_analysis = None
        analysis_method = None

        # Try SmolAgent first, fallback to direct tools
        if agent_ready:
            try:
                request = query if query else (
                    "Provide comprehensive analysis covering style, historical context, symbolism, and technique."
                )
                analysis_prompt = (
                    "You are an expert art historian. Analyze this artwork:\n\n"
                    f"{visual_description}\n\n"
                    f"{request}\n\n"
                    "Use the available tools to provide detailed analysis."
                )
                expert_analysis = art_agent.run(analysis_prompt)
                analysis_method = f"🤖 SmolAgent Analysis ({device.upper()})"
            except Exception as agent_error:
                print(f"Agent failed, using direct tools: {agent_error}")

        # Reached when the agent is unavailable or its run raised.
        if analysis_method is None:
            expert_analysis = analyze_with_tools_direct(visual_description, query)
            analysis_method = f"🔧 Direct Tool Analysis ({device.upper()})"

        query_label = query if query else "General analysis"
        final_analysis = (
            "# 🎨 **ARTWORK ANALYSIS**\n\n"
            "## 👁️ **Visual Description**\n"
            f"{visual_description}\n\n"
            "---\n\n"
            f"{expert_analysis}\n\n"
            "---\n"
            f"*Analysis Method: {analysis_method}*\n"
            f"*Device: {device.upper()} | Query: {query_label}*\n"
        )

        return final_analysis

    except Exception as e:
        error_msg = f"Analysis error: {str(e)}"
        print(error_msg)
        traceback.print_exc()

        return (
            "❌ **Analysis Error**\n\n"
            f"{error_msg}\n\n"
            "**Please try:**\n"
            "1. Uploading a clear JPG or PNG image\n"
            "2. Describing the artwork manually in the question box\n"
            "3. Being more specific about what you'd like to know\n\n"
            f"*Running on: {device.upper()}*"
        )


# Gradio Interface - CPU/GPU Compatible
def create_interface():
    """Build and return the Gradio Blocks UI for the art historian app.

    The layout is: header, device/agent status banner, feature cards, an
    upload + question column next to a results column, example questions,
    and a footer. The analyze button is wired to
    ``analyze_artwork_complete``.

    Returns:
        The ``gr.Blocks`` instance, not yet launched.
    """
    # Responsive theme that works on both CPU and GPU
    theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
        neutral_hue="gray",
        font=gr.themes.GoogleFont("Inter"),
    )

    css = """
    .main-header {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 2rem;
        border-radius: 15px;
        margin-bottom: 2rem;
        text-align: center;
        color: white;
    }
    .device-info {
        padding: 1rem;
        border-radius: 8px;
        margin: 1rem 0;
        text-align: center;
        background: rgba(59, 130, 246, 0.1);
        border: 1px solid rgba(59, 130, 246, 0.3);
    }
    .feature-grid {
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
        gap: 1rem;
        margin: 1rem 0;
    }
    .feature-card {
        padding: 1.5rem;
        border-radius: 10px;
        text-align: center;
        background: rgba(255,255,255,0.05);
        border: 1px solid rgba(255,255,255,0.1);
    }
    """

    device_emoji = "🔥" if device == "cuda" else "💻"
    device_mode_text = "GPU acceleration enabled" if device == "cuda" else "CPU processing mode"
    device_tip = "GPU acceleration active!" if device == "cuda" else "CPU mode - still powerful!"
    agent_status_text = "✅ All systems ready!" if agent_ready else "⚠️ Using backup tools"
    agent_footer_text = "Ready" if agent_ready else "Backup mode"

    with gr.Blocks(title="🎨 AI Art Historian", theme=theme, css=css) as demo:

        # Header
        # The wrapper elements carry the classes styled in `css` above.
        # TODO(review): "Watch Demo Video" presumably linked to a video; no
        # URL is available here, so it is rendered as plain text.
        gr.HTML("""
        <div class="main-header">
            <h1>🎨 AI Art Historian</h1>
            <h3>Powered by SmolAgent Framework + Vision AI</h3>
            <p>Upload any artwork and discover its secrets through expert AI analysis</p>
            <p>▶️ Watch Demo Video</p>
        </div>
        """)

        # Device info
        gr.HTML(f"""
        <div class="device-info">
            <strong>{device_emoji} Running on: {device.upper()}</strong><br>
            Agent Status: {agent_status_text}<br>
            {device_mode_text}
        </div>
        """)

        # Features
        gr.HTML("""
        <div class="feature-grid">
            <div class="feature-card">
                <h3>🎨 Style Detection</h3>
                <p>Renaissance to Modern movements</p>
            </div>
            <div class="feature-card">
                <h3>📚 Historical Context</h3>
                <p>Periods, influences, key artists</p>
            </div>
            <div class="feature-card">
                <h3>🔍 Symbol Analysis</h3>
                <p>Hidden meanings, iconography</p>
            </div>
            <div class="feature-card">
                <h3>🎭 Technical Assessment</h3>
                <p>Composition, technique, style</p>
            </div>
        </div>
        """)

        # Main interface
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📤 **Upload Your Artwork**")

                image_input = gr.Image(
                    label="🖼️ Select Image",
                    type="pil",
                    height=350,
                )

                query_input = gr.Textbox(
                    label="💭 Ask Specific Questions (Optional)",
                    placeholder="e.g., 'What artistic movement does this represent?' or 'What do the colors symbolize?'",
                    lines=3,
                )

                analyze_btn = gr.Button(
                    "🔍 **Analyze Artwork**",
                    variant="primary",
                    size="lg",
                )

                # Blank lines keep each bullet a separate paragraph in markdown.
                gr.Markdown(
                    "### 💡 Pro Tips\n\n"
                    "• Upload clear, high-quality images\n\n"
                    "• Try paintings, sculptures, drawings\n\n"
                    "• Ask specific questions for detailed analysis\n\n"
                    f"• {device_tip}\n"
                )

            with gr.Column(scale=2):
                gr.Markdown("### 📊 **Analysis Results**")

                analysis_output = gr.Markdown(
                    value=(
                        "🎯 **Ready for Analysis!**\n\n"
                        "Upload an artwork image to get started. The AI will analyze:\n\n"
                        "🎨 **Artistic Style** - Movement and period identification\n\n"
                        "📚 **Historical Context** - Cultural background and influences\n\n"
                        "🔍 **Symbolism** - Hidden meanings and iconography\n\n"
                        "🎭 **Technique** - Compositional and technical analysis\n\n"
                        f"*Running on {device.upper()} • {agent_status_text}*\n"
                    ),
                    container=True,
                )

        # Examples
        with gr.Row():
            gr.Examples(
                examples=[
                    [None, "What artistic movement does this painting belong to?"],
                    [None, "Analyze the use of color and symbolism in this artwork."],
                    [None, "What can you tell me about the historical context of this piece?"],
                    [None, "Explain the composition and artistic technique used."],
                    [None, "What emotions or themes does this artwork convey?"],
                ],
                inputs=[image_input, query_input],
                label="🎯 **Example Questions**",
            )

        # Event handler
        analyze_btn.click(
            fn=analyze_artwork_complete,
            inputs=[image_input, query_input],
            outputs=[analysis_output],
            show_progress=True,
        )

        # Footer
        gr.Markdown(
            "\n---\n\n"
            "🎨 AI Art Historian | Built with ❤️ Gradio\n\n"
            f"Device: {device.upper()} • Agent: {agent_footer_text} • Discover art through AI\n"
        )

    return demo


# Launch
if __name__ == "__main__":
    print("🌟 Launching AI Art Historian...")
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )