# Page header captured with the file (not part of the program):
#   gizemsarsinlar's picture
#   Added video demo link
#   20c5979 verified
import gradio as gr
from smolagents import CodeAgent, InferenceClientModel, tool
from PIL import Image
import sys
import traceback
import torch
# Choose the compute device once at import time: "cuda" when torch reports a
# usable GPU, otherwise "cpu". The rest of the module reads `device`.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Startup diagnostics, one item per line.
print(
    f"Python version: {sys.version}",
    f"Device: {device}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)
# Keyword-based style classifier.
#
# The lower-cased description is scanned for each style's keywords (plain
# substring test); a style's score is the number of its keywords found. The
# best-scoring style is reported as primary and the next two, if any, as
# secondary influences with their scores. Ties keep the order of the table
# below because the sort is stable.
#
# NOTE(review): the docstring is left terse and in "Args:" form on purpose;
# smolagents' @tool presumably derives the tool description from it — confirm
# against the smolagents documentation before rewording.
@tool
def art_style_detector(visual_description: str) -> str:
    """
    Identifies the artistic style of an artwork.
    Args:
        visual_description: Visual description of the artwork including colors, composition, and technique
    """
    styles = {
        "Renaissance": ["perspective", "realistic", "classical", "religious", "balanced", "proportion", "leonardo", "michelangelo"],
        "Baroque": ["dramatic", "chiaroscuro", "movement", "emotional", "ornate", "theatrical", "caravaggio", "rubens"],
        "Impressionism": ["light", "color", "brushstrokes", "outdoor", "spontaneous", "fleeting", "monet", "renoir"],
        "Expressionism": ["emotion", "distorted", "subjective", "bold colors", "psychological", "kandinsky", "munch"],
        "Cubism": ["geometric", "fragmented", "multiple perspectives", "abstract", "analytical", "picasso", "braque"],
        "Surrealism": ["dreamlike", "unconscious", "bizarre", "symbolic", "fantastic", "dali", "magritte"],
        # "pollock" was previously misspelled "pollack", so the artist's name
        # could never contribute to the Abstract score.
        "Abstract": ["non-representational", "geometric", "color field", "experimental", "pollock", "rothko"],
        "Pop Art": ["commercial", "popular culture", "bright colors", "mass production", "warhol", "lichtenstein"],
        "Minimalism": ["simple", "clean", "geometric", "reduced", "essential", "minimal"],
    }
    desc = visual_description.lower()

    # Keep only styles with at least one keyword hit, in table order.
    matches = []
    for style, keywords in styles.items():
        score = sum(1 for kw in keywords if kw in desc)
        if score > 0:
            matches.append((style, score))

    if not matches:
        return "Style analysis requires more specific visual details"

    # Highest score first; equal scores stay in table order.
    matches.sort(key=lambda pair: pair[1], reverse=True)
    result = f"**Primary Style: {matches[0][0]}**\n"
    if len(matches) > 1:
        others = [f"{style} ({score})" for style, score in matches[1:3]]
        result += f"Secondary influences: {', '.join(others)}"
    return result
# Looks up a canned historical summary for an art period.
#
# The request is lower-cased and compared against the table keys; the first
# key (in table order) that occurs inside the request wins. When nothing
# matches, a generic sentence that echoes the request verbatim is returned.
@tool
def historical_context_provider(art_period: str) -> str:
    """
    Provides historical context for an artwork period.
    Args:
        art_period: The name of the art period or artistic movement to analyze
    """
    period_notes = {
        "renaissance": "**Renaissance (14th-17th century)**: Humanism, scientific revolution, revival of classical antiquity. Key artists: Leonardo da Vinci, Michelangelo, Raphael. Characteristics: Linear perspective, anatomical accuracy, balanced compositions.",
        "baroque": "**Baroque (17th-18th century)**: Counter-Reformation, dramatic storytelling, chiaroscuro technique. Key artists: Caravaggio, Bernini, Rubens. Characteristics: Dynamic movement, emotional intensity, theatrical lighting.",
        "impressionism": "**Impressionism (1860s-1880s)**: Plein air painting, light and color focus, brushwork visible. Key artists: Monet, Renoir, Degas. Characteristics: Capturing fleeting moments, natural light, loose brushstrokes.",
        "expressionism": "**Expressionism (early 20th century)**: Emotional expression over realism, psychological exploration. Key artists: Kandinsky, Munch, Kirchner. Characteristics: Bold colors, distorted forms, subjective perspective.",
        "cubism": "**Cubism (1907-1920s)**: Multiple perspectives, geometric forms, fragmented reality. Key artists: Picasso, Braque. Characteristics: Analytical and synthetic phases, collage elements.",
        "surrealism": "**Surrealism (1920s-1940s)**: Unconscious mind, dreams, automatic drawing. Key artists: DalΓ­, Magritte, Ernst. Characteristics: Bizarre imagery, psychological exploration, fantastic elements.",
        "abstract": "**Abstract Art (20th century)**: Non-representational, emphasis on color, form, line. Key artists: Kandinsky, Mondrian, Pollock. Characteristics: Pure visual elements, emotional expression through abstraction.",
        "pop art": "**Pop Art (1950s-1960s)**: Popular culture, mass media, commercial aesthetics. Key artists: Warhol, Lichtenstein. Characteristics: Bright colors, repetition, everyday objects as art."
    }
    requested = art_period.lower()
    summary = next(
        (text for key, text in period_notes.items() if key in requested),
        None,
    )
    if summary is not None:
        return summary
    return f"**{art_period}**: A significant artistic movement with unique cultural and historical importance. Analysis would benefit from more specific period identification."
# Maps symbol words found in a description to their conventional meanings.
#
# Matching is case-insensitive and on whole words, with an optional plural
# suffix ("crown"/"crowns", "cross"/"crosses"). The previous plain substring
# test reported symbols that were never mentioned: "red" fired on "textured",
# "rendered" or "inspired", "lion" on "medallion", "cross" on "across".
# Trade-off: compound words such as "sunlight" or "golden" no longer count as
# "light" / "gold".
#
# Results are listed in table order, one bullet per symbol. When no symbol is
# found a generic sentence is returned instead.
#
# NOTE(review): the docstring is kept in its original "Args:" form because
# smolagents' @tool presumably parses it — confirm before rewording.
@tool
def symbolism_interpreter(visual_elements: str) -> str:
    """
    Interprets symbolic meanings and iconography in artwork.
    Args:
        visual_elements: Description of symbolic visual elements in the artwork
    """
    # Imported here so the function does not depend on a module-level import.
    import re

    symbols = {
        "light": "divine presence, enlightenment, truth, hope, spiritual awakening",
        "darkness": "mystery, evil, unconscious, melancholy, death, ignorance",
        "red": "passion, blood, love, war, power, sacrifice, life force",
        "blue": "divinity, tranquility, melancholy, infinity, spirituality, wisdom",
        "white": "purity, innocence, peace, divinity, rebirth, spiritual perfection",
        "gold": "divine light, wealth, immortality, sacred, royal power",
        "crown": "authority, divine right, royal power, achievement, victory",
        "flowers": "beauty, mortality, seasons, femininity, love, fleeting life",
        "skull": "memento mori, mortality, wisdom, vanitas, death's inevitability",
        "cross": "Christianity, sacrifice, redemption, intersection of earthly and divine",
        "water": "life, purification, emotion, unconscious mind, baptism, renewal",
        "dove": "peace, Holy Spirit, purity, love, divine messenger",
        "serpent": "temptation, evil, wisdom, medicine, transformation, rebirth",
        "lion": "courage, strength, royalty, Christ, divine power",
        "lamb": "innocence, sacrifice, Christ, purity, gentleness",
        "apple": "temptation, knowledge, sin, earth, sensuality",
        "mirror": "vanity, truth, self-knowledge, reflection, soul",
        "candle": "life, enlightenment, spirituality, time",
        "black": "death, mystery, elegance, unknown, mourning"
    }
    elements_lower = visual_elements.lower()

    found_symbols = [
        f"β€’ **{symbol.title()}**: {meaning}"
        for symbol, meaning in symbols.items()
        # \b ... \b anchors on word edges; (?:e?s)? tolerates a plural ending.
        if re.search(rf"\b{re.escape(symbol)}(?:e?s)?\b", elements_lower)
    ]
    if found_symbols:
        return "**Symbolic Interpretations:**\n" + "\n".join(found_symbols)
    return "**Symbolic Analysis**: The artwork may contain personal, cultural, or period-specific symbols requiring deeper contextual analysis."
# Builds a Markdown report on medium/technique and compositional layout.
#
# Technique names are looked up as substrings of the lower-cased input.
# Composition names are compared with all spaces removed on both sides, so
# "rule of thirds" also matches "ruleofthirds". The report always ends with
# the caller's text echoed as observational notes plus a fixed recommendation.
@tool
def technical_analysis_tool(composition_details: str) -> str:
    """
    Analyzes technical and compositional aspects of artwork.
    Args:
        composition_details: Description of technical composition, color usage, and artistic techniques
    """
    technique_notes = {
        "oil": "Rich color saturation, smooth blending, detailed work, layered application",
        "watercolor": "Transparent layers, luminous effects, spontaneous flow, delicate washes",
        "acrylic": "Vibrant colors, quick drying, versatile techniques, modern medium",
        "tempera": "Precise details, bright colors, quick drying, pre-oil painting era",
        "fresco": "Wall painting, wet plaster application, permanent integration, monumental scale",
        "pastels": "Soft texture, direct color application, atmospheric effects",
        "chiaroscuro": "dramatic light-dark contrast, three-dimensional modeling, emotional intensity",
        "sfumato": "subtle gradations, atmospheric perspective, Leonardo's technique",
        "impasto": "thick paint application, textural effects, visible brushstrokes"
    }
    layout_notes = {
        "triangular": "stable, harmonious, classical composition",
        "diagonal": "dynamic, movement, baroque influence",
        "circular": "unity, completeness, divine perfection",
        "golden ratio": "mathematical harmony, natural proportions, aesthetic perfection",
        "rule of thirds": "balanced composition, visual interest, modern technique"
    }

    lowered = composition_details.lower()
    squeezed = lowered.replace(" ", "")

    technique_hits = [
        f"β€’ **{name.title()}**: {note}"
        for name, note in technique_notes.items()
        if name in lowered
    ]
    layout_hits = [
        f"β€’ **{name.title()}**: {note}"
        for name, note in layout_notes.items()
        if name.replace(" ", "") in squeezed
    ]

    # Assemble the report piece by piece; optional sections appear only when
    # something was found.
    pieces = ["**Technical Analysis:**\n"]
    if technique_hits:
        pieces.append("**Techniques Identified:**\n" + "\n".join(technique_hits) + "\n\n")
    if layout_hits:
        pieces.append("**Compositional Elements:**\n" + "\n".join(layout_hits) + "\n\n")
    pieces.append(f"**Observational Notes**: {composition_details}\n")
    pieces.append("**Recommendation**: Consider analyzing brushwork, color harmony, spatial relationships, and overall execution quality.")
    return "".join(pieces)
# Build the tool-calling agent once, at import time. `agent_ready` records
# whether construction succeeded; when it is False, `art_agent` may be
# undefined and callers must use the tools directly instead.
agent_ready = False
try:
    print("🎨 Creating SmolAgent for artwork analysis...")
    model = InferenceClientModel(model_id="meta-llama/Llama-3.2-3B-Instruct")
    art_agent = CodeAgent(
        tools=[
            art_style_detector,
            historical_context_provider,
            symbolism_interpreter,
            technical_analysis_tool,
        ],
        model=model,
        add_base_tools=False,
        max_steps=3,
    )
    print("βœ… SmolAgent created successfully!")
    # Only flipped after every step above completed without raising.
    agent_ready = True
except Exception as exc:
    print(f"❌ Agent creation failed: {exc}")
# Optional GPU-based vision analysis (if available)
_VISION_MODEL_ID = "microsoft/Phi-3.5-vision-instruct"

# Holds the loaded (model, processor) pair under the key "pair" after the
# first successful load, so later requests reuse it instead of calling
# from_pretrained again. A failed load stores nothing and is retried on the
# next call.
_vision_model_cache = {}


def _load_vision_model():
    """Return the cached ``(model, processor)`` pair, loading it on first use.

    Raises whatever ``transformers`` raises when the import or the load fails;
    the caller treats any exception as "vision model unavailable".
    """
    if "pair" not in _vision_model_cache:
        # Imported lazily so CPU-only runs never need transformers.
        from transformers import AutoModelForCausalLM, AutoProcessor

        model = AutoModelForCausalLM.from_pretrained(
            _VISION_MODEL_ID,
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
            _attn_implementation="eager",
            device_map="auto",
        )
        processor = AutoProcessor.from_pretrained(
            _VISION_MODEL_ID,
            trust_remote_code=True,
        )
        _vision_model_cache["pair"] = (model, processor)
    return _vision_model_cache["pair"]


def analyze_image_with_vision_model(image, query):
    """Describe an artwork image, using the vision model when on CUDA.

    When the module-level ``device`` is ``"cuda"`` the Phi-3.5 vision model is
    asked for a description. If ``device`` is not ``"cuda"``, or anything in
    the GPU path raises, the result of ``get_cpu_image_description`` is
    returned instead.

    Args:
        image: A file path, an object with a ``convert`` method (e.g. a PIL
            image), or anything accepted by ``Image.fromarray``.
        query: Optional extra instruction appended to the prompt; falsy values
            add an empty line.

    Returns:
        The description text.
    """
    try:
        if device == "cuda":
            print("πŸ”₯ Attempting GPU-based vision analysis...")
            model, processor = _load_vision_model()

            art_prompt = "\n".join([
                "<|user|>",
                "Describe this artwork in detail focusing on:",
                "- Visual elements (colors, composition, subjects)",
                "- Style and technique",
                "- Period indicators",
                "- Mood and atmosphere",
                f"{query}" if query else "",
                "<|image_1|>",
                "<|end|>",
                "<|assistant|>",
            ])

            # Normalise every accepted input form to an RGB PIL image. The
            # path branch previously skipped the RGB conversion.
            if isinstance(image, str):
                image = Image.open(image).convert("RGB")
            elif hasattr(image, 'convert'):
                image = image.convert("RGB")
            else:
                image = Image.fromarray(image).convert("RGB")

            inputs = processor(art_prompt, image, return_tensors="pt")
            inputs = {k: v.to(model.device) for k, v in inputs.items()}
            with torch.no_grad():
                generate_ids = model.generate(
                    **inputs,
                    max_new_tokens=400,
                    eos_token_id=processor.tokenizer.eos_token_id,
                    pad_token_id=processor.tokenizer.eos_token_id,
                    do_sample=False,
                    use_cache=False,
                )
            # Drop the leading positions that correspond to the prompt so only
            # the continuation is decoded.
            generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
            description = processor.batch_decode(
                generate_ids,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=False,
            )[0]
            return description.strip()
    except Exception as e:
        print(f"πŸ”„ GPU vision analysis failed, using CPU fallback: {str(e)}")
    # CPU fallback - basic image analysis
    return get_cpu_image_description(image, query)
def get_cpu_image_description(image, query=""):
    """Produce a templated description from basic image properties.

    Only ``image.size``, ``image.mode`` and ``image.getcolors(...)`` are read.
    The text reports the orientation (from the width/height ratio), a coarse
    colour remark (whether any single colour covers more than 10% of the
    pixels) and the pixel dimensions.

    Args:
        image: Object exposing ``size`` (width, height), ``mode`` and
            ``getcolors(maxcolors=...)`` — a PIL image in practice.
        query: Optional user question; echoed on its own line when non-empty.

    Returns:
        A six-line description, or a one-line generic sentence when the image
        cannot be inspected. Ordinary errors never propagate; ``SystemExit``
        and ``KeyboardInterrupt`` do.
    """
    try:
        width, height = image.size
        mode = image.mode

        # Basic color analysis. Any ordinary failure here only downgrades the
        # colour remark; it does not abort the description.
        try:
            colors = image.getcolors(maxcolors=256 * 256 * 256)
            if colors:
                # Entries are (count, colour); the largest count decides.
                largest_count = max(entry[0] for entry in colors)
                if largest_count > width * height * 0.1:
                    color_desc = "dominated by strong, bold colors"
                else:
                    color_desc = "featuring a varied, complex color palette"
            else:
                color_desc = "with rich, complex coloration"
        except Exception:  # was a bare `except:`, which also hid SystemExit
            color_desc = "with artistic color composition"

        aspect_ratio = width / height
        if aspect_ratio > 1.3:
            orientation = "landscape orientation"
        elif aspect_ratio < 0.7:
            orientation = "portrait orientation"
        else:
            orientation = "square composition"

        # Generate artistic description
        user_line = f"User interest: {query}" if query else ""
        return "\n".join([
            f"This artwork presents a {orientation} {color_desc}.",
            "The image shows classical artistic elements with traditional composition techniques.",
            "The piece appears to demonstrate careful attention to visual balance and artistic principles.",
            f"Dimensions: {width}x{height} pixels in {mode} color mode.",
            user_line,
            "The work suggests careful artistic execution with attention to both technical and aesthetic considerations.",
        ])
    except Exception:
        # Reached when the object is not image-like (no size/mode) or has a
        # zero height.
        return f"Artwork with traditional composition and artistic styling. {f'User query: {query}' if query else ''}"
def analyze_with_tools_direct(description, query):
    """Run the four analysis tools directly and join their reports.

    Used when the agent is unavailable or fails. The tools are called in a
    fixed order — style, historical context, symbolism, technique — and each
    result is placed under its own Markdown heading.

    The historical period is taken from the first known period name that
    occurs in the description; when none does, "classical art" is passed to
    the context tool. Only full period names are matched: the earlier
    per-word test split "pop art" into "pop" and "art", and "art" occurs in
    almost any description ("artwork", "artistic"), so Pop Art context was
    returned whenever no other period matched.

    Args:
        description: Text description of the artwork.
        query: Accepted for interface compatibility; not used here.

    Returns:
        The combined Markdown report, or an "**Analysis Error**" message if
        anything raises.
    """
    try:
        desc_lower = description.lower()

        # Style analysis
        style_result = art_style_detector(description)

        # Historical context
        periods = ["renaissance", "baroque", "impressionism", "expressionism", "cubism", "surrealism", "abstract", "pop art"]
        detected_period = next((p for p in periods if p in desc_lower), None)
        context_result = historical_context_provider(detected_period or "classical art")

        # Symbolism analysis
        symbolism_result = symbolism_interpreter(description)

        # Technical analysis
        technical_result = technical_analysis_tool(description)

        return "\n\n".join([
            f"## 🎨 Style Analysis\n{style_result}",
            f"## πŸ“š Historical Context\n{context_result}",
            f"## πŸ” Symbolism\n{symbolism_result}",
            f"## 🎭 Technical Analysis\n{technical_result}",
        ])
    except Exception as e:
        return f"**Analysis Error**: {str(e)}\n\nPlease provide more details about the artwork for manual analysis."
def analyze_artwork_complete(image, query):
    """End-to-end handler: describe the image, analyse it, format a report.

    Steps: obtain a visual description, then run the agent when it is
    available, otherwise (or if the agent raises) call the tools directly.
    The result is wrapped in a Markdown report that also names the analysis
    method, the device and the query.

    Returns a prompt to upload an image when ``image`` is None, and a
    Markdown error message if any step raises.
    """
    if image is None:
        return "πŸ“Έ **Please upload an image to analyze.**"

    try:
        print(f"πŸ” Analyzing image on {device}...")
        # Get image description (GPU or CPU)
        visual_description = analyze_image_with_vision_model(image, query)
        print("🎨 Running art analysis...")

        # Try SmolAgent first, fallback to direct tools
        agent_succeeded = False
        if agent_ready:
            try:
                focus = query if query else "Provide comprehensive analysis covering style, historical context, symbolism, and technique."
                analysis_prompt = "\n".join([
                    f"You are an expert art historian. Analyze this artwork: {visual_description}",
                    f"{focus}",
                    "Use the available tools to provide detailed analysis.",
                ])
                expert_analysis = art_agent.run(analysis_prompt)
                analysis_method = f"πŸ€– SmolAgent Analysis ({device.upper()})"
                agent_succeeded = True
            except Exception as agent_error:
                print(f"Agent failed, using direct tools: {agent_error}")

        if not agent_succeeded:
            expert_analysis = analyze_with_tools_direct(visual_description, query)
            analysis_method = f"πŸ”§ Direct Tool Analysis ({device.upper()})"

        query_label = query if query else "General analysis"
        report_lines = [
            "# 🎨 **ARTWORK ANALYSIS**",
            "## πŸ‘οΈ **Visual Description**",
            f"{visual_description}",
            "---",
            f"{expert_analysis}",
            "---",
            f"*Analysis Method: {analysis_method}*",
            f"*Device: {device.upper()} | Query: {query_label}*",
        ]
        # The report ends with a newline after the last line.
        return "\n".join(report_lines) + "\n"
    except Exception as e:
        error_msg = f"Analysis error: {str(e)}"
        print(error_msg)
        traceback.print_exc()
        return "\n".join([
            "❌ **Analysis Error**",
            f"{error_msg}",
            "**Please try:**",
            "1. Uploading a clear JPG or PNG image",
            "2. Describing the artwork manually in the question box",
            "3. Being more specific about what you'd like to know",
            f"*Running on: {device.upper()}*",
        ])
# Gradio Interface - CPU/GPU Compatible
def create_interface():
    """Assemble the Gradio Blocks UI and return it without launching.

    Layout, top to bottom: header banner, device/agent status box, feature
    cards, a two-column row (inputs on the left, result panel on the right),
    example questions, and a footer. The analyze button is wired to
    ``analyze_artwork_complete``. Status texts are derived from the
    module-level ``device`` and ``agent_ready`` at build time.
    """
    # ---- values derived from module state -------------------------------
    on_gpu = device == "cuda"
    device_label = device.upper()
    status_icon = "πŸ”₯" if on_gpu else "πŸ’»"
    status_text = "βœ… All systems ready!" if agent_ready else "⚠️ Using backup tools"
    mode_note = "GPU acceleration enabled" if on_gpu else "CPU processing mode"
    tip_note = "GPU acceleration active!" if on_gpu else "CPU mode - still powerful!"
    agent_word = "Ready" if agent_ready else "Backup mode"

    # ---- static look and feel -------------------------------------------
    # Responsive theme that works on both CPU and GPU
    soft_theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
        neutral_hue="gray",
        font=gr.themes.GoogleFont("Inter"),
    )
    custom_css = """
.main-header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 2rem;
border-radius: 15px;
margin-bottom: 2rem;
text-align: center;
color: white;
}
.device-info {
padding: 1rem;
border-radius: 8px;
margin: 1rem 0;
text-align: center;
background: rgba(59, 130, 246, 0.1);
border: 1px solid rgba(59, 130, 246, 0.3);
}
.feature-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 1rem;
margin: 1rem 0;
}
.feature-card {
padding: 1.5rem;
border-radius: 10px;
text-align: center;
background: rgba(255,255,255,0.05);
border: 1px solid rgba(255,255,255,0.1);
}
"""

    # ---- markup blocks (plain strings, built before any component) -------
    header_html = """
<div class="main-header">
<h1>🎨 AI Art Historian</h1>
<h3>Powered by SmolAgent Framework + Vision AI</h3>
<p>Upload any artwork and discover its secrets through expert AI analysis</p>
<p style="margin-top: 1rem;">
<a href="https://youtu.be/xyNKr05Vvls?si=OmHjtOfBez2FjOTv" target="_blank" style="color: white; text-decoration: none; font-size: 1.1em;">
▢️ Watch Demo Video
</a>
</p>
</div>
"""
    device_html = f"""
<div class="device-info">
<strong>{status_icon} Running on: {device_label}</strong><br>
<strong>Agent Status:</strong> {status_text}<br>
<em>{mode_note}</em>
</div>
"""
    features_html = """
<div class="feature-grid">
<div class="feature-card">
<h3>🎨 Style Detection</h3>
<p>Renaissance to Modern movements</p>
</div>
<div class="feature-card">
<h3>πŸ“š Historical Context</h3>
<p>Periods, influences, key artists</p>
</div>
<div class="feature-card">
<h3>πŸ” Symbol Analysis</h3>
<p>Hidden meanings, iconography</p>
</div>
<div class="feature-card">
<h3>🎭 Technical Assessment</h3>
<p>Composition, technique, style</p>
</div>
</div>
"""
    tips_md = f"""
### πŸ’‘ Pro Tips
β€’ Upload clear, high-quality images
β€’ Try paintings, sculptures, drawings
β€’ Ask specific questions for detailed analysis
β€’ {tip_note}
"""
    placeholder_md = f"""
🎯 **Ready for Analysis!**
Upload an artwork image to get started. The AI will analyze:
🎨 **Artistic Style** - Movement and period identification
πŸ“š **Historical Context** - Cultural background and influences
πŸ” **Symbolism** - Hidden meanings and iconography
🎭 **Technique** - Compositional and technical analysis
*Running on {device_label} β€’ {status_text}*
"""
    footer_md = f"""
---
<div style="text-align: center; color: #666; margin-top: 2rem;">
<strong>🎨 AI Art Historian</strong> | Built with ❀️ Gradio<br>
<em>Device: {device_label} β€’ Agent: {agent_word} β€’ Discover art through AI</em>
</div>
"""
    # Example rows carry no image, only a question for the text box.
    example_questions = (
        "What artistic movement does this painting belong to?",
        "Analyze the use of color and symbolism in this artwork.",
        "What can you tell me about the historical context of this piece?",
        "Explain the composition and artistic technique used.",
        "What emotions or themes does this artwork convey?",
    )
    example_rows = [[None, question] for question in example_questions]

    # ---- component tree (creation order defines the layout) --------------
    with gr.Blocks(title="🎨 AI Art Historian", theme=soft_theme, css=custom_css) as demo:
        # Header
        gr.HTML(header_html)
        # Device info
        gr.HTML(device_html)
        # Features
        gr.HTML(features_html)

        # Main interface
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### πŸ“€ **Upload Your Artwork**")
                image_input = gr.Image(
                    label="πŸ–ΌοΈ Select Image",
                    type="pil",
                    height=350,
                )
                query_input = gr.Textbox(
                    label="πŸ’­ Ask Specific Questions (Optional)",
                    placeholder="e.g., 'What artistic movement does this represent?' or 'What do the colors symbolize?'",
                    lines=3,
                )
                analyze_btn = gr.Button(
                    "πŸ” **Analyze Artwork**",
                    variant="primary",
                    size="lg",
                )
                gr.Markdown(tips_md)
            with gr.Column(scale=2):
                gr.Markdown("### πŸ“Š **Analysis Results**")
                analysis_output = gr.Markdown(
                    value=placeholder_md,
                    container=True,
                )

        # Examples
        with gr.Row():
            gr.Examples(
                examples=example_rows,
                inputs=[image_input, query_input],
                label="🎯 **Example Questions**",
            )

        # Event handler
        analyze_btn.click(
            fn=analyze_artwork_complete,
            inputs=[image_input, query_input],
            outputs=[analysis_output],
            show_progress=True,
        )

        # Footer
        gr.Markdown(footer_md)

    return demo
# Launch
# Runs only when the file is executed as a script: build the UI and serve it
# on all interfaces at port 7860 without a public share link.
if __name__ == "__main__":
    print("🌟 Launching AI Art Historian...")
    demo = create_interface()
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)