# File size: 24,939 Bytes
# Viewer revision gutter: 35d9124 20c5979 (line-number column 1-584 omitted)
import gradio as gr
from smolagents import CodeAgent, InferenceClientModel, tool
from PIL import Image
import sys
import traceback
import torch
# Resolve the compute device once at import time and log the runtime
# environment (interpreter version, chosen device, CUDA availability).
device = "cuda" if torch.cuda.is_available() else "cpu"
print(
    f"Python version: {sys.version}",
    f"Device: {device}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)
@tool
def art_style_detector(visual_description: str) -> str:
    """
    Identifies the artistic style of an artwork.

    Args:
        visual_description: Visual description of the artwork including colors, composition, and technique
    """
    # NOTE(review): the docstring is presumably what the `@tool` decorator
    # exposes to the agent, so its summary/Args layout is kept unchanged and
    # implementation notes live in comments instead.
    #
    # Returns a Markdown string naming the best-scoring style plus up to two
    # runners-up, or a fixed hint when no cue word matches.

    # Cue words per style. A style earns one point for every cue that occurs
    # as a case-insensitive substring of the description.
    styles = {
        "Renaissance": ["perspective", "realistic", "classical", "religious", "balanced", "proportion", "leonardo", "michelangelo"],
        "Baroque": ["dramatic", "chiaroscuro", "movement", "emotional", "ornate", "theatrical", "caravaggio", "rubens"],
        "Impressionism": ["light", "color", "brushstrokes", "outdoor", "spontaneous", "fleeting", "monet", "renoir"],
        "Expressionism": ["emotion", "distorted", "subjective", "bold colors", "psychological", "kandinsky", "munch"],
        "Cubism": ["geometric", "fragmented", "multiple perspectives", "abstract", "analytical", "picasso", "braque"],
        "Surrealism": ["dreamlike", "unconscious", "bizarre", "symbolic", "fantastic", "dali", "magritte"],
        # "pollock" was misspelled "pollack", so the artist's name never matched.
        "Abstract": ["non-representational", "geometric", "color field", "experimental", "pollock", "rothko"],
        "Pop Art": ["commercial", "popular culture", "bright colors", "mass production", "warhol", "lichtenstein"],
        "Minimalism": ["simple", "clean", "geometric", "reduced", "essential", "minimal"],
    }

    desc = visual_description.lower()
    matches = []
    for style, keywords in styles.items():
        score = sum(kw in desc for kw in keywords)
        if score > 0:
            matches.append((style, score))

    if not matches:
        return "Style analysis requires more specific visual details"

    # Stable sort: styles with equal scores keep their table order.
    matches.sort(key=lambda item: item[1], reverse=True)

    result = f"**Primary Style: {matches[0][0]}**\n"
    if len(matches) > 1:
        others = [f"{style} ({score})" for style, score in matches[1:3]]
        result += f"Secondary influences: {', '.join(others)}"
    return result
@tool
def historical_context_provider(art_period: str) -> str:
    """
    Provides historical context for an artwork period.

    Args:
        art_period: The name of the art period or artistic movement to analyze
    """
    # NOTE(review): the docstring is presumably what the `@tool` decorator
    # exposes to the agent, so its wording and Args layout are kept unchanged.
    #
    # Returns the Markdown blurb of the first known period whose name occurs
    # in `art_period`, or a generic sentence echoing `art_period` otherwise.
    contexts = {
        "renaissance": "**Renaissance (14th-17th century)**: Humanism, scientific revolution, revival of classical antiquity. Key artists: Leonardo da Vinci, Michelangelo, Raphael. Characteristics: Linear perspective, anatomical accuracy, balanced compositions.",
        "baroque": "**Baroque (17th-18th century)**: Counter-Reformation, dramatic storytelling, chiaroscuro technique. Key artists: Caravaggio, Bernini, Rubens. Characteristics: Dynamic movement, emotional intensity, theatrical lighting.",
        "impressionism": "**Impressionism (1860s-1880s)**: Plein air painting, light and color focus, brushwork visible. Key artists: Monet, Renoir, Degas. Characteristics: Capturing fleeting moments, natural light, loose brushstrokes.",
        "expressionism": "**Expressionism (early 20th century)**: Emotional expression over realism, psychological exploration. Key artists: Kandinsky, Munch, Kirchner. Characteristics: Bold colors, distorted forms, subjective perspective.",
        "cubism": "**Cubism (1907-1920s)**: Multiple perspectives, geometric forms, fragmented reality. Key artists: Picasso, Braque. Characteristics: Analytical and synthetic phases, collage elements.",
        # The artist's name was mis-encoded ("DalΓ"); restored to "Dalí".
        "surrealism": "**Surrealism (1920s-1940s)**: Unconscious mind, dreams, automatic drawing. Key artists: Dalí, Magritte, Ernst. Characteristics: Bizarre imagery, psychological exploration, fantastic elements.",
        "abstract": "**Abstract Art (20th century)**: Non-representational, emphasis on color, form, line. Key artists: Kandinsky, Mondrian, Pollock. Characteristics: Pure visual elements, emotional expression through abstraction.",
        "pop art": "**Pop Art (1950s-1960s)**: Popular culture, mass media, commercial aesthetics. Key artists: Warhol, Lichtenstein. Characteristics: Bright colors, repetition, everyday objects as art.",
    }

    # Treat hyphens/underscores as spaces and collapse runs of whitespace so
    # spellings such as "Pop-Art" or "pop  art" still hit the "pop art" key.
    separators_as_spaces = art_period.lower().replace("-", " ").replace("_", " ")
    period_lower = " ".join(separators_as_spaces.split())

    # First key (in table order) contained in the input wins.
    for period, context in contexts.items():
        if period in period_lower:
            return context
    return f"**{art_period}**: A significant artistic movement with unique cultural and historical importance. Analysis would benefit from more specific period identification."
@tool
def symbolism_interpreter(visual_elements: str) -> str:
    """
    Interprets symbolic meanings and iconography in artwork.

    Args:
        visual_elements: Description of symbolic visual elements in the artwork
    """
    # NOTE(review): the docstring is presumably what the `@tool` decorator
    # exposes to the agent, so its wording and Args layout are kept unchanged.
    #
    # Returns a Markdown bullet list with one entry per known symbol that
    # occurs (case-insensitive substring) in the input, or a generic sentence
    # when none is found.
    symbols = {
        "light": "divine presence, enlightenment, truth, hope, spiritual awakening",
        "darkness": "mystery, evil, unconscious, melancholy, death, ignorance",
        "red": "passion, blood, love, war, power, sacrifice, life force",
        "blue": "divinity, tranquility, melancholy, infinity, spirituality, wisdom",
        "white": "purity, innocence, peace, divinity, rebirth, spiritual perfection",
        "gold": "divine light, wealth, immortality, sacred, royal power",
        "crown": "authority, divine right, royal power, achievement, victory",
        "flowers": "beauty, mortality, seasons, femininity, love, fleeting life",
        "skull": "memento mori, mortality, wisdom, vanitas, death's inevitability",
        "cross": "Christianity, sacrifice, redemption, intersection of earthly and divine",
        "water": "life, purification, emotion, unconscious mind, baptism, renewal",
        "dove": "peace, Holy Spirit, purity, love, divine messenger",
        "serpent": "temptation, evil, wisdom, medicine, transformation, rebirth",
        "lion": "courage, strength, royalty, Christ, divine power",
        "lamb": "innocence, sacrifice, Christ, purity, gentleness",
        "apple": "temptation, knowledge, sin, earth, sensuality",
        "mirror": "vanity, truth, self-knowledge, reflection, soul",
        "candle": "life, enlightenment, spirituality, time",
        "black": "death, mystery, elegance, unknown, mourning",
    }

    elements_lower = visual_elements.lower()
    # The bullet glyph was mis-encoded ("β’"); restored to "•".
    found_symbols = [
        f"• **{symbol.title()}**: {meaning}"
        for symbol, meaning in symbols.items()
        if symbol in elements_lower
    ]

    if found_symbols:
        return "**Symbolic Interpretations:**\n" + "\n".join(found_symbols)
    return "**Symbolic Analysis**: The artwork may contain personal, cultural, or period-specific symbols requiring deeper contextual analysis."
@tool
def technical_analysis_tool(composition_details: str) -> str:
    """
    Analyzes technical and compositional aspects of artwork.

    Args:
        composition_details: Description of technical composition, color usage, and artistic techniques
    """
    # NOTE(review): the docstring is presumably what the `@tool` decorator
    # exposes to the agent, so its wording and Args layout are kept unchanged.
    #
    # Returns a Markdown report: optional "Techniques Identified" and
    # "Compositional Elements" bullet lists, followed by the input echoed as
    # observational notes and a fixed recommendation.
    techniques = {
        "oil": "Rich color saturation, smooth blending, detailed work, layered application",
        "watercolor": "Transparent layers, luminous effects, spontaneous flow, delicate washes",
        "acrylic": "Vibrant colors, quick drying, versatile techniques, modern medium",
        "tempera": "Precise details, bright colors, quick drying, pre-oil painting era",
        "fresco": "Wall painting, wet plaster application, permanent integration, monumental scale",
        "pastels": "Soft texture, direct color application, atmospheric effects",
        "chiaroscuro": "dramatic light-dark contrast, three-dimensional modeling, emotional intensity",
        "sfumato": "subtle gradations, atmospheric perspective, Leonardo's technique",
        "impasto": "thick paint application, textural effects, visible brushstrokes",
    }
    composition_elements = {
        "triangular": "stable, harmonious, classical composition",
        "diagonal": "dynamic, movement, baroque influence",
        "circular": "unity, completeness, divine perfection",
        "golden ratio": "mathematical harmony, natural proportions, aesthetic perfection",
        "rule of thirds": "balanced composition, visual interest, modern technique",
    }

    desc_lower = composition_details.lower()
    # Compositional terms are compared with spaces removed on both sides so
    # "goldenratio" and "golden ratio" are treated alike; computed once here
    # instead of on every loop iteration.
    desc_compact = desc_lower.replace(" ", "")

    # The bullet glyph was mis-encoded ("β’"); restored to "•".
    found_techniques = [
        f"• **{technique.title()}**: {description}"
        for technique, description in techniques.items()
        if technique in desc_lower
    ]
    found_composition = [
        f"• **{comp.title()}**: {description}"
        for comp, description in composition_elements.items()
        if comp.replace(" ", "") in desc_compact
    ]

    parts = ["**Technical Analysis:**\n"]
    if found_techniques:
        parts.append("**Techniques Identified:**\n" + "\n".join(found_techniques) + "\n\n")
    if found_composition:
        parts.append("**Compositional Elements:**\n" + "\n".join(found_composition) + "\n\n")
    parts.append(f"**Observational Notes**: {composition_details}\n")
    parts.append("**Recommendation**: Consider analyzing brushwork, color harmony, spatial relationships, and overall execution quality.")
    return "".join(parts)
# Initialize SmolAgent for CPU/GPU flexibility
# The agent is built eagerly at import time. Any failure is reported on stdout
# and recorded in `agent_ready`, which the rest of the module checks before
# touching `art_agent` (it stays undefined when creation fails).
# NOTE(review): the leading "β" in the failure message is the residue of a
# mis-decoded glyph whose bytes are lost; restore it from the original file.
try:
    print("🎨 Creating SmolAgent for artwork analysis...")
    model = InferenceClientModel(model_id="meta-llama/Llama-3.2-3B-Instruct")
    art_agent = CodeAgent(
        tools=[
            art_style_detector,
            historical_context_provider,
            symbolism_interpreter,
            technical_analysis_tool,
        ],
        model=model,
        add_base_tools=False,
        max_steps=3,
    )
    # This literal was split across two lines by a mis-decoded emoji, leaving
    # an unterminated string.
    print("✅ SmolAgent created successfully!")
    agent_ready = True
except Exception as e:
    print(f"β Agent creation failed: {e}")
    agent_ready = False
# Optional GPU-based vision analysis (if available)
def analyze_image_with_vision_model(image, query):
    """Describe an artwork image, preferring the GPU vision model.

    When the module-level ``device`` is ``"cuda"`` the image is captioned with
    ``microsoft/Phi-3.5-vision-instruct``. If CUDA is not selected, or anything
    in the GPU path raises, the result of ``get_cpu_image_description`` is
    returned instead.

    Args:
        image: A PIL image, a file path, or an array accepted by
            ``Image.fromarray``.
        query: Optional user question appended to the prompt (may be empty).

    Returns:
        The description text.
    """
    # Turn paths/arrays into a PIL image before choosing a path, so the CPU
    # fallback receives the same kind of object as the GPU path (previously
    # this only happened inside the CUDA branch). A load failure is not fatal:
    # the fallback copes with objects it cannot inspect.
    try:
        if isinstance(image, str):
            image = Image.open(image)
        elif not hasattr(image, "convert"):
            image = Image.fromarray(image)
    except Exception as e:
        print(f"Could not load image for analysis: {e}")

    # NOTE(review): the leading "π" in the failure message below is the
    # residue of a mis-decoded emoji whose bytes are lost.
    try:
        if device == "cuda":
            print("🔥 Attempting GPU-based vision analysis...")
            from transformers import AutoModelForCausalLM, AutoProcessor

            # Try to load Phi-3.5-vision with GPU
            # NOTE(review): model and processor are loaded on every call;
            # consider caching them if repeated requests are expected.
            model = AutoModelForCausalLM.from_pretrained(
                "microsoft/Phi-3.5-vision-instruct",
                trust_remote_code=True,
                torch_dtype=torch.bfloat16,
                _attn_implementation="eager",
                device_map="auto",
            )
            processor = AutoProcessor.from_pretrained(
                "microsoft/Phi-3.5-vision-instruct",
                trust_remote_code=True,
            )

            art_prompt = "\n".join([
                "<|user|>",
                "Describe this artwork in detail focusing on:",
                "- Visual elements (colors, composition, subjects)",
                "- Style and technique",
                "- Period indicators",
                "- Mood and atmosphere",
                f"{query if query else ''}",
                "<|image_1|>",
                "<|end|>",
                "<|assistant|>",
            ])

            # Every input kind is converted to RGB (path inputs previously
            # skipped this step).
            image = image.convert("RGB")

            inputs = processor(art_prompt, image, return_tensors="pt")
            inputs = {k: v.to(model.device) for k, v in inputs.items()}
            with torch.no_grad():
                generate_ids = model.generate(
                    **inputs,
                    max_new_tokens=400,
                    eos_token_id=processor.tokenizer.eos_token_id,
                    pad_token_id=processor.tokenizer.eos_token_id,
                    do_sample=False,
                    use_cache=False,
                )
            # Drop the prompt tokens so only the newly generated text is decoded.
            generate_ids = generate_ids[:, inputs["input_ids"].shape[1]:]
            description = processor.batch_decode(
                generate_ids,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=False,
            )[0]
            return description.strip()
    except Exception as e:
        print(f"π GPU vision analysis failed, using CPU fallback: {e}")

    # CPU fallback - basic image analysis
    return get_cpu_image_description(image, query)
def get_cpu_image_description(image, query=""):
    """Build a heuristic text description of an image without any model.

    Only ``image.size``, ``image.mode`` and ``image.getcolors`` are used.

    Args:
        image: Object exposing ``size`` (width, height), ``mode`` and
            ``getcolors(maxcolors=...)`` — a PIL image in this file.
        query: Optional user question echoed into the description.

    Returns:
        A multi-line description, or a one-line generic sentence when the
        image cannot be inspected.
    """
    try:
        width, height = image.size
        mode = image.mode

        # Basic color analysis. This is best-effort: any failure here only
        # downgrades the wording, it never aborts the description.
        try:
            colors = image.getcolors(maxcolors=256 * 256 * 256)
            if colors:
                # "Dominated" means at least one colour covers more than 10%
                # of the pixels; checking the largest count is equivalent to
                # checking the five most frequent colours.
                largest_count = max(count for count, _ in colors)
                if largest_count > width * height * 0.1:
                    color_desc = "dominated by strong, bold colors"
                else:
                    color_desc = "featuring a varied, complex color palette"
            else:
                color_desc = "with rich, complex coloration"
        except Exception:
            color_desc = "with artistic color composition"

        aspect_ratio = width / height
        if aspect_ratio > 1.3:
            orientation = "landscape orientation"
        elif aspect_ratio < 0.7:
            orientation = "portrait orientation"
        else:
            orientation = "square composition"

        # Generate artistic description
        interest = f"User interest: {query}" if query else ""
        return "\n".join([
            f"This artwork presents a {orientation} {color_desc}.",
            "The image shows classical artistic elements with traditional composition techniques.",
            "The piece appears to demonstrate careful attention to visual balance and artistic principles.",
            f"Dimensions: {width}x{height} pixels in {mode} color mode.",
            interest,
            "The work suggests careful artistic execution with attention to both technical and aesthetic considerations.",
        ])
    except Exception:
        # The object could not be inspected at all (no size/mode, zero height…).
        return f"Artwork with traditional composition and artistic styling. {f'User query: {query}' if query else ''}"
def analyze_with_tools_direct(description, query):
    """Run the four analysis tools directly, without going through the agent.

    Args:
        description: Text description of the artwork fed to every tool.
        query: Unused; kept so the signature matches the call sites.

    Returns:
        A Markdown document with style, historical-context, symbolism and
        technical sections, or an "Analysis Error" message if a tool raises.
    """
    # NOTE(review): the lone "π" glyphs in three section headings are the
    # residue of mis-decoded emoji whose bytes are lost; restore them from the
    # original file.
    try:
        # Style analysis
        style_result = art_style_detector(description)

        # Historical context: use the first period named verbatim in the
        # description. The previous check also matched each *word* of a
        # period name, so "pop art" fired on any text containing "art"
        # (e.g. "artwork") and the generic fallback was effectively never used.
        periods = ["renaissance", "baroque", "impressionism", "expressionism", "cubism", "surrealism", "abstract", "pop art"]
        desc_lower = description.lower()
        detected_period = next((period for period in periods if period in desc_lower), None)
        context_result = historical_context_provider(detected_period or "classical art")

        # Symbolism analysis
        symbolism_result = symbolism_interpreter(description)

        # Technical analysis
        technical_result = technical_analysis_tool(description)

        results = [
            f"## 🎨 Style Analysis\n{style_result}",
            f"## π Historical Context\n{context_result}",
            f"## π Symbolism\n{symbolism_result}",
            f"## π Technical Analysis\n{technical_result}",
        ]
        return "\n\n".join(results)
    except Exception as e:
        return f"**Analysis Error**: {e}\n\nPlease provide more details about the artwork for manual analysis."
def analyze_artwork_complete(image, query):
    """Complete artwork analysis pipeline - CPU/GPU flexible.

    Describes the image, then analyses that description with the agent when
    ``agent_ready`` is true, falling back to ``analyze_with_tools_direct`` if
    the agent is unavailable or raises.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
        query: Optional user question (may be empty or ``None``).

    Returns:
        A Markdown report; a prompt to upload an image when ``image`` is
        ``None``; or a Markdown error message if the pipeline raises.
    """
    # NOTE(review): the "π", "ποΈ" and "β" glyphs left in some strings below
    # are residues of mis-decoded emoji whose bytes are lost; restore them
    # from the original file.
    if image is None:
        return "📸 **Please upload an image to analyze.**"

    try:
        print(f"π Analyzing image on {device}...")
        # Get image description (GPU or CPU)
        visual_description = analyze_image_with_vision_model(image, query)

        print("🎨 Running art analysis...")
        # Try SmolAgent first, fallback to direct tools
        agent_succeeded = False
        if agent_ready:
            try:
                request = query if query else "Provide comprehensive analysis covering style, historical context, symbolism, and technique."
                analysis_prompt = "\n".join([
                    f"You are an expert art historian. Analyze this artwork: {visual_description}",
                    f"{request}",
                    "Use the available tools to provide detailed analysis.",
                ])
                expert_analysis = art_agent.run(analysis_prompt)
                analysis_method = f"🤖 SmolAgent Analysis ({device.upper()})"
                agent_succeeded = True
            except Exception as agent_error:
                print(f"Agent failed, using direct tools: {agent_error}")

        if not agent_succeeded:
            expert_analysis = analyze_with_tools_direct(visual_description, query)
            analysis_method = f"🔧 Direct Tool Analysis ({device.upper()})"

        # Lines are joined explicitly (no leading whitespace) because the
        # interpolated description is multi-line Markdown.
        query_label = query if query else "General analysis"
        final_analysis = "\n".join([
            "# 🎨 **ARTWORK ANALYSIS**",
            "## ποΈ **Visual Description**",
            f"{visual_description}",
            "---",
            f"{expert_analysis}",
            "---",
            f"*Analysis Method: {analysis_method}*",
            f"*Device: {device.upper()} | Query: {query_label}*",
            "",
        ])
        return final_analysis
    except Exception as e:
        error_msg = f"Analysis error: {e}"
        print(error_msg)
        traceback.print_exc()
        return "\n".join([
            "β **Analysis Error**",
            f"{error_msg}",
            "**Please try:**",
            "1. Uploading a clear JPG or PNG image",
            "2. Describing the artwork manually in the question box",
            "3. Being more specific about what you'd like to know",
            f"*Running on: {device.upper()}*",
        ])
# Gradio Interface - CPU/GPU Compatible
def create_interface():
    """Build the Gradio Blocks UI for the art-historian demo.

    Reads the module-level ``device`` and ``agent_ready`` values to render the
    status banners, and wires the analyze button to
    ``analyze_artwork_complete``.

    Returns:
        The constructed ``gr.Blocks`` instance (not launched here).
    """
    # NOTE(review): lone "π" glyphs left in some labels are residues of
    # mis-decoded emoji whose bytes are lost; restore them from the original
    # file. Markdown strings are assembled line by line without leading
    # whitespace so indentation cannot change how they render.

    # Responsive theme that works on both CPU and GPU
    theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
        neutral_hue="gray",
        font=gr.themes.GoogleFont("Inter"),
    )
    css = """
    .main-header {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 2rem;
        border-radius: 15px;
        margin-bottom: 2rem;
        text-align: center;
        color: white;
    }
    .device-info {
        padding: 1rem;
        border-radius: 8px;
        margin: 1rem 0;
        text-align: center;
        background: rgba(59, 130, 246, 0.1);
        border: 1px solid rgba(59, 130, 246, 0.3);
    }
    .feature-grid {
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
        gap: 1rem;
        margin: 1rem 0;
    }
    .feature-card {
        padding: 1.5rem;
        border-radius: 10px;
        text-align: center;
        background: rgba(255,255,255,0.05);
        border: 1px solid rgba(255,255,255,0.1);
    }
    """

    # Status fragments shared by several widgets below.
    on_gpu = device == "cuda"
    device_emoji = "🔥" if on_gpu else "💻"
    # This literal was split across two lines by a mis-decoded emoji, leaving
    # an unterminated string.
    agent_status_text = "✅ All systems ready!" if agent_ready else "⚠️ Using backup tools"
    mode_text = "GPU acceleration enabled" if on_gpu else "CPU processing mode"
    tip_text = "GPU acceleration active!" if on_gpu else "CPU mode - still powerful!"
    agent_mode = "Ready" if agent_ready else "Backup mode"

    with gr.Blocks(title="🎨 AI Art Historian", theme=theme, css=css) as demo:
        # Header
        gr.HTML("""
        <div class="main-header">
            <h1>🎨 AI Art Historian</h1>
            <h3>Powered by SmolAgent Framework + Vision AI</h3>
            <p>Upload any artwork and discover its secrets through expert AI analysis</p>
            <p style="margin-top: 1rem;">
                <a href="https://youtu.be/xyNKr05Vvls?si=OmHjtOfBez2FjOTv" target="_blank" style="color: white; text-decoration: none; font-size: 1.1em;">
                    ▶️ Watch Demo Video
                </a>
            </p>
        </div>
        """)

        # Device info
        gr.HTML(f"""
        <div class="device-info">
            <strong>{device_emoji} Running on: {device.upper()}</strong><br>
            <strong>Agent Status:</strong> {agent_status_text}<br>
            <em>{mode_text}</em>
        </div>
        """)

        # Features
        gr.HTML("""
        <div class="feature-grid">
            <div class="feature-card">
                <h3>🎨 Style Detection</h3>
                <p>Renaissance to Modern movements</p>
            </div>
            <div class="feature-card">
                <h3>π Historical Context</h3>
                <p>Periods, influences, key artists</p>
            </div>
            <div class="feature-card">
                <h3>π Symbol Analysis</h3>
                <p>Hidden meanings, iconography</p>
            </div>
            <div class="feature-card">
                <h3>π Technical Assessment</h3>
                <p>Composition, technique, style</p>
            </div>
        </div>
        """)

        # Main interface
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📤 **Upload Your Artwork**")
                image_input = gr.Image(
                    label="🖼️ Select Image",
                    type="pil",
                    height=350,
                )
                query_input = gr.Textbox(
                    label="π Ask Specific Questions (Optional)",
                    placeholder="e.g., 'What artistic movement does this represent?' or 'What do the colors symbolize?'",
                    lines=3,
                )
                analyze_btn = gr.Button(
                    "π **Analyze Artwork**",
                    variant="primary",
                    size="lg",
                )
                gr.Markdown("\n".join([
                    "",
                    "### 💡 Pro Tips",
                    "• Upload clear, high-quality images",
                    "• Try paintings, sculptures, drawings",
                    "• Ask specific questions for detailed analysis",
                    f"• {tip_text}",
                    "",
                ]))

            with gr.Column(scale=2):
                gr.Markdown("### π **Analysis Results**")
                analysis_output = gr.Markdown(
                    value="\n".join([
                        "",
                        "🎯 **Ready for Analysis!**",
                        "Upload an artwork image to get started. The AI will analyze:",
                        "🎨 **Artistic Style** - Movement and period identification",
                        "π **Historical Context** - Cultural background and influences",
                        "π **Symbolism** - Hidden meanings and iconography",
                        "π **Technique** - Compositional and technical analysis",
                        f"*Running on {device.upper()} • {agent_status_text}*",
                        "",
                    ]),
                    container=True,
                )

        # Examples: question-only rows, the image slot is left empty.
        with gr.Row():
            gr.Examples(
                examples=[
                    [None, "What artistic movement does this painting belong to?"],
                    [None, "Analyze the use of color and symbolism in this artwork."],
                    [None, "What can you tell me about the historical context of this piece?"],
                    [None, "Explain the composition and artistic technique used."],
                    [None, "What emotions or themes does this artwork convey?"],
                ],
                inputs=[image_input, query_input],
                label="🎯 **Example Questions**",
            )

        # Event handler
        analyze_btn.click(
            fn=analyze_artwork_complete,
            inputs=[image_input, query_input],
            outputs=[analysis_output],
            show_progress=True,
        )

        # Footer
        gr.Markdown("\n".join([
            "",
            "---",
            '<div style="text-align: center; color: #666; margin-top: 2rem;">',
            "<strong>🎨 AI Art Historian</strong> | Built with ❤️ Gradio<br>",
            f"<em>Device: {device.upper()} • Agent: {agent_mode} • Discover art through AI</em>",
            "</div>",
            "",
        ]))

    return demo
# Launch
# Runs only when the file is executed as a script: builds the UI and serves it
# on all interfaces at port 7860 without a public share link.
# NOTE(review): the leading "π" in the log line is the residue of a
# mis-decoded emoji whose bytes are lost.
if __name__ == "__main__":
    print("π Launching AI Art Historian...")
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )