|
import functools
import sys
import traceback

import gradio as gr
import torch
from PIL import Image
from smolagents import CodeAgent, InferenceClientModel, tool
|
|
|
|
|
# Probe CUDA once and reuse the answer for the device choice and the log line.
_cuda_available = torch.cuda.is_available()
device = "cuda" if _cuda_available else "cpu"

# Startup diagnostics, printed once at import time.
print(f"Python version: {sys.version}")
print(f"Device: {device}")
print(f"CUDA available: {_cuda_available}")
|
|
|
# Keyword-based style classifier exposed to the agent as a tool.
# The docstring below is kept verbatim: smolagents uses it as the tool
# description and argument schema, so it is runtime text, not just a comment.
#
# Returns a Markdown snippet naming the best-scoring style plus up to two
# runners-up, or a fixed "needs more detail" sentence when nothing matches.
@tool
def art_style_detector(visual_description: str) -> str:
    """
    Identifies the artistic style of an artwork.

    Args:
        visual_description: Visual description of the artwork including colors, composition, and technique
    """
    styles = {
        "Renaissance": ["perspective", "realistic", "classical", "religious", "balanced", "proportion", "leonardo", "michelangelo"],
        "Baroque": ["dramatic", "chiaroscuro", "movement", "emotional", "ornate", "theatrical", "caravaggio", "rubens"],
        "Impressionism": ["light", "color", "brushstrokes", "outdoor", "spontaneous", "fleeting", "monet", "renoir"],
        "Expressionism": ["emotion", "distorted", "subjective", "bold colors", "psychological", "kandinsky", "munch"],
        "Cubism": ["geometric", "fragmented", "multiple perspectives", "abstract", "analytical", "picasso", "braque"],
        "Surrealism": ["dreamlike", "unconscious", "bizarre", "symbolic", "fantastic", "dali", "magritte"],
        # "pollock" is the correct spelling; the old misspelling "pollack" is
        # kept so inputs that matched before still match.
        "Abstract": ["non-representational", "geometric", "color field", "experimental", "pollock", "pollack", "rothko"],
        "Pop Art": ["commercial", "popular culture", "bright colors", "mass production", "warhol", "lichtenstein"],
        "Minimalism": ["simple", "clean", "geometric", "reduced", "essential", "minimal"],
    }

    desc = visual_description.lower()

    # Score = number of keywords that occur as substrings of the description.
    scored = [
        (style, sum(kw in desc for kw in keywords))
        for style, keywords in styles.items()
    ]
    # sorted() is stable, so styles with equal scores keep declaration order.
    matches = sorted(
        (pair for pair in scored if pair[1] > 0),
        key=lambda pair: pair[1],
        reverse=True,
    )

    if not matches:
        return "Style analysis requires more specific visual details"

    result = f"**Primary Style: {matches[0][0]}**\n"
    runners_up = [f"{style} ({score})" for style, score in matches[1:3]]
    if runners_up:
        result += f"Secondary influences: {', '.join(runners_up)}"
    return result
|
|
|
# Lookup tool: maps an art period name to a canned historical summary.
# The docstring is kept verbatim because smolagents uses it as the tool
# description and argument schema.
#
# Returns the summary of the first known period whose name occurs in the
# (lower-cased) input, or a generic sentence echoing the input otherwise.
@tool
def historical_context_provider(art_period: str) -> str:
    """
    Provides historical context for an artwork period.

    Args:
        art_period: The name of the art period or artistic movement to analyze
    """
    contexts = {
        "renaissance": "**Renaissance (14th-17th century)**: Humanism, scientific revolution, revival of classical antiquity. Key artists: Leonardo da Vinci, Michelangelo, Raphael. Characteristics: Linear perspective, anatomical accuracy, balanced compositions.",
        "baroque": "**Baroque (17th-18th century)**: Counter-Reformation, dramatic storytelling, chiaroscuro technique. Key artists: Caravaggio, Bernini, Rubens. Characteristics: Dynamic movement, emotional intensity, theatrical lighting.",
        "impressionism": "**Impressionism (1860s-1880s)**: Plein air painting, light and color focus, brushwork visible. Key artists: Monet, Renoir, Degas. Characteristics: Capturing fleeting moments, natural light, loose brushstrokes.",
        "expressionism": "**Expressionism (early 20th century)**: Emotional expression over realism, psychological exploration. Key artists: Kandinsky, Munch, Kirchner. Characteristics: Bold colors, distorted forms, subjective perspective.",
        "cubism": "**Cubism (1907-1920s)**: Multiple perspectives, geometric forms, fragmented reality. Key artists: Picasso, Braque. Characteristics: Analytical and synthetic phases, collage elements.",
        "surrealism": "**Surrealism (1920s-1940s)**: Unconscious mind, dreams, automatic drawing. Key artists: Dalí, Magritte, Ernst. Characteristics: Bizarre imagery, psychological exploration, fantastic elements.",
        "abstract": "**Abstract Art (20th century)**: Non-representational, emphasis on color, form, line. Key artists: Kandinsky, Mondrian, Pollock. Characteristics: Pure visual elements, emotional expression through abstraction.",
        "pop art": "**Pop Art (1950s-1960s)**: Popular culture, mass media, commercial aesthetics. Key artists: Warhol, Lichtenstein. Characteristics: Bright colors, repetition, everyday objects as art.",
        # The style detector in this file can report "Minimalism"; without
        # this entry that style always fell through to the generic answer.
        "minimalism": "**Minimalism (1960s-1970s)**: Reduction to essentials, industrial materials, rejection of expressive gesture. Key artists: Donald Judd, Agnes Martin, Dan Flavin. Characteristics: Simple geometric forms, repetition, clean surfaces.",
    }

    period_lower = art_period.lower()

    # First match in declaration order wins, so "abstract expressionism"
    # resolves to the "expressionism" entry.
    matched = next(
        (context for period, context in contexts.items() if period in period_lower),
        None,
    )
    if matched is not None:
        return matched

    return f"**{art_period}**: A significant artistic movement with unique cultural and historical importance. Analysis would benefit from more specific period identification."
|
|
|
# Lookup tool: lists conventional meanings for symbol words found in a
# description. The docstring is kept verbatim because smolagents uses it as
# the tool description and argument schema.
#
# Returns a Markdown block with one bullet per symbol found, or a generic
# sentence when none of the known symbols occur.
@tool
def symbolism_interpreter(visual_elements: str) -> str:
    """
    Interprets symbolic meanings and iconography in artwork.

    Args:
        visual_elements: Description of symbolic visual elements in the artwork
    """
    symbols = {
        "light": "divine presence, enlightenment, truth, hope, spiritual awakening",
        "darkness": "mystery, evil, unconscious, melancholy, death, ignorance",
        "red": "passion, blood, love, war, power, sacrifice, life force",
        "blue": "divinity, tranquility, melancholy, infinity, spirituality, wisdom",
        "white": "purity, innocence, peace, divinity, rebirth, spiritual perfection",
        "gold": "divine light, wealth, immortality, sacred, royal power",
        "crown": "authority, divine right, royal power, achievement, victory",
        "flowers": "beauty, mortality, seasons, femininity, love, fleeting life",
        "skull": "memento mori, mortality, wisdom, vanitas, death's inevitability",
        "cross": "Christianity, sacrifice, redemption, intersection of earthly and divine",
        "water": "life, purification, emotion, unconscious mind, baptism, renewal",
        "dove": "peace, Holy Spirit, purity, love, divine messenger",
        "serpent": "temptation, evil, wisdom, medicine, transformation, rebirth",
        "lion": "courage, strength, royalty, Christ, divine power",
        "lamb": "innocence, sacrifice, Christ, purity, gentleness",
        "apple": "temptation, knowledge, sin, earth, sensuality",
        "mirror": "vanity, truth, self-knowledge, reflection, soul",
        "candle": "life, enlightenment, spirituality, time",
        "black": "death, mystery, elegance, unknown, mourning",
    }

    elements_lower = visual_elements.lower()

    # NOTE(review): this is plain substring matching, so "red" also fires on
    # words such as "rendered" and "cross" on "across". Left unchanged because
    # whole-word matching would lose compounds like "sunlight".
    found_symbols = [
        f"• **{symbol.title()}**: {meaning}"
        for symbol, meaning in symbols.items()
        if symbol in elements_lower
    ]

    if found_symbols:
        return "**Symbolic Interpretations:**\n" + "\n".join(found_symbols)

    return "**Symbolic Analysis**: The artwork may contain personal, cultural, or period-specific symbols requiring deeper contextual analysis."
|
|
|
# Lookup tool: reports known media/techniques and composition schemes that
# are named in a description. The docstring is kept verbatim because
# smolagents uses it as the tool description and argument schema.
#
# Returns a Markdown report: optional "Techniques Identified" and
# "Compositional Elements" sections, then the input echoed back as
# observational notes and a fixed recommendation.
@tool
def technical_analysis_tool(composition_details: str) -> str:
    """
    Analyzes technical and compositional aspects of artwork.

    Args:
        composition_details: Description of technical composition, color usage, and artistic techniques
    """
    techniques = {
        "oil": "Rich color saturation, smooth blending, detailed work, layered application",
        "watercolor": "Transparent layers, luminous effects, spontaneous flow, delicate washes",
        "acrylic": "Vibrant colors, quick drying, versatile techniques, modern medium",
        "tempera": "Precise details, bright colors, quick drying, pre-oil painting era",
        "fresco": "Wall painting, wet plaster application, permanent integration, monumental scale",
        "pastels": "Soft texture, direct color application, atmospheric effects",
        "chiaroscuro": "dramatic light-dark contrast, three-dimensional modeling, emotional intensity",
        "sfumato": "subtle gradations, atmospheric perspective, Leonardo's technique",
        "impasto": "thick paint application, textural effects, visible brushstrokes",
    }

    composition_elements = {
        "triangular": "stable, harmonious, classical composition",
        "diagonal": "dynamic, movement, baroque influence",
        "circular": "unity, completeness, divine perfection",
        "golden ratio": "mathematical harmony, natural proportions, aesthetic perfection",
        "rule of thirds": "balanced composition, visual interest, modern technique",
    }

    desc_lower = composition_details.lower()
    # Composition terms are compared with all spaces removed, so
    # "rule of thirds" also matches "ruleofthirds". Computed once, not per term.
    desc_compact = desc_lower.replace(" ", "")

    found_techniques = [
        f"• **{technique.title()}**: {description}"
        for technique, description in techniques.items()
        if technique in desc_lower
    ]
    found_composition = [
        f"• **{comp.title()}**: {description}"
        for comp, description in composition_elements.items()
        if comp.replace(" ", "") in desc_compact
    ]

    analysis = "**Technical Analysis:**\n"

    if found_techniques:
        analysis += "**Techniques Identified:**\n" + "\n".join(found_techniques) + "\n\n"

    if found_composition:
        analysis += "**Compositional Elements:**\n" + "\n".join(found_composition) + "\n\n"

    analysis += f"**Observational Notes**: {composition_details}\n"
    analysis += "**Recommendation**: Consider analyzing brushwork, color harmony, spatial relationships, and overall execution quality."

    return analysis
|
|
|
|
|
# Build the agent once at import time. Any failure is reported and leaves the
# app in "direct tools" mode, signalled by ``agent_ready = False``.
# ``art_agent`` is always defined, so later references cannot raise NameError.
art_agent = None
agent_ready = False

try:
    print("🎨 Creating SmolAgent for artwork analysis...")
    model = InferenceClientModel(model_id="meta-llama/Llama-3.2-3B-Instruct")

    art_agent = CodeAgent(
        tools=[
            art_style_detector,
            historical_context_provider,
            symbolism_interpreter,
            technical_analysis_tool,
        ],
        model=model,
        add_base_tools=False,
        max_steps=3,
    )
except Exception as e:
    print(f"❌ Agent creation failed: {e}")
else:
    print("✅ SmolAgent created successfully!")
    agent_ready = True
|
|
|
|
|
@functools.lru_cache(maxsize=1)
def _load_vision_model():
    """Load Phi-3.5-vision and its processor once and reuse them afterwards."""
    # Imported lazily so that CPU-only runs never import transformers.
    from transformers import AutoModelForCausalLM, AutoProcessor

    model_id = "microsoft/Phi-3.5-vision-instruct"
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
        _attn_implementation="eager",
        device_map="auto",
    )
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    return model, processor


def _to_pil_image(image):
    """Turn a file path, PIL image or array-like into a PIL image (mode untouched)."""
    if isinstance(image, str):
        return Image.open(image)
    if hasattr(image, "convert"):
        return image
    return Image.fromarray(image)


def analyze_image_with_vision_model(image, query):
    """Describe an artwork with the GPU vision model, else with the CPU fallback.

    Args:
        image: A PIL image, a file path, or an array accepted by
            ``Image.fromarray``.
        query: Optional user question appended to the vision prompt; may be
            empty or None.

    Returns:
        The stripped model output when ``device`` is "cuda" and generation
        succeeds; otherwise the result of ``get_cpu_image_description``.
    """
    # Normalise the input before choosing a path, so the CPU fallback also
    # receives a real image when a path or array was passed in.
    try:
        image = _to_pil_image(image)
    except Exception as e:
        print(f"Could not read image, using generic description: {e}")
        return get_cpu_image_description(image, query)

    if device == "cuda":
        try:
            print("🔥 Attempting GPU-based vision analysis...")
            # Cached: the weights are loaded on the first request only,
            # instead of on every call.
            model, processor = _load_vision_model()

            art_prompt = (
                "<|user|>\n"
                "Describe this artwork in detail focusing on:\n"
                "- Visual elements (colors, composition, subjects)\n"
                "- Style and technique\n"
                "- Period indicators\n"
                "- Mood and atmosphere\n"
                f"{query if query else ''}\n"
                "<|image_1|>\n"
                "<|end|>\n"
                "<|assistant|>"
            )

            rgb_image = image.convert("RGB")

            inputs = processor(art_prompt, rgb_image, return_tensors="pt")
            inputs = {k: v.to(model.device) for k, v in inputs.items()}

            with torch.no_grad():
                generate_ids = model.generate(
                    **inputs,
                    max_new_tokens=400,
                    eos_token_id=processor.tokenizer.eos_token_id,
                    pad_token_id=processor.tokenizer.eos_token_id,
                    do_sample=False,
                    # NOTE(review): kept from the original; presumably a
                    # workaround for this model's KV cache - confirm before
                    # re-enabling, as it makes generation slower.
                    use_cache=False,
                )

            # Drop the prompt tokens so that only the new text is decoded.
            generate_ids = generate_ids[:, inputs["input_ids"].shape[1]:]
            description = processor.batch_decode(
                generate_ids,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=False,
            )[0]

            return description.strip()

        except Exception as e:
            print(f"🔄 GPU vision analysis failed, using CPU fallback: {e}")

    return get_cpu_image_description(image, query)
|
|
|
def get_cpu_image_description(image, query=""):
    """Build a generic text description of an image without running a model.

    Only the size, the mode and the colour histogram are read from the image;
    the remaining sentences are fixed boilerplate.

    Args:
        image: Object exposing ``size``, ``mode`` and ``getcolors`` (a PIL
            image in practice).
        query: Optional user question; echoed in the text when non-empty.

    Returns:
        A six-line description, or a one-line generic sentence when the image
        cannot be inspected at all.
    """
    # NOTE(review): the boilerplate below contains words such as "classical"
    # and "color" that the keyword tools in this file react to, so results
    # based on this fallback are not image-specific.
    try:
        width, height = image.size
        mode = image.mode

        try:
            colors = image.getcolors(maxcolors=256 * 256 * 256)
            if colors:
                # "Dominated" means one colour covers more than 10% of the
                # pixels; only the largest count decides, so no sort is needed.
                largest_count = max(entry[0] for entry in colors)
                if largest_count > width * height * 0.1:
                    color_desc = "dominated by strong, bold colors"
                else:
                    color_desc = "featuring a varied, complex color palette"
            else:
                color_desc = "with rich, complex coloration"
        except Exception:
            # Best effort: a histogram failure must not abort the description.
            color_desc = "with artistic color composition"

        aspect_ratio = width / height
        if aspect_ratio > 1.3:
            orientation = "landscape orientation"
        elif aspect_ratio < 0.7:
            orientation = "portrait orientation"
        else:
            orientation = "square composition"

        lines = [
            f"This artwork presents a {orientation} {color_desc}.",
            "The image shows classical artistic elements with traditional composition techniques.",
            "The piece appears to demonstrate careful attention to visual balance and artistic principles.",
            f"Dimensions: {width}x{height} pixels in {mode} color mode.",
            f"User interest: {query}" if query else "",
            "The work suggests careful artistic execution with attention to both technical and aesthetic considerations.",
        ]
        return "\n".join(lines)

    except Exception:
        # Covers a missing size/mode attribute, a zero height, and so on.
        return f"Artwork with traditional composition and artistic styling. {f'User query: {query}' if query else ''}"
|
|
|
def analyze_with_tools_direct(description, query):
    """Run the four analysis tools directly, without the agent.

    Args:
        description: Text description of the artwork.
        query: The user's question. Currently unused; kept so the signature
            matches what callers pass.

    Returns:
        Markdown with Style, Historical Context, Symbolism and Technical
        sections, or an "Analysis Error" message if any tool raises.
    """
    try:
        style_result = art_style_detector(description)

        # Only a full period name counts as a mention. The previous
        # word-by-word test turned "pop art" into the keyword "art", which
        # matches "artwork"/"artistic" in almost any description.
        known_periods = (
            "renaissance", "baroque", "impressionism", "expressionism",
            "cubism", "surrealism", "abstract", "pop art",
        )
        desc_lower = description.lower()
        detected_period = next(
            (period for period in known_periods if period in desc_lower),
            None,
        )

        # No period named explicitly: reuse the detector's primary style so
        # the historical context agrees with the style section.
        marker = "**Primary Style: "
        if detected_period is None and marker in style_result:
            detected_period = style_result.split(marker, 1)[1].split("**", 1)[0]

        context_result = historical_context_provider(detected_period or "classical art")
        symbolism_result = symbolism_interpreter(description)
        technical_result = technical_analysis_tool(description)

        sections = [
            f"## 🎨 Style Analysis\n{style_result}",
            f"## 📚 Historical Context\n{context_result}",
            f"## 🔍 Symbolism\n{symbolism_result}",
            f"## 📐 Technical Analysis\n{technical_result}",
        ]
        return "\n\n".join(sections)

    except Exception as e:
        return f"**Analysis Error**: {e}\n\nPlease provide more details about the artwork for manual analysis."
|
|
|
def analyze_artwork_complete(image, query):
    """Run the full pipeline: describe the image, analyse it, format Markdown.

    The agent is used when ``agent_ready`` is true; if it is unavailable,
    raises, or returns nothing, the tools are called directly instead.

    Args:
        image: The uploaded image, or None when nothing was uploaded.
        query: Optional user question; may be empty or None.

    Returns:
        A Markdown report, a prompt to upload an image, or a Markdown error
        message. Exceptions are caught and reported, never raised.
    """
    if image is None:
        return "📸 **Please upload an image to analyze.**"

    # Treat None and whitespace-only questions as "no question".
    query = (query or "").strip()

    try:
        print(f"🔍 Analyzing image on {device}...")
        visual_description = analyze_image_with_vision_model(image, query)

        print("🎨 Running art analysis...")

        expert_analysis = None
        if agent_ready:
            try:
                analysis_prompt = "\n".join([
                    f"You are an expert art historian. Analyze this artwork: {visual_description}",
                    query if query else "Provide comprehensive analysis covering style, historical context, symbolism, and technique.",
                    "Use the available tools to provide detailed analysis.",
                ])
                expert_analysis = art_agent.run(analysis_prompt)
                analysis_method = f"🤖 SmolAgent Analysis ({device.upper()})"
            except Exception as agent_error:
                print(f"Agent failed, using direct tools: {agent_error}")

        # Single fallback for: agent unavailable, agent raised, agent gave None.
        if expert_analysis is None:
            expert_analysis = analyze_with_tools_direct(visual_description, query)
            analysis_method = f"🔧 Direct Tool Analysis ({device.upper()})"

        query_label = query if query else "General analysis"
        # Blank lines around "---" are required: directly under a text line,
        # Markdown reads "---" as a heading underline, not a horizontal rule.
        return "\n".join([
            "# 🎨 **ARTWORK ANALYSIS**",
            "## 👁️ **Visual Description**",
            f"{visual_description}",
            "",
            "---",
            "",
            f"{expert_analysis}",
            "",
            "---",
            "",
            f"*Analysis Method: {analysis_method}*",
            f"*Device: {device.upper()} | Query: {query_label}*",
            "",
        ])

    except Exception as e:
        error_msg = f"Analysis error: {e}"
        print(error_msg)
        traceback.print_exc()
        return "\n".join([
            "❌ **Analysis Error**",
            "",
            error_msg,
            "",
            "**Please try:**",
            "1. Uploading a clear JPG or PNG image",
            "2. Describing the artwork manually in the question box",
            "3. Being more specific about what you'd like to know",
            "",
            f"*Running on: {device.upper()}*",
        ])
|
|
|
|
|
def create_interface():
    """Build the Gradio Blocks UI for the art historian app.

    Reads the module-level ``device`` and ``agent_ready`` flags to render the
    status banners, and wires the analyze button to
    ``analyze_artwork_complete``.

    Returns:
        The assembled ``gr.Blocks`` app; launching it is left to the caller.
    """
    on_gpu = device == "cuda"
    device_label = device.upper()
    device_emoji = "🔥" if on_gpu else "💻"
    agent_status_text = "✅ All systems ready!" if agent_ready else "⚠️ Using backup tools"

    theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
        neutral_hue="gray",
        font=gr.themes.GoogleFont("Inter"),
    )

    css = """
    .main-header {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 2rem;
        border-radius: 15px;
        margin-bottom: 2rem;
        text-align: center;
        color: white;
    }
    .device-info {
        padding: 1rem;
        border-radius: 8px;
        margin: 1rem 0;
        text-align: center;
        background: rgba(59, 130, 246, 0.1);
        border: 1px solid rgba(59, 130, 246, 0.3);
    }
    .feature-grid {
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
        gap: 1rem;
        margin: 1rem 0;
    }
    .feature-card {
        padding: 1.5rem;
        border-radius: 10px;
        text-align: center;
        background: rgba(255,255,255,0.05);
        border: 1px solid rgba(255,255,255,0.1);
    }
    """

    header_html = """
    <div class="main-header">
        <h1>🎨 AI Art Historian</h1>
        <h3>Powered by SmolAgent Framework + Vision AI</h3>
        <p>Upload any artwork and discover its secrets through expert AI analysis</p>
        <p style="margin-top: 1rem;">
            <a href="https://youtu.be/xyNKr05Vvls?si=OmHjtOfBez2FjOTv" target="_blank" style="color: white; text-decoration: none; font-size: 1.1em;">
                ▶️ Watch Demo Video
            </a>
        </p>
    </div>
    """

    device_html = f"""
    <div class="device-info">
        <strong>{device_emoji} Running on: {device_label}</strong><br>
        <strong>Agent Status:</strong> {agent_status_text}<br>
        <em>{"GPU acceleration enabled" if on_gpu else "CPU processing mode"}</em>
    </div>
    """

    features_html = """
    <div class="feature-grid">
        <div class="feature-card">
            <h3>🎨 Style Detection</h3>
            <p>Renaissance to Modern movements</p>
        </div>
        <div class="feature-card">
            <h3>📚 Historical Context</h3>
            <p>Periods, influences, key artists</p>
        </div>
        <div class="feature-card">
            <h3>🔍 Symbol Analysis</h3>
            <p>Hidden meanings, iconography</p>
        </div>
        <div class="feature-card">
            <h3>📐 Technical Assessment</h3>
            <p>Composition, technique, style</p>
        </div>
    </div>
    """

    # Markdown is assembled from explicit lines so that the indentation of
    # this function can never leak into the rendered text.
    pro_tips_md = "\n".join([
        "### 💡 Pro Tips",
        "• Upload clear, high-quality images",
        "• Try paintings, sculptures, drawings",
        "• Ask specific questions for detailed analysis",
        f"• {'GPU acceleration active!' if on_gpu else 'CPU mode - still powerful!'}",
    ])

    ready_md = "\n".join([
        "🎯 **Ready for Analysis!**",
        "",
        "Upload an artwork image to get started. The AI will analyze:",
        "",
        "🎨 **Artistic Style** - Movement and period identification",
        "📚 **Historical Context** - Cultural background and influences",
        "🔍 **Symbolism** - Hidden meanings and iconography",
        "📐 **Technique** - Compositional and technical analysis",
        "",
        f"*Running on {device_label} • {agent_status_text}*",
    ])

    footer_md = "\n".join([
        "---",
        '<div style="text-align: center; color: #666; margin-top: 2rem;">',
        "<strong>🎨 AI Art Historian</strong> | Built with ❤️ Gradio<br>",
        f"<em>Device: {device_label} • Agent: {'Ready' if agent_ready else 'Backup mode'} • Discover art through AI</em>",
        "</div>",
    ])

    example_questions = [
        ["What artistic movement does this painting belong to?"],
        ["Analyze the use of color and symbolism in this artwork."],
        ["What can you tell me about the historical context of this piece?"],
        ["Explain the composition and artistic technique used."],
        ["What emotions or themes does this artwork convey?"],
    ]

    with gr.Blocks(title="🎨 AI Art Historian", theme=theme, css=css) as demo:
        gr.HTML(header_html)
        gr.HTML(device_html)
        gr.HTML(features_html)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📤 **Upload Your Artwork**")

                image_input = gr.Image(
                    label="🖼️ Select Image",
                    type="pil",
                    height=350,
                )

                query_input = gr.Textbox(
                    label="💭 Ask Specific Questions (Optional)",
                    placeholder="e.g., 'What artistic movement does this represent?' or 'What do the colors symbolize?'",
                    lines=3,
                )

                analyze_btn = gr.Button(
                    "🔍 **Analyze Artwork**",
                    variant="primary",
                    size="lg",
                )

                gr.Markdown(pro_tips_md)

            with gr.Column(scale=2):
                gr.Markdown("### 📊 **Analysis Results**")

                analysis_output = gr.Markdown(
                    value=ready_md,
                    container=True,
                )

        with gr.Row():
            # Examples fill the question box only. The earlier rows also
            # carried None for the image, so clicking one cleared the
            # artwork the user had already uploaded.
            gr.Examples(
                examples=example_questions,
                inputs=[query_input],
                label="🎯 **Example Questions**",
            )

        analyze_btn.click(
            fn=analyze_artwork_complete,
            inputs=[image_input, query_input],
            outputs=[analysis_output],
            show_progress=True,
        )

        gr.Markdown(footer_md)

    return demo
|
|
|
|
|
if __name__ == "__main__":
    print("🚀 Launching AI Art Historian...")
    demo = create_interface()
    # Listens on all interfaces at port 7860; no public share link is created.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )