Art-Historian-AI

Sleeping

App Files Files Community

Art-Historian-AI / app.py

gizemsarsinlar

Added video demo link

20c5979 verified 3 months ago

raw

history blame contribute delete

24.9 kB

	import gradio as gr
	from smolagents import CodeAgent, InferenceClientModel, tool
	from PIL import Image
	import sys
	import traceback
	import torch

	# Check device availability
	device = "cuda" if torch.cuda.is_available() else "cpu"
	print(f"Python version: {sys.version}")
	print(f"Device: {device}")
	print(f"CUDA available: {torch.cuda.is_available()}")

	@tool
	def art_style_detector(visual_description: str) -> str:
	"""
	Identifies the artistic style of an artwork.

	Args:
	visual_description: Visual description of the artwork including colors, composition, and technique
	"""
	styles = {
	"Renaissance": ["perspective", "realistic", "classical", "religious", "balanced", "proportion", "leonardo", "michelangelo"],
	"Baroque": ["dramatic", "chiaroscuro", "movement", "emotional", "ornate", "theatrical", "caravaggio", "rubens"],
	"Impressionism": ["light", "color", "brushstrokes", "outdoor", "spontaneous", "fleeting", "monet", "renoir"],
	"Expressionism": ["emotion", "distorted", "subjective", "bold colors", "psychological", "kandinsky", "munch"],
	"Cubism": ["geometric", "fragmented", "multiple perspectives", "abstract", "analytical", "picasso", "braque"],
	"Surrealism": ["dreamlike", "unconscious", "bizarre", "symbolic", "fantastic", "dali", "magritte"],
	"Abstract": ["non-representational", "geometric", "color field", "experimental", "pollack", "rothko"],
	"Pop Art": ["commercial", "popular culture", "bright colors", "mass production", "warhol", "lichtenstein"],
	"Minimalism": ["simple", "clean", "geometric", "reduced", "essential", "minimal"]
	}

	desc = visual_description.lower()
	matches = []

	for style, keywords in styles.items():
	score = sum(1 for kw in keywords if kw in desc)
	if score > 0:
	matches.append((style, score))

	matches.sort(key=lambda x: x[1], reverse=True)

	if matches:
	primary = matches[0][0]
	result = f"Primary Style: {primary}\n"
	if len(matches) > 1:
	others = [f"{style} ({score})" for style, score in matches[1:3]]
	result += f"Secondary influences: {', '.join(others)}"
	return result

	return "Style analysis requires more specific visual details"

	@tool
	def historical_context_provider(art_period: str) -> str:
	"""
	Provides historical context for an artwork period.

	Args:
	art_period: The name of the art period or artistic movement to analyze
	"""
	contexts = {
	"renaissance": "Renaissance (14th-17th century): Humanism, scientific revolution, revival of classical antiquity. Key artists: Leonardo da Vinci, Michelangelo, Raphael. Characteristics: Linear perspective, anatomical accuracy, balanced compositions.",
	"baroque": "Baroque (17th-18th century): Counter-Reformation, dramatic storytelling, chiaroscuro technique. Key artists: Caravaggio, Bernini, Rubens. Characteristics: Dynamic movement, emotional intensity, theatrical lighting.",
	"impressionism": "Impressionism (1860s-1880s): Plein air painting, light and color focus, brushwork visible. Key artists: Monet, Renoir, Degas. Characteristics: Capturing fleeting moments, natural light, loose brushstrokes.",
	"expressionism": "Expressionism (early 20th century): Emotional expression over realism, psychological exploration. Key artists: Kandinsky, Munch, Kirchner. Characteristics: Bold colors, distorted forms, subjective perspective.",
	"cubism": "Cubism (1907-1920s): Multiple perspectives, geometric forms, fragmented reality. Key artists: Picasso, Braque. Characteristics: Analytical and synthetic phases, collage elements.",
	"surrealism": "Surrealism (1920s-1940s): Unconscious mind, dreams, automatic drawing. Key artists: Dalí, Magritte, Ernst. Characteristics: Bizarre imagery, psychological exploration, fantastic elements.",
	"abstract": "Abstract Art (20th century): Non-representational, emphasis on color, form, line. Key artists: Kandinsky, Mondrian, Pollock. Characteristics: Pure visual elements, emotional expression through abstraction.",
	"pop art": "Pop Art (1950s-1960s): Popular culture, mass media, commercial aesthetics. Key artists: Warhol, Lichtenstein. Characteristics: Bright colors, repetition, everyday objects as art."
	}

	period_lower = art_period.lower()
	for period, context in contexts.items():
	if period in period_lower:
	return context

	return f"{art_period}: A significant artistic movement with unique cultural and historical importance. Analysis would benefit from more specific period identification."

	@tool
	def symbolism_interpreter(visual_elements: str) -> str:
	"""
	Interprets symbolic meanings and iconography in artwork.

	Args:
	visual_elements: Description of symbolic visual elements in the artwork
	"""
	symbols = {
	"light": "divine presence, enlightenment, truth, hope, spiritual awakening",
	"darkness": "mystery, evil, unconscious, melancholy, death, ignorance",
	"red": "passion, blood, love, war, power, sacrifice, life force",
	"blue": "divinity, tranquility, melancholy, infinity, spirituality, wisdom",
	"white": "purity, innocence, peace, divinity, rebirth, spiritual perfection",
	"gold": "divine light, wealth, immortality, sacred, royal power",
	"crown": "authority, divine right, royal power, achievement, victory",
	"flowers": "beauty, mortality, seasons, femininity, love, fleeting life",
	"skull": "memento mori, mortality, wisdom, vanitas, death's inevitability",
	"cross": "Christianity, sacrifice, redemption, intersection of earthly and divine",
	"water": "life, purification, emotion, unconscious mind, baptism, renewal",
	"dove": "peace, Holy Spirit, purity, love, divine messenger",
	"serpent": "temptation, evil, wisdom, medicine, transformation, rebirth",
	"lion": "courage, strength, royalty, Christ, divine power",
	"lamb": "innocence, sacrifice, Christ, purity, gentleness",
	"apple": "temptation, knowledge, sin, earth, sensuality",
	"mirror": "vanity, truth, self-knowledge, reflection, soul",
	"candle": "life, enlightenment, spirituality, time",
	"black": "death, mystery, elegance, unknown, mourning"
	}

	elements_lower = visual_elements.lower()
	found_symbols = []

	for symbol, meaning in symbols.items():
	if symbol in elements_lower:
	found_symbols.append(f"• {symbol.title()}: {meaning}")

	if found_symbols:
	return "Symbolic Interpretations:\n" + "\n".join(found_symbols)

	return "Symbolic Analysis: The artwork may contain personal, cultural, or period-specific symbols requiring deeper contextual analysis."

	@tool
	def technical_analysis_tool(composition_details: str) -> str:
	"""
	Analyzes technical and compositional aspects of artwork.

	Args:
	composition_details: Description of technical composition, color usage, and artistic techniques
	"""
	techniques = {
	"oil": "Rich color saturation, smooth blending, detailed work, layered application",
	"watercolor": "Transparent layers, luminous effects, spontaneous flow, delicate washes",
	"acrylic": "Vibrant colors, quick drying, versatile techniques, modern medium",
	"tempera": "Precise details, bright colors, quick drying, pre-oil painting era",
	"fresco": "Wall painting, wet plaster application, permanent integration, monumental scale",
	"pastels": "Soft texture, direct color application, atmospheric effects",
	"chiaroscuro": "dramatic light-dark contrast, three-dimensional modeling, emotional intensity",
	"sfumato": "subtle gradations, atmospheric perspective, Leonardo's technique",
	"impasto": "thick paint application, textural effects, visible brushstrokes"
	}

	composition_elements = {
	"triangular": "stable, harmonious, classical composition",
	"diagonal": "dynamic, movement, baroque influence",
	"circular": "unity, completeness, divine perfection",
	"golden ratio": "mathematical harmony, natural proportions, aesthetic perfection",
	"rule of thirds": "balanced composition, visual interest, modern technique"
	}

	analysis = "Technical Analysis:\n"
	desc_lower = composition_details.lower()

	# Check for techniques
	found_techniques = []
	for technique, description in techniques.items():
	if technique in desc_lower:
	found_techniques.append(f"• {technique.title()}: {description}")

	# Check for compositional elements
	found_composition = []
	for comp, description in composition_elements.items():
	if comp.replace(" ", "") in desc_lower.replace(" ", ""):
	found_composition.append(f"• {comp.title()}: {description}")

	if found_techniques:
	analysis += "Techniques Identified:\n" + "\n".join(found_techniques) + "\n\n"

	if found_composition:
	analysis += "Compositional Elements:\n" + "\n".join(found_composition) + "\n\n"

	analysis += f"Observational Notes: {composition_details}\n"
	analysis += "Recommendation: Consider analyzing brushwork, color harmony, spatial relationships, and overall execution quality."

	return analysis

	# Initialize SmolAgent for CPU/GPU flexibility
	try:
	print("🎨 Creating SmolAgent for artwork analysis...")
	model = InferenceClientModel(model_id="meta-llama/Llama-3.2-3B-Instruct")

	art_agent = CodeAgent(
	tools=[art_style_detector, historical_context_provider, symbolism_interpreter, technical_analysis_tool],
	model=model,
	add_base_tools=False,
	max_steps=3
	)

	print("✅ SmolAgent created successfully!")
	agent_ready = True

	except Exception as e:
	print(f"❌ Agent creation failed: {str(e)}")
	agent_ready = False

	# Optional GPU-based vision analysis (if available)
	def analyze_image_with_vision_model(image, query):
	"""GPU-based vision analysis - fallback to CPU description if GPU unavailable"""
	try:
	if device == "cuda":
	print("🔥 Attempting GPU-based vision analysis...")
	from transformers import AutoModelForCausalLM, AutoProcessor

	# Try to load Phi-3.5-vision with GPU
	model = AutoModelForCausalLM.from_pretrained(
	"microsoft/Phi-3.5-vision-instruct",
	trust_remote_code=True,
	torch_dtype=torch.bfloat16,
	_attn_implementation="eager",
	device_map="auto"
	)

	processor = AutoProcessor.from_pretrained(
	"microsoft/Phi-3.5-vision-instruct",
	trust_remote_code=True
	)

	art_prompt = f"""<\|user\|>
	Describe this artwork in detail focusing on:
	- Visual elements (colors, composition, subjects)
	- Style and technique
	- Period indicators
	- Mood and atmosphere
	{query if query else ""}
	<\|image_1\|>
	<\|end\|>
	<\|assistant\|>"""

	if isinstance(image, str):
	image = Image.open(image)
	elif hasattr(image, 'convert'):
	image = image.convert("RGB")
	else:
	image = Image.fromarray(image).convert("RGB")

	inputs = processor(art_prompt, image, return_tensors="pt")
	inputs = {k: v.to(model.device) for k, v in inputs.items()}

	with torch.no_grad():
	generate_ids = model.generate(
	**inputs,
	max_new_tokens=400,
	eos_token_id=processor.tokenizer.eos_token_id,
	pad_token_id=processor.tokenizer.eos_token_id,
	do_sample=False,
	use_cache=False
	)

	generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
	description = processor.batch_decode(
	generate_ids,
	skip_special_tokens=True,
	clean_up_tokenization_spaces=False
	)[0]

	return description.strip()

	except Exception as e:
	print(f"🔄 GPU vision analysis failed, using CPU fallback: {str(e)}")

	# CPU fallback - basic image analysis
	return get_cpu_image_description(image, query)

	def get_cpu_image_description(image, query=""):
	"""CPU-based image description"""
	try:
	width, height = image.size
	mode = image.mode

	# Basic color analysis
	try:
	colors = image.getcolors(maxcolors=256256256)
	if colors:
	dominant_colors = sorted(colors, key=lambda x: x[0], reverse=True)[:5]
	# Simple color detection
	if any(color[0] > widthheight0.1 for color in dominant_colors):
	color_desc = "dominated by strong, bold colors"
	else:
	color_desc = "featuring a varied, complex color palette"
	else:
	color_desc = "with rich, complex coloration"
	except:
	color_desc = "with artistic color composition"

	aspect_ratio = width / height
	if aspect_ratio > 1.3:
	orientation = "landscape orientation"
	elif aspect_ratio < 0.7:
	orientation = "portrait orientation"
	else:
	orientation = "square composition"

	# Generate artistic description
	description = f"""This artwork presents a {orientation} {color_desc}.
	The image shows classical artistic elements with traditional composition techniques.
	The piece appears to demonstrate careful attention to visual balance and artistic principles.
	Dimensions: {width}x{height} pixels in {mode} color mode.
	{f"User interest: {query}" if query else ""}
	The work suggests careful artistic execution with attention to both technical and aesthetic considerations."""

	return description

	except Exception as e:
	return f"Artwork with traditional composition and artistic styling. {f'User query: {query}' if query else ''}"

	def analyze_with_tools_direct(description, query):
	"""Direct tool usage without agent"""
	try:
	results = []

	# Style analysis
	style_result = art_style_detector(description)
	results.append(f"## 🎨 Style Analysis\n{style_result}")

	# Historical context
	periods = ["renaissance", "baroque", "impressionism", "expressionism", "cubism", "surrealism", "abstract", "pop art"]
	detected_period = None
	desc_lower = description.lower()

	for period in periods:
	if period in desc_lower or any(keyword in desc_lower for keyword in period.split()):
	detected_period = period
	break

	if detected_period:
	context_result = historical_context_provider(detected_period)
	results.append(f"## 📚 Historical Context\n{context_result}")
	else:
	context_result = historical_context_provider("classical art")
	results.append(f"## 📚 Historical Context\n{context_result}")

	# Symbolism analysis
	symbolism_result = symbolism_interpreter(description)
	results.append(f"## 🔍 Symbolism\n{symbolism_result}")

	# Technical analysis
	technical_result = technical_analysis_tool(description)
	results.append(f"## 🎭 Technical Analysis\n{technical_result}")

	return "\n\n".join(results)

	except Exception as e:
	return f"Analysis Error: {str(e)}\n\nPlease provide more details about the artwork for manual analysis."

	def analyze_artwork_complete(image, query):
	"""Complete artwork analysis pipeline - CPU/GPU flexible"""
	if image is None:
	return "📸 Please upload an image to analyze."

	try:
	print(f"🔍 Analyzing image on {device}...")

	# Get image description (GPU or CPU)
	visual_description = analyze_image_with_vision_model(image, query)

	print("🎨 Running art analysis...")

	# Try SmolAgent first, fallback to direct tools
	if agent_ready:
	try:
	analysis_prompt = f"""You are an expert art historian. Analyze this artwork: {visual_description}
	{query if query else "Provide comprehensive analysis covering style, historical context, symbolism, and technique."}
	Use the available tools to provide detailed analysis."""

	agent_result = art_agent.run(analysis_prompt)
	analysis_method = f"🤖 SmolAgent Analysis ({device.upper()})"
	expert_analysis = agent_result

	except Exception as agent_error:
	print(f"Agent failed, using direct tools: {agent_error}")
	expert_analysis = analyze_with_tools_direct(visual_description, query)
	analysis_method = f"🔧 Direct Tool Analysis ({device.upper()})"
	else:
	expert_analysis = analyze_with_tools_direct(visual_description, query)
	analysis_method = f"🔧 Direct Tool Analysis ({device.upper()})"

	final_analysis = f"""# 🎨 ARTWORK ANALYSIS
	## 👁️ Visual Description
	{visual_description}
	---
	{expert_analysis}
	---
	Analysis Method: {analysis_method}
	Device: {device.upper()} \| Query: {query if query else "General analysis"}
	"""

	return final_analysis

	except Exception as e:
	error_msg = f"Analysis error: {str(e)}"
	print(error_msg)
	traceback.print_exc()
	return f"""❌ Analysis Error
	{error_msg}
	Please try:
	1. Uploading a clear JPG or PNG image
	2. Describing the artwork manually in the question box
	3. Being more specific about what you'd like to know
	Running on: {device.upper()}"""

	# Gradio Interface - CPU/GPU Compatible
	def create_interface():

	# Responsive theme that works on both CPU and GPU
	theme = gr.themes.Soft(
	primary_hue="blue",
	secondary_hue="purple",
	neutral_hue="gray",
	font=gr.themes.GoogleFont("Inter")
	)

	css = """
	.main-header {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	padding: 2rem;
	border-radius: 15px;
	margin-bottom: 2rem;
	text-align: center;
	color: white;
	}
	.device-info {
	padding: 1rem;
	border-radius: 8px;
	margin: 1rem 0;
	text-align: center;
	background: rgba(59, 130, 246, 0.1);
	border: 1px solid rgba(59, 130, 246, 0.3);
	}
	.feature-grid {
	display: grid;
	grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
	gap: 1rem;
	margin: 1rem 0;
	}
	.feature-card {
	padding: 1.5rem;
	border-radius: 10px;
	text-align: center;
	background: rgba(255,255,255,0.05);
	border: 1px solid rgba(255,255,255,0.1);
	}
	"""

	with gr.Blocks(title="🎨 AI Art Historian", theme=theme, css=css) as demo:

	# Header
	gr.HTML("""
	<div class="main-header">
	<h1>🎨 AI Art Historian</h1>
	<h3>Powered by SmolAgent Framework + Vision AI</h3>
	<p>Upload any artwork and discover its secrets through expert AI analysis</p>
	<p style="margin-top: 1rem;">
	<a href="https://youtu.be/xyNKr05Vvls?si=OmHjtOfBez2FjOTv" target="_blank" style="color: white; text-decoration: none; font-size: 1.1em;">
	▶️ Watch Demo Video
	</a>
	</p>
	</div>
	""")

	# Device info
	device_emoji = "🔥" if device == "cuda" else "💻"
	agent_status_text = "✅ All systems ready!" if agent_ready else "⚠️ Using backup tools"

	gr.HTML(f"""
	<div class="device-info">
	<strong>{device_emoji} Running on: {device.upper()}</strong><br>
	<strong>Agent Status:</strong> {agent_status_text}<br>
	<em>{"GPU acceleration enabled" if device == "cuda" else "CPU processing mode"}</em>
	</div>
	""")

	# Features
	gr.HTML("""
	<div class="feature-grid">
	<div class="feature-card">
	<h3>🎨 Style Detection</h3>
	<p>Renaissance to Modern movements</p>
	</div>
	<div class="feature-card">
	<h3>📚 Historical Context</h3>
	<p>Periods, influences, key artists</p>
	</div>
	<div class="feature-card">
	<h3>🔍 Symbol Analysis</h3>
	<p>Hidden meanings, iconography</p>
	</div>
	<div class="feature-card">
	<h3>🎭 Technical Assessment</h3>
	<p>Composition, technique, style</p>
	</div>
	</div>
	""")

	# Main interface
	with gr.Row():
	with gr.Column(scale=1):
	gr.Markdown("### 📤 Upload Your Artwork")

	image_input = gr.Image(
	label="🖼️ Select Image",
	type="pil",
	height=350
	)

	query_input = gr.Textbox(
	label="💭 Ask Specific Questions (Optional)",
	placeholder="e.g., 'What artistic movement does this represent?' or 'What do the colors symbolize?'",
	lines=3
	)

	analyze_btn = gr.Button(
	"🔍 Analyze Artwork",
	variant="primary",
	size="lg"
	)

	gr.Markdown(f"""
	### 💡 Pro Tips
	• Upload clear, high-quality images
	• Try paintings, sculptures, drawings
	• Ask specific questions for detailed analysis
	• {f"GPU acceleration active!" if device == "cuda" else "CPU mode - still powerful!"}
	""")

	with gr.Column(scale=2):
	gr.Markdown("### 📊 Analysis Results")

	analysis_output = gr.Markdown(
	value=f"""
	🎯 Ready for Analysis!

	Upload an artwork image to get started. The AI will analyze:

	🎨 Artistic Style - Movement and period identification
	📚 Historical Context - Cultural background and influences
	🔍 Symbolism - Hidden meanings and iconography
	🎭 Technique - Compositional and technical analysis

	Running on {device.upper()} • {agent_status_text}
	""",
	container=True
	)

	# Examples
	with gr.Row():
	gr.Examples(
	examples=[
	[None, "What artistic movement does this painting belong to?"],
	[None, "Analyze the use of color and symbolism in this artwork."],
	[None, "What can you tell me about the historical context of this piece?"],
	[None, "Explain the composition and artistic technique used."],
	[None, "What emotions or themes does this artwork convey?"]
	],
	inputs=[image_input, query_input],
	label="🎯 Example Questions"
	)

	# Event handler
	analyze_btn.click(
	fn=analyze_artwork_complete,
	inputs=[image_input, query_input],
	outputs=[analysis_output],
	show_progress=True
	)

	# Footer
	gr.Markdown(f"""
	---
	<div style="text-align: center; color: #666; margin-top: 2rem;">
	<strong>🎨 AI Art Historian</strong> \| Built with ❤️ Gradio<br>
	<em>Device: {device.upper()} • Agent: {"Ready" if agent_ready else "Backup mode"} • Discover art through AI</em>
	</div>
	""")

	return demo

	# Launch
	if __name__ == "__main__":
	print("🌟 Launching AI Art Historian...")
	demo = create_interface()
	demo.launch(
	server_name="0.0.0.0",
	server_port=7860,
	share=False
	)