Spaces:
Sleeping
Sleeping
# app.py
"""
MandelMem vs Chain-of-Thought Interactive Comparison App

A Streamlit application for comparing the MandelMem integrated architecture
against Chain-of-Thought reasoning on mathematical problems.
"""
import asyncio
import json
import os
import re
import time
from dataclasses import asdict, dataclass
from datetime import datetime
from typing import Any, Dict, List, Optional

import openai
import streamlit as st
from openai import AsyncOpenAI

# Import MandelMem components
from mandelmem.core import MandelMem
from mandelmem.dynamics import FractalDynamics
from mandelmem.encoders import TextEncoder
from mandelmem.memory import MemorySystem
from mandelmem.quadtree import QuadTree, Tile
@dataclass
class ComparisonResult:
    """Outcome of evaluating one reasoning method on a single question.

    Bug fix: the @dataclass decorator was missing, so the keyword-argument
    construction (ComparisonResult(method=..., ...)) and the asdict() calls
    used elsewhere in this file would fail at runtime.
    """

    method: str             # human-readable method name, e.g. "Chain-of-Thought"
    question: str           # the problem statement that was evaluated
    reasoning: str          # full model output (or error text on failure)
    answer: str             # extracted final answer
    confidence: float       # extracted confidence, clamped to [0.0, 1.0]
    processing_time: float  # wall-clock seconds for the evaluation
    tokens_used: int        # total tokens reported by the API (0 on error)
    memory_trace: Optional[str] = None  # MandelMem-only memory trace summary
class MandelMemComparator:
    """Runs a question through plain Chain-of-Thought prompting and through
    a MandelMem-augmented prompt, producing a ComparisonResult for each.

    Fixes in this revision:
    - _classify_domain previously used raw substring matching, so e.g.
      "gift" matched the keyword 'if' and misclassified the question.
      Keywords are now matched as whole words ('%' stays a substring check).
    - The `re` import and confidence-pattern list were rebuilt on every
      _extract_confidence call; the patterns are now compiled once.
    """

    # Patterns for pulling a numeric confidence out of model output.
    # Applied to the lowercased content, so no IGNORECASE flag is needed.
    _CONFIDENCE_PATTERNS = [
        re.compile(r'confidence[:\s]+([0-9]*\.?[0-9]+)'),
        re.compile(r'confidence level[:\s]+([0-9]*\.?[0-9]+)'),
        re.compile(r'\(confidence[:\s]+([0-9]*\.?[0-9]+)\)'),
    ]

    def __init__(self, api_key: str):
        self.client = AsyncOpenAI(api_key=api_key)
        self.mandelmem = MandelMem()

    async def evaluate_chain_of_thought(self, question: str) -> "ComparisonResult":
        """Solve `question` with a plain step-by-step (CoT) prompt.

        Never raises: API/parsing failures are reported as a ComparisonResult
        with confidence 0.0 and the error text in `reasoning`.
        """
        start_time = time.time()

        prompt = f"""
Solve this problem step by step using clear reasoning:

Problem: {question}

Please provide:
1. Your step-by-step reasoning
2. Your final answer
3. Your confidence level (0.0 to 1.0)
"""
        try:
            response = await self.client.chat.completions.create(
                model="gpt-4-turbo-preview",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.1,
                max_tokens=1000
            )
            processing_time = time.time() - start_time
            content = response.choices[0].message.content

            # Extract answer and confidence from the free-form model output.
            answer = self._extract_answer(content)
            confidence = self._extract_confidence(content)

            return ComparisonResult(
                method="Chain-of-Thought",
                question=question,
                reasoning=content,
                answer=answer,
                confidence=confidence,
                processing_time=processing_time,
                tokens_used=response.usage.total_tokens
            )
        except Exception as e:
            # Best-effort: surface the error in the result instead of raising,
            # so the UI can still render a side-by-side comparison.
            return ComparisonResult(
                method="Chain-of-Thought",
                question=question,
                reasoning=f"Error: {str(e)}",
                answer="Error occurred",
                confidence=0.0,
                processing_time=time.time() - start_time,
                tokens_used=0
            )

    async def evaluate_mandelmem_integrated(self, question: str) -> "ComparisonResult":
        """Solve `question` using MandelMem memory plus an architecture-aware prompt.

        Writes the question into MandelMem, reads back similar problems for
        context, then prompts the model with that context. Never raises; see
        evaluate_chain_of_thought for the error convention.
        """
        start_time = time.time()
        try:
            # Write the question into MandelMem memory with light metadata.
            problem_metadata = {
                "type": "reasoning_problem",
                "domain": self._classify_domain(question),
                "timestamp": datetime.now().isoformat()
            }
            write_result = self.mandelmem.write(question, problem_metadata)
            tile_id = write_result.tile_id

            # Read back the top-3 similar items to build prompt context.
            read_result = self.mandelmem.read(question, k=3, with_trace=True)

            # Architectural framing injected ahead of the problem statement.
            architecture_context = f"""
You are MandelMem, an advanced multi-resolution reasoning system. Use your architectural insights:

QUADTREE DECOMPOSITION: Break this problem into hierarchical components
FRACTAL DYNAMICS: Classify solution stability (stable/plastic/escape)
MEMORY INTEGRATION: Leverage similar problems from your experience
BOUNDEDNESS VERIFICATION: Ensure solution quality and prevent drift

Current tile: {tile_id}
Memory context: {read_result.explanation if hasattr(read_result, 'explanation') else 'No prior context'}
Similar problems: {len(read_result.results)} found
"""
            solve_prompt = f"""{architecture_context}

Problem: {question}

Apply MandelMem architecture:
1. DECOMPOSE: Break into quadtree components
2. CLASSIFY: Determine stability (stable/plastic/escape)
3. INTEGRATE: Use memory context from similar problems
4. VERIFY: Check boundedness and solution quality
5. SOLVE: Provide final answer with confidence

Show your multi-resolution reasoning process.
"""
            response = await self.client.chat.completions.create(
                model="gpt-4-turbo-preview",
                messages=[{"role": "user", "content": solve_prompt}],
                temperature=0.1,
                max_tokens=1500
            )
            processing_time = time.time() - start_time
            content = response.choices[0].message.content

            # Extract answer and confidence from the free-form model output.
            answer = self._extract_answer(content)
            confidence = self._extract_confidence(content)

            # Summarize which tile was written and what memory was retrieved.
            memory_trace = f"Tile: {tile_id}, Similar problems: {len(read_result.results)}"
            if hasattr(read_result, 'results') and read_result.results:
                similarities = [f"{r.similarity:.3f}" for r in read_result.results[:3]]
                memory_trace += f", Similarities: {similarities}"

            return ComparisonResult(
                method="MandelMem Integrated",
                question=question,
                reasoning=content,
                answer=answer,
                confidence=confidence,
                processing_time=processing_time,
                tokens_used=response.usage.total_tokens,
                memory_trace=memory_trace
            )
        except Exception as e:
            return ComparisonResult(
                method="MandelMem Integrated",
                question=question,
                reasoning=f"Error: {str(e)}",
                answer="Error occurred",
                confidence=0.0,
                processing_time=time.time() - start_time,
                tokens_used=0,
                memory_trace="Error in processing"
            )

    def _classify_domain(self, question: str) -> str:
        """Classify the question into a coarse domain by keyword.

        Keywords are matched as whole words (the previous substring check
        made 'gift' match 'if', 'planet' match 'plan', etc.). The '%' sign
        is not a word, so it keeps a substring check.
        """
        question_lower = question.lower()
        words = set(re.findall(r"[a-z']+", question_lower))
        if '%' in question_lower or words & {'calculate', 'math', 'equation', 'solve', 'percent'}:
            return "mathematical"
        if words & {'strategy', 'plan', 'decision', 'choose'}:
            return "strategic"
        if words & {'logic', 'reasoning', 'if', 'then', 'because'}:
            return "logical"
        return "general"

    def _extract_answer(self, content: str) -> str:
        """Pull the final answer out of free-form model output.

        Prefers an explicit "Final answer:"-style line; otherwise falls back
        to the last non-empty line.
        """
        lines = content.split('\n')
        for line in lines:
            line = line.strip()
            if line.startswith(('Final answer:', 'Answer:', 'The answer is', 'Result:')):
                return line.split(':', 1)[-1].strip()
            elif 'final answer' in line.lower() and ':' in line:
                return line.split(':', 1)[-1].strip()
        # No explicit answer marker: fall back to the last non-empty line.
        for line in reversed(lines):
            if line.strip():
                return line.strip()
        return "No clear answer extracted"

    def _extract_confidence(self, content: str) -> float:
        """Pull a numeric confidence out of model output, clamped to [0, 1].

        Returns 0.8 when no confidence statement is found (optimistic default
        kept from the original implementation).
        """
        lowered = content.lower()
        for pattern in self._CONFIDENCE_PATTERNS:
            match = pattern.search(lowered)
            if match:
                try:
                    conf = float(match.group(1))
                    return min(1.0, max(0.0, conf))  # clamp to [0, 1]
                except ValueError:
                    continue  # malformed number: try the next pattern
        return 0.8  # default confidence
def main():
    """Render the Streamlit comparison UI.

    Layout: a sidebar for API-key entry and sample problems, a left column
    for problem input, and a right column showing the results of running the
    same problem through both reasoning methods.

    Fixes in this revision:
    - The two async evaluations are run concurrently via asyncio.gather under
      asyncio.run, replacing the hand-managed new_event_loop/close dance
      (which also leaked the loop on exceptions) and halving wait time.
    - Session-state checks use `in st.session_state` instead of hasattr.
    - Empty-label st.text_area("") calls (Streamlit accessibility warning)
      now carry real labels with label_visibility="collapsed".
    """
    st.set_page_config(
        page_title="MandelMem vs Chain-of-Thought Comparison",
        page_icon="🧠",
        layout="wide"
    )

    st.title("🧠 MandelMem vs Chain-of-Thought Reasoning")
    st.markdown("Compare MandelMem's integrated multi-resolution architecture against traditional Chain-of-Thought reasoning")

    # Sidebar: API key and sample-problem picker.
    with st.sidebar:
        st.header("Configuration")
        api_key = st.text_input(
            "OpenAI API Key",
            type="password",
            help="Enter your OpenAI API key to enable comparisons"
        )
        if not api_key:
            st.warning("⚠️ Please enter your OpenAI API key to use the comparison features")
            st.stop()  # halts the script run here until a key is provided
        st.success("✅ API key configured")

        # Sample problems
        st.header("Sample Problems")
        sample_problems = [
            "What is 15% of 240?",
            "If a train travels 120 miles in 2 hours, what is its average speed?",
            "A rectangle has length 8 and width 5. What is its area and perimeter?",
            "If you buy 3 apples for $1.50 each and 2 oranges for $2.00 each, what is the total cost?",
            "A company's revenue increased from $100,000 to $125,000. What is the percentage increase?",
            "If it takes 5 workers 8 hours to complete a job, how long would it take 8 workers?",
            "What is the next number in the sequence: 2, 4, 8, 16, ...?",
            "A pizza is cut into 8 equal slices. If you eat 3 slices, what fraction of the pizza remains?",
            "If a car uses 1 gallon of gas to travel 25 miles, how many gallons are needed for 150 miles?",
            "A store offers a 20% discount on a $50 item. What is the final price?"
        ]
        selected_problem = st.selectbox(
            "Choose a sample problem:",
            [""] + sample_problems
        )

    # Main content area
    col1, col2 = st.columns([1, 1])

    with col1:
        st.header("Problem Input")

        # value="" (no sample selected) shows the placeholder, so the two
        # branches of the original can be merged into one call.
        problem_text = st.text_area(
            "Enter your problem:",
            value=selected_problem,
            placeholder="e.g., What is 25% of 80?",
            height=100
        )

        # Expected answer (optional)
        expected_answer = st.text_input(
            "Expected Answer (optional):",
            help="Provide the expected answer for comparison"
        )

        if st.button("🚀 Compare Reasoning Methods", type="primary"):
            if not problem_text.strip():
                st.error("Please enter a problem to solve")
            else:
                with st.spinner("Running comparisons..."):
                    comparator = MandelMemComparator(api_key)
                    try:
                        async def _run_both():
                            # gather lets the two API calls overlap instead
                            # of running back-to-back.
                            return await asyncio.gather(
                                comparator.evaluate_chain_of_thought(problem_text),
                                comparator.evaluate_mandelmem_integrated(problem_text),
                            )

                        cot_result, mandelmem_result = asyncio.run(_run_both())

                        # Persist across Streamlit reruns (e.g. export click).
                        st.session_state.cot_result = cot_result
                        st.session_state.mandelmem_result = mandelmem_result
                        st.session_state.expected_answer = expected_answer
                    except Exception as e:
                        st.error(f"Error running comparison: {str(e)}")

    with col2:
        st.header("Comparison Results")
        if 'cot_result' in st.session_state and 'mandelmem_result' in st.session_state:
            cot_result = st.session_state.cot_result
            mandelmem_result = st.session_state.mandelmem_result
            expected_answer = st.session_state.get('expected_answer', '')

            # Results tabs
            tab1, tab2, tab3 = st.tabs(["📊 Summary", "🔗 Chain-of-Thought", "🧠 MandelMem"])

            with tab1:
                st.subheader("Performance Comparison")
                comparison_data = {
                    "Method": ["Chain-of-Thought", "MandelMem"],
                    "Answer": [cot_result.answer, mandelmem_result.answer],
                    "Confidence": [f"{cot_result.confidence:.2f}", f"{mandelmem_result.confidence:.2f}"],
                    "Time (s)": [f"{cot_result.processing_time:.2f}", f"{mandelmem_result.processing_time:.2f}"],
                    "Tokens": [cot_result.tokens_used, mandelmem_result.tokens_used]
                }
                st.table(comparison_data)

                # Expected-answer comparison (substring match, case-insensitive).
                if expected_answer:
                    st.subheader("Answer Accuracy")
                    st.write(f"**Expected Answer**: {expected_answer}")
                    cot_match = "✅" if expected_answer.lower() in cot_result.answer.lower() else "❌"
                    mandelmem_match = "✅" if expected_answer.lower() in mandelmem_result.answer.lower() else "❌"
                    st.write(f"**Chain-of-Thought**: {cot_match} {cot_result.answer}")
                    st.write(f"**MandelMem**: {mandelmem_match} {mandelmem_result.answer}")

                # Export results
                if st.button("📥 Export Results as JSON"):
                    results = {
                        "problem": problem_text,
                        "expected_answer": expected_answer,
                        "timestamp": datetime.now().isoformat(),
                        "chain_of_thought": asdict(cot_result),
                        "mandelmem": asdict(mandelmem_result)
                    }
                    st.download_button(
                        label="Download JSON",
                        data=json.dumps(results, indent=2),
                        file_name=f"mandelmem_comparison_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
                        mime="application/json"
                    )

            with tab2:
                st.subheader("Chain-of-Thought Reasoning")
                st.write(f"**Answer**: {cot_result.answer}")
                st.write(f"**Confidence**: {cot_result.confidence:.2f}")
                st.write(f"**Processing Time**: {cot_result.processing_time:.2f}s")
                st.write(f"**Tokens Used**: {cot_result.tokens_used}")
                st.subheader("Reasoning Process")
                st.text_area(
                    "Chain-of-Thought reasoning",
                    value=cot_result.reasoning,
                    height=400,
                    disabled=True,
                    label_visibility="collapsed"
                )

            with tab3:
                st.subheader("MandelMem Integrated Reasoning")
                st.write(f"**Answer**: {mandelmem_result.answer}")
                st.write(f"**Confidence**: {mandelmem_result.confidence:.2f}")
                st.write(f"**Processing Time**: {mandelmem_result.processing_time:.2f}s")
                st.write(f"**Tokens Used**: {mandelmem_result.tokens_used}")
                if mandelmem_result.memory_trace:
                    st.write(f"**Memory Trace**: {mandelmem_result.memory_trace}")
                st.subheader("Multi-Resolution Reasoning Process")
                st.text_area(
                    "MandelMem reasoning",
                    value=mandelmem_result.reasoning,
                    height=400,
                    disabled=True,
                    label_visibility="collapsed"
                )
        else:
            st.info("👆 Enter a problem and click 'Compare Reasoning Methods' to see results")

    # Footer
    st.markdown("---")
    st.markdown("""
**About MandelMem**: A multi-resolution reasoning architecture inspired by fractal dynamics and quadtree decomposition.
Built to demonstrate the advantages of integrated reasoning systems over prompt-based approaches.

[📄 Research Paper](./mandelmem_paper.pdf) | [💻 GitHub](https://github.com/kossisoroyce/mandlemem) | [📧 Contact](mailto:kossi@electricsheep.africa)
""")
| if __name__ == "__main__": | |
| main() | |