# app.py
"""
MandelMem vs Chain-of-Thought Interactive Comparison App
A Streamlit application for comparing the MandelMem integrated architecture
against Chain-of-Thought reasoning on mathematical problems.
"""
import streamlit as st
import asyncio
import re
import time
import json
from datetime import datetime
from typing import Optional
from dataclasses import dataclass, asdict
from openai import AsyncOpenAI

# Import MandelMem components (only MandelMem itself is used directly in this app)
from mandelmem.core import MandelMem

@dataclass
class ComparisonResult:
    """Outcome of a single reasoning run, kept for display and JSON export."""
    method: str
    question: str
    reasoning: str
    answer: str
    confidence: float
    processing_time: float
    tokens_used: int
    memory_trace: Optional[str] = None
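
# Note: ComparisonResult serializes cleanly via dataclasses.asdict, e.g.
# (hypothetical values):
#   json.dumps(asdict(ComparisonResult("CoT", "2+2?", "steps...", "4", 0.9, 1.2, 150)))
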
class MandelMemComparator:
    def __init__(self, api_key: str):
        self.client = AsyncOpenAI(api_key=api_key)
        self.mandelmem = MandelMem()
    async def evaluate_chain_of_thought(self, question: str) -> ComparisonResult:
        """Baseline: ask the model to solve the problem with plain step-by-step reasoning."""
        start_time = time.time()
        prompt = f"""
Solve this problem step by step using clear reasoning:
Problem: {question}
Please provide:
1. Your step-by-step reasoning
2. Your final answer
3. Your confidence level (0.0 to 1.0)
"""
        try:
            response = await self.client.chat.completions.create(
                model="gpt-4-turbo-preview",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.1,
                max_tokens=1000,
            )
            processing_time = time.time() - start_time
            content = response.choices[0].message.content
            # Extract answer and confidence from the raw completion text
            answer = self._extract_answer(content)
            confidence = self._extract_confidence(content)
            return ComparisonResult(
                method="Chain-of-Thought",
                question=question,
                reasoning=content,
                answer=answer,
                confidence=confidence,
                processing_time=processing_time,
                tokens_used=response.usage.total_tokens if response.usage else 0,
            )
        except Exception as e:
            return ComparisonResult(
                method="Chain-of-Thought",
                question=question,
                reasoning=f"Error: {e}",
                answer="Error occurred",
                confidence=0.0,
                processing_time=time.time() - start_time,
                tokens_used=0,
            )
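
    # Hypothetical standalone usage outside Streamlit (requires a valid key):
    #   comparator = MandelMemComparator(api_key="sk-...")
    #   result = asyncio.run(comparator.evaluate_chain_of_thought("What is 15% of 240?"))
    #   print(result.answer, result.confidence)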
    async def evaluate_mandelmem_integrated(self, question: str) -> ComparisonResult:
        """Solve the problem with MandelMem memory context injected into the prompt."""
        start_time = time.time()
        try:
            # Write the problem to MandelMem memory
            problem_metadata = {
                "type": "reasoning_problem",
                "domain": self._classify_domain(question),
                "timestamp": datetime.now().isoformat(),
            }
            write_result = self.mandelmem.write(question, problem_metadata)
            tile_id = write_result.tile_id
            # Read back from memory to retrieve similar prior problems
            read_result = self.mandelmem.read(question, k=3, with_trace=True)
            # Build the architectural context for the prompt
            architecture_context = f"""
You are MandelMem, an advanced multi-resolution reasoning system. Use your architectural insights:
QUADTREE DECOMPOSITION: Break this problem into hierarchical components
FRACTAL DYNAMICS: Classify solution stability (stable/plastic/escape)
MEMORY INTEGRATION: Leverage similar problems from your experience
BOUNDEDNESS VERIFICATION: Ensure solution quality and prevent drift
Current tile: {tile_id}
Memory context: {read_result.explanation if hasattr(read_result, 'explanation') else 'No prior context'}
Similar problems: {len(read_result.results)} found
"""
            solve_prompt = f"""{architecture_context}
Problem: {question}
Apply MandelMem architecture:
1. DECOMPOSE: Break into quadtree components
2. CLASSIFY: Determine stability (stable/plastic/escape)
3. INTEGRATE: Use memory context from similar problems
4. VERIFY: Check boundedness and solution quality
5. SOLVE: Provide final answer with confidence
Show your multi-resolution reasoning process.
"""
            response = await self.client.chat.completions.create(
                model="gpt-4-turbo-preview",
                messages=[{"role": "user", "content": solve_prompt}],
                temperature=0.1,
                max_tokens=1500,
            )
            processing_time = time.time() - start_time
            content = response.choices[0].message.content
            # Extract answer and confidence from the raw completion text
            answer = self._extract_answer(content)
            confidence = self._extract_confidence(content)
            # Record which tile was written and which neighbours were retrieved
            memory_trace = f"Tile: {tile_id}, Similar problems: {len(read_result.results)}"
            if hasattr(read_result, 'results') and read_result.results:
                similarities = [f"{r.similarity:.3f}" for r in read_result.results[:3]]
                memory_trace += f", Similarities: {similarities}"
            return ComparisonResult(
                method="MandelMem Integrated",
                question=question,
                reasoning=content,
                answer=answer,
                confidence=confidence,
                processing_time=processing_time,
                tokens_used=response.usage.total_tokens if response.usage else 0,
                memory_trace=memory_trace,
            )
        except Exception as e:
            return ComparisonResult(
                method="MandelMem Integrated",
                question=question,
                reasoning=f"Error: {e}",
                answer="Error occurred",
                confidence=0.0,
                processing_time=time.time() - start_time,
                tokens_used=0,
                memory_trace="Error in processing",
            )
    def _classify_domain(self, question: str) -> str:
        """Heuristic keyword classifier used to tag problems in memory."""
        question_lower = question.lower()
        if any(word in question_lower for word in ['calculate', 'math', 'equation', 'solve', '%', 'percent']):
            return "mathematical"
        elif any(word in question_lower for word in ['strategy', 'plan', 'decision', 'choose']):
            return "strategic"
        elif any(word in question_lower for word in ['logic', 'reasoning', 'if', 'then', 'because']):
            return "logical"
        else:
            return "general"
    def _extract_answer(self, content: str) -> str:
        """Pull the final answer out of the model's free-text response."""
        # Look for common answer patterns
        lines = content.split('\n')
        for line in lines:
            line = line.strip()
            if line.startswith(('Final answer:', 'Answer:', 'The answer is', 'Result:')):
                return line.split(':', 1)[-1].strip()
            elif 'final answer' in line.lower() and ':' in line:
                return line.split(':', 1)[-1].strip()
        # If no clear answer pattern is found, fall back to the last non-empty line
        for line in reversed(lines):
            if line.strip():
                return line.strip()
        return "No clear answer extracted"
    def _extract_confidence(self, content: str) -> float:
        """Parse a 0.0-1.0 confidence value from the response, defaulting to 0.8."""
        # Look for confidence patterns (re is imported at module level)
        confidence_patterns = [
            r'confidence[:\s]+([0-9]*\.?[0-9]+)',
            r'confidence level[:\s]+([0-9]*\.?[0-9]+)',
            r'\(confidence[:\s]+([0-9]*\.?[0-9]+)\)',
        ]
        for pattern in confidence_patterns:
            match = re.search(pattern, content.lower())
            if match:
                try:
                    conf = float(match.group(1))
                    return min(1.0, max(0.0, conf))  # Clamp between 0 and 1
                except ValueError:
                    continue
        return 0.8  # Default confidence when none is stated
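    # e.g. _extract_confidence("Confidence: 0.95") -> 0.95
    #      _extract_confidence("no numeric confidence stated") -> 0.8 (default)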

def main():
    st.set_page_config(
        page_title="MandelMem vs Chain-of-Thought Comparison",
        page_icon="🧠",
        layout="wide"
    )
    st.title("🧠 MandelMem vs Chain-of-Thought Reasoning")
    st.markdown("Compare MandelMem's integrated multi-resolution architecture against traditional Chain-of-Thought reasoning")

    # Sidebar for API key and settings
    with st.sidebar:
        st.header("Configuration")
        api_key = st.text_input(
            "OpenAI API Key",
            type="password",
            help="Enter your OpenAI API key to enable comparisons"
        )
        if not api_key:
            st.warning("⚠️ Please enter your OpenAI API key to use the comparison features")
            st.stop()
        st.success("✅ API key configured")

        # Sample problems
        st.header("Sample Problems")
        sample_problems = [
            "What is 15% of 240?",
            "If a train travels 120 miles in 2 hours, what is its average speed?",
            "A rectangle has length 8 and width 5. What is its area and perimeter?",
            "If you buy 3 apples for $1.50 each and 2 oranges for $2.00 each, what is the total cost?",
            "A company's revenue increased from $100,000 to $125,000. What is the percentage increase?",
            "If it takes 5 workers 8 hours to complete a job, how long would it take 8 workers?",
            "What is the next number in the sequence: 2, 4, 8, 16, ...?",
            "A pizza is cut into 8 equal slices. If you eat 3 slices, what fraction of the pizza remains?",
            "If a car uses 1 gallon of gas to travel 25 miles, how many gallons are needed for 150 miles?",
            "A store offers a 20% discount on a $50 item. What is the final price?"
        ]
        selected_problem = st.selectbox(
            "Choose a sample problem:",
            [""] + sample_problems
        )
    # Main content area
    col1, col2 = st.columns([1, 1])

    with col1:
        st.header("Problem Input")
        # Problem input (pre-filled from the sidebar selection if one was made)
        if selected_problem:
            problem_text = st.text_area(
                "Enter your problem:",
                value=selected_problem,
                height=100
            )
        else:
            problem_text = st.text_area(
                "Enter your problem:",
                placeholder="e.g., What is 25% of 80?",
                height=100
            )
        # Expected answer (optional)
        expected_answer = st.text_input(
            "Expected Answer (optional):",
            help="Provide the expected answer for comparison"
        )
        # Compare button
        if st.button("🚀 Compare Reasoning Methods", type="primary"):
            if not problem_text.strip():
                st.error("Please enter a problem to solve")
            else:
                with st.spinner("Running comparisons..."):
                    # Initialize comparator
                    comparator = MandelMemComparator(api_key)
                    # Streamlit scripts are synchronous, so drive the async
                    # API calls on a dedicated event loop
                    loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(loop)
                    try:
                        cot_result = loop.run_until_complete(
                            comparator.evaluate_chain_of_thought(problem_text)
                        )
                        mandelmem_result = loop.run_until_complete(
                            comparator.evaluate_mandelmem_integrated(problem_text)
                        )
                        # Store results in session state so they survive reruns
                        st.session_state.cot_result = cot_result
                        st.session_state.mandelmem_result = mandelmem_result
                        st.session_state.expected_answer = expected_answer
                    except Exception as e:
                        st.error(f"Error running comparison: {e}")
                    finally:
                        loop.close()
    with col2:
        st.header("Comparison Results")
        if 'cot_result' in st.session_state and 'mandelmem_result' in st.session_state:
            cot_result = st.session_state.cot_result
            mandelmem_result = st.session_state.mandelmem_result
            expected_answer = st.session_state.get('expected_answer', '')
            # Results tabs
            tab1, tab2, tab3 = st.tabs(["📊 Summary", "🔗 Chain-of-Thought", "🧠 MandelMem"])
            with tab1:
                st.subheader("Performance Comparison")
                # Comparison table
                comparison_data = {
                    "Method": ["Chain-of-Thought", "MandelMem"],
                    "Answer": [cot_result.answer, mandelmem_result.answer],
                    "Confidence": [f"{cot_result.confidence:.2f}", f"{mandelmem_result.confidence:.2f}"],
                    "Time (s)": [f"{cot_result.processing_time:.2f}", f"{mandelmem_result.processing_time:.2f}"],
                    "Tokens": [cot_result.tokens_used, mandelmem_result.tokens_used]
                }
                st.table(comparison_data)
                # Expected answer comparison (simple substring match)
                if expected_answer:
                    st.subheader("Answer Accuracy")
                    st.write(f"**Expected Answer**: {expected_answer}")
                    cot_match = "✅" if expected_answer.lower() in cot_result.answer.lower() else "❌"
                    mandelmem_match = "✅" if expected_answer.lower() in mandelmem_result.answer.lower() else "❌"
                    st.write(f"**Chain-of-Thought**: {cot_match} {cot_result.answer}")
                    st.write(f"**MandelMem**: {mandelmem_match} {mandelmem_result.answer}")
                # Export results (a direct download button avoids the
                # nested-button rerun pitfall in Streamlit)
                results = {
                    "problem": cot_result.question,
                    "expected_answer": expected_answer,
                    "timestamp": datetime.now().isoformat(),
                    "chain_of_thought": asdict(cot_result),
                    "mandelmem": asdict(mandelmem_result)
                }
                st.download_button(
                    label="📥 Export Results as JSON",
                    data=json.dumps(results, indent=2),
                    file_name=f"mandelmem_comparison_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
                    mime="application/json"
                )
            with tab2:
                st.subheader("Chain-of-Thought Reasoning")
                st.write(f"**Answer**: {cot_result.answer}")
                st.write(f"**Confidence**: {cot_result.confidence:.2f}")
                st.write(f"**Processing Time**: {cot_result.processing_time:.2f}s")
                st.write(f"**Tokens Used**: {cot_result.tokens_used}")
                st.subheader("Reasoning Process")
                st.text_area(
                    "Chain-of-Thought reasoning",
                    value=cot_result.reasoning,
                    height=400,
                    disabled=True,
                    label_visibility="collapsed"
                )
            with tab3:
                st.subheader("MandelMem Integrated Reasoning")
                st.write(f"**Answer**: {mandelmem_result.answer}")
                st.write(f"**Confidence**: {mandelmem_result.confidence:.2f}")
                st.write(f"**Processing Time**: {mandelmem_result.processing_time:.2f}s")
                st.write(f"**Tokens Used**: {mandelmem_result.tokens_used}")
                if mandelmem_result.memory_trace:
                    st.write(f"**Memory Trace**: {mandelmem_result.memory_trace}")
                st.subheader("Multi-Resolution Reasoning Process")
                st.text_area(
                    "MandelMem reasoning",
                    value=mandelmem_result.reasoning,
                    height=400,
                    disabled=True,
                    label_visibility="collapsed"
                )
        else:
            st.info("👆 Enter a problem and click 'Compare Reasoning Methods' to see results")

    # Footer
    st.markdown("---")
    st.markdown("""
**About MandelMem**: A multi-resolution reasoning architecture inspired by fractal dynamics and quadtree decomposition.
Built to demonstrate the advantages of integrated reasoning systems over prompt-based approaches.

[📄 Research Paper](./mandelmem_paper.pdf) | [💻 GitHub](https://github.com/kossisoroyce/mandlemem) | [📧 Contact](mailto:kossi@electricsheep.africa)
""")

if __name__ == "__main__":
    main()
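
# Offline sanity check for the parsing helpers (hypothetical; no API call is
# made, though constructing MandelMemComparator still imports mandelmem):
#   c = MandelMemComparator(api_key="dummy")
#   assert c._extract_confidence("Confidence: 0.9") == 0.9
#   assert c._classify_domain("solve 2+2") == "mathematical"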