# app.py """ MandelMem vs Chain-of-Thought Interactive Comparison App A Streamlit application for comparing MandelMem integrated architecture against Chain-of-Thought reasoning on mathematical problems. """ import streamlit as st import asyncio import time import json import os from datetime import datetime from typing import Dict, Any, List, Optional from dataclasses import dataclass, asdict import openai from openai import AsyncOpenAI # Import MandelMem components from mandelmem.core import MandelMem from mandelmem.quadtree import QuadTree, Tile from mandelmem.dynamics import FractalDynamics from mandelmem.memory import MemorySystem from mandelmem.encoders import TextEncoder @dataclass class ComparisonResult: method: str question: str reasoning: str answer: str confidence: float processing_time: float tokens_used: int memory_trace: Optional[str] = None class MandelMemComparator: def __init__(self, api_key: str): self.client = AsyncOpenAI(api_key=api_key) self.mandelmem = MandelMem() async def evaluate_chain_of_thought(self, question: str) -> ComparisonResult: start_time = time.time() prompt = f""" Solve this problem step by step using clear reasoning: Problem: {question} Please provide: 1. Your step-by-step reasoning 2. Your final answer 3. Your confidence level (0.0 to 1.0) """ try: response = await self.client.chat.completions.create( model="gpt-4-turbo-preview", messages=[{"role": "user", "content": prompt}], temperature=0.1, max_tokens=1000 ) processing_time = time.time() - start_time content = response.choices[0].message.content # Extract answer and confidence answer = self._extract_answer(content) confidence = self._extract_confidence(content) return ComparisonResult( method="Chain-of-Thought", question=question, reasoning=content, answer=answer, confidence=confidence, processing_time=processing_time, tokens_used=response.usage.total_tokens ) except Exception as e: return ComparisonResult( method="Chain-of-Thought", question=question, reasoning=f"Error: {str(e)}", answer="Error occurred", confidence=0.0, processing_time=time.time() - start_time, tokens_used=0 ) async def evaluate_mandelmem_integrated(self, question: str) -> ComparisonResult: start_time = time.time() try: # Write to MandelMem memory problem_metadata = { "type": "reasoning_problem", "domain": self._classify_domain(question), "timestamp": datetime.now().isoformat() } write_result = self.mandelmem.write(question, problem_metadata) tile_id = write_result.tile_id # Read from memory to get context read_result = self.mandelmem.read(question, k=3, with_trace=True) # Create architectural context architecture_context = f""" You are MandelMem, an advanced multi-resolution reasoning system. Use your architectural insights: QUADTREE DECOMPOSITION: Break this problem into hierarchical components FRACTAL DYNAMICS: Classify solution stability (stable/plastic/escape) MEMORY INTEGRATION: Leverage similar problems from your experience BOUNDEDNESS VERIFICATION: Ensure solution quality and prevent drift Current tile: {tile_id} Memory context: {read_result.explanation if hasattr(read_result, 'explanation') else 'No prior context'} Similar problems: {len(read_result.results)} found """ solve_prompt = f"""{architecture_context} Problem: {question} Apply MandelMem architecture: 1. DECOMPOSE: Break into quadtree components 2. CLASSIFY: Determine stability (stable/plastic/escape) 3. INTEGRATE: Use memory context from similar problems 4. VERIFY: Check boundedness and solution quality 5. 
5. SOLVE: Provide final answer with confidence

Show your multi-resolution reasoning process.
"""

            response = await self.client.chat.completions.create(
                model="gpt-4-turbo-preview",
                messages=[{"role": "user", "content": solve_prompt}],
                temperature=0.1,
                max_tokens=1500
            )

            processing_time = time.time() - start_time
            content = response.choices[0].message.content

            # Extract answer and confidence
            answer = self._extract_answer(content)
            confidence = self._extract_confidence(content)

            # Create memory trace
            memory_trace = f"Tile: {tile_id}, Similar problems: {len(read_result.results)}"
            if hasattr(read_result, 'results') and read_result.results:
                similarities = [f"{r.similarity:.3f}" for r in read_result.results[:3]]
                memory_trace += f", Similarities: {similarities}"

            return ComparisonResult(
                method="MandelMem Integrated",
                question=question,
                reasoning=content,
                answer=answer,
                confidence=confidence,
                processing_time=processing_time,
                tokens_used=response.usage.total_tokens,
                memory_trace=memory_trace
            )

        except Exception as e:
            return ComparisonResult(
                method="MandelMem Integrated",
                question=question,
                reasoning=f"Error: {str(e)}",
                answer="Error occurred",
                confidence=0.0,
                processing_time=time.time() - start_time,
                tokens_used=0,
                memory_trace="Error in processing"
            )

    def _classify_domain(self, question: str) -> str:
        question_lower = question.lower()
        if any(word in question_lower for word in ['calculate', 'math', 'equation', 'solve', '%', 'percent']):
            return "mathematical"
        elif any(word in question_lower for word in ['strategy', 'plan', 'decision', 'choose']):
            return "strategic"
        elif any(word in question_lower for word in ['logic', 'reasoning', 'if', 'then', 'because']):
            return "logical"
        else:
            return "general"

    def _extract_answer(self, content: str) -> str:
        # Look for common answer patterns
        lines = content.split('\n')
        for line in lines:
            line = line.strip()
            if line.startswith(('Final answer:', 'Answer:', 'The answer is', 'Result:')):
                return line.split(':', 1)[-1].strip()
            elif 'final answer' in line.lower() and ':' in line:
                return line.split(':', 1)[-1].strip()

        # If no clear answer pattern, return last non-empty line
        for line in reversed(lines):
            if line.strip():
                return line.strip()

        return "No clear answer extracted"

    def _extract_confidence(self, content: str) -> float:
        # Look for confidence patterns
        import re

        confidence_patterns = [
            r'confidence[:\s]+([0-9]*\.?[0-9]+)',
            r'confidence level[:\s]+([0-9]*\.?[0-9]+)',
            r'\(confidence[:\s]+([0-9]*\.?[0-9]+)\)',
        ]

        for pattern in confidence_patterns:
            match = re.search(pattern, content.lower())
            if match:
                try:
                    conf = float(match.group(1))
                    return min(1.0, max(0.0, conf))  # Clamp between 0 and 1
                except ValueError:
                    continue

        return 0.8  # Default confidence


def main():
    st.set_page_config(
        page_title="MandelMem vs Chain-of-Thought Comparison",
        page_icon="🧠",
        layout="wide"
    )

    st.title("🧠 MandelMem vs Chain-of-Thought Reasoning")
    st.markdown("Compare MandelMem's integrated multi-resolution architecture against traditional Chain-of-Thought reasoning")

    # Sidebar for API key and settings
    with st.sidebar:
        st.header("Configuration")
        api_key = st.text_input(
            "OpenAI API Key",
            type="password",
            help="Enter your OpenAI API key to enable comparisons"
        )

        if not api_key:
            st.warning("⚠️ Please enter your OpenAI API key to use the comparison features")
            st.stop()

        st.success("✅ API key configured")

        # Sample problems
        st.header("Sample Problems")
        sample_problems = [
            "What is 15% of 240?",
            "If a train travels 120 miles in 2 hours, what is its average speed?",
What is its area and perimeter?", "If you buy 3 apples for $1.50 each and 2 oranges for $2.00 each, what is the total cost?", "A company's revenue increased from $100,000 to $125,000. What is the percentage increase?", "If it takes 5 workers 8 hours to complete a job, how long would it take 8 workers?", "What is the next number in the sequence: 2, 4, 8, 16, ...?", "A pizza is cut into 8 equal slices. If you eat 3 slices, what fraction of the pizza remains?", "If a car uses 1 gallon of gas to travel 25 miles, how many gallons are needed for 150 miles?", "A store offers a 20% discount on a $50 item. What is the final price?" ] selected_problem = st.selectbox( "Choose a sample problem:", [""] + sample_problems ) # Main content area col1, col2 = st.columns([1, 1]) with col1: st.header("Problem Input") # Problem input if selected_problem: problem_text = st.text_area( "Enter your problem:", value=selected_problem, height=100 ) else: problem_text = st.text_area( "Enter your problem:", placeholder="e.g., What is 25% of 80?", height=100 ) # Expected answer (optional) expected_answer = st.text_input( "Expected Answer (optional):", help="Provide the expected answer for comparison" ) # Compare button if st.button("🚀 Compare Reasoning Methods", type="primary"): if not problem_text.strip(): st.error("Please enter a problem to solve") else: with st.spinner("Running comparisons..."): # Initialize comparator comparator = MandelMemComparator(api_key) # Run both methods try: # Create async event loop loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) # Run comparisons cot_result = loop.run_until_complete( comparator.evaluate_chain_of_thought(problem_text) ) mandelmem_result = loop.run_until_complete( comparator.evaluate_mandelmem_integrated(problem_text) ) # Store results in session state st.session_state.cot_result = cot_result st.session_state.mandelmem_result = mandelmem_result st.session_state.expected_answer = expected_answer loop.close() except Exception as e: st.error(f"Error running comparison: {str(e)}") with col2: st.header("Comparison Results") if hasattr(st.session_state, 'cot_result') and hasattr(st.session_state, 'mandelmem_result'): cot_result = st.session_state.cot_result mandelmem_result = st.session_state.mandelmem_result expected_answer = st.session_state.get('expected_answer', '') # Results tabs tab1, tab2, tab3 = st.tabs(["📊 Summary", "🔗 Chain-of-Thought", "🧠 MandelMem"]) with tab1: st.subheader("Performance Comparison") # Create comparison table comparison_data = { "Method": ["Chain-of-Thought", "MandelMem"], "Answer": [cot_result.answer, mandelmem_result.answer], "Confidence": [f"{cot_result.confidence:.2f}", f"{mandelmem_result.confidence:.2f}"], "Time (s)": [f"{cot_result.processing_time:.2f}", f"{mandelmem_result.processing_time:.2f}"], "Tokens": [cot_result.tokens_used, mandelmem_result.tokens_used] } st.table(comparison_data) # Expected answer comparison if expected_answer: st.subheader("Answer Accuracy") st.write(f"**Expected Answer**: {expected_answer}") cot_match = "✅" if expected_answer.lower() in cot_result.answer.lower() else "❌" mandelmem_match = "✅" if expected_answer.lower() in mandelmem_result.answer.lower() else "❌" st.write(f"**Chain-of-Thought**: {cot_match} {cot_result.answer}") st.write(f"**MandelMem**: {mandelmem_match} {mandelmem_result.answer}") # Export results if st.button("📥 Export Results as JSON"): results = { "problem": problem_text, "expected_answer": expected_answer, "timestamp": datetime.now().isoformat(), "chain_of_thought": 
asdict(cot_result), "mandelmem": asdict(mandelmem_result) } st.download_button( label="Download JSON", data=json.dumps(results, indent=2), file_name=f"mandelmem_comparison_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json", mime="application/json" ) with tab2: st.subheader("Chain-of-Thought Reasoning") st.write(f"**Answer**: {cot_result.answer}") st.write(f"**Confidence**: {cot_result.confidence:.2f}") st.write(f"**Processing Time**: {cot_result.processing_time:.2f}s") st.write(f"**Tokens Used**: {cot_result.tokens_used}") st.subheader("Reasoning Process") st.text_area("", value=cot_result.reasoning, height=400, disabled=True) with tab3: st.subheader("MandelMem Integrated Reasoning") st.write(f"**Answer**: {mandelmem_result.answer}") st.write(f"**Confidence**: {mandelmem_result.confidence:.2f}") st.write(f"**Processing Time**: {mandelmem_result.processing_time:.2f}s") st.write(f"**Tokens Used**: {mandelmem_result.tokens_used}") if mandelmem_result.memory_trace: st.write(f"**Memory Trace**: {mandelmem_result.memory_trace}") st.subheader("Multi-Resolution Reasoning Process") st.text_area("", value=mandelmem_result.reasoning, height=400, disabled=True) else: st.info("👆 Enter a problem and click 'Compare Reasoning Methods' to see results") # Footer st.markdown("---") st.markdown(""" **About MandelMem**: A multi-resolution reasoning architecture inspired by fractal dynamics and quadtree decomposition. Built to demonstrate the advantages of integrated reasoning systems over prompt-based approaches. [📄 Research Paper](./mandelmem_paper.pdf) | [💻 GitHub](https://github.com/kossisoroyce/mandlemem) | [📧 Contact](mailto:kossi@electricsheep.africa) """) if __name__ == "__main__": main()