Spaces:
Sleeping
Sleeping
# app.py
"""
MandelMem vs Chain-of-Thought Interactive Comparison App

A Streamlit application for comparing the MandelMem integrated architecture
against Chain-of-Thought reasoning on mathematical problems.
"""
import asyncio
import json
import os
import re
import time
from dataclasses import asdict, dataclass
from datetime import datetime
from typing import Any, Dict, List, Optional

import openai
import streamlit as st
from openai import AsyncOpenAI

# Import MandelMem components
from mandelmem.core import MandelMem
from mandelmem.dynamics import FractalDynamics
from mandelmem.encoders import TextEncoder
from mandelmem.memory import MemorySystem
from mandelmem.quadtree import QuadTree, Tile
@dataclass
class ComparisonResult:
    """Outcome of evaluating one reasoning method on a single question.

    Bug fix: the @dataclass decorator was missing, so the keyword-argument
    construction (ComparisonResult(method=..., ...)) and the asdict() calls
    used elsewhere in this file would fail at runtime.
    """

    method: str             # human-readable method name, e.g. "Chain-of-Thought"
    question: str           # the problem statement that was evaluated
    reasoning: str          # full model output (or error text on failure)
    answer: str             # extracted final answer
    confidence: float       # extracted confidence, clamped to [0.0, 1.0]
    processing_time: float  # wall-clock seconds for the evaluation
    tokens_used: int        # total tokens reported by the API (0 on error)
    memory_trace: Optional[str] = None  # MandelMem-only memory trace summary
class MandelMemComparator:
    """Runs a question through plain Chain-of-Thought prompting and through
    a MandelMem-augmented prompt, producing a ComparisonResult for each.

    Fixes in this revision:
    - _classify_domain previously used raw substring matching, so e.g.
      "gift" matched the keyword 'if' and misclassified the question.
      Keywords are now matched as whole words ('%' stays a substring check).
    - The `re` import and confidence-pattern list were rebuilt on every
      _extract_confidence call; the patterns are now compiled once.
    """

    # Patterns for pulling a numeric confidence out of model output.
    # Applied to the lowercased content, so no IGNORECASE flag is needed.
    _CONFIDENCE_PATTERNS = [
        re.compile(r'confidence[:\s]+([0-9]*\.?[0-9]+)'),
        re.compile(r'confidence level[:\s]+([0-9]*\.?[0-9]+)'),
        re.compile(r'\(confidence[:\s]+([0-9]*\.?[0-9]+)\)'),
    ]

    def __init__(self, api_key: str):
        self.client = AsyncOpenAI(api_key=api_key)
        self.mandelmem = MandelMem()

    async def evaluate_chain_of_thought(self, question: str) -> "ComparisonResult":
        """Solve `question` with a plain step-by-step (CoT) prompt.

        Never raises: API/parsing failures are reported as a ComparisonResult
        with confidence 0.0 and the error text in `reasoning`.
        """
        start_time = time.time()

        prompt = f"""
Solve this problem step by step using clear reasoning:

Problem: {question}

Please provide:
1. Your step-by-step reasoning
2. Your final answer
3. Your confidence level (0.0 to 1.0)
"""
        try:
            response = await self.client.chat.completions.create(
                model="gpt-4-turbo-preview",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.1,
                max_tokens=1000
            )
            processing_time = time.time() - start_time
            content = response.choices[0].message.content

            # Extract answer and confidence from the free-form model output.
            answer = self._extract_answer(content)
            confidence = self._extract_confidence(content)

            return ComparisonResult(
                method="Chain-of-Thought",
                question=question,
                reasoning=content,
                answer=answer,
                confidence=confidence,
                processing_time=processing_time,
                tokens_used=response.usage.total_tokens
            )
        except Exception as e:
            # Best-effort: surface the error in the result instead of raising,
            # so the UI can still render a side-by-side comparison.
            return ComparisonResult(
                method="Chain-of-Thought",
                question=question,
                reasoning=f"Error: {str(e)}",
                answer="Error occurred",
                confidence=0.0,
                processing_time=time.time() - start_time,
                tokens_used=0
            )

    async def evaluate_mandelmem_integrated(self, question: str) -> "ComparisonResult":
        """Solve `question` using MandelMem memory plus an architecture-aware prompt.

        Writes the question into MandelMem, reads back similar problems for
        context, then prompts the model with that context. Never raises; see
        evaluate_chain_of_thought for the error convention.
        """
        start_time = time.time()
        try:
            # Write the question into MandelMem memory with light metadata.
            problem_metadata = {
                "type": "reasoning_problem",
                "domain": self._classify_domain(question),
                "timestamp": datetime.now().isoformat()
            }
            write_result = self.mandelmem.write(question, problem_metadata)
            tile_id = write_result.tile_id

            # Read back the top-3 similar items to build prompt context.
            read_result = self.mandelmem.read(question, k=3, with_trace=True)

            # Architectural framing injected ahead of the problem statement.
            architecture_context = f"""
You are MandelMem, an advanced multi-resolution reasoning system. Use your architectural insights:

QUADTREE DECOMPOSITION: Break this problem into hierarchical components
FRACTAL DYNAMICS: Classify solution stability (stable/plastic/escape)
MEMORY INTEGRATION: Leverage similar problems from your experience
BOUNDEDNESS VERIFICATION: Ensure solution quality and prevent drift

Current tile: {tile_id}
Memory context: {read_result.explanation if hasattr(read_result, 'explanation') else 'No prior context'}
Similar problems: {len(read_result.results)} found
"""
            solve_prompt = f"""{architecture_context}

Problem: {question}

Apply MandelMem architecture:
1. DECOMPOSE: Break into quadtree components
2. CLASSIFY: Determine stability (stable/plastic/escape)
3. INTEGRATE: Use memory context from similar problems
4. VERIFY: Check boundedness and solution quality
5. SOLVE: Provide final answer with confidence

Show your multi-resolution reasoning process.
"""
            response = await self.client.chat.completions.create(
                model="gpt-4-turbo-preview",
                messages=[{"role": "user", "content": solve_prompt}],
                temperature=0.1,
                max_tokens=1500
            )
            processing_time = time.time() - start_time
            content = response.choices[0].message.content

            # Extract answer and confidence from the free-form model output.
            answer = self._extract_answer(content)
            confidence = self._extract_confidence(content)

            # Summarize which tile was written and what memory was retrieved.
            memory_trace = f"Tile: {tile_id}, Similar problems: {len(read_result.results)}"
            if hasattr(read_result, 'results') and read_result.results:
                similarities = [f"{r.similarity:.3f}" for r in read_result.results[:3]]
                memory_trace += f", Similarities: {similarities}"

            return ComparisonResult(
                method="MandelMem Integrated",
                question=question,
                reasoning=content,
                answer=answer,
                confidence=confidence,
                processing_time=processing_time,
                tokens_used=response.usage.total_tokens,
                memory_trace=memory_trace
            )
        except Exception as e:
            return ComparisonResult(
                method="MandelMem Integrated",
                question=question,
                reasoning=f"Error: {str(e)}",
                answer="Error occurred",
                confidence=0.0,
                processing_time=time.time() - start_time,
                tokens_used=0,
                memory_trace="Error in processing"
            )

    def _classify_domain(self, question: str) -> str:
        """Classify the question into a coarse domain by keyword.

        Keywords are matched as whole words (the previous substring check
        made 'gift' match 'if', 'planet' match 'plan', etc.). The '%' sign
        is not a word, so it keeps a substring check.
        """
        question_lower = question.lower()
        words = set(re.findall(r"[a-z']+", question_lower))
        if '%' in question_lower or words & {'calculate', 'math', 'equation', 'solve', 'percent'}:
            return "mathematical"
        if words & {'strategy', 'plan', 'decision', 'choose'}:
            return "strategic"
        if words & {'logic', 'reasoning', 'if', 'then', 'because'}:
            return "logical"
        return "general"

    def _extract_answer(self, content: str) -> str:
        """Pull the final answer out of free-form model output.

        Prefers an explicit "Final answer:"-style line; otherwise falls back
        to the last non-empty line.
        """
        lines = content.split('\n')
        for line in lines:
            line = line.strip()
            if line.startswith(('Final answer:', 'Answer:', 'The answer is', 'Result:')):
                return line.split(':', 1)[-1].strip()
            elif 'final answer' in line.lower() and ':' in line:
                return line.split(':', 1)[-1].strip()
        # No explicit answer marker: fall back to the last non-empty line.
        for line in reversed(lines):
            if line.strip():
                return line.strip()
        return "No clear answer extracted"

    def _extract_confidence(self, content: str) -> float:
        """Pull a numeric confidence out of model output, clamped to [0, 1].

        Returns 0.8 when no confidence statement is found (optimistic default
        kept from the original implementation).
        """
        lowered = content.lower()
        for pattern in self._CONFIDENCE_PATTERNS:
            match = pattern.search(lowered)
            if match:
                try:
                    conf = float(match.group(1))
                    return min(1.0, max(0.0, conf))  # clamp to [0, 1]
                except ValueError:
                    continue  # malformed number: try the next pattern
        return 0.8  # default confidence
def main():
    """Render the Streamlit comparison UI.

    Layout: a sidebar for API-key entry and sample problems, a left column
    for problem input, and a right column showing the results of running the
    same problem through both reasoning methods.

    Fixes in this revision:
    - The two async evaluations are run concurrently via asyncio.gather under
      asyncio.run, replacing the hand-managed new_event_loop/close dance
      (which also leaked the loop on exceptions) and halving wait time.
    - Session-state checks use `in st.session_state` instead of hasattr.
    - Empty-label st.text_area("") calls (Streamlit accessibility warning)
      now carry real labels with label_visibility="collapsed".
    """
    st.set_page_config(
        page_title="MandelMem vs Chain-of-Thought Comparison",
        page_icon="🧠",
        layout="wide"
    )

    st.title("🧠 MandelMem vs Chain-of-Thought Reasoning")
    st.markdown("Compare MandelMem's integrated multi-resolution architecture against traditional Chain-of-Thought reasoning")

    # Sidebar: API key and sample-problem picker.
    with st.sidebar:
        st.header("Configuration")
        api_key = st.text_input(
            "OpenAI API Key",
            type="password",
            help="Enter your OpenAI API key to enable comparisons"
        )
        if not api_key:
            st.warning("⚠️ Please enter your OpenAI API key to use the comparison features")
            st.stop()  # halts the script run here until a key is provided
        st.success("✅ API key configured")

        # Sample problems
        st.header("Sample Problems")
        sample_problems = [
            "What is 15% of 240?",
            "If a train travels 120 miles in 2 hours, what is its average speed?",
            "A rectangle has length 8 and width 5. What is its area and perimeter?",
            "If you buy 3 apples for $1.50 each and 2 oranges for $2.00 each, what is the total cost?",
            "A company's revenue increased from $100,000 to $125,000. What is the percentage increase?",
            "If it takes 5 workers 8 hours to complete a job, how long would it take 8 workers?",
            "What is the next number in the sequence: 2, 4, 8, 16, ...?",
            "A pizza is cut into 8 equal slices. If you eat 3 slices, what fraction of the pizza remains?",
            "If a car uses 1 gallon of gas to travel 25 miles, how many gallons are needed for 150 miles?",
            "A store offers a 20% discount on a $50 item. What is the final price?"
        ]
        selected_problem = st.selectbox(
            "Choose a sample problem:",
            [""] + sample_problems
        )

    # Main content area
    col1, col2 = st.columns([1, 1])

    with col1:
        st.header("Problem Input")

        # value="" (no sample selected) shows the placeholder, so the two
        # branches of the original can be merged into one call.
        problem_text = st.text_area(
            "Enter your problem:",
            value=selected_problem,
            placeholder="e.g., What is 25% of 80?",
            height=100
        )

        # Expected answer (optional)
        expected_answer = st.text_input(
            "Expected Answer (optional):",
            help="Provide the expected answer for comparison"
        )

        if st.button("🚀 Compare Reasoning Methods", type="primary"):
            if not problem_text.strip():
                st.error("Please enter a problem to solve")
            else:
                with st.spinner("Running comparisons..."):
                    comparator = MandelMemComparator(api_key)
                    try:
                        async def _run_both():
                            # gather lets the two API calls overlap instead
                            # of running back-to-back.
                            return await asyncio.gather(
                                comparator.evaluate_chain_of_thought(problem_text),
                                comparator.evaluate_mandelmem_integrated(problem_text),
                            )

                        cot_result, mandelmem_result = asyncio.run(_run_both())

                        # Persist across Streamlit reruns (e.g. export click).
                        st.session_state.cot_result = cot_result
                        st.session_state.mandelmem_result = mandelmem_result
                        st.session_state.expected_answer = expected_answer
                    except Exception as e:
                        st.error(f"Error running comparison: {str(e)}")

    with col2:
        st.header("Comparison Results")
        if 'cot_result' in st.session_state and 'mandelmem_result' in st.session_state:
            cot_result = st.session_state.cot_result
            mandelmem_result = st.session_state.mandelmem_result
            expected_answer = st.session_state.get('expected_answer', '')

            # Results tabs
            tab1, tab2, tab3 = st.tabs(["📊 Summary", "🔗 Chain-of-Thought", "🧠 MandelMem"])

            with tab1:
                st.subheader("Performance Comparison")
                comparison_data = {
                    "Method": ["Chain-of-Thought", "MandelMem"],
                    "Answer": [cot_result.answer, mandelmem_result.answer],
                    "Confidence": [f"{cot_result.confidence:.2f}", f"{mandelmem_result.confidence:.2f}"],
                    "Time (s)": [f"{cot_result.processing_time:.2f}", f"{mandelmem_result.processing_time:.2f}"],
                    "Tokens": [cot_result.tokens_used, mandelmem_result.tokens_used]
                }
                st.table(comparison_data)

                # Expected-answer comparison (substring match, case-insensitive).
                if expected_answer:
                    st.subheader("Answer Accuracy")
                    st.write(f"**Expected Answer**: {expected_answer}")
                    cot_match = "✅" if expected_answer.lower() in cot_result.answer.lower() else "❌"
                    mandelmem_match = "✅" if expected_answer.lower() in mandelmem_result.answer.lower() else "❌"
                    st.write(f"**Chain-of-Thought**: {cot_match} {cot_result.answer}")
                    st.write(f"**MandelMem**: {mandelmem_match} {mandelmem_result.answer}")

                # Export results
                if st.button("📥 Export Results as JSON"):
                    results = {
                        "problem": problem_text,
                        "expected_answer": expected_answer,
                        "timestamp": datetime.now().isoformat(),
                        "chain_of_thought": asdict(cot_result),
                        "mandelmem": asdict(mandelmem_result)
                    }
                    st.download_button(
                        label="Download JSON",
                        data=json.dumps(results, indent=2),
                        file_name=f"mandelmem_comparison_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
                        mime="application/json"
                    )

            with tab2:
                st.subheader("Chain-of-Thought Reasoning")
                st.write(f"**Answer**: {cot_result.answer}")
                st.write(f"**Confidence**: {cot_result.confidence:.2f}")
                st.write(f"**Processing Time**: {cot_result.processing_time:.2f}s")
                st.write(f"**Tokens Used**: {cot_result.tokens_used}")
                st.subheader("Reasoning Process")
                st.text_area(
                    "Chain-of-Thought reasoning",
                    value=cot_result.reasoning,
                    height=400,
                    disabled=True,
                    label_visibility="collapsed"
                )

            with tab3:
                st.subheader("MandelMem Integrated Reasoning")
                st.write(f"**Answer**: {mandelmem_result.answer}")
                st.write(f"**Confidence**: {mandelmem_result.confidence:.2f}")
                st.write(f"**Processing Time**: {mandelmem_result.processing_time:.2f}s")
                st.write(f"**Tokens Used**: {mandelmem_result.tokens_used}")
                if mandelmem_result.memory_trace:
                    st.write(f"**Memory Trace**: {mandelmem_result.memory_trace}")
                st.subheader("Multi-Resolution Reasoning Process")
                st.text_area(
                    "MandelMem reasoning",
                    value=mandelmem_result.reasoning,
                    height=400,
                    disabled=True,
                    label_visibility="collapsed"
                )
        else:
            st.info("👆 Enter a problem and click 'Compare Reasoning Methods' to see results")

    # Footer
    st.markdown("---")
    st.markdown("""
**About MandelMem**: A multi-resolution reasoning architecture inspired by fractal dynamics and quadtree decomposition.
Built to demonstrate the advantages of integrated reasoning systems over prompt-based approaches.

[📄 Research Paper](./mandelmem_paper.pdf) | [💻 GitHub](https://github.com/kossisoroyce/mandlemem) | [📧 Contact](mailto:kossi@electricsheep.africa)
""")
| if __name__ == "__main__": | |
| main() | |