# app.py """ MandelMem vs Chain-of-Thought Interactive Comparison App A Streamlit application for comparing MandelMem integrated architecture against Chain-of-Thought reasoning on mathematical problems. """ import streamlit as st import asyncio import time import json import os from datetime import datetime from typing import Dict, Any, List, Optional from dataclasses import dataclass, asdict import openai from openai import AsyncOpenAI # Import MandelMem components from mandelmem.core import MandelMem from mandelmem.quadtree import QuadTree, Tile from mandelmem.dynamics import FractalDynamics from mandelmem.memory import MemorySystem from mandelmem.encoders import TextEncoder @dataclass class ComparisonResult: method: str question: str reasoning: str answer: str confidence: float processing_time: float tokens_used: int memory_trace: Optional[str] = None class MandelMemComparator: def __init__(self, api_key: str): self.client = AsyncOpenAI(api_key=api_key) self.mandelmem = MandelMem() async def evaluate_chain_of_thought(self, question: str) -> ComparisonResult: start_time = time.time() prompt = f""" Solve this problem step by step using clear reasoning: Problem: {question} Please provide: 1. Your step-by-step reasoning 2. Your final answer 3. Your confidence level (0.0 to 1.0) """ try: response = await self.client.chat.completions.create( model="gpt-4-turbo-preview", messages=[{"role": "user", "content": prompt}], temperature=0.1, max_tokens=1000 ) processing_time = time.time() - start_time content = response.choices[0].message.content # Extract answer and confidence answer = self._extract_answer(content) confidence = self._extract_confidence(content) return ComparisonResult( method="Chain-of-Thought", question=question, reasoning=content, answer=answer, confidence=confidence, processing_time=processing_time, tokens_used=response.usage.total_tokens ) except Exception as e: return ComparisonResult( method="Chain-of-Thought", question=question, reasoning=f"Error: {str(e)}", answer="Error occurred", confidence=0.0, processing_time=time.time() - start_time, tokens_used=0 ) async def evaluate_mandelmem_integrated(self, question: str) -> ComparisonResult: start_time = time.time() try: # Write to MandelMem memory problem_metadata = { "type": "reasoning_problem", "domain": self._classify_domain(question), "timestamp": datetime.now().isoformat() } write_result = self.mandelmem.write(question, problem_metadata) tile_id = write_result.tile_id # Read from memory to get context read_result = self.mandelmem.read(question, k=3, with_trace=True) # Create architectural context architecture_context = f""" You are MandelMem, an advanced multi-resolution reasoning system. Use your architectural insights: QUADTREE DECOMPOSITION: Break this problem into hierarchical components FRACTAL DYNAMICS: Classify solution stability (stable/plastic/escape) MEMORY INTEGRATION: Leverage similar problems from your experience BOUNDEDNESS VERIFICATION: Ensure solution quality and prevent drift Current tile: {tile_id} Memory context: {read_result.explanation if hasattr(read_result, 'explanation') else 'No prior context'} Similar problems: {len(read_result.results)} found """ solve_prompt = f"""{architecture_context} Problem: {question} Apply MandelMem architecture: 1. DECOMPOSE: Break into quadtree components 2. CLASSIFY: Determine stability (stable/plastic/escape) 3. INTEGRATE: Use memory context from similar problems 4. VERIFY: Check boundedness and solution quality 5. 
5. SOLVE: Provide final answer with confidence

Show your multi-resolution reasoning process.
"""

            response = await self.client.chat.completions.create(
                model="gpt-4-turbo-preview",
                messages=[{"role": "user", "content": solve_prompt}],
                temperature=0.1,
                max_tokens=1500
            )

            processing_time = time.time() - start_time
            content = response.choices[0].message.content

            # Extract answer and confidence
            answer = self._extract_answer(content)
            confidence = self._extract_confidence(content)

            # Create memory trace
            memory_trace = f"Tile: {tile_id}, Similar problems: {len(read_result.results)}"
            if hasattr(read_result, 'results') and read_result.results:
                similarities = [f"{r.similarity:.3f}" for r in read_result.results[:3]]
                memory_trace += f", Similarities: {similarities}"

            return ComparisonResult(
                method="MandelMem Integrated",
                question=question,
                reasoning=content,
                answer=answer,
                confidence=confidence,
                processing_time=processing_time,
                tokens_used=response.usage.total_tokens,
                memory_trace=memory_trace
            )

        except Exception as e:
            return ComparisonResult(
                method="MandelMem Integrated",
                question=question,
                reasoning=f"Error: {str(e)}",
                answer="Error occurred",
                confidence=0.0,
                processing_time=time.time() - start_time,
                tokens_used=0,
                memory_trace="Error in processing"
            )

    def _classify_domain(self, question: str) -> str:
        question_lower = question.lower()
        if any(word in question_lower for word in ['calculate', 'math', 'equation', 'solve', '%', 'percent']):
            return "mathematical"
        elif any(word in question_lower for word in ['strategy', 'plan', 'decision', 'choose']):
            return "strategic"
        elif any(word in question_lower for word in ['logic', 'reasoning', 'if', 'then', 'because']):
            return "logical"
        else:
            return "general"

    def _extract_answer(self, content: str) -> str:
        # Look for common answer patterns
        lines = content.split('\n')
        for line in lines:
            line = line.strip()
            if line.startswith(('Final answer:', 'Answer:', 'The answer is', 'Result:')):
                return line.split(':', 1)[-1].strip()
            elif 'final answer' in line.lower() and ':' in line:
                return line.split(':', 1)[-1].strip()

        # If no clear answer pattern, return last non-empty line
        for line in reversed(lines):
            if line.strip():
                return line.strip()

        return "No clear answer extracted"

    def _extract_confidence(self, content: str) -> float:
        # Look for confidence patterns
        import re

        confidence_patterns = [
            r'confidence[:\s]+([0-9]*\.?[0-9]+)',
            r'confidence level[:\s]+([0-9]*\.?[0-9]+)',
            r'\(confidence[:\s]+([0-9]*\.?[0-9]+)\)',
        ]

        for pattern in confidence_patterns:
            match = re.search(pattern, content.lower())
            if match:
                try:
                    conf = float(match.group(1))
                    return min(1.0, max(0.0, conf))  # Clamp between 0 and 1
                except ValueError:
                    continue

        return 0.8  # Default confidence


def main():
    st.set_page_config(
        page_title="MandelMem vs Chain-of-Thought Comparison",
        page_icon="🧠",
        layout="wide"
    )

    st.title("🧠 MandelMem vs Chain-of-Thought Reasoning")
    st.markdown("Compare MandelMem's integrated multi-resolution architecture against traditional Chain-of-Thought reasoning")

    # Sidebar for API key and settings
    with st.sidebar:
        st.header("Configuration")
        api_key = st.text_input(
            "OpenAI API Key",
            type="password",
            help="Enter your OpenAI API key to enable comparisons"
        )

        if not api_key:
            st.warning("⚠️ Please enter your OpenAI API key to use the comparison features")
            st.stop()

        st.success("✅ API key configured")

        # Sample problems
        st.header("Sample Problems")
        sample_problems = [
            "What is 15% of 240?",
            "If a train travels 120 miles in 2 hours, what is its average speed?",
What is its area and perimeter?", "If you buy 3 apples for $1.50 each and 2 oranges for $2.00 each, what is the total cost?", "A company's revenue increased from $100,000 to $125,000. What is the percentage increase?", "If it takes 5 workers 8 hours to complete a job, how long would it take 8 workers?", "What is the next number in the sequence: 2, 4, 8, 16, ...?", "A pizza is cut into 8 equal slices. If you eat 3 slices, what fraction of the pizza remains?", "If a car uses 1 gallon of gas to travel 25 miles, how many gallons are needed for 150 miles?", "A store offers a 20% discount on a $50 item. What is the final price?" ] selected_problem = st.selectbox( "Choose a sample problem:", [""] + sample_problems ) # Main content area col1, col2 = st.columns([1, 1]) with col1: st.header("Problem Input") # Problem input if selected_problem: problem_text = st.text_area( "Enter your problem:", value=selected_problem, height=100 ) else: problem_text = st.text_area( "Enter your problem:", placeholder="e.g., What is 25% of 80?", height=100 ) # Expected answer (optional) expected_answer = st.text_input( "Expected Answer (optional):", help="Provide the expected answer for comparison" ) # Compare button if st.button("🚀 Compare Reasoning Methods", type="primary"): if not problem_text.strip(): st.error("Please enter a problem to solve") else: with st.spinner("Running comparisons..."): # Initialize comparator comparator = MandelMemComparator(api_key) # Run both methods try: # Create async event loop loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) # Run comparisons cot_result = loop.run_until_complete( comparator.evaluate_chain_of_thought(problem_text) ) mandelmem_result = loop.run_until_complete( comparator.evaluate_mandelmem_integrated(problem_text) ) # Store results in session state st.session_state.cot_result = cot_result st.session_state.mandelmem_result = mandelmem_result st.session_state.expected_answer = expected_answer loop.close() except Exception as e: st.error(f"Error running comparison: {str(e)}") with col2: st.header("Comparison Results") if hasattr(st.session_state, 'cot_result') and hasattr(st.session_state, 'mandelmem_result'): cot_result = st.session_state.cot_result mandelmem_result = st.session_state.mandelmem_result expected_answer = st.session_state.get('expected_answer', '') # Results tabs tab1, tab2, tab3 = st.tabs(["📊 Summary", "🔗 Chain-of-Thought", "🧠 MandelMem"]) with tab1: st.subheader("Performance Comparison") # Create comparison table comparison_data = { "Method": ["Chain-of-Thought", "MandelMem"], "Answer": [cot_result.answer, mandelmem_result.answer], "Confidence": [f"{cot_result.confidence:.2f}", f"{mandelmem_result.confidence:.2f}"], "Time (s)": [f"{cot_result.processing_time:.2f}", f"{mandelmem_result.processing_time:.2f}"], "Tokens": [cot_result.tokens_used, mandelmem_result.tokens_used] } st.table(comparison_data) # Expected answer comparison if expected_answer: st.subheader("Answer Accuracy") st.write(f"**Expected Answer**: {expected_answer}") cot_match = "✅" if expected_answer.lower() in cot_result.answer.lower() else "❌" mandelmem_match = "✅" if expected_answer.lower() in mandelmem_result.answer.lower() else "❌" st.write(f"**Chain-of-Thought**: {cot_match} {cot_result.answer}") st.write(f"**MandelMem**: {mandelmem_match} {mandelmem_result.answer}") # Export results if st.button("📥 Export Results as JSON"): results = { "problem": problem_text, "expected_answer": expected_answer, "timestamp": datetime.now().isoformat(), "chain_of_thought": 
asdict(cot_result), "mandelmem": asdict(mandelmem_result) } st.download_button( label="Download JSON", data=json.dumps(results, indent=2), file_name=f"mandelmem_comparison_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json", mime="application/json" ) with tab2: st.subheader("Chain-of-Thought Reasoning") st.write(f"**Answer**: {cot_result.answer}") st.write(f"**Confidence**: {cot_result.confidence:.2f}") st.write(f"**Processing Time**: {cot_result.processing_time:.2f}s") st.write(f"**Tokens Used**: {cot_result.tokens_used}") st.subheader("Reasoning Process") st.text_area("", value=cot_result.reasoning, height=400, disabled=True) with tab3: st.subheader("MandelMem Integrated Reasoning") st.write(f"**Answer**: {mandelmem_result.answer}") st.write(f"**Confidence**: {mandelmem_result.confidence:.2f}") st.write(f"**Processing Time**: {mandelmem_result.processing_time:.2f}s") st.write(f"**Tokens Used**: {mandelmem_result.tokens_used}") if mandelmem_result.memory_trace: st.write(f"**Memory Trace**: {mandelmem_result.memory_trace}") st.subheader("Multi-Resolution Reasoning Process") st.text_area("", value=mandelmem_result.reasoning, height=400, disabled=True) else: st.info("👆 Enter a problem and click 'Compare Reasoning Methods' to see results") # Footer st.markdown("---") st.markdown(""" **About MandelMem**: A multi-resolution reasoning architecture inspired by fractal dynamics and quadtree decomposition. Built to demonstrate the advantages of integrated reasoning systems over prompt-based approaches. [📄 Research Paper](./mandelmem_paper.pdf) | [💻 GitHub](https://github.com/kossisoroyce/mandlemem) | [📧 Contact](mailto:kossi@electricsheep.africa) """) if __name__ == "__main__": main()