import gradio as gr
import google.generativeai as genai
import os
from dotenv import load_dotenv
from github import Github
import json
from pathlib import Path
from collections import defaultdict
import base64
from typing import Dict, List, Any, Tuple, Iterator
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type

# Load environment variables
load_dotenv()

# Configure API keys
GITHUB_TOKEN = os.getenv("github_api")
GEMINI_API_KEY = os.getenv("gemini_api")

if not GITHUB_TOKEN or not GEMINI_API_KEY:
    raise ValueError("Both GITHUB_TOKEN and GEMINI_API_KEY must be set in environment")

# Initialize APIs
gh = Github(GITHUB_TOKEN)
genai.configure(api_key=GEMINI_API_KEY)

model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-thinking-exp-01-21",
    generation_config={
        "temperature": 1,
        "top_p": 0.95,
        "top_k": 40,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    },
    safety_settings=[
        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
        {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
    ],
)

RELEVANT_EXTENSIONS = {
    ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".cpp", ".c", ".h", ".hpp",
    ".rb", ".php", ".go", ".rs", ".swift", ".kt",
}

# Use Gradio's own ChatMessage so that gr.Chatbot(type="messages") accepts the
# objects we build directly; a hand-rolled dataclass with the same fields would
# not pass Gradio's message validation.
ChatMessage = gr.ChatMessage


class ThinkingAnalyzer:
    """Handles streaming thoughts and responses from the Gemini model."""

    def __init__(self, model):
        self.model = model

    def stream_analysis(self, analysis_data: Dict[str, Any], system_prompt: str) -> Iterator[List[ChatMessage]]:
        """Streams the analysis with a visible thinking process."""
        # Format the prompt
        prompt = f"{system_prompt}\n\nRepository Analysis Data:\n{json.dumps(analysis_data, indent=2)}"

        # Initialize the streaming response
        response = self.model.generate_content(prompt, stream=True)

        messages = []
        thought_buffer = ""
        response_buffer = ""
        thinking_complete = False

        # Add the initial thinking message
        messages.append(
            ChatMessage(
                role="assistant",
                content="",
                metadata={"title": "⏳ Analyzing Repository: Thought Process"},
            )
        )

        for chunk in response:
            parts = chunk.candidates[0].content.parts
            current_chunk = parts[0].text

            if len(parts) == 2 and not thinking_complete:
                # Complete the thought and start the response
                thought_buffer += current_chunk
                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": "⏳ Analysis Thought Process"},
                )
                # Add the response message
                messages.append(
                    ChatMessage(role="assistant", content=parts[1].text)
                )
                thinking_complete = True
            elif thinking_complete:
                # Continue streaming the response
                response_buffer += current_chunk
                messages[-1] = ChatMessage(role="assistant", content=response_buffer)
            else:
                # Continue streaming thoughts
                thought_buffer += current_chunk
                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": "⏳ Analysis Thought Process"},
                )

            yield messages

    def stream_question_response(
        self,
        question: str,
        analysis_data: Dict[str, Any],
        chat_history: List[Tuple[str, str]],
    ) -> Iterator[List[ChatMessage]]:
        """Streams the response to a follow-up question with the thinking process."""
        # Build context
        context = "You are an expert code analyst helping users understand repository analysis results.\n\n"
        context += f"Repository Analysis Data:\n{json.dumps(analysis_data, indent=2)}\n\n"

        if chat_history:
            context += "Previous conversation:\n"
            for user_msg, assistant_msg in chat_history:
                context += f"User: {user_msg}\n"
                if assistant_msg:
                    context += f"Assistant: {assistant_msg}\n"

        prompt = context + f"\nUser: {question}\nPlease think through your analysis:"

        # Reuse stream_analysis with the constructed prompt. The analysis data
        # is already embedded in `prompt`, so pass an empty dict rather than
        # serializing the same JSON into the request a second time.
        yield from self.stream_analysis({}, prompt)
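
# A hedged sketch of the chunk shapes stream_analysis expects from the
# experimental "thinking" model: while the model is still reasoning, each
# streamed chunk carries a single thought part, and the chunk that carries two
# parts marks the hand-off from thought to final answer. This two-part
# convention is an assumption about the current experimental API and may
# change between model versions:
#
#   chunk.candidates[0].content.parts == [thought]          # still thinking
#   chunk.candidates[0].content.parts == [thought, answer]  # answer begins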


class RepositoryAnalyzer:
    """Handles GitHub repository analysis."""

    def __init__(self, repo_url: str):
        # Extract owner and repo name from the URL
        parts = repo_url.rstrip('/').split('/')
        if len(parts) < 2:
            raise ValueError("Invalid repository URL format")
        self.repo_name = parts[-1]
        self.owner = parts[-2]
        self.repo = gh.get_repo(f"{self.owner}/{self.repo_name}")
        self.analysis_data: Dict[str, Any] = {}

    def analyze(self) -> Dict[str, Any]:
        """Perform complete repository analysis."""
        try:
            # Basic repository information
            self.analysis_data["basic_info"] = {
                "name": self.repo.name,
                "owner": self.repo.owner.login,
                "description": self.repo.description or "No description available",
                "stars": self.repo.stargazers_count,
                "forks": self.repo.forks_count,
                "created_at": self.repo.created_at.isoformat(),
                "last_updated": self.repo.updated_at.isoformat(),
                "primary_language": self.repo.language or "Not specified",
            }

            # Analyze repository structure
            self.analysis_data["structure"] = self._analyze_structure()

            # Analyze code patterns
            self.analysis_data["code_patterns"] = self._analyze_code_patterns()

            # Analyze commit history
            self.analysis_data["commit_history"] = self._analyze_commits()

            # Get contributor statistics
            self.analysis_data["contributors"] = self._analyze_contributors()

            return self.analysis_data

        except Exception as e:
            raise Exception(f"Error analyzing repository: {str(e)}")

    def _analyze_structure(self) -> Dict[str, Any]:
        """Analyze repository structure and organization."""
        structure = {
            "files": defaultdict(int),
            "directories": set(),
            "total_size": 0,
        }

        try:
            contents = self.repo.get_contents("")
            while contents:
                content = contents.pop(0)
                if content.type == "dir":
                    structure["directories"].add(content.path)
                    contents.extend(self.repo.get_contents(content.path))
                else:
                    ext = Path(content.path).suffix.lower()
                    if ext in RELEVANT_EXTENSIONS:
                        structure["files"][ext] += 1
                        structure["total_size"] += content.size
        except Exception as e:
            print(f"Error analyzing structure: {str(e)}")

        return {
            "file_types": dict(structure["files"]),
            "directory_count": len(structure["directories"]),
            "total_size": structure["total_size"],
            "file_count": sum(structure["files"].values()),
        }

    def _analyze_code_patterns(self) -> Dict[str, Any]:
        """Analyze code patterns and style."""
        patterns = {
            "samples": [],
            "languages": defaultdict(int),
            "complexity_metrics": defaultdict(list),
        }

        try:
            files = self.repo.get_contents("")
            analyzed = 0
            # Sample at most five relevant files to keep API usage bounded
            while files and analyzed < 5:
                file = files.pop(0)
                if file.type == "dir":
                    files.extend(self.repo.get_contents(file.path))
                elif Path(file.path).suffix.lower() in RELEVANT_EXTENSIONS:
                    try:
                        content = base64.b64decode(file.content).decode('utf-8')
                        lines = content.splitlines()
                        if not lines:
                            continue

                        loc = len([line for line in lines if line.strip()])
                        avg_line_length = sum(len(line) for line in lines) / len(lines)

                        patterns["samples"].append({
                            "path": file.path,
                            "language": Path(file.path).suffix[1:],
                            "loc": loc,
                            "avg_line_length": round(avg_line_length, 2),
                        })

                        patterns["languages"][Path(file.path).suffix[1:]] += loc
                        patterns["complexity_metrics"]["loc"].append(loc)
                        patterns["complexity_metrics"]["avg_line_length"].append(avg_line_length)

                        analyzed += 1
                    except Exception as e:
                        print(f"Error analyzing file {file.path}: {str(e)}")
                        continue
        except Exception as e:
            print(f"Error in code pattern analysis: {str(e)}")

        return patterns

    def _analyze_commits(self) -> Dict[str, Any]:
        """Analyze commit history and patterns."""
        commit_data = []
        commit_times = []

        try:
            commits = list(self.repo.get_commits()[:100])  # Get the last 100 commits

            for commit in commits:
                try:
                    commit_info = {
                        "sha": commit.sha,
                        "author": commit.author.login if commit.author else "Unknown",
                        "date": commit.commit.author.date.isoformat(),
                        "message": commit.commit.message,
                        "changes": {
                            "additions": commit.stats.additions,
                            "deletions": commit.stats.deletions,
                        },
                    }
                    commit_data.append(commit_info)
                    commit_times.append(commit.commit.author.date.hour)
                except Exception as e:
                    print(f"Error processing commit {commit.sha}: {str(e)}")
                    continue

            # Analyze commit patterns
            commit_hours = defaultdict(int)
            for hour in commit_times:
                commit_hours[hour] += 1

            total_commits = len(commit_data)
            return {
                "commits": commit_data,
                "total_commits": total_commits,
                "commit_hours": dict(commit_hours),
                "avg_additions": sum(c["changes"]["additions"] for c in commit_data) / total_commits if total_commits else 0,
                "avg_deletions": sum(c["changes"]["deletions"] for c in commit_data) / total_commits if total_commits else 0,
            }
        except Exception as e:
            print(f"Error in commit analysis: {str(e)}")
            return {
                "commits": [],
                "total_commits": 0,
                "commit_hours": {},
                "avg_additions": 0,
                "avg_deletions": 0,
            }

    def _analyze_contributors(self) -> Dict[str, Any]:
        """Analyze contributor statistics."""
        contributor_data = []
        try:
            contributors = list(self.repo.get_contributors())
            for contributor in contributors:
                contributor_data.append({
                    "login": contributor.login,
                    "contributions": contributor.contributions,
                    "type": contributor.type,
                })
        except Exception as e:
            print(f"Error analyzing contributors: {str(e)}")

        return {
            "total_contributors": len(contributor_data),
            "contributors": contributor_data,
        }
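
# A minimal standalone sketch of RepositoryAnalyzer, independent of the Gradio
# flow below. `preview_repository` is a hypothetical helper added for
# illustration (nothing in this app calls it), and the URL in the docstring is
# a placeholder.
def preview_repository(repo_url: str) -> str:
    """Return the basic-info block for a repository as pretty-printed JSON.

    Example: preview_repository("https://github.com/owner/repo")
    """
    analyzer = RepositoryAnalyzer(repo_url)
    data = analyzer.analyze()
    return json.dumps(data["basic_info"], indent=2)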


@retry(
    retry=retry_if_exception_type(Exception),
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
)
def process_analysis(repo_url: str, progress=gr.Progress()):
    """Process repository analysis with visible thinking.

    Note: the try/except below reports failures in the chat instead of
    re-raising, so the tenacity retry only fires if something raises outside
    that block.
    """
    try:
        # Initialize analysis
        progress(0, desc="Initializing repository analysis...")
        analyzer = RepositoryAnalyzer(repo_url)
        analysis_data = analyzer.analyze()

        # Initialize thinking analyzer
        thinking_analyzer = ThinkingAnalyzer(model)

        # System prompt for analysis
        system_prompt = """You are an expert code analyst with deep experience in software architecture, development practices, and team dynamics. Analyze the provided repository data and create a detailed, insightful analysis using the following markdown template:

# Repository Analysis

## 📊 Project Overview
[Provide a comprehensive overview including:
- Project purpose and scope
- Age and maturity of the project
- Current activity level and maintenance status
- Key metrics (stars, forks, etc.)
- Primary technologies and languages used]

## 🏗️ Architecture and Code Organization
[Analyze in detail:
- Repository structure and organization
- Code distribution across different technologies
- File and directory organization patterns
- Project size and complexity metrics
- Code modularity and component structure
- Presence of key architectural patterns]

## 💻 Development Practices & Code Quality
[Evaluate:
- Coding standards and consistency
- Code complexity and maintainability metrics
- Documentation practices
- Testing approach and coverage (if visible)
- Error handling and logging practices
- Use of design patterns and best practices]

## 📈 Development Workflow & History
[Analyze:
- Commit patterns and frequency
- Release cycles and versioning
- Branch management strategy
- Code review practices
- Continuous integration/deployment indicators
- Peak development periods and cycles]

## 👥 Team Dynamics & Collaboration
[Examine:
- Team size and composition
- Contribution patterns
- Core maintainer identification
- Community engagement level
- Communication patterns
- Collaboration efficiency]

## 🔧 Technical Depth & Innovation
[Assess:
- Technical sophistication level
- Innovative approaches or solutions
- Complex problem-solving examples
- Performance optimization efforts
- Security considerations
- Scalability approach]

## 🚀 Project Health & Sustainability
[Evaluate:
- Project momentum and growth trends
- Maintenance patterns
- Community health indicators
- Documentation completeness
- Onboarding friendliness
- Long-term viability indicators]

## 💡 Key Insights & Recommendations
[Provide:
- 3-5 key strengths identified
- 3-5 potential improvement areas
- Notable patterns or practices
- Unique characteristics
- Strategic recommendations]"""

        # Stream thinking and analysis, keeping only the latest message state;
        # the final state is returned to the chatbot once generation finishes.
        progress(0.5, desc="Generating analysis with thinking process...")
        messages = []
        for msg_update in thinking_analyzer.stream_analysis(analysis_data, system_prompt):
            messages = msg_update

        return messages, analysis_data

    except Exception as e:
        # Surface the failure in the chat instead of returning None, which
        # would break the Gradio output bindings.
        error = ChatMessage(role="assistant", content=f"❌ Error analyzing repository: {str(e)}")
        return [error], {}
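
# A hedged convenience sketch for debugging outside the UI: flatten a list of
# ChatMessage objects (as returned by process_analysis) into plain text.
# `messages_to_text` is a hypothetical helper added for illustration and is
# not used by the interface below.
def messages_to_text(messages: List[ChatMessage]) -> str:
    chunks = []
    for msg in messages:
        # metadata may be absent or None depending on the message
        meta = getattr(msg, "metadata", None) or {}
        title = meta.get("title")
        if title:
            chunks.append(f"[{title}]")
        chunks.append(str(msg.content))
    return "\n\n".join(chunks)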


def process_question(question: str, analysis_data: Dict[str, Any], chat_history: List[Tuple[str, str]]):
    """Process follow-up questions with visible thinking."""
    if not analysis_data:
        return [ChatMessage(role="assistant", content="Please analyze a repository first before asking questions.")]

    thinking_analyzer = ThinkingAnalyzer(model)

    # Keep only the latest message state; the final state is returned to the
    # chatbot once generation finishes.
    messages = []
    for msg_update in thinking_analyzer.stream_question_response(question, analysis_data, chat_history):
        messages = msg_update

    return messages
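
# Usage sketch (hypothetical values): the event handlers below feed
# process_question with the stored analysis dict and (user, assistant)
# history pairs, e.g.:
#
#   msgs = process_question("Which language dominates?", analysis, [])
#   print(messages_to_text(msgs))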

# Create the Gradio interface with thinking visualization
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("""
    # 🔍 GitHub Repository Analyzer with Thinking Process

    Analyze any public GitHub repository using AI. Watch the AI's thought process as it:
    1. 📊 Analyzes repository structure and patterns
    2. 💡 Generates insights about development practices
    3. 💭 Shows its thinking while answering your follow-up questions

    Enter a GitHub repository URL (e.g., `https://github.com/owner/repo`)
    """)

    with gr.Row():
        repo_url = gr.Textbox(
            label="GitHub Repository URL",
            placeholder="https://github.com/owner/repo",
            scale=4,
        )
        analyze_btn = gr.Button("🔍 Analyze", variant="primary", scale=1)

    # Status message
    status_msg = gr.Markdown("", elem_id="status_message")

    with gr.Row():
        chatbot = gr.Chatbot(
            label="Analysis & Discussion",
            height=500,
            show_label=True,
            render_markdown=True,
            type="messages",
        )

    with gr.Row():
        question = gr.Textbox(
            label="Your Question",
            placeholder="Ask about the analysis...",
            scale=4,
        )
        ask_btn = gr.Button("💭 Ask", variant="primary", scale=1)
        clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary", scale=1)

    # Hidden states
    analysis_data = gr.State({})
    chat_history = gr.State([])
    msg_store = gr.State("")

    def clear_outputs():
        return [], {}, [], ""

    # Set up event handlers with thinking visualization
    analyze_btn.click(
        fn=lambda: "⏳ Analysis in progress... Watch the thinking process below!",
        inputs=None,
        outputs=status_msg,
        queue=False,
    ).then(
        process_analysis,
        inputs=[repo_url],
        outputs=[chatbot, analysis_data],
    ).success(
        lambda: "✅ Analysis complete! You can now ask questions about the repository.",
        inputs=None,
        outputs=status_msg,
    )

    def update_chat(question, history):
        """Record the user question in chat history and clear the input box."""
        history = history or []
        # Store a (user, assistant) pair so stream_question_response can unpack
        # it; the assistant slot stays empty because streamed answers are not
        # written back into this state.
        history.append((question, ""))
        return question, history, ""

    ask_btn.click(
        update_chat,
        inputs=[question, chat_history],
        outputs=[msg_store, chat_history, question],
        queue=False,
    ).then(
        process_question,
        inputs=[msg_store, analysis_data, chat_history],
        outputs=chatbot,
    )

    clear_btn.click(
        clear_outputs,
        inputs=None,
        outputs=[chatbot, analysis_data, chat_history, status_msg],
        queue=False,
    )

    # Handle the Enter key in the question input
    question.submit(
        update_chat,
        inputs=[question, chat_history],
        outputs=[msg_store, chat_history, question],
        queue=False,
    ).then(
        process_question,
        inputs=[msg_store, analysis_data, chat_history],
        outputs=chatbot,
    )

# Launch the app
if __name__ == "__main__":
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        debug=True,
    )
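
# To run locally (a sketch of the expected setup): create a .env file next to
# this script containing the two keys read at the top of the module,
#
#   github_api=<your GitHub personal access token>
#   gemini_api=<your Gemini API key>
#
# then start the app with `python app.py` and open http://localhost:7860.
# (The filename app.py is an assumption; use whatever this script is saved as.)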