# gemini_integration.py
"""Gemini API integration for the GitHub AI Agent.

Wraps the Google Generative AI SDK with repository-analysis helpers:
whole-repo analysis, natural-language Q&A with chat history, code-snippet
review, and collaborator recommendation.
"""

import os
import time
import base64
import json
from typing import List, Dict, Any, Optional, Union

import google.generativeai as genai
from google.generativeai import types


class GeminiClient:
    """Enhanced Gemini client for the GitHub AI Agent."""

    def __init__(self, api_key: str, model: str = "gemini-2.0-pro-exp-02-05"):
        """
        Initialize the Gemini client.

        Args:
            api_key: Gemini API key.
            model: Gemini model to use (default: gemini-2.0-pro-exp-02-05).
        """
        self.api_key = api_key
        self.model_name = model
        genai.configure(api_key=api_key)  # Configure SDK-wide API key
        self.model = genai.GenerativeModel(model)
        # Running conversation history in the SDK's {"role", "parts"} shape.
        self.chat_history: List[Dict[str, Any]] = []
        self.config = types.GenerationConfig(
            temperature=0.2,
            top_p=0.95,
            top_k=64,
            max_output_tokens=8192,
        )

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """
        Remove a surrounding Markdown code fence (``` or ```json) from *text*.

        Gemini frequently wraps JSON answers in fenced code blocks even when
        asked for raw JSON; stripping the fence lets json.loads succeed.
        """
        stripped = text.strip()
        if stripped.startswith("```"):
            # Drop the opening fence line (may carry a language tag).
            stripped = stripped.split("\n", 1)[1] if "\n" in stripped else ""
            stripped = stripped.rstrip()
            if stripped.endswith("```"):
                stripped = stripped[:-3]
        return stripped.strip()

    def analyze_repository(self, repo_data: Dict, file_contents: Dict,
                           commit_history: List[Dict], contributors: Dict,
                           insights: Dict) -> Dict:
        """
        Analyze a GitHub repository with Gemini.

        Args:
            repo_data: Repository metadata.
            file_contents: Contents of key files.
            commit_history: Repository commit history.
            contributors: Contributors data (currently unused in the prompt).
            insights: Extracted repository insights.

        Returns:
            Dictionary with 'analysis' text and a 'timestamp'; on failure the
            'analysis' field carries an error message instead of raising.
        """
        prompt = self._build_repo_analysis_prompt(
            repo_data, file_contents, commit_history, insights)
        try:
            response = self.model.generate_content(
                prompt, generation_config=self.config)
            return {
                'analysis': response.text,
                'timestamp': time.strftime("%Y-%m-%d %H:%M:%S"),
            }
        except Exception as e:
            return {
                'analysis': f"Error analyzing repository: {str(e)}",
                'timestamp': time.strftime("%Y-%m-%d %H:%M:%S"),
            }

    def answer_query(self, query: str, repo_context: Dict,
                     related_files: List[Dict],
                     insights: Optional[Dict] = None) -> str:
        """
        Answer a natural-language query about the repository.

        Args:
            query: User's natural language query.
            repo_context: Repository context information.
            related_files: Files relevant to the query.
            insights: Repository insights (optional; not used in the prompt).

        Returns:
            Gemini's response, or an error string on failure.
        """
        prompt = self._build_query_prompt(query, repo_context, related_files)

        # Record the user turn before calling the model so the prompt builder
        # sees it on subsequent queries.
        self.chat_history.append({"role": "user", "parts": [query]})

        try:
            response = self.model.generate_content(
                prompt, generation_config=self.config)
            answer = response.text
            self.chat_history.append({"role": "model", "parts": [answer]})
            return answer
        except Exception as e:
            # NOTE: on failure the user turn stays in history without a
            # matching model turn; callers only receive the error string.
            return f"Error answering query: {str(e)}"

    def analyze_code_snippet(self, code: str, language: str = "") -> str:
        """
        Analyze a code snippet with Gemini.

        Args:
            code: Code snippet to analyze.
            language: Programming language (optional).

        Returns:
            Code analysis text, or an error string on failure.
        """
        lang_info = f"Language: {language}\n" if language else ""
        prompt = f"""
You are an expert code analyzer. Please analyze the following code snippet:

{lang_info}
```{language}
{code}
```

Please provide a detailed analysis covering:
1. What does this code do? Provide a clear and concise overview.
2. Key components and functions/methods and their purpose.
3. Architecture and design patterns used (if any).
4. Potential issues, bugs, or code smells.
5. Suggestions for improvements or best practices.

Focus on providing meaningful insights rather than just describing the code line by line.
"""
        try:
            response = self.model.generate_content(
                prompt, generation_config=self.config)
            return response.text
        except Exception as e:
            return f"Error analyzing code: {str(e)}"

    def identify_potential_collaborators(self, contributors: Dict,
                                         insights: Dict,
                                         requirements: str) -> List[Dict]:
        """
        Find potential collaborators based on requirements.

        Args:
            contributors: Repository contributors data.
            insights: Repository insights.
            requirements: User requirements for collaborators.

        Returns:
            List of collaborator dicts ('login', 'reasons', 'confidence');
            a single sentinel entry with login 'error' on failure.
        """
        prompt = self._build_collaborator_prompt(
            contributors, requirements, insights)
        try:
            response = self.model.generate_content(
                prompt, generation_config=self.config)
            try:
                # Strip a possible ```json fence before parsing — Gemini
                # often wraps structured output even when asked not to.
                result = json.loads(self._strip_code_fences(response.text))
                return result.get('recommended_collaborators', [])
            except json.JSONDecodeError:
                return [{
                    'login': 'error',
                    'reasons': ['Failed to parse response from Gemini. Please try again.'],
                    'confidence': 0,
                }]
        except Exception as e:
            return [{
                'login': 'error',
                'reasons': [f"Error finding collaborators: {str(e)}"],
                'confidence': 0,
            }]

    def _build_repo_analysis_prompt(self, repo_data: Dict, file_contents: Dict,
                                    commit_history: List[Dict],
                                    insights: Dict) -> str:
        """Build a comprehensive prompt for repository analysis."""
        # Basic repository information
        basic_info = f"""
# GitHub Repository Analysis

## Basic Information
Repository: {repo_data.get('full_name', 'Unknown')}
Description: {repo_data.get('description', 'No description provided')}
Primary Language: {repo_data.get('language', 'Unknown')}
Stars: {repo_data.get('stars', 0)}
Forks: {repo_data.get('forks', 0)}
Open Issues: {repo_data.get('open_issues', 0)}
Created: {repo_data.get('created_at', 'Unknown')}
Last Updated: {repo_data.get('updated_at', 'Unknown')}
License: {repo_data.get('license', 'Unknown')}
Topics: {', '.join(repo_data.get('topics', ['None']))}
"""

        # File structure information (capped to stay inside token limits)
        file_structure = "\n## File Structure\n"
        for path in list(file_contents.keys())[:20]:
            file_structure += f"- {path}\n"
        if len(file_contents) > 20:
            file_structure += f"- ... and {len(file_contents) - 20} more files\n"

        # Key files content — prioritize READMEs and entry points
        key_files = "\n## Key Files Content\n"
        priority_files = [f for f in file_contents.keys()
                          if 'readme' in f.lower() or 'main' in f.lower()]
        for path in priority_files[:3]:
            key_files += f"\n### {path}\n"
            content = file_contents[path]['content'][:3000]
            key_files += f"```\n{content}\n```\n"
            if len(file_contents[path]['content']) > 3000:
                key_files += "(content truncated for brevity)\n"

        # Commit history summary
        commit_summary = "\n## Commit History Summary\n"
        commit_summary += f"Total Commits Analyzed: {len(commit_history)}\n"
        if commit_history:
            commit_summary += "\nRecent Commits:\n"
            for commit in commit_history[:5]:
                commit_summary += (
                    f"- {commit.get('date', 'Unknown')}: "
                    f"{commit.get('message', 'No message')} "
                    f"by {commit.get('author', 'Unknown')}\n")

        # Insights summary
        insights_summary = "\n## Insights Summary\n"
        insights_json = json.dumps(insights, indent=2, default=str)
        insights_summary += f"```json\n{insights_json}\n```\n"

        # Analysis instructions
        instructions = """
## Analysis Instructions
As an expert in software development and repository analysis, please provide a comprehensive analysis of this GitHub repository. Your analysis should include:

1. **Purpose and Overview**: What is this project about? What problem does it solve?

2. **Architecture and Design**: Analyze the code organization, architectural patterns, and overall design. Identify the key components and how they interact.

3. **Code Quality Assessment**: Evaluate the code quality, readability, and maintainability. Are there any best practices or patterns employed?

4. **Development Activity**: Analyze the development patterns, collaboration, and project health based on commit history and contributor information.

5. **Strengths and Areas for Improvement**: Identify the strengths of this repository and suggest areas that could be improved.

6. **Summary**: Provide a concise summary of your analysis.

Base your analysis entirely on the information provided above. Be specific and reference actual files, code patterns, or commit information when possible.
"""

        return (basic_info + file_structure + key_files + commit_summary
                + insights_summary + instructions)

    def _build_query_prompt(self, query: str, repo_context: Dict,
                            related_files: List[Dict]) -> str:
        """Build a prompt for answering natural language queries."""
        # Basic repository context
        basic_context = f"""
Repository: {repo_context.get('full_name', 'Unknown')}
Description: {repo_context.get('description', 'No description provided')}
Primary Language: {repo_context.get('language', 'Unknown')}
"""

        # Relevant files context (3 most relevant, truncated)
        files_context = "\n## Relevant Files\n"
        for file_data in related_files[:3]:
            files_context += f"\n### {file_data['file']}\n"
            files_context += f"```\n{file_data['content'][:2000]}\n```\n"
            if len(file_data['content']) > 2000:
                files_context += "(content truncated for brevity)\n"

        # Conversation history — last 3 exchanges (6 messages)
        history_context = "\n## Conversation History\n"
        for message in self.chat_history[-6:]:
            role = "User" if message["role"] == "user" else "Assistant"
            history_context += f"{role}: {message['parts'][0]}\n\n"

        # Instructions
        instructions = f"""
## Query Instructions
You are an AI assistant specialized in understanding GitHub repositories. The user has asked: "{query}"

Please answer this question clearly and concisely based ONLY on the information provided about the repository. If you cannot answer the question based on the available information, say so clearly.

Reference specific files, code, or other relevant details in your answer whenever possible.
"""

        return basic_context + files_context + history_context + instructions

    def _build_collaborator_prompt(self, contributors: Dict,
                                   requirements: str, insights: Dict) -> str:
        """Build a prompt for finding potential collaborators."""
        # Condense each contributor to a token-bounded summary.
        contributor_data = []
        for login, data in contributors.items():
            contributor_data.append({
                'login': login,
                'contributions': data.get('contributions', 0),
                'files_modified': data.get('files_modified', [])[:10],   # Top 10 files
                'commit_messages': data.get('commit_messages', [])[:5],  # Sample 5 messages
                'activity_dates': [str(d) for d in
                                   data.get('activity_dates', [])[-10:]],  # Recent 10 dates
            })

        prompt = f"""
# Collaborator Identification

## Requirements
{requirements}

## Contributor Data
```json
{json.dumps(contributor_data, indent=2, default=str)[:5000]}
```

## Repository Insights
```json
{json.dumps(insights, indent=2, default=str)[:3000]}
```

## Instructions
You are an AI assistant specialized in GitHub repository analysis. Based on the contributor data and repository insights provided, identify the most suitable collaborators that match the requirements.

For each recommended collaborator, explain why they would be a good fit based on:
1. Their contribution patterns and activity
2. The specific files and areas they've worked on
3. Their apparent expertise and skills
4. How well they match the stated requirements

Provide a confidence score (0.0 to 1.0) for each recommendation based on how well they match the requirements.

Format your response as a JSON object with the following structure:
{{
  "recommended_collaborators": [
    {{
      "login": "username",
      "reasons": ["reason1", "reason2"],
      "confidence": 0.85
    }}
  ]
}}

Provide no more than 3 recommendations, focusing on quality matches rather than quantity.
"""
        return prompt

    def generate_prompt(self, prompt: str) -> str:
        """
        Generate a response to a custom prompt using Gemini.

        Args:
            prompt: Custom prompt for Gemini.

        Returns:
            Gemini's response, or an error string on failure.
        """
        try:
            response = self.model.generate_content(
                prompt, generation_config=self.config)
            return response.text
        except Exception as e:
            return f"Error generating response: {str(e)}"