Gitu / gemini_integration.py
nihalaninihal's picture
Update gemini_integration.py
276f458 verified
# gemini_integration.py
import os
import time
import base64
import json
from typing import List, Dict, Any, Optional, Union
# Fix: Replace incorrect imports with the proper Google Generative AI imports
import google.generativeai as genai
from google.generativeai import types
class GeminiClient:
    """Enhanced Gemini client for the GitHub AI Agent.

    Wraps the ``google.generativeai`` SDK with helpers for whole-repository
    analysis, conversational Q&A about a repo (with chat history), code-snippet
    review, and collaborator recommendation. A single ``GenerationConfig``
    (low temperature, for stable analytical output) is shared by all calls.
    """

    def __init__(self, api_key: str, model: str = "gemini-2.0-pro-exp-02-05"):
        """
        Initialize the Gemini client.

        Args:
            api_key: Gemini API key.
            model: Gemini model to use (default: ``gemini-2.0-pro-exp-02-05``).
        """
        self.api_key = api_key
        self.model_name = model
        genai.configure(api_key=api_key)  # SDK-wide API key configuration
        self.model = genai.GenerativeModel(model)
        # Conversation turns in Gemini's {"role", "parts"} format; consumed by
        # _build_query_prompt so follow-up questions keep their context.
        self.chat_history: List[Dict[str, Any]] = []
        self.config = types.GenerationConfig(
            temperature=0.2,  # low temperature: favor factual, stable output
            top_p=0.95,
            top_k=64,
            max_output_tokens=8192,
        )

    def analyze_repository(self, repo_data: Dict, file_contents: Dict,
                           commit_history: List[Dict], contributors: Dict, insights: Dict) -> Dict:
        """
        Analyze a GitHub repository with Gemini.

        Args:
            repo_data: Repository metadata.
            file_contents: Contents of key files, keyed by path; each value is
                a dict with at least a ``content`` key.
            commit_history: Repository commit history.
            contributors: Contributors data (not used in the prompt today;
                kept for interface compatibility).
            insights: Extracted repository insights.

        Returns:
            Dict with ``analysis`` (Gemini's text, or an in-band error
            message) and ``timestamp``.
        """
        prompt = self._build_repo_analysis_prompt(repo_data, file_contents, commit_history, insights)
        try:
            response = self.model.generate_content(prompt, generation_config=self.config)
            analysis = response.text
        except Exception as e:
            # Never raise to the caller; report the failure in-band instead.
            analysis = f"Error analyzing repository: {str(e)}"
        return {
            'analysis': analysis,
            'timestamp': time.strftime("%Y-%m-%d %H:%M:%S")
        }

    def answer_query(self, query: str, repo_context: Dict, related_files: List[Dict], insights: Dict = None) -> str:
        """
        Answer a natural language query about the repository.

        Args:
            query: User's natural language query.
            repo_context: Repository context information.
            related_files: Files relevant to the query; each entry is a dict
                with ``file`` and ``content`` keys.
            insights: Repository insights (optional; currently unused, kept
                for interface compatibility).

        Returns:
            Gemini's answer, or an error message on failure.
        """
        prompt = self._build_query_prompt(query, repo_context, related_files)
        # Record the question before calling the model so the next prompt
        # includes it even if this call fails.
        self.chat_history.append({"role": "user", "parts": [query]})
        try:
            response = self.model.generate_content(prompt, generation_config=self.config)
            answer = response.text
        except Exception as e:
            return f"Error answering query: {str(e)}"
        self.chat_history.append({"role": "model", "parts": [answer]})
        return answer

    def analyze_code_snippet(self, code: str, language: str = "") -> str:
        """
        Analyze a code snippet with Gemini.

        Args:
            code: Code snippet to analyze.
            language: Programming language (optional; used for the fence tag
                and a "Language:" line in the prompt).

        Returns:
            Code analysis text, or an error message on failure.
        """
        lang_info = f"Language: {language}\n" if language else ""
        prompt = f"""
You are an expert code analyzer. Please analyze the following code snippet:
{lang_info}
```{language}
{code}
```
Please provide a detailed analysis covering:
1. What does this code do? Provide a clear and concise overview.
2. Key components and functions/methods and their purpose.
3. Architecture and design patterns used (if any).
4. Potential issues, bugs, or code smells.
5. Suggestions for improvements or best practices.
Focus on providing meaningful insights rather than just describing the code line by line.
"""
        try:
            response = self.model.generate_content(prompt, generation_config=self.config)
            return response.text
        except Exception as e:
            return f"Error analyzing code: {str(e)}"

    @staticmethod
    def _extract_json(text: str) -> Any:
        """
        Parse JSON from a model response, tolerating markdown code fences.

        Gemini often wraps JSON output in ```json ... ``` fences even when
        asked for raw JSON; strip a leading fence line and a trailing fence
        before parsing.

        Raises:
            json.JSONDecodeError: if the remaining text is not valid JSON.
        """
        candidate = text.strip()
        if candidate.startswith("```"):
            # Drop the opening fence line (e.g. "```json" or "```").
            _, _, candidate = candidate.partition("\n")
            candidate = candidate.rstrip()
            if candidate.endswith("```"):
                candidate = candidate[:-3]
        return json.loads(candidate)

    def identify_potential_collaborators(self, contributors: Dict, insights: Dict, requirements: str) -> List[Dict]:
        """
        Find potential collaborators based on requirements.

        Args:
            contributors: Repository contributors data, keyed by login.
            insights: Repository insights.
            requirements: User requirements for collaborators.

        Returns:
            List of recommendation dicts (``login``, ``reasons``,
            ``confidence``). On any failure a single sentinel entry with
            ``login == 'error'`` is returned instead of raising.
        """
        prompt = self._build_collaborator_prompt(contributors, requirements, insights)
        try:
            response = self.model.generate_content(prompt, generation_config=self.config)
        except Exception as e:
            return [{
                'login': 'error',
                'reasons': [f"Error finding collaborators: {str(e)}"],
                'confidence': 0
            }]
        try:
            # Fix: strip markdown fences before parsing — a bare
            # json.loads(response.text) fails whenever the model wraps its
            # JSON answer in ```json ... ```.
            result = self._extract_json(response.text)
            return result.get('recommended_collaborators', [])
        except (json.JSONDecodeError, AttributeError):
            # AttributeError covers a top-level JSON array (no .get()).
            return [{
                'login': 'error',
                'reasons': ['Failed to parse response from Gemini. Please try again.'],
                'confidence': 0
            }]

    def _build_repo_analysis_prompt(self, repo_data: Dict, file_contents: Dict,
                                    commit_history: List[Dict], insights: Dict) -> str:
        """Build a comprehensive prompt for repository analysis."""
        # Basic repository information
        basic_info = f"""
# GitHub Repository Analysis
## Basic Information
Repository: {repo_data.get('full_name', 'Unknown')}
Description: {repo_data.get('description', 'No description provided')}
Primary Language: {repo_data.get('language', 'Unknown')}
Stars: {repo_data.get('stars', 0)}
Forks: {repo_data.get('forks', 0)}
Open Issues: {repo_data.get('open_issues', 0)}
Created: {repo_data.get('created_at', 'Unknown')}
Last Updated: {repo_data.get('updated_at', 'Unknown')}
License: {repo_data.get('license', 'Unknown')}
Topics: {', '.join(repo_data.get('topics', ['None']))}
"""
        # File structure — capped at 20 paths to stay inside the token budget
        file_structure = "\n## File Structure\n"
        for path in list(file_contents.keys())[:20]:
            file_structure += f"- {path}\n"
        if len(file_contents) > 20:
            file_structure += f"- ... and {len(file_contents) - 20} more files\n"
        # Key files content — prioritise READMEs and "main" entry points
        key_files = "\n## Key Files Content\n"
        priority_files = [f for f in file_contents.keys() if 'readme' in f.lower() or 'main' in f.lower()]
        for path in priority_files[:3]:  # Limit to 3 key files
            content = file_contents[path]['content']
            key_files += f"\n### {path}\n"
            key_files += f"```\n{content[:3000]}\n```\n"  # cap at 3000 chars
            if len(content) > 3000:
                key_files += "(content truncated for brevity)\n"
        # Commit history summary
        commit_summary = "\n## Commit History Summary\n"
        commit_summary += f"Total Commits Analyzed: {len(commit_history)}\n"
        if commit_history:
            commit_summary += "\nRecent Commits:\n"
            for commit in commit_history[:5]:  # Show 5 most recent commits
                commit_summary += f"- {commit.get('date', 'Unknown')}: {commit.get('message', 'No message')} by {commit.get('author', 'Unknown')}\n"
        # Insights summary (JSON-serialized; default=str stringifies dates etc.)
        insights_summary = "\n## Insights Summary\n"
        insights_json = json.dumps(insights, indent=2, default=str)
        insights_summary += f"```json\n{insights_json}\n```\n"
        # Analysis instructions
        instructions = """
## Analysis Instructions
As an expert in software development and repository analysis, please provide a comprehensive analysis of this GitHub repository. Your analysis should include:
1. **Purpose and Overview**: What is this project about? What problem does it solve?
2. **Architecture and Design**: Analyze the code organization, architectural patterns, and overall design. Identify the key components and how they interact.
3. **Code Quality Assessment**: Evaluate the code quality, readability, and maintainability. Are there any best practices or patterns employed?
4. **Development Activity**: Analyze the development patterns, collaboration, and project health based on commit history and contributor information.
5. **Strengths and Areas for Improvement**: Identify the strengths of this repository and suggest areas that could be improved.
6. **Summary**: Provide a concise summary of your analysis.
Base your analysis entirely on the information provided above. Be specific and reference actual files, code patterns, or commit information when possible.
"""
        return basic_info + file_structure + key_files + commit_summary + insights_summary + instructions

    def _build_query_prompt(self, query: str, repo_context: Dict, related_files: List[Dict]) -> str:
        """Build a prompt for answering a natural language query."""
        # Basic repository context
        basic_context = f"""
Repository: {repo_context.get('full_name', 'Unknown')}
Description: {repo_context.get('description', 'No description provided')}
Primary Language: {repo_context.get('language', 'Unknown')}
"""
        # Relevant files context — limit to the 3 most relevant files
        files_context = "\n## Relevant Files\n"
        for file_data in related_files[:3]:
            files_context += f"\n### {file_data['file']}\n"
            files_context += f"```\n{file_data['content'][:2000]}\n```\n"
            if len(file_data['content']) > 2000:
                files_context += "(content truncated for brevity)\n"
        # Conversation history — last 3 exchanges (6 messages)
        history_context = "\n## Conversation History\n"
        for message in self.chat_history[-6:]:
            role = "User" if message["role"] == "user" else "Assistant"
            history_context += f"{role}: {message['parts'][0]}\n\n"
        # Instructions
        instructions = f"""
## Query Instructions
You are an AI assistant specialized in understanding GitHub repositories. The user has asked:
"{query}"
Please answer this question clearly and concisely based ONLY on the information provided about the repository.
If you cannot answer the question based on the available information, say so clearly.
Reference specific files, code, or other relevant details in your answer whenever possible.
"""
        return basic_context + files_context + history_context + instructions

    def _build_collaborator_prompt(self, contributors: Dict, requirements: str, insights: Dict) -> str:
        """Build a prompt for finding potential collaborators."""
        # Condense per-contributor data to keep the prompt inside token limits
        contributor_data = []
        for login, data in contributors.items():
            contributor_data.append({
                'login': login,
                'contributions': data.get('contributions', 0),
                'files_modified': data.get('files_modified', [])[:10],  # top 10 files
                'commit_messages': data.get('commit_messages', [])[:5],  # sample of 5
                'activity_dates': [str(d) for d in data.get('activity_dates', [])[-10:]]  # 10 most recent
            })
        # Build prompt (JSON payloads hard-capped by character count)
        prompt = f"""
# Collaborator Identification
## Requirements
{requirements}
## Contributor Data
```json
{json.dumps(contributor_data, indent=2, default=str)[:5000]}
```
## Repository Insights
```json
{json.dumps(insights, indent=2, default=str)[:3000]}
```
## Instructions
You are an AI assistant specialized in GitHub repository analysis. Based on the contributor data and repository insights provided, identify the most suitable collaborators that match the requirements.
For each recommended collaborator, explain why they would be a good fit based on:
1. Their contribution patterns and activity
2. The specific files and areas they've worked on
3. Their apparent expertise and skills
4. How well they match the stated requirements
Provide a confidence score (0.0 to 1.0) for each recommendation based on how well they match the requirements.
Format your response as a JSON object with the following structure:
{{
"recommended_collaborators": [
{{
"login": "username",
"reasons": ["reason1", "reason2"],
"confidence": 0.85
}}
]
}}
Provide no more than 3 recommendations, focusing on quality matches rather than quantity.
"""
        return prompt

    def generate_prompt(self, prompt: str) -> str:
        """
        Generate a response to a custom prompt using Gemini.

        Args:
            prompt: Custom prompt for Gemini.

        Returns:
            Gemini's response text, or an error message on failure.
        """
        try:
            response = self.model.generate_content(prompt, generation_config=self.config)
            return response.text
        except Exception as e:
            return f"Error generating response: {str(e)}"