# gemini_integration.py
import base64
import json
import os
import re
import time
from typing import Any, Dict, List, Optional, Union

# Fix: Replace incorrect imports with the proper Google Generative AI imports
import google.generativeai as genai
from google.generativeai import types
class GeminiClient:
    """Enhanced Gemini client for the GitHub AI Agent.

    Wraps a single ``google.generativeai`` model plus a shared generation
    config, and keeps an in-memory chat history used to give follow-up
    queries conversational context.
    """

    def __init__(self, api_key: str, model: str = "gemini-2.0-pro-exp-02-05"):
        """
        Initialize the Gemini client.

        Args:
            api_key: Gemini API key.
            model: Gemini model to use (default: gemini-2.0-pro-exp-02-05).
        """
        self.api_key = api_key
        self.model_name = model
        genai.configure(api_key=api_key)  # Register the API key with the SDK.
        self.model = genai.GenerativeModel(model)  # Use GenerativeModel
        self.chat_history: List[Dict[str, Any]] = []
        # Low temperature for mostly-deterministic analysis; generous output
        # budget because repository analyses can run long.
        self.config = types.GenerationConfig(
            temperature=0.2,
            top_p=0.95,
            top_k=64,
            max_output_tokens=8192,
        )

    @staticmethod
    def _now() -> str:
        """Return the current local time formatted for response metadata."""
        return time.strftime("%Y-%m-%d %H:%M:%S")

    @staticmethod
    def _extract_json(text: str) -> str:
        """Return the JSON payload from *text*.

        Gemini frequently wraps structured output in a Markdown code fence
        (```json ... ```); strip the fence when present so ``json.loads``
        can parse the payload.
        """
        match = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL)
        return match.group(1) if match else text.strip()

    def analyze_repository(self, repo_data: Dict, file_contents: Dict,
                           commit_history: List[Dict], contributors: Dict, insights: Dict) -> Dict:
        """
        Analyze a GitHub repository with Gemini.

        Args:
            repo_data: Repository metadata.
            file_contents: Contents of key files; each value is expected to be
                a mapping with a 'content' key holding the file text.
            commit_history: Repository commit history.
            contributors: Contributors data (accepted for interface
                compatibility; not currently included in the prompt).
            insights: Extracted repository insights.

        Returns:
            Dictionary with the analysis text and a timestamp. On failure the
            'analysis' field carries an error message instead of raising.
        """
        prompt = self._build_repo_analysis_prompt(repo_data, file_contents, commit_history, insights)
        try:
            response = self.model.generate_content(prompt, generation_config=self.config)
            analysis = response.text
        except Exception as e:
            # Degrade gracefully so callers can always render 'analysis'.
            analysis = f"Error analyzing repository: {str(e)}"
        return {
            'analysis': analysis,
            'timestamp': self._now(),
        }

    def answer_query(self, query: str, repo_context: Dict, related_files: List[Dict], insights: Dict = None) -> str:
        """
        Answer natural language queries about the repository.

        Args:
            query: User's natural language query.
            repo_context: Repository context information.
            related_files: Files relevant to the query; each entry is expected
                to be a mapping with 'file' and 'content' keys.
            insights: Repository insights (optional; currently unused here,
                kept for interface compatibility).

        Returns:
            Gemini's response text, or an error message on failure.
        """
        prompt = self._build_query_prompt(query, repo_context, related_files)
        # Record the user turn before calling the model so the next prompt's
        # history section includes it even if this call fails.
        self.chat_history.append({"role": "user", "parts": [query]})
        try:
            response = self.model.generate_content(prompt, generation_config=self.config)
            answer = response.text
        except Exception as e:
            return f"Error answering query: {str(e)}"
        # Record the model turn only on success.
        self.chat_history.append({"role": "model", "parts": [answer]})
        return answer

    def analyze_code_snippet(self, code: str, language: str = "") -> str:
        """
        Analyze a code snippet with Gemini.

        Args:
            code: Code snippet to analyze.
            language: Programming language (optional).

        Returns:
            Code analysis text, or an error message on failure.
        """
        lang_info = f"Language: {language}\n" if language else ""
        prompt = f"""
You are an expert code analyzer. Please analyze the following code snippet:
{lang_info}
```{language}
{code}
```
Please provide a detailed analysis covering:
1. What does this code do? Provide a clear and concise overview.
2. Key components and functions/methods and their purpose.
3. Architecture and design patterns used (if any).
4. Potential issues, bugs, or code smells.
5. Suggestions for improvements or best practices.
Focus on providing meaningful insights rather than just describing the code line by line.
"""
        try:
            response = self.model.generate_content(prompt, generation_config=self.config)
            return response.text
        except Exception as e:
            return f"Error analyzing code: {str(e)}"

    def identify_potential_collaborators(self, contributors: Dict, insights: Dict, requirements: str) -> List[Dict]:
        """
        Find potential collaborators based on requirements.

        Args:
            contributors: Repository contributors data, keyed by login.
            insights: Repository insights.
            requirements: User requirements for collaborators.

        Returns:
            List of potential collaborators with reasoning, or a single
            sentinel entry with login 'error' on failure.
        """
        prompt = self._build_collaborator_prompt(contributors, requirements, insights)
        try:
            response = self.model.generate_content(prompt, generation_config=self.config)
            try:
                # Fix: the model often wraps JSON in a ```json fence; strip it
                # before parsing instead of feeding the raw text to json.loads.
                result = json.loads(self._extract_json(response.text))
                return result.get('recommended_collaborators', [])
            except json.JSONDecodeError:
                return [{
                    'login': 'error',
                    'reasons': ['Failed to parse response from Gemini. Please try again.'],
                    'confidence': 0
                }]
        except Exception as e:
            return [{
                'login': 'error',
                'reasons': [f"Error finding collaborators: {str(e)}"],
                'confidence': 0
            }]

    def _build_repo_analysis_prompt(self, repo_data: Dict, file_contents: Dict,
                                    commit_history: List[Dict], insights: Dict) -> str:
        """Build a comprehensive prompt for repository analysis.

        Assembles basic metadata, a capped file listing, excerpts of key
        files, a commit summary, an insights dump, and analysis instructions.
        """
        # Basic repository information.
        basic_info = f"""
# GitHub Repository Analysis
## Basic Information
Repository: {repo_data.get('full_name', 'Unknown')}
Description: {repo_data.get('description', 'No description provided')}
Primary Language: {repo_data.get('language', 'Unknown')}
Stars: {repo_data.get('stars', 0)}
Forks: {repo_data.get('forks', 0)}
Open Issues: {repo_data.get('open_issues', 0)}
Created: {repo_data.get('created_at', 'Unknown')}
Last Updated: {repo_data.get('updated_at', 'Unknown')}
License: {repo_data.get('license', 'Unknown')}
Topics: {', '.join(repo_data.get('topics', ['None']))}
"""
        # File structure information (capped to avoid blowing the token budget).
        file_structure = "\n## File Structure\n"
        for path in list(file_contents.keys())[:20]:
            file_structure += f"- {path}\n"
        if len(file_contents) > 20:
            file_structure += f"- ... and {len(file_contents) - 20} more files\n"
        # Key files content: READMEs and anything "main"-ish, up to 3 files,
        # each truncated to 3000 characters.
        key_files = "\n## Key Files Content\n"
        priority_files = [f for f in file_contents.keys() if 'readme' in f.lower() or 'main' in f.lower()]
        for path in priority_files[:3]:
            key_files += f"\n### {path}\n"
            content = file_contents[path]['content'][:3000]
            key_files += f"```\n{content}\n```\n"
            if len(file_contents[path]['content']) > 3000:
                key_files += "(content truncated for brevity)\n"
        # Commit history summary (5 most recent commits shown).
        commit_summary = "\n## Commit History Summary\n"
        commit_summary += f"Total Commits Analyzed: {len(commit_history)}\n"
        if commit_history:
            commit_summary += "\nRecent Commits:\n"
            for commit in commit_history[:5]:
                commit_summary += f"- {commit.get('date', 'Unknown')}: {commit.get('message', 'No message')} by {commit.get('author', 'Unknown')}\n"
        # Insights summary as a JSON dump (default=str stringifies non-JSON types).
        insights_summary = "\n## Insights Summary\n"
        insights_json = json.dumps(insights, indent=2, default=str)
        insights_summary += f"```json\n{insights_json}\n```\n"
        # Analysis instructions.
        instructions = """
## Analysis Instructions
As an expert in software development and repository analysis, please provide a comprehensive analysis of this GitHub repository. Your analysis should include:
1. **Purpose and Overview**: What is this project about? What problem does it solve?
2. **Architecture and Design**: Analyze the code organization, architectural patterns, and overall design. Identify the key components and how they interact.
3. **Code Quality Assessment**: Evaluate the code quality, readability, and maintainability. Are there any best practices or patterns employed?
4. **Development Activity**: Analyze the development patterns, collaboration, and project health based on commit history and contributor information.
5. **Strengths and Areas for Improvement**: Identify the strengths of this repository and suggest areas that could be improved.
6. **Summary**: Provide a concise summary of your analysis.
Base your analysis entirely on the information provided above. Be specific and reference actual files, code patterns, or commit information when possible.
"""
        return basic_info + file_structure + key_files + commit_summary + insights_summary + instructions

    def _build_query_prompt(self, query: str, repo_context: Dict, related_files: List[Dict]) -> str:
        """Build a prompt for answering natural language queries.

        Combines repository context, up to 3 relevant file excerpts, the last
        3 exchanges of chat history, and the query instructions.
        """
        basic_context = f"""
Repository: {repo_context.get('full_name', 'Unknown')}
Description: {repo_context.get('description', 'No description provided')}
Primary Language: {repo_context.get('language', 'Unknown')}
"""
        # Relevant files context (3 files max, 2000 chars each).
        files_context = "\n## Relevant Files\n"
        for file_data in related_files[:3]:
            files_context += f"\n### {file_data['file']}\n"
            files_context += f"```\n{file_data['content'][:2000]}\n```\n"
            if len(file_data['content']) > 2000:
                files_context += "(content truncated for brevity)\n"
        # History context: last 3 exchanges (6 messages).
        history_context = "\n## Conversation History\n"
        for message in self.chat_history[-6:]:
            role = "User" if message["role"] == "user" else "Assistant"
            history_context += f"{role}: {message['parts'][0]}\n\n"
        instructions = f"""
## Query Instructions
You are an AI assistant specialized in understanding GitHub repositories. The user has asked:
"{query}"
Please answer this question clearly and concisely based ONLY on the information provided about the repository.
If you cannot answer the question based on the available information, say so clearly.
Reference specific files, code, or other relevant details in your answer whenever possible.
"""
        return basic_context + files_context + history_context + instructions

    def _build_collaborator_prompt(self, contributors: Dict, requirements: str, insights: Dict) -> str:
        """Build a prompt for finding potential collaborators.

        Condenses each contributor to a bounded summary, then embeds it with
        the insights and the required JSON output schema.
        """
        contributor_data = []
        for login, data in contributors.items():
            contributor_data.append({
                'login': login,
                'contributions': data.get('contributions', 0),
                'files_modified': data.get('files_modified', [])[:10],  # Top 10 files.
                'commit_messages': data.get('commit_messages', [])[:5],  # Sample 5 messages.
                'activity_dates': [str(d) for d in data.get('activity_dates', [])[-10:]]  # Recent 10 dates.
            })
        # Contributor/insight dumps are character-capped to bound prompt size.
        prompt = f"""
# Collaborator Identification
## Requirements
{requirements}
## Contributor Data
```json
{json.dumps(contributor_data, indent=2, default=str)[:5000]}
```
## Repository Insights
```json
{json.dumps(insights, indent=2, default=str)[:3000]}
```
## Instructions
You are an AI assistant specialized in GitHub repository analysis. Based on the contributor data and repository insights provided, identify the most suitable collaborators that match the requirements.
For each recommended collaborator, explain why they would be a good fit based on:
1. Their contribution patterns and activity
2. The specific files and areas they've worked on
3. Their apparent expertise and skills
4. How well they match the stated requirements
Provide a confidence score (0.0 to 1.0) for each recommendation based on how well they match the requirements.
Format your response as a JSON object with the following structure:
{{
  "recommended_collaborators": [
    {{
      "login": "username",
      "reasons": ["reason1", "reason2"],
      "confidence": 0.85
    }}
  ]
}}
Provide no more than 3 recommendations, focusing on quality matches rather than quantity.
"""
        return prompt

    def generate_prompt(self, prompt: str) -> str:
        """
        Generate a response to a custom prompt using Gemini.

        Args:
            prompt: Custom prompt for Gemini.

        Returns:
            Gemini's response text, or an error message on failure.
        """
        try:
            response = self.model.generate_content(prompt, generation_config=self.config)
            return response.text
        except Exception as e:
            return f"Error generating response: {str(e)}"