|
import gradio as gr |
|
import google.generativeai as genai |
|
import os |
|
from dotenv import load_dotenv |
|
from github import Github |
|
import json |
|
from pathlib import Path |
|
from datetime import datetime |
|
from collections import defaultdict |
|
import base64 |
|
from typing import Dict, List, Any, Optional, Tuple, Iterator |
|
from dataclasses import dataclass |
|
import tempfile |
|
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type |
|
import time |
|
|
|
|
|
# Load variables from a local .env file into the process environment.
load_dotenv()

# NOTE(review): credentials are read from lowercase env names ("github_api",
# "gemini_api"), not the conventional GITHUB_TOKEN / GEMINI_API_KEY names —
# deployments must set these exact keys.
GITHUB_TOKEN = os.getenv("github_api")

GEMINI_API_KEY = os.getenv("gemini_api")

# Fail fast at import time: the app is unusable without both credentials.
if not GITHUB_TOKEN or not GEMINI_API_KEY:
    raise ValueError("Both GITHUB_TOKEN and GEMINI_API_KEY must be set in environment")

# Module-level clients shared by all requests/handlers.
gh = Github(GITHUB_TOKEN)

genai.configure(api_key=GEMINI_API_KEY)

# Experimental "thinking" model: streams thought parts before the final
# answer; ThinkingAnalyzer relies on that two-phase part structure.
model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-thinking-exp-01-21",
    generation_config={
        "temperature": 1,
        "top_p": 0.95,
        "top_k": 40,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    },
    # Block medium-and-above content across all four harm categories.
    safety_settings=[
        {
            "category": "HARM_CATEGORY_HARASSMENT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
        {
            "category": "HARM_CATEGORY_HATE_SPEECH",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
        {
            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
        {
            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE"
        },
    ]
)

# File extensions treated as source code during repository analysis.
RELEVANT_EXTENSIONS = {
    ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".cpp", ".c", ".h",
    ".hpp", ".rb", ".php", ".go", ".rs", ".swift", ".kt"
}
|
|
|
@dataclass
class ChatMessage:
    """A single chat message for the gr.Chatbot in "messages" mode.

    Mirrors the role/content/metadata shape Gradio expects; ``metadata``
    carries optional UI hints such as a collapsible ``{"title": ...}``
    for the model's thinking phase.
    """

    # Message author; this app only ever produces "assistant" messages.
    role: str
    # Markdown-rendered message body.
    content: str
    # BUG FIX: annotation was `Dict[str, Any] = None`, contradicting the
    # None default; Optional makes the contract explicit.
    metadata: Optional[Dict[str, Any]] = None
|
|
|
class ThinkingAnalyzer:
    """Streams Gemini "thinking" output as incrementally updated ChatMessages.

    The thinking-capable model emits chunks whose candidate content holds
    either one part (thought text) or two parts (final thought chunk plus
    the first answer chunk). This class converts that stream into a growing
    list of ChatMessage snapshots suitable for a gr.Chatbot.
    """

    def __init__(self, model):
        # A google.generativeai.GenerativeModel (or compatible) instance.
        self.model = model

    def _stream_prompt(self, prompt: str) -> Iterator[List[ChatMessage]]:
        """Send *prompt* to the model and yield message-list snapshots.

        Yields after every chunk so the UI can render the thought process
        live; the final answer is appended as a separate message once the
        thinking phase completes.
        """
        response = self.model.generate_content(prompt, stream=True)

        thought_buffer = ""
        response_buffer = ""
        thinking_complete = False

        # Seed with an empty "thinking" message that gets filled in place.
        messages: List[ChatMessage] = [
            ChatMessage(
                role="assistant",
                content="",
                metadata={"title": "⏳ Analyzing Repository: Thought Process"}
            )
        ]

        for chunk in response:
            parts = chunk.candidates[0].content.parts
            if not parts:
                # Defensive: skip content-less chunks (e.g. finish/safety
                # events) instead of raising IndexError.
                continue
            current_chunk = parts[0].text

            if len(parts) == 2 and not thinking_complete:
                # Transition chunk: last thought text + first answer text.
                thought_buffer += current_chunk
                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": "⏳ Analysis Thought Process"}
                )
                messages.append(
                    ChatMessage(role="assistant", content=parts[1].text)
                )
                # BUG FIX: seed the answer buffer with the first answer
                # chunk; previously it stayed empty, so the next update
                # silently dropped this text.
                response_buffer = parts[1].text
                thinking_complete = True

            elif thinking_complete:
                # Streaming the final answer.
                response_buffer += current_chunk
                messages[-1] = ChatMessage(
                    role="assistant",
                    content=response_buffer
                )

            else:
                # Still streaming the thought process.
                thought_buffer += current_chunk
                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": "⏳ Analysis Thought Process"}
                )

            yield messages

    def stream_analysis(self, analysis_data: Dict[str, Any], system_prompt: str) -> Iterator[List[ChatMessage]]:
        """Streams an initial analysis with a visible thinking process."""
        prompt = f"{system_prompt}\n\nRepository Analysis Data:\n{json.dumps(analysis_data, indent=2)}"
        yield from self._stream_prompt(prompt)

    def stream_question_response(self, question: str, analysis_data: Dict[str, Any],
                                 chat_history: List[Any]) -> Iterator[List[ChatMessage]]:
        """Streams a response to a follow-up question with thinking process.

        ``chat_history`` entries may be (user, assistant) pairs or bare user
        strings (the UI has stored both shapes); both are handled.
        """
        context = "You are an expert code analyst helping users understand repository analysis results.\n\n"
        context += f"Repository Analysis Data:\n{json.dumps(analysis_data, indent=2)}\n\n"

        if chat_history:
            context += "Previous conversation:\n"
            for entry in chat_history:
                if isinstance(entry, (list, tuple)) and len(entry) == 2:
                    user_msg, assistant_msg = entry
                    context += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
                else:
                    # BUG FIX: bare strings previously crashed the tuple
                    # unpacking here; include them as user turns.
                    context += f"User: {entry}\n"

        prompt = context + f"\nUser: {question}\nPlease think through your analysis:"

        # BUG FIX: previously delegated to stream_analysis(), which appended
        # the analysis JSON a second time; stream the composed prompt as-is.
        yield from self._stream_prompt(prompt)
|
|
|
class RepositoryAnalyzer:
    """Collects statistics about a GitHub repository via the PyGithub client.

    Gathers basic metadata, file/directory structure, shallow code metrics,
    recent commit history, and contributor counts into one JSON-serializable
    dict (``analysis_data``) that is later fed to the LLM.
    """

    def __init__(self, repo_url: str):
        """Resolve *repo_url* to a PyGithub Repository.

        Accepts any URL whose last two path segments are owner/repo, e.g.
        ``https://github.com/owner/repo``; a trailing slash or ``.git``
        suffix is tolerated.

        Raises:
            ValueError: if the URL does not contain an owner/repo pair.
        """
        parts = repo_url.rstrip('/').split('/')
        if len(parts) < 2:
            raise ValueError("Invalid repository URL format")

        # BUG FIX: tolerate clone-style URLs ending in ".git".
        self.repo_name = parts[-1].removesuffix(".git")
        self.owner = parts[-2]
        if not self.owner or not self.repo_name:
            raise ValueError("Invalid repository URL format")

        self.repo = gh.get_repo(f"{self.owner}/{self.repo_name}")
        self.analysis_data: Dict[str, Any] = {}

    def analyze(self) -> Dict[str, Any]:
        """Perform the complete repository analysis.

        Returns:
            ``analysis_data`` populated with ``basic_info``, ``structure``,
            ``code_patterns``, ``commit_history`` and ``contributors``.

        Raises:
            Exception: wrapping any underlying GitHub API failure.
        """
        try:
            self.analysis_data["basic_info"] = {
                "name": self.repo.name,
                "owner": self.repo.owner.login,
                "description": self.repo.description or "No description available",
                "stars": self.repo.stargazers_count,
                "forks": self.repo.forks_count,
                "created_at": self.repo.created_at.isoformat(),
                "last_updated": self.repo.updated_at.isoformat(),
                "primary_language": self.repo.language or "Not specified",
            }

            self.analysis_data["structure"] = self._analyze_structure()
            self.analysis_data["code_patterns"] = self._analyze_code_patterns()
            self.analysis_data["commit_history"] = self._analyze_commits()
            self.analysis_data["contributors"] = self._analyze_contributors()

            return self.analysis_data

        except Exception as e:
            # BUG FIX: chain the original exception so the real traceback
            # is preserved for debugging.
            raise Exception(f"Error analyzing repository: {str(e)}") from e

    def _analyze_structure(self) -> Dict[str, Any]:
        """Walk the whole tree, tallying relevant files, dirs and size.

        Best-effort: on any API error the counts gathered so far are kept.
        """
        structure = {
            "files": defaultdict(int),
            "directories": set(),
            "total_size": 0,
        }

        try:
            # Breadth-first walk: directories are expanded into the queue.
            contents = self.repo.get_contents("")
            while contents:
                content = contents.pop(0)
                if content.type == "dir":
                    structure["directories"].add(content.path)
                    contents.extend(self.repo.get_contents(content.path))
                else:
                    ext = Path(content.path).suffix.lower()
                    # Only recognized source-code extensions are counted.
                    if ext in RELEVANT_EXTENSIONS:
                        structure["files"][ext] += 1
                        structure["total_size"] += content.size
        except Exception as e:
            print(f"Error analyzing structure: {str(e)}")

        return {
            "file_types": dict(structure["files"]),
            "directory_count": len(structure["directories"]),
            "total_size": structure["total_size"],
            "file_count": sum(structure["files"].values())
        }

    def _analyze_code_patterns(self) -> Dict[str, Any]:
        """Sample up to 5 source files and compute shallow style metrics."""
        patterns = {
            "samples": [],
            "languages": defaultdict(int),
            "complexity_metrics": defaultdict(list)
        }

        try:
            files = self.repo.get_contents("")
            analyzed = 0

            # Stop after 5 analyzed files to bound API usage and latency.
            while files and analyzed < 5:
                file = files.pop(0)
                if file.type == "dir":
                    files.extend(self.repo.get_contents(file.path))
                elif Path(file.path).suffix.lower() in RELEVANT_EXTENSIONS:
                    try:
                        # file.content is base64-encoded text from the API
                        # (may be None for very large files — the decode
                        # then fails and the file is skipped below).
                        content = base64.b64decode(file.content).decode('utf-8')
                        lines = content.splitlines()

                        if not lines:
                            continue

                        # Lines of code = non-blank lines.
                        loc = len([line for line in lines if line.strip()])
                        avg_line_length = sum(len(line) for line in lines) / len(lines)

                        patterns["samples"].append({
                            "path": file.path,
                            "language": Path(file.path).suffix[1:],
                            "loc": loc,
                            "avg_line_length": round(avg_line_length, 2)
                        })

                        patterns["languages"][Path(file.path).suffix[1:]] += loc
                        patterns["complexity_metrics"]["loc"].append(loc)
                        patterns["complexity_metrics"]["avg_line_length"].append(avg_line_length)

                        analyzed += 1

                    except Exception as e:
                        # Skip undecodable/oversized files; keep sampling.
                        print(f"Error analyzing file {file.path}: {str(e)}")
                        continue

        except Exception as e:
            print(f"Error in code pattern analysis: {str(e)}")

        return patterns

    def _analyze_commits(self) -> Dict[str, Any]:
        """Analyze the 100 most recent commits for authorship and churn."""
        commit_data = []
        commit_times = []

        try:
            commits = list(self.repo.get_commits()[:100])

            for commit in commits:
                try:
                    commit_info = {
                        "sha": commit.sha,
                        # commit.author is None for commits whose email does
                        # not map to a GitHub account.
                        "author": commit.author.login if commit.author else "Unknown",
                        "date": commit.commit.author.date.isoformat(),
                        "message": commit.commit.message,
                        "changes": {
                            "additions": commit.stats.additions,
                            "deletions": commit.stats.deletions,
                        }
                    }
                    commit_data.append(commit_info)
                    commit_times.append(commit.commit.author.date.hour)
                except Exception as e:
                    print(f"Error processing commit {commit.sha}: {str(e)}")
                    continue

            # Histogram of commit hours (0-23) for activity patterns.
            commit_hours = defaultdict(int)
            for hour in commit_times:
                commit_hours[hour] += 1

            total_commits = len(commit_data)
            return {
                "commits": commit_data,
                "total_commits": total_commits,
                "commit_hours": dict(commit_hours),
                "avg_additions": sum(c["changes"]["additions"] for c in commit_data) / total_commits if total_commits else 0,
                "avg_deletions": sum(c["changes"]["deletions"] for c in commit_data) / total_commits if total_commits else 0,
            }

        except Exception as e:
            print(f"Error in commit analysis: {str(e)}")
            # Fall back to an empty-but-complete shape so callers need no
            # special-casing.
            return {
                "commits": [],
                "total_commits": 0,
                "commit_hours": {},
                "avg_additions": 0,
                "avg_deletions": 0
            }

    def _analyze_contributors(self) -> Dict[str, Any]:
        """Collect per-contributor login, contribution count, and type."""
        contributor_data = []

        try:
            for contributor in self.repo.get_contributors():
                contributor_data.append({
                    "login": contributor.login,
                    "contributions": contributor.contributions,
                    "type": contributor.type,
                })
        except Exception as e:
            print(f"Error analyzing contributors: {str(e)}")

        return {
            "total_contributors": len(contributor_data),
            "contributors": contributor_data
        }
|
|
|
@retry(
    retry=retry_if_exception_type(Exception),
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10)
)
def process_analysis(repo_url: str, progress=gr.Progress()):
    """Analyze *repo_url* and generate the AI report.

    Args:
        repo_url: GitHub repository URL entered by the user.
        progress: Gradio progress tracker (injected by the framework).

    Returns:
        ``(messages, analysis_data)`` — the chat messages for the
        gr.Chatbot and the raw analysis dict stored in gr.State for
        follow-up questions. On failure, an error message plus an empty
        state dict.

    NOTE(review): the internal except handler means the @retry decorator
    never sees an exception and is effectively inert; kept for interface
    compatibility.
    """
    try:
        progress(0, desc="Initializing repository analysis...")
        analyzer = RepositoryAnalyzer(repo_url)
        analysis_data = analyzer.analyze()

        thinking_analyzer = ThinkingAnalyzer(model)

        system_prompt = """You are an expert code analyst with deep experience in software architecture, development practices, and team dynamics. Analyze the provided repository data and create a detailed, insightful analysis using the following markdown template:

# Repository Analysis

## 📊 Project Overview
[Provide a comprehensive overview including:
- Project purpose and scope
- Age and maturity of the project
- Current activity level and maintenance status
- Key metrics (stars, forks, etc.)
- Primary technologies and languages used]

## 🏗️ Architecture and Code Organization
[Analyze in detail:
- Repository structure and organization
- Code distribution across different technologies
- File and directory organization patterns
- Project size and complexity metrics
- Code modularity and component structure
- Presence of key architectural patterns]

## 💻 Development Practices & Code Quality
[Evaluate:
- Coding standards and consistency
- Code complexity and maintainability metrics
- Documentation practices
- Testing approach and coverage (if visible)
- Error handling and logging practices
- Use of design patterns and best practices]

## 📈 Development Workflow & History
[Analyze:
- Commit patterns and frequency
- Release cycles and versioning
- Branch management strategy
- Code review practices
- Continuous integration/deployment indicators
- Peak development periods and cycles]

## 👥 Team Dynamics & Collaboration
[Examine:
- Team size and composition
- Contribution patterns
- Core maintainer identification
- Community engagement level
- Communication patterns
- Collaboration efficiency]

## 🔧 Technical Depth & Innovation
[Assess:
- Technical sophistication level
- Innovative approaches or solutions
- Complex problem-solving examples
- Performance optimization efforts
- Security considerations
- Scalability approach]

## 🚀 Project Health & Sustainability
[Evaluate:
- Project momentum and growth trends
- Maintenance patterns
- Community health indicators
- Documentation completeness
- Onboarding friendliness
- Long-term viability indicators]

## 💡 Key Insights & Recommendations
[Provide:
- 3-5 key strengths identified
- 3-5 potential improvement areas
- Notable patterns or practices
- Unique characteristics
- Strategic recommendations]"""

        progress(0.5, desc="Generating analysis with thinking process...")
        # Drain the stream; this handler is not a generator, so only the
        # final snapshot is returned to the UI.
        messages = []
        for msg_update in thinking_analyzer.stream_analysis(
            analysis_data,
            system_prompt
        ):
            messages = msg_update

        return messages, analysis_data

    except Exception as e:
        # BUG FIX: the original bare ``return`` sent None to both Gradio
        # outputs, leaving the UI blank with no explanation. Surface the
        # error in the chatbot and keep the analysis state empty instead.
        return (
            [ChatMessage(role="assistant", content=f"❌ Analysis failed: {e}")],
            {},
        )
|
|
|
def process_question(question: str, analysis_data: Dict[str, Any], chat_history: List[Any]):
    """Answer a follow-up question about a previously analyzed repository.

    Args:
        question: The user's follow-up question.
        analysis_data: Dict produced by process_analysis (from gr.State);
            falsy when no analysis has been run yet.
        chat_history: Prior history entries (from gr.State), forwarded to
            the model as conversational context. Annotated List[Any] —
            BUG FIX: the old ``List[str]`` hint did not match the pair
            entries the streamer consumes.

    Returns:
        The final list of ChatMessage objects for the chatbot.
    """
    if not analysis_data:
        return [ChatMessage(role="assistant", content="Please analyze a repository first before asking questions.")]

    thinking_analyzer = ThinkingAnalyzer(model)
    # Drain the stream; only the final snapshot is rendered because this
    # handler is not a generator.
    messages = []
    for msg_update in thinking_analyzer.stream_question_response(
        question,
        analysis_data,
        chat_history
    ):
        messages = msg_update
    return messages
|
|
|
|
|
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # ---- Header ----------------------------------------------------------
    gr.Markdown("""
    # 🔍 GitHub Repository Analyzer with Thinking Process

    Analyze any public GitHub repository using AI. Watch the AI's thought process as it:
    1. 🔍 Analyzes repository structure and patterns
    2. 💡 Generates insights about development practices
    3. 🤔 Shows its thinking while answering your follow-up questions

    Enter a GitHub repository URL (e.g., `https://github.com/owner/repo`)
    """)

    # ---- Inputs ----------------------------------------------------------
    with gr.Row():
        repo_url = gr.Textbox(
            label="GitHub Repository URL",
            placeholder="https://github.com/owner/repo",
            scale=4
        )
        analyze_btn = gr.Button("🔍 Analyze", variant="primary", scale=1)

    # Transient status line shown above the chatbot.
    status_msg = gr.Markdown("", elem_id="status_message")

    with gr.Row():
        chatbot = gr.Chatbot(
            label="Analysis & Discussion",
            height=500,
            show_label=True,
            render_markdown=True,
            type="messages"
        )

    with gr.Row():
        question = gr.Textbox(
            label="Your Question",
            placeholder="Ask about the analysis...",
            scale=4
        )
        ask_btn = gr.Button("🤔 Ask", variant="primary", scale=1)
        clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary", scale=1)

    # ---- Server-side state ----------------------------------------------
    analysis_data = gr.State({})  # dict from RepositoryAnalyzer.analyze()
    chat_history = gr.State([])   # prior (user, assistant) pairs
    msg_store = gr.State("")      # last submitted question

    def clear_outputs():
        """Reset the chat, analysis state, history, and status line."""
        return [], {}, [], ""

    analyze_btn.click(
        fn=lambda: "⏳ Analysis in progress... Watch the thinking process below!",
        inputs=None,
        outputs=status_msg,
        queue=False
    ).then(
        process_analysis,
        inputs=[repo_url],
        outputs=[chatbot, analysis_data]
    ).success(
        # BUG FIX: this string literal was split across two physical lines
        # in the original (a SyntaxError); re-joined onto one line.
        lambda: "✅ Analysis complete! You can now ask questions about the repository.",
        inputs=None,
        outputs=status_msg
    )

    def update_chat(question, history):
        """Record the question in history and clear the input box.

        BUG FIX: history entries are consumed as (user, assistant) pairs by
        the follow-up streamer; the original appended the bare question
        string, which broke that unpacking. Store a pair with an empty
        assistant slot instead.
        """
        history = history or []
        history.append((question, ""))
        return question, history, ""

    ask_btn.click(
        update_chat,
        inputs=[question, chat_history],
        outputs=[msg_store, chat_history, question],
        queue=False
    ).then(
        process_question,
        inputs=[msg_store, analysis_data, chat_history],
        outputs=chatbot
    )

    clear_btn.click(
        clear_outputs,
        inputs=None,
        outputs=[chatbot, analysis_data, chat_history, status_msg],
        queue=False
    )

    # Pressing Enter in the question box behaves like clicking "Ask".
    question.submit(
        update_chat,
        inputs=[question, chat_history],
        outputs=[msg_store, chat_history, question],
        queue=False
    ).then(
        process_question,
        inputs=[msg_store, analysis_data, chat_history],
        outputs=chatbot
    )
|
|
|
|
|
if __name__ == "__main__":
    # Launch configuration: bind every interface on a fixed port, publish a
    # temporary public share link, and enable debug logging.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "debug": True,
    }
    app.launch(**launch_options)