import os
import json
import networkx as nx
from collections import Counter, defaultdict
from typing import Dict, List, Tuple, Any, Optional
from datetime import datetime
import numpy as np
from pyvis.network import Network
import re
import google.generativeai as genai
class RepositoryVisualizer:
"""Handles visualization of GitHub repository data using Enhanced PyVis"""
def __init__(self, config: Any = None, max_nodes: int = 150):
    """
    Set up the node limit and the colour/shape tables shared by all charts.

    Args:
        config: Optional configuration object. When it is given, its
            ``visualization_node_limit`` attribute is used as the node
            limit (150 if the attribute is missing) and ``max_nodes`` is
            ignored.
        max_nodes: Node limit used only when ``config`` is ``None``.
    """
    if config is None:
        node_limit = max_nodes
    else:
        node_limit = getattr(config, 'visualization_node_limit', 150)
    self.max_nodes = node_limit

    # Colours for file nodes, keyed by lower-case extension without the dot;
    # 'default' is the fallback for every other extension.
    file_palette = {
        'py': '#3572A5',       # Python
        'js': '#F7DF1E',       # JavaScript
        'ts': '#3178C6',       # TypeScript
        'jsx': '#61DAFB',      # React JSX
        'tsx': '#61DAFB',      # React TSX
        'html': '#E34F26',     # HTML
        'css': '#563D7C',      # CSS
        'java': '#B07219',     # Java
        'cpp': '#F34B7D',      # C++
        'c': '#A8B9CC',        # C
        'go': '#00ADD8',       # Go
        'md': '#083fa1',       # Markdown
        'json': '#292929',     # JSON
        'default': '#7F7F7F',  # anything else
    }
    # Only the 'file' entry is a nested mapping; the other node types map
    # straight to a single colour string.
    self.node_colors = {
        'file': file_palette,
        'contributor': '#e74c3c',
        'issue': '#3498db',
        'directory': '#2ecc71',
    }

    # Group presets (background colour + shape) used for filtering.
    group_presets = (
        ('files', '#3498db', 'dot'),
        ('contributors', '#e74c3c', 'diamond'),
        ('directories', '#2ecc71', 'triangle'),
        ('issues', '#9b59b6', 'star'),
    )
    self.groups = {
        group: {"color": {"background": background}, "shape": shape}
        for group, background, shape in group_presets
    }
def _get_important_subgraph(self, graph: nx.Graph, max_nodes: int) -> nx.Graph:
    """
    Reduce ``graph`` to its ``max_nodes`` best-connected nodes.

    Nodes are ranked by degree centrality (plain degree if that computation
    fails); ties keep the graph's own node order.

    Args:
        graph: Input graph. It is never modified.
        max_nodes: Maximum number of nodes to keep. Negative values are
            treated as 0.
    Returns:
        ``graph`` itself when it already has at most ``max_nodes`` nodes,
        otherwise an independent, mutable copy of the induced subgraph on
        the top-ranked nodes.
    """
    if len(graph.nodes) <= max_nodes:
        return graph

    try:
        centrality = nx.degree_centrality(graph)
    except Exception:
        # Best-effort fallback: raw degree gives the same ranking.
        centrality = {node: graph.degree(node) for node in graph.nodes()}

    # sorted() is stable, so equally-ranked nodes stay in graph order.
    ranked = sorted(centrality, key=centrality.get, reverse=True)
    keep = ranked[:max(max_nodes, 0)]

    # Graph.subgraph() returns a read-only view; callers such as
    # create_repository_graph add directory nodes afterwards, so hand back
    # a real graph.
    return graph.subgraph(keep).copy()
def _extract_dependencies(self, file_contents: Dict) -> Dict[str, List[str]]:
    """
    Heuristically map each file to the other files it imports or includes.

    Every line of a supported file is scanned with language-specific regular
    expressions. A captured module name or path is resolved to the first
    *other* file in ``file_contents`` that

    * has exactly the referenced file name (path-style imports only, e.g.
      ``#include "vec.h"`` or ``import './widget.js'``), or
    * has a base name (without extension) equal to the imported name, to the
      trailing part of a dotted name (``pkg.helpers`` -> ``helpers.py``), or
      to the last path component of a path-style import
      (``./lib/widget`` -> ``widget.js``).

    Args:
        file_contents: Mapping of file path -> dict with a ``'content'``
            string. Entries that are not dicts, or whose content is empty or
            not a string, are skipped.
    Returns:
        ``defaultdict(list)`` mapping a file path to the list of resolved
        target paths, in first-seen order, without duplicates and without
        the file itself. Files with no resolved import have no key.
    """
    quoted = r'[\'"]([^\'"]+)[\'"]'

    script_patterns = [
        r'^\s*import.*from\s+' + quoted,        # ES6: import x from 'mod'
        r'\brequire\(\s*' + quoted + r'\s*\)',  # CommonJS, anywhere in the line
        r'^\s*import\s+' + quoted,              # side-effect import
    ]
    include_patterns = [
        r'^\s*#\s*include\s+[<"]([^>"]+)[>"]',  # C/C++ include
    ]
    script_exts = ('js', 'jsx', 'ts', 'tsx', 'mjs', 'cjs')
    include_exts = ('cpp', 'c', 'h', 'hpp', 'cc', 'cxx')

    import_patterns = {
        'py': [
            r'^\s*import\s+(\w+)',               # import module
            r'^\s*from\s+(\w+)',                 # from module import
            r'^\s*import\s+([\w.]+)',            # import module.submodule
            r'^\s*from\s+([\w.]+)\s+import\b',   # from pkg.module import
        ],
        'java': [
            r'^\s*import\s+([\w.]+)',            # Java import
        ],
        'go': [
            r'^\s*import\s+' + quoted,           # Go single import
            r'^\s*import\s+\(\s*' + quoted,      # Go grouped import, same line
        ],
    }
    import_patterns.update(dict.fromkeys(script_exts, script_patterns))
    import_patterns.update(dict.fromkeys(include_exts, include_patterns))

    # Languages whose import statements name files/paths rather than modules.
    path_style = set(script_exts) | set(include_exts)

    # Compile once instead of once per line.
    compiled = {
        ext: [re.compile(pattern) for pattern in patterns]
        for ext, patterns in import_patterns.items()
    }

    # (path, file name, file name without extension) for every candidate.
    targets = []
    for path in file_contents:
        name = os.path.basename(path)
        targets.append((path, name, os.path.splitext(name)[0]))

    def resolve(imported, source, by_path):
        """Return the first file other than ``source`` matching ``imported``."""
        stem = None
        if by_path:
            leaf = imported.replace('\\', '/').rstrip('/').rsplit('/', 1)[-1]
            stem = os.path.splitext(leaf)[0]
            # An exact file-name hit beats a same-stem file of another type.
            for path, name, _ in targets:
                if path != source and name == leaf:
                    return path
        for path, _, module in targets:
            if path == source:
                continue
            if (imported == module
                    or imported.endswith('.' + module)
                    or (stem and stem == module)):
                return path
        return None

    dependencies = defaultdict(list)
    for filename, file_data in file_contents.items():
        ext = os.path.splitext(filename)[1].lstrip('.').lower()
        patterns = compiled.get(ext)
        if not patterns or not isinstance(file_data, dict):
            continue

        content = file_data.get('content', '')
        if not content or not isinstance(content, str):
            continue

        by_path = ext in path_style
        seen = set()  # several patterns/lines may resolve to the same target
        for line in content.splitlines():
            for pattern in patterns:
                match = pattern.search(line)
                if not match:
                    continue
                target = resolve(match.group(1), filename, by_path)
                if target is not None and target not in seen:
                    seen.add(target)
                    dependencies[filename].append(target)

    return dependencies
def _format_size(self, size_bytes: int) -> str:
    """
    Render a byte count as a short human-readable string.

    Args:
        size_bytes: Size in bytes
    Returns:
        ``"<n> bytes"`` below 1024, ``"<x.x> KB"`` below 1024 * 1024, and
        ``"<x.x> MB"`` for everything larger.
    """
    kib = 1024
    mib = kib * kib

    # Check the largest unit first so each branch is a plain guard clause.
    if size_bytes >= mib:
        return f"{size_bytes / mib:.1f} MB"
    if size_bytes >= kib:
        return f"{size_bytes / kib:.1f} KB"
    return f"{size_bytes} bytes"
def _add_directory_nodes(self, graph: nx.Graph) -> None:
    """
    Insert directory nodes and 'parent' edges so the graph mirrors the tree.

    Every node whose ``type`` attribute is ``'file'`` is treated as a
    '/'-separated path. Each ancestor directory becomes a node of type
    ``'directory'`` (unless a node with that id already exists), and every
    file and directory is linked to its immediate parent with an edge of
    type ``'parent'``.

    Args:
        graph: NetworkX graph to modify in place
    """
    files = [node for node, attrs in graph.nodes(data=True)
             if attrs.get('type') == 'file']

    # Collect every ancestor path ("a", "a/b", "a/b/c", ...) of every file.
    dir_paths = set()
    for path in files:
        segments = [seg for seg in os.path.dirname(path).split('/') if seg]
        for depth in range(1, len(segments) + 1):
            dir_paths.add('/'.join(segments[:depth]))

    for dir_path in dir_paths:
        if dir_path not in graph:
            graph.add_node(dir_path, type='directory')

    # Files first, then directories: link each to its immediate parent when
    # that parent is present in the graph.
    for child in [*files, *dir_paths]:
        parent = os.path.dirname(child)
        if parent and parent in graph:
            graph.add_edge(child, parent, type='parent')
def create_repository_graph(self, knowledge_graph: nx.Graph, output_path: str = "repo_graph.html") -> str:
    """
    Render the repository knowledge graph as an interactive PyVis page.

    The input graph is copied, trimmed to ``self.max_nodes`` nodes when it
    is larger, extended with directory nodes, and written to
    ``output_path``.

    Args:
        knowledge_graph: NetworkX graph of repository data. Nodes may carry
            a ``type`` attribute ('file', 'contributor', 'directory', ...);
            edges may carry ``type`` and ``weight`` attributes.
        output_path: Path to save the HTML visualization
    Returns:
        Path to the saved HTML file
    """
    # Work on a copy so the caller's graph is never modified.
    graph = knowledge_graph.copy()

    if len(graph.nodes()) > self.max_nodes:
        print(f"Graph has {len(graph.nodes())} nodes, limiting to {self.max_nodes} most important nodes")
        # A NetworkX subgraph view is read-only; copy it so that directory
        # nodes can be added below.
        graph = self._get_important_subgraph(graph, self.max_nodes).copy()

    self._add_directory_nodes(graph)

    net = Network(height="750px", width="100%", notebook=False, directed=False,
                  bgcolor="#222222", font_color="white", select_menu=True, filter_menu=True)

    # Hidden placeholder nodes that carry the group presets.
    for group_name, group_props in self.groups.items():
        net.add_node(f"group_{group_name}", hidden=True, **group_props)

    net.barnes_hut(gravity=-80000, central_gravity=0.3, spring_length=250, spring_strength=0.001,
                   damping=0.09, overlap=0)

    for node_id, node_data in graph.nodes(data=True):
        net.add_node(node_id, searchable=True,
                     **self._repository_node_options(node_id, node_data))

    for source, target, data in graph.edges(data=True):
        net.add_edge(source, target,
                     **self._repository_edge_options(source, target, data))

    # NOTE(review): the earlier post-processing step injected custom HTML via
    # `html_before`/`html_after`, but those snippets and their anchor strings
    # are missing from the source; re-add them here once the markup is known.
    net.save_graph(output_path)
    return output_path

def _repository_node_options(self, node_id: Any, node_data: Dict) -> Dict[str, Any]:
    """Return the PyVis keyword options (label, title, colour, ...) for one node."""
    fallback_color = "#7F7F7F"
    node_type = node_data.get('type', 'unknown')
    options = {'label': node_id, 'title': node_id, 'shape': "dot", 'size': 15}

    # Palette entries are either a per-extension dict ('file') or a single
    # colour string (every other type); calling .get() on the latter used to
    # raise AttributeError.
    palette_entry = self.node_colors.get(node_type)
    if isinstance(palette_entry, dict):
        options['color'] = palette_entry.get('default', fallback_color)
    elif isinstance(palette_entry, str):
        options['color'] = palette_entry
    else:
        options['color'] = fallback_color

    if node_type == 'file':
        ext = os.path.splitext(node_id)[1].lstrip('.').lower() or 'default'
        file_type = node_data.get('file_type', 'unknown')
        file_size = node_data.get('size', 0) or 0
        options['color'] = self.node_colors['file'].get(ext, self.node_colors['file']['default'])
        options['label'] = os.path.basename(node_id)
        # NOTE(review): the original tooltip markup was lost; this wording is
        # rebuilt from the values the code computed for it - confirm.
        options['title'] = "\n".join([
            f"File: {node_id}",
            f"Type: {file_type}",
            f"Size: {self._format_size(file_size)}",
        ])
        options['group'] = 'files'
    elif node_type == 'contributor':
        contributions = node_data.get('contributions', 0) or 0
        options['color'] = self.node_colors['contributor']
        options['shape'] = "diamond"
        # Grow with activity, capped at 30.
        options['size'] = min(30, 15 + contributions / 20)
        options['title'] = "\n".join([
            f"Contributor: {node_id}",
            f"Contributions: {contributions}",
        ])
        options['group'] = 'contributors'
    elif node_type == 'directory':
        label = os.path.basename(node_id) if node_id else "/"
        options['color'] = self.node_colors['directory']
        options['shape'] = "triangle"
        options['label'] = label
        options['title'] = "\n".join([
            f"Directory: {label}",
            f"Path: {node_id}",
        ])
        options['group'] = 'directories'

    return options

def _repository_edge_options(self, source: Any, target: Any, data: Dict) -> Dict[str, Any]:
    """Return the PyVis keyword options (title, width, colour, smooth) for one edge."""
    edge_type = data.get('type', 'default')
    weight = data.get('weight', 1)

    title = f"{source} → {target}"
    color = "#ffffff80"  # semi-transparent white

    if edge_type == 'co-occurrence':
        title = f"Co-occurred in {weight} commits\nFiles modified together frequently"
        color = "#9b59b680"  # semi-transparent purple
    elif edge_type == 'contribution':
        title = f"Modified {weight} times\nBy this contributor"
        color = "#e74c3c80"  # semi-transparent red
    elif edge_type == 'imports':
        title = "Imports\nThis file imports the target"
        color = "#3498db80"  # semi-transparent blue
    elif edge_type == 'parent':
        # NOTE(review): tooltip text and colour for 'parent' edges were not
        # recoverable from the source; confirm the wording and colour.
        title = "Parent directory"
        color = "#2ecc7180"  # semi-transparent green

    return {
        'title': title,
        # Heavier edges are drawn thicker, capped at 10.
        'width': min(10, 1 + weight / 5),
        'color': color,
        'smooth': True,
    }
def create_contributor_network(self, contributors: Dict, commits: List[Dict],
                               output_path: str = "contributor_network.html") -> str:
    """
    Create a network visualization of contributor relationships.

    Two contributors are linked when both list the same file under
    ``files_modified``; the edge weight is the number of such shared files.

    Args:
        contributors: Mapping of login -> dict. The keys read here are
            ``'contributions'`` (defaults to 0) and ``'files_modified'``
            (list of dicts with a ``'filename'`` key).
        commits: List of commit data. Not read by this method; kept for
            interface compatibility.
        output_path: Path to save the HTML visualization
    Returns:
        Path to the saved HTML file
    """
    from itertools import combinations  # only needed by this method

    graph = nx.Graph()
    for login, data in contributors.items():
        graph.add_node(login, type='contributor',
                       contributions=data.get('contributions', 0) or 0)

    # file name -> set of logins that modified it
    file_authors = defaultdict(set)
    for login, data in contributors.items():
        for file_data in data.get('files_modified', []) or []:
            filename = file_data.get('filename', '')
            if filename:
                file_authors[filename].add(login)

    # Visit each unordered pair once per file. Iterating ordered pairs on an
    # undirected graph would bump the same edge twice and double the weight.
    for filename, authors in file_authors.items():
        for author1, author2 in combinations(authors, 2):
            if graph.has_edge(author1, author2):
                edge = graph[author1][author2]
                edge['weight'] += 1
                edge['files'].add(filename)
            else:
                graph.add_edge(author1, author2, weight=1, files={filename}, type='collaboration')

    net = Network(height="750px", width="100%", notebook=False, directed=False,
                  bgcolor="#222222", font_color="white", select_menu=True, filter_menu=True)
    net.barnes_hut(gravity=-5000, central_gravity=0.3, spring_length=150, spring_strength=0.05)

    for login, node_data in graph.nodes(data=True):
        contributions = node_data.get('contributions', 0)
        if contributions > 50:
            activity = "High"
        elif contributions > 20:
            activity = "Medium"
        else:
            activity = "Low"
        tooltip = "\n".join([
            f"Contributor: {login}",
            f"Contributions: {contributions}",
            f"Activity Level: {activity}",
        ])
        net.add_node(login, label=login, title=tooltip,
                     color=self.node_colors['contributor'], shape="dot",
                     # Grow with activity; the bonus is capped at 20.
                     size=15 + min(20, contributions / 10),
                     group='contributors', searchable=True)

    for source, target, data in graph.edges(data=True):
        weight = data.get('weight', 1)
        files = sorted(data.get('files', set()))  # sorted for stable output

        file_list = " ".join(files[:5])
        if len(files) > 5:
            file_list += f" ...and {len(files) - 5} more"

        if weight > 5:
            strength = "Strong"
        elif weight > 2:
            strength = "Medium"
        else:
            strength = "Light"
        tooltip = "\n".join([
            "Collaboration",
            f"Contributors: {source} & {target}",
            f"Shared Files: {weight}",
            f"Collaboration Strength: {strength}",
            "Example Files:",
            file_list,
        ])
        # Thicker edge for stronger collaboration, capped at 10.
        net.add_edge(source, target, title=tooltip, width=min(10, 1 + weight / 2))

    # NOTE(review): the earlier post-processing step referenced an undefined
    # `html_before` and replaced the empty string, which would insert the
    # snippet between every character of the page. It is omitted until the
    # real markup and anchor strings are known.
    net.save_graph(output_path)
    return output_path
def create_file_dependency_graph(self, file_contents: Dict, output_path: str = "dependency_graph.html") -> str:
    """
    Create a directed graph of file dependencies based on imports.

    Dependencies come from ``self._extract_dependencies``; the PyVis network
    is built directly, without an intermediate NetworkX graph. Only files
    that take part in at least one dependency appear in the output.

    Args:
        file_contents: Mapping of file path -> dict. Besides what
            ``_extract_dependencies`` needs, the optional ``'type'`` and
            ``'size'`` entries are shown in node tooltips.
        output_path: Path to save the HTML visualization
    Returns:
        Path to the saved HTML file
    """
    net = Network(height="750px", width="100%", notebook=False, directed=True,
                  bgcolor="#222222", font_color="white", select_menu=True, filter_menu=True)
    net.barnes_hut(gravity=-10000, central_gravity=0.3, spring_length=200)

    dependencies = self._extract_dependencies(file_contents)
    file_palette = self.node_colors['file']
    added_nodes = set()

    def add_file_node(path):
        """Add ``path`` to the network once, styled by its extension."""
        if path in added_nodes:
            return
        ext = os.path.splitext(path)[1].lstrip('.').lower() or 'default'
        file_data = file_contents.get(path, {})
        if not isinstance(file_data, dict):
            file_data = {}
        file_type = file_data.get('type', 'unknown')
        file_size = file_data.get('size', 0) or 0
        # NOTE(review): the original tooltip markup was lost; this wording is
        # rebuilt from the values the code computed for it - confirm.
        tooltip = "\n".join([
            f"File: {path}",
            f"Type: {file_type}",
            f"Size: {self._format_size(file_size)}",
        ])
        net.add_node(path, label=os.path.basename(path), title=tooltip,
                     color=file_palette.get(ext, file_palette['default']),
                     shape="dot", size=15, group=ext, searchable=True)
        added_nodes.add(path)

    # PyVis requires both endpoints to exist before an edge is added.
    for source, targets in dependencies.items():
        add_file_node(source)
        for target in targets:
            add_file_node(target)

    for source, targets in dependencies.items():
        # dict.fromkeys() drops repeated targets but keeps their order, so
        # each dependency is drawn as a single arrow.
        for target in dict.fromkeys(targets):
            tooltip = f"{os.path.basename(source)} imports {os.path.basename(target)}"
            net.add_edge(source, target, title=tooltip)

    # NOTE(review): the earlier post-processing step referenced an undefined
    # `html_before` and replaced the empty string, which would insert the
    # snippet between every character of the page. It is omitted until the
    # real markup and anchor strings are known.
    net.save_graph(output_path)
    return output_path
def create_commit_activity_chart(self, commits: List[Dict], output_path: str = "commit_activity.html") -> str:
    """
    Write an HTML page summarising commit activity per month.

    Commits are bucketed by ``YYYY-MM``, overall, per author and per file
    extension. Commits without a ``date`` are ignored.

    Args:
        commits: List of commit dicts. The keys read here are ``'date'``
            (an object with ``strftime``), ``'author'`` (defaults to
            ``'Unknown'``) and ``'files'`` (list of dicts with a
            ``'filename'`` key).
        output_path: Path to save the HTML visualization
    Returns:
        Path to the saved HTML file
    """
    monthly_data = defaultdict(int)
    author_data = defaultdict(lambda: defaultdict(int))
    file_type_data = defaultdict(lambda: defaultdict(int))

    for commit in commits:
        date = commit.get('date')
        if not date:
            continue
        author = commit.get('author', 'Unknown')
        month_key = date.strftime('%Y-%m')
        monthly_data[month_key] += 1
        author_data[author][month_key] += 1
        for file in commit.get('files', []) or []:
            # The extension keeps its leading dot (e.g. '.py').
            ext = os.path.splitext(file.get('filename', ''))[1].lower()
            if ext:
                file_type_data[ext][month_key] += 1

    # 'YYYY-MM' keys sort chronologically as plain strings.
    sorted_data = sorted(monthly_data.items())
    month_keys = [month for month, _ in sorted_data]

    author_colors = [
        '#3498db', '#e74c3c', '#2ecc71', '#f39c12', '#9b59b6',
        '#1abc9c', '#d35400', '#34495e', '#16a085', '#c0392b'
    ]
    # Rank by total commits so the cut-off really keeps the ten most active
    # authors rather than the first ten encountered; ties keep first-seen
    # order because sorted() is stable.
    top_authors = sorted(author_data,
                         key=lambda name: sum(author_data[name].values()),
                         reverse=True)[:10]
    author_datasets = []
    for index, author in enumerate(top_authors):
        color = author_colors[index % len(author_colors)]
        per_month = author_data[author]
        author_datasets.append({
            'label': author,
            # .get() avoids inserting zero entries into the defaultdict.
            'data': [per_month.get(month, 0) for month in month_keys],
            'backgroundColor': color + '80',
            'borderColor': color,
            'borderWidth': 1
        })

    html = """
Repository Activity Analysis
Repository Commit Activity
0
Total Commits
0
Active Months
0
Avg. Commits per Month
0
Contributors
Activity Overview
By Contributor
By File Type
Contributor Commit Summary
Contributor
Commits
Percentage
First Commit
Last Commit
File Type Statistics
File Type
Changes
Percentage
"""
    # NOTE(review): the template above contains none of the placeholder
    # tokens replaced below, so these substitutions are currently no-ops -
    # confirm the template against the intended page markup.
    replacements = {
        'CHART_LABELS': json.dumps(month_keys),
        'CHART_DATA': json.dumps([count for _, count in sorted_data]),
        'AUTHOR_DATA': json.dumps(author_data),
        'AUTHOR_DATASETS': json.dumps(author_datasets),
        'FILE_TYPE_DATA': json.dumps(file_type_data),
    }
    for placeholder, payload in replacements.items():
        html = html.replace(placeholder, payload)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(html)
    return output_path
def create_code_change_heatmap(self, commits: List[Dict], output_path: str = "code_changes.html") -> str:
"""
Create an enhanced heatmap showing which files are changed most frequently
Args:
commits: List of commit data
output_path: Path to save the HTML visualization
Returns:
Path to the saved HTML file
"""
# Count file modifications
file_changes = Counter()
file_authors = defaultdict(Counter)
file_dates = defaultdict(list)
for commit in commits:
author = commit.get('author', 'Unknown')
date = commit.get('date')
for file_data in commit.get('files', []):
filename = file_data.get('filename', '')
if filename:
file_changes[filename] += 1
file_authors[filename][author] += 1
if date:
file_dates[filename].append(date)