import os import json import networkx as nx from collections import Counter, defaultdict from typing import Dict, List, Tuple, Any, Optional from datetime import datetime import numpy as np from pyvis.network import Network import re import google.generativeai as genai class RepositoryVisualizer: """Handles visualization of GitHub repository data using Enhanced PyVis""" def __init__(self, config: Any = None, max_nodes: int = 150): """ Initialize the repository visualizer Args: config: Configuration object (optional) max_nodes: Maximum number of nodes to include in visualizations (if config not provided) """ # Handle both config object and direct parameters if config is not None: self.max_nodes = getattr(config, 'visualization_node_limit', 150) else: self.max_nodes = max_nodes self.node_colors = { 'file': { 'py': '#3572A5', # Python (blue) 'js': '#F7DF1E', # JavaScript (yellow) 'ts': '#3178C6', # TypeScript (blue) 'jsx': '#61DAFB', # React JSX (cyan) 'tsx': '#61DAFB', # React TSX (cyan) 'html': '#E34F26', # HTML (orange) 'css': '#563D7C', # CSS (purple) 'java': '#B07219', # Java (brown) 'cpp': '#F34B7D', # C++ (pink) 'c': '#A8B9CC', # C (light blue) 'go': '#00ADD8', # Go (blue) 'md': '#083fa1', # Markdown (blue) 'json': '#292929', # JSON (dark gray) 'default': '#7F7F7F' # Default (gray) }, 'contributor': '#e74c3c', # Contributor (red) 'issue': '#3498db', # Issue (blue) 'directory': '#2ecc71' # Directory (green) } # Add group definitions for visualization self.groups = { 'files': {"color": {"background": "#3498db"}, "shape": "dot"}, 'contributors': {"color": {"background": "#e74c3c"}, "shape": "diamond"}, 'directories': {"color": {"background": "#2ecc71"}, "shape": "triangle"}, 'issues': {"color": {"background": "#9b59b6"}, "shape": "star"} } def _get_important_subgraph(self, graph: nx.Graph, max_nodes: int) -> nx.Graph: """ Get a subgraph containing the most important nodes Args: graph: Input graph max_nodes: Maximum number of nodes to include Returns: Subgraph with 
most important nodes """ # Return original graph if it's already small enough if len(graph.nodes) <= max_nodes: return graph # Try different centrality measures try: # First try degree centrality centrality = nx.degree_centrality(graph) except: # Fall back to simpler degree if that fails centrality = {node: graph.degree(node) for node in graph.nodes()} # Sort nodes by importance sorted_nodes = sorted(centrality.items(), key=lambda x: x[1], reverse=True) # Take top nodes top_nodes = [node for node, _ in sorted_nodes[:max_nodes]] # Create subgraph return graph.subgraph(top_nodes) def _extract_dependencies(self, file_contents: Dict) -> Dict[str, List[str]]: """ Extract file dependencies based on imports and includes Args: file_contents: Dictionary of file contents Returns: Dictionary mapping files to their dependencies """ dependencies = defaultdict(list) # Map of common import patterns by language import_patterns = { 'py': [ r'^\s*import\s+(\w+)', # import module r'^\s*from\s+(\w+)', # from module import r'^\s*import\s+([\w.]+)' # import module.submodule ], 'js': [ r'^\s*import.*from\s+[\'"](.+)[\'"]', # ES6 import r'^\s*require\([\'"](.+)[\'"]\)', # CommonJS require r'^\s*import\s+[\'"](.+)[\'"]' # Side-effect import ], 'java': [ r'^\s*import\s+([\w.]+)' # Java import ], 'cpp': [ r'^\s*#include\s+[<"](.+)[>"]' # C/C++ include ], 'go': [ r'^\s*import\s+[\'"](.+)[\'"]', # Go single import r'^\s*import\s+\(\s*[\'"](.+)[\'"]' # Go multiple imports ] } # Process each file for filename, file_data in file_contents.items(): # Get file extension _, ext = os.path.splitext(filename) ext = ext.lstrip('.').lower() if ext else '' # Skip if we don't have patterns for this language if ext not in import_patterns: continue # Get content content = file_data.get('content', '') if not content: continue # Search for imports lines = content.split('\n') patterns = import_patterns[ext] for line in lines: for pattern in patterns: # Find imports import_match = re.search(pattern, line) if 
import_match: imported = import_match.group(1) # Look for matching files for target_file in file_contents.keys(): target_name = os.path.basename(target_file) target_module = os.path.splitext(target_name)[0] # Check if this might be the imported file if imported == target_module or imported.endswith('.' + target_module): dependencies[filename].append(target_file) break return dependencies def _format_size(self, size_bytes: int) -> str: """ Format file size in human-readable format Args: size_bytes: Size in bytes Returns: Formatted size string """ if size_bytes < 1024: return f"{size_bytes} bytes" elif size_bytes < 1024 * 1024: return f"{size_bytes / 1024:.1f} KB" else: return f"{size_bytes / (1024 * 1024):.1f} MB" def _add_directory_nodes(self, graph: nx.Graph) -> None: """ Add directory nodes to graph for hierarchical structure Args: graph: NetworkX graph to modify """ file_nodes = [node for node, data in graph.nodes(data=True) if data.get('type') == 'file'] # Extract unique directories directories = set() for filepath in file_nodes: path_parts = os.path.dirname(filepath).split('/') current_path = "" for part in path_parts: if not part: # Skip empty parts continue if current_path: current_path = f"{current_path}/{part}" else: current_path = part directories.add(current_path) # Add directory nodes for directory in directories: if directory not in graph: graph.add_node(directory, type='directory') # Connect files to their parent directories for filepath in file_nodes: parent_dir = os.path.dirname(filepath) if parent_dir and parent_dir in graph: graph.add_edge(filepath, parent_dir, type='parent') # Connect directories to their parents for directory in directories: parent_dir = os.path.dirname(directory) if parent_dir and parent_dir in graph: graph.add_edge(directory, parent_dir, type='parent') def create_repository_graph(self, knowledge_graph: nx.Graph, output_path: str = "repo_graph.html") -> str: """ Create an interactive visualization of the repository structure 
Enhanced with better physics, filtering, and groups Args: knowledge_graph: NetworkX graph of repository data output_path: Path to save the HTML visualization Returns: Path to the saved HTML file """ # Create a copy of the graph to avoid modifying the original graph = knowledge_graph.copy() # Limit the number of nodes if necessary if len(graph.nodes()) > self.max_nodes: print(f"Graph has {len(graph.nodes())} nodes, limiting to {self.max_nodes} most important nodes") graph = self._get_important_subgraph(graph, self.max_nodes) # Extract directories from file paths for hierarchical structure self._add_directory_nodes(graph) # Create PyVis network with improved settings net = Network(height="750px", width="100%", notebook=False, directed=False, bgcolor="#222222", font_color="white", select_menu=True, filter_menu=True) # Add custom groups for better filtering for group_name, group_props in self.groups.items(): net.add_node(f"group_{group_name}", hidden=True, **group_props) # Customize physics for better visualization net.barnes_hut(gravity=-80000, central_gravity=0.3, spring_length=250, spring_strength=0.001, damping=0.09, overlap=0) # Add nodes with appropriate styling and interactive features for node_id in graph.nodes(): node_data = graph.nodes[node_id] node_type = node_data.get('type', 'unknown') # Default node properties title = node_id color = self.node_colors.get(node_type, {}).get('default', "#7F7F7F") shape = "dot" size = 15 group = None if node_type == 'file': # Get file extension _, ext = os.path.splitext(node_id) ext = ext.lstrip('.').lower() if ext else 'default' # Set color based on file extension color = self.node_colors['file'].get(ext, self.node_colors['file']['default']) # Use filename as label label = os.path.basename(node_id) # Set title with additional info file_type = node_data.get('file_type', 'unknown') file_size = node_data.get('size', 0) title = f"

{label}


Path: {node_id}
Type: {file_type}
Size: {self._format_size(file_size)}
" # Set group for filtering group = 'files' elif node_type == 'contributor': # Contributor styling color = self.node_colors['contributor'] shape = "diamond" # Scale size based on contributions contributions = node_data.get('contributions', 0) size = min(30, 15 + contributions / 20) label = node_id title = f"

Contributor: {node_id}


Contributions: {contributions}
" # Set group for filtering group = 'contributors' elif node_type == 'directory': # Directory styling color = self.node_colors['directory'] shape = "triangle" label = os.path.basename(node_id) if node_id else "/" title = f"

Directory: {label}


Path: {node_id}
" # Set group for filtering group = 'directories' else: # Default styling label = node_id # Add node to network with searchable property and group net.add_node(node_id, label=label, title=title, color=color, shape=shape, size=size, group=group, searchable=True) # Add edges with appropriate styling and information for source, target, data in graph.edges(data=True): # Default edge properties width = 1 color = "#ffffff80" # Semi-transparent white title = f"{source} → {target}" smooth = True # Enable smooth edges # Adjust based on edge data edge_type = data.get('type', 'default') weight = data.get('weight', 1) # Scale width based on weight width = min(10, 1 + weight / 5) if edge_type == 'co-occurrence': title = f"
Co-occurred in {weight} commits
Files modified together frequently
" color = "#9b59b680" # Semi-transparent purple elif edge_type == 'contribution': title = f"
Modified {weight} times
By this contributor
" color = "#e74c3c80" # Semi-transparent red elif edge_type == 'imports': title = f"
Imports
This file imports the target
" color = "#3498db80" # Semi-transparent blue elif edge_type == 'parent': title = f"
Parent directory
" color = "#2ecc7180" # Semi-transparent green width = 1 # Fixed width for parent relationships # Add edge to network with additional properties net.add_edge(source, target, title=title, width=width, color=color, smooth=smooth, selectionWidth=width*1.5) # Configure network options with improved UI and interactivity options = """ var options = { "nodes": { "borderWidth": 2, "borderWidthSelected": 4, "opacity": 0.9, "font": { "size": 12, "face": "Tahoma" }, "shadow": true }, "edges": { "color": { "inherit": false }, "smooth": { "type": "continuous", "forceDirection": "none" }, "shadow": true, "selectionWidth": 3 }, "physics": { "barnesHut": { "gravitationalConstant": -80000, "centralGravity": 0.3, "springLength": 250, "springConstant": 0.001, "damping": 0.09, "avoidOverlap": 0.1 }, "maxVelocity": 50, "minVelocity": 0.1, "stabilization": { "enabled": true, "iterations": 1000, "updateInterval": 100, "onlyDynamicEdges": false, "fit": true } }, "interaction": { "tooltipDelay": 200, "hideEdgesOnDrag": true, "multiselect": true, "hover": true, "navigationButtons": true, "keyboard": { "enabled": true, "speed": { "x": 10, "y": 10, "zoom": 0.1 }, "bindToWindow": true } }, "configure": { "enabled": true, "filter": ["physics", "nodes", "edges"], "showButton": true }, "groups": { "files": {"color": {"background": "#3498db"}, "shape": "dot"}, "contributors": {"color": {"background": "#e74c3c"}, "shape": "diamond"}, "directories": {"color": {"background": "#2ecc71"}, "shape": "triangle"}, "issues": {"color": {"background": "#9b59b6"}, "shape": "star"} } } """ net.set_options(options) # Add search functionality and control buttons to the HTML html_before = """ Repository Visualization

Legend

Files
Contributors
Directories
Issues
Co-occurrence
Contribution
Imports
Parent
""" html_after = """ """ # Convert file_stats to JSON for the template file_stats_json = json.dumps(file_stats) # Replace placeholder with actual data html = html.replace('FILE_STATS', file_stats_json) # Save to file with open(output_path, 'w', encoding='utf-8') as f: f.write(html) return output_path # Save network visualization to HTML file with custom HTML net.save_graph(output_path) # Read the generated file with open(output_path, 'r', encoding='utf-8') as f: net_html = f.read() # Insert our custom HTML net_html = net_html.replace('', html_before).replace('', html_after) # Write the modified file with open(output_path, 'w', encoding='utf-8') as f: f.write(net_html) return output_path def create_contributor_network(self, contributors: Dict, commits: List[Dict], output_path: str = "contributor_network.html") -> str: """ Create an enhanced network visualization of contributor relationships Args: contributors: Dictionary of contributor data commits: List of commit data output_path: Path to save the HTML visualization Returns: Path to the saved HTML file """ # Create graph for contributor relationships graph = nx.Graph() # Add contributor nodes for login, data in contributors.items(): graph.add_node(login, type='contributor', contributions=data['contributions']) # Find file co-authorship to establish contributor relationships file_authors = defaultdict(set) # Group files by authors for login, data in contributors.items(): for file_data in data.get('files_modified', []): filename = file_data.get('filename', '') if filename: file_authors[filename].add(login) # Create edges between contributors who worked on the same files for filename, authors in file_authors.items(): if len(authors) > 1: for author1 in authors: for author2 in authors: if author1 != author2: if graph.has_edge(author1, author2): graph[author1][author2]['weight'] += 1 graph[author1][author2]['files'].add(filename) else: graph.add_edge(author1, author2, weight=1, files={filename}, type='collaboration') # 
Create Pyvis network with enhanced settings net = Network(height="750px", width="100%", notebook=False, directed=False, bgcolor="#222222", font_color="white", select_menu=True, filter_menu=True) # Configure physics net.barnes_hut(gravity=-5000, central_gravity=0.3, spring_length=150, spring_strength=0.05) # Add nodes with improved styling for login in graph.nodes(): # Get node data node_data = graph.nodes[login] contributions = node_data.get('contributions', 0) # Scale size based on contributions size = 15 + min(20, contributions / 10) # Create detailed HTML tooltip tooltip = f"""

Contributor: {login}


Contributions: {contributions}
Activity Level: {"High" if contributions > 50 else "Medium" if contributions > 20 else "Low"}
""" # Add node with improved metadata net.add_node(login, label=login, title=tooltip, color=self.node_colors['contributor'], shape="dot", size=size, group='contributors', searchable=True) # Add edges with enhanced information for source, target, data in graph.edges(data=True): weight = data.get('weight', 1) files = data.get('files', set()) # Scale width based on collaboration strength width = min(10, 1 + weight / 2) # Create a better-formatted tooltip with file information file_list = "
".join(list(files)[:5]) if len(files) > 5: file_list += f"
...and {len(files) - 5} more" tooltip = f"""

Collaboration


Contributors: {source} & {target}
Shared Files: {weight}
Collaboration Strength: {"Strong" if weight > 5 else "Medium" if weight > 2 else "Light"}

Example Files:
{file_list}
""" # Add edge with enhanced styling color = "#3498db" + hex(min(255, 80 + (weight * 10)))[2:].zfill(2) # Vary opacity by weight net.add_edge(source, target, title=tooltip, width=width, color=color, smooth=True) # Configure options with enhanced UI options = """ var options = { "nodes": { "borderWidth": 2, "borderWidthSelected": 4, "opacity": 0.9, "font": { "size": 14, "face": "Tahoma" }, "shadow": true }, "edges": { "color": { "inherit": false }, "smooth": { "type": "continuous", "forceDirection": "horizontal" }, "shadow": true, "selectionWidth": 3 }, "physics": { "barnesHut": { "gravitationalConstant": -5000, "centralGravity": 0.3, "springLength": 150, "springConstant": 0.05, "damping": 0.09, "avoidOverlap": 0.2 }, "stabilization": { "enabled": true, "iterations": 1000 } }, "interaction": { "hover": true, "tooltipDelay": 200, "hideEdgesOnDrag": true, "multiselect": true, "navigationButtons": true }, "configure": { "enabled": true, "filter": ["physics", "nodes", "edges"], "showButton": true } } """ net.set_options(options) # Add search and controls similar to repository graph html_before = """ Contributor Network

Network Statistics

Contributors: 0

Collaborations: 0

Avg. Collaborations: 0

Click on a contributor to see their relationships

""" html_after = """ """ # Save to HTML file with custom HTML net.save_graph(output_path) # Read the generated file with open(output_path, 'r', encoding='utf-8') as f: net_html = f.read() # Insert our custom HTML net_html = net_html.replace('', html_before).replace('', html_after) # Write the modified file with open(output_path, 'w', encoding='utf-8') as f: f.write(net_html) return output_path def create_file_dependency_graph(self, file_contents: Dict, output_path: str = "dependency_graph.html") -> str: """ Create an enhanced graph of file dependencies based on imports and references Using direct PyVis implementation without relying on NetworkX Args: file_contents: Dictionary of file contents output_path: Path to save the HTML visualization Returns: Path to the saved HTML file """ # Create PyVis network directly net = Network(height="750px", width="100%", notebook=False, directed=True, bgcolor="#222222", font_color="white", select_menu=True, filter_menu=True) # Customize physics net.barnes_hut(gravity=-10000, central_gravity=0.3, spring_length=200) # Process files to find dependencies dependencies = self._extract_dependencies(file_contents) # Keep track of added nodes to avoid duplicates added_nodes = set() # Add file nodes with improved styling for filename, targets in dependencies.items(): if filename not in added_nodes: # Get file extension for color _, ext = os.path.splitext(filename) ext = ext.lstrip('.').lower() if ext else 'default' color = self.node_colors['file'].get(ext, self.node_colors['file']['default']) # Use filename as label label = os.path.basename(filename) # Enhanced tooltip with file information file_data = file_contents.get(filename, {}) file_type = file_data.get('type', 'unknown') file_size = file_data.get('size', 0) tooltip = f"""

{label}


Path: {filename}
Type: {file_type}
Size: {self._format_size(file_size)}
Dependencies: {len(targets)}
""" # Add node with improved styling and metadata net.add_node(filename, label=label, title=tooltip, color=color, shape="dot", size=15, group=ext, searchable=True) added_nodes.add(filename) # Add target nodes if not already added for target in targets: if target not in added_nodes: # Get file extension for color _, ext = os.path.splitext(target) ext = ext.lstrip('.').lower() if ext else 'default' color = self.node_colors['file'].get(ext, self.node_colors['file']['default']) # Use filename as label label = os.path.basename(target) # Enhanced tooltip with file information file_data = file_contents.get(target, {}) file_type = file_data.get('type', 'unknown') file_size = file_data.get('size', 0) tooltip = f"""

{label}


Path: {target}
Type: {file_type}
Size: {self._format_size(file_size)}
""" # Add node with improved styling and metadata net.add_node(target, label=label, title=tooltip, color=color, shape="dot", size=15, group=ext, searchable=True) added_nodes.add(target) # Add edges with improved styling for source, targets in dependencies.items(): for target in targets: # Enhanced tooltip with relationship information tooltip = f"""

Dependency


{os.path.basename(source)} imports {os.path.basename(target)}
""" # Add edge with improved styling net.add_edge(source, target, title=tooltip, arrows="to", color="#2ecc7180", smooth=True, width=1.5) # Configure options with improved UI for dependencies options = """ var options = { "nodes": { "borderWidth": 2, "opacity": 0.9, "font": { "size": 12, "face": "Tahoma" }, "shadow": true }, "edges": { "color": { "inherit": false }, "smooth": { "type": "continuous", "roundness": 0.6 }, "arrows": { "to": { "enabled": true, "scaleFactor": 0.5 } }, "shadow": true }, "layout": { "hierarchical": { "enabled": true, "direction": "UD", "sortMethod": "directed", "nodeSpacing": 150, "levelSeparation": 150 } }, "physics": { "enabled": false }, "interaction": { "hover": true, "tooltipDelay": 200, "hideEdgesOnDrag": true, "navigationButtons": true }, "configure": { "enabled": true, "filter": ["layout", "nodes", "edges"], "showButton": true } } """ net.set_options(options) # Add search and controls similar to previous graphs html_before = """ File Dependency Graph

Dependency Statistics

Files: 0

Dependencies: 0

Click a file to see its dependencies

""" html_after = """ """ # Save to HTML file with custom HTML net.save_graph(output_path) # Read the generated file with open(output_path, 'r', encoding='utf-8') as f: net_html = f.read() # Insert our custom HTML net_html = net_html.replace('', html_before).replace('', html_after) # Write the modified file with open(output_path, 'w', encoding='utf-8') as f: f.write(net_html) return output_path def create_commit_activity_chart(self, commits: List[Dict], output_path: str = "commit_activity.html") -> str: """ Create an enhanced interactive chart showing commit activity over time Args: commits: List of commit data output_path: Path to save the HTML visualization Returns: Path to the saved HTML file """ # Prepare commit data by month monthly_data = defaultdict(int) author_data = defaultdict(lambda: defaultdict(int)) file_type_data = defaultdict(lambda: defaultdict(int)) for commit in commits: date = commit.get('date') author = commit.get('author', 'Unknown') if date: # Format as year-month month_key = date.strftime('%Y-%m') monthly_data[month_key] += 1 author_data[author][month_key] += 1 # Count file types in this commit for file in commit.get('files', []): filename = file.get('filename', '') ext = os.path.splitext(filename)[1].lower() if ext: file_type_data[ext][month_key] += 1 # Sort by date sorted_data = sorted(monthly_data.items()) # Prepare author data for chart authors = list(author_data.keys()) author_datasets = [] # Generate colors for authors author_colors = [ '#3498db', '#e74c3c', '#2ecc71', '#f39c12', '#9b59b6', '#1abc9c', '#d35400', '#34495e', '#16a085', '#c0392b' ] for i, author in enumerate(authors[:10]): # Limit to top 10 authors color = author_colors[i % len(author_colors)] author_data_points = [] for month_key, _ in sorted_data: author_data_points.append(author_data[author].get(month_key, 0)) author_datasets.append({ 'label': author, 'data': author_data_points, 'backgroundColor': color + '80', 'borderColor': color, 'borderWidth': 1 }) # Create HTML with 
# Chart.js and custom UI
        # NOTE(review): the template text visible here contains none of the
        # placeholder tokens replaced below (CHART_LABELS, CHART_DATA, ...),
        # so in this copy the replace() calls leave `html` unchanged. The
        # template markup looks stripped; confirm against the original file.
        html = """ Repository Activity Analysis

Repository Commit Activity

0
Total Commits
0
Active Months
0
Avg. Commits per Month
0
Contributors
Activity Overview
By Contributor
By File Type

Contributor Commit Summary

Contributor Commits Percentage First Commit Last Commit

File Type Statistics

File Type Changes Percentage
"""

        # Replace placeholders with actual data:
        # month labels and per-month commit counts, in chronological order
        labels_json = json.dumps([d[0] for d in sorted_data])
        data_json = json.dumps([d[1] for d in sorted_data])

        # Author data for chart (per-author, per-month commit counts and the
        # prepared per-author datasets)
        author_data_json = json.dumps(author_data)
        author_datasets_json = json.dumps(author_datasets)

        # File type data for chart (per-extension, per-month change counts)
        file_type_data_json = json.dumps(file_type_data)

        html = html.replace('CHART_LABELS', labels_json)
        html = html.replace('CHART_DATA', data_json)
        html = html.replace('AUTHOR_DATA', author_data_json)
        html = html.replace('AUTHOR_DATASETS', author_datasets_json)
        html = html.replace('FILE_TYPE_DATA', file_type_data_json)

        # Save to file
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html)

        return output_path

    def create_code_change_heatmap(self, commits: List[Dict], output_path: str = "code_changes.html") -> str:
        """
        Create an enhanced heatmap showing which files are changed most frequently

        Args:
            commits: List of commit data
            output_path: Path to save the HTML visualization

        Returns:
            Path to the saved HTML file
        """
        # Count file modifications
        file_changes = Counter()            # filename -> number of commits touching it
        file_authors = defaultdict(Counter)  # filename -> author -> number of changes
        file_dates = defaultdict(list)       # filename -> dates of the commits touching it

        for commit in commits:
            author = commit.get('author', 'Unknown')
            date = commit.get('date')

            for file_data in commit.get('files', []):
                filename = file_data.get('filename', '')
                if filename:
                    file_changes[filename] += 1
                    file_authors[filename][author] += 1

                    if date:
                        file_dates[filename].append(date)