Spaces:
Sleeping
Sleeping
import base64
import html
import json
import os
import re
import tempfile
from pathlib import Path
from urllib.parse import quote

import gradio as gr
import requests
def parse_repo_url(url):
    """Parse a GitHub repository URL into its components.

    Accepts ``https://github.com/owner/repo`` and
    ``https://github.com/owner/repo/tree/<ref>[/<path>]``. Surrounding
    whitespace, trailing slashes and a clone-style ``.git`` suffix on the
    repository name are tolerated.

    Args:
        url: The repository URL as typed or pasted by the user.

    Returns:
        A dict with the keys ``owner``, ``repo``, ``ref_from_url`` and
        ``path_from_url``; the last two are ``None`` when the URL has no
        ``/tree/...`` part.

    Raises:
        ValueError: If the URL does not match the supported formats.
    """
    # Pasted URLs frequently carry stray spaces or a trailing newline, which
    # would otherwise make the anchored pattern below fail.
    url = (url or '').strip().rstrip('/')
    url_pattern = r'^https://github\.com/([^/]+)/([^/]+)(/tree/([^/]+)(/(.+))?)?$'
    match = re.match(url_pattern, url)
    if not match:
        raise ValueError('Invalid GitHub repository URL. Please ensure the URL is in the correct format: '
                         'https://github.com/owner/repo or https://github.com/owner/repo/tree/branch/path')

    repo = match.group(2)
    # Clone URLs end in ".git"; the API expects the bare repository name.
    if repo.endswith('.git') and len(repo) > len('.git'):
        repo = repo[:-len('.git')]

    return {
        'owner': match.group(1),
        'repo': repo,
        'ref_from_url': match.group(4),
        'path_from_url': match.group(6),
    }
def fetch_repo_sha(owner, repo, ref, path, token=None, timeout=30):
    """Fetch the SHA of the object at ``path`` for the given reference.

    Calls the GitHub "contents" endpoint with the ``object+json`` media type
    and returns the ``sha`` field of the JSON response.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        ref: Branch, tag or commit to resolve; falsy to omit the ``ref``
            query parameter.
        path: Path inside the repository; falsy for the repository root.
        token: Optional access token sent in the ``Authorization`` header.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``sha`` value from the response, or ``None`` if it is absent.

    Raises:
        ValueError: On a rate-limit response, a 404, or any other
            non-success status.
        requests.exceptions.RequestException: On network failures or when
            the request times out.
    """
    # Percent-encode the path (keeping "/" separators) so names containing
    # spaces, "#" or "?" do not corrupt the URL; stray slashes are dropped.
    encoded_path = quote(path.strip('/')) if path else ''
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{encoded_path}"
    # Let requests encode the ref instead of splicing it into the query string.
    params = {'ref': ref} if ref else None

    headers = {'Accept': 'application/vnd.github.object+json'}
    if token:
        headers['Authorization'] = f"token {token}"

    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    if not response.ok:
        if response.status_code == 403 and response.headers.get('X-RateLimit-Remaining') == '0':
            raise ValueError('GitHub API rate limit exceeded. Please try again later or provide a valid access token.')
        if response.status_code == 404:
            raise ValueError('Repository, branch, or path not found. Please check that the URL, branch/tag, and path are correct.')
        raise ValueError(f'Failed to fetch repository SHA. Status: {response.status_code}. Please check your input.')

    data = response.json()
    return data.get('sha')
def fetch_repo_tree(owner, repo, sha, token=None, timeout=30):
    """Fetch the recursive Git tree for ``sha``.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        sha: SHA of the tree to list.
        token: Optional access token sent in the ``Authorization`` header.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The list stored under ``tree`` in the JSON response, or an empty
        list when that key is missing.

    Raises:
        ValueError: If ``sha`` is empty, on a rate-limit response, or on any
            other non-success status.
        requests.exceptions.RequestException: On network failures or when
            the request times out.
    """
    # Without this guard a missing SHA would silently request ".../trees/None".
    if not sha:
        raise ValueError('Failed to fetch repository tree: no SHA was resolved for the requested path.')

    url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{sha}?recursive=1"
    headers = {'Accept': 'application/vnd.github+json'}
    if token:
        headers['Authorization'] = f"token {token}"

    response = requests.get(url, headers=headers, timeout=timeout)
    if not response.ok:
        if response.status_code == 403 and response.headers.get('X-RateLimit-Remaining') == '0':
            raise ValueError('GitHub API rate limit exceeded. Please try again later or provide a valid access token.')
        raise ValueError(f'Failed to fetch repository tree. Status: {response.status_code}. Please check your input.')

    data = response.json()
    # NOTE(review): any "truncated" flag in the response is not inspected
    # here, so a very large repository may come back incomplete - confirm.
    return data.get('tree', [])
def sort_contents(contents):
    """Return a new list of entries ordered by their ``path``.

    Paths are compared segment by segment (split on ``/``), with empty
    segments treated as ``'.'``. Entries without a ``path`` key sort as if
    their path were the empty string.
    """
    def path_key(entry):
        segments = entry.get('path', '').split('/')
        return [segment if segment else '.' for segment in segments]

    return sorted(contents, key=path_key)
def create_directory_structure(tree):
    """Turn a flat list of tree entries into a nested dictionary.

    Only entries whose ``type`` is ``'blob'`` are kept. Every path is rooted
    under a top-level ``'./'`` key; intermediate directories become nested
    dicts and each file name maps to its original tree entry.
    """
    blobs = sort_contents([entry for entry in tree if entry.get('type') == 'blob'])

    root = {}
    for entry in blobs:
        raw_path = entry.get('path', '')
        rooted = raw_path if raw_path.startswith('/') else '/' + raw_path
        segments = rooted.split('/')
        last = len(segments) - 1

        node = root
        for depth, segment in enumerate(segments):
            # The empty segment before the leading "/" becomes the "./" root.
            key = segment or './'
            if key not in node:
                node[key] = {} if depth != last else entry
            if depth < last:
                node = node[key]
    return root
# Extensions whose files are ticked by default in the rendered tree.
_PRESELECTED_EXTENSIONS = ('.js', '.py', '.java', '.cpp', '.html', '.css', '.ts', '.jsx', '.tsx')


def _is_file_entry(node):
    """Return True when ``node`` is a file leaf rather than a directory.

    Directories and files are both dicts, so the type alone cannot tell them
    apart. A file leaf carries a string ``path``; a directory maps child
    names to further dicts, so even a child literally named ``path`` maps to
    a dict, not a string.
    """
    return isinstance(node, dict) and isinstance(node.get('path'), str)


def build_directory_html(structure, prefix=''):
    """Build a nested ``<ul>`` with checkboxes for a directory structure.

    Args:
        structure: Nested dict mapping names to either a sub-directory dict
            or a file entry (a dict with string ``path`` and ``url``).
        prefix: Path of the enclosing directory; only threaded through the
            recursion.

    Returns:
        An HTML string. Directory rows use the ``directory-checkbox`` class;
        file rows use ``file-checkbox`` and carry an HTML-escaped JSON
        payload ``{"url": ..., "path": ...}`` in their ``value`` attribute.
        Files with a common source extension are rendered pre-checked.
    """
    fragments = ['<ul class="directory">']
    for name, item in sorted(structure.items(), key=lambda entry: entry[0]):
        # Names come from the remote repository, so escape them.
        label = html.escape(str(name))
        if _is_file_entry(item):
            file_path = item.get('path', '')
            file_url = item.get('url', '')
            checked = 'checked' if file_path.lower().endswith(_PRESELECTED_EXTENSIONS) else ''
            # The JSON contains double quotes; unescaped they would end the
            # value="..." attribute early.
            payload = html.escape(json.dumps({"url": file_url, "path": file_path}), quote=True)
            fragments.append(f'<li><input type="checkbox" {checked} value="{payload}" class="file-checkbox"> ')
            fragments.append(f'<span class="file">{label}</span></li>')
        else:
            fragments.append(f'<li><input type="checkbox" class="directory-checkbox"> <span class="folder">{label}</span>')
            fragments.append(build_directory_html(item, prefix + '/' + name if prefix else name))
            fragments.append('</li>')
    fragments.append('</ul>')
    return ''.join(fragments)
# Script appended to the rendered tree: toggling a directory checkbox applies
# the same state to every checkbox nested under it.
_DIRECTORY_CHECKBOX_SCRIPT = """
<script>
// Check/uncheck all child checkboxes when directory checkbox is changed
document.querySelectorAll('.directory-checkbox').forEach(checkbox => {
checkbox.addEventListener('change', function() {
const parent = this.parentElement;
const childCheckboxes = parent.querySelectorAll('input[type="checkbox"]');
childCheckboxes.forEach(childBox => {
childBox.checked = this.checked;
});
});
});
</script>
"""


def fetch_repo_contents(repo_url, ref, path, token):
    """Load a repository tree and render it as HTML.

    Explicit ``ref`` and ``path`` arguments take precedence over the values
    embedded in ``repo_url``.

    Returns:
        A 3-tuple ``(html, error_message, tree)``. On success ``html`` is
        the directory markup followed by the checkbox script,
        ``error_message`` is empty and ``tree`` is the raw tree list. On any
        failure ``html`` is empty, ``error_message`` explains the problem
        and ``tree`` is ``None``.
    """
    try:
        parsed = parse_repo_url(repo_url)
        chosen_ref = ref or parsed.get('ref_from_url')
        chosen_path = path or parsed.get('path_from_url') or ''
        owner, repo = parsed.get('owner'), parsed.get('repo')

        root_sha = fetch_repo_sha(owner, repo, chosen_ref, chosen_path, token)
        tree = fetch_repo_tree(owner, repo, root_sha, token)
        markup = build_directory_html(create_directory_structure(tree))
    except Exception as exc:
        # UI boundary: report every failure as text instead of raising.
        return "", f"Error fetching repository contents: {exc}\n\nPlease ensure:\n1. The repository URL is correct and accessible.\n2. You have the necessary permissions.\n3. If it's a private repository, you've provided a valid access token.\n4. The specified branch/tag and path exist.", None

    return markup + _DIRECTORY_CHECKBOX_SCRIPT, "", tree
def fetch_selected_files(selected_files_json, token, timeout=30):
    """Download the selected files and return them as one formatted text.

    Args:
        selected_files_json: JSON array of objects with ``url`` and ``path``
            keys.
        token: Optional access token sent in the ``Authorization`` header.
        timeout: Seconds to wait for each file request before giving up.

    Returns:
        The output of ``format_repo_contents`` for the downloaded files, or
        a human-readable error string when nothing is selected or any step
        fails. This function does not raise; failures are reported as text.
    """
    try:
        selected_files = json.loads(selected_files_json)
        if not selected_files:
            return "Error: No files selected. Please select at least one file from the directory structure."

        headers = {'Accept': 'application/vnd.github.v3.raw'}
        if token:
            headers['Authorization'] = f"token {token}"

        file_contents = []
        # One session reuses the connection across files; the timeout keeps a
        # stalled download from hanging the request forever.
        with requests.Session() as session:
            for file_info in selected_files:
                url = file_info.get('url')
                path = file_info.get('path')
                response = session.get(url, headers=headers, timeout=timeout)
                if not response.ok:
                    if response.status_code == 403 and response.headers.get('X-RateLimit-Remaining') == '0':
                        raise ValueError(f"GitHub API rate limit exceeded while fetching {path}. Please try again later or provide a valid access token.")
                    raise ValueError(f"Failed to fetch content for {path}. Status: {response.status_code}. Please check your permissions.")
                file_contents.append({'url': url, 'path': path, 'text': response.text})

        return format_repo_contents(file_contents)
    except Exception as e:
        # UI boundary: convert every failure into a message for the user.
        return f"Error generating text file: {str(e)}\n\nPlease ensure:\n1. You have selected at least one file.\n2. Your access token (if provided) is valid.\n3. You have a stable internet connection.\n4. The GitHub API is accessible."
def format_repo_contents(contents):
    """Format fetched files as a directory index followed by their text.

    Args:
        contents: List of dicts with ``path`` and ``text`` keys.

    Returns:
        A string starting with ``"Directory Structure:"`` and a tree-style
        index of the given paths, followed by one ``File: <path>`` section
        per entry in sorted path order.
    """
    contents = sort_contents(contents)

    # Nested dict of path segments: directories map to dicts, files to None.
    tree = {}
    for item in contents:
        parts = item.get('path', '').split('/')
        level = tree
        for part in parts[:-1]:
            child = level.get(part)
            if not isinstance(child, dict):
                # Missing, or previously recorded as a file: make it a directory.
                child = level[part] = {}
            level = child
        level.setdefault(parts[-1], None)

    def build_index(node, prefix=''):
        """Render ``node`` recursively as tree-drawing lines."""
        lines = []
        entries = sorted(node.items(), key=lambda entry: entry[0])
        last = len(entries) - 1
        for position, (name, subnode) in enumerate(entries):
            is_last = position == last
            connector = '└── ' if is_last else '├── '
            # Continuation prefixes must be as wide as the 4-character
            # connectors, otherwise nested levels do not line up.
            child_prefix = '    ' if is_last else '│   '
            lines.append(f"{prefix}{connector}{name or './'}\n")
            if subnode and isinstance(subnode, dict):
                lines.append(build_index(subnode, f"{prefix}{child_prefix}"))
        return ''.join(lines)

    index = build_index(tree)
    text = ''.join(
        f"\n\n---\nFile: {item.get('path', '')}\n---\n\n{item.get('text', '')}\n"
        for item in contents
    )
    return f"Directory Structure:\n\n{index}\n{text}"
def get_selected_files(html_structure, repo_tree):
    """List the files of a repository tree with a default selection flag.

    ``html_structure`` is accepted but not used; the list is derived from
    ``repo_tree`` alone. Each ``'blob'`` entry yields a dict with ``name``,
    ``url``, ``path`` and ``selected``, where ``selected`` is True for files
    with a common source-code extension.
    """
    preselected = ('.js', '.py', '.java', '.cpp', '.html', '.css', '.ts', '.jsx', '.tsx')

    records = []
    for entry in repo_tree:
        if entry.get('type') != 'blob':
            continue
        file_path = entry.get('path', '')
        records.append({
            'name': file_path,
            'url': entry.get('url', ''),
            'path': file_path,
            'selected': file_path.lower().endswith(preselected),
        })
    return records
def generate_file_checkboxes(tree):
    """Map each file path in ``tree`` to its ``{'url', 'path'}`` record.

    Returns an empty dict when ``tree`` is falsy. Only entries whose
    ``type`` is ``'blob'`` are included.
    """
    if not tree:
        return {}
    return {
        entry.get('path', ''): {
            'url': entry.get('url', ''),
            'path': entry.get('path', ''),
        }
        for entry in tree
        if entry.get('type') == 'blob'
    }
def process_selections(file_options, selections, token):
    """Resolve the ticked paths and fetch the matching files.

    ``selections`` holds the chosen paths and ``file_options`` maps every
    known path to its ``{'url', 'path'}`` record. Unknown selections are
    ignored. Returns an error string when nothing usable is selected,
    otherwise the result of ``fetch_selected_files``.
    """
    if not (selections and file_options):
        return "Error: No files selected or no files available."

    chosen = [file_options[key] for key in selections if key in file_options]
    if not chosen:
        return "Error: No valid files selected."

    # fetch_selected_files expects its file list serialised as JSON.
    return fetch_selected_files(json.dumps(chosen), token)
def save_output(output_text):
    """Write ``output_text`` to a temporary ``.txt`` file.

    Returns the path of the new file, or an error string when the text is
    empty or only whitespace. The file is UTF-8 encoded and is not deleted
    automatically.
    """
    if not (output_text and output_text.strip()):
        return "Error: No content to download. Please generate the text file first."

    with tempfile.NamedTemporaryFile(suffix='.txt', delete=False) as handle:
        handle.write(output_text.encode('utf-8'))
    return handle.name
# Create Gradio interface

# Styles for the rendered directory tree and the surrounding boxes.
_APP_CSS = """
.directory { list-style-type: none; padding-left: 20px; }
.folder { color: #e67e22; font-weight: bold; }
.file { color: #3498db; }
.gr-box { border-radius: 8px; }
"""

# Help text shown in the "Info" tab.
_USAGE_MARKDOWN = """
## How to use
1. Enter a GitHub repository URL (e.g., https://github.com/username/repo)
2. Optionally specify branch/tag and path
3. For private repositories, provide an access token
4. Click "Fetch Repository" to load the directory structure
5. Select files from the directory structure
6. Click "Generate Text" to fetch and format file contents
7. Copy or download the generated text
## Access Token Information
To access private repositories or increase API rate limits, you'll need a GitHub personal access token.
To create one:
1. Go to GitHub Settings > Developer settings > Personal access tokens
2. Generate a new token with the 'repo' scope
3. Copy the token and paste it in the Access Token field
"""


def _file_selector_update(tree):
    """Offer every file of ``tree`` as a choice and clear the selection.

    Returning a bare list to a CheckboxGroup sets its *value*, not its
    choices, so the choices have to be replaced through ``gr.update``.
    """
    return gr.update(choices=list(generate_file_checkboxes(tree)), value=[])


def _download_status(saved_path):
    """Build the status message shown after a download attempt.

    ``save_output`` returns an error string instead of a path when there is
    nothing to save; that message is shown as-is rather than being wrapped
    in "File saved at: ...".
    """
    if not saved_path or saved_path.startswith("Error:"):
        return gr.update(value=saved_path or "")
    return gr.update(value=f"File saved at: {saved_path}. You can download it from there.")


# NOTE(review): the nesting of the components below was reconstructed; confirm
# that the widgets after the Info tab belong at the top level of the page.
with gr.Blocks(css=_APP_CSS) as demo:
    gr.Markdown("# GitHub Repository Explorer")

    with gr.Row():
        with gr.Column(scale=2):
            repo_url = gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/username/repo")
            with gr.Row():
                ref = gr.Textbox(label="Branch/Tag (optional)", placeholder="main")
                path = gr.Textbox(label="Path (optional)", placeholder="src")
            token = gr.Textbox(label="Access Token (optional, for private repos)", placeholder="ghp_xxxxxxxxxxxx", type="password")
            fetch_button = gr.Button("Fetch Repository")
        with gr.Column(scale=3):
            with gr.Tabs():
                with gr.TabItem("Info"):
                    gr.Markdown(_USAGE_MARKDOWN)

    # Directory structure display and file selection
    dir_structure_html = gr.HTML(label="Directory Structure")

    # Store repo tree data for use in file selection
    repo_tree_state = gr.State(None)
    file_options_state = gr.State(None)

    # File selection
    file_selector = gr.CheckboxGroup(label="Select Files", interactive=True)

    # Output and action buttons
    output_text = gr.Textbox(label="Output", lines=20)
    with gr.Row():
        generate_button = gr.Button("Generate Text")
        copy_button = gr.Button("Copy to Clipboard")
        download_button = gr.Button("Download")
    error_output = gr.Textbox(label="Status/Error Messages")
    download_path = gr.State(None)

    # Define events
    fetch_button.click(
        fn=fetch_repo_contents,
        inputs=[repo_url, ref, path, token],
        outputs=[dir_structure_html, error_output, repo_tree_state]
    ).then(
        fn=generate_file_checkboxes,
        inputs=[repo_tree_state],
        outputs=[file_options_state]
    ).then(
        fn=_file_selector_update,
        inputs=[repo_tree_state],
        outputs=[file_selector]
    )

    generate_button.click(
        fn=process_selections,
        inputs=[file_options_state, file_selector, token],
        outputs=[output_text]
    )

    # NOTE(review): this handler only echoes the text back into the output
    # box; nothing here writes to the clipboard - confirm intended behaviour.
    copy_button.click(
        fn=lambda x: x,  # Just pass through the text
        inputs=[output_text],
        outputs=[output_text]
    )

    download_button.click(
        fn=save_output,
        inputs=[output_text],
        outputs=[download_path]
    ).then(
        fn=_download_status,
        inputs=[download_path],
        outputs=[error_output]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch(share=True)  # Added share=True to create a public link