Update app.py
app.py
CHANGED
@@ -5,21 +5,23 @@ import requests
 import torch
 import uvicorn
 import nest_asyncio
-import os
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from sentence_transformers import SentenceTransformer, models
 import gradio as gr
+import os
 
 ############################################
 # Configuration
 ############################################
 
+
 HF_TOKEN = os.environ.get("HF_TOKEN")
 GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 
+
 ############################################
 # GitHub API Functions
 ############################################
@@ -67,8 +69,7 @@ def preprocess_text(text: str) -> str:
 
 def load_embedding_model(model_name: str = 'huggingface/CodeBERTa-small-v1') -> SentenceTransformer:
     transformer_model = models.Transformer(model_name)
-    pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(),
-                                   pooling_mode_mean_tokens=True)
+    pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(), pooling_mode_mean_tokens=True)
     model = SentenceTransformer(modules=[transformer_model, pooling_model])
     return model
 
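Reviewer note: the reflowed `models.Pooling(...)` call keeps the standard sentence-transformers two-module pattern, where a transformer backbone emits per-token embeddings and a mean-pooling head averages them into one fixed-size vector. A minimal, self-contained usage sketch (the snippet passed to `encode` is illustrative only):

from sentence_transformers import SentenceTransformer, models

def load_embedding_model(model_name: str = 'huggingface/CodeBERTa-small-v1') -> SentenceTransformer:
    transformer_model = models.Transformer(model_name)
    # Mean-pool token embeddings into a single fixed-size sentence vector.
    pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(), pooling_mode_mean_tokens=True)
    return SentenceTransformer(modules=[transformer_model, pooling_model])

model = load_embedding_model()
vectors = model.encode(["def add(a, b): return a + b"])  # shape: (1, embedding_dim)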
@@ -92,6 +93,7 @@ def generate_prompt(query: str, context_snippets: list) -> str:
         instruction = "Provide an extremely detailed and thorough explanation of at least 500 words."
     else:
         instruction = "Answer concisely."
+
     prompt = (
         f"Below is some context from a GitHub repository:\n\n"
         f"{context}\n\n"
@@ -100,23 +102,129 @@ def generate_prompt(query: str, context_snippets: list) -> str:
     )
     return prompt
 
-
+
 def get_gemini_flash_response(prompt: str) -> str:
-
-    from google.
-
-
+    from google import genai
+    from google.genai import types
+    client = genai.Client(api_key=GEMINI_API_KEY)
+
+
+
+    response = client.models.generate_content(
         model="gemini-2.0-flash",
         contents=[prompt],
-
-        max_output_tokens=500
+        config=types.GenerateContentConfig(
+            max_output_tokens=500,
+            temperature=0.1
+        )
     )
+
     return response.text
 
+
+
+
+
+# ############################################
+# # Gradio Interface Functions
+# ############################################
+
+# # For file content retrieval, we now use the file path directly.
+# def get_file_content_for_choice(github_url: str, file_path: str):
+#     try:
+#         owner, repo = extract_repo_info(github_url)
+#     except Exception as e:
+#         return str(e)
+#     content = get_file_content(owner, repo, file_path)
+#     return content, file_path
+
+# def chat_with_file(github_url: str, file_path: str, user_query: str):
+#     # Retrieve file content using the file path directly.
+#     result = get_file_content_for_choice(github_url, file_path)
+#     if isinstance(result, str):
+#         return result  # Return error message if occurred.
+#     file_content, selected_file = result
+
+#     # Preprocess file content and extract context.
+#     preprocessed = preprocess_text(file_content)
+#     context_snippet = preprocessed[:5000]  # Use the first 5000 characters as context.
+
+#     # Generate the prompt based on context and user query.
+#     prompt = generate_prompt(user_query, [context_snippet])
+
+#     # Use Gemini Flash to generate a response.
+#     llm_response = get_gemini_flash_response(prompt)
+
+#     return f"File: {selected_file}\n\nLLM Response:\n{llm_response}"
+
+
+# def load_repo_contents_backend(github_url: str):
+#     try:
+#         owner, repo = extract_repo_info(github_url)
+#     except Exception as e:
+#         return f"Error: {str(e)}"
+#     repo_data = get_repo_metadata(owner, repo)
+#     default_branch = repo_data.get("default_branch", "main")
+#     tree_data = get_repo_tree(owner, repo, default_branch)
+#     if "tree" not in tree_data:
+#         return "Error: Could not fetch repository tree."
+#     file_list = [item["path"] for item in tree_data["tree"] if item["type"] == "blob"]
+#     return file_list
+
 ############################################
-# Gradio Interface
+# Gradio Interface Setup
 ############################################
 
+# with gr.Blocks() as demo:
+#     gr.Markdown("# RepoChat - Chat with Repository Files")
+
+#     with gr.Row():
+#         with gr.Column(scale=1):
+#             gr.Markdown("### Repository Information")
+#             github_url_input = gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/username/repository")
+#             load_repo_btn = gr.Button("Load Repository Contents")
+#             # Dropdown with choices as file paths; default value is empty.
+#             file_dropdown = gr.Dropdown(label="Select a File", interactive=True, value="", choices=[])
+#             # repo_content_output = gr.Textbox(label="File Content", interactive=False, lines=30)
+#             repo_content_output = gr.Chatbot(label="Chat Conversation")
+#         with gr.Column(scale=2):
+#             gr.Markdown("### Chat Interface")
+#             chat_query_input = gr.Textbox(label="Your Query", placeholder="Type your query here")
+#             chat_output = gr.Textbox(label="Chatbot Response", interactive=False, lines=10)
+#             chat_btn = gr.Button("Send Query")
+
+#     # Callback: Update file dropdown choices.
+#     def update_file_dropdown(github_url):
+#         files = load_repo_contents_backend(github_url)
+#         if isinstance(files, str):  # Error message
+#             print("Error loading files:", files)
+#             return gr.update(choices=[], value="")
+#         print("Files loaded:", files)
+#         # Do not pre-select any file (empty value)
+#         return gr.update(choices=files, value="")
+
+#     load_repo_btn.click(fn=update_file_dropdown, inputs=[github_url_input], outputs=[file_dropdown])
+
+#     # Callback: Update repository content when a file is selected.
+#     def update_repo_content(github_url, file_choice):
+#         if not file_choice:
+#             return "No file selected."
+#         content, _ = get_file_content_for_choice(github_url, file_choice)
+#         return content
+
+#     file_dropdown.change(fn=update_repo_content, inputs=[github_url_input, file_dropdown], outputs=[repo_content_output])
+
+#     # Callback: Process chat query.
+#     def process_chat(github_url, file_choice, chat_query):
+#         if not file_choice:
+#             return "Please select a file first."
+#         return chat_with_file(github_url, file_choice, chat_query)
+
+#     chat_btn.click(fn=process_chat, inputs=[github_url_input, file_dropdown, chat_query_input], outputs=[chat_output])
+
+#     demo.launch(share=True)
+
+
 def get_file_content_for_choice(github_url: str, file_path: str):
     try:
         owner, repo = extract_repo_info(github_url)
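Reviewer note: the rewritten `get_gemini_flash_response` moves to the google-genai SDK, which carries generation parameters inside `types.GenerateContentConfig` instead of bare keyword arguments. A standalone sketch of the same call shape, assuming the google-genai package is installed and GEMINI_API_KEY is set (the prompt string is illustrative):

import os
from google import genai
from google.genai import types

client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
response = client.models.generate_content(
    model="gemini-2.0-flash",
    contents=["Summarize what this repository's app.py does."],  # illustrative prompt
    config=types.GenerateContentConfig(
        max_output_tokens=500,
        temperature=0.1,  # low temperature keeps answers close to the provided context
    ),
)
print(response.text)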
@@ -131,7 +239,7 @@ def chat_with_file(github_url: str, file_path: str, user_query: str):
         return result  # Return error message if occurred.
     file_content, selected_file = result
     preprocessed = preprocess_text(file_content)
-    context_snippet = preprocessed[:1000]  # Use the first 1000 characters as context
+    context_snippet = preprocessed[:1000]  # Use the first 1000 characters as context
     prompt = generate_prompt(user_query, [context_snippet])
     llm_response = get_gemini_flash_response(prompt)
     return f"File: {selected_file}\n\nLLM Response:\n{llm_response}"
@@ -162,7 +270,9 @@ with gr.Blocks() as demo:
             github_url_input = gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/username/repository")
             load_repo_btn = gr.Button("Load Repository Contents")
             file_dropdown = gr.Dropdown(label="Select a File", interactive=True, choices=[])
-            repo_content_output = gr.Textbox(label="File Content", interactive=False, lines=20)
+            # repo_content_output = gr.Textbox(label="File Content", interactive=False, lines=20)
+            repo_content_output = gr.Chatbot(label="File Content")
+
         with gr.Column(scale=2):
             gr.Markdown("### Chat Interface")
             chat_output = gr.Chatbot(label="Chat Conversation")
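Reviewer note: swapping the Textbox for `gr.Chatbot` changes the expected value type. In the tuple-style Chatbot API this diff uses, the component renders a list of (user, bot) message pairs rather than a plain string. A minimal sketch under that assumption:

import gradio as gr

with gr.Blocks() as demo:
    viewer = gr.Chatbot(label="File Content")
    btn = gr.Button("Show sample")

    def show_sample():
        # One (left, right) pair per chat row; the content is illustrative.
        return [("File Content", "print('hello from app.py')")]

    btn.click(fn=show_sample, inputs=[], outputs=[viewer])

# demo.launch()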
@@ -174,17 +284,23 @@ with gr.Blocks() as demo:
 
     def update_file_dropdown(github_url):
         files = load_repo_contents_backend(github_url)
-        if isinstance(files, str):
-
-
-
-
+        if isinstance(files, str):  # Error message
+            return gr.update(choices=[]), files
+        return gr.update(choices=files), ""
+
+    # def update_repo_content(github_url, file_choice):
+    #     if not file_choice:
+    #         return "No file selected."
+    #     content, _ = get_file_content_for_choice(github_url, file_choice)
+    #     return content
 
     def update_repo_content(github_url, file_choice):
         if not file_choice:
-            return "No file selected."
+            return [("System", "No file selected.")]
         content, _ = get_file_content_for_choice(github_url, file_choice)
-
+        # Wrap the content in a tuple so it displays like a chat message.
+        return [("File Content", content)]
+
 
     def process_chat(github_url, file_choice, chat_query, history):
         if not file_choice:
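Reviewer note: `gr.update(choices=...)` is the standard way to refresh a Dropdown's options from a callback without rebuilding the layout; the callback returns one value per output component. A sketch of the pattern with hypothetical choices:

import gradio as gr

with gr.Blocks() as demo:
    prefix_box = gr.Textbox(label="Filter")
    file_dd = gr.Dropdown(label="Select a File", choices=[])

    def refresh(prefix):
        sample = ["app.py", "README.md", "requirements.txt"]  # hypothetical file list
        # gr.update changes properties of an existing component in place.
        return gr.update(choices=[f for f in sample if f.startswith(prefix)])

    prefix_box.change(fn=refresh, inputs=[prefix_box], outputs=[file_dd])

One caveat worth flagging: the diff's `update_file_dropdown` routes its second return value (a bare string) to the Chatbot-backed `repo_content_output`, which expects a list of message pairs, so the error path may need the same `[("System", ...)]` wrapping used in `update_repo_content`.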
@@ -194,7 +310,7 @@ with gr.Blocks() as demo:
         history.append((chat_query, response))
         return history, history
 
-    load_repo_btn.click(fn=update_file_dropdown, inputs=[github_url_input], outputs=[file_dropdown])
+    load_repo_btn.click(fn=update_file_dropdown, inputs=[github_url_input], outputs=[file_dropdown, repo_content_output])
     file_dropdown.change(fn=update_repo_content, inputs=[github_url_input, file_dropdown], outputs=[repo_content_output])
     chat_btn.click(fn=process_chat, inputs=[github_url_input, file_dropdown, chat_query_input, conversation_history], outputs=[chat_output, conversation_history])
 
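Reviewer note: the rewired `load_repo_btn.click` now feeds two components, so the callback's return arity must match the outputs list. A compact sketch of multi-output wiring, with hypothetical component names:

import gradio as gr

with gr.Blocks() as demo:
    url = gr.Textbox(label="GitHub Repository URL")
    load_btn = gr.Button("Load")
    files_dd = gr.Dropdown(choices=[])
    status = gr.Chatbot()

    def load_repo(u):
        # Return one value per output component, in order.
        return gr.update(choices=["app.py"]), [("System", f"Loaded {u}")]

    load_btn.click(fn=load_repo, inputs=[url], outputs=[files_dd, status])

# demo.launch()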