jyo01 committed
Commit 8cfda1b · verified · 1 Parent(s): 8ac6c6d

Update app.py

Files changed (1)
  1. app.py +137 -21
app.py CHANGED
@@ -5,21 +5,23 @@ import requests
import torch
import uvicorn
import nest_asyncio
- import os
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from sentence_transformers import SentenceTransformer, models
import gradio as gr
+ import os

############################################
# Configuration
############################################

+
HF_TOKEN = os.environ.get("HF_TOKEN")
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

+
############################################
# GitHub API Functions
############################################
@@ -67,8 +69,7 @@ def preprocess_text(text: str) -> str:

def load_embedding_model(model_name: str = 'huggingface/CodeBERTa-small-v1') -> SentenceTransformer:
    transformer_model = models.Transformer(model_name)
-     pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(),
-                                     pooling_mode_mean_tokens=True)
+     pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(), pooling_mode_mean_tokens=True)
    model = SentenceTransformer(modules=[transformer_model, pooling_model])
    return model

@@ -92,6 +93,7 @@ def generate_prompt(query: str, context_snippets: list) -> str:
        instruction = "Provide an extremely detailed and thorough explanation of at least 500 words."
    else:
        instruction = "Answer concisely."
+
    prompt = (
        f"Below is some context from a GitHub repository:\n\n"
        f"{context}\n\n"
@@ -100,23 +102,129 @@ def generate_prompt(query: str, context_snippets: list) -> str:
    )
    return prompt

- # Gemini Flash integration using the new API.
+
def get_gemini_flash_response(prompt: str) -> str:
-     import google.generativeai as palm
-     from google.generativeai import types
-     palm.configure(api_key=GEMINI_API_KEY)
-     response = palm.generate_text(
+     from google import genai
+     from google.genai import types
+     client = genai.Client(api_key=GEMINI_API_KEY)
+
+
+
+     response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=[prompt],
-         temperature=0.1,
-         max_output_tokens=500
+         config=types.GenerateContentConfig(
+             max_output_tokens=500,
+             temperature=0.1
+         )
    )
+
    return response.text

+
+
+
+
+ # ############################################
+ # # Gradio Interface Functions
+ # ############################################
+
+ # # For file content retrieval, we now use the file path directly.
+ # def get_file_content_for_choice(github_url: str, file_path: str):
+ #     try:
+ #         owner, repo = extract_repo_info(github_url)
+ #     except Exception as e:
+ #         return str(e)
+ #     content = get_file_content(owner, repo, file_path)
+ #     return content, file_path
+
+ # def chat_with_file(github_url: str, file_path: str, user_query: str):
+ #     # Retrieve file content using the file path directly.
+ #     result = get_file_content_for_choice(github_url, file_path)
+ #     if isinstance(result, str):
+ #         return result  # Return error message if occurred.
+ #     file_content, selected_file = result
+
+ #     # Preprocess file content and extract context.
+ #     preprocessed = preprocess_text(file_content)
+ #     context_snippet = preprocessed[:5000]  # Use first 5000 characters as context.
+
+ #     # Generate the prompt based on context and user query.
+ #     prompt = generate_prompt(user_query, [context_snippet])
+
+ #     # Use Gemini Flash to generate a response.
+ #     llm_response = get_gemini_flash_response(prompt)
+
+ #     return f"File: {selected_file}\n\nLLM Response:\n{llm_response}"
+
+
+ # def load_repo_contents_backend(github_url: str):
+ #     try:
+ #         owner, repo = extract_repo_info(github_url)
+ #     except Exception as e:
+ #         return f"Error: {str(e)}"
+ #     repo_data = get_repo_metadata(owner, repo)
+ #     default_branch = repo_data.get("default_branch", "main")
+ #     tree_data = get_repo_tree(owner, repo, default_branch)
+ #     if "tree" not in tree_data:
+ #         return "Error: Could not fetch repository tree."
+ #     file_list = [item["path"] for item in tree_data["tree"] if item["type"] == "blob"]
+ #     return file_list
+
############################################
- # Gradio Interface Functions
+ # Gradio Interface Setup
############################################

+ # with gr.Blocks() as demo:
+ #     gr.Markdown("# RepoChat - Chat with Repository Files")
+
+ #     with gr.Row():
+ #         with gr.Column(scale=1):
+ #             gr.Markdown("### Repository Information")
+ #             github_url_input = gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/username/repository")
+ #             load_repo_btn = gr.Button("Load Repository Contents")
+ #             # Dropdown with choices as file paths; default value is empty.
+ #             file_dropdown = gr.Dropdown(label="Select a File", interactive=True, value="", choices=[])
+ #             # repo_content_output = gr.Textbox(label="File Content", interactive=False, lines=30)
+ #             repo_content_output = gr.Chatbot(label="Chat Conversation")
+ #         with gr.Column(scale=2):
+ #             gr.Markdown("### Chat Interface")
+ #             chat_query_input = gr.Textbox(label="Your Query", placeholder="Type your query here")
+ #             chat_output = gr.Textbox(label="Chatbot Response", interactive=False, lines=10)
+ #             chat_btn = gr.Button("Send Query")
+
+ #     # Callback: Update file dropdown choices.
+ #     def update_file_dropdown(github_url):
+ #         files = load_repo_contents_backend(github_url)
+ #         if isinstance(files, str):  # Error message
+ #             print("Error loading files:", files)
+ #             return gr.update(choices=[], value="")
+ #         print("Files loaded:", files)
+ #         # Do not pre-select any file (empty value)
+ #         return gr.update(choices=files, value="")
+
+ #     load_repo_btn.click(fn=update_file_dropdown, inputs=[github_url_input], outputs=[file_dropdown])
+
+ #     # Callback: Update repository content when a file is selected.
+ #     def update_repo_content(github_url, file_choice):
+ #         if not file_choice:
+ #             return "No file selected."
+ #         content, _ = get_file_content_for_choice(github_url, file_choice)
+ #         return content
+
+ #     file_dropdown.change(fn=update_repo_content, inputs=[github_url_input, file_dropdown], outputs=[repo_content_output])
+
+ #     # Callback: Process chat query.
+ #     def process_chat(github_url, file_choice, chat_query):
+ #         if not file_choice:
+ #             return "Please select a file first."
+ #         return chat_with_file(github_url, file_choice, chat_query)
+
+ #     chat_btn.click(fn=process_chat, inputs=[github_url_input, file_dropdown, chat_query_input], outputs=[chat_output])
+
+ # demo.launch(share=True)
+
+
def get_file_content_for_choice(github_url: str, file_path: str):
    try:
        owner, repo = extract_repo_info(github_url)
@@ -131,7 +239,7 @@ def chat_with_file(github_url: str, file_path: str, user_query: str):
        return result  # Return error message if occurred.
    file_content, selected_file = result
    preprocessed = preprocess_text(file_content)
-     context_snippet = preprocessed[:1000]  # Use the first 1000 characters as context.
+     context_snippet = preprocessed[:1000]  # Use the first 1000 characters as context
    prompt = generate_prompt(user_query, [context_snippet])
    llm_response = get_gemini_flash_response(prompt)
    return f"File: {selected_file}\n\nLLM Response:\n{llm_response}"
@@ -162,7 +270,9 @@ with gr.Blocks() as demo:
            github_url_input = gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/username/repository")
            load_repo_btn = gr.Button("Load Repository Contents")
            file_dropdown = gr.Dropdown(label="Select a File", interactive=True, choices=[])
-             repo_content_output = gr.Textbox(label="File Content", interactive=False, lines=20)
+             # repo_content_output = gr.Textbox(label="File Content", interactive=False, lines=20)
+             repo_content_output = gr.Chatbot(label="File Content")
+
        with gr.Column(scale=2):
            gr.Markdown("### Chat Interface")
            chat_output = gr.Chatbot(label="Chat Conversation")
@@ -174,17 +284,23 @@ with gr.Blocks() as demo:

    def update_file_dropdown(github_url):
        files = load_repo_contents_backend(github_url)
-         if isinstance(files, str):
-             print("Error loading files:", files)
-             return gr.update(choices=[], value=None)
-         print("Files loaded:", files)
-         return gr.update(choices=files, value=None)
+         if isinstance(files, str):  # Error message
+             return gr.update(choices=[]), files
+         return gr.update(choices=files), ""
+
+     # def update_repo_content(github_url, file_choice):
+     #     if not file_choice:
+     #         return "No file selected."
+     #     content, _ = get_file_content_for_choice(github_url, file_choice)
+     #     return content

    def update_repo_content(github_url, file_choice):
        if not file_choice:
-             return "No file selected."
+             return [("System", "No file selected.")]
        content, _ = get_file_content_for_choice(github_url, file_choice)
-         return content
+         # Wrap the content in a tuple so it displays like a chat message.
+         return [("File Content", content)]
+

    def process_chat(github_url, file_choice, chat_query, history):
        if not file_choice:
@@ -194,7 +310,7 @@ with gr.Blocks() as demo:
        history.append((chat_query, response))
        return history, history

-     load_repo_btn.click(fn=update_file_dropdown, inputs=[github_url_input], outputs=[file_dropdown])
+     load_repo_btn.click(fn=update_file_dropdown, inputs=[github_url_input], outputs=[file_dropdown, repo_content_output])
    file_dropdown.change(fn=update_repo_content, inputs=[github_url_input, file_dropdown], outputs=[repo_content_output])
    chat_btn.click(fn=process_chat, inputs=[github_url_input, file_dropdown, chat_query_input, conversation_history], outputs=[chat_output, conversation_history])

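The core of this commit is the switch from the old google.generativeai (palm-style) call to the newer google-genai client. Below is a standalone sketch of the new call path, assuming the google-genai package is installed and GEMINI_API_KEY is set in the environment; the model name and generation settings are the ones used in the commit, and the prompt string is only illustrative.

# Minimal sketch of the new Gemini call used in get_gemini_flash_response;
# assumes the google-genai package is installed and GEMINI_API_KEY is set.
import os

from google import genai
from google.genai import types

client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

response = client.models.generate_content(
    model="gemini-2.0-flash",                            # model used by the commit
    contents=["Summarize what this repository does."],   # illustrative prompt
    config=types.GenerateContentConfig(
        max_output_tokens=500,
        temperature=0.1,
    ),
)
print(response.text)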
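On the Gradio side, repo_content_output is now a gr.Chatbot instead of a Textbox, update_repo_content returns the file content wrapped as a (user, assistant) pair, and update_file_dropdown now drives two outputs. A minimal self-contained sketch of that wiring, assuming a Gradio release where gr.Chatbot accepts the tuple-style history this commit relies on; list_files and show_file are hypothetical stand-ins for load_repo_contents_backend and get_file_content_for_choice, and the file listing is hard-coded for illustration.

# Sketch of the two-output callback pattern and tuple-style Chatbot updates;
# list_files and show_file are hypothetical stand-ins, not functions from app.py.
import gradio as gr

def list_files(url):
    # Stand-in for load_repo_contents_backend: return dropdown choices plus
    # an (empty or error) chat history for the file-content Chatbot.
    if not url:
        return gr.update(choices=[]), [("System", "Enter a repository URL first.")]
    files = ["app.py", "README.md"]  # hard-coded listing for illustration
    return gr.update(choices=files), []

def show_file(file_choice):
    # Stand-in for update_repo_content: wrap the content as one (user, assistant)
    # pair so the Chatbot can render it.
    if not file_choice:
        return [("System", "No file selected.")]
    return [("File Content", f"(contents of {file_choice} would go here)")]

with gr.Blocks() as demo:
    url = gr.Textbox(label="GitHub Repository URL")
    load_btn = gr.Button("Load Repository Contents")
    dropdown = gr.Dropdown(label="Select a File", choices=[])
    viewer = gr.Chatbot(label="File Content")

    # One click feeds two outputs, mirroring outputs=[file_dropdown, repo_content_output].
    load_btn.click(fn=list_files, inputs=[url], outputs=[dropdown, viewer])
    dropdown.change(fn=show_file, inputs=[dropdown], outputs=[viewer])

demo.launch()

Each value returned by a callback has to match the type of its output component, which is why the Chatbot slot receives a list of pairs rather than a bare string.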