muhammadnoman76 committed on
Commit
65b86a4
·
1 Parent(s): 0148862
Files changed (1) hide show
  1. app/services/chat_processor.py +126 -70
app/services/chat_processor.py CHANGED
@@ -1,6 +1,7 @@
1
  from datetime import datetime, timezone
2
  from typing import Optional, Dict, Any
3
- from concurrent.futures import ThreadPoolExecutor
 
4
  from yake import KeywordExtractor
5
  from app.services.chathistory import ChatSession
6
  from app.services.websearch import WebSearch
@@ -9,7 +10,10 @@ from app.services.environmental_condition import EnvironmentalData
9
  from app.services.prompts import *
10
  from app.services.vector_database_search import VectorDatabaseSearch
11
  import re
 
 
12
  vectordb = VectorDatabaseSearch()
 
13
 
14
  class ChatProcessor:
15
  def __init__(self, token: str, session_id: Optional[str] = None, num_results: int = 3, num_images: int = 3):
@@ -23,18 +27,24 @@ class ChatProcessor:
23
  self.environment_data = EnvironmentalData(city)
24
  self.web_searcher = WebSearch(num_results=num_results, max_images=num_images)
25
  self.web_search_required = True
 
26
 
27
- def extract_keywords_yake(self, text: str, language: str, max_ngram_size: int = 2, num_keywords: int = 4) -> list:
 
 
28
  lang_code = "en"
29
  if language.lower() == "urdu":
30
  lang_code = "ur"
31
-
32
- kw_extractor = KeywordExtractor(
33
  lan=lang_code,
34
  n=max_ngram_size,
35
  top=num_keywords,
36
  features=None
37
  )
 
 
 
38
  keywords = kw_extractor.extract_keywords(text)
39
  return [kw[0] for kw in keywords]
40
 
@@ -54,36 +64,48 @@ class ChatProcessor:
54
 
55
  def process_chat(self, query: str) -> Dict[str, Any]:
56
  try:
57
- profile = self.chat_session.get_name_and_age()
 
 
 
 
 
 
 
58
  name = profile['name']
59
  age = profile['age']
60
- self.chat_session.load_chat_history()
61
- self.chat_session.update_title(self.session_id,query)
 
62
  history = self.chat_session.format_history()
63
 
64
- history_based_prompt = HISTORY_BASED_PROMPT.format(history=history,query= query)
65
-
66
- enhanced_query = Model().send_message_openrouter(history_based_prompt)
67
 
 
68
  self.session_id = self.ensure_valid_session(title=enhanced_query)
69
  permission = self.chat_session.get_user_preferences()
70
- websearch_enabled = permission.get('websearch', False)
 
 
71
  env_recommendations = permission.get('environmental_recommendations', False)
72
  personalized_recommendations = permission.get('personalized_recommendations', False)
73
  keywords_permission = permission.get('keywords', False)
74
  reference_permission = permission.get('references', False)
75
  language = self.chat_session.get_language().lower()
76
 
 
77
 
78
- language_prompt = LANGUAGE_RESPONSE_PROMPT.format(language = language)
79
-
80
- if websearch_enabled :
81
  with ThreadPoolExecutor(max_workers=2) as executor:
82
  future_web = executor.submit(self.web_searcher.search, enhanced_query)
83
  future_images = executor.submit(self.web_searcher.search_images, enhanced_query)
84
  web_results = future_web.result()
85
  image_results = future_images.result()
86
 
 
87
  context_parts = []
88
  references = []
89
 
@@ -94,13 +116,27 @@ class ChatProcessor:
94
 
95
  context = "\n".join(context_parts)
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  if env_recommendations and personalized_recommendations:
98
  prompt = ENVIRONMENTAL_PERSONALIZED_PROMPT.format(
99
  user_name=name,
100
  user_age=age,
101
  history=history,
102
- user_details=self.chat_session.get_personalized_recommendation(),
103
- environmental_condition=self.environment_data.get_environmental_data(),
104
  previous_history=history,
105
  context=context,
106
  current_query=enhanced_query
@@ -109,16 +145,16 @@ class ChatProcessor:
109
  prompt = PERSONALIZED_PROMPT.format(
110
  user_name=name,
111
  user_age=age,
112
- user_details=self.chat_session.get_personalized_recommendation(),
113
  previous_history=history,
114
  context=context,
115
  current_query=enhanced_query
116
  )
117
- elif env_recommendations :
118
  prompt = ENVIRONMENTAL_PROMPT.format(
119
  user_name=name,
120
  user_age=age,
121
- environmental_condition=self.environment_data.get_environmental_data(),
122
  previous_history=history,
123
  context=context,
124
  current_query=enhanced_query
@@ -130,16 +166,12 @@ class ChatProcessor:
130
  current_query=enhanced_query
131
  )
132
 
133
- prompt = prompt + language_prompt
 
134
 
135
- response = Model().llm(prompt,enhanced_query)
136
-
137
- keywords = ""
138
-
139
- if (keywords_permission):
140
- keywords = self.extract_keywords_yake(response, language=language)
141
- if (not reference_permission):
142
- references = ""
143
 
144
  chat_data = {
145
  "query": enhanced_query,
@@ -158,42 +190,56 @@ class ChatProcessor:
158
  return chat_data
159
 
160
  else:
161
- attach_image = False
162
-
 
 
163
  with ThreadPoolExecutor(max_workers=2) as executor:
164
  future_images = executor.submit(self.web_searcher.search_images, enhanced_query)
 
 
 
165
  image_results = future_images.result()
166
 
167
- start_time = datetime.now(timezone.utc)
168
-
169
- results = vectordb.search( query=enhanced_query, top_k=3)
170
-
171
  context_parts = []
172
  references = []
173
- seen_pages = set()
 
174
 
175
  for result in results:
176
  confidence = result['confidence']
177
  if confidence > 60:
178
  context_parts.append(f"Content: {result['content']}")
179
  page = result['page']
180
- if page not in seen_pages: # Only append if page is not seen
181
  references.append(f"Source: {result['source']}, Page: {page}")
182
  seen_pages.add(page)
183
- attach_image = True
184
-
185
- context = "\n".join(context_parts)
186
-
187
- if not context or len(context) < 10:
188
- context = "There is no context found unfortunately"
189
-
190
- if env_recommendations and personalized_recommendations:
 
 
 
 
 
 
 
 
 
 
 
191
  prompt = ENVIRONMENTAL_PERSONALIZED_PROMPT.format(
192
  user_name=name,
193
- user_age = age,
194
  history=history,
195
- user_details=self.chat_session.get_personalized_recommendation(),
196
- environmental_condition=self.environment_data.get_environmental_data(),
197
  previous_history=history,
198
  context=context,
199
  current_query=enhanced_query
@@ -202,16 +248,16 @@ class ChatProcessor:
202
  prompt = PERSONALIZED_PROMPT.format(
203
  user_name=name,
204
  user_age=age,
205
- user_details=self.chat_session.get_personalized_recommendation(),
206
  previous_history=history,
207
  context=context,
208
  current_query=enhanced_query
209
  )
210
- elif env_recommendations :
211
  prompt = ENVIRONMENTAL_PROMPT.format(
212
  user_name=name,
213
  user_age=age,
214
- environmental_condition=self.environment_data.get_environmental_data(),
215
  previous_history=history,
216
  context=context,
217
  current_query=enhanced_query
@@ -223,24 +269,23 @@ class ChatProcessor:
223
  current_query=enhanced_query
224
  )
225
 
226
- prompt = prompt + language_prompt
227
-
228
- response = Model().response = Model().llm(prompt,query)
229
-
230
  end_time = datetime.now(timezone.utc)
231
 
 
232
  keywords = ""
233
-
234
- if (keywords_permission):
235
  keywords = self.extract_keywords_yake(response, language=language)
236
 
237
- if (not reference_permission):
238
- references = ""
239
 
240
- if not attach_image:
241
- image_results = ""
242
- keywords = ""
243
 
 
244
  chat_data = {
245
  "query": enhanced_query,
246
  "response": response,
@@ -252,15 +297,26 @@ class ChatProcessor:
252
  "timestamp": datetime.now(timezone.utc).isoformat(),
253
  "session_id": self.chat_session.session_id
254
  }
255
- match = re.search(r'(## Personal Recommendations|## Environmental Considerations)', response)
256
- if match:
257
- truncated_response = response[:match.start()].strip()
258
- else:
259
- truncated_response = response
260
- if not self.chat_session.save_details(session_id=self.session_id , context= context , query= enhanced_query , response=truncated_response , rag_start_time=start_time , rag_end_time=end_time ):
261
- raise ValueError("Failed to save the RAG details")
262
- if not self.chat_session.save_chat(chat_data):
263
- raise ValueError("Failed to save chat message")
 
 
 
 
 
 
 
 
 
 
 
264
  return chat_data
265
 
266
  except Exception as e:
 
1
  from datetime import datetime, timezone
2
  from typing import Optional, Dict, Any
3
+ from concurrent.futures import ThreadPoolExecutor, as_completed
4
+ from functools import lru_cache
5
  from yake import KeywordExtractor
6
  from app.services.chathistory import ChatSession
7
  from app.services.websearch import WebSearch
 
10
  from app.services.prompts import *
11
  from app.services.vector_database_search import VectorDatabaseSearch
12
  import re
13
+
14
+ # Create a singleton instance outside the class
15
  vectordb = VectorDatabaseSearch()
16
+ model = Model() # Reuse model instance
17
 
18
  class ChatProcessor:
19
  def __init__(self, token: str, session_id: Optional[str] = None, num_results: int = 3, num_images: int = 3):
 
27
  self.environment_data = EnvironmentalData(city)
28
  self.web_searcher = WebSearch(num_results=num_results, max_images=num_images)
29
  self.web_search_required = True
30
+ self._keyword_extractors = {} # Cache keyword extractors
31
 
32
+ @lru_cache(maxsize=128)
33
+ def get_keyword_extractor(self, language: str, max_ngram_size: int, num_keywords: int):
34
+ """Cache keyword extractors for reuse"""
35
  lang_code = "en"
36
  if language.lower() == "urdu":
37
  lang_code = "ur"
38
+
39
+ return KeywordExtractor(
40
  lan=lang_code,
41
  n=max_ngram_size,
42
  top=num_keywords,
43
  features=None
44
  )
45
+
46
+ def extract_keywords_yake(self, text: str, language: str, max_ngram_size: int = 2, num_keywords: int = 4) -> list:
47
+ kw_extractor = self.get_keyword_extractor(language, max_ngram_size, num_keywords)
48
  keywords = kw_extractor.extract_keywords(text)
49
  return [kw[0] for kw in keywords]
50
 
 
64
 
65
  def process_chat(self, query: str) -> Dict[str, Any]:
66
  try:
67
+ # Fetch profile and history in parallel
68
+ with ThreadPoolExecutor(max_workers=2) as executor:
69
+ future_profile = executor.submit(self.chat_session.get_name_and_age)
70
+ future_history = executor.submit(self.chat_session.load_chat_history)
71
+
72
+ profile = future_profile.result()
73
+ future_history.result() # Wait for history to load
74
+
75
  name = profile['name']
76
  age = profile['age']
77
+
78
+ # Update title and get history format
79
+ self.chat_session.update_title(self.session_id, query)
80
  history = self.chat_session.format_history()
81
 
82
+ # Get enhanced query
83
+ history_based_prompt = HISTORY_BASED_PROMPT.format(history=history, query=query)
84
+ enhanced_query = model.send_message_openrouter(history_based_prompt)
85
 
86
+ # Ensure session and get user preferences
87
  self.session_id = self.ensure_valid_session(title=enhanced_query)
88
  permission = self.chat_session.get_user_preferences()
89
+
90
+ # Extract permissions
91
+ websearch_enabled = permission.get('websearch', False)
92
  env_recommendations = permission.get('environmental_recommendations', False)
93
  personalized_recommendations = permission.get('personalized_recommendations', False)
94
  keywords_permission = permission.get('keywords', False)
95
  reference_permission = permission.get('references', False)
96
  language = self.chat_session.get_language().lower()
97
 
98
+ language_prompt = LANGUAGE_RESPONSE_PROMPT.format(language=language)
99
 
100
+ if websearch_enabled:
101
+ # Run web and image search in parallel
 
102
  with ThreadPoolExecutor(max_workers=2) as executor:
103
  future_web = executor.submit(self.web_searcher.search, enhanced_query)
104
  future_images = executor.submit(self.web_searcher.search_images, enhanced_query)
105
  web_results = future_web.result()
106
  image_results = future_images.result()
107
 
108
+ # Efficiently build context and references
109
  context_parts = []
110
  references = []
111
 
 
116
 
117
  context = "\n".join(context_parts)
118
 
119
+ # Pre-fetch user details and environmental data in parallel if needed
120
+ if env_recommendations or personalized_recommendations:
121
+ with ThreadPoolExecutor(max_workers=2) as executor:
122
+ futures = {}
123
+ if personalized_recommendations:
124
+ futures['user_details'] = executor.submit(self.chat_session.get_personalized_recommendation)
125
+ if env_recommendations:
126
+ futures['env_data'] = executor.submit(self.environment_data.get_environmental_data)
127
+
128
+ # Get results as they complete
129
+ user_details = futures.get('user_details').result() if 'user_details' in futures else None
130
+ env_data = futures.get('env_data').result() if 'env_data' in futures else None
131
+
132
+ # Select appropriate prompt
133
  if env_recommendations and personalized_recommendations:
134
  prompt = ENVIRONMENTAL_PERSONALIZED_PROMPT.format(
135
  user_name=name,
136
  user_age=age,
137
  history=history,
138
+ user_details=user_details,
139
+ environmental_condition=env_data,
140
  previous_history=history,
141
  context=context,
142
  current_query=enhanced_query
 
145
  prompt = PERSONALIZED_PROMPT.format(
146
  user_name=name,
147
  user_age=age,
148
+ user_details=user_details,
149
  previous_history=history,
150
  context=context,
151
  current_query=enhanced_query
152
  )
153
+ elif env_recommendations:
154
  prompt = ENVIRONMENTAL_PROMPT.format(
155
  user_name=name,
156
  user_age=age,
157
+ environmental_condition=env_data,
158
  previous_history=history,
159
  context=context,
160
  current_query=enhanced_query
 
166
  current_query=enhanced_query
167
  )
168
 
169
+ prompt += language_prompt
170
+ response = model.llm(prompt, enhanced_query)
171
 
172
+ # Process keywords only if needed
173
+ keywords = self.extract_keywords_yake(response, language=language) if keywords_permission else ""
174
+ references = "" if not reference_permission else references
 
 
 
 
 
175
 
176
  chat_data = {
177
  "query": enhanced_query,
 
190
  return chat_data
191
 
192
  else:
193
+ # Start RAG timing
194
+ start_time = datetime.now(timezone.utc)
195
+
196
+ # Launch image search in the background while doing vector search
197
  with ThreadPoolExecutor(max_workers=2) as executor:
198
  future_images = executor.submit(self.web_searcher.search_images, enhanced_query)
199
+ future_vector = executor.submit(vectordb.search, enhanced_query, 3)
200
+
201
+ results = future_vector.result()
202
  image_results = future_images.result()
203
 
204
+ # Process vector search results
 
 
 
205
  context_parts = []
206
  references = []
207
+ seen_pages = set()
208
+ attach_image = False
209
 
210
  for result in results:
211
  confidence = result['confidence']
212
  if confidence > 60:
213
  context_parts.append(f"Content: {result['content']}")
214
  page = result['page']
215
+ if page not in seen_pages:
216
  references.append(f"Source: {result['source']}, Page: {page}")
217
  seen_pages.add(page)
218
+ attach_image = True
219
+
220
+ context = "\n".join(context_parts) if context_parts else "There is no context found unfortunately"
221
+
222
+ # Pre-fetch user details and environmental data in parallel if needed
223
+ if env_recommendations or personalized_recommendations:
224
+ with ThreadPoolExecutor(max_workers=2) as executor:
225
+ futures = {}
226
+ if personalized_recommendations:
227
+ futures['user_details'] = executor.submit(self.chat_session.get_personalized_recommendation)
228
+ if env_recommendations:
229
+ futures['env_data'] = executor.submit(self.environment_data.get_environmental_data)
230
+
231
+ # Get results as they complete
232
+ user_details = futures.get('user_details').result() if 'user_details' in futures else None
233
+ env_data = futures.get('env_data').result() if 'env_data' in futures else None
234
+
235
+ # Select appropriate prompt
236
+ if env_recommendations and personalized_recommendations:
237
  prompt = ENVIRONMENTAL_PERSONALIZED_PROMPT.format(
238
  user_name=name,
239
+ user_age=age,
240
  history=history,
241
+ user_details=user_details,
242
+ environmental_condition=env_data,
243
  previous_history=history,
244
  context=context,
245
  current_query=enhanced_query
 
248
  prompt = PERSONALIZED_PROMPT.format(
249
  user_name=name,
250
  user_age=age,
251
+ user_details=user_details,
252
  previous_history=history,
253
  context=context,
254
  current_query=enhanced_query
255
  )
256
+ elif env_recommendations:
257
  prompt = ENVIRONMENTAL_PROMPT.format(
258
  user_name=name,
259
  user_age=age,
260
+ environmental_condition=env_data,
261
  previous_history=history,
262
  context=context,
263
  current_query=enhanced_query
 
269
  current_query=enhanced_query
270
  )
271
 
272
+ prompt += language_prompt
273
+ response = model.llm(prompt, query)
 
 
274
  end_time = datetime.now(timezone.utc)
275
 
276
+ # Process keywords only if needed
277
  keywords = ""
278
+ if keywords_permission and attach_image:
 
279
  keywords = self.extract_keywords_yake(response, language=language)
280
 
281
+ references = "" if not reference_permission else references
282
+ image_results = "" if not attach_image else image_results
283
 
284
+ # Find where to truncate the response if needed
285
+ match = re.search(r'(## Personal Recommendations|## Environmental Considerations)', response)
286
+ truncated_response = response[:match.start()].strip() if match else response
287
 
288
+ # Prepare chat data
289
  chat_data = {
290
  "query": enhanced_query,
291
  "response": response,
 
297
  "timestamp": datetime.now(timezone.utc).isoformat(),
298
  "session_id": self.chat_session.session_id
299
  }
300
+
301
+ # Save in parallel
302
+ with ThreadPoolExecutor(max_workers=2) as executor:
303
+ future_save_details = executor.submit(
304
+ self.chat_session.save_details,
305
+ session_id=self.session_id,
306
+ context=context,
307
+ query=enhanced_query,
308
+ response=truncated_response,
309
+ rag_start_time=start_time,
310
+ rag_end_time=end_time
311
+ )
312
+ future_save_chat = executor.submit(self.chat_session.save_chat, chat_data)
313
+
314
+ # Check results
315
+ if not future_save_details.result():
316
+ raise ValueError("Failed to save the RAG details")
317
+ if not future_save_chat.result():
318
+ raise ValueError("Failed to save chat message")
319
+
320
  return chat_data
321
 
322
  except Exception as e: