muhammadnoman76 committed on
Commit
29ce710
·
1 Parent(s): 65b86a4
Files changed (2) hide show
  1. app/services/chat_processor.py +70 -126
  2. pyproject.toml +2 -1
app/services/chat_processor.py CHANGED
@@ -1,7 +1,6 @@
1
  from datetime import datetime, timezone
2
  from typing import Optional, Dict, Any
3
- from concurrent.futures import ThreadPoolExecutor, as_completed
4
- from functools import lru_cache
5
  from yake import KeywordExtractor
6
  from app.services.chathistory import ChatSession
7
  from app.services.websearch import WebSearch
@@ -10,10 +9,7 @@ from app.services.environmental_condition import EnvironmentalData
10
  from app.services.prompts import *
11
  from app.services.vector_database_search import VectorDatabaseSearch
12
  import re
13
-
14
- # Create a singleton instance outside the class
15
  vectordb = VectorDatabaseSearch()
16
- model = Model() # Reuse model instance
17
 
18
  class ChatProcessor:
19
  def __init__(self, token: str, session_id: Optional[str] = None, num_results: int = 3, num_images: int = 3):
@@ -27,24 +23,18 @@ class ChatProcessor:
27
  self.environment_data = EnvironmentalData(city)
28
  self.web_searcher = WebSearch(num_results=num_results, max_images=num_images)
29
  self.web_search_required = True
30
- self._keyword_extractors = {} # Cache keyword extractors
31
 
32
- @lru_cache(maxsize=128)
33
- def get_keyword_extractor(self, language: str, max_ngram_size: int, num_keywords: int):
34
- """Cache keyword extractors for reuse"""
35
  lang_code = "en"
36
  if language.lower() == "urdu":
37
  lang_code = "ur"
38
-
39
- return KeywordExtractor(
40
  lan=lang_code,
41
  n=max_ngram_size,
42
  top=num_keywords,
43
  features=None
44
  )
45
-
46
- def extract_keywords_yake(self, text: str, language: str, max_ngram_size: int = 2, num_keywords: int = 4) -> list:
47
- kw_extractor = self.get_keyword_extractor(language, max_ngram_size, num_keywords)
48
  keywords = kw_extractor.extract_keywords(text)
49
  return [kw[0] for kw in keywords]
50
 
@@ -64,48 +54,36 @@ class ChatProcessor:
64
 
65
  def process_chat(self, query: str) -> Dict[str, Any]:
66
  try:
67
- # Fetch profile and history in parallel
68
- with ThreadPoolExecutor(max_workers=2) as executor:
69
- future_profile = executor.submit(self.chat_session.get_name_and_age)
70
- future_history = executor.submit(self.chat_session.load_chat_history)
71
-
72
- profile = future_profile.result()
73
- future_history.result() # Wait for history to load
74
-
75
  name = profile['name']
76
  age = profile['age']
77
-
78
- # Update title and get history format
79
- self.chat_session.update_title(self.session_id, query)
80
  history = self.chat_session.format_history()
81
 
82
- # Get enhanced query
83
- history_based_prompt = HISTORY_BASED_PROMPT.format(history=history, query=query)
84
- enhanced_query = model.send_message_openrouter(history_based_prompt)
85
 
86
- # Ensure session and get user preferences
87
  self.session_id = self.ensure_valid_session(title=enhanced_query)
88
  permission = self.chat_session.get_user_preferences()
89
-
90
- # Extract permissions
91
- websearch_enabled = permission.get('websearch', False)
92
  env_recommendations = permission.get('environmental_recommendations', False)
93
  personalized_recommendations = permission.get('personalized_recommendations', False)
94
  keywords_permission = permission.get('keywords', False)
95
  reference_permission = permission.get('references', False)
96
  language = self.chat_session.get_language().lower()
97
 
98
- language_prompt = LANGUAGE_RESPONSE_PROMPT.format(language=language)
99
 
100
- if websearch_enabled:
101
- # Run web and image search in parallel
 
102
  with ThreadPoolExecutor(max_workers=2) as executor:
103
  future_web = executor.submit(self.web_searcher.search, enhanced_query)
104
  future_images = executor.submit(self.web_searcher.search_images, enhanced_query)
105
  web_results = future_web.result()
106
  image_results = future_images.result()
107
 
108
- # Efficiently build context and references
109
  context_parts = []
110
  references = []
111
 
@@ -116,27 +94,13 @@ class ChatProcessor:
116
 
117
  context = "\n".join(context_parts)
118
 
119
- # Pre-fetch user details and environmental data in parallel if needed
120
- if env_recommendations or personalized_recommendations:
121
- with ThreadPoolExecutor(max_workers=2) as executor:
122
- futures = {}
123
- if personalized_recommendations:
124
- futures['user_details'] = executor.submit(self.chat_session.get_personalized_recommendation)
125
- if env_recommendations:
126
- futures['env_data'] = executor.submit(self.environment_data.get_environmental_data)
127
-
128
- # Get results as they complete
129
- user_details = futures.get('user_details').result() if 'user_details' in futures else None
130
- env_data = futures.get('env_data').result() if 'env_data' in futures else None
131
-
132
- # Select appropriate prompt
133
  if env_recommendations and personalized_recommendations:
134
  prompt = ENVIRONMENTAL_PERSONALIZED_PROMPT.format(
135
  user_name=name,
136
  user_age=age,
137
  history=history,
138
- user_details=user_details,
139
- environmental_condition=env_data,
140
  previous_history=history,
141
  context=context,
142
  current_query=enhanced_query
@@ -145,16 +109,16 @@ class ChatProcessor:
145
  prompt = PERSONALIZED_PROMPT.format(
146
  user_name=name,
147
  user_age=age,
148
- user_details=user_details,
149
  previous_history=history,
150
  context=context,
151
  current_query=enhanced_query
152
  )
153
- elif env_recommendations:
154
  prompt = ENVIRONMENTAL_PROMPT.format(
155
  user_name=name,
156
  user_age=age,
157
- environmental_condition=env_data,
158
  previous_history=history,
159
  context=context,
160
  current_query=enhanced_query
@@ -166,12 +130,16 @@ class ChatProcessor:
166
  current_query=enhanced_query
167
  )
168
 
169
- prompt += language_prompt
170
- response = model.llm(prompt, enhanced_query)
171
 
172
- # Process keywords only if needed
173
- keywords = self.extract_keywords_yake(response, language=language) if keywords_permission else ""
174
- references = "" if not reference_permission else references
 
 
 
 
 
175
 
176
  chat_data = {
177
  "query": enhanced_query,
@@ -190,56 +158,42 @@ class ChatProcessor:
190
  return chat_data
191
 
192
  else:
193
- # Start RAG timing
194
- start_time = datetime.now(timezone.utc)
195
-
196
- # Launch image search in the background while doing vector search
197
  with ThreadPoolExecutor(max_workers=2) as executor:
198
  future_images = executor.submit(self.web_searcher.search_images, enhanced_query)
199
- future_vector = executor.submit(vectordb.search, enhanced_query, 3)
200
-
201
- results = future_vector.result()
202
  image_results = future_images.result()
203
 
204
- # Process vector search results
 
 
 
205
  context_parts = []
206
  references = []
207
- seen_pages = set()
208
- attach_image = False
209
 
210
  for result in results:
211
  confidence = result['confidence']
212
  if confidence > 60:
213
  context_parts.append(f"Content: {result['content']}")
214
  page = result['page']
215
- if page not in seen_pages:
216
  references.append(f"Source: {result['source']}, Page: {page}")
217
  seen_pages.add(page)
218
- attach_image = True
219
-
220
- context = "\n".join(context_parts) if context_parts else "There is no context found unfortunately"
221
-
222
- # Pre-fetch user details and environmental data in parallel if needed
223
- if env_recommendations or personalized_recommendations:
224
- with ThreadPoolExecutor(max_workers=2) as executor:
225
- futures = {}
226
- if personalized_recommendations:
227
- futures['user_details'] = executor.submit(self.chat_session.get_personalized_recommendation)
228
- if env_recommendations:
229
- futures['env_data'] = executor.submit(self.environment_data.get_environmental_data)
230
-
231
- # Get results as they complete
232
- user_details = futures.get('user_details').result() if 'user_details' in futures else None
233
- env_data = futures.get('env_data').result() if 'env_data' in futures else None
234
-
235
- # Select appropriate prompt
236
- if env_recommendations and personalized_recommendations:
237
  prompt = ENVIRONMENTAL_PERSONALIZED_PROMPT.format(
238
  user_name=name,
239
- user_age=age,
240
  history=history,
241
- user_details=user_details,
242
- environmental_condition=env_data,
243
  previous_history=history,
244
  context=context,
245
  current_query=enhanced_query
@@ -248,16 +202,16 @@ class ChatProcessor:
248
  prompt = PERSONALIZED_PROMPT.format(
249
  user_name=name,
250
  user_age=age,
251
- user_details=user_details,
252
  previous_history=history,
253
  context=context,
254
  current_query=enhanced_query
255
  )
256
- elif env_recommendations:
257
  prompt = ENVIRONMENTAL_PROMPT.format(
258
  user_name=name,
259
  user_age=age,
260
- environmental_condition=env_data,
261
  previous_history=history,
262
  context=context,
263
  current_query=enhanced_query
@@ -269,23 +223,24 @@ class ChatProcessor:
269
  current_query=enhanced_query
270
  )
271
 
272
- prompt += language_prompt
273
- response = model.llm(prompt, query)
 
 
274
  end_time = datetime.now(timezone.utc)
275
 
276
- # Process keywords only if needed
277
  keywords = ""
278
- if keywords_permission and attach_image:
 
279
  keywords = self.extract_keywords_yake(response, language=language)
280
 
281
- references = "" if not reference_permission else references
282
- image_results = "" if not attach_image else image_results
283
 
284
- # Find where to truncate the response if needed
285
- match = re.search(r'(## Personal Recommendations|## Environmental Considerations)', response)
286
- truncated_response = response[:match.start()].strip() if match else response
287
 
288
- # Prepare chat data
289
  chat_data = {
290
  "query": enhanced_query,
291
  "response": response,
@@ -297,26 +252,15 @@ class ChatProcessor:
297
  "timestamp": datetime.now(timezone.utc).isoformat(),
298
  "session_id": self.chat_session.session_id
299
  }
300
-
301
- # Save in parallel
302
- with ThreadPoolExecutor(max_workers=2) as executor:
303
- future_save_details = executor.submit(
304
- self.chat_session.save_details,
305
- session_id=self.session_id,
306
- context=context,
307
- query=enhanced_query,
308
- response=truncated_response,
309
- rag_start_time=start_time,
310
- rag_end_time=end_time
311
- )
312
- future_save_chat = executor.submit(self.chat_session.save_chat, chat_data)
313
-
314
- # Check results
315
- if not future_save_details.result():
316
- raise ValueError("Failed to save the RAG details")
317
- if not future_save_chat.result():
318
- raise ValueError("Failed to save chat message")
319
-
320
  return chat_data
321
 
322
  except Exception as e:
 
1
  from datetime import datetime, timezone
2
  from typing import Optional, Dict, Any
3
+ from concurrent.futures import ThreadPoolExecutor
 
4
  from yake import KeywordExtractor
5
  from app.services.chathistory import ChatSession
6
  from app.services.websearch import WebSearch
 
9
  from app.services.prompts import *
10
  from app.services.vector_database_search import VectorDatabaseSearch
11
  import re
 
 
12
  vectordb = VectorDatabaseSearch()
 
13
 
14
  class ChatProcessor:
15
  def __init__(self, token: str, session_id: Optional[str] = None, num_results: int = 3, num_images: int = 3):
 
23
  self.environment_data = EnvironmentalData(city)
24
  self.web_searcher = WebSearch(num_results=num_results, max_images=num_images)
25
  self.web_search_required = True
 
26
 
27
+ def extract_keywords_yake(self, text: str, language: str, max_ngram_size: int = 2, num_keywords: int = 4) -> list:
 
 
28
  lang_code = "en"
29
  if language.lower() == "urdu":
30
  lang_code = "ur"
31
+
32
+ kw_extractor = KeywordExtractor(
33
  lan=lang_code,
34
  n=max_ngram_size,
35
  top=num_keywords,
36
  features=None
37
  )
 
 
 
38
  keywords = kw_extractor.extract_keywords(text)
39
  return [kw[0] for kw in keywords]
40
 
 
54
 
55
  def process_chat(self, query: str) -> Dict[str, Any]:
56
  try:
57
+ profile = self.chat_session.get_name_and_age()
 
 
 
 
 
 
 
58
  name = profile['name']
59
  age = profile['age']
60
+ self.chat_session.load_chat_history()
61
+ self.chat_session.update_title(self.session_id,query)
 
62
  history = self.chat_session.format_history()
63
 
64
+ history_based_prompt = HISTORY_BASED_PROMPT.format(history=history,query= query)
65
+
66
+ enhanced_query = Model().send_message_openrouter(history_based_prompt)
67
 
 
68
  self.session_id = self.ensure_valid_session(title=enhanced_query)
69
  permission = self.chat_session.get_user_preferences()
70
+ websearch_enabled = permission.get('websearch', False)
 
 
71
  env_recommendations = permission.get('environmental_recommendations', False)
72
  personalized_recommendations = permission.get('personalized_recommendations', False)
73
  keywords_permission = permission.get('keywords', False)
74
  reference_permission = permission.get('references', False)
75
  language = self.chat_session.get_language().lower()
76
 
 
77
 
78
+ language_prompt = LANGUAGE_RESPONSE_PROMPT.format(language = language)
79
+
80
+ if websearch_enabled :
81
  with ThreadPoolExecutor(max_workers=2) as executor:
82
  future_web = executor.submit(self.web_searcher.search, enhanced_query)
83
  future_images = executor.submit(self.web_searcher.search_images, enhanced_query)
84
  web_results = future_web.result()
85
  image_results = future_images.result()
86
 
 
87
  context_parts = []
88
  references = []
89
 
 
94
 
95
  context = "\n".join(context_parts)
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  if env_recommendations and personalized_recommendations:
98
  prompt = ENVIRONMENTAL_PERSONALIZED_PROMPT.format(
99
  user_name=name,
100
  user_age=age,
101
  history=history,
102
+ user_details=self.chat_session.get_personalized_recommendation(),
103
+ environmental_condition=self.environment_data.get_environmental_data(),
104
  previous_history=history,
105
  context=context,
106
  current_query=enhanced_query
 
109
  prompt = PERSONALIZED_PROMPT.format(
110
  user_name=name,
111
  user_age=age,
112
+ user_details=self.chat_session.get_personalized_recommendation(),
113
  previous_history=history,
114
  context=context,
115
  current_query=enhanced_query
116
  )
117
+ elif env_recommendations :
118
  prompt = ENVIRONMENTAL_PROMPT.format(
119
  user_name=name,
120
  user_age=age,
121
+ environmental_condition=self.environment_data.get_environmental_data(),
122
  previous_history=history,
123
  context=context,
124
  current_query=enhanced_query
 
130
  current_query=enhanced_query
131
  )
132
 
133
+ prompt = prompt + language_prompt
 
134
 
135
+ response = Model().llm(prompt,enhanced_query)
136
+
137
+ keywords = ""
138
+
139
+ if (keywords_permission):
140
+ keywords = self.extract_keywords_yake(response, language=language)
141
+ if (not reference_permission):
142
+ references = ""
143
 
144
  chat_data = {
145
  "query": enhanced_query,
 
158
  return chat_data
159
 
160
  else:
161
+ attach_image = False
162
+
 
 
163
  with ThreadPoolExecutor(max_workers=2) as executor:
164
  future_images = executor.submit(self.web_searcher.search_images, enhanced_query)
 
 
 
165
  image_results = future_images.result()
166
 
167
+ start_time = datetime.now(timezone.utc)
168
+
169
+ results = vectordb.search( query=enhanced_query, top_k=3)
170
+
171
  context_parts = []
172
  references = []
173
+ seen_pages = set()
 
174
 
175
  for result in results:
176
  confidence = result['confidence']
177
  if confidence > 60:
178
  context_parts.append(f"Content: {result['content']}")
179
  page = result['page']
180
+ if page not in seen_pages: # Only append if page is not seen
181
  references.append(f"Source: {result['source']}, Page: {page}")
182
  seen_pages.add(page)
183
+ attach_image = True
184
+
185
+ context = "\n".join(context_parts)
186
+
187
+ if not context or len(context) < 10:
188
+ context = "There is no context found unfortunately"
189
+
190
+ if env_recommendations and personalized_recommendations:
 
 
 
 
 
 
 
 
 
 
 
191
  prompt = ENVIRONMENTAL_PERSONALIZED_PROMPT.format(
192
  user_name=name,
193
+ user_age = age,
194
  history=history,
195
+ user_details=self.chat_session.get_personalized_recommendation(),
196
+ environmental_condition=self.environment_data.get_environmental_data(),
197
  previous_history=history,
198
  context=context,
199
  current_query=enhanced_query
 
202
  prompt = PERSONALIZED_PROMPT.format(
203
  user_name=name,
204
  user_age=age,
205
+ user_details=self.chat_session.get_personalized_recommendation(),
206
  previous_history=history,
207
  context=context,
208
  current_query=enhanced_query
209
  )
210
+ elif env_recommendations :
211
  prompt = ENVIRONMENTAL_PROMPT.format(
212
  user_name=name,
213
  user_age=age,
214
+ environmental_condition=self.environment_data.get_environmental_data(),
215
  previous_history=history,
216
  context=context,
217
  current_query=enhanced_query
 
223
  current_query=enhanced_query
224
  )
225
 
226
+ prompt = prompt + language_prompt
227
+
228
+ response = Model().llm(prompt,query)
229
+
230
  end_time = datetime.now(timezone.utc)
231
 
 
232
  keywords = ""
233
+
234
+ if (keywords_permission):
235
  keywords = self.extract_keywords_yake(response, language=language)
236
 
237
+ if (not reference_permission):
238
+ references = ""
239
 
240
+ if not attach_image:
241
+ image_results = ""
242
+ keywords = ""
243
 
 
244
  chat_data = {
245
  "query": enhanced_query,
246
  "response": response,
 
252
  "timestamp": datetime.now(timezone.utc).isoformat(),
253
  "session_id": self.chat_session.session_id
254
  }
255
+ match = re.search(r'(## Personal Recommendations|## Environmental Considerations)', response)
256
+ if match:
257
+ truncated_response = response[:match.start()].strip()
258
+ else:
259
+ truncated_response = response
260
+ if not self.chat_session.save_details(session_id=self.session_id , context= context , query= enhanced_query , response=truncated_response , rag_start_time=start_time , rag_end_time=end_time ):
261
+ raise ValueError("Failed to save the RAG details")
262
+ if not self.chat_session.save_chat(chat_data):
263
+ raise ValueError("Failed to save chat message")
 
 
 
 
 
 
 
 
 
 
 
264
  return chat_data
265
 
266
  except Exception as e:
pyproject.toml CHANGED
@@ -41,7 +41,8 @@ dependencies = [
41
  "python-pptx==1.0.2",
42
  "puremagic==1.28",
43
  "charset-normalizer==3.4.1",
44
- "pytesseract==0.3.13"
 
45
  ]
46
 
47
  [build-system]
 
41
  "python-pptx==1.0.2",
42
  "puremagic==1.28",
43
  "charset-normalizer==3.4.1",
44
+ "pytesseract==0.3.13"
46
  ]
47
 
48
  [build-system]