Files changed (1)
  1. data_processing.py +31 -10
data_processing.py CHANGED
@@ -22,6 +22,7 @@ import google.generativeai as genai
 import urllib
 import pysolr
 import config # Import the config module to access remote host details
+import tiktoken

 from llm_prompts import (
     get_analysis_plan_prompt,
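Note: tiktoken is OpenAI's tokenizer library, so the "gpt-4" encoding used throughout this diff only approximates token counts for the Gemini model served through google.generativeai; it is a reasonable proxy for usage tracking, not the model's own tokenizer. A minimal sketch of the counting pattern, with the SDK's native counter as an alternative:

    import tiktoken

    # tiktoken maps "gpt-4" to the cl100k_base encoding; this is an
    # approximation for Gemini text, which uses a different tokenizer.
    encoding = tiktoken.encoding_for_model("gpt-4")
    n_tokens = len(encoding.encode("some prompt text"))

    # Exact counts are available from google.generativeai itself, if preferred:
    # n_tokens = llm_model.count_tokens("some prompt text").total_tokens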
@@ -51,11 +52,11 @@ def llm_generate_analysis_plan_with_history(llm_model, natural_language_query, c

         if intent != 'search_list':
             print(f"API returned intent '{intent}' which is not 'search_list'. Aborting analysis.")
-            return None, None, None, intent
+            return None, None, None, intent, None, None, None

     except Exception as e:
         print(f"Warning: Could not retrieve dynamic search fields. Proceeding without them. Error: {e}")
-        return None, [], None, 'api_error'
+        return None, [], None, 'api_error', None, None, None

     core_name = search_name if search_name else 'news'
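Every return path of llm_generate_analysis_plan_with_history now carries seven values: the original four plus input, output, and total token counts, which are None on the error paths. A hypothetical call site, with the trailing arguments elided since the hunk header truncates the signature:

    (plan, mapped_search_fields, core_name, intent,
     input_tokens, output_tokens, total_tokens) = llm_generate_analysis_plan_with_history(
        llm_model, natural_language_query)  # further arguments elided above
    if plan is None:
        # Token counts are None whenever planning fails.
        print(f"No analysis plan (intent={intent}); token counts unavailable.")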
 
@@ -75,17 +76,21 @@ def llm_generate_analysis_plan_with_history(llm_model, natural_language_query, c

     try:
         response = llm_model.generate_content(prompt)
+        encoding = tiktoken.encoding_for_model("gpt-4")
+        input_token_count = len(encoding.encode(prompt))
+        output_token_count = len(encoding.encode(response.text))
+        total_token_count = input_token_count + output_token_count
         cleaned_text = re.sub(r'```json\s*|\s*```', '', response.text, flags=re.MULTILINE | re.DOTALL).strip()
         plan = json.loads(cleaned_text)
-        return plan, mapped_search_fields, core_name, intent
+        return plan, mapped_search_fields, core_name, intent, input_token_count, output_token_count, total_token_count
     except json.JSONDecodeError as e:
         raw_response_text = response.text if 'response' in locals() else 'N/A'
         print(f"Error decoding JSON from LLM response: {e}\nRaw Response:\n{raw_response_text}")
-        return None, mapped_search_fields, core_name, intent
+        return None, mapped_search_fields, core_name, intent, None, None, None
     except Exception as e:
         raw_response_text = response.text if 'response' in locals() else 'N/A'
         print(f"Error in llm_generate_analysis_plan_with_history: {e}\nRaw Response:\n{raw_response_text}")
-        return None, mapped_search_fields, core_name, intent
+        return None, mapped_search_fields, core_name, intent, None, None, None

 def execute_quantitative_query(solr_client, plan):
     """Executes the facet query to get aggregate data."""
@@ -150,12 +155,24 @@ def llm_synthesize_enriched_report_stream(llm_model, query, quantitative_data, q
     """
     prompt = get_synthesis_report_prompt(query, quantitative_data, qualitative_data, plan)
     try:
-        response_stream = llm_model.generate_content(prompt, stream=True)
+        response_stream = llm_model.generate_content(prompt, stream=True)
+        response_text = ""
         for chunk in response_stream:
-            yield chunk.text
+            yield {"text": chunk.text, "tokens": None}
+            response_text += chunk.text
+        encoding = tiktoken.encoding_for_model("gpt-4")
+        input_token_count = len(encoding.encode(prompt))
+        output_token_count = len(encoding.encode(response_text))
+        total_token_count = input_token_count + output_token_count
+        tokens = {
+            "input": input_token_count,
+            "output": output_token_count,
+            "total": total_token_count,
+        }
+        yield {"text": None, "tokens": tokens}
     except Exception as e:
         print(f"Error in llm_synthesize_enriched_report_stream: {e}")
-        yield "Sorry, an error occurred while generating the report. Please check the logs for details."
+        yield {"text": "Sorry, an error occurred while generating the report. Please check the logs for details.", "tokens": None}

 def llm_generate_visualization_code(llm_model, query_context, facet_data):
     """Generates Python code for visualization based on query and data."""
@@ -163,12 +180,16 @@ def llm_generate_visualization_code(llm_model, query_context, facet_data):
     try:
         generation_config = genai.types.GenerationConfig(temperature=0)
         response = llm_model.generate_content(prompt, generation_config=generation_config)
+        encoding = tiktoken.encoding_for_model("gpt-4")
+        input_token_count = len(encoding.encode(prompt))
+        output_token_count = len(encoding.encode(response.text))
+        total_token_count = input_token_count + output_token_count
         code = re.sub(r'^```python\s*|```$', '', response.text, flags=re.MULTILINE)
-        return code
+        return code, input_token_count, output_token_count, total_token_count
     except Exception as e:
         raw_response_text = response.text if 'response' in locals() else 'N/A'
         print(f"Error in llm_generate_visualization_code: {e}\nRaw response: {raw_response_text}")
-        return None
+        return None, None, None, None

 def execute_viz_code_and_get_path(viz_code, facet_data):
     """Executes visualization code and returns the path to the saved plot image."""
 