Spaces:

dolphinium
/

pc-ai-data-analyst-dup

Sleeping

App Files Community

token_count

by uralk - opened 9 days ago

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+42

-110

This PR is in draft mode

Files changed (4) hide show

data_processing.py +10 -31
llm_prompts.py +9 -19
requirements.txt +1 -2
ui.py +22 -58

data_processing.py CHANGED Viewed

@@ -22,7 +22,6 @@ import google.generativeai as genai
 import urllib
 import pysolr
 import config # Import the config module to access remote host details
-import tiktoken
 from llm_prompts import (
     get_analysis_plan_prompt,
@@ -52,11 +51,11 @@ def llm_generate_analysis_plan_with_history(llm_model, natural_language_query, c
         if intent != 'search_list':
             print(f"API returned intent '{intent}' which is not 'search_list'. Aborting analysis.")
-            return None, None, None, intent, None, None, None
     except Exception as e:
         print(f"Warning: Could not retrieve dynamic search fields. Proceeding without them. Error: {e}")
-        return None, [], None, 'api_error', None, None, None
     core_name = search_name if search_name else 'news'
@@ -76,21 +75,17 @@ def llm_generate_analysis_plan_with_history(llm_model, natural_language_query, c
     try:
         response = llm_model.generate_content(prompt)
-        encoding = tiktoken.encoding_for_model("gpt-4")
-        input_token_count = len(encoding.encode(prompt))
-        output_token_count = len(encoding.encode(response.text))
-        total_token_count = (input_token_count if input_token_count is not None else 0) + (output_token_count if output_token_count is not None else 0)
         cleaned_text = re.sub(r'```json\s*|\s*```', '', response.text, flags=re.MULTILINE | re.DOTALL).strip()
         plan = json.loads(cleaned_text)
-        return plan, mapped_search_fields, core_name, intent, input_token_count, output_token_count, total_token_count
     except json.JSONDecodeError as e:
         raw_response_text = response.text if 'response' in locals() else 'N/A'
         print(f"Error decoding JSON from LLM response: {e}\nRaw Response:\n{raw_response_text}")
-        return None, mapped_search_fields, core_name, intent, None, None, None
     except Exception as e:
         raw_response_text = response.text if 'response' in locals() else 'N/A'
         print(f"Error in llm_generate_analysis_plan_with_history: {e}\nRaw Response:\n{raw_response_text}")
-        return None, mapped_search_fields, core_name, intent, None, None, None
 def execute_quantitative_query(solr_client, plan):
     """Executes the facet query to get aggregate data."""
@@ -155,24 +150,12 @@ def llm_synthesize_enriched_report_stream(llm_model, query, quantitative_data, q
     """
     prompt = get_synthesis_report_prompt(query, quantitative_data, qualitative_data, plan)
     try:
-        response_stream = llm_model.generate_content(prompt, stream=True)
-        response_text = ""
         for chunk in response_stream:
-            yield {"text": chunk.text, "tokens": None}
-            response_text += chunk.text
-        encoding = tiktoken.encoding_for_model("gpt-4")
-        input_token_count = len(encoding.encode(prompt))
-        output_token_count = len(encoding.encode(response_text))
-        total_token_count = (input_token_count if input_token_count is not None else 0) + (output_token_count if output_token_count is not None else 0)
-        tokens = {
-            "input": input_token_count,
-            "output": output_token_count,
-            "total": total_token_count,
-        }
-        yield {"text": None, "tokens": tokens}
     except Exception as e:
         print(f"Error in llm_synthesize_enriched_report_stream: {e}")
-        yield {"text": "Sorry, an error occurred while generating the report. Please check the logs for details.", "tokens": None}
 def llm_generate_visualization_code(llm_model, query_context, facet_data):
     """Generates Python code for visualization based on query and data."""
@@ -180,16 +163,12 @@ def llm_generate_visualization_code(llm_model, query_context, facet_data):
     try:
         generation_config = genai.types.GenerationConfig(temperature=0)
         response = llm_model.generate_content(prompt, generation_config=generation_config)
-        encoding = tiktoken.encoding_for_model("gpt-4")
-        input_token_count = len(encoding.encode(prompt))
-        output_token_count = len(encoding.encode(response.text))
-        total_token_count = (input_token_count if input_token_count is not None else 0) + (output_token_count if output_token_count is not None else 0)
         code = re.sub(r'^```python\s*|```$', '', response.text, flags=re.MULTILINE)
-        return code, input_token_count, output_token_count, total_token_count
     except Exception as e:
         raw_response_text = response.text if 'response' in locals() else 'N/A'
         print(f"Error in llm_generate_visualization_code: {e}\nRaw response: {raw_response_text}")
-        return
 def execute_viz_code_and_get_path(viz_code, facet_data):
     """Executes visualization code and returns the path to the saved plot image."""

 import urllib
 import pysolr
 import config # Import the config module to access remote host details
 from llm_prompts import (
     get_analysis_plan_prompt,
         if intent != 'search_list':
             print(f"API returned intent '{intent}' which is not 'search_list'. Aborting analysis.")
+            return None, None, None, intent
     except Exception as e:
         print(f"Warning: Could not retrieve dynamic search fields. Proceeding without them. Error: {e}")
+        return None, [], None, 'api_error'
     core_name = search_name if search_name else 'news'
     try:
         response = llm_model.generate_content(prompt)
         cleaned_text = re.sub(r'```json\s*|\s*```', '', response.text, flags=re.MULTILINE | re.DOTALL).strip()
         plan = json.loads(cleaned_text)
+        return plan, mapped_search_fields, core_name, intent
     except json.JSONDecodeError as e:
         raw_response_text = response.text if 'response' in locals() else 'N/A'
         print(f"Error decoding JSON from LLM response: {e}\nRaw Response:\n{raw_response_text}")
+        return None, mapped_search_fields, core_name, intent
     except Exception as e:
         raw_response_text = response.text if 'response' in locals() else 'N/A'
         print(f"Error in llm_generate_analysis_plan_with_history: {e}\nRaw Response:\n{raw_response_text}")
+        return None, mapped_search_fields, core_name, intent
 def execute_quantitative_query(solr_client, plan):
     """Executes the facet query to get aggregate data."""
     """
     prompt = get_synthesis_report_prompt(query, quantitative_data, qualitative_data, plan)
     try:
+        response_stream = llm_model.generate_content(prompt, stream=True)
         for chunk in response_stream:
+            yield chunk.text
     except Exception as e:
         print(f"Error in llm_synthesize_enriched_report_stream: {e}")
+        yield "Sorry, an error occurred while generating the report. Please check the logs for details."
 def llm_generate_visualization_code(llm_model, query_context, facet_data):
     """Generates Python code for visualization based on query and data."""
     try:
         generation_config = genai.types.GenerationConfig(temperature=0)
         response = llm_model.generate_content(prompt, generation_config=generation_config)
         code = re.sub(r'^```python\s*|```$', '', response.text, flags=re.MULTILINE)
+        return code
     except Exception as e:
         raw_response_text = response.text if 'response' in locals() else 'N/A'
         print(f"Error in llm_generate_visualization_code: {e}\nRaw response: {raw_response_text}")
+        return None
 def execute_viz_code_and_get_path(viz_code, facet_data):
     """Executes visualization code and returns the path to the saved plot image."""

llm_prompts.py CHANGED Viewed

@@ -44,11 +44,9 @@ An external API has identified the following field-value pairs from the user que
 """
     return f"""
-You are the AI Data Analyst for PharmaCircle, a leading knowledge management company dedicated to curating vast amounts of pharmaceutical, biotechnology, and drug delivery industry data into due diligence-level intelligence. Your purpose is to make PharmaCircle's complex and powerful database easily accessible through natural language, providing insightful analysis that would typically require navigating complex search interfaces.
-Your primary task is to convert a user's natural language question into a structured JSON "Analysis Plan". This plan will drive two separate, efficient queries: one for aggregate data (facets) and one for finding illustrative examples (grouping).
-Your most important job is to correctly infer the user's intent and choose an `analysis_dimension` and `analysis_measure` that provides a meaningful, non-obvious breakdown of the data that aligns with PharmaCircle's mission of tracking drug development and innovation.
 ---
 ### CONTEXT & RULES
@@ -73,9 +71,8 @@ never add an additional filter by yourself like `total_deal_value_in_million:[0
 This is the most critical part of your task. A bad choice leads to a useless, boring analysis. You must first determine the user's persona and then select the analysis parameters accordingly.
 **USER PERSONAS:**
-Your users are PharmaCircle clients, primarily from the US (70%), Europe, and Asia. They fall into two main categories:
-*   **The Financial Analyst:** This user cares about the money. They look for investments, acquisitions, deal values, and company financials to identify partnering and investment opportunities. Their queries contain terms like "deal," "value," "acquisition," "financing," "investment," or "revenue."
-*   **The Scientific Analyst:** This user cares about the science. They track drug development, from discovery to market. They look for product pipelines, clinical trial phases, therapeutic breakthroughs, formulation details, and compound data. Their queries contain terms like "drug approvals," "phase 2," "therapeutic category," "compounds," "molecule," or "mechanism."
 **1. Choosing the `analysis_measure` (The metric):**
@@ -88,20 +85,13 @@ Your users are PharmaCircle clients, primarily from the US (70%), Europe, and As
 *   **USER INTENT FIRST:** If the user explicitly asks to group by a field (e.g., "by company," "by country"), use that field.
-*   **INFERENCE HEURISTICS (If the user doesn't specify a dimension):** Think "What is the next logical question for this user persona, keeping PharmaCircle's mission in mind?"
-    *   **PharmaCircle Mission Priority:** Given PharmaCircle's focus on product pipelines and development timelines, **you should strongly prioritize `product_name`, `compound_name`, and date related fields as `analysis_dimension`s.** A time-based analysis (e.g., 'by year') or a product-focused analysis is often the most valuable insight for our users who are tracking progress, approvals, or activities over time.
     *   For a **Financial Analyst** asking about "top deals" or "recent financings," a good dimension is `company_name` (who is making deals?) or `news_type` (what kind of deals?). If the query is about "recent deals about infection," the dimension should be `company_name_invested`. Using `company_name` would pollute the data with both investor and invested companies.
-    *   For a **Scientific Analyst** asking about "drug approvals," a good dimension is `therapeutic_category` (what diseases are the approvals for?) or `company_name` (who is getting the approvals?). See the Mission Priority rule above—if the query implies a timeline, `date_year` might be even better.
-    *   For a **Scientific Analyst** asking about phase movements (e.g., "phase 2 to phase 3" or "phase 2 or phase 3"), a highly valuable dimension is `compound_name` or `product_name`. This reveals which specific products are progressing through the pipeline.
     *   If the query compares concepts like "cancer vs. infection," the dimension is `therapeutic_category`.
     *   If the query compares "oral vs. injection," the dimension is `route_branch`.
-    *   Your goal is to find a dimension that reveals a meaningful pattern in the filtered data that is relevant to the user's likely persona and PharmaCircle's core value proposition.
 ---
 ### FIELD DEFINITIONS (Your Source of Truth for Core: {core_name})
@@ -225,7 +215,7 @@ Your users are PharmaCircle clients, primarily from the US (70%), Europe, and As
         "limit": 2,
         "sort": "total_deal_value desc",
         "facet": {{
-          "total_value": "sum(total_deal_value_in_million)"
         }}
       }}
     }}

 """
     return f"""
+You are an expert financial and scientific analyst specializing in the pharmaceutical industry. Your task is to convert a natural language question into a structured JSON "Analysis Plan". This plan will be used to run two separate, efficient queries: one for aggregate data (facets) and one for finding illustrative examples (grouping).
+Your most important job is to correctly infer the user's intent (are they a scientist or a financial analyst?) and choose an `analysis_dimension` and `analysis_measure` that provides a meaningful, non-obvious breakdown of the data for them.
 ---
 ### CONTEXT & RULES
 This is the most critical part of your task. A bad choice leads to a useless, boring analysis. You must first determine the user's persona and then select the analysis parameters accordingly.
 **USER PERSONAS:**
+*   **The Financial Analyst:** This user cares about the money. They look for investments, acquisitions, deal values, and company financials. Their queries contain terms like "deal," "value," "acquisition," "financing," "investment," or "revenue."
+*   **The Scientific Analyst:** This user cares about the science. They look for product pipelines, clinical trial phases, therapeutic breakthroughs, and compound details. Their queries contain terms like "drug approvals," "phase 2," "therapeutic category," "compounds," "molecule," or "mechanism."
 **1. Choosing the `analysis_measure` (The metric):**
 *   **USER INTENT FIRST:** If the user explicitly asks to group by a field (e.g., "by company," "by country"), use that field.
+*   **INFERENCE HEURISTICS (If the user doesn't specify a dimension):** Think "What is the next logical question for this user persona?"
     *   For a **Financial Analyst** asking about "top deals" or "recent financings," a good dimension is `company_name` (who is making deals?) or `news_type` (what kind of deals?). If the query is about "recent deals about infection," the dimension should be `company_name_invested`. Using `company_name` would pollute the data with both investor and invested companies.
+    *   For a **Scientific Analyst** asking about "drug approvals," a good dimension is `therapeutic_category` (what diseases are the approvals for?) or `company_name` (who is getting the approvals?).
+    *   For a **Scientific Analyst** asking about phase movements (e.g., "phase 2 to phase 3" or "phase 2 or phase 3"), a highly valuable dimension is `compound_name`. This reveals which specific compounds are progressing through the pipeline.
     *   If the query compares concepts like "cancer vs. infection," the dimension is `therapeutic_category`.
     *   If the query compares "oral vs. injection," the dimension is `route_branch`.
+    *   Your goal is to find a dimension that reveals a meaningful pattern in the filtered data that is relevant to the user's likely persona.
 ---
 ### FIELD DEFINITIONS (Your Source of Truth for Core: {core_name})
         "limit": 2,
         "sort": "total_deal_value desc",
         "facet": {{
+          "total_deal_value": "sum(total_deal_value_in_million)"
         }}
       }}
     }}

requirements.txt CHANGED Viewed

@@ -5,5 +5,4 @@ google-generativeai
 pandas
 seaborn
 matplotlib
-IPython
-tiktoken

 pandas
 seaborn
 matplotlib
+IPython

ui.py CHANGED Viewed

@@ -70,8 +70,6 @@ def create_ui(llm_model, solr_client):
                         "Qualitative URL will appear here...", visible=False)
                     qualitative_data_display = gr.Markdown(
                         "Example data will appear here...", visible=False)
-                with gr.Accordion("Token Usage", open=False):
-                    token_summary_box = gr.Markdown(visible=False)
                 plot_display = gr.Image(
                     label="Visualization", type="filepath", visible=False)
                 report_display = gr.Markdown(
@@ -81,28 +79,25 @@ def create_ui(llm_model, solr_client):
             """
             Manages the conversation and yields UI updates.
             """
-            analysis_plan_input_token_count = analysis_plan_output_token_count = analysis_plan_total_token_count = None
-            enriched_report_input_token_count = enriched_report_output_token_count =  enriched_report_total_token_count = None
-            visualization_input_token_count = visualization_output_token_count = visualization_total_token_count = None
             if state is None:
                 state = {'query_count': 0, 'last_suggestions': []}
             if history is None:
                 history = []
             # Reset all displays at the beginning of a new flow
-            yield (history, state, gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="Suggestions from the external API will appear here...", visible=False))
             query_context = user_input.strip()
             if not query_context:
                 history.append((user_input, "Please enter a question to analyze."))
-                yield (history, state, None, None, None, None, None, None, None, None, None)
                 return
             history.append((user_input, f"Analyzing: '{query_context}'\n\n*Generating analysis plan...*"))
-            yield (history, state, None, None, None, None, None, None, None, None, None)
             # Generate plan, get search field suggestions, and intent.
-            analysis_plan, mapped_search_fields, core_name, intent, analysis_plan_input_token_count, analysis_plan_output_token_count, analysis_plan_total_token_count = llm_generate_analysis_plan_with_history(llm_model, query_context, history)
             # Update and display search field suggestions in its own accordion
             if mapped_search_fields:
@@ -117,7 +112,7 @@ def create_ui(llm_model, solr_client):
                 else:
                     message = "I'm sorry, I couldn't generate a valid analysis plan. Please try rephrasing your question."
                 history.append((None, message))
-                yield (history, state, None, None, None, None, None, None, None, None, suggestions_display_update)
                 return
             history.append((None, f"✅ Analysis plan generated for core: **`{core_name}`**"))
@@ -128,10 +123,10 @@ def create_ui(llm_model, solr_client):
         """
             history.append((None, plan_summary))
             formatted_plan = f"**Full Analysis Plan (Core: `{core_name}`):**\n```json\n{json.dumps(analysis_plan, indent=2)}\n```"
-            yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, None, None, None, suggestions_display_update)
             history.append((None, "*Executing queries for aggregates and examples...*"))
-            yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, None, None, None, suggestions_display_update)
             # --- DYNAMIC CORE SWITCH (Thread-safe) ---
             with solr_lock:
@@ -158,7 +153,7 @@ def create_ui(llm_model, solr_client):
             if not aggregate_data or aggregate_data.get('count', 0) == 0:
                 history.append((None, f"No data was found for your query in the '{core_name}' core. Please try a different question."))
-                yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, None, None, None, suggestions_display_update)
                 return
             # Display retrieved data
@@ -167,66 +162,36 @@ def create_ui(llm_model, solr_client):
             formatted_agg_data = f"**Quantitative (Aggregate) Data:**\n```json\n{json.dumps(aggregate_data, indent=2)}\n```"
             formatted_qual_data = f"**Qualitative (Example) Data:**\n```json\n{json.dumps(example_data, indent=2)}\n```"
             qual_data_display_update = gr.update(value=formatted_qual_data, visible=True)
-            yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, None, suggestions_display_update)
             history.append((None, "✅ Data retrieved. Generating visualization and final report..."))
-            yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, None, suggestions_display_update)
             # Generate viz and report
             with concurrent.futures.ThreadPoolExecutor() as executor:
                 viz_future = executor.submit(llm_generate_visualization_code, llm_model, query_context, aggregate_data)
-                viz_code, visualization_input_token_count, visualization_output_token_count, visualization_total_token_count = viz_future.result()
                 report_text = ""
                 stream_history = history[:]
-                report_stream = llm_synthesize_enriched_report_stream(llm_model, query_context, aggregate_data, example_data, analysis_plan)
-                for item in report_stream:
-                    if item["text"] is not None:
-                        report_text += item["text"]
-                        yield (stream_history, state, None, gr.update(value=report_text, visible=True), gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, None, suggestions_display_update)
-                    elif item["tokens"] is not None:
-                        enriched_report_input_token_count = item["tokens"]["input"]
-                        enriched_report_output_token_count = item["tokens"]["output"]
-                        enriched_report_total_token_count = item["tokens"]["total"]
                 history.append((None, report_text))
                 plot_path = execute_viz_code_and_get_path(viz_code, aggregate_data)
                 output_plot = gr.update(value=plot_path, visible=True) if plot_path else gr.update(visible=False)
                 if not plot_path:
                     history.append((None, "*I was unable to generate a plot for this data.*\n"))
-                cumulative_tokens = sum(filter(None, [
-                    analysis_plan_total_token_count,
-                    enriched_report_total_token_count,
-                    visualization_total_token_count
-                ]))
-                total_input = sum(filter(None, [
-                    analysis_plan_input_token_count,
-                    enriched_report_input_token_count,
-                    visualization_input_token_count
-                ]))
-                total_output = sum(filter(None, [
-                    analysis_plan_output_token_count,
-                    enriched_report_output_token_count,
-                    visualization_output_token_count
-                ]))
-                expected_cost = round((total_input*0.3+total_output*2.5)/1000000, 3)
-                token_summary_box_update = gr.update(
-                    value=f"""**Analysis Plan Tokens** → Prompt: `{analysis_plan_input_token_count or '-'}`,  Output: `{analysis_plan_output_token_count or '-'}`,  Total: `{analysis_plan_total_token_count or '-'}`
-                    **Report Tokens** → Prompt: `{enriched_report_input_token_count or '-'}`,  Output: `{enriched_report_output_token_count or '-'}`,  Total: `{enriched_report_total_token_count or '-'}`
-                    **Visualization Tokens** → Prompt: `{visualization_input_token_count or '-'}`,  Output: `{visualization_output_token_count or '-'}`,  Total: `{visualization_total_token_count or '-'}`
-                    **Cumulative Tokens** → `{cumulative_tokens}`
-                    **Expected Cost** → `{expected_cost}$`""",
-                    visible=True
-                    )
-                yield (history, state, output_plot, gr.update(value=report_text), gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, token_summary_box_update, suggestions_display_update)
             state['query_count'] += 1
             state['last_suggestions'] = parse_suggestions_from_report(report_text)
             next_prompt = "Analysis complete. What would you like to explore next?"
             history.append((None, next_prompt))
-            yield (history, state, output_plot, gr.update(value=report_text), gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, token_summary_box_update, suggestions_display_update)
         def reset_all():
             """Resets the entire UI for a new analysis session."""
@@ -241,7 +206,6 @@ def create_ui(llm_model, solr_client):
                 gr.update(value=None, visible=False),
                 gr.update(value=None, visible=False),
                 gr.update(value=None, visible=False),
-                gr.update(value=None, visible=False),
                 gr.update(value=None, visible=False)
             )
@@ -249,7 +213,7 @@ def create_ui(llm_model, solr_client):
             fn=process_analysis_flow,
             inputs=[msg_textbox, chatbot, state],
             outputs=[chatbot, state, plot_display, report_display, plan_display, quantitative_url_display,
-                     quantitative_data_display, qualitative_url_display, qualitative_data_display, token_summary_box, suggestions_display],
         ).then(
             lambda: gr.update(value=""),
             None,
@@ -261,7 +225,7 @@ def create_ui(llm_model, solr_client):
             fn=reset_all,
             inputs=None,
             outputs=[chatbot, state, msg_textbox, plot_display, report_display, plan_display, quantitative_url_display,
-                     quantitative_data_display, qualitative_url_display, qualitative_data_display, token_summary_box, suggestions_display],
             queue=False
         )

                         "Qualitative URL will appear here...", visible=False)
                     qualitative_data_display = gr.Markdown(
                         "Example data will appear here...", visible=False)
                 plot_display = gr.Image(
                     label="Visualization", type="filepath", visible=False)
                 report_display = gr.Markdown(
             """
             Manages the conversation and yields UI updates.
             """
             if state is None:
                 state = {'query_count': 0, 'last_suggestions': []}
             if history is None:
                 history = []
             # Reset all displays at the beginning of a new flow
+            yield (history, state, gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="Suggestions from the external API will appear here...", visible=False))
             query_context = user_input.strip()
             if not query_context:
                 history.append((user_input, "Please enter a question to analyze."))
+                yield (history, state, None, None, None, None, None, None, None, None)
                 return
             history.append((user_input, f"Analyzing: '{query_context}'\n\n*Generating analysis plan...*"))
+            yield (history, state, None, None, None, None, None, None, None, None)
             # Generate plan, get search field suggestions, and intent.
+            analysis_plan, mapped_search_fields, core_name, intent = llm_generate_analysis_plan_with_history(llm_model, query_context, history)
             # Update and display search field suggestions in its own accordion
             if mapped_search_fields:
                 else:
                     message = "I'm sorry, I couldn't generate a valid analysis plan. Please try rephrasing your question."
                 history.append((None, message))
+                yield (history, state, None, None, None, None, None, None, None, suggestions_display_update)
                 return
             history.append((None, f"✅ Analysis plan generated for core: **`{core_name}`**"))
         """
             history.append((None, plan_summary))
             formatted_plan = f"**Full Analysis Plan (Core: `{core_name}`):**\n```json\n{json.dumps(analysis_plan, indent=2)}\n```"
+            yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, None, None, suggestions_display_update)
             history.append((None, "*Executing queries for aggregates and examples...*"))
+            yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, None, None, suggestions_display_update)
             # --- DYNAMIC CORE SWITCH (Thread-safe) ---
             with solr_lock:
             if not aggregate_data or aggregate_data.get('count', 0) == 0:
                 history.append((None, f"No data was found for your query in the '{core_name}' core. Please try a different question."))
+                yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, None, None, suggestions_display_update)
                 return
             # Display retrieved data
             formatted_agg_data = f"**Quantitative (Aggregate) Data:**\n```json\n{json.dumps(aggregate_data, indent=2)}\n```"
             formatted_qual_data = f"**Qualitative (Example) Data:**\n```json\n{json.dumps(example_data, indent=2)}\n```"
             qual_data_display_update = gr.update(value=formatted_qual_data, visible=True)
+            yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
             history.append((None, "✅ Data retrieved. Generating visualization and final report..."))
+            yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
             # Generate viz and report
             with concurrent.futures.ThreadPoolExecutor() as executor:
                 viz_future = executor.submit(llm_generate_visualization_code, llm_model, query_context, aggregate_data)
                 report_text = ""
                 stream_history = history[:]
+                for chunk in llm_synthesize_enriched_report_stream(llm_model, query_context, aggregate_data, example_data, analysis_plan):
+                    report_text += chunk
+                    yield (stream_history, state, None, gr.update(value=report_text, visible=True), gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
                 history.append((None, report_text))
+                viz_code = viz_future.result()
                 plot_path = execute_viz_code_and_get_path(viz_code, aggregate_data)
                 output_plot = gr.update(value=plot_path, visible=True) if plot_path else gr.update(visible=False)
                 if not plot_path:
                     history.append((None, "*I was unable to generate a plot for this data.*\n"))
+                yield (history, state, output_plot, gr.update(value=report_text), gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
             state['query_count'] += 1
             state['last_suggestions'] = parse_suggestions_from_report(report_text)
             next_prompt = "Analysis complete. What would you like to explore next?"
             history.append((None, next_prompt))
+            yield (history, state, output_plot, gr.update(value=report_text), gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
         def reset_all():
             """Resets the entire UI for a new analysis session."""
                 gr.update(value=None, visible=False),
                 gr.update(value=None, visible=False),
                 gr.update(value=None, visible=False),
                 gr.update(value=None, visible=False)
             )
             fn=process_analysis_flow,
             inputs=[msg_textbox, chatbot, state],
             outputs=[chatbot, state, plot_display, report_display, plan_display, quantitative_url_display,
+                     quantitative_data_display, qualitative_url_display, qualitative_data_display, suggestions_display],
         ).then(
             lambda: gr.update(value=""),
             None,
             fn=reset_all,
             inputs=None,
             outputs=[chatbot, state, msg_textbox, plot_display, report_display, plan_display, quantitative_url_display,
+                     quantitative_data_display, qualitative_url_display, qualitative_data_display, suggestions_display],
             queue=False
         )