Spaces:

fdaudens
/

perspicacity

Sleeping

App Files Community

fdaudens HF Staff committed 27 days ago

Commit

fd36d8c

verified ·

1 Parent(s): b4b4ca8

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -25

app.py CHANGED Viewed

@@ -311,8 +311,29 @@ async def run_query(query: str):
             session_id="web-agent-session",
             user_id=ANON_USER_ID,
         ):
-            response_gen = await web_agent.astream(query, ctx=ctx)
-            return response_gen
     finally:
         instrumentor.flush()
@@ -325,31 +346,13 @@ async def gradio_query(user_input, chat_history=None):
     history.append({"role": "assistant", "content": "Thinking..."})
     yield history, history
-    # Run the query (non-streaming at the agent level)
-    result = await run_query(user_input)
-    text = result.response if isinstance(result.response, str) else str(result.response)
-    # Simulate streaming by yielding partial responses
-    # This is a simple approach - we'll show the response word by word
-    words = text.split()
-    partial_response = ""
-    for i, word in enumerate(words):
-        # Add the word to the partial response
-        partial_response += word + " "
-        # Update the history with the partial response
-        history[-1]["content"] = partial_response
-        # Yield the updated history
         yield history, history
-        # Add a small delay to simulate typing (can be adjusted)
-        await asyncio.sleep(0.05)
-    # Final update with complete response
-    history[-1]["content"] = text
-    yield history, history
 # Build and launch Gradio app
 grb = gr.Blocks()

             session_id="web-agent-session",
             user_id=ANON_USER_ID,
         ):
+            # We'll use this to accumulate the response for the final trace
+            full_response = ""
+            # Return a generator that we can iterate through in gradio_query
+            async def response_generator():
+                nonlocal full_response
+                # Use the agent normally, but we'll handle streaming separately
+                result = await web_agent.run(query, ctx=ctx)
+                # Get the final response
+                response_text = result.response if isinstance(result.response, str) else str(result.response)
+                # Simulate streaming by yielding words
+                words = response_text.split()
+                partial = ""
+                for word in words:
+                    partial += word + " "
+                    yield partial
+                    await asyncio.sleep(0.05)  # Simulate typing speed
+                # Store the full response for tracing
+                full_response = response_text
+            return response_generator()
     finally:
         instrumentor.flush()
     history.append({"role": "assistant", "content": "Thinking..."})
     yield history, history
+    # Get response generator
+    response_gen = await run_query(user_input)
+    # Update the response as chunks come in
+    async for chunk in response_gen:
+        history[-1]["content"] = chunk
         yield history, history
 # Build and launch Gradio app
 grb = gr.Blocks()