fdaudens HF Staff committed on
Commit
ac301bc
·
verified ·
1 Parent(s): c266c49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +366 -1
app.py CHANGED
@@ -1,3 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  async def run_query(query: str):
2
  trace_id = f"agent-run-{uuid.uuid4().hex}"
3
  try:
@@ -81,4 +388,62 @@ async def run_query(query: str):
81
  except Exception as e:
82
  yield f"❌ Error: {str(e)}"
83
  finally:
84
- instrumentor.flush()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import logging
4
+ import asyncio
5
+ import nest_asyncio
6
+ from datetime import datetime
7
+ import uuid
8
+ import aiohttp
9
+ import gradio as gr
10
+ import requests
11
+ import xml.etree.ElementTree as ET
12
+ import json
13
+
14
+ from langfuse.llama_index import LlamaIndexInstrumentor
15
+ from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
16
+ from llama_index.tools.weather import OpenWeatherMapToolSpec
17
+ from llama_index.tools.playwright import PlaywrightToolSpec
18
+ from llama_index.core.tools import FunctionTool
19
+ from llama_index.core.agent.workflow import AgentWorkflow
20
+ from llama_index.core.workflow import Context
21
+ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
22
+ from llama_index.core.memory import ChatMemoryBuffer
23
+ from llama_index.readers.web import RssReader, SimpleWebPageReader
24
+ from llama_index.core import SummaryIndex
25
+
26
+ import subprocess
27
+ subprocess.run(["playwright", "install"])
28
+
29
+ # allow nested loops in Spaces
30
+ nest_asyncio.apply()
31
+
32
+ # --- Langfuse ---
33
+ instrumentor = LlamaIndexInstrumentor(
34
+ public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
35
+ secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
36
+ host=os.environ.get("LANGFUSE_HOST"),
37
+ )
38
+ instrumentor.start()
39
+
40
+ # --- Secrets via env vars ---
41
+ HF_TOKEN = os.getenv("HF_TOKEN")
42
+ # OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
43
+ OPENWEATHERMAP_KEY = os.getenv("OPENWEATHERMAP_API_KEY")
44
+ SERPER_API_KEY = os.getenv("SERPER_API_KEY")
45
+
46
+ # --- LLMs ---
47
+ llm = HuggingFaceInferenceAPI(
48
+ model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
49
+ token=HF_TOKEN,
50
+ task="conversational",
51
+ streaming=True
52
+ )
53
+
54
+ memory = ChatMemoryBuffer.from_defaults(token_limit=8192)
55
+ today_str = datetime.now().strftime("%B %d, %Y")
56
+ ANON_USER_ID = os.environ.get("ANON_USER_ID", uuid.uuid4().hex)
57
+
58
+ # # OpenAI for pure function-calling
59
+ # openai_llm = OpenAI(
60
+ # model="gpt-4o",
61
+ # api_key=OPENAI_API_KEY,
62
+ # temperature=0.0,
63
+ # streaming=False,
64
+ # )
65
+
66
+ # --- Tools Setup ---
67
+ # DuckDuckGo
68
+ # duck_spec = DuckDuckGoSearchToolSpec()
69
+ # search_tool = FunctionTool.from_defaults(duck_spec.duckduckgo_full_search)
70
+
71
+ # Weather
72
+ openweather_api_key=OPENWEATHERMAP_KEY
73
+ weather_tool_spec = OpenWeatherMapToolSpec(key=openweather_api_key)
74
+ weather_tool = FunctionTool.from_defaults(
75
+ weather_tool_spec.weather_at_location,
76
+ name="current_weather",
77
+ description="Get the current weather at a specific location (city, country)."
78
+ )
79
+ forecast_tool = FunctionTool.from_defaults(
80
+ weather_tool_spec.forecast_tommorrow_at_location,
81
+ name="weather_forecast",
82
+ description="Get tomorrow's weather forecast for a specific location (city, country)."
83
+ )
84
+
85
+ # Playwright (synchronous start)
86
+ # async def _start_browser():
87
+ # return await PlaywrightToolSpec.create_async_playwright_browser(headless=True)
88
+ # browser = asyncio.get_event_loop().run_until_complete(_start_browser())
89
+ # playwright_tool_spec = PlaywrightToolSpec.from_async_browser(browser)
90
+
91
+ # navigate_tool = FunctionTool.from_defaults(
92
+ # playwright_tool_spec.navigate_to,
93
+ # name="web_navigate",
94
+ # description="Navigate to a specific URL."
95
+ # )
96
+ # extract_text_tool = FunctionTool.from_defaults(
97
+ # playwright_tool_spec.extract_text,
98
+ # name="web_extract_text",
99
+ # description="Extract all text from the current page."
100
+ # )
101
+ # extract_links_tool = FunctionTool.from_defaults(
102
+ # playwright_tool_spec.extract_hyperlinks,
103
+ # name="web_extract_links",
104
+ # description="Extract all hyperlinks from the current page."
105
+ # )
106
+
107
+ # Google News RSS
108
+ # def fetch_google_news_rss():
109
+ # docs = RssReader(html_to_text=True).load_data(["https://news.google.com/rss"])
110
+ # return [{"title":d.metadata.get("title",""), "url":d.metadata.get("link","")} for d in docs]
111
+
112
+ # -----------------------------
113
+ # Google News RSS
114
+ # -----------------------------
115
+
116
def fetch_news_headlines() -> str:
    """Fetch the latest headlines from the Google News RSS feed.

    Requests ``https://news.google.com/rss``, parses the response as XML and
    formats the first five ``<item>`` elements found anywhere in the document.

    Returns:
        A newline-joined text listing with the title, publication date, link
        and description of each article (entries are separated by ``---``
        lines), ``"No news articles found."`` when the feed contains no
        items, or an error message if the request or the parsing fails.
        Errors are reported through the return value rather than raised.
    """
    url = "https://news.google.com/rss"
    max_items = 5
    timeout_seconds = 10

    def field_text(item, tag):
        # A missing tag and an empty tag (whose text is None) both fall back
        # to 'N/A', so the literal string "None" never reaches the output.
        return item.findtext(tag) or "N/A"

    try:
        # Without a timeout a stalled connection would block the caller
        # indefinitely. requests' Timeout is a RequestException, so it is
        # reported by the first handler below.
        response = requests.get(url, timeout=timeout_seconds)
        response.raise_for_status()

        # Parse the XML content
        root = ET.fromstring(response.content)

        # Format the news articles into a readable string
        formatted_news = []
        for item in root.findall(".//item")[:max_items]:
            formatted_news.extend(
                [
                    f"Title: {field_text(item, 'title')}",
                    f"Published: {field_text(item, 'pubDate')}",
                    f"Link: {field_text(item, 'link')}",
                    f"Description: {field_text(item, 'description')}",
                    "---",
                ]
            )

        return "\n".join(formatted_news) if formatted_news else "No news articles found."

    except requests.exceptions.RequestException as e:
        return f"Error fetching news: {str(e)}"
    except Exception as e:
        # Covers malformed XML (ET.ParseError) and anything else unexpected;
        # the message is returned as the function's result instead of raising.
        return f"An unexpected error occurred: {str(e)}"
153
+
154
+ google_rss_tool = FunctionTool.from_defaults(
155
+ fn=fetch_news_headlines,
156
+ name="fetch_google_news_rss",
157
+ description="Fetch latest headlines."
158
+ )
159
+ # -----------------------------
160
+ # SERPER API
161
+ # -----------------------------
162
def fetch_news_topics(query: str) -> str:
    """Fetch news articles about a specific topic using the Serper API.

    Sends a POST request to ``https://google.serper.dev/news`` with the JSON
    body ``{"q": query}``. The API key is read from the ``SERPER_API_KEY``
    environment variable and sent in the ``X-API-KEY`` header.

    Args:
        query: The topic to search for news about.

    Returns:
        A newline-joined text listing with the title, source, link and
        snippet of up to five entries from the response's ``news`` list
        (entries are separated by ``---`` lines), ``"No news articles
        found."`` when that list is empty or absent, or an error message if
        the key is missing or the request fails. Errors are reported through
        the return value rather than raised.
    """
    url = "https://google.serper.dev/news"
    max_articles = 5
    timeout_seconds = 10

    api_key = os.getenv("SERPER_API_KEY")
    if not api_key:
        # Fail early with an actionable message instead of handing a None
        # header value to requests.
        return "Error fetching news: SERPER_API_KEY is not set."

    payload = json.dumps({"q": query})
    headers = {
        "X-API-KEY": api_key,
        "Content-Type": "application/json",
    }

    try:
        # Without a timeout a stalled connection would block the caller
        # indefinitely. requests' Timeout is a RequestException, so it is
        # reported by the first handler below.
        response = requests.post(
            url, headers=headers, data=payload, timeout=timeout_seconds
        )
        response.raise_for_status()

        news_data = response.json()

        # Format the news articles into a readable string
        formatted_news = []
        for article in news_data.get("news", [])[:max_articles]:
            formatted_news.extend(
                [
                    f"Title: {article.get('title', 'N/A')}",
                    f"Source: {article.get('source', 'N/A')}",
                    f"Link: {article.get('link', 'N/A')}",
                    f"Snippet: {article.get('snippet', 'N/A')}",
                    "---",
                ]
            )

        return "\n".join(formatted_news) if formatted_news else "No news articles found."

    except requests.exceptions.RequestException as e:
        return f"Error fetching news: {str(e)}"
    except Exception as e:
        # Covers an unexpected response shape (e.g. a payload that is not a
        # JSON object); the message is returned instead of raising.
        return f"An unexpected error occurred: {str(e)}"
205
+
206
+ serper_news_tool = FunctionTool.from_defaults(
207
+ fetch_news_topics,
208
+ name="fetch_news_from_serper",
209
+ description="Fetch news articles on a specific topic."
210
+ )
211
+
212
+ # -----------------------------
213
+ # WEB PAGE READER
214
+ # -----------------------------
215
def summarize_webpage(url: str) -> str:
    """Fetch a web page and return a summary of its main points.

    The page is loaded with ``SimpleWebPageReader`` (HTML converted to text),
    wrapped in a ``SummaryIndex`` and queried with a fixed summarization
    prompt.

    Args:
        url: Address of the page to summarize.

    Returns:
        The summary text, a notice when nothing could be loaded, or an error
        message if loading or summarizing fails. Errors are reported through
        the return value rather than raised.
    """
    if not url or not url.strip():
        # Reject a blank URL up front with a clear message instead of letting
        # the reader fail on it.
        return "An error occurred while summarizing the web page: no URL was provided."

    try:
        # NOTE: the html_to_text=True option requires html2text to be installed
        documents = SimpleWebPageReader(html_to_text=True).load_data([url])
        if not documents:
            return "No content could be loaded from the provided URL."
        index = SummaryIndex.from_documents(documents)
        # Pass the module-level `llm` explicitly. Without it the query engine
        # uses LlamaIndex's global default LLM, which this file never
        # configures (the OpenAI key setup is commented out).
        query_engine = index.as_query_engine(llm=llm)
        response = query_engine.query("Summarize the main points of this page.")
        return str(response)
    except Exception as e:
        return f"An error occurred while summarizing the web page: {str(e)}"
228
+
229
+ webpage_reader_tool = FunctionTool.from_defaults(
230
+ summarize_webpage,
231
+ name="summarize_webpage",
232
+ description="Read and summarize the main points of a web page given its URL."
233
+ )
234
+
235
+ # Create the agent workflow
236
+ tools = [
237
+ #search_tool,
238
+ #navigate_tool,
239
+ #extract_text_tool,
240
+ #extract_links_tool,
241
+ weather_tool,
242
+ forecast_tool,
243
+ google_rss_tool,
244
+ serper_news_tool,
245
+ webpage_reader_tool,
246
+ ]
247
+ web_agent = AgentWorkflow.from_tools_or_functions(
248
+ tools,
249
+ llm=llm,
250
+ system_prompt="""You are a helpful assistant with access to specialized tools for retrieving information about weather, and news.
251
+ AVAILABLE TOOLS:
252
+ 1. current_weather - Get current weather conditions for a location
253
+ 2. weather_forecast - Get tomorrow's weather forecast for a location
254
+ 3. fetch_google_news_rss - Fetch the latest general news headlines
255
+ 4. fetch_news_from_serper - Fetch news articles on a specific topic
256
+ 5. summarize_webpage - Read and summarize the content of a web page
257
+
258
+ WHEN AND HOW TO USE EACH TOOL:
259
+
260
+ For weather information:
261
+ - Use current_weather when asked about present conditions
262
+ EXAMPLE: User asks "What's the weather in Tokyo?"
263
+ TOOL: current_weather
264
+ PARAMETERS: {"location": "Tokyo, JP"}
265
+
266
+ - Use weather_forecast when asked about future weather
267
+ EXAMPLE: User asks "What will the weather be like in Paris tomorrow?"
268
+ TOOL: weather_forecast
269
+ PARAMETERS: {"location": "Paris, FR"}
270
+
271
+ For news retrieval:
272
+ - Use fetch_google_news_rss for general headlines (requires NO parameters)
273
+ EXAMPLE: User asks "What's happening in the news today?"
274
+ TOOL: fetch_google_news_rss
275
+ PARAMETERS: {}
276
+
277
+ - Use fetch_news_from_serper for specific news topics
278
+ EXAMPLE: User asks "Any news about AI advancements?"
279
+ TOOL: fetch_news_from_serper
280
+ PARAMETERS: {"query": "artificial intelligence advancements"}
281
+
282
+ For web content:
283
+ - Use summarize_webpage to extract information from websites
284
+ EXAMPLE: User asks "Can you summarize the content on hf.co/learn?"
285
+ TOOL: summarize_webpage
286
+ PARAMETERS: {"url": "https://hf.co/learn"}
287
+
288
+ IMPORTANT GUIDELINES:
289
+ - Always verify the format of parameters before submitting
290
+ - For locations, use the format "City, Country Code" (e.g., "Montreal, CA")
291
+ - For URLs, include the full address with http:// or https://
292
+ - When multiple tools are needed to answer a complex question, use them in sequence
293
+
294
+ When you use a tool, explain to the user that you're retrieving information. After receiving the tool's output, provide a helpful summary of the information.
295
+ """
296
+ )
297
+ ctx = Context(web_agent)
298
+
299
+ # Sync helper to run async agent queries
300
def run_query_sync(query: str):
    """Run the agent on *query* from synchronous code and return its result.

    Obtains the event loop via ``asyncio.get_event_loop()`` and blocks on
    ``web_agent.run(query, ctx=ctx)`` until it completes.
    """
    loop = asyncio.get_event_loop()
    pending_run = web_agent.run(query, ctx=ctx)
    return loop.run_until_complete(pending_run)
305
+
306
+ stream_queue = asyncio.Queue()
307
+
308
  async def run_query(query: str):
309
  trace_id = f"agent-run-{uuid.uuid4().hex}"
310
  try:
 
388
  except Exception as e:
389
  yield f"❌ Error: {str(e)}"
390
  finally:
391
+ instrumentor.flush()
392
+
393
+ # Gradio interface function
394
async def gradio_query(user_input, chat_history=None):
    """Stream the agent's answer to *user_input* as chat-history updates.

    Appends the user's message and a "Thinking..." assistant placeholder to
    the history (a new list when *chat_history* is empty or None), yields
    that state once, then replaces the placeholder with the accumulated
    response after every non-empty chunk produced by ``run_query``.

    Yields:
        ``(history, history)`` pairs: the same list object twice, one per
        output the handler is wired to.
    """
    history = chat_history or []
    history.extend(
        [
            {"role": "user", "content": user_input},
            # Placeholder shown until the first chunk arrives.
            {"role": "assistant", "content": "Thinking..."},
        ]
    )
    yield history, history

    # Accumulate the streamed chunks and overwrite the last message each time.
    answer_so_far = ""
    async for piece in run_query(user_input):
        if not piece:
            continue
        answer_so_far += piece
        history[-1]["content"] = answer_so_far
        yield history, history
409
+
410
+ # Build and launch Gradio app
411
grb = gr.Blocks()
with grb:
    gr.Markdown("## Perspicacity")
    gr.Markdown(
        """
        This bot can check the news, tell you the weather, and even browse websites to answer follow-up questions — all powered by a team of tiny AI tools working behind the scenes.\n\n
        🧪 Built for fun during the [AI Agents course](https://huggingface.co/learn/agents-course/unit0/introduction) — it's just a demo to show what agents can do.\n
        🙌 Got ideas or improvements? PRs welcome!\n\n
        👉 Try asking 'What's the weather in Montreal?' or 'What's in the news today?'
        """
    )
    chatbot = gr.Chatbot(type="messages")
    txt = gr.Textbox(placeholder="Ask me anything...", show_label=False)

    def _wire_streaming(trigger):
        """Attach the streaming handler to *trigger*, then clear the textbox."""
        streamed = trigger(
            gradio_query,
            inputs=[txt, chatbot],
            outputs=[chatbot, chatbot],
        )
        # Clear the textbox after submission
        streamed.then(lambda: gr.update(value=""), None, [txt])

    # Pressing Enter in the textbox and clicking "Send" behave the same way.
    _wire_streaming(txt.submit)

    send_btn = gr.Button("Send")
    _wire_streaming(send_btn.click)

if __name__ == "__main__":
    grb.launch()