Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,310 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
async def run_query(query: str):
|
2 |
trace_id = f"agent-run-{uuid.uuid4().hex}"
|
3 |
try:
|
@@ -81,4 +388,62 @@ async def run_query(query: str):
|
|
81 |
except Exception as e:
|
82 |
yield f"❌ Error: {str(e)}"
|
83 |
finally:
|
84 |
-
instrumentor.flush()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# app.py
|
2 |
+
import os
|
3 |
+
import logging
|
4 |
+
import asyncio
|
5 |
+
import nest_asyncio
|
6 |
+
from datetime import datetime
|
7 |
+
import uuid
|
8 |
+
import aiohttp
|
9 |
+
import gradio as gr
|
10 |
+
import requests
|
11 |
+
import xml.etree.ElementTree as ET
|
12 |
+
import json
|
13 |
+
|
14 |
+
from langfuse.llama_index import LlamaIndexInstrumentor
|
15 |
+
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
|
16 |
+
from llama_index.tools.weather import OpenWeatherMapToolSpec
|
17 |
+
from llama_index.tools.playwright import PlaywrightToolSpec
|
18 |
+
from llama_index.core.tools import FunctionTool
|
19 |
+
from llama_index.core.agent.workflow import AgentWorkflow
|
20 |
+
from llama_index.core.workflow import Context
|
21 |
+
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
|
22 |
+
from llama_index.core.memory import ChatMemoryBuffer
|
23 |
+
from llama_index.readers.web import RssReader, SimpleWebPageReader
|
24 |
+
from llama_index.core import SummaryIndex
|
25 |
+
|
26 |
+
import subprocess
|
27 |
+
subprocess.run(["playwright", "install"])
|
28 |
+
|
29 |
+
# allow nested loops in Spaces
|
30 |
+
nest_asyncio.apply()
|
31 |
+
|
32 |
+
# --- Langfuse ---
|
33 |
+
instrumentor = LlamaIndexInstrumentor(
|
34 |
+
public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
|
35 |
+
secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
|
36 |
+
host=os.environ.get("LANGFUSE_HOST"),
|
37 |
+
)
|
38 |
+
instrumentor.start()
|
39 |
+
|
40 |
+
# --- Secrets via env vars ---
|
41 |
+
HF_TOKEN = os.getenv("HF_TOKEN")
|
42 |
+
# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
43 |
+
OPENWEATHERMAP_KEY = os.getenv("OPENWEATHERMAP_API_KEY")
|
44 |
+
SERPER_API_KEY = os.getenv("SERPER_API_KEY")
|
45 |
+
|
46 |
+
# --- LLMs ---
|
47 |
+
llm = HuggingFaceInferenceAPI(
|
48 |
+
model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
|
49 |
+
token=HF_TOKEN,
|
50 |
+
task="conversational",
|
51 |
+
streaming=True
|
52 |
+
)
|
53 |
+
|
54 |
+
memory = ChatMemoryBuffer.from_defaults(token_limit=8192)
|
55 |
+
today_str = datetime.now().strftime("%B %d, %Y")
|
56 |
+
ANON_USER_ID = os.environ.get("ANON_USER_ID", uuid.uuid4().hex)
|
57 |
+
|
58 |
+
# # OpenAI for pure function-calling
|
59 |
+
# openai_llm = OpenAI(
|
60 |
+
# model="gpt-4o",
|
61 |
+
# api_key=OPENAI_API_KEY,
|
62 |
+
# temperature=0.0,
|
63 |
+
# streaming=False,
|
64 |
+
# )
|
65 |
+
|
66 |
+
# --- Tools Setup ---
|
67 |
+
# DuckDuckGo
|
68 |
+
# duck_spec = DuckDuckGoSearchToolSpec()
|
69 |
+
# search_tool = FunctionTool.from_defaults(duck_spec.duckduckgo_full_search)
|
70 |
+
|
71 |
+
# Weather
|
72 |
+
openweather_api_key=OPENWEATHERMAP_KEY
|
73 |
+
weather_tool_spec = OpenWeatherMapToolSpec(key=openweather_api_key)
|
74 |
+
weather_tool = FunctionTool.from_defaults(
|
75 |
+
weather_tool_spec.weather_at_location,
|
76 |
+
name="current_weather",
|
77 |
+
description="Get the current weather at a specific location (city, country)."
|
78 |
+
)
|
79 |
+
forecast_tool = FunctionTool.from_defaults(
|
80 |
+
weather_tool_spec.forecast_tommorrow_at_location,
|
81 |
+
name="weather_forecast",
|
82 |
+
description="Get tomorrow's weather forecast for a specific location (city, country)."
|
83 |
+
)
|
84 |
+
|
85 |
+
# Playwright (synchronous start)
|
86 |
+
# async def _start_browser():
|
87 |
+
# return await PlaywrightToolSpec.create_async_playwright_browser(headless=True)
|
88 |
+
# browser = asyncio.get_event_loop().run_until_complete(_start_browser())
|
89 |
+
# playwright_tool_spec = PlaywrightToolSpec.from_async_browser(browser)
|
90 |
+
|
91 |
+
# navigate_tool = FunctionTool.from_defaults(
|
92 |
+
# playwright_tool_spec.navigate_to,
|
93 |
+
# name="web_navigate",
|
94 |
+
# description="Navigate to a specific URL."
|
95 |
+
# )
|
96 |
+
# extract_text_tool = FunctionTool.from_defaults(
|
97 |
+
# playwright_tool_spec.extract_text,
|
98 |
+
# name="web_extract_text",
|
99 |
+
# description="Extract all text from the current page."
|
100 |
+
# )
|
101 |
+
# extract_links_tool = FunctionTool.from_defaults(
|
102 |
+
# playwright_tool_spec.extract_hyperlinks,
|
103 |
+
# name="web_extract_links",
|
104 |
+
# description="Extract all hyperlinks from the current page."
|
105 |
+
# )
|
106 |
+
|
107 |
+
# Google News RSS
|
108 |
+
# def fetch_google_news_rss():
|
109 |
+
# docs = RssReader(html_to_text=True).load_data(["https://news.google.com/rss"])
|
110 |
+
# return [{"title":d.metadata.get("title",""), "url":d.metadata.get("link","")} for d in docs]
|
111 |
+
|
112 |
+
# -----------------------------
|
113 |
+
# Google News RSS
|
114 |
+
# -----------------------------
|
115 |
+
|
116 |
+
def fetch_news_headlines() -> str:
    """Fetch the latest headlines from the Google News RSS feed.

    Returns:
        A newline-joined string describing at most five articles (title,
        publication date, link and description, each article followed by a
        ``---`` separator), "No news articles found." when the feed contains
        no items, or an error message if the request or the parsing fails.
    """
    url = "https://news.google.com/rss"
    max_articles = 5
    # Without an explicit timeout, requests waits indefinitely on a server
    # that never answers, which would stall the whole agent run.
    timeout_seconds = 10

    try:
        response = requests.get(url, timeout=timeout_seconds)
        response.raise_for_status()

        # Parse the XML content
        root = ET.fromstring(response.content)

        # Format the news articles into a readable string
        formatted_news = []
        for item in root.findall('.//item')[:max_articles]:
            # Missing tags and empty elements both fall back to 'N/A'.
            title = item.findtext('title') or 'N/A'
            link = item.findtext('link') or 'N/A'
            pub_date = item.findtext('pubDate') or 'N/A'
            description = item.findtext('description') or 'N/A'

            formatted_news.extend([
                f"Title: {title}",
                f"Published: {pub_date}",
                f"Link: {link}",
                f"Description: {description}",
                "---",
            ])

        return "\n".join(formatted_news) if formatted_news else "No news articles found."

    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and non-2xx responses.
        return f"Error fetching news: {str(e)}"
    except Exception as e:
        # The tool must always hand a string back to the agent (e.g. on malformed XML).
        return f"An unexpected error occurred: {str(e)}"
|
153 |
+
|
154 |
+
google_rss_tool = FunctionTool.from_defaults(
|
155 |
+
fn=fetch_news_headlines,
|
156 |
+
name="fetch_google_news_rss",
|
157 |
+
description="Fetch latest headlines."
|
158 |
+
)
|
159 |
+
# -----------------------------
|
160 |
+
# SERPER API
|
161 |
+
# -----------------------------
|
162 |
+
def fetch_news_topics(query: str) -> str:
    """Fetch news articles about a specific topic using the Serper API.

    Args:
        query: The topic to search for news about.

    Returns:
        A newline-joined string describing at most five articles (title,
        source, link and snippet, each article followed by a ``---``
        separator), "No news articles found." when the response has no
        ``news`` entries, or an error message if the API key is missing or
        the request fails.
    """
    url = "https://google.serper.dev/news"
    max_articles = 5
    # Without an explicit timeout, requests waits indefinitely on a server
    # that never answers, which would stall the whole agent run.
    timeout_seconds = 10

    api_key = os.getenv('SERPER_API_KEY')
    if not api_key:
        # Fail fast with an actionable message instead of sending an
        # unauthenticated request and reporting an opaque HTTP error.
        return "Error fetching news: SERPER_API_KEY is not configured."

    payload = json.dumps({
        "q": query
    })

    headers = {
        'X-API-KEY': api_key,
        'Content-Type': 'application/json'
    }

    try:
        response = requests.post(
            url, headers=headers, data=payload, timeout=timeout_seconds
        )
        response.raise_for_status()

        news_data = response.json()

        # Format the news articles into a readable string
        formatted_news = []
        for article in news_data.get('news', [])[:max_articles]:
            formatted_news.extend([
                f"Title: {article.get('title', 'N/A')}",
                f"Source: {article.get('source', 'N/A')}",
                f"Link: {article.get('link', 'N/A')}",
                f"Snippet: {article.get('snippet', 'N/A')}",
                "---",
            ])

        return "\n".join(formatted_news) if formatted_news else "No news articles found."

    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and non-2xx responses.
        return f"Error fetching news: {str(e)}"
    except Exception as e:
        # The tool must always hand a string back to the agent (e.g. on a non-JSON body).
        return f"An unexpected error occurred: {str(e)}"
|
205 |
+
|
206 |
+
serper_news_tool = FunctionTool.from_defaults(
|
207 |
+
fetch_news_topics,
|
208 |
+
name="fetch_news_from_serper",
|
209 |
+
description="Fetch news articles on a specific topic."
|
210 |
+
)
|
211 |
+
|
212 |
+
# -----------------------------
|
213 |
+
# WEB PAGE READER
|
214 |
+
# -----------------------------
|
215 |
+
def summarize_webpage(url: str) -> str:
    """Fetch a web page and summarize its main points.

    Args:
        url: Address of the page to read.

    Returns:
        The summary produced by the query engine as a string, a notice when
        no URL was given or no content could be loaded, or an error message
        if loading or summarizing fails.
    """
    if not url or not url.strip():
        return "No URL was provided."

    try:
        # NOTE: the html_to_text=True option requires html2text to be installed
        documents = SimpleWebPageReader(html_to_text=True).load_data([url])
        if not documents:
            return "No content could be loaded from the provided URL."
        index = SummaryIndex.from_documents(documents)
        # Pass the module-level `llm` explicitly: without it the query engine
        # falls back to llama_index's global default LLM, which this file
        # never configures (the OpenAI key is commented out).
        query_engine = index.as_query_engine(llm=llm)
        response = query_engine.query("Summarize the main points of this page.")
        return str(response)
    except Exception as e:
        # The tool must always hand a string back to the agent.
        return f"An error occurred while summarizing the web page: {str(e)}"
|
228 |
+
|
229 |
+
webpage_reader_tool = FunctionTool.from_defaults(
|
230 |
+
summarize_webpage,
|
231 |
+
name="summarize_webpage",
|
232 |
+
description="Read and summarize the main points of a web page given its URL."
|
233 |
+
)
|
234 |
+
|
235 |
+
# Create the agent workflow
|
236 |
+
tools = [
|
237 |
+
#search_tool,
|
238 |
+
#navigate_tool,
|
239 |
+
#extract_text_tool,
|
240 |
+
#extract_links_tool,
|
241 |
+
weather_tool,
|
242 |
+
forecast_tool,
|
243 |
+
google_rss_tool,
|
244 |
+
serper_news_tool,
|
245 |
+
webpage_reader_tool,
|
246 |
+
]
|
247 |
+
web_agent = AgentWorkflow.from_tools_or_functions(
|
248 |
+
tools,
|
249 |
+
llm=llm,
|
250 |
+
system_prompt="""You are a helpful assistant with access to specialized tools for retrieving information about weather, and news.
|
251 |
+
AVAILABLE TOOLS:
|
252 |
+
1. current_weather - Get current weather conditions for a location
|
253 |
+
2. weather_forecast - Get tomorrow's weather forecast for a location
|
254 |
+
3. fetch_google_news_rss - Fetch the latest general news headlines
|
255 |
+
4. fetch_news_from_serper - Fetch news articles on a specific topic
|
256 |
+
5. summarize_webpage - Read and summarize the content of a web page
|
257 |
+
|
258 |
+
WHEN AND HOW TO USE EACH TOOL:
|
259 |
+
|
260 |
+
For weather information:
|
261 |
+
- Use current_weather when asked about present conditions
|
262 |
+
EXAMPLE: User asks "What's the weather in Tokyo?"
|
263 |
+
TOOL: current_weather
|
264 |
+
PARAMETERS: {"location": "Tokyo, JP"}
|
265 |
+
|
266 |
+
- Use weather_forecast when asked about future weather
|
267 |
+
EXAMPLE: User asks "What will the weather be like in Paris tomorrow?"
|
268 |
+
TOOL: weather_forecast
|
269 |
+
PARAMETERS: {"location": "Paris, FR"}
|
270 |
+
|
271 |
+
For news retrieval:
|
272 |
+
- Use fetch_google_news_rss for general headlines (requires NO parameters)
|
273 |
+
EXAMPLE: User asks "What's happening in the news today?"
|
274 |
+
TOOL: fetch_google_news_rss
|
275 |
+
PARAMETERS: {}
|
276 |
+
|
277 |
+
- Use fetch_news_from_serper for specific news topics
|
278 |
+
EXAMPLE: User asks "Any news about AI advancements?"
|
279 |
+
TOOL: fetch_news_from_serper
|
280 |
+
PARAMETERS: {"query": "artificial intelligence advancements"}
|
281 |
+
|
282 |
+
For web content:
|
283 |
+
- Use summarize_webpage to extract information from websites
|
284 |
+
EXAMPLE: User asks "Can you summarize the content on hf.co/learn?"
|
285 |
+
TOOL: summarize_webpage
|
286 |
+
PARAMETERS: {"url": "https://hf.co/learn"}
|
287 |
+
|
288 |
+
IMPORTANT GUIDELINES:
|
289 |
+
- Always verify the format of parameters before submitting
|
290 |
+
- For locations, use the format "City, Country Code" (e.g., "Montreal, CA")
|
291 |
+
- For URLs, include the full address with http:// or https://
|
292 |
+
- When multiple tools are needed to answer a complex question, use them in sequence
|
293 |
+
|
294 |
+
When you use a tool, explain to the user that you're retrieving information. After receiving the tool's output, provide a helpful summary of the information.
|
295 |
+
"""
|
296 |
+
)
|
297 |
+
ctx = Context(web_agent)
|
298 |
+
|
299 |
+
# Sync helper that runs the async agent to completion
|
300 |
+
def run_query_sync(query: str):
    """Run the agent on ``query`` from synchronous code and return its result.

    Drives the awaitable returned by ``web_agent.run`` to completion on the
    current event loop, reusing the module-level workflow context ``ctx``.
    """
    loop = asyncio.get_event_loop()
    pending_run = web_agent.run(query, ctx=ctx)
    return loop.run_until_complete(pending_run)
|
305 |
+
|
306 |
+
stream_queue = asyncio.Queue()
|
307 |
+
|
308 |
async def run_query(query: str):
|
309 |
trace_id = f"agent-run-{uuid.uuid4().hex}"
|
310 |
try:
|
|
|
388 |
except Exception as e:
|
389 |
yield f"❌ Error: {str(e)}"
|
390 |
finally:
|
391 |
+
instrumentor.flush()
|
392 |
+
|
393 |
+
# Gradio interface function
|
394 |
+
async def gradio_query(user_input, chat_history=None):
    """Stream the agent's reply to ``user_input`` into a Gradio chat history.

    Args:
        user_input: Text submitted by the user.
        chat_history: Existing list of ``{"role", "content"}`` messages, or
            ``None``/empty for a new conversation. It is not modified.

    Yields:
        ``(history, history)`` tuples: first with a "Thinking..." placeholder
        as the assistant message, then once per non-empty chunk from
        ``run_query`` with the assistant message holding the text so far.
    """
    # Work on a copy so the caller's list is never mutated in place.
    history = list(chat_history or [])
    history.append({"role": "user", "content": user_input})

    # Add initial assistant message
    history.append({"role": "assistant", "content": "Thinking..."})
    yield history, history

    # Get streaming response
    full_response = ""
    async for chunk in run_query(user_input):
        if not chunk:
            continue
        full_response += chunk
        history[-1]["content"] = full_response
        yield history, history

    if not full_response:
        # Do not leave the placeholder on screen when nothing was produced.
        history[-1]["content"] = "No response was generated. Please try again."
        yield history, history
|
409 |
+
|
410 |
+
# Build and launch Gradio app
|
411 |
+
grb = gr.Blocks()
|
412 |
+
with grb:
|
413 |
+
gr.Markdown("## Perspicacity")
|
414 |
+
gr.Markdown(
|
415 |
+
"""
|
416 |
+
This bot can check the news, tell you the weather, and even browse websites to answer follow-up questions — all powered by a team of tiny AI tools working behind the scenes.\n\n
|
417 |
+
🧪 Built for fun during the [AI Agents course](https://huggingface.co/learn/agents-course/unit0/introduction) — it's just a demo to show what agents can do.\n
|
418 |
+
🙌 Got ideas or improvements? PRs welcome!\n\n
|
419 |
+
👉 Try asking 'What's the weather in Montreal?' or 'What's in the news today?'
|
420 |
+
"""
|
421 |
+
)
|
422 |
+
chatbot = gr.Chatbot(type="messages")
|
423 |
+
txt = gr.Textbox(placeholder="Ask me anything...", show_label=False)
|
424 |
+
|
425 |
+
# Set up event handlers for streaming
|
426 |
+
txt.submit(
|
427 |
+
gradio_query,
|
428 |
+
inputs=[txt, chatbot],
|
429 |
+
outputs=[chatbot, chatbot]
|
430 |
+
).then(
|
431 |
+
lambda: gr.update(value=""), # Clear the textbox after submission
|
432 |
+
None,
|
433 |
+
[txt]
|
434 |
+
)
|
435 |
+
|
436 |
+
# Wire the Send button to the same streaming handler as the textbox
|
437 |
+
send_btn = gr.Button("Send")
|
438 |
+
send_btn.click(
|
439 |
+
gradio_query,
|
440 |
+
[txt, chatbot],
|
441 |
+
[chatbot, chatbot]
|
442 |
+
).then(
|
443 |
+
lambda: gr.update(value=""), # Clear the textbox after submission
|
444 |
+
None,
|
445 |
+
[txt]
|
446 |
+
)
|
447 |
+
|
448 |
+
if __name__ == "__main__":
|
449 |
+
grb.launch()
|