Spaces:

fdaudens
/

perspicacity

Sleeping

App Files Files Community

fdaudens HF Staff committed on 26 days ago

Commit

ac301bc

verified ·

1 Parent(s): c266c49

Update app.py

Browse files

Files changed (1) hide show

app.py +366 -1

app.py CHANGED Viewed

@@ -1,3 +1,310 @@
 async def run_query(query: str):
     trace_id = f"agent-run-{uuid.uuid4().hex}"
     try:
@@ -81,4 +388,62 @@ async def run_query(query: str):
     except Exception as e:
         yield f"❌ Error: {str(e)}"
     finally:
-        instrumentor.flush()

+# app.py
+import os
+import logging
+import asyncio
+import nest_asyncio
+from datetime import datetime
+import uuid
+import aiohttp
+import gradio as gr
+import requests
+import xml.etree.ElementTree as ET
+import json
+from langfuse.llama_index import LlamaIndexInstrumentor
+from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
+from llama_index.tools.weather import OpenWeatherMapToolSpec
+from llama_index.tools.playwright import PlaywrightToolSpec
+from llama_index.core.tools import FunctionTool
+from llama_index.core.agent.workflow import AgentWorkflow
+from llama_index.core.workflow import Context
+from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
+from llama_index.core.memory import ChatMemoryBuffer
+from llama_index.readers.web import RssReader, SimpleWebPageReader
+from llama_index.core import SummaryIndex
+import subprocess
+# Runs the `playwright install` CLI at import time (presumably to fetch browser binaries — confirm).
+# NOTE(review): no check=True and the result is discarded, so a failed install goes unnoticed here.
+# NOTE(review): every PlaywrightToolSpec usage further down is commented out in this view.
+subprocess.run(["playwright", "install"])
+# allow nested loops in Spaces
+nest_asyncio.apply()
+# --- Langfuse ---
+# Instrumentor configured from environment variables; os.environ.get returns
+# None for any variable that is not set.
+instrumentor = LlamaIndexInstrumentor(
+    public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
+    secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
+    host=os.environ.get("LANGFUSE_HOST"),
+)
+# Started once at import time; run_query calls instrumentor.flush() in its finally block.
+instrumentor.start()
+# --- Secrets via env vars ---
+# os.getenv returns None for any variable that is not set.
+HF_TOKEN            = os.getenv("HF_TOKEN")
+# OPENAI_API_KEY      = os.getenv("OPENAI_API_KEY")
+OPENWEATHERMAP_KEY  = os.getenv("OPENWEATHERMAP_API_KEY")
+# NOTE(review): fetch_news_topics reads SERPER_API_KEY from the environment itself
+# instead of using this constant.
+SERPER_API_KEY      = os.getenv("SERPER_API_KEY")
+# --- LLMs ---
+# Model handed to AgentWorkflow.from_tools_or_functions as `llm`.
+llm = HuggingFaceInferenceAPI(
+    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
+    token=HF_TOKEN,
+    task="conversational",
+    streaming=True
+)
+# Chat memory buffer created with token_limit=8192.
+memory = ChatMemoryBuffer.from_defaults(token_limit=8192)
+# Date at import time, formatted as "<Month name> <day>, <year>" (naive local datetime).
+today_str = datetime.now().strftime("%B %d, %Y")
+# Taken from the ANON_USER_ID env var when set; otherwise a fresh random hex id
+# is generated each time the module is imported.
+ANON_USER_ID = os.environ.get("ANON_USER_ID", uuid.uuid4().hex)
+# # OpenAI for pure function-calling
+# openai_llm = OpenAI(
+#     model="gpt-4o",
+#     api_key=OPENAI_API_KEY,
+#     temperature=0.0,
+#     streaming=False,
+# )
+# --- Tools Setup ---
+# DuckDuckGo
+# duck_spec = DuckDuckGoSearchToolSpec()
+# search_tool = FunctionTool.from_defaults(duck_spec.duckduckgo_full_search)
+# Weather
+# Both weather tools wrap methods of a single OpenWeatherMapToolSpec instance
+# keyed with OPENWEATHERMAP_KEY (None when the env var is unset).
+openweather_api_key=OPENWEATHERMAP_KEY
+weather_tool_spec = OpenWeatherMapToolSpec(key=openweather_api_key)
+# Registered under the name "current_weather", which the agent's system prompt refers to.
+weather_tool = FunctionTool.from_defaults(
+    weather_tool_spec.weather_at_location,
+    name="current_weather",
+    description="Get the current weather at a specific location (city, country)."
+)
+# Registered under the name "weather_forecast".
+# NOTE(review): "tommorrow" is the attribute name as written here; presumably it
+# matches the library's own spelling — verify before "correcting" it.
+forecast_tool = FunctionTool.from_defaults(
+    weather_tool_spec.forecast_tommorrow_at_location,
+    name="weather_forecast",
+    description="Get tomorrow's weather forecast for a specific location (city, country)."
+)
+# Playwright (synchronous start)
+# async def _start_browser():
+#     return await PlaywrightToolSpec.create_async_playwright_browser(headless=True)
+# browser = asyncio.get_event_loop().run_until_complete(_start_browser())
+# playwright_tool_spec = PlaywrightToolSpec.from_async_browser(browser)
+# navigate_tool = FunctionTool.from_defaults(
+#     playwright_tool_spec.navigate_to,
+#     name="web_navigate",
+#     description="Navigate to a specific URL."
+# )
+# extract_text_tool = FunctionTool.from_defaults(
+#     playwright_tool_spec.extract_text,
+#     name="web_extract_text",
+#     description="Extract all text from the current page."
+# )
+# extract_links_tool = FunctionTool.from_defaults(
+#     playwright_tool_spec.extract_hyperlinks,
+#     name="web_extract_links",
+#     description="Extract all hyperlinks from the current page."
+# )
+# Google News RSS
+# def fetch_google_news_rss():
+#     docs = RssReader(html_to_text=True).load_data(["https://news.google.com/rss"])
+#     return [{"title":d.metadata.get("title",""), "url":d.metadata.get("link","")} for d in docs]
+# -----------------------------
+# Google News RSS
+# -----------------------------
+def fetch_news_headlines() -> str:
+    """Fetches the latest news from Google News RSS feed.
+    Returns:
+        A string containing the latest news articles from Google News, or an error message if the request fails.
+    """
+    url = "https://news.google.com/rss"
+    try:
+        response = requests.get(url)
+        response.raise_for_status()
+        # Parse the XML content
+        root = ET.fromstring(response.content)
+        # Format the news articles into a readable string
+        formatted_news = []
+        for i, item in enumerate(root.findall('.//item')):
+            if i >= 5:
+                break
+            title = item.find('title').text if item.find('title') is not None else 'N/A'
+            link = item.find('link').text if item.find('link') is not None else 'N/A'
+            pub_date = item.find('pubDate').text if item.find('pubDate') is not None else 'N/A'
+            description = item.find('description').text if item.find('description') is not None else 'N/A'
+            formatted_news.append(f"Title: {title}")
+            formatted_news.append(f"Published: {pub_date}")
+            formatted_news.append(f"Link: {link}")
+            formatted_news.append(f"Description: {description}")
+            formatted_news.append("---")
+        return "\n".join(formatted_news) if formatted_news else "No news articles found."
+    except requests.exceptions.RequestException as e:
+        return f"Error fetching news: {str(e)}"
+    except Exception as e:
+        return f"An unexpected error occurred: {str(e)}"
+# Exposes fetch_news_headlines to the agent under the tool name "fetch_google_news_rss".
+google_rss_tool = FunctionTool.from_defaults(
+    fn=fetch_news_headlines,
+    name="fetch_google_news_rss",
+    description="Fetch latest headlines."
+)
+# -----------------------------
+# SERPER API
+# -----------------------------
+def fetch_news_topics(query: str) -> str:
+    """Fetches news articles about a specific topic using the Serper API.
+    Args:
+        query: The topic to search for news about.
+    Returns:
+        A string containing the news articles found, or an error message if the request fails.
+    """
+    url = "https://google.serper.dev/news"
+    payload = json.dumps({
+        "q": query
+    })
+    headers = {
+        'X-API-KEY': os.getenv('SERPER_API_KEY'),
+        'Content-Type': 'application/json'
+    }
+    try:
+        response = requests.post(url, headers=headers, data=payload)
+        response.raise_for_status()
+        news_data = response.json()
+        # Format the news articles into a readable string
+        formatted_news = []
+        for i, article in enumerate(news_data.get('news', [])):
+            if i >= 5:
+                break
+            formatted_news.append(f"Title: {article.get('title', 'N/A')}")
+            formatted_news.append(f"Source: {article.get('source', 'N/A')}")
+            formatted_news.append(f"Link: {article.get('link', 'N/A')}")
+            formatted_news.append(f"Snippet: {article.get('snippet', 'N/A')}")
+            formatted_news.append("---")
+        return "\n".join(formatted_news) if formatted_news else "No news articles found."
+    except requests.exceptions.RequestException as e:
+        return f"Error fetching news: {str(e)}"
+    except Exception as e:
+        return f"An unexpected error occurred: {str(e)}"
+# Exposes fetch_news_topics to the agent under the tool name "fetch_news_from_serper".
+serper_news_tool = FunctionTool.from_defaults(
+    fetch_news_topics,
+    name="fetch_news_from_serper",
+    description="Fetch news articles on a specific topic."
+)
+# -----------------------------
+# WEB PAGE READER
+# -----------------------------
+def summarize_webpage(url: str) -> str:
+    """Fetches and summarizes the content of a web page."""
+    try:
+        # NOTE: the html_to_text=True option requires html2text to be installed
+        documents = SimpleWebPageReader(html_to_text=True).load_data([url])
+        if not documents:
+            return "No content could be loaded from the provided URL."
+        index = SummaryIndex.from_documents(documents)
+        query_engine = index.as_query_engine()
+        response = query_engine.query("Summarize the main points of this page.")
+        return str(response)
+    except Exception as e:
+        return f"An error occurred while summarizing the web page: {str(e)}"
+# Exposes summarize_webpage to the agent under the tool name "summarize_webpage".
+webpage_reader_tool = FunctionTool.from_defaults(
+    summarize_webpage,
+    name="summarize_webpage",
+    description="Read and summarize the main points of a web page given its URL."
+)
+# Create the agent workflow
+# Tools handed to the agent; the commented-out entries correspond to the
+# DuckDuckGo and Playwright tools whose definitions are commented out above.
+tools = [
+    #search_tool,
+    #navigate_tool,
+    #extract_text_tool,
+    #extract_links_tool,
+    weather_tool,
+    forecast_tool,
+    google_rss_tool,
+    serper_news_tool,
+    webpage_reader_tool,
+]
+# Agent workflow built from the `tools` list and the module-level `llm`.
+# The system prompt refers to each tool by the `name=` it was registered with,
+# so renaming a tool requires updating the prompt text as well.
+web_agent = AgentWorkflow.from_tools_or_functions(
+    tools,
+    llm=llm,
+    system_prompt="""You are a helpful assistant with access to specialized tools for retrieving information about weather, and news.
+    AVAILABLE TOOLS:
+    1. current_weather - Get current weather conditions for a location
+    2. weather_forecast - Get tomorrow's weather forecast for a location
+    3. fetch_google_news_rss - Fetch the latest general news headlines
+    4. fetch_news_from_serper - Fetch news articles on a specific topic
+    5. summarize_webpage - Read and summarize the content of a web page
+    WHEN AND HOW TO USE EACH TOOL:
+    For weather information:
+    - Use current_weather when asked about present conditions
+    EXAMPLE: User asks "What's the weather in Tokyo?"
+    TOOL: current_weather
+    PARAMETERS: {"location": "Tokyo, JP"}
+    - Use weather_forecast when asked about future weather
+    EXAMPLE: User asks "What will the weather be like in Paris tomorrow?"
+    TOOL: weather_forecast
+    PARAMETERS: {"location": "Paris, FR"}
+    For news retrieval:
+    - Use fetch_google_news_rss for general headlines (requires NO parameters)
+    EXAMPLE: User asks "What's happening in the news today?"
+    TOOL: fetch_google_news_rss
+    PARAMETERS: {}
+    - Use fetch_news_from_serper for specific news topics
+    EXAMPLE: User asks "Any news about AI advancements?"
+    TOOL: fetch_news_from_serper
+    PARAMETERS: {"query": "artificial intelligence advancements"}
+    For web content:
+    - Use summarize_webpage to extract information from websites
+    EXAMPLE: User asks "Can you summarize the content on hf.co/learn?"
+    TOOL: summarize_webpage
+    PARAMETERS: {"url": "https://hf.co/learn"}
+    IMPORTANT GUIDELINES:
+    - Always verify the format of parameters before submitting
+    - For locations, use the format "City, Country Code" (e.g., "Montreal, CA")
+    - For URLs, include the full address with http:// or https://
+    - When multiple tools are needed to answer a complex question, use them in sequence
+    When you use a tool, explain to the user that you're retrieving information. After receiving the tool's output, provide a helpful summary of the information.
+    """
+)
+# One module-level Context, passed as ctx=ctx by run_query_sync on every call.
+# NOTE(review): the body of run_query is not visible here — confirm whether it shares this ctx.
+ctx = Context(web_agent)
+# Synchronous helper to run agent queries
+def run_query_sync(query: str):
+    """Helper to run async agent.run in sync context."""
+    return asyncio.get_event_loop().run_until_complete(
+        web_agent.run(query, ctx=ctx)
+    )
+# Module-level asyncio queue.
+# NOTE(review): nothing in the visible code puts to or gets from it — confirm it is still needed.
+stream_queue = asyncio.Queue()
 async def run_query(query: str):
     trace_id = f"agent-run-{uuid.uuid4().hex}"
     try:
     except Exception as e:
         yield f"❌ Error: {str(e)}"
     finally:
+        instrumentor.flush()
+# Gradio interface function
+async def gradio_query(user_input, chat_history=None):
+    history = chat_history or []
+    history.append({"role": "user", "content": user_input})
+    # Add initial assistant message
+    history.append({"role": "assistant", "content": "Thinking..."})
+    yield history, history
+    # Get streaming response
+    full_response = ""
+    async for chunk in run_query(user_input):
+        if chunk:
+            full_response += chunk
+            history[-1]["content"] = full_response
+            yield history, history
+# Build and launch Gradio app
+grb = gr.Blocks()
+with grb:
+    gr.Markdown("## Perspicacity")
+    gr.Markdown(
+        """
+        This bot can check the news, tell you the weather, and even browse websites to answer follow-up questions — all powered by a team of tiny AI tools working behind the scenes.\n\n
+        🧪 Built for fun during the [AI Agents course](https://huggingface.co/learn/agents-course/unit0/introduction) — it's just a demo to show what agents can do.\n
+        🙌 Got ideas or improvements? PRs welcome!\n\n
+        👉 Try asking 'What's the weather in Montreal?' or 'What's in the news today?'
+        """
+    )
+    chatbot = gr.Chatbot(type="messages")
+    txt = gr.Textbox(placeholder="Ask me anything...", show_label=False)
+    # Set up event handlers for streaming
+    txt.submit(
+        gradio_query,
+        inputs=[txt, chatbot],
+        outputs=[chatbot, chatbot]
+    ).then(
+        lambda: gr.update(value=""),  # Clear the textbox after submission
+        None,
+        [txt]
+    )
+    # Also update the button click handler
+    send_btn = gr.Button("Send")
+    send_btn.click(
+        gradio_query,
+        [txt, chatbot],
+        [chatbot, chatbot]
+    ).then(
+        lambda: gr.update(value=""),  # Clear the textbox after submission
+        None,
+        [txt]
+    )
+if __name__ == "__main__":
+    grb.launch()