import logging
import os
import time

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)
def web_search(query: str) -> str:
    """Search the web via the Google Custom Search API and return the top results.

    Fetches each result page and extracts its paragraph text, falling back to
    the search snippet when a page cannot be retrieved.
    """
    try:
        google_api_key = os.getenv("GOOGLE_API_KEY")
        google_cse_id = os.getenv("GOOGLE_CSE_ID")
        if not google_api_key or not google_cse_id:
            return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."

        # Pass the query via `params` so requests URL-encodes it; interpolating
        # it into the URL breaks on spaces and special characters.
        url = "https://www.googleapis.com/customsearch/v1"
        params = {"key": google_api_key, "cx": google_cse_id, "q": query}
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            )
        }
        response = requests.get(url, params=params, headers=headers, timeout=10)
        response.raise_for_status()

        results = response.json().get("items", [])
        if not results:
            return "No web results found."

        search_results = []
        for i, item in enumerate(results[:5]):
            title = item.get("title", "")
            snippet = item.get("snippet", "")
            link = item.get("link", "")
            try:
                # Throttle page fetches to avoid hammering result sites.
                time.sleep(2)
                page_response = requests.get(link, headers=headers, timeout=10)
                page_response.raise_for_status()
                soup = BeautifulSoup(page_response.text, "html.parser")
                paragraphs = soup.find_all("p")
                # Truncate the joined text to 1,000 characters; the original
                # sliced the paragraph list instead, which did not cap length.
                page_content = " ".join(p.get_text() for p in paragraphs)[:1000]
            except Exception as e:
                logger.warning(f"Failed to fetch page content for {link}: {e}")
                page_content = snippet
            search_results.append(
                f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {page_content}\n"
            )
        return "\n".join(search_results)
    except Exception as e:
        logger.exception("Web search failed")
        return f"Web search error: {e}"
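

# A minimal usage sketch, assuming GOOGLE_API_KEY and GOOGLE_CSE_ID are set in
# the environment; the sample query is hypothetical. Running the module
# directly prints the formatted results.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    print(web_search("python requests timeout best practices"))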