# HFG-gr/tool.py
import requests
from bs4 import BeautifulSoup


class Tool:
    """Base class for tools."""

    # Default metadata; subclasses override these as class attributes.
    # Keeping the defaults at class level (instead of assigning None in
    # __init__) avoids shadowing the values the subclasses declare.
    name = None
    description = None
    inputs = {}
    output_type = None

    def forward(self, *args, **kwargs):
        raise NotImplementedError("Subclasses must implement this method.")


class SearchInformationTool(Tool):
    name = "web_search"
    description = "Perform a web search query and return the search results."
    inputs = {
        "query": {"type": "string", "description": "The web search query to perform."},
        "filter_year": {
            "type": "string",
            "description": "[Optional parameter]: filter the search results to only include pages from a specific year.",
            "nullable": True,
        },
    }
    output_type = "string"

    def __init__(self, browser):
        super().__init__()
        self.browser = browser

    def forward(self, query: str, filter_year: int | None = None) -> str:
        return self.browser.search_web(query, filter_year)


class VisitTool(Tool):
    name = "visit_page"
    description = "Visit a webpage at a given URL and return its text."
    inputs = {"url": {"type": "string", "description": "The relative or absolute URL of the webpage to visit."}}
    output_type = "string"

    def __init__(self, browser):
        super().__init__()
        self.browser = browser

    def forward(self, url: str) -> str:
        self.browser.visit_page(url)
        # _state() returns the (header, content) pair of the most recently visited page.
        header, content = self.browser._state()
        return header.strip() + "\n=======================\n" + content


class Browser:
    def __init__(self):
        self.current_page = None

    def search_web(self, query, filter_year=None):
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
        }
        # Restrict results to the requested year, if given, via Google's
        # before:/after: date operators.
        if filter_year:
            query = f"{query} after:{filter_year}-01-01 before:{filter_year}-12-31"
        try:
            # Pass the query via params so requests handles URL-encoding.
            response = requests.get(
                "https://www.google.com/search",
                params={"q": query},
                headers=headers,
                timeout=30,
            )
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # 'tF2Cxc' is the CSS class Google currently uses for organic result
            # blocks; this selector is brittle and may break if the markup changes.
            results = soup.find_all('div', class_='tF2Cxc')
            search_results = []
            for idx, result in enumerate(results, start=1):
                result_text = result.get_text()
                search_results.append(f"Result {idx}: {result_text}\n")
                link = result.find('a', href=True)
                if link and 'wikipedia.org' in link['href']:
                    search_results.append(f"Found Wikipedia link: {link['href']}")
            return "\n".join(search_results)
        except requests.exceptions.RequestException as e:
            return f"An error occurred: {e}"

    def visit_page(self, url: str):
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # Drop script and style elements so only the visible text remains.
            for script in soup(["script", "style"]):
                script.decompose()
            text = soup.get_text(separator='\n', strip=True)
            self.current_page = {
                "url": url,
                "header": f"Header for {url}",
                "content": text
            }
        except requests.RequestException as e:
            print(f"An error occurred: {e}")
            self.current_page = {
                "url": url,
                "header": "Error",
                "content": f"Failed to retrieve the page: {e}"
            }

    def _state(self):
        # Return (header, content) for the current page, or empty strings
        # if no page has been visited yet.
        if self.current_page:
            return self.current_page["header"], self.current_page["content"]
        return "", ""