"""Lightweight web tools: a Google-search scraper and a page-visit helper.

Built on ``requests`` + ``BeautifulSoup``. ``Tool`` is the abstract base;
``SearchInformationTool`` and ``VisitTool`` delegate to a shared ``Browser``.
"""

from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup


class Tool:
    """Base class for tools.

    Subclasses declare ``name``, ``description``, ``inputs`` and
    ``output_type`` as *class* attributes. ``__init__`` therefore only
    provides fallbacks; the original version unconditionally assigned
    instance attributes (``self.name = None`` etc.), which shadowed and
    clobbered every subclass declaration as soon as the subclass called
    ``super().__init__()``.
    """

    # Class-level defaults; subclasses override these declaratively.
    name = None
    description = None
    inputs = {}
    output_type = None

    def __init__(self):
        # Intentionally empty: do NOT overwrite subclass class attributes.
        # Defaults above already guarantee every attribute exists.
        pass

    def forward(self, *args, **kwargs):
        """Execute the tool. Must be overridden by subclasses."""
        raise NotImplementedError("Subclasses must implement this method.")


class SearchInformationTool(Tool):
    """Perform a web search query and return formatted result snippets."""

    name = "web_search"
    description = "Perform a web search query and return the search results."
    inputs = {
        "query": {"type": "string", "description": "The web search query to perform."},
        "filter_year": {
            "type": "string",
            "description": "[Optional parameter]: filter the search results to only include pages from a specific year.",
            "nullable": True,
        },
    }
    output_type = "string"

    def __init__(self, browser):
        """
        Args:
            browser: a ``Browser`` instance that performs the actual request.
        """
        super().__init__()
        self.browser = browser

    def forward(self, query: str, filter_year: int | None = None) -> str:
        """Run the search; a single ``filter_year`` maps to the [year, year] range."""
        return self.browser.search_web(query, filter_year, filter_year)


class VisitTool(Tool):
    """Visit a webpage at a given URL and return its text content."""

    name = "visit_page"
    description = "Visit a webpage at a given URL and return its text."
    inputs = {"url": {"type": "string", "description": "The relative or absolute URL of the webpage to visit."}}
    output_type = "string"

    def __init__(self, browser=None):
        """
        Args:
            browser: a ``Browser`` instance. NOTE(review): the ``None``
                default looks unsafe — ``forward`` dereferences
                ``self.browser`` unconditionally; confirm callers always
                supply one.
        """
        super().__init__()
        self.browser = browser

    def forward(self, url: str) -> str:
        """Fetch ``url`` and return the browser's header + page text."""
        self.browser.visit_page(url)
        header, content = self.browser._state()
        return header.strip() + "\n=======================\n" + content


class Browser:
    """Minimal stateful browser: scrapes Google results and fetches page text."""

    # Seconds before a hung server aborts the request (original had no
    # timeout, so a stalled connection blocked forever).
    REQUEST_TIMEOUT = 15

    def __init__(self):
        # Last page fetched by visit_page(); dict with url/header/content.
        self.current_page = None

    def search_web(self, query, start_year=None, end_year=None):
        """Scrape Google search results for ``query``.

        Args:
            query: search terms (percent-encoded before building the URL —
                the original interpolated the raw string, breaking queries
                containing spaces, ``&``, ``#`` or non-ASCII characters).
            start_year, end_year: optional inclusive year range. The
                original accepted these but silently ignored them; they now
                map to Google's custom date-range (``tbs=cdr``) filter.

        Returns:
            A newline-joined string of numbered result snippets (plus a
            note for any Wikipedia link found), or an error message.
        """
        url = f"https://www.google.com/search?q={quote_plus(str(query))}"
        if start_year is not None and end_year is not None:
            # Google custom date-range filter: cd_min/cd_max are M/D/YYYY.
            url += f"&tbs=cdr:1,cd_min:1/1/{start_year},cd_max:12/31/{end_year}"
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
        }
        try:
            response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # NOTE(review): 'tF2Cxc' is a Google-internal CSS class and is
            # liable to change without notice — brittle by design here.
            results = soup.find_all('div', class_='tF2Cxc')

            search_results = []
            for idx, result in enumerate(results, start=1):
                result_text = result.get_text()
                search_results.append(f"Result {idx}: {result_text}\n")
                link = result.find('a', href=True)
                if link and 'wikipedia.org' in link['href']:
                    search_results.append(f"Found Wikipedia link: {link['href']}")

            return "\n".join(search_results)
        except requests.exceptions.RequestException as e:
            return f"An error occurred: {e}"

    def visit_page(self, url: str):
        """Fetch ``url``, strip scripts/styles, and store the page text in
        ``self.current_page``. On failure, stores an error page instead of
        raising, so ``_state()`` always has something to return."""
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')
            # Drop non-content nodes before extracting visible text.
            for script in soup(["script", "style"]):
                script.decompose()
            text = soup.get_text(separator='\n', strip=True)

            self.current_page = {
                "url": url,
                "header": f"Header for {url}",
                "content": text
            }
        except requests.RequestException as e:
            print(f"An error occurred: {e}")
            self.current_page = {
                "url": url,
                "header": "Error",
                "content": f"Failed to retrieve the page: {e}"
            }

    def _state(self):
        """Return (header, content) of the current page, or ("", "") if none."""
        if self.current_page:
            return self.current_page["header"], self.current_page["content"]
        return "", ""