|
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
|
|
|
class Tool:
    """Base class for tools.

    Subclasses describe themselves through the class attributes ``name``,
    ``description``, ``inputs`` and ``output_type``, and implement
    :meth:`forward` with the tool's actual behavior.
    """

    def __init__(self):
        # Fall back to class-level values instead of unconditionally
        # resetting them: the previous implementation set these to
        # None/{} on every instance, shadowing the metadata subclasses
        # declare at class level (e.g. SearchInformationTool.name).
        self.name = getattr(self, "name", None)
        self.description = getattr(self, "description", None)
        self.inputs = getattr(self, "inputs", {})
        self.output_type = getattr(self, "output_type", None)

    def forward(self, *args, **kwargs):
        """Execute the tool; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError("Subclasses must implement this method.")
|
|
|
class SearchInformationTool(Tool):
    """Tool that runs a web search through a shared browser instance."""

    name = "web_search"
    description = "Perform a web search query and return the search results."
    inputs = {
        "query": {"type": "string", "description": "The web search query to perform."},
        "filter_year": {
            "type": "string",
            "description": "[Optional parameter]: filter the search results to only include pages from a specific year.",
            "nullable": True,
        },
    }
    output_type = "string"

    def __init__(self, browser):
        """Bind the tool to the browser that will execute searches."""
        super().__init__()
        self.browser = browser

    def forward(self, query: str, filter_year: int | None = None) -> str:
        """Search the web for *query*, optionally limited to one year.

        A single filter year restricts the range to exactly that year,
        hence it is passed as both the start and the end bound.
        """
        return self.browser.search_web(query, filter_year, filter_year)
|
|
|
class VisitTool(Tool):
    """Tool that loads a webpage and returns its textual content."""

    name = "visit_page"
    description = "Visit a webpage at a given URL and return its text."
    inputs = {"url": {"type": "string", "description": "The relative or absolute URL of the webpage to visit."}}
    output_type = "string"

    def __init__(self, browser=None):
        """Bind the tool to the browser used for page fetches."""
        super().__init__()
        self.browser = browser

    def forward(self, url: str) -> str:
        """Visit *url* and return its header followed by the page text."""
        self.browser.visit_page(url)
        page_header, page_text = self.browser._state()
        divider = "\n=======================\n"
        return page_header.strip() + divider + page_text
|
|
|
class Browser:
    """Minimal web browser: performs Google searches and fetches page text."""

    def __init__(self):
        # Most recently visited page as a dict with "url", "header" and
        # "content" keys; None until the first visit_page() call.
        self.current_page = None

    def search_web(self, query, start_year, end_year):
        """Run a Google search for *query* and return result text.

        Args:
            query: Free-text search query (will be URL-encoded).
            start_year: Lower bound of an optional year filter, or None.
            end_year: Upper bound of an optional year filter, or None.

        Returns:
            A newline-joined string of result snippets (plus any Wikipedia
            links found), or an "An error occurred: ..." message on failure.
        """
        # URL-encode the query: the previous version interpolated it raw,
        # which produced broken URLs for queries containing spaces or
        # special characters.
        url = f"https://www.google.com/search?q={quote_plus(query)}"
        if start_year or end_year:
            # Wire the year bounds into Google's custom date-range filter;
            # they were previously accepted but silently ignored. Either
            # bound may be None, in which case the other is used for both.
            first = start_year or end_year
            last = end_year or start_year
            url += f"&tbs=cdr:1,cd_min:1/1/{first},cd_max:12/31/{last}"
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
        }
        try:
            # Timeout prevents hanging forever on an unresponsive server.
            response = requests.get(url, headers=headers, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # 'tF2Cxc' is Google's result-container class name — fragile,
            # since Google changes its markup without notice.
            results = soup.find_all('div', class_='tF2Cxc')
            search_results = []
            for idx, result in enumerate(results, start=1):
                result_text = result.get_text()
                search_results.append(f"Result {idx}: {result_text}\n")
                link = result.find('a', href=True)
                if link and 'wikipedia.org' in link['href']:
                    search_results.append(f"Found Wikipedia link: {link['href']}")
            return "\n".join(search_results)
        except requests.exceptions.RequestException as e:
            return f"An error occurred: {e}"

    def visit_page(self, url: str):
        """Fetch *url* and store its stripped text in self.current_page.

        On request failure the stored page carries an "Error" header and
        the failure message as content, so callers always get a page.
        """
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # Drop non-content elements so get_text() yields readable text.
            for script in soup(["script", "style"]):
                script.decompose()
            text = soup.get_text(separator='\n', strip=True)
            self.current_page = {
                "url": url,
                "header": f"Header for {url}",
                "content": text
            }
        except requests.RequestException as e:
            print(f"An error occurred: {e}")
            self.current_page = {
                "url": url,
                "header": "Error",
                "content": f"Failed to retrieve the page: {e}"
            }

    def _state(self):
        """Return (header, content) of the current page, or ("", "") if none."""
        if self.current_page:
            return self.current_page["header"], self.current_page["content"]
        return "", ""
|
|
|
|
|
|
|
|