# File size: 7,258 Bytes
# Revision: c467d81
import unittest
from unittest.mock import patch, MagicMock
import requests # Import requests for its exception types
import os
import sys
# Make the directory one level above this file's directory importable so the
# `src` package can be found. This must run before the project import below.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Adjust the import path to match your project structure.
# Current layout: WebBrowser is defined in the `web_browsing_tool` module of the 'src' package.
from src.web_browsing_tool import WebBrowser
# Alternative layout (tool module kept in a 'tools' subdirectory next to app.py);
# NOTE(review): confirm the module name before enabling this import.
# from tools.web_browser import WebBrowser
class TestWebBrowser(unittest.TestCase):
    """Unit tests for ``WebBrowser.browse``.

    Every test that reaches the HTTP layer patches ``requests.get`` as it is
    looked up from ``src.web_browsing_tool``, so the suite never touches the
    network. The tests pin two things: the text ``browse`` extracts from HTML
    and the ``"Error: ..."`` strings it returns when a request fails or the
    URL is rejected.
    """

    # Values the tests expect ``browse`` to forward to ``requests.get``.
    USER_AGENT = "TestAgent/1.0"
    REQUEST_TIMEOUT = 15

    def setUp(self):
        """Create a fresh browser with a known User-Agent for every test."""
        self.browser = WebBrowser(user_agent=self.USER_AGENT)

    @staticmethod
    def _html_response(content):
        """Return a mock HTTP 200 response whose ``content`` is *content* (bytes)."""
        response = MagicMock()
        response.status_code = 200
        response.content = content
        # Explicit stand-in so a "successful" fetch never raises from this call.
        response.raise_for_status = MagicMock()
        return response

    def _assert_single_get(self, mock_get, url):
        """Assert ``requests.get`` was called exactly once with the expected arguments."""
        mock_get.assert_called_once_with(
            url,
            headers={"User-Agent": self.USER_AGENT},
            timeout=self.REQUEST_TIMEOUT,
        )

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_successful_fetch_and_parse(self, mock_get):
        """A 200 response yields the title and body text, without script content."""
        mock_get.return_value = self._html_response(
            b"<html><head><title>Test Page</title></head><body><p>Hello World!</p><script>alert('test');</script></body></html>"
        )
        url = "http://example.com/testpage"

        result = self.browser.browse(url)

        self._assert_single_get(mock_get, url)
        self.assertEqual(result, "Test Page\nHello World!")

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_http_error(self, mock_get):
        """An ``HTTPError`` from ``requests.get`` is reported as an error string."""
        mock_get.side_effect = requests.exceptions.HTTPError("404 Client Error: Not Found for url")
        url = "http://example.com/notfound"

        result = self.browser.browse(url)

        self._assert_single_get(mock_get, url)
        # Passing the actual result as msg makes a failing prefix check readable.
        self.assertTrue(result.startswith("Error: HTTP error occurred"), msg=result)
        self.assertIn("404 Client Error", result)

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_connection_error(self, mock_get):
        """A ``ConnectionError`` is reported as an error string with its message."""
        mock_get.side_effect = requests.exceptions.ConnectionError("Connection refused")
        url = "http://example.com/unreachable"

        result = self.browser.browse(url)

        self.assertTrue(result.startswith("Error: Connection error occurred"), msg=result)
        self.assertIn("Connection refused", result)

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_timeout_error(self, mock_get):
        """A ``Timeout`` is reported as an error string with its message."""
        mock_get.side_effect = requests.exceptions.Timeout("Request timed out")
        url = "http://example.com/slowresponse"

        result = self.browser.browse(url)

        self.assertTrue(result.startswith("Error: Timeout occurred"), msg=result)
        self.assertIn("Request timed out", result)

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_generic_request_exception(self, mock_get):
        """Any other ``RequestException`` is reported with the generic fetch-error prefix."""
        mock_get.side_effect = requests.exceptions.RequestException("Some other request error")
        url = "http://example.com/othererror"

        result = self.browser.browse(url)

        self.assertTrue(
            result.startswith("Error: An unexpected error occurred while fetching"),
            msg=result,
        )
        self.assertIn("Some other request error", result)

    def test_browse_invalid_url_format(self):
        """A URL without an http:// or https:// scheme is rejected with an error string."""
        url = "www.example.com"  # Missing http:// or https://

        result = self.browser.browse(url)

        self.assertEqual(
            result,
            "Error: Invalid URL format. URL must start with http:// or https://. Received: www.example.com",
        )

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_no_text_content(self, mock_get):
        """A page holding only script/style elements yields the no-text error string."""
        mock_get.return_value = self._html_response(
            b"<html><head><script>var x=1;</script></head><body><style>.body {color:red;}</style></body></html>"
        )
        url = "http://example.com/notext"

        result = self.browser.browse(url)

        self.assertEqual(result, f"Error: No text content found at {url}.")

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_strips_extra_whitespace_and_newlines(self, mock_get):
        """Whitespace between elements collapses to one newline per text fragment."""
        mock_get.return_value = self._html_response(
            b"<html><body><p>Line 1</p> <p>Line 2</p>\n\n<p>Line\n3</p><div><span>Text</span></div></body></html>"
        )
        url = "http://example.com/whitespace"

        result = self.browser.browse(url)

        expected_text = "Line 1\nLine 2\nLine\n3\nText"
        self.assertEqual(result, expected_text)

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_for_question_answering_scenario_mercedes_sosa(self, mock_get):
        """
        Tests if the browser can extract relevant text for a question
        similar to the Mercedes Sosa studio albums count.
        """
        # Use a regular string for HTML content. The accented titles are real
        # non-ASCII text so the UTF-8 round trip below is actually exercised.
        mock_html_content_str = """
<html>
<head><title>Mercedes Sosa Discography</title></head>
<body>
<h1>Mercedes Sosa</h1>
<h2>Studio Albums</h2>
<ul>
<li>1999 - Misa Criolla</li>
<li>2002 - Acústico</li>
<li>2005 - Corazón libre</li>
<li>2009 - Cantora 1</li>
<li>2011 - Canto para caminar</li>
</ul>
<h2>Live Albums</h2>
<ul>
<li>2000 - Live in Concert</li>
</ul>
</body>
</html>
"""
        # Encode the string to bytes for the content
        mock_get.return_value = self._html_response(mock_html_content_str.encode('utf-8'))
        url = "http://example.com/mercedes_sosa_discography"

        result = self.browser.browse(url)

        # Assert that key information is present in the extracted text
        self.assertIn("Mercedes Sosa Discography", result)  # From title
        self.assertIn("Studio Albums", result)
        self.assertIn("1999 - Misa Criolla", result)
        self.assertIn("2002 - Acústico", result)
        self.assertIn("2005 - Corazón libre", result)
        self.assertIn("2009 - Cantora 1", result)
        self.assertIn("2011 - Canto para caminar", result)
        # Ensure it doesn't just grab everything indiscriminately or miss sections
        self.assertIn("Live Albums", result)
        self.assertIn("2000 - Live in Concert", result)
        # A further step (outside this tool's direct responsibility but for agent context)
        # would be to pass this 'result' to an LLM with the question:
        # "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)?"
        # The LLM should be able to parse the structured list and count "Acústico", "Corazón libre", "Cantora 1" -> 3.
# Run the suite when this file is executed directly (not when imported).
if __name__ == '__main__':
    unittest.main()