from typing import Optional

import requests
from bs4 import BeautifulSoup, ResultSet


class GenericScraper:
    """Fetch a web page and return the text of its most paragraph-heavy container.

    The heuristic looks at every ``<div>``, ``<section>`` and ``<article>``
    element and picks the one whose *direct* ``<p>`` children hold the most
    text.
    """

    def __init__(self) -> None:
        pass

    def scrape(self, url: str, timeout: Optional[float] = 10.0) -> str:
        """Download ``url`` and return the text of its best content container.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up. ``None``
                disables the timeout (the request may then block
                indefinitely).

        Returns:
            The full text (``get_text()``) of the selected container,
            including text from nested elements.

        Raises:
            Exception: If the response status code is not 200, or if no
                container with non-empty direct ``<p>`` children is found.
            requests.exceptions.RequestException: Propagated from
                ``requests.get`` on connection failures or timeouts.
        """
        # Without an explicit timeout, requests waits forever on a stalled
        # server, which would hang the caller.
        response: requests.Response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            raise Exception(
                f'Failed to fetch url: {url} with status code {response.status_code}'
            )

        soup: BeautifulSoup = BeautifulSoup(response.content, 'html.parser')
        sections: ResultSet[BeautifulSoup] = soup.find_all(
            ['div', 'section', 'article']
        )

        max_p_len = 0
        best_section: Optional[BeautifulSoup] = None
        for section in sections:
            # recursive=False: only paragraphs that are immediate children
            # count, so an outer wrapper is not credited with the text of the
            # containers nested inside it.
            ps = section.find_all('p', recursive=False)
            p_len = len('\n'.join(p.get_text() for p in ps))
            # Strict '>' keeps the first container on ties and never selects
            # a container whose direct paragraphs are empty or absent.
            if p_len > max_p_len:
                max_p_len = p_len
                best_section = section

        if best_section is None:
            # Also reached when containers exist but none has direct <p> text.
            raise Exception('No sections found')
        return best_section.get_text()