"""Headless-Chromium page scraper built on Selenium.

Importing this module installs a chromedriver matching the local browser and
builds the shared Chrome ``options`` object used by ``scraper`` by default.
"""

import json

import chromedriver_autoinstaller
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

chromedriver_autoinstaller.install()  # Automatically installs compatible driver

# Shared default browser configuration: headless Chromium with the sandbox
# and /dev/shm usage disabled.
options = Options()
options.binary_location = "/usr/bin/chromium-browser"
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")


def _collect_attribute(elements, attribute: str) -> list:
    """Return the non-empty values of *attribute* across *elements*.

    Each element is queried exactly once. Elements that have gone stale
    (detached from the DOM since they were located) are skipped.
    """
    values = []
    for element in elements:
        try:
            value = element.get_attribute(attribute)
        except StaleElementReferenceException:
            # The page mutated after the lookup; a detached element has
            # nothing left to report, so skip it rather than abort the scrape.
            continue
        if value:
            values.append(value)
    return values


def scraper(link: str, options=options) -> dict:
    """Load *link* in Chrome and collect its visible text and resource URLs.

    Args:
        link: URL of the page to open.
        options: Chrome options used to start the browser; defaults to the
            module-level ``options`` object.

    Returns:
        A dict with three keys:
            ``"page_text"``: text of the ``<body>`` element,
            ``"script_sources"``: non-empty ``src`` values of ``<script>`` tags,
            ``"link_sources"``: non-empty ``href`` values of ``<link>`` tags.

    Raises:
        selenium.common.exceptions.WebDriverException: propagated unchanged
            if the browser cannot start, the page cannot be loaded, or the
            ``<body>`` element cannot be found.
    """
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(link)
        page_text = driver.find_element(By.TAG_NAME, "body").text
        script_sources = _collect_attribute(
            driver.find_elements(By.TAG_NAME, "script"), "src"
        )
        link_sources = _collect_attribute(
            driver.find_elements(By.TAG_NAME, "link"), "href"
        )
    finally:
        # Always shut the browser down, even when navigation or a lookup
        # fails, so no Chromium process is left running.
        driver.quit()

    return {
        "page_text": page_text,
        "script_sources": script_sources,
        "link_sources": link_sources,
    }