# Redfin-app / app.py
# Hugging Face Space by Krish-Upgrix (commit a154445) — Streamlit UI that
# scrapes Redfin house listings for a given ZIP code with headless Chromium.
import streamlit as st
import pandas as pd
import time
import os
import subprocess
import chromedriver_autoinstaller
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def install_chrome():
    """Install the Chromium browser via apt-get if it is not already present.

    Also ensures /usr/bin is on PATH so Selenium can locate the binary.

    Raises:
        subprocess.CalledProcessError: if either apt-get step fails
            (check=True makes a missing browser fail loudly here rather
            than later inside Selenium).
    """
    if not os.path.exists("/usr/bin/chromium-browser"):
        subprocess.run(["apt-get", "update"], check=True)
        subprocess.run(["apt-get", "install", "-y", "chromium-browser"], check=True)
    # Only append once: scrape_redfin() calls this on every scrape, and the
    # original unconditional += grew PATH with a duplicate entry each time.
    if "/usr/bin/" not in os.environ.get("PATH", "").split(os.pathsep):
        os.environ["PATH"] += os.pathsep + "/usr/bin/"
def scrape_redfin(zipcode):
    """Scrape Redfin house listings for *zipcode*.

    Launches headless Chromium via Selenium, waits for the listings container,
    scrolls until the page stops growing, then extracts one row per listing.

    Args:
        zipcode: ZIP code string interpolated into the Redfin URL.

    Returns:
        pandas.DataFrame with columns Price, Address, Size, Link; empty if
        the listings container never appears within 60 seconds.
    """
    install_chrome()  # Ensure Chrome/Chromium is installed
    # install() returns the chromedriver path; call it once and reuse the
    # result (the original invoked it twice, downloading/checking twice).
    driver_path = chromedriver_autoinstaller.install()

    options = Options()
    options.add_argument("--headless")  # Run in headless mode
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--incognito")
    # Reduce the chance of Redfin's automation detection flagging the session.
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
    options.binary_location = "/usr/bin/chromium-browser"  # Use Chromium

    driver = webdriver.Chrome(service=Service(driver_path), options=options)
    driver.get(f"https://www.redfin.com/zipcode/{zipcode}")

    try:
        # NOTE(review): absolute XPath is brittle — tied to Redfin's current
        # page layout and likely to break when the site markup changes.
        WebDriverWait(driver, 60).until(
            EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div"))
        )
    except Exception:
        st.error("Error: Listings did not load properly")
        driver.quit()
        return pd.DataFrame()

    try:
        _scroll_to_bottom(driver)
        listings = driver.find_elements(
            By.XPATH,
            "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div/div",
        )
        houses = [_extract_listing(listing) for listing in listings]
    finally:
        # Always release the browser process, even if scrolling or extraction
        # raises (the original leaked the driver on any mid-scrape error).
        driver.quit()
    return pd.DataFrame(houses)


def _scroll_to_bottom(driver, pause=5):
    """Scroll half a viewport at a time until the page height stops growing."""
    screen_height = driver.execute_script("return window.innerHeight;")
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollBy(0, arguments[0]);", screen_height // 2)
        time.sleep(pause)  # give lazy-loaded listings time to render
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height


def _find_text(listing, xpath):
    """Return the text of the sub-element at *xpath*, or "N/A" if absent."""
    try:
        return listing.find_element(By.XPATH, xpath).text
    except Exception:  # narrowed from bare except: don't mask KeyboardInterrupt
        return "N/A"


def _extract_listing(listing):
    """Extract one listing card into a dict of Price/Address/Size/Link."""
    try:
        link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
    except Exception:
        link = "N/A"
    return {
        "Price": _find_text(listing, ".//div/div/div[2]/div[1]/div[1]/span"),
        "Address": _find_text(listing, ".//div/div/div[2]/div[3]"),
        "Size": _find_text(listing, ".//div/div/div[2]/div[4]/div"),
        "Link": link,
    }
# --- Streamlit UI ----------------------------------------------------------
st.title("Redfin House Listings Scraper")
zipcode = st.text_input("Enter ZIP code:")

if st.button("Scrape Data"):
    # Guard clause: reject an empty input before launching the browser.
    if not zipcode:
        st.error("Please enter a valid ZIP code.")
    else:
        with st.spinner("Scraping data, please wait..."):
            results = scrape_redfin(zipcode)
            if results.empty:
                st.warning("No houses found for the given ZIP code.")
            else:
                st.success("Scraping complete! Here are the available houses:")
                st.dataframe(results)
## NOTE: previous working version kept below for reference — it used
## webdriver_manager (ChromeDriverManager) instead of chromedriver_autoinstaller.
# import streamlit as st
# import pandas as pd
# import time
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from webdriver_manager.chrome import ChromeDriverManager
# def scrape_redfin(zipcode):
# options = Options()
# options.add_argument("--headless")
# options.add_argument("--incognito")
# options.add_argument("--disable-blink-features=AutomationControlled")
# options.add_argument("start-maximized")
# options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
# driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
# url = f"https://www.redfin.com/zipcode/{zipcode}"
# driver.get(url)
# try:
# listings_container = WebDriverWait(driver, 60).until(
# EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div"))
# )
# except Exception as e:
# st.error("Error: Listings did not load properly")
# driver.quit()
# return pd.DataFrame()
# scroll_pause_time = 5
# screen_height = driver.execute_script("return window.innerHeight;")
# last_height = driver.execute_script("return document.body.scrollHeight")
# while True:
# driver.execute_script("window.scrollBy(0, arguments[0]);", screen_height // 2)
# time.sleep(scroll_pause_time)
# new_height = driver.execute_script("return document.body.scrollHeight")
# if new_height == last_height:
# break
# last_height = new_height
# houses = []
# listings = driver.find_elements(By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div/div")
# for listing in listings:
# try:
# price = listing.find_element(By.XPATH, ".//div/div/div[2]/div[1]/div[1]/span").text
# except:
# price = "N/A"
# try:
# address = listing.find_element(By.XPATH, ".//div/div/div[2]/div[3]").text
# except:
# address = "N/A"
# try:
# size = listing.find_element(By.XPATH, ".//div/div/div[2]/div[4]/div").text
# except:
# size = "N/A"
# try:
# link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
# except:
# link = "N/A"
# houses.append({"Price": price, "Address": address, "Size": size, "Link": link})
# driver.quit()
# return pd.DataFrame(houses)
# st.title("Redfin House Listings Scraper")
# zipcode = st.text_input("Enter ZIP code:")
# if st.button("Scrape Data"):
# if zipcode:
# with st.spinner("Scraping data, please wait..."):
# df = scrape_redfin(zipcode)
# if not df.empty:
# st.success("Scraping complete! Here are the available houses:")
# st.dataframe(df)
# else:
# st.warning("No houses found for the given ZIP code.")
# else:
# st.error("Please enter a valid ZIP code.")