# Redfin house-listings scraper — Streamlit app.
# (Removed "Spaces: Sleeping" text accidentally pasted from the Hugging Face
# Spaces status UI; it was not valid Python.)
import os
import subprocess
import time

import chromedriver_autoinstaller
import pandas as pd
import streamlit as st
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
def install_chrome():
    """Ensure a Chromium binary is available for Selenium.

    Installs the ``chromium-browser`` apt package when the expected binary
    is missing, then appends ``/usr/bin/`` to PATH so the driver can find it.
    Requires root (or an environment such as a container) for apt-get.
    """
    binary = "/usr/bin/chromium-browser"
    if not os.path.exists(binary):
        # Two-step apt flow: refresh package lists, then install.
        for cmd in (["apt-get", "update"],
                    ["apt-get", "install", "-y", "chromium-browser"]):
            subprocess.run(cmd, check=True)
    os.environ["PATH"] = os.environ["PATH"] + os.pathsep + "/usr/bin/"
def scrape_redfin(zipcode):
    """Scrape Redfin listings for *zipcode* using headless Chromium.

    Returns a DataFrame with columns Price, Address, Size, Link; returns an
    empty DataFrame when the listings container never loads.
    """
    install_chrome()  # make sure a Chromium binary exists before starting
    # FIX: the original called chromedriver_autoinstaller.install() twice
    # (once bare, once inside Service) — install once and reuse the path.
    driver_path = chromedriver_autoinstaller.install()

    options = Options()
    options.add_argument("--headless")  # no display on servers/containers
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--incognito")
    # Reduce the chance Redfin flags the session as automated.
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
    options.binary_location = "/usr/bin/chromium-browser"  # use Chromium

    driver = webdriver.Chrome(service=Service(driver_path), options=options)
    try:
        driver.get(f"https://www.redfin.com/zipcode/{zipcode}")
        try:
            # NOTE(review): absolute XPath is brittle against Redfin layout
            # changes — kept byte-identical to preserve behavior.
            WebDriverWait(driver, 60).until(
                EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div"))
            )
        except TimeoutException:
            st.error("Error: Listings did not load properly")
            return pd.DataFrame()

        _scroll_to_bottom(driver)
        listings = driver.find_elements(By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div/div")
        houses = [_extract_listing(listing) for listing in listings]
    finally:
        # FIX: the original leaked the browser process if anything raised
        # between driver creation and the final quit().
        driver.quit()
    return pd.DataFrame(houses)


def _scroll_to_bottom(driver, pause=5):
    """Scroll half a viewport at a time until the page height stops growing,
    so Redfin's lazily loaded listings all render."""
    screen_height = driver.execute_script("return window.innerHeight;")
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollBy(0, arguments[0]);", screen_height // 2)
        time.sleep(pause)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height


def _extract_listing(listing):
    """Extract Price/Address/Size/Link from one listing card.

    Missing fields become "N/A" instead of aborting the whole scrape.
    FIX: the original used bare ``except:``, which also swallowed
    KeyboardInterrupt/SystemExit; narrowed to ``Exception``.
    """
    def _field(getter):
        try:
            return getter()
        except Exception:
            return "N/A"

    return {
        "Price": _field(lambda: listing.find_element(By.XPATH, ".//div/div/div[2]/div[1]/div[1]/span").text),
        "Address": _field(lambda: listing.find_element(By.XPATH, ".//div/div/div[2]/div[3]").text),
        "Size": _field(lambda: listing.find_element(By.XPATH, ".//div/div/div[2]/div[4]/div").text),
        "Link": _field(lambda: listing.find_element(By.TAG_NAME, "a").get_attribute("href")),
    }
# --- Streamlit UI: prompt for a ZIP code and render the scraped listings ---
st.title("Redfin House Listings Scraper")
zipcode = st.text_input("Enter ZIP code:")

if st.button("Scrape Data"):
    if not zipcode:
        # Nothing typed — ask for input instead of scraping.
        st.error("Please enter a valid ZIP code.")
    else:
        with st.spinner("Scraping data, please wait..."):
            results = scrape_redfin(zipcode)
            if results.empty:
                st.warning("No houses found for the given ZIP code.")
            else:
                st.success("Scraping complete! Here are the available houses:")
                st.dataframe(results)
# NOTE(review): removed ~70 lines of fully commented-out duplicate of this app
# (a prior variant that used webdriver_manager's ChromeDriverManager instead of
# chromedriver_autoinstaller). Commented-out code should not be kept in the
# file — version control history preserves the old implementation.