# Krish-Upgrix's picture
# Rename FSBO_app.py to app.py
# 698125b verified
# Version: 3
import streamlit as st
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import re
def search_fsbo_address(location):
    """Open fsbo.com, submit *location* in the search form, and return the driver.

    Parameters:
        location: free-text address/area string typed by the user.

    Returns:
        The live WebDriver sitting on the results page, or None when the
        search UI could not be driven (the error is surfaced via st.error
        and the browser is closed).
    """
    options = Options()
    options.add_argument("--incognito")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_argument("--disable-gpu")
    options.add_argument("--log-level=3")
    # Spoof a regular desktop UA so the site serves the normal page, not a bot wall.
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        # Navigation is inside the try so a failure here also quits the browser
        # (the original leaked the driver if driver.get raised).
        driver.get("https://fsbo.com/")
        # NOTE(review): absolute XPaths are brittle — they break on any site relayout.
        search_box = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
        )
        search_box.clear()
        search_box.send_keys(location)
        time.sleep(2)
        # Wait until the button is actually clickable instead of clicking blind.
        search_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button"))
        )
        search_button.click()
        time.sleep(5)  # allow results to load; fixed delay kept from original behavior
        return driver
    except Exception as e:
        st.error(f"Error finding FSBO URL: {e}")
        driver.quit()
        return None
def clean_text(text):
    """Clean raw scraped listing text into a single " | "-joined string.

    Steps: split into non-empty stripped lines, drop "View Listing Details"
    boilerplate, strip the "Listing ID#NNNN -" prefix, and discard lines
    that become empty after cleaning.

    Fixes two defects in the original: the re.sub left a leading space
    behind (" Nice house"), and lines consisting only of a listing ID
    survived as empty segments, producing stray " | " separators.

    Parameters:
        text: multi-line innerText scraped from a listing card.

    Returns:
        A single-line, pipe-separated summary string ("" for empty input).
    """
    cleaned = []
    for line in text.split("\n"):
        line = line.strip()
        if not line or "View Listing Details" in line:
            continue
        # Remove the listing-ID prefix (e.g. "Listing ID#541799 - "),
        # then re-strip to drop the space the prefix left behind.
        line = re.sub(r"Listing ID#\d+\s*-", "", line).strip()
        if line:
            cleaned.append(line)
    return " | ".join(cleaned)
def scrape_first_house(driver):
    """Scrape the first listing on the results page.

    Parameters:
        driver: a WebDriver already sitting on the FSBO search-results page.

    Returns:
        The cleaned details string for the first listing, or "N/A" when the
        listing cannot be located or read (no results, layout change,
        stale element, ...).
    """
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
        )
        first_listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")
        details_text = first_listing.get_attribute("innerText").strip()
        return clean_text(details_text)
    except Exception:
        # The original bare `except:` also trapped KeyboardInterrupt/SystemExit;
        # `Exception` keeps the best-effort "N/A" behavior without doing that.
        return "N/A"
def scrape_all_houses(driver):
    """Scrape every listing on the results page.

    Parameters:
        driver: a WebDriver already sitting on the FSBO search-results page.

    Returns:
        A list of markdown-numbered detail strings ("**1.** ..."), or an
        empty list when the results container never appears. Individual
        listings that fail to yield text are skipped (their index is still
        consumed, matching the original enumerate-based numbering).
    """
    houses = []
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
        )
        listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div")
    except Exception:
        # Results container never appeared — treat as "no houses" rather than crash.
        return []
    for index, listing in enumerate(listings, start=1):
        try:
            details_text = listing.get_attribute("innerText").strip()
            houses.append(f"**{index}.** {clean_text(details_text)}")
        except Exception:
            # Skip a single stale/unreadable card; keep scraping the rest.
            continue
    return houses
def main():
    """Streamlit entry point: take an address, search FSBO, display listings."""
    st.title("FSBO House Price Finder")
    location = st.text_input("Enter Address:")
    if st.button("Search"):
        # Guard: an empty address would just search FSBO's landing page.
        if not location.strip():
            st.warning("Please enter an address before searching.")
            return
        with st.spinner("Fetching house details..."):
            driver = search_fsbo_address(location)
            if not driver:
                return  # search_fsbo_address already reported the error
            try:
                first_house = scrape_first_house(driver)
                if first_house != "N/A":
                    st.success("**First House Found:**")
                    st.write(first_house)
                    st.info("Wait... getting all houses in the area")
                    all_houses = scrape_all_houses(driver)
                    if all_houses:
                        st.success("**All Houses in the Area:**")
                        for house in all_houses:
                            st.write(house)
                    else:
                        st.error("No additional houses found.")
                else:
                    # The original silently showed nothing in this case.
                    st.error("No houses found for that address.")
            finally:
                # Always close the browser, even if scraping raises
                # (the original leaked the driver on any exception here).
                driver.quit()


if __name__ == "__main__":
    main()
# version: 2
# import streamlit as st
# import time
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from webdriver_manager.chrome import ChromeDriverManager
# def search_fsbo_address(location):
# """Search FSBO for the given address and return the result page."""
# options = Options()
# options.add_argument("--incognito")
# options.add_argument("--disable-blink-features=AutomationControlled")
# options.add_argument("start-maximized")
# options.add_argument("--disable-gpu")
# options.add_argument("--log-level=3")
# options.add_argument(
# "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
# )
# driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
# driver.get("https://fsbo.com/")
# try:
# search_box = WebDriverWait(driver, 10).until(
# EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
# )
# search_box.clear()
# search_box.send_keys(location)
# time.sleep(2)
# search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
# search_button.click()
# time.sleep(5)
# return driver
# except Exception as e:
# st.error(f"Error finding FSBO URL: {e}")
# driver.quit()
# return None
# def format_details(details_text):
# """Formats details by adding '|' after each line."""
# lines = [line.strip() for line in details_text.split("\n") if line.strip()]
# return " | ".join(lines)
# def scrape_first_house(driver):
# """Scrape only the first house and return its details."""
# try:
# WebDriverWait(driver, 10).until(
# EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
# )
# listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")
# details_text = listing.get_attribute("innerText").strip()
# return format_details(details_text)
# except:
# return "N/A"
# def scrape_all_houses(driver):
# """Scrape all houses and return a list of details."""
# houses = []
# try:
# WebDriverWait(driver, 10).until(
# EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
# )
# listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div")
# for listing in listings:
# try:
# details_text = listing.get_attribute("innerText").strip()
# formatted_details = format_details(details_text)
# houses.append(formatted_details)
# except:
# continue
# except:
# return []
# return houses
# def main():
# st.title("FSBO House Price Finder")
# location = st.text_input("Enter Address:")
# if st.button("Search"):
# with st.spinner("Fetching house details..."):
# driver = search_fsbo_address(location)
# if not driver:
# return
# first_house = scrape_first_house(driver)
# if first_house != "N/A":
# st.success("First House Found:")
# st.write(first_house)
# st.info("Wait... getting all houses in the area")
# all_houses = scrape_all_houses(driver)
# if all_houses:
# st.success("All Houses in the Area:")
# for house in all_houses:
# st.write(house)
# else:
# st.error("No additional houses found.")
# driver.quit()
# if __name__ == "__main__":
# main()
# Best Version 1: it is scraping and displaying the first house's details correctly.
# import streamlit as st
# import threading
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from webdriver_manager.chrome import ChromeDriverManager
# import time
# def search_fsbo_address(location):
# """Search FSBO for the given address and return the first result's URL."""
# options = Options()
# options.add_argument("--incognito")
# options.add_argument("--disable-blink-features=AutomationControlled")
# options.add_argument("start-maximized")
# options.add_argument("--disable-gpu")
# options.add_argument("--log-level=3")
# options.add_argument(
# "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
# )
# driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
# driver.get("https://fsbo.com/")
# try:
# search_box = WebDriverWait(driver, 10).until(
# EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
# )
# search_box.clear()
# search_box.send_keys(location)
# time.sleep(2)
# search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
# search_button.click()
# time.sleep(5)
# fsbo_url = driver.current_url
# return driver, fsbo_url
# except Exception as e:
# print("Error finding correct FSBO URL:", e)
# driver.quit()
# return None, None
# def format_details(details_text):
# """Formats the details by adding '|' after each line."""
# lines = [line.strip() for line in details_text.split("\n") if line.strip()]
# return " | ".join(lines) # Join lines with '|'
# def scrape_fsbo_details(location):
# """Find the correct FSBO URL and scrape house details."""
# driver, fsbo_url = search_fsbo_address(location)
# if not fsbo_url:
# return {"Details": "N/A", "Link": "N/A"}
# try:
# WebDriverWait(driver, 10).until(
# EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
# )
# listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")
# # Extract raw text
# details_text = listing.get_attribute("innerText").strip()
# # Format details by adding '|'
# formatted_details = format_details(details_text)
# except:
# driver.quit()
# return {"Details": "N/A", "Link": fsbo_url}
# driver.quit()
# return {"Details": formatted_details, "Link": fsbo_url}
# def main():
# st.title("FSBO House Price Finder")
# location = st.text_input("Enter Address:")
# if st.button("Search"):
# with st.spinner("Fetching house details..."):
# house_data = scrape_fsbo_details(location)
# if house_data:
# st.success("House Details:")
# st.write(house_data["Details"]) # Display formatted details
# st.write(f"[View Listing]({house_data['Link']})")
# threading.Thread(target=scrape_fsbo_details, args=(location,), daemon=True).start()
# else:
# st.error("No results found or address not recognized.")
# if __name__ == "__main__":
# main()
## working up to the search step, but not able to scrape the details
# import streamlit as st
# import pandas as pd
# import threading
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from webdriver_manager.chrome import ChromeDriverManager
# import time
# def search_redfin_address(location):
# """Search Redfin for the given address and return the first result's URL."""
# options = Options()
# # options.add_argument("--headless")
# options.add_argument("--incognito")
# options.add_argument("--disable-blink-features=AutomationControlled")
# options.add_argument("start-maximized")
# options.add_argument(
# "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
# )
# driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
# driver.get("https://fsbo.com/")
# try:
# # Find and enter location into the search box
# search_box = WebDriverWait(driver, 10).until(
# EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
# )
# search_box.clear()
# search_box.send_keys(location)
# time.sleep(2)
# # Click the search button
# search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
# search_button.click()
# time.sleep(5) # Allow results to load
# # Return updated URL
# redfin_url = driver.current_url
# return driver, redfin_url
# except Exception as e:
# print("Error finding correct Redfin URL:", e)
# driver.quit()
# return None, None
# def scrape_redfin_details(location):
# """Find the correct Redfin URL and scrape house details."""
# driver, redfin_url = search_redfin_address(location)
# if not redfin_url:
# return {"Price": "N/A", "Address": "N/A", "Beds": "N/A", "Baths": "N/A", "Sq Ft": "N/A", "Link": "N/A"}
# try:
# WebDriverWait(driver, 10).until(
# EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
# )
# listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]")
# except:
# driver.quit()
# return {"Price": "N/A", "Address": "N/A", "Beds": "N/A", "Baths": "N/A", "Sq Ft": "N/A", "Link": redfin_url}
# houses = []
# for listing in listings:
# try:
# price = listing.find_element(By.XPATH, ".//div/div/div[3]/div[1]/div[1]/span").text
# except:
# price = "N/A"
# try:
# address = listing.find_element(By.XPATH, ".//div/div/div[3]/div[3]").text
# except:
# address = "N/A"
# try:
# beds = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[1]").text
# except:
# beds = "N/A"
# try:
# baths = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[2]").text
# except:
# baths = "N/A"
# try:
# sqft = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[3]").text
# except:
# sqft = "N/A"
# try:
# link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
# except:
# link = "N/A"
# houses.append({"Price": price, "Address": address, "Beds": beds, "Baths": baths, "Sq Ft": sqft, "Link": link})
# break # Return only the first house fast
# driver.quit()
# return houses[0]
# def background_scraping(location):
# """Scrapes additional property details in the background."""
# pass
# def main():
# st.title("Redfin House Price Finder")
# location = st.text_input("Enter Address:")
# if st.button("Search"):
# with st.spinner("Fetching house details..."):
# house_data = scrape_redfin_details(location)
# if house_data:
# st.success(f"House Price: {house_data['Price']}")
# st.write(f"**Address:** {house_data['Address']}")
# st.write(f"**Beds:** {house_data['Beds']} | **Baths:** {house_data['Baths']} | **Sq Ft:** {house_data['Sq Ft']}")
# # st.write(f"[View Listing on Redfin]({house_data['Link']})")
# threading.Thread(target=background_scraping, args=(location,), daemon=True).start()
# else:
# st.error("No results found or address not recognized.")
# if __name__ == "__main__":
# main()