# Version: 3

import re
import time

import streamlit as st
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
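
# Dependencies: this script needs the streamlit, selenium, and webdriver-manager
# packages (the last one is imported as webdriver_manager), plus a local
# Chrome/Chromium install; ChromeDriverManager downloads the matching
# chromedriver but not the browser itself.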

def search_fsbo_address(location):
    """Search fsbo.com for the given address and return the driver on the results page."""
    options = Options()
    options.add_argument("--incognito")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_argument("--disable-gpu")
    options.add_argument("--log-level=3")
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    driver.get("https://fsbo.com/")
    try:
        # Type the address into the homepage search box.
        search_box = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
        )
        search_box.clear()
        search_box.send_keys(location)
        time.sleep(2)
        # Submit the search and give the results page time to load.
        search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
        search_button.click()
        time.sleep(5)
        return driver
    except Exception as e:
        st.error(f"Error searching FSBO: {e}")
        driver.quit()
        return None
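
# Note: search_fsbo_address() launches Chrome assuming a machine with a visible
# display. On a headless server (e.g. a hosted Space) the browser usually has to
# run headless; a minimal sketch of the extra flags, left commented out because
# whether they are needed is an assumption about the deployment environment:
#
#     options.add_argument("--headless=new")
#     options.add_argument("--no-sandbox")
#     options.add_argument("--disable-dev-shm-usage")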

def clean_text(text):
    """Clean extracted listing text: drop blank lines, the Listing ID prefix, and 'View Listing Details'."""
    lines = [line.strip() for line in text.split("\n") if line.strip()]
    # Remove the Listing ID prefix (e.g., "Listing ID#541799 - ")
    lines = [re.sub(r"Listing ID#\d+\s*-\s*", "", line).strip() for line in lines]
    # Drop the "View Listing Details" link text
    lines = [line for line in lines if "View Listing Details" not in line]
    return " | ".join(lines)
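
# Hypothetical example of the cleaning step (the listing text below is made up,
# not a real scrape):
#
#     clean_text("Listing ID#541799 - 123 Main St\n$450,000\nView Listing Details")
#     # -> "123 Main St | $450,000"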

def scrape_first_house(driver):
    """Scrape only the first house and return its details as a single formatted string."""
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
        )
        first_listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")
        details_text = first_listing.get_attribute("innerText").strip()
        return clean_text(details_text)
    except Exception:
        return "N/A"

def scrape_all_houses(driver):
    """Scrape every listing on the results page and return a list of formatted detail strings."""
    houses = []
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
        )
        listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div")
        for index, listing in enumerate(listings, start=1):
            try:
                details_text = listing.get_attribute("innerText").strip()
                houses.append(f"**{index}.** {clean_text(details_text)}")
            except Exception:
                # Skip listings whose text cannot be read and keep going.
                continue
    except Exception:
        return []
    return houses

def main():
    st.title("FSBO House Price Finder")
    location = st.text_input("Enter Address:")
    if st.button("Search"):
        with st.spinner("Fetching house details..."):
            driver = search_fsbo_address(location)
            if not driver:
                return
            first_house = scrape_first_house(driver)
            if first_house != "N/A":
                st.success("**First House Found:**")
                st.write(first_house)
                st.info("Please wait... fetching all houses in the area")
                all_houses = scrape_all_houses(driver)
                if all_houses:
                    st.success("**All Houses in the Area:**")
                    for house in all_houses:
                        st.write(house)
                else:
                    st.error("No additional houses found.")
            else:
                st.warning("No house details found for that address.")
            driver.quit()


if __name__ == "__main__":
    main()
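
# To run the app locally (assuming this file is saved as app.py):
#
#     streamlit run app.py
#
# The commented-out blocks below are earlier iterations of the script, kept for
# reference only.
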
# version: 2
#
# import streamlit as st
# import time
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from webdriver_manager.chrome import ChromeDriverManager
#
# def search_fsbo_address(location):
#     """Search FSBO for the given address and return the result page."""
#     options = Options()
#     options.add_argument("--incognito")
#     options.add_argument("--disable-blink-features=AutomationControlled")
#     options.add_argument("start-maximized")
#     options.add_argument("--disable-gpu")
#     options.add_argument("--log-level=3")
#     options.add_argument(
#         "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
#     )
#     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
#     driver.get("https://fsbo.com/")
#     try:
#         search_box = WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
#         )
#         search_box.clear()
#         search_box.send_keys(location)
#         time.sleep(2)
#         search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
#         search_button.click()
#         time.sleep(5)
#         return driver
#     except Exception as e:
#         st.error(f"Error finding FSBO URL: {e}")
#         driver.quit()
#         return None
#
# def format_details(details_text):
#     """Formats details by adding '|' after each line."""
#     lines = [line.strip() for line in details_text.split("\n") if line.strip()]
#     return " | ".join(lines)
#
# def scrape_first_house(driver):
#     """Scrape only the first house and return its details."""
#     try:
#         WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
#         )
#         listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")
#         details_text = listing.get_attribute("innerText").strip()
#         return format_details(details_text)
#     except:
#         return "N/A"
#
# def scrape_all_houses(driver):
#     """Scrape all houses and return a list of details."""
#     houses = []
#     try:
#         WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
#         )
#         listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div")
#         for listing in listings:
#             try:
#                 details_text = listing.get_attribute("innerText").strip()
#                 formatted_details = format_details(details_text)
#                 houses.append(formatted_details)
#             except:
#                 continue
#     except:
#         return []
#     return houses
#
# def main():
#     st.title("FSBO House Price Finder")
#     location = st.text_input("Enter Address:")
#     if st.button("Search"):
#         with st.spinner("Fetching house details..."):
#             driver = search_fsbo_address(location)
#             if not driver:
#                 return
#             first_house = scrape_first_house(driver)
#             if first_house != "N/A":
#                 st.success("First House Found:")
#                 st.write(first_house)
#                 st.info("Wait... getting all houses in the area")
#                 all_houses = scrape_all_houses(driver)
#                 if all_houses:
#                     st.success("All Houses in the Area:")
#                     for house in all_houses:
#                         st.write(house)
#                 else:
#                     st.error("No additional houses found.")
#             driver.quit()
#
# if __name__ == "__main__":
#     main()

# Best Version 1: it is scraping and displaying the first house's details correctly.
# import streamlit as st
# import threading
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from webdriver_manager.chrome import ChromeDriverManager
# import time
#
# def search_fsbo_address(location):
#     """Search FSBO for the given address and return the first result's URL."""
#     options = Options()
#     options.add_argument("--incognito")
#     options.add_argument("--disable-blink-features=AutomationControlled")
#     options.add_argument("start-maximized")
#     options.add_argument("--disable-gpu")
#     options.add_argument("--log-level=3")
#     options.add_argument(
#         "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
#     )
#     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
#     driver.get("https://fsbo.com/")
#     try:
#         search_box = WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
#         )
#         search_box.clear()
#         search_box.send_keys(location)
#         time.sleep(2)
#         search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
#         search_button.click()
#         time.sleep(5)
#         fsbo_url = driver.current_url
#         return driver, fsbo_url
#     except Exception as e:
#         print("Error finding correct FSBO URL:", e)
#         driver.quit()
#         return None, None
#
# def format_details(details_text):
#     """Formats the details by adding '|' after each line."""
#     lines = [line.strip() for line in details_text.split("\n") if line.strip()]
#     return " | ".join(lines)  # Join lines with '|'
#
# def scrape_fsbo_details(location):
#     """Find the correct FSBO URL and scrape house details."""
#     driver, fsbo_url = search_fsbo_address(location)
#     if not fsbo_url:
#         return {"Details": "N/A", "Link": "N/A"}
#     try:
#         WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
#         )
#         listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")
#         # Extract raw text
#         details_text = listing.get_attribute("innerText").strip()
#         # Format details by adding '|'
#         formatted_details = format_details(details_text)
#     except:
#         driver.quit()
#         return {"Details": "N/A", "Link": fsbo_url}
#     driver.quit()
#     return {"Details": formatted_details, "Link": fsbo_url}
#
# def main():
#     st.title("FSBO House Price Finder")
#     location = st.text_input("Enter Address:")
#     if st.button("Search"):
#         with st.spinner("Fetching house details..."):
#             house_data = scrape_fsbo_details(location)
#             if house_data:
#                 st.success("House Details:")
#                 st.write(house_data["Details"])  # Display formatted details
#                 st.write(f"[View Listing]({house_data['Link']})")
#                 threading.Thread(target=scrape_fsbo_details, args=(location,), daemon=True).start()
#             else:
#                 st.error("No results found or address not recognized.")
#
# if __name__ == "__main__":
#     main()

## Working up to the search step, but not able to scrape the details.
# import streamlit as st
# import pandas as pd
# import threading
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from webdriver_manager.chrome import ChromeDriverManager
# import time
#
# def search_redfin_address(location):
#     """Search Redfin for the given address and return the first result's URL."""
#     options = Options()
#     # options.add_argument("--headless")
#     options.add_argument("--incognito")
#     options.add_argument("--disable-blink-features=AutomationControlled")
#     options.add_argument("start-maximized")
#     options.add_argument(
#         "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
#     )
#     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
#     driver.get("https://fsbo.com/")
#     try:
#         # Find and enter location into the search box
#         search_box = WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
#         )
#         search_box.clear()
#         search_box.send_keys(location)
#         time.sleep(2)
#         # Click the search button
#         search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
#         search_button.click()
#         time.sleep(5)  # Allow results to load
#         # Return updated URL
#         redfin_url = driver.current_url
#         return driver, redfin_url
#     except Exception as e:
#         print("Error finding correct Redfin URL:", e)
#         driver.quit()
#         return None, None
#
# def scrape_redfin_details(location):
#     """Find the correct Redfin URL and scrape house details."""
#     driver, redfin_url = search_redfin_address(location)
#     if not redfin_url:
#         return {"Price": "N/A", "Address": "N/A", "Beds": "N/A", "Baths": "N/A", "Sq Ft": "N/A", "Link": "N/A"}
#     try:
#         WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
#         )
#         listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]")
#     except:
#         driver.quit()
#         return {"Price": "N/A", "Address": "N/A", "Beds": "N/A", "Baths": "N/A", "Sq Ft": "N/A", "Link": redfin_url}
#     houses = []
#     for listing in listings:
#         try:
#             price = listing.find_element(By.XPATH, ".//div/div/div[3]/div[1]/div[1]/span").text
#         except:
#             price = "N/A"
#         try:
#             address = listing.find_element(By.XPATH, ".//div/div/div[3]/div[3]").text
#         except:
#             address = "N/A"
#         try:
#             beds = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[1]").text
#         except:
#             beds = "N/A"
#         try:
#             baths = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[2]").text
#         except:
#             baths = "N/A"
#         try:
#             sqft = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[3]").text
#         except:
#             sqft = "N/A"
#         try:
#             link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
#         except:
#             link = "N/A"
#         houses.append({"Price": price, "Address": address, "Beds": beds, "Baths": baths, "Sq Ft": sqft, "Link": link})
#         break  # Return only the first house fast
#     driver.quit()
#     return houses[0]
#
# def background_scraping(location):
#     """Scrapes additional property details in the background."""
#     pass
#
# def main():
#     st.title("Redfin House Price Finder")
#     location = st.text_input("Enter Address:")
#     if st.button("Search"):
#         with st.spinner("Fetching house details..."):
#             house_data = scrape_redfin_details(location)
#             if house_data:
#                 st.success(f"House Price: {house_data['Price']}")
#                 st.write(f"**Address:** {house_data['Address']}")
#                 st.write(f"**Beds:** {house_data['Beds']} | **Baths:** {house_data['Baths']} | **Sq Ft:** {house_data['Sq Ft']}")
#                 # st.write(f"[View Listing on Redfin]({house_data['Link']})")
#                 threading.Thread(target=background_scraping, args=(location,), daemon=True).start()
#             else:
#                 st.error("No results found or address not recognized.")
#
# if __name__ == "__main__":
#     main()