# Version: 3
"""Streamlit app that searches fsbo.com for listings at a given address.

Drives the FSBO search form with Selenium (Chrome), scrapes the result
listings, and displays them in the Streamlit UI.

NOTE(review): the absolute XPaths below are tied to fsbo.com's current
page layout and will break silently if the site markup changes.
"""

import re
import time

import streamlit as st
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from webdriver_manager.chrome import ChromeDriverManager

# Matches the "Listing ID#123456 -" prefix FSBO prepends to listing lines.
# Compiled once at module level instead of per line inside clean_text().
_LISTING_ID_RE = re.compile(r"Listing ID#\d+\s*-")

# Absolute XPaths into fsbo.com's markup (fragile by nature; kept in one
# place so a site redesign requires edits here only).
_SEARCH_BOX_XPATH = "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"
_SEARCH_BUTTON_XPATH = "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button"
_RESULTS_CONTAINER_XPATH = "/html/body/div[2]/main/div/div[2]/div[1]"
_FIRST_LISTING_XPATH = "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div"
_ALL_LISTINGS_XPATH = "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div"


def _build_driver():
    """Create a Chrome WebDriver configured to look like a normal browser.

    The flags below reduce the chance of fsbo.com's bot detection blocking
    the automated session.
    """
    options = Options()
    options.add_argument("--incognito")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_argument("--disable-gpu")
    options.add_argument("--log-level=3")
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)


def search_fsbo_address(location):
    """Search FSBO for the given address and return the result page.

    Parameters
    ----------
    location : str
        Address (or area) to type into the FSBO search box.

    Returns
    -------
    webdriver.Chrome | None
        A live driver positioned on the results page, or ``None`` on
        failure (an error is shown in the Streamlit UI and the browser
        is closed).
    """
    driver = _build_driver()
    driver.get("https://fsbo.com/")
    try:
        search_box = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, _SEARCH_BOX_XPATH))
        )
        search_box.clear()
        search_box.send_keys(location)
        time.sleep(2)  # let the form settle before submitting
        driver.find_element(By.XPATH, _SEARCH_BUTTON_XPATH).click()
        time.sleep(5)  # allow the results page to load
        return driver
    except Exception as e:
        st.error(f"Error finding FSBO URL: {e}")
        driver.quit()  # don't leak the browser on failure
        return None


def clean_text(text):
    """Cleans extracted text by removing Listing ID, unnecessary content, and 'View Listing Details'.

    Splits *text* on newlines, strips blanks, removes the
    "Listing ID#... -" prefix (e.g. "Listing ID#541799 - "), drops any
    "View Listing Details" boilerplate lines, and joins the remainder
    with " | ".
    """
    lines = [line.strip() for line in text.split("\n") if line.strip()]
    lines = [_LISTING_ID_RE.sub("", line) for line in lines]
    lines = [line for line in lines if "View Listing Details" not in line]
    return " | ".join(lines)


def _wait_for_results(driver):
    """Block (up to 10 s) until the results container is present; raises on timeout."""
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, _RESULTS_CONTAINER_XPATH))
    )


def scrape_first_house(driver):
    """Scrape only the first house and return its details properly formatted.

    Returns the cleaned details string, or "N/A" if the listing cannot
    be located or read.
    """
    try:
        _wait_for_results(driver)
        first_listing = driver.find_element(By.XPATH, _FIRST_LISTING_XPATH)
        details_text = first_listing.get_attribute("innerText").strip()
        return clean_text(details_text)
    except Exception:  # was a bare except: — keep best-effort, but let SystemExit/KeyboardInterrupt through
        return "N/A"


def scrape_all_houses(driver):
    """Scrape all houses and return a list of details properly formatted.

    Each entry is a Markdown string of the form "**N.** details | ...".
    Returns an empty list if the results container never appears.
    """
    houses = []
    try:
        _wait_for_results(driver)
        listings = driver.find_elements(By.XPATH, _ALL_LISTINGS_XPATH)
        for index, listing in enumerate(listings, start=1):
            try:
                details_text = listing.get_attribute("innerText").strip()
                houses.append(f"**{index}.** {clean_text(details_text)}")
            except Exception:
                continue  # skip individual listings whose text can't be read
    except Exception:
        return []
    return houses


def main():
    """Streamlit entry point: take an address, search FSBO, display results."""
    st.title("FSBO House Price Finder")
    location = st.text_input("Enter Address:")
    if st.button("Search"):
        if not location.strip():
            # Avoid launching a browser for an empty query.
            st.warning("Please enter an address first.")
            return
        with st.spinner("Fetching house details..."):
            driver = search_fsbo_address(location)
            if not driver:
                return
            try:
                first_house = scrape_first_house(driver)
                if first_house != "N/A":
                    st.success("**First House Found:**")
                    st.write(first_house)
                    st.info("Wait... getting all houses in the area")
                    all_houses = scrape_all_houses(driver)
                    if all_houses:
                        st.success("**All Houses in the Area:**")
                        for house in all_houses:
                            st.write(house)
                    else:
                        st.error("No additional houses found.")
                else:
                    # Previously the UI went silent here; tell the user.
                    st.error("No houses found for that address.")
            finally:
                # Fix: the original only quit the driver on the happy path,
                # leaking a Chrome process whenever scraping raised mid-way.
                driver.quit()


if __name__ == "__main__":
    main()

# NOTE(review): this file previously carried three complete older prototypes
# (version 2, version 1, and an earlier Redfin draft) as commented-out code.
# They have been removed — recover them from version control if needed.