# Version: 3

import re
import time

import streamlit as st
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
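
# Dependencies: this script needs the streamlit, selenium, and webdriver-manager
# packages (the last one is imported as webdriver_manager), plus a local
# Chrome/Chromium install; ChromeDriverManager downloads the matching
# chromedriver but not the browser itself.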

def search_fsbo_address(location):
    """Search fsbo.com for the given address and return the driver on the results page."""
    options = Options()
    options.add_argument("--incognito")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_argument("--disable-gpu")
    options.add_argument("--log-level=3")
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    driver.get("https://fsbo.com/")
    try:
        # Type the address into the homepage search box.
        search_box = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
        )
        search_box.clear()
        search_box.send_keys(location)
        time.sleep(2)
        # Submit the search and give the results page time to load.
        search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
        search_button.click()
        time.sleep(5)
        return driver
    except Exception as e:
        st.error(f"Error searching FSBO: {e}")
        driver.quit()
        return None
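
# Note: search_fsbo_address() launches Chrome assuming a machine with a visible
# display. On a headless server (e.g. a hosted Space) the browser usually has to
# run headless; a minimal sketch of the extra flags, left commented out because
# whether they are needed is an assumption about the deployment environment:
#
#     options.add_argument("--headless=new")
#     options.add_argument("--no-sandbox")
#     options.add_argument("--disable-dev-shm-usage")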

def clean_text(text):
    """Clean extracted listing text: drop blank lines, the Listing ID prefix, and 'View Listing Details'."""
    lines = [line.strip() for line in text.split("\n") if line.strip()]
    # Remove the Listing ID prefix (e.g., "Listing ID#541799 - ")
    lines = [re.sub(r"Listing ID#\d+\s*-\s*", "", line).strip() for line in lines]
    # Drop the "View Listing Details" link text
    lines = [line for line in lines if "View Listing Details" not in line]
    return " | ".join(lines)
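
# Hypothetical example of the cleaning step (the listing text below is made up,
# not a real scrape):
#
#     clean_text("Listing ID#541799 - 123 Main St\n$450,000\nView Listing Details")
#     # -> "123 Main St | $450,000"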

def scrape_first_house(driver):
    """Scrape only the first house and return its details as a single formatted string."""
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
        )
        first_listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")
        details_text = first_listing.get_attribute("innerText").strip()
        return clean_text(details_text)
    except Exception:
        return "N/A"

def scrape_all_houses(driver):
    """Scrape every listing on the results page and return a list of formatted detail strings."""
    houses = []
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
        )
        listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div")
        for index, listing in enumerate(listings, start=1):
            try:
                details_text = listing.get_attribute("innerText").strip()
                houses.append(f"**{index}.** {clean_text(details_text)}")
            except Exception:
                # Skip listings whose text cannot be read and keep going.
                continue
    except Exception:
        return []
    return houses

def main():
    st.title("FSBO House Price Finder")
    location = st.text_input("Enter Address:")
    if st.button("Search"):
        with st.spinner("Fetching house details..."):
            driver = search_fsbo_address(location)
            if not driver:
                return
            first_house = scrape_first_house(driver)
            if first_house != "N/A":
                st.success("**First House Found:**")
                st.write(first_house)
                st.info("Please wait... fetching all houses in the area")
                all_houses = scrape_all_houses(driver)
                if all_houses:
                    st.success("**All Houses in the Area:**")
                    for house in all_houses:
                        st.write(house)
                else:
                    st.error("No additional houses found.")
            else:
                st.warning("No house details found for that address.")
            driver.quit()


if __name__ == "__main__":
    main()
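
# To run the app locally (assuming this file is saved as app.py):
#
#     streamlit run app.py
#
# The commented-out blocks below are earlier iterations of the script, kept for
# reference only.
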
# version: 2
#
# import streamlit as st
# import time
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from webdriver_manager.chrome import ChromeDriverManager
#
# def search_fsbo_address(location):
#     """Search FSBO for the given address and return the result page."""
#     options = Options()
#     options.add_argument("--incognito")
#     options.add_argument("--disable-blink-features=AutomationControlled")
#     options.add_argument("start-maximized")
#     options.add_argument("--disable-gpu")
#     options.add_argument("--log-level=3")
#     options.add_argument(
#         "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
#     )
#     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
#     driver.get("https://fsbo.com/")
#     try:
#         search_box = WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
#         )
#         search_box.clear()
#         search_box.send_keys(location)
#         time.sleep(2)
#         search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
#         search_button.click()
#         time.sleep(5)
#         return driver
#     except Exception as e:
#         st.error(f"Error finding FSBO URL: {e}")
#         driver.quit()
#         return None
#
# def format_details(details_text):
#     """Formats details by adding '|' after each line."""
#     lines = [line.strip() for line in details_text.split("\n") if line.strip()]
#     return " | ".join(lines)
#
# def scrape_first_house(driver):
#     """Scrape only the first house and return its details."""
#     try:
#         WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
#         )
#         listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")
#         details_text = listing.get_attribute("innerText").strip()
#         return format_details(details_text)
#     except:
#         return "N/A"
#
# def scrape_all_houses(driver):
#     """Scrape all houses and return a list of details."""
#     houses = []
#     try:
#         WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
#         )
#         listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div")
#         for listing in listings:
#             try:
#                 details_text = listing.get_attribute("innerText").strip()
#                 formatted_details = format_details(details_text)
#                 houses.append(formatted_details)
#             except:
#                 continue
#     except:
#         return []
#     return houses
#
# def main():
#     st.title("FSBO House Price Finder")
#     location = st.text_input("Enter Address:")
#     if st.button("Search"):
#         with st.spinner("Fetching house details..."):
#             driver = search_fsbo_address(location)
#             if not driver:
#                 return
#             first_house = scrape_first_house(driver)
#             if first_house != "N/A":
#                 st.success("First House Found:")
#                 st.write(first_house)
#                 st.info("Wait... getting all houses in the area")
#                 all_houses = scrape_all_houses(driver)
#                 if all_houses:
#                     st.success("All Houses in the Area:")
#                     for house in all_houses:
#                         st.write(house)
#                 else:
#                     st.error("No additional houses found.")
#             driver.quit()
#
# if __name__ == "__main__":
#     main()

# Best Version 1: it is scraping and displaying the first house's details correctly.
# import streamlit as st
# import threading
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from webdriver_manager.chrome import ChromeDriverManager
# import time
#
# def search_fsbo_address(location):
#     """Search FSBO for the given address and return the first result's URL."""
#     options = Options()
#     options.add_argument("--incognito")
#     options.add_argument("--disable-blink-features=AutomationControlled")
#     options.add_argument("start-maximized")
#     options.add_argument("--disable-gpu")
#     options.add_argument("--log-level=3")
#     options.add_argument(
#         "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
#     )
#     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
#     driver.get("https://fsbo.com/")
#     try:
#         search_box = WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
#         )
#         search_box.clear()
#         search_box.send_keys(location)
#         time.sleep(2)
#         search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
#         search_button.click()
#         time.sleep(5)
#         fsbo_url = driver.current_url
#         return driver, fsbo_url
#     except Exception as e:
#         print("Error finding correct FSBO URL:", e)
#         driver.quit()
#         return None, None
#
# def format_details(details_text):
#     """Formats the details by adding '|' after each line."""
#     lines = [line.strip() for line in details_text.split("\n") if line.strip()]
#     return " | ".join(lines)  # Join lines with '|'
#
# def scrape_fsbo_details(location):
#     """Find the correct FSBO URL and scrape house details."""
#     driver, fsbo_url = search_fsbo_address(location)
#     if not fsbo_url:
#         return {"Details": "N/A", "Link": "N/A"}
#     try:
#         WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
#         )
#         listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")
#         # Extract raw text
#         details_text = listing.get_attribute("innerText").strip()
#         # Format details by adding '|'
#         formatted_details = format_details(details_text)
#     except:
#         driver.quit()
#         return {"Details": "N/A", "Link": fsbo_url}
#     driver.quit()
#     return {"Details": formatted_details, "Link": fsbo_url}
#
# def main():
#     st.title("FSBO House Price Finder")
#     location = st.text_input("Enter Address:")
#     if st.button("Search"):
#         with st.spinner("Fetching house details..."):
#             house_data = scrape_fsbo_details(location)
#             if house_data:
#                 st.success("House Details:")
#                 st.write(house_data["Details"])  # Display formatted details
#                 st.write(f"[View Listing]({house_data['Link']})")
#                 threading.Thread(target=scrape_fsbo_details, args=(location,), daemon=True).start()
#             else:
#                 st.error("No results found or address not recognized.")
#
# if __name__ == "__main__":
#     main()

## Working up to the search step, but not able to scrape the details.
# import streamlit as st
# import pandas as pd
# import threading
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from webdriver_manager.chrome import ChromeDriverManager
# import time
#
# def search_redfin_address(location):
#     """Search Redfin for the given address and return the first result's URL."""
#     options = Options()
#     # options.add_argument("--headless")
#     options.add_argument("--incognito")
#     options.add_argument("--disable-blink-features=AutomationControlled")
#     options.add_argument("start-maximized")
#     options.add_argument(
#         "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
#     )
#     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
#     driver.get("https://fsbo.com/")
#     try:
#         # Find and enter location into the search box
#         search_box = WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
#         )
#         search_box.clear()
#         search_box.send_keys(location)
#         time.sleep(2)
#         # Click the search button
#         search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
#         search_button.click()
#         time.sleep(5)  # Allow results to load
#         # Return updated URL
#         redfin_url = driver.current_url
#         return driver, redfin_url
#     except Exception as e:
#         print("Error finding correct Redfin URL:", e)
#         driver.quit()
#         return None, None
#
# def scrape_redfin_details(location):
#     """Find the correct Redfin URL and scrape house details."""
#     driver, redfin_url = search_redfin_address(location)
#     if not redfin_url:
#         return {"Price": "N/A", "Address": "N/A", "Beds": "N/A", "Baths": "N/A", "Sq Ft": "N/A", "Link": "N/A"}
#     try:
#         WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
#         )
#         listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]")
#     except:
#         driver.quit()
#         return {"Price": "N/A", "Address": "N/A", "Beds": "N/A", "Baths": "N/A", "Sq Ft": "N/A", "Link": redfin_url}
#     houses = []
#     for listing in listings:
#         try:
#             price = listing.find_element(By.XPATH, ".//div/div/div[3]/div[1]/div[1]/span").text
#         except:
#             price = "N/A"
#         try:
#             address = listing.find_element(By.XPATH, ".//div/div/div[3]/div[3]").text
#         except:
#             address = "N/A"
#         try:
#             beds = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[1]").text
#         except:
#             beds = "N/A"
#         try:
#             baths = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[2]").text
#         except:
#             baths = "N/A"
#         try:
#             sqft = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[3]").text
#         except:
#             sqft = "N/A"
#         try:
#             link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
#         except:
#             link = "N/A"
#         houses.append({"Price": price, "Address": address, "Beds": beds, "Baths": baths, "Sq Ft": sqft, "Link": link})
#         break  # Return only the first house fast
#     driver.quit()
#     return houses[0]
#
# def background_scraping(location):
#     """Scrapes additional property details in the background."""
#     pass
#
# def main():
#     st.title("Redfin House Price Finder")
#     location = st.text_input("Enter Address:")
#     if st.button("Search"):
#         with st.spinner("Fetching house details..."):
#             house_data = scrape_redfin_details(location)
#             if house_data:
#                 st.success(f"House Price: {house_data['Price']}")
#                 st.write(f"**Address:** {house_data['Address']}")
#                 st.write(f"**Beds:** {house_data['Beds']} | **Baths:** {house_data['Baths']} | **Sq Ft:** {house_data['Sq Ft']}")
#                 # st.write(f"[View Listing on Redfin]({house_data['Link']})")
#                 threading.Thread(target=background_scraping, args=(location,), daemon=True).start()
#             else:
#                 st.error("No results found or address not recognized.")
#
# if __name__ == "__main__":
#     main()