# Spaces: Krish-Upgrix / AITINCE-FSBO-Crawler (Sleeping)
# File size: 17,889 Bytes
# 71601e8

# Version: 3
import streamlit as st
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import re

def search_fsbo_address(location):
    """Search fsbo.com for ``location`` and return the driver on the results page.

    Args:
        location: Text typed into the fsbo.com search box.

    Returns:
        The live Chrome ``webdriver`` positioned on the page reached after
        submitting the search, or ``None`` if the browser could not be started
        or the search could not be performed. On failure the error is reported
        through ``st.error`` and any browser that was started is closed. On
        success the caller owns the driver and must call ``driver.quit()``.
    """
    search_box_xpath = "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"
    search_button_xpath = "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button"

    options = Options()
    options.add_argument("--incognito")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_argument("--disable-gpu")
    options.add_argument("--log-level=3")
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )

    # Starting Chrome can fail (driver download, missing browser); report it
    # the same way as a failed search instead of crashing the app.
    try:
        driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    except Exception as e:
        st.error(f"Error starting Chrome: {e}")
        return None

    try:
        # Navigation is inside the try block so a failed page load still
        # closes the browser instead of leaking the process.
        driver.get("https://fsbo.com/")

        search_box = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, search_box_xpath))
        )
        search_box.clear()
        search_box.send_keys(location)
        time.sleep(2)  # pause after typing before submitting the form

        search_button = driver.find_element(By.XPATH, search_button_xpath)
        search_button.click()
        time.sleep(5)  # give the results page time to load

        return driver
    except Exception as e:
        st.error(f"Error finding FSBO URL: {e}")
        driver.quit()
        return None

def clean_text(text):
    """Turn a listing's raw multi-line text into a single ``" | "``-separated line.

    Each line is stripped of surrounding whitespace, any
    ``Listing ID#<digits> -`` marker is removed, and lines that are empty
    (before or after the marker is removed) or that contain
    ``View Listing Details`` are dropped.

    Args:
        text: Raw text of a listing, with fields separated by newlines.

    Returns:
        The remaining lines joined with ``" | "``; an empty string when
        nothing remains.
    """
    cleaned = []
    for raw_line in text.split("\n"):
        # Remove the listing-id marker (e.g. "Listing ID#541799 - "), then
        # strip again so the space that followed the dash does not survive.
        line = re.sub(r"Listing ID#\d+\s*-", "", raw_line.strip()).strip()

        # Skip blank lines, including lines that held only the marker.
        if not line:
            continue

        # Skip the "View Listing Details" link text.
        if "View Listing Details" in line:
            continue

        cleaned.append(line)

    return " | ".join(cleaned)

def scrape_first_house(driver):
    """Scrape the first listing on the current results page.

    Args:
        driver: Selenium webdriver already positioned on a results page.

    Returns:
        The first listing's text formatted by ``clean_text``, or the string
        ``"N/A"`` when the results container does not appear within 10
        seconds, the listing element cannot be read, or the listing has no
        text left after cleaning.
    """
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
        )
        first_listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")

        # get_attribute may return None when the attribute is absent.
        details_text = (first_listing.get_attribute("innerText") or "").strip()
        formatted_details = clean_text(details_text)
    except Exception:
        # Best-effort scrape: any Selenium/lookup failure means "no result".
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return "N/A"

    # An empty listing is reported with the same sentinel as a failed scrape.
    return formatted_details or "N/A"

def scrape_all_houses(driver):
    """Scrape every listing on the current results page.

    Args:
        driver: Selenium webdriver already positioned on a results page.

    Returns:
        A list of Markdown strings of the form ``"**<n>.** <details>"``,
        numbered consecutively from 1, where ``<details>`` is the listing
        text formatted by ``clean_text``. Elements that cannot be read or
        that have no text after cleaning are skipped. Returns an empty list
        when the results container does not appear within 10 seconds or the
        listing elements cannot be located.
    """
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
        )
        listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div")
    except Exception:
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return []

    houses = []
    for listing in listings:
        try:
            # get_attribute may return None when the attribute is absent.
            details_text = (listing.get_attribute("innerText") or "").strip()
        except Exception:
            # Best-effort: skip an element that can no longer be read.
            continue

        formatted_details = clean_text(details_text)
        if not formatted_details:
            # Skip containers with no listing text rather than emitting "**n.** ".
            continue

        # Number by accepted entries so skipped elements leave no gaps.
        houses.append(f"**{len(houses) + 1}.** {formatted_details}")

    return houses

def main():
    """Render the Streamlit UI and run a search when the button is pressed.

    Shows a title, an address text input and a "Search" button. On click it
    opens fsbo.com through ``search_fsbo_address``, displays the first
    listing, then displays all listings found on the results page. The
    browser is always closed once scraping finishes, even if scraping raises.
    """
    st.title("FSBO House Price Finder")
    location = st.text_input("Enter Address:")

    if not st.button("Search"):
        return

    # Do not launch a browser for an empty query.
    if not location.strip():
        st.warning("Please enter an address to search.")
        return

    with st.spinner("Fetching house details..."):
        driver = search_fsbo_address(location)
        if not driver:
            # search_fsbo_address has already reported the error.
            return

        try:
            first_house = scrape_first_house(driver)
            if first_house == "N/A":
                # Tell the user instead of silently showing nothing.
                st.error("No listings found for this address.")
                return

            st.success("**First House Found:**")
            st.write(first_house)

            st.info("Wait... getting all houses in the area")
            all_houses = scrape_all_houses(driver)

            if all_houses:
                st.success("**All Houses in the Area:**")
                for house in all_houses:
                    st.write(house)
            else:
                st.error("No additional houses found.")
        finally:
            # Always release the browser, including on early return or error.
            driver.quit()

# Script entry point: run the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()













# version: 2

# import streamlit as st
# import time
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from webdriver_manager.chrome import ChromeDriverManager

# def search_fsbo_address(location):
#     """Search FSBO for the given address and return the result page."""
#     options = Options()
#     options.add_argument("--incognito")
#     options.add_argument("--disable-blink-features=AutomationControlled")
#     options.add_argument("start-maximized")
#     options.add_argument("--disable-gpu")
#     options.add_argument("--log-level=3")
#     options.add_argument(
#         "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
#     )

#     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
#     driver.get("https://fsbo.com/")

#     try:
#         search_box = WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
#         )
#         search_box.clear()
#         search_box.send_keys(location)
#         time.sleep(2)
        
#         search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
#         search_button.click()
#         time.sleep(5)

#         return driver
#     except Exception as e:
#         st.error(f"Error finding FSBO URL: {e}")
#         driver.quit()
#         return None

# def format_details(details_text):
#     """Formats details by adding '|' after each line."""
#     lines = [line.strip() for line in details_text.split("\n") if line.strip()]
#     return " | ".join(lines)

# def scrape_first_house(driver):
#     """Scrape only the first house and return its details."""
#     try:
#         WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
#         )
#         listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")
        
#         details_text = listing.get_attribute("innerText").strip()
#         return format_details(details_text)
#     except:
#         return "N/A"

# def scrape_all_houses(driver):
#     """Scrape all houses and return a list of details."""
#     houses = []
#     try:
#         WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
#         )
#         listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div")

#         for listing in listings:
#             try:
#                 details_text = listing.get_attribute("innerText").strip()
#                 formatted_details = format_details(details_text)
#                 houses.append(formatted_details)
#             except:
#                 continue
#     except:
#         return []

#     return houses

# def main():
#     st.title("FSBO House Price Finder")
#     location = st.text_input("Enter Address:")

#     if st.button("Search"):
#         with st.spinner("Fetching house details..."):
#             driver = search_fsbo_address(location)
#             if not driver:
#                 return

#             first_house = scrape_first_house(driver)
#             if first_house != "N/A":
#                 st.success("First House Found:")
#                 st.write(first_house)

#                 st.info("Wait... getting all houses in the area")
#                 all_houses = scrape_all_houses(driver)

#                 if all_houses:
#                     st.success("All Houses in the Area:")
#                     for house in all_houses:
#                         st.write(house)
#                 else:
#                     st.error("No additional houses found.")

#             driver.quit()

# if __name__ == "__main__":
#     main()











# Best version 1: it scrapes and displays the first house's details correctly.

# import streamlit as st
# import threading
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from webdriver_manager.chrome import ChromeDriverManager
# import time

# def search_fsbo_address(location):
#     """Search FSBO for the given address and return the first result's URL."""
#     options = Options()
#     options.add_argument("--incognito")
#     options.add_argument("--disable-blink-features=AutomationControlled")
#     options.add_argument("start-maximized")
#     options.add_argument("--disable-gpu")
#     options.add_argument("--log-level=3")
#     options.add_argument(
#         "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
#     )

#     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
#     driver.get("https://fsbo.com/")

#     try:
#         search_box = WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
#         )
#         search_box.clear()
#         search_box.send_keys(location)
#         time.sleep(2)
        
#         search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
#         search_button.click()
#         time.sleep(5)

#         fsbo_url = driver.current_url
#         return driver, fsbo_url
#     except Exception as e:
#         print("Error finding correct FSBO URL:", e)
#         driver.quit()
#         return None, None

# def format_details(details_text):
#     """Formats the details by adding '|' after each line."""
#     lines = [line.strip() for line in details_text.split("\n") if line.strip()]
#     return " | ".join(lines)  # Join lines with '|'

# def scrape_fsbo_details(location):
#     """Find the correct FSBO URL and scrape house details."""
#     driver, fsbo_url = search_fsbo_address(location)
#     if not fsbo_url:
#         return {"Details": "N/A", "Link": "N/A"}

#     try:
#         WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
#         )
#         listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")
        
#         # Extract raw text
#         details_text = listing.get_attribute("innerText").strip()

#         # Format details by adding '|'
#         formatted_details = format_details(details_text)
#     except:
#         driver.quit()
#         return {"Details": "N/A", "Link": fsbo_url}

#     driver.quit()
#     return {"Details": formatted_details, "Link": fsbo_url}

# def main():
#     st.title("FSBO House Price Finder")
#     location = st.text_input("Enter Address:")

#     if st.button("Search"):
#         with st.spinner("Fetching house details..."):
#             house_data = scrape_fsbo_details(location)

#         if house_data:
#             st.success("House Details:")
#             st.write(house_data["Details"])  # Display formatted details
#             st.write(f"[View Listing]({house_data['Link']})")
#             threading.Thread(target=scrape_fsbo_details, args=(location,), daemon=True).start()
#         else:
#             st.error("No results found or address not recognized.")

# if __name__ == "__main__":
#     main()













## Works up to the search step, but is not able to scrape the details.


# import streamlit as st
# import pandas as pd
# import threading
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from webdriver_manager.chrome import ChromeDriverManager
# import time

# def search_redfin_address(location):
#     """Search Redfin for the given address and return the first result's URL."""
#     options = Options()
#     # options.add_argument("--headless")
#     options.add_argument("--incognito")
#     options.add_argument("--disable-blink-features=AutomationControlled")
#     options.add_argument("start-maximized")
#     options.add_argument(
#         "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
#     )

#     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
#     driver.get("https://fsbo.com/")

#     try:
#         # Find and enter location into the search box
#         search_box = WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
#         )
#         search_box.clear()
#         search_box.send_keys(location)
#         time.sleep(2)
        
#         # Click the search button
#         search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
#         search_button.click()
#         time.sleep(5)  # Allow results to load

#         # Return updated URL
#         redfin_url = driver.current_url
#         return driver, redfin_url
#     except Exception as e:
#         print("Error finding correct Redfin URL:", e)
#         driver.quit()
#         return None, None

# def scrape_redfin_details(location):
#     """Find the correct Redfin URL and scrape house details."""
#     driver, redfin_url = search_redfin_address(location)
#     if not redfin_url:
#         return {"Price": "N/A", "Address": "N/A", "Beds": "N/A", "Baths": "N/A", "Sq Ft": "N/A", "Link": "N/A"}

#     try:
#         WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
#         )
#         listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]")
#     except:
#         driver.quit()
#         return {"Price": "N/A", "Address": "N/A", "Beds": "N/A", "Baths": "N/A", "Sq Ft": "N/A", "Link": redfin_url}

#     houses = []
#     for listing in listings:
#         try:
#             price = listing.find_element(By.XPATH, ".//div/div/div[3]/div[1]/div[1]/span").text
#         except:
#             price = "N/A"
#         try:
#             address = listing.find_element(By.XPATH, ".//div/div/div[3]/div[3]").text
#         except:
#             address = "N/A"
#         try:
#             beds = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[1]").text
#         except:
#             beds = "N/A"
#         try:
#             baths = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[2]").text
#         except:
#             baths = "N/A"
#         try:
#             sqft = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[3]").text
#         except:
#             sqft = "N/A"
#         try:
#             link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
#         except:
#             link = "N/A"
        
#         houses.append({"Price": price, "Address": address, "Beds": beds, "Baths": baths, "Sq Ft": sqft, "Link": link})
#         break  # Return only the first house fast

#     driver.quit()
#     return houses[0]

# def background_scraping(location):
#     """Scrapes additional property details in the background."""
#     pass

# def main():
#     st.title("Redfin House Price Finder")
#     location = st.text_input("Enter Address:")

#     if st.button("Search"):
#         with st.spinner("Fetching house details..."):
#             house_data = scrape_redfin_details(location)

#         if house_data:
#             st.success(f"House Price: {house_data['Price']}")
#             st.write(f"**Address:** {house_data['Address']}")
#             st.write(f"**Beds:** {house_data['Beds']}  |  **Baths:** {house_data['Baths']}  |  **Sq Ft:** {house_data['Sq Ft']}")
#             # st.write(f"[View Listing on Redfin]({house_data['Link']})")
#             threading.Thread(target=background_scraping, args=(location,), daemon=True).start()
#         else:
#             st.error("No results found or address not recognized.")

# if __name__ == "__main__":
#     main()