# Craigslist multi-city search — Flask web app (Hugging Face Space).
from flask import Flask, request, render_template
import requests
from bs4 import BeautifulSoup
import urllib.parse
import logging
import re
from typing import List, Dict
import time
from datetime import datetime # Import datetime for current year
# Configure logging: INFO level, timestamped format, emitted both to a log
# file on disk and to the console.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('craigslist_search.log'),  # persist logs to disk
logging.StreamHandler()  # echo to stderr/console as well
]
)
logger = logging.getLogger(__name__)
app = Flask(__name__)
# List of Craigslist cities (partial list for brevity; expand as needed)
# NOTE(review): each entry is a craigslist.org subdomain. The special value
# "all" used by the search form is a sentinel handled in code — it is NOT
# (and must not be) an entry in this list.
CRAIGSLIST_CITIES = [
"newyork", "losangeles", "chicago", "houston", "phoenix", "philadelphia",
"sanantonio", "sandiego", "dallas", "sanjose", "austin", "jacksonville",
"sanfrancisco", "columbus", "seattle", "denver", "boston", "miami", "atlanta"
]
def search_craigslist(query: str, city: str) -> List[Dict]:
    """
    Search Craigslist for a query in a specific city or across all cities.

    Args:
        query: Raw search text; URL-encoded here before being placed in the URL.
        city: A Craigslist subdomain name (e.g. "boston"), or the sentinel
            "all" to search every city in CRAIGSLIST_CITIES.

    Returns:
        A list of post dicts (title, link, price, location, city).
        Returns an empty list on any unexpected failure.
    """
    start_time = time.time()
    logger.info(f"Starting search for query: '{query}' in city: '{city}'")
    posts = []
    query = urllib.parse.quote(query.strip())  # URL-encode the query
    try:
        if city == "all":
            # Search across every configured city. BUGFIX: "all" is a
            # sentinel value, not the first entry of CRAIGSLIST_CITIES, so
            # iterate the whole list — the previous [1:] slice silently
            # skipped the first real city ("newyork").
            for city_name in CRAIGSLIST_CITIES:
                url = f"https://{city_name}.craigslist.org/search/sss?query={query}"
                logger.debug(f"Fetching URL: {url}")
                html_content = fetch_html_with_retry(url)
                posts.extend(parse_html(html_content, city_name))
                time.sleep(1)  # Add delay to avoid rate limiting
        else:
            # Search in a specific city
            url = f"https://{city}.craigslist.org/search/sss?query={query}"
            logger.debug(f"Fetching URL: {url}")
            html_content = fetch_html_with_retry(url)
            posts.extend(parse_html(html_content, city))
        logger.info(f"Search completed in {time.time() - start_time:.2f} seconds. Found {len(posts)} posts")
        return posts
    except Exception as e:
        logger.error(f"Error during search: {str(e)}")
        return []
def fetch_html_with_retry(url: str, retries: int = 3, delay: int = 2) -> str:
    """
    Fetch HTML content with retry mechanism to handle network issues.

    Args:
        url: The URL to fetch.
        retries: Maximum number of attempts before giving up.
        delay: Seconds to sleep between consecutive attempts.

    Returns:
        The response body on success, or an empty string once every
        attempt has failed.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    attempt = 0
    while attempt < retries:
        attempt += 1
        try:
            logger.debug(f"Fetching URL (attempt {attempt}): {url}")
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            return response.text
        except requests.RequestException as e:
            logger.error(f"Network error on attempt {attempt} for {url}: {str(e)}")
        except Exception as e:
            logger.error(f"Unexpected error on attempt {attempt} for {url}: {str(e)}")
        # Sleep only when another attempt remains.
        if attempt < retries:
            logger.info(f"Retrying after {delay} seconds...")
            time.sleep(delay)
    logger.error(f"Failed to fetch HTML after {retries} attempts: {url}")
    return ""  # Return empty string on failure
def parse_html(html_content: str, city: str) -> List[Dict]:
    """
    Parse the HTML search results and extract relevant post information.

    Args:
        html_content: Raw HTML of a Craigslist search results page.
        city: City subdomain name, stored (capitalized) on each post.

    Returns:
        A list of dicts with keys: title, link, price, location, city.
        Malformed entries are skipped; an empty list is returned if the
        whole document cannot be parsed.
    """
    extracted = []
    try:
        soup = BeautifulSoup(html_content, "html.parser")
        # Each search hit lives inside an <li class="cl-static-search-result">.
        listings = soup.find_all("li", class_="cl-static-search-result")
        for idx, listing in enumerate(listings):
            try:
                title_div = listing.find("div", class_="title")
                price_div = listing.find("div", class_="price")
                location_div = listing.find("div", class_="location")
                anchor = listing.find("a")
                # Fall back to "#" when the anchor or its href is missing.
                href = anchor["href"] if anchor and "href" in anchor.attrs else "#"
                extracted.append({
                    "title": title_div.get_text(strip=True) if title_div else "No title",
                    "link": href,
                    "price": price_div.get_text(strip=True) if price_div else "No price",
                    "location": location_div.get_text(strip=True) if location_div else "No location",
                    "city": city.capitalize()
                })
            except Exception as e:
                # Best-effort: a single bad listing must not abort the page.
                logger.warning(f"Skipping invalid entry {idx} in HTML for city {city}: {str(e)}")
                continue
        logger.debug(f"Parsed {len(extracted)} valid posts for city: {city}")
        return extracted
    except Exception as e:
        logger.error(f"Error parsing HTML for city {city}: {str(e)}")
        return []
@app.route("/", methods=["GET", "POST"])
def index():
    """
    Render the search page (GET) or run a search and render results (POST).

    Template context: posts, query, cities, selected_city, current_year,
    and optionally error.
    """
    posts = []
    query = ""
    selected_city = "all"
    current_year = datetime.now().year  # Get current year for the footer
    try:
        if request.method == "POST":
            query = request.form.get("query", "").strip()
            selected_city = request.form.get("city", "all")
            # Validate inputs: a query is required.
            if not query:
                logger.warning("Empty query received")
                return render_template(
                    "index.html",
                    posts=[],
                    query="",
                    cities=CRAIGSLIST_CITIES,
                    selected_city=selected_city,
                    error="Please enter a search query",
                    current_year=current_year
                )
            # BUGFIX: "all" is a valid sentinel but is NOT a member of
            # CRAIGSLIST_CITIES; the old check logged a spurious warning for
            # every all-city search. Only unknown city names are rejected.
            if selected_city != "all" and selected_city not in CRAIGSLIST_CITIES:
                logger.warning(f"Invalid city selected: {selected_city}")
                selected_city = "all"
            logger.info(f"Processing POST request: query='{query}', city='{selected_city}'")
            posts = search_craigslist(query, selected_city)
        return render_template(
            "index.html",
            posts=posts,
            query=query,
            cities=CRAIGSLIST_CITIES,
            selected_city=selected_city,
            current_year=current_year
        )
    except Exception as e:
        logger.error(f"Error in index route: {str(e)}")
        return render_template(
            "index.html",
            posts=[],
            query=query,
            cities=CRAIGSLIST_CITIES,
            selected_city=selected_city,
            error="An error occurred while processing your request",
            current_year=current_year
        )
if __name__ == "__main__":
    logger.info("Starting Flask application")
    # SECURITY: debug=True enables the Werkzeug interactive debugger, which
    # permits arbitrary code execution — never combine it with
    # host="0.0.0.0" (publicly reachable). Debug mode disabled here; use a
    # local-only host if you need the debugger during development.
    app.run(host="0.0.0.0", port=7860, debug=False)