File size: 7,210 Bytes
1786f25
f9f6113
 
bce38d2
1786f25
 
 
 
b2f4a78
de83290
1786f25
de83290
 
 
 
 
 
 
 
 
f9f6113
1786f25
 
 
 
de54b2c
1786f25
 
 
 
 
 
 
f9e18dd
1786f25
 
 
 
 
 
 
 
 
 
 
 
 
f9e18dd
 
1786f25
 
 
 
 
4ec1af7
f9e18dd
1786f25
 
 
 
 
 
 
 
f9e18dd
de83290
f9e18dd
de83290
bce38d2
 
 
 
de83290
 
bce38d2
 
 
f9e18dd
bce38d2
 
 
de83290
bce38d2
 
 
 
 
 
f9e18dd
 
bce38d2
f9e18dd
f9f6113
f9e18dd
f9f6113
 
de83290
f9e18dd
 
 
 
 
bce38d2
f9e18dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bce38d2
f9e18dd
 
 
 
bce38d2
 
 
f9e18dd
bce38d2
f9e18dd
bce38d2
 
 
de83290
 
 
f9e18dd
1786f25
 
 
 
 
 
 
b2f4a78
1786f25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2f4a78
 
1786f25
 
 
 
 
 
 
 
 
 
 
 
 
 
b2f4a78
 
1786f25
 
 
 
 
 
 
 
 
 
b2f4a78
 
1786f25
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
from flask import Flask, request, render_template
import requests
from bs4 import BeautifulSoup
import urllib.parse
import logging
import re
from typing import List, Dict
import time
from datetime import datetime  # Import datetime for current year

# Configure logging: INFO level, timestamped messages, sent to BOTH a log
# file and the console (two handlers below).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('craigslist_search.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# List of Craigslist cities (partial list for brevity; expand as needed).
# Each entry is a Craigslist subdomain, e.g. "newyork" -> newyork.craigslist.org.
# NOTE: the special value "all" (used by the search form) is NOT an entry here;
# it is handled separately in search_craigslist().
CRAIGSLIST_CITIES = [
    "newyork", "losangeles", "chicago", "houston", "phoenix", "philadelphia",
    "sanantonio", "sandiego", "dallas", "sanjose", "austin", "jacksonville",
    "sanfrancisco", "columbus", "seattle", "denver", "boston", "miami", "atlanta"
]

def search_craigslist(query: str, city: str) -> List[Dict]:
    """
    Search Craigslist for a query in a specific city or across all known cities.

    Args:
        query: Raw search text; it is URL-encoded before being placed in the URL.
        city: A Craigslist subdomain from CRAIGSLIST_CITIES, or "all" to
            search every city in the list.

    Returns:
        A list of post dicts (title, link, price, location, city).
        Returns an empty list if an unexpected error occurs.
    """
    start_time = time.time()
    logger.info(f"Starting search for query: '{query}' in city: '{city}'")
    
    posts = []
    query = urllib.parse.quote(query.strip())  # URL-encode the query

    try:
        if city == "all":
            # BUG FIX: CRAIGSLIST_CITIES contains no "all" entry, so the old
            # slice CRAIGSLIST_CITIES[1:] ("Skip 'all'") silently dropped the
            # first real city ("newyork"). Iterate the full list instead.
            for city_name in CRAIGSLIST_CITIES:
                url = f"https://{city_name}.craigslist.org/search/sss?query={query}"
                logger.debug(f"Fetching URL: {url}")
                html_content = fetch_html_with_retry(url)
                posts.extend(parse_html(html_content, city_name))
                time.sleep(1)  # Add delay to avoid rate limiting
        else:
            # Search in a specific city
            url = f"https://{city}.craigslist.org/search/sss?query={query}"
            logger.debug(f"Fetching URL: {url}")
            html_content = fetch_html_with_retry(url)
            posts.extend(parse_html(html_content, city))

        logger.info(f"Search completed in {time.time() - start_time:.2f} seconds. Found {len(posts)} posts")
        return posts

    except Exception as e:
        logger.error(f"Error during search: {str(e)}")
        return []

def fetch_html_with_retry(url: str, retries: int = 3, delay: int = 2) -> str:
    """
    Download the page at *url*, retrying on failure.

    Makes up to *retries* attempts with a browser-like User-Agent header and
    a 10-second timeout, sleeping *delay* seconds between attempts.

    Returns:
        The response body as text on success, or "" if every attempt failed.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    # 1-based attempt counter keeps the log messages identical to before.
    for attempt_no in range(1, retries + 1):
        try:
            logger.debug(f"Fetching URL (attempt {attempt_no}): {url}")
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()  # Turn HTTP error statuses into exceptions
            return response.text
        except requests.RequestException as e:
            logger.error(f"Network error on attempt {attempt_no} for {url}: {str(e)}")
        except Exception as e:
            logger.error(f"Unexpected error on attempt {attempt_no} for {url}: {str(e)}")

        # Back off before the next attempt (but not after the final one).
        if attempt_no < retries:
            logger.info(f"Retrying after {delay} seconds...")
            time.sleep(delay)

    logger.error(f"Failed to fetch HTML after {retries} attempts: {url}")
    return ""  # Return empty string on failure

def parse_html(html_content: str, city: str) -> List[Dict]:
    """
    Extract post data from a Craigslist search-results page.

    Looks for <li class="cl-static-search-result"> items and pulls out the
    title, link, price and location of each one.

    Returns:
        A list of dicts with keys title/link/price/location/city (city is
        capitalized). Individual entries that fail to parse are skipped;
        a top-level parse failure yields an empty list.
    """
    def text_of(node, fallback):
        # Stripped text content of a tag, or the fallback when absent.
        return node.get_text(strip=True) if node else fallback

    extracted = []
    try:
        soup = BeautifulSoup(html_content, "html.parser")
        results = soup.find_all("li", class_="cl-static-search-result")

        for index, result in enumerate(results):
            try:
                anchor = result.find("a")
                href = anchor["href"] if anchor and "href" in anchor.attrs else "#"
                extracted.append({
                    "title": text_of(result.find("div", class_="title"), "No title"),
                    "link": href,
                    "price": text_of(result.find("div", class_="price"), "No price"),
                    "location": text_of(result.find("div", class_="location"), "No location"),
                    "city": city.capitalize(),
                })
            except Exception as e:
                # One malformed entry shouldn't abort the whole page.
                logger.warning(f"Skipping invalid entry {index} in HTML for city {city}: {str(e)}")
                continue

        logger.debug(f"Parsed {len(extracted)} valid posts for city: {city}")
        return extracted

    except Exception as e:
        logger.error(f"Error parsing HTML for city {city}: {str(e)}")
        return []

@app.route("/", methods=["GET", "POST"])
def index():
    """
    Render the search page; on POST, validate the form and run the search.

    Form fields:
        query: the search text (required; empty query re-renders with an error).
        city:  a Craigslist subdomain or "all" (invalid values fall back to "all").

    Always renders index.html with posts, query, cities, selected_city and
    current_year; an 'error' message is added on validation/processing failure.
    """
    posts = []
    query = ""
    selected_city = "all"
    current_year = datetime.now().year  # Passed to the template

    try:
        if request.method == "POST":
            query = request.form.get("query", "").strip()
            selected_city = request.form.get("city", "all")
            
            # Validate inputs
            if not query:
                logger.warning("Empty query received")
                return render_template(
                    "index.html",
                    posts=[],
                    query="",
                    cities=CRAIGSLIST_CITIES,
                    selected_city=selected_city,
                    error="Please enter a search query",
                    current_year=current_year
                )
            
            # BUG FIX: "all" is a legitimate choice but is not an entry in
            # CRAIGSLIST_CITIES, so the old check logged a spurious "Invalid
            # city" warning on every all-cities search. Allow it explicitly.
            if selected_city != "all" and selected_city not in CRAIGSLIST_CITIES:
                logger.warning(f"Invalid city selected: {selected_city}")
                selected_city = "all"
            
            logger.info(f"Processing POST request: query='{query}', city='{selected_city}'")
            posts = search_craigslist(query, selected_city)

        return render_template(
            "index.html",
            posts=posts,
            query=query,
            cities=CRAIGSLIST_CITIES,
            selected_city=selected_city,
            current_year=current_year
        )

    except Exception as e:
        logger.error(f"Error in index route: {str(e)}")
        return render_template(
            "index.html",
            posts=[],
            query=query,
            cities=CRAIGSLIST_CITIES,
            selected_city=selected_city,
            error="An error occurred while processing your request",
            current_year=current_year
        )

# Script entry point: start the development server.
if __name__ == "__main__":
    logger.info("Starting Flask application")
    # NOTE(review): debug=True enables the Werkzeug debugger/auto-reloader and
    # must not ship to production — especially combined with host="0.0.0.0",
    # which listens on all network interfaces. Use a WSGI server for deployment.
    app.run(host="0.0.0.0", port=7860, debug=True)