# Craigslist multi-city search — Flask web app (Hugging Face Space).
from flask import Flask, request, render_template
import requests
from bs4 import BeautifulSoup
import urllib.parse
import logging
import re
from typing import List, Dict
import time
from datetime import datetime # Import datetime for current year
# Configure logging: INFO level, timestamped format, emitted both to a log
# file on disk and to the console.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('craigslist_search.log'),  # persist logs to disk
logging.StreamHandler()  # echo to stderr/console as well
]
)
logger = logging.getLogger(__name__)
app = Flask(__name__)
# List of Craigslist cities (partial list for brevity; expand as needed)
# NOTE(review): each entry is a craigslist.org subdomain. The special value
# "all" used by the search form is a sentinel handled in code — it is NOT
# (and must not be) an entry in this list.
CRAIGSLIST_CITIES = [
"newyork", "losangeles", "chicago", "houston", "phoenix", "philadelphia",
"sanantonio", "sandiego", "dallas", "sanjose", "austin", "jacksonville",
"sanfrancisco", "columbus", "seattle", "denver", "boston", "miami", "atlanta"
]
def search_craigslist(query: str, city: str) -> List[Dict]:
    """
    Search Craigslist for a query in a specific city or across all cities.

    Args:
        query: Raw search text; URL-encoded here before being placed in the URL.
        city: A Craigslist subdomain name (e.g. "boston"), or the sentinel
            "all" to search every city in CRAIGSLIST_CITIES.

    Returns:
        A list of post dicts (title, link, price, location, city).
        Returns an empty list on any unexpected failure.
    """
    start_time = time.time()
    logger.info(f"Starting search for query: '{query}' in city: '{city}'")
    posts = []
    query = urllib.parse.quote(query.strip())  # URL-encode the query
    try:
        if city == "all":
            # Search across every configured city. BUGFIX: "all" is a
            # sentinel value, not the first entry of CRAIGSLIST_CITIES, so
            # iterate the whole list — the previous [1:] slice silently
            # skipped the first real city ("newyork").
            for city_name in CRAIGSLIST_CITIES:
                url = f"https://{city_name}.craigslist.org/search/sss?query={query}"
                logger.debug(f"Fetching URL: {url}")
                html_content = fetch_html_with_retry(url)
                posts.extend(parse_html(html_content, city_name))
                time.sleep(1)  # Add delay to avoid rate limiting
        else:
            # Search in a specific city
            url = f"https://{city}.craigslist.org/search/sss?query={query}"
            logger.debug(f"Fetching URL: {url}")
            html_content = fetch_html_with_retry(url)
            posts.extend(parse_html(html_content, city))
        logger.info(f"Search completed in {time.time() - start_time:.2f} seconds. Found {len(posts)} posts")
        return posts
    except Exception as e:
        logger.error(f"Error during search: {str(e)}")
        return []
def fetch_html_with_retry(url: str, retries: int = 3, delay: int = 2) -> str:
    """
    Fetch HTML content with retry mechanism to handle network issues.

    Args:
        url: The URL to fetch.
        retries: Maximum number of attempts before giving up.
        delay: Seconds to sleep between consecutive attempts.

    Returns:
        The response body on success, or an empty string once every
        attempt has failed.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    attempt = 0
    while attempt < retries:
        attempt += 1
        try:
            logger.debug(f"Fetching URL (attempt {attempt}): {url}")
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            return response.text
        except requests.RequestException as e:
            logger.error(f"Network error on attempt {attempt} for {url}: {str(e)}")
        except Exception as e:
            logger.error(f"Unexpected error on attempt {attempt} for {url}: {str(e)}")
        # Sleep only when another attempt remains.
        if attempt < retries:
            logger.info(f"Retrying after {delay} seconds...")
            time.sleep(delay)
    logger.error(f"Failed to fetch HTML after {retries} attempts: {url}")
    return ""  # Return empty string on failure
def parse_html(html_content: str, city: str) -> List[Dict]:
    """
    Parse the HTML search results and extract relevant post information.

    Args:
        html_content: Raw HTML of a Craigslist search results page.
        city: City subdomain name, stored (capitalized) on each post.

    Returns:
        A list of dicts with keys: title, link, price, location, city.
        Malformed entries are skipped; an empty list is returned if the
        whole document cannot be parsed.
    """
    extracted = []
    try:
        soup = BeautifulSoup(html_content, "html.parser")
        # Each search hit lives inside an <li class="cl-static-search-result">.
        listings = soup.find_all("li", class_="cl-static-search-result")
        for idx, listing in enumerate(listings):
            try:
                title_div = listing.find("div", class_="title")
                price_div = listing.find("div", class_="price")
                location_div = listing.find("div", class_="location")
                anchor = listing.find("a")
                # Fall back to "#" when the anchor or its href is missing.
                href = anchor["href"] if anchor and "href" in anchor.attrs else "#"
                extracted.append({
                    "title": title_div.get_text(strip=True) if title_div else "No title",
                    "link": href,
                    "price": price_div.get_text(strip=True) if price_div else "No price",
                    "location": location_div.get_text(strip=True) if location_div else "No location",
                    "city": city.capitalize()
                })
            except Exception as e:
                # Best-effort: a single bad listing must not abort the page.
                logger.warning(f"Skipping invalid entry {idx} in HTML for city {city}: {str(e)}")
                continue
        logger.debug(f"Parsed {len(extracted)} valid posts for city: {city}")
        return extracted
    except Exception as e:
        logger.error(f"Error parsing HTML for city {city}: {str(e)}")
        return []
@app.route("/", methods=["GET", "POST"])
def index():
    """
    Render the search page (GET) or run a search and render results (POST).

    Template context: posts, query, cities, selected_city, current_year,
    and optionally error.
    """
    posts = []
    query = ""
    selected_city = "all"
    current_year = datetime.now().year  # Get current year for the footer
    try:
        if request.method == "POST":
            query = request.form.get("query", "").strip()
            selected_city = request.form.get("city", "all")
            # Validate inputs: a query is required.
            if not query:
                logger.warning("Empty query received")
                return render_template(
                    "index.html",
                    posts=[],
                    query="",
                    cities=CRAIGSLIST_CITIES,
                    selected_city=selected_city,
                    error="Please enter a search query",
                    current_year=current_year
                )
            # BUGFIX: "all" is a valid sentinel but is NOT a member of
            # CRAIGSLIST_CITIES; the old check logged a spurious warning for
            # every all-city search. Only unknown city names are rejected.
            if selected_city != "all" and selected_city not in CRAIGSLIST_CITIES:
                logger.warning(f"Invalid city selected: {selected_city}")
                selected_city = "all"
            logger.info(f"Processing POST request: query='{query}', city='{selected_city}'")
            posts = search_craigslist(query, selected_city)
        return render_template(
            "index.html",
            posts=posts,
            query=query,
            cities=CRAIGSLIST_CITIES,
            selected_city=selected_city,
            current_year=current_year
        )
    except Exception as e:
        logger.error(f"Error in index route: {str(e)}")
        return render_template(
            "index.html",
            posts=[],
            query=query,
            cities=CRAIGSLIST_CITIES,
            selected_city=selected_city,
            error="An error occurred while processing your request",
            current_year=current_year
        )
if __name__ == "__main__":
    logger.info("Starting Flask application")
    # SECURITY: debug=True enables the Werkzeug interactive debugger, which
    # permits arbitrary code execution — never combine it with
    # host="0.0.0.0" (publicly reachable). Debug mode disabled here; use a
    # local-only host if you need the debugger during development.
    app.run(host="0.0.0.0", port=7860, debug=False)