"""Flask web app that searches Craigslist listings in one city or across many."""

from flask import Flask, request, render_template
import requests
from bs4 import BeautifulSoup
import urllib.parse
import logging
import re
from typing import List, Dict
import time
from datetime import datetime  # Import datetime for current year

# Configure logging to both a rotating-style file and the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('craigslist_search.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# List of Craigslist city subdomains (partial list for brevity; expand as needed).
# NOTE: the "all cities" option is a sentinel string "all" handled separately in
# search_craigslist()/index() and is deliberately NOT an entry in this list.
CRAIGSLIST_CITIES = [
    "newyork", "losangeles", "chicago", "houston", "phoenix", "philadelphia",
    "sanantonio", "sandiego", "dallas", "sanjose", "austin", "jacksonville",
    "sanfrancisco", "columbus", "seattle", "denver", "boston", "miami", "atlanta"
]


def search_craigslist(query: str, city: str) -> List[Dict]:
    """
    Search Craigslist for a query in a specific city or all cities.

    Args:
        query: Raw user search text; it is stripped and URL-encoded here.
        city: A Craigslist subdomain from CRAIGSLIST_CITIES, or the sentinel "all".

    Returns:
        A list of dicts with title, link, price, location, and city.
        Returns an empty list on any unexpected error (errors are logged).
    """
    start_time = time.time()
    logger.info(f"Starting search for query: '{query}' in city: '{city}'")
    posts = []
    query = urllib.parse.quote(query.strip())  # URL-encode the query
    try:
        if city == "all":
            # Search across every known city.
            # BUG FIX: the original iterated CRAIGSLIST_CITIES[1:] to "skip 'all'",
            # but "all" is not an element of the list, so the first real city
            # ("newyork") was silently skipped on every all-city search.
            for city_name in CRAIGSLIST_CITIES:
                url = f"https://{city_name}.craigslist.org/search/sss?query={query}"
                logger.debug(f"Fetching URL: {url}")
                html_content = fetch_html_with_retry(url)
                posts.extend(parse_html(html_content, city_name))
                time.sleep(1)  # Add delay to avoid rate limiting
        else:
            # Search in a single specific city.
            url = f"https://{city}.craigslist.org/search/sss?query={query}"
            logger.debug(f"Fetching URL: {url}")
            html_content = fetch_html_with_retry(url)
            posts.extend(parse_html(html_content, city))
        logger.info(
            f"Search completed in {time.time() - start_time:.2f} seconds. "
            f"Found {len(posts)} posts"
        )
        return posts
    except Exception as e:
        logger.error(f"Error during search: {str(e)}")
        return []


def fetch_html_with_retry(url: str, retries: int = 3, delay: int = 2) -> str:
    """
    Fetch HTML content with a retry mechanism to handle transient network issues.

    Args:
        url: Fully-qualified URL to fetch.
        retries: Maximum number of attempts before giving up.
        delay: Seconds to sleep between attempts.

    Returns:
        The response body as text, or "" after all attempts fail.
    """
    # A desktop browser User-Agent reduces the chance of being blocked.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    for attempt in range(retries):
        try:
            logger.debug(f"Fetching URL (attempt {attempt + 1}): {url}")
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            return response.text
        except requests.RequestException as e:
            logger.error(f"Network error on attempt {attempt + 1} for {url}: {str(e)}")
        except Exception as e:
            logger.error(f"Unexpected error on attempt {attempt + 1} for {url}: {str(e)}")
        # Only sleep if another attempt remains.
        if attempt < retries - 1:
            logger.info(f"Retrying after {delay} seconds...")
            time.sleep(delay)
    logger.error(f"Failed to fetch HTML after {retries} attempts: {url}")
    return ""  # Return empty string on failure


def parse_html(html_content: str, city: str) -> List[Dict]:
    """
    Parse Craigslist search-result HTML and extract post information.

    Args:
        html_content: Raw HTML of a search results page (may be "").
        city: City subdomain the page came from; echoed into each post dict.

    Returns:
        A list of dicts (title, link, price, location, city). Malformed result
        entries are skipped with a warning; a top-level parse error returns [].
    """
    posts = []
    try:
        soup = BeautifulSoup(html_content, "html.parser")
        # Find all search result items (static, non-JS markup).
        results = soup.find_all("li", class_="cl-static-search-result")
        for index, result in enumerate(results):
            try:
                # Extract title
                title_elem = result.find("div", class_="title")
                title = title_elem.get_text(strip=True) if title_elem else "No title"
                # Extract link
                link_elem = result.find("a")
                link = link_elem["href"] if link_elem and "href" in link_elem.attrs else "#"
                # Extract price
                price_elem = result.find("div", class_="price")
                price = price_elem.get_text(strip=True) if price_elem else "No price"
                # Extract location
                location_elem = result.find("div", class_="location")
                location = location_elem.get_text(strip=True) if location_elem else "No location"
                post = {
                    "title": title,
                    "link": link,
                    "price": price,
                    "location": location,
                    "city": city.capitalize()
                }
                posts.append(post)
            except Exception as e:
                # One bad <li> should not abort the whole page.
                logger.warning(f"Skipping invalid entry {index} in HTML for city {city}: {str(e)}")
                continue
        logger.debug(f"Parsed {len(posts)} valid posts for city: {city}")
        return posts
    except Exception as e:
        logger.error(f"Error parsing HTML for city {city}: {str(e)}")
        return []


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the search form (GET) or run a search and show results (POST)."""
    posts = []
    query = ""
    selected_city = "all"
    current_year = datetime.now().year  # Get current year (for page footer)
    try:
        if request.method == "POST":
            query = request.form.get("query", "").strip()
            selected_city = request.form.get("city", "all")
            # Validate inputs: an empty query gets an error message, not a search.
            if not query:
                logger.warning("Empty query received")
                return render_template(
                    "index.html",
                    posts=[],
                    query="",
                    cities=CRAIGSLIST_CITIES,
                    selected_city=selected_city,
                    error="Please enter a search query",
                    current_year=current_year
                )
            # BUG FIX: "all" is a valid sentinel but is not in CRAIGSLIST_CITIES,
            # so the original logged a spurious "Invalid city" warning for every
            # default (all-cities) search. Exempt it explicitly.
            if selected_city != "all" and selected_city not in CRAIGSLIST_CITIES:
                logger.warning(f"Invalid city selected: {selected_city}")
                selected_city = "all"
            logger.info(f"Processing POST request: query='{query}', city='{selected_city}'")
            posts = search_craigslist(query, selected_city)
        return render_template(
            "index.html",
            posts=posts,
            query=query,
            cities=CRAIGSLIST_CITIES,
            selected_city=selected_city,
            current_year=current_year
        )
    except Exception as e:
        logger.error(f"Error in index route: {str(e)}")
        return render_template(
            "index.html",
            posts=[],
            query=query,
            cities=CRAIGSLIST_CITIES,
            selected_city=selected_city,
            error="An error occurred while processing your request",
            current_year=current_year
        )


if __name__ == "__main__":
    logger.info("Starting Flask application")
    # SECURITY NOTE: debug=True enables the Werkzeug interactive debugger and
    # must never be used when binding to 0.0.0.0 in production — it allows
    # arbitrary code execution. Keep only for local development.
    app.run(host="0.0.0.0", port=7860, debug=True)