import json
import logging
import os
import re

import numpy as np
import pandas as pd
from flask import Flask, render_template, request
import google.generativeai as genai

app = Flask(__name__)

# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# --- Configure Google Gemini API ---
try:
    # SECURITY FIX: the original committed a literal API key to source.
    # Read the key from the environment instead, as the error messages
    # below already instruct the operator to do.
    gemini_api_key = os.environ.get("GOOGLE_API_KEY")
    if not gemini_api_key:
        raise ValueError("GOOGLE_API_KEY environment variable not set or hardcoded key is empty.")
    genai.configure(api_key=gemini_api_key)
    logging.info("Gemini API configured successfully.")
except ValueError as e:
    logging.error(
        f"Error configuring Gemini API: {e}. Please set your GOOGLE_API_KEY environment variable or provide a valid key.")
    gemini_api_key = None  # downstream code treats None as "API unavailable"
except Exception as e:
    logging.error(f"Unexpected error during Gemini API configuration: {e}")
    gemini_api_key = None


# --- Helper functions for aggregation ---
def aggregate_unique_strings(series):
    """Aggregate unique non-null string values from a series, joined by '; '.

    Args:
        series: a pandas Series of arbitrary values.

    Returns:
        str: the unique non-null values (stringified) joined with '; ',
        or '' when the series has no non-null values.
    """
    non_null_unique = series.dropna().astype(str).unique()
    if non_null_unique.size == 0:
        return ''
    return '; '.join(non_null_unique)


def aggregate_unique_numerical_values_or_strings(series):
    """Aggregate unique non-null numerical values from a series.

    Args:
        series: a pandas Series; non-numeric entries are discarded
            (coerced to NaN and dropped).

    Returns:
        float: the single value when exactly one unique numeric value exists.
        str: a '; '-joined string of the unique values when several exist,
            or '' when the series contains no valid numerical values.
    """
    # Coerce to numeric, silently dropping anything unparseable.
    numeric_series = pd.to_numeric(series.dropna(), errors='coerce').dropna()
    if numeric_series.empty:
        return ''
    unique_values = numeric_series.unique()
    if len(unique_values) == 1:
        return float(unique_values[0])
    return '; '.join(unique_values.astype(str))


# --- Improved JSON Cleaning Function ---
def clean_llm_json_string(json_string):
    """
    Attempts to clean common LLM-generated JSON formatting issues,
    with improved handling to avoid corrupting valid JSON.
    """
    # 1. Strip whitespace and markdown fences
    json_string = json_string.strip()
    # Remove markdown code blocks if present
    if json_string.startswith('```'):
        lines = json_string.split('\n')
        # Find first line that doesn't start with ``` and isn't empty
        start_idx = 0
        for i, line in enumerate(lines):
            if not line.strip().startswith('```') and line.strip():
                start_idx = i
                break
        # Find last line that doesn't start with ``` and isn't empty
        end_idx = len(lines) - 1
        for i in range(len(lines) - 1, -1, -1):
            if not lines[i].strip().startswith('```') and lines[i].strip():
                end_idx = i
                break
        json_string = '\n'.join(lines[start_idx:end_idx + 1])

    # 2. Find JSON boundaries more carefully
    first_brace = json_string.find('{')
    last_brace = json_string.rfind('}')
    if first_brace == -1 or last_brace == -1 or first_brace > last_brace:
        # Try to find array boundaries
        first_bracket = json_string.find('[')
        last_bracket = json_string.rfind(']')
        if first_bracket == -1 or last_bracket == -1 or first_bracket > last_bracket:
            logging.warning("Cannot find valid JSON boundaries")
            return ""
        json_string = json_string[first_bracket:last_bracket + 1]
    else:
        json_string = json_string[first_brace:last_brace + 1]

    # 3. Try to parse as-is first (most LLM responses are actually valid JSON)
    try:
        json.loads(json_string)
        logging.info("JSON is already valid, no cleaning needed")
        return json_string
    except json.JSONDecodeError as e:
        logging.info(f"JSON needs cleaning: {e}")

    # 4.
Only apply minimal cleaning if parsing failed original_string = json_string # Remove comments (// and /* */) json_string = re.sub(r'//.*?(?=\n|$)', '', json_string) json_string = re.sub(r'/\*[\s\S]*?\*/', '', json_string) # Remove trailing commas (most common issue) json_string = re.sub(r',(\s*[}\]])', r'\1', json_string) # Fix unquoted keys (but be very careful not to break quoted strings) # Only match word boundaries that are followed by colon and not inside quotes def fix_unquoted_keys(match_obj): full_match = match_obj.group(0) # Check if we're inside a quoted string by counting quotes before this position before_match = json_string[:match_obj.start()] quote_count = before_match.count('"') - before_match.count('\\"') if quote_count % 2 == 0: # Even number of quotes = we're outside quoted strings key = match_obj.group(1) return f'"{key}":' return full_match json_string = re.sub(r'\b(\w+):', fix_unquoted_keys, json_string) # Replace single quotes with double quotes (but avoid apostrophes) # Only replace single quotes that appear to be string delimiters json_string = re.sub(r"(?