import logging
import json
from typing import Dict, Any, List

from flask import Flask, request, jsonify, render_template, Response

from services.scraper_service import ScraperService
from services.llm_service import LLMService
from services.scheduler_service import scheduler_service
from utils.rate_limiter import RateLimiter
from api.horoscope_routes import horoscope_bp

logger = logging.getLogger(__name__)

# Create services shared by all request handlers in this module.
scraper_service = ScraperService()
llm_service = LLMService()

# API-wide rate limiter (10 requests per minute)
api_rate_limiter = RateLimiter(window_size=60, max_requests=10)

# Cap on batch endpoints, to prevent abuse.
MAX_URLS_PER_REQUEST = 10


def _rate_limit_or_none():
    """Apply the shared API rate limit to the current request.

    Returns:
        A ``(response, status)`` tuple to be returned directly by the
        caller when the limit is exceeded (HTTP 429 with the remaining
        wait time), or ``None`` when the request may proceed — in which
        case the request has already been recorded against the window.
    """
    if not api_rate_limiter.can_proceed():
        return jsonify({
            "error": "Rate limit exceeded",
            "wait_seconds": api_rate_limiter.get_wait_time()
        }), 429
    api_rate_limiter.record_request()
    return None


def _validate_urls(data):
    """Validate the ``'urls'`` field of a parsed JSON payload.

    Args:
        data: The decoded request body (may be ``None``).

    Returns:
        ``(urls, None)`` on success, or ``(None, (response, status))``
        with a ready-to-return 400 error when the payload is missing,
        not a non-empty list, or exceeds ``MAX_URLS_PER_REQUEST``.
    """
    if not data or 'urls' not in data:
        return None, (jsonify({"error": "Missing 'urls' in request"}), 400)
    urls = data['urls']
    if not isinstance(urls, list) or not urls:
        return None, (jsonify({"error": "'urls' must be a non-empty list"}), 400)
    if len(urls) > MAX_URLS_PER_REQUEST:
        return None, (jsonify(
            {"error": f"Maximum {MAX_URLS_PER_REQUEST} URLs allowed per request"}
        ), 400)
    return urls, None


def register_routes(app: Flask):
    """Register API routes with Flask app"""
    # Register blueprints
    app.register_blueprint(horoscope_bp)

    # Start scheduler service (side effect of route registration, as before).
    scheduler_service.start()

    @app.route('/ping')
    def ping():
        """Liveness probe."""
        return "pong", 200

    @app.route('/')
    def index():
        """Home page with API documentation"""
        return render_template('index.html')

    @app.route('/docs')
    def docs():
        """Detailed API documentation"""
        return render_template('docs.html')

    @app.route('/api/health')
    def health_check():
        """Health check endpoint reporting per-service status."""
        return jsonify({
            "status": "ok",
            "services": {
                "scraper": "up",
                # LLM is considered down when no API key is configured.
                "llm": "up" if llm_service.api_key else "down",
                "scheduler": "up" if scheduler_service.running else "down"
            }
        })

    @app.route('/api/scrape', methods=['POST'])
    def scrape_endpoint():
        """Endpoint to scrape a single URL"""
        limited = _rate_limit_or_none()
        if limited is not None:
            return limited

        data = request.get_json()
        if not data or 'url' not in data:
            return jsonify({"error": "Missing 'url' in request"}), 400

        url = data['url']
        scraper_type = data.get('type')  # Optional scraper type

        result = scraper_service.scrape_url(url, scraper_type)
        return jsonify(result)

    @app.route('/api/scrape-multiple', methods=['POST'])
    def scrape_multiple_endpoint():
        """Endpoint to scrape multiple URLs"""
        limited = _rate_limit_or_none()
        if limited is not None:
            return limited

        urls, error = _validate_urls(request.get_json())
        if error is not None:
            return error

        results = scraper_service.scrape_multiple_urls(urls)
        return jsonify({"results": results})

    @app.route('/api/consolidate', methods=['POST'])
    def consolidate_endpoint():
        """Endpoint to consolidate data using LLM"""
        limited = _rate_limit_or_none()
        if limited is not None:
            return limited

        data = request.get_json()
        if not data or 'scraped_data' not in data:
            return jsonify({"error": "Missing 'scraped_data' in request"}), 400

        scraped_data = data['scraped_data']
        if not isinstance(scraped_data, list) or not scraped_data:
            return jsonify({"error": "'scraped_data' must be a non-empty list"}), 400

        result = llm_service.consolidate_data(scraped_data)
        return jsonify(result)

    @app.route('/api/scrape-and-consolidate', methods=['POST'])
    def scrape_and_consolidate_endpoint():
        """Endpoint to scrape URLs and consolidate the data in one request"""
        limited = _rate_limit_or_none()
        if limited is not None:
            return limited

        urls, error = _validate_urls(request.get_json())
        if error is not None:
            return error

        scraped_results = scraper_service.scrape_multiple_urls(urls)

        # Only feed successful scrapes to the LLM; surface everything
        # (including failures) in the response for transparency.
        successful_results = [r for r in scraped_results if r.get('success', False)]
        if not successful_results:
            return jsonify({
                "error": "All scraping operations failed",
                "scraped_results": scraped_results
            }), 500

        consolidated_result = llm_service.consolidate_data(successful_results)
        return jsonify({
            "consolidated_data": consolidated_result,
            "scraped_results": scraped_results
        })

    @app.route('/api/summarize', methods=['POST'])
    def summarize_endpoint():
        """Endpoint to summarize content"""
        limited = _rate_limit_or_none()
        if limited is not None:
            return limited

        data = request.get_json()
        if not data or 'text' not in data:
            return jsonify({"error": "Missing 'text' in request"}), 400

        text = data['text']
        max_length = data.get('max_length', 500)

        summary = llm_service.summarize_content(text, max_length)
        return jsonify({"summary": summary})

    logger.info("API routes registered")