Spaces:
Running
Running
File size: 7,295 Bytes
2c72e40 a9ea4be 2c72e40 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
import logging
import json
from typing import Dict, Any, List
from flask import Flask, request, jsonify, render_template, Response
from services.scraper_service import ScraperService
from services.llm_service import LLMService
from services.scheduler_service import scheduler_service
from utils.rate_limiter import RateLimiter
from api.horoscope_routes import horoscope_bp
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Shared service instances used by the route handlers below.
scraper_service = ScraperService()
llm_service = LLMService()

# A single limiter shared by the whole API: 10 requests per minute.
_RATE_LIMIT_WINDOW = 60
_RATE_LIMIT_MAX_REQUESTS = 10
api_rate_limiter = RateLimiter(
    window_size=_RATE_LIMIT_WINDOW,
    max_requests=_RATE_LIMIT_MAX_REQUESTS,
)
# Upper bound on URLs accepted by the multi-URL endpoints (abuse guard).
_MAX_URLS_PER_REQUEST = 10


def _rate_limit_response():
    """Apply the API-wide rate limit to the current request.

    Returns:
        ``None`` when the request may proceed; in that case the request has
        been recorded with ``api_rate_limiter``. Otherwise a
        ``(response, 429)`` tuple that a view can return directly.
    """
    if not api_rate_limiter.can_proceed():
        return jsonify({
            "error": "Rate limit exceeded",
            "wait_seconds": api_rate_limiter.get_wait_time()
        }), 429
    api_rate_limiter.record_request()
    return None


def _json_object():
    """Return the request body as a dict, or ``None`` if it is not one.

    ``silent=True`` makes Flask return ``None`` for a missing or malformed
    JSON body instead of aborting, so each view can answer with its own JSON
    error. Non-object JSON (list, string, number, ...) is rejected as well:
    the views look fields up by key, which would raise ``TypeError`` on
    anything but a dict.
    """
    payload = request.get_json(silent=True)
    return payload if isinstance(payload, dict) else None


def _extract_urls(data):
    """Validate the ``urls`` field of a parsed request body.

    Args:
        data: The dict returned by ``_json_object()``, or ``None``.

    Returns:
        ``(urls, None)`` when ``urls`` is a non-empty list of at most
        ``_MAX_URLS_PER_REQUEST`` items, otherwise ``(None, error)`` where
        ``error`` is a ``(response, 400)`` tuple.
    """
    if data is None or 'urls' not in data:
        return None, (jsonify({"error": "Missing 'urls' in request"}), 400)

    urls = data['urls']
    if not isinstance(urls, list) or not urls:
        return None, (
            jsonify({"error": "'urls' must be a non-empty list"}), 400
        )

    # Limit number of URLs to prevent abuse
    if len(urls) > _MAX_URLS_PER_REQUEST:
        message = f"Maximum {_MAX_URLS_PER_REQUEST} URLs allowed per request"
        return None, (jsonify({"error": message}), 400)

    return urls, None


def register_routes(app: Flask) -> None:
    """Register API routes with Flask app.

    Registers the horoscope blueprint, starts the scheduler service and
    attaches the page, health-check and scraping/LLM views to ``app``.

    Args:
        app: The Flask application to attach the routes to.
    """
    # Register blueprints
    app.register_blueprint(horoscope_bp)

    # Start scheduler service
    scheduler_service.start()

    @app.route('/ping')
    def ping():
        """Liveness probe; always answers ``pong``."""
        return "pong", 200

    @app.route('/')
    def index():
        """Home page with API documentation"""
        return render_template('index.html')

    @app.route('/docs')
    def docs():
        """Detailed API documentation"""
        return render_template('docs.html')

    @app.route('/api/health')
    def health_check():
        """Health check endpoint"""
        return jsonify({
            "status": "ok",
            "services": {
                "scraper": "up",
                "llm": "up" if llm_service.api_key else "down",
                "scheduler": "up" if scheduler_service.running else "down"
            }
        })

    @app.route('/api/scrape', methods=['POST'])
    def scrape_endpoint():
        """Endpoint to scrape a single URL"""
        limited = _rate_limit_response()
        if limited is not None:
            return limited

        data = _json_object()
        if data is None or 'url' not in data:
            return jsonify({"error": "Missing 'url' in request"}), 400

        url = data['url']
        scraper_type = data.get('type')  # Optional scraper type

        result = scraper_service.scrape_url(url, scraper_type)
        return jsonify(result)

    @app.route('/api/scrape-multiple', methods=['POST'])
    def scrape_multiple_endpoint():
        """Endpoint to scrape multiple URLs"""
        limited = _rate_limit_response()
        if limited is not None:
            return limited

        urls, error = _extract_urls(_json_object())
        if error is not None:
            return error

        results = scraper_service.scrape_multiple_urls(urls)
        return jsonify({"results": results})

    @app.route('/api/consolidate', methods=['POST'])
    def consolidate_endpoint():
        """Endpoint to consolidate data using LLM"""
        limited = _rate_limit_response()
        if limited is not None:
            return limited

        data = _json_object()
        if data is None or 'scraped_data' not in data:
            return jsonify({"error": "Missing 'scraped_data' in request"}), 400

        scraped_data = data['scraped_data']
        if not isinstance(scraped_data, list) or not scraped_data:
            return jsonify(
                {"error": "'scraped_data' must be a non-empty list"}
            ), 400

        result = llm_service.consolidate_data(scraped_data)
        return jsonify(result)

    @app.route('/api/scrape-and-consolidate', methods=['POST'])
    def scrape_and_consolidate_endpoint():
        """Endpoint to scrape URLs and consolidate the data in one request"""
        limited = _rate_limit_response()
        if limited is not None:
            return limited

        urls, error = _extract_urls(_json_object())
        if error is not None:
            return error

        scraped_results = scraper_service.scrape_multiple_urls(urls)

        # Only results flagged as successful are passed on to the LLM.
        successful_results = [
            r for r in scraped_results if r.get('success', False)
        ]
        if not successful_results:
            return jsonify({
                "error": "All scraping operations failed",
                "scraped_results": scraped_results
            }), 500

        consolidated_result = llm_service.consolidate_data(successful_results)
        return jsonify({
            "consolidated_data": consolidated_result,
            "scraped_results": scraped_results
        })

    @app.route('/api/summarize', methods=['POST'])
    def summarize_endpoint():
        """Endpoint to summarize content"""
        limited = _rate_limit_response()
        if limited is not None:
            return limited

        data = _json_object()
        if data is None or 'text' not in data:
            return jsonify({"error": "Missing 'text' in request"}), 400

        text = data['text']
        # NOTE(review): 'text' and 'max_length' are forwarded unvalidated;
        # confirm what types llm_service.summarize_content accepts.
        max_length = data.get('max_length', 500)

        summary = llm_service.summarize_content(text, max_length)
        return jsonify({"summary": summary})

    logger.info("API routes registered")
|