#!/usr/bin/env python3 """ Standalone Horoscope API Server This script runs the horoscope system without database dependencies """ import os import sys import logging from flask import Flask, jsonify, request, render_template_string from flask_cors import CORS import json from datetime import datetime, date import requests from bs4 import BeautifulSoup import trafilatura import time # Set up logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # Create Flask app app = Flask(__name__) app.secret_key = os.environ.get("SESSION_SECRET", "dev_secret_key") CORS(app) # Zodiac signs ZODIAC_SIGNS = [ "aries", "taurus", "gemini", "cancer", "leo", "virgo", "libra", "scorpio", "sagittarius", "capricorn", "aquarius", "pisces" ] class HoroscopeScraper: """Base horoscope scraper""" def __init__(self, timeout=30): self.timeout = timeout def fetch_url(self, url): """Fetch content from URL""" try: response = requests.get(url, timeout=self.timeout) response.raise_for_status() return response.text except Exception as e: logger.error(f"Error fetching {url}: {str(e)}") return None def extract_text(self, html_content): """Extract main text content from HTML""" try: text = trafilatura.extract(html_content) return text if text else "" except Exception as e: logger.error(f"Error extracting text: {str(e)}") return "" class AstrologyComScraper(HoroscopeScraper): """Scraper for Astrology.com""" def __init__(self): super().__init__() self.base_url = "https://www.astrology.com" def scrape_sign(self, base_url, sign, date_str=None): """Scrape horoscope for a specific sign""" try: # Format URL for astrology.com url = f"{base_url}/horoscope/daily/{sign}" html_content = self.fetch_url(url) if not html_content: return {"success": False, "error": "Failed to fetch content"} text_content = self.extract_text(html_content) if not text_content: return {"success": False, "error": "Failed to extract text"} # Parse with BeautifulSoup for better extraction soup = BeautifulSoup(html_content, 'html.parser') # Extract horoscope prediction prediction = self._extract_prediction(soup, text_content) if not prediction: return {"success": False, "error": "Could not find horoscope prediction"} return { "success": True, "sign": sign, "prediction": prediction, "date": date.today().isoformat(), "source": "astrology.com", "url": url } except Exception as e: logger.error(f"Error scraping {sign} from astrology.com: {str(e)}") return {"success": False, "error": str(e)} def _extract_prediction(self, soup, text_content): """Extract horoscope prediction from astrology.com""" # Try multiple selectors selectors = [ '.horoscope-content', '.daily-horoscope', 'div[data-testid="horoscope-content"]', '.horoscope-text' ] for selector in selectors: element = soup.select_one(selector) if element: return element.get_text().strip() # Fallback: extract from text content lines = text_content.split('\n') for i, line in enumerate(lines): if any(word in line.lower() for word in ['today', 'daily', 'horoscope']): # Return the next few lines as prediction prediction_lines = lines[i:i+5] return ' '.join(prediction_lines).strip() # Last resort: return first substantial paragraph paragraphs = [p.strip() for p in text_content.split('\n') if len(p.strip()) > 50] return paragraphs[0] if paragraphs else text_content[:300] class HoroscopeComScraper(HoroscopeScraper): """Scraper for Horoscope.com""" def __init__(self): super().__init__() self.base_url = "https://www.horoscope.com" def scrape_sign(self, base_url, sign, date_str=None): """Scrape horoscope for a specific sign""" try: # Map sign to horoscope.com format sign_map = { "aries": 1, "taurus": 2, "gemini": 3, "cancer": 4, "leo": 5, "virgo": 6, "libra": 7, "scorpio": 8, "sagittarius": 9, "capricorn": 10, "aquarius": 11, "pisces": 12 } sign_id = sign_map.get(sign.lower()) if not sign_id: return {"success": False, "error": f"Invalid sign: {sign}"} url = f"{base_url}/us/horoscopes/general/horoscope-general-daily-today.aspx?sign={sign_id}" html_content = self.fetch_url(url) if not html_content: return {"success": False, "error": "Failed to fetch content"} text_content = self.extract_text(html_content) if not text_content: return {"success": False, "error": "Failed to extract text"} # Parse with BeautifulSoup soup = BeautifulSoup(html_content, 'html.parser') # Extract horoscope prediction prediction = self._extract_prediction(soup, text_content) if not prediction: return {"success": False, "error": "Could not find horoscope prediction"} return { "success": True, "sign": sign, "prediction": prediction, "date": date.today().isoformat(), "source": "horoscope.com", "url": url } except Exception as e: logger.error(f"Error scraping {sign} from horoscope.com: {str(e)}") return {"success": False, "error": str(e)} def _extract_prediction(self, soup, text_content): """Extract horoscope prediction from horoscope.com""" # Try multiple selectors selectors = [ '.horoscope-content', '.main-horoscope', '#DailyHoroscope', '.horoscope-text' ] for selector in selectors: element = soup.select_one(selector) if element: return element.get_text().strip() # Fallback: extract meaningful content from text lines = text_content.split('\n') prediction_lines = [] for line in lines: line = line.strip() if len(line) > 30 and not any(skip in line.lower() for skip in ['cookie', 'privacy', 'subscribe', 'newsletter']): prediction_lines.append(line) if len(prediction_lines) >= 3: break return ' '.join(prediction_lines) if prediction_lines else text_content[:300] # Initialize scrapers scrapers = { "astrology.com": AstrologyComScraper(), "horoscope.com": HoroscopeComScraper(), } # HTML Template HTML_TEMPLATE = '''
Ready to integrate with astroastayogini.in
Ready to integrate with your website at astroastayogini.in