import json
import logging
import os
from typing import Any, Dict, List, Optional

from dotenv import load_dotenv
from openai import OpenAI

logger = logging.getLogger(__name__)

# Load variables from a local .env file into the process environment.
load_dotenv()

# Confirm the key is present WITHOUT printing the secret itself.
# (Printing the raw key to stdout is a credential leak.)
_key_present = bool(os.getenv("OPENAI_API_KEY"))
logger.info("OPENAI_API_KEY loaded: %s", "yes" if _key_present else "NO - missing")


class LLMService:
    """Service for interacting with OpenAI LLM to process and consolidate scraped data"""

    def __init__(self, model_name: str = "gpt-4o"):
        """
        Initialize LLM service

        Args:
            model_name: Name of the OpenAI model to use (default: gpt-4o)
        """
        # the newest OpenAI model is "gpt-4o" which was released May 13, 2024.
        # do not change this unless explicitly requested by the user
        self.model_name = model_name
        self.api_key = os.environ.get("OPENAI_API_KEY")
        if not self.api_key:
            logger.warning("OpenAI API key not found in environment variables")
        self.client = OpenAI(api_key=self.api_key)

    # This method will be implemented in api/horoscope_routes.py
    def consolidate_horoscopes(self, horoscope_data):
        """Placeholder method for consolidating horoscopes"""
        return {"error": "Method not implemented"}

    def consolidate_data(self, scraped_data: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Consolidate data from multiple sources using LLM

        Args:
            scraped_data: List of scraped data from different sources

        Returns:
            Consolidated information as a dictionary
        """
        if not scraped_data:
            return {"error": "No data provided for consolidation"}

        try:
            # Prepare data for LLM: flatten each scraped record into a
            # labelled text section, truncating long bodies to keep the
            # prompt within a reasonable token budget.
            sources_text = ""
            for i, data in enumerate(scraped_data, 1):
                source_type = data.get("type", "unknown")
                title = data.get("title", "Unknown Title")
                source = data.get("source", "Unknown Source")
                text = data.get("text_content", "No content available")

                sources_text += f"SOURCE {i} ({source_type} from {source}):\n"
                sources_text += f"Title: {title}\n"
                sources_text += f"Content: {text[:2000]}...\n\n"

            # Create prompt for consolidation
            prompt = f"""
            Please analyze and consolidate the following information from multiple sources.

            {sources_text}

            Provide a comprehensive consolidation of this information in JSON format with the following structure:
            {{
                "main_topics": [list of main topics covered],
                "key_points": [list of key factual points from all sources],
                "summary": "A 2-3 paragraph summary that synthesizes the information",
                "analysis": "Brief analysis of the information and any discrepancies between sources",
                "sources": [list of sources used]
            }}

            Only include factual information present in the sources. Do not add any speculative or additional information.
            """

            # Call OpenAI API; response_format forces a JSON object reply,
            # low temperature keeps the consolidation deterministic.
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": "You are a data analysis expert specializing in consolidating information from multiple sources."},
                    {"role": "user", "content": prompt}
                ],
                response_format={"type": "json_object"},
                temperature=0.2
            )

            # Parse the response
            content = response.choices[0].message.content
            if content:
                return json.loads(content)
            return {"error": "Empty response from LLM"}

        except Exception as e:
            logger.exception("Error consolidating data with LLM")
            return {"error": f"Failed to consolidate data: {str(e)}"}

    def summarize_content(self, text: str, max_length: int = 500) -> str:
        """
        Summarize a single piece of content

        Args:
            text: Text to summarize
            max_length: Maximum length of summary in characters

        Returns:
            Summarized text
        """
        if not text:
            return "No content to summarize"

        try:
            # Truncate very long inputs to bound the prompt size.
            prompt = f"""
            Please summarize the following text concisely in no more than {max_length} characters, while maintaining all key information:

            {text[:10000]}
            """

            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": "You are a summarization expert."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=max_length // 2  # Approximate token count
            )

            # Guard against an empty/None completion so the declared
            # `-> str` return type always holds.
            content = response.choices[0].message.content
            return content if content else "Empty response from LLM"

        except Exception as e:
            logger.exception("Error summarizing content with LLM")
            return f"Failed to summarize content: {str(e)}"

    def extract_key_information(self, text: str, info_type: Optional[str] = None) -> Dict[str, Any]:
        """
        Extract specific type of information from content

        Args:
            text: Text to extract information from
            info_type: Type of information to extract (e.g., "news", "product", "research")

        Returns:
            Extracted information as dictionary
        """
        if not text:
            return {"error": "No content provided"}

        try:
            # Optional hint telling the model what kind of content it is
            # extracting from (shapes the JSON fields it chooses).
            type_instruction = ""
            if info_type:
                type_instruction = f"This is {info_type} content. "

            prompt = f"""
            {type_instruction}Please extract key structured information from the following text.
            Return the result as a JSON object with appropriate fields based on the content type.

            {text[:8000]}
            """

            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": "You are a data extraction expert."},
                    {"role": "user", "content": prompt}
                ],
                response_format={"type": "json_object"},
                temperature=0.1
            )

            # Parse the response
            content = response.choices[0].message.content
            if content:
                return json.loads(content)
            return {"error": "Empty response from LLM"}

        except Exception as e:
            logger.exception("Error extracting information with LLM")
            return {"error": f"Failed to extract information: {str(e)}"}


# Create a singleton instance
llm_service = LLMService()