# Spaces: pareshmishra / MT564AITraining | Running | File size: 7,299 Bytes | 2c72e40

import os
import json
import logging
from typing import Dict, Any, List, Optional
from openai import OpenAI

logger = logging.getLogger(__name__)
import os
from dotenv import load_dotenv
load_dotenv()

# Report only WHETHER the key is configured. The key itself is a secret and
# must never be echoed to stdout, where it ends up in console and build logs.
print("✅ API KEY LOADED:", bool(os.getenv("OPENAI_API_KEY")))
class LLMService:
    """Service for interacting with OpenAI LLM to process and consolidate scraped data

    Every public method converts failures into an error value (an
    ``{"error": ...}`` dict or an error string) instead of raising, so callers
    never have to wrap these calls in their own ``try``.
    """

    # Character budgets applied to untrusted/scraped text before it is
    # embedded in a prompt.
    MAX_SOURCE_CHARS = 2000
    MAX_SUMMARY_INPUT_CHARS = 10000
    MAX_EXTRACT_INPUT_CHARS = 8000

    # Returned whenever the model answers with no content at all.
    _EMPTY_RESPONSE = "Empty response from LLM"

    def __init__(self, model_name: str = "gpt-4o"):
        """
        Initialize LLM service

        Args:
            model_name: Name of the OpenAI model to use (default: gpt-4o)
        """
        # the newest OpenAI model is "gpt-4o" which was released May 13, 2024.
        # do not change this unless explicitly requested by the user
        self.model_name = model_name
        self.api_key = os.environ.get("OPENAI_API_KEY")

        if not self.api_key:
            logger.warning("OpenAI API key not found in environment variables")

        self.client = OpenAI(api_key=self.api_key)

    # This method will be implemented in api/horoscope_routes.py
    def consolidate_horoscopes(self, horoscope_data):
        """Placeholder method for consolidating horoscopes"""
        return {"error": "Method not implemented"}

    def _chat(self, system_prompt: str, user_prompt: str, **options: Any) -> Optional[str]:
        """Send one system+user exchange to the model and return the reply text.

        Args:
            system_prompt: Content of the system message.
            user_prompt: Content of the user message.
            **options: Extra keyword arguments forwarded unchanged to
                ``chat.completions.create`` (e.g. ``temperature``).

        Returns:
            The content of the first choice; may be ``None`` or empty.
        """
        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            **options,
        )
        return response.choices[0].message.content

    def _chat_json(self, system_prompt: str, user_prompt: str, temperature: float) -> Dict[str, Any]:
        """Run a JSON-mode chat call and decode the reply.

        Returns:
            The decoded JSON reply, or an ``{"error": ...}`` dict when the
            reply is empty.

        Raises:
            json.JSONDecodeError: If the reply is not valid JSON (callers
                handle this in their own ``except`` clause).
        """
        content = self._chat(
            system_prompt,
            user_prompt,
            response_format={"type": "json_object"},
            temperature=temperature,
        )
        if content:
            return json.loads(content)
        return {"error": self._EMPTY_RESPONSE}

    def _format_source(self, index: int, data: Dict[str, Any]) -> str:
        """Render one scraped record as a labelled text section for the prompt."""
        source_type = data.get("type", "unknown")
        title = data.get("title", "Unknown Title")
        source = data.get("source", "Unknown Source")

        # A key that is present but holds None (or a non-string) must not
        # abort the whole consolidation when it is sliced below.
        text = data.get("text_content")
        text = "No content available" if text is None else str(text)

        # Only mark the content as cut when it really was truncated.
        if len(text) > self.MAX_SOURCE_CHARS:
            text = text[:self.MAX_SOURCE_CHARS] + "..."

        return (
            f"SOURCE {index} ({source_type} from {source}):\n"
            f"Title: {title}\n"
            f"Content: {text}\n\n"
        )

    def consolidate_data(self, scraped_data: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Consolidate data from multiple sources using LLM

        Args:
            scraped_data: List of scraped data from different sources. Each
                item is read with ``.get`` for the keys ``type``, ``title``,
                ``source`` and ``text_content``; missing keys get placeholders.

        Returns:
            Consolidated information as a dictionary, or ``{"error": ...}``
            when no data was given, the reply was empty, or the call failed.
        """
        if not scraped_data:
            return {"error": "No data provided for consolidation"}

        try:
            # Prepare data for LLM
            sources_text = "".join(
                self._format_source(i, data)
                for i, data in enumerate(scraped_data, 1)
            )

            # Create prompt for consolidation
            prompt = f"""
            Please analyze and consolidate the following information from multiple sources.
            
            {sources_text}
            
            Provide a comprehensive consolidation of this information in JSON format with the following structure:
            {{
              "main_topics": [list of main topics covered],
              "key_points": [list of key factual points from all sources],
              "summary": "A 2-3 paragraph summary that synthesizes the information",
              "analysis": "Brief analysis of the information and any discrepancies between sources",
              "sources": [list of sources used]
            }}
            
            Only include factual information present in the sources. Do not add any speculative or additional information.
            """

            return self._chat_json(
                "You are a data analysis expert specializing in consolidating information from multiple sources.",
                prompt,
                temperature=0.2,
            )

        except Exception as e:
            # Deliberately broad: this is the service boundary and callers
            # expect an error dict rather than an exception.
            logger.error("Error consolidating data with LLM: %s", e)
            return {"error": f"Failed to consolidate data: {str(e)}"}

    def summarize_content(self, text: str, max_length: int = 500) -> str:
        """
        Summarize a single piece of content

        Args:
            text: Text to summarize
            max_length: Maximum length of summary in characters

        Returns:
            Summarized text, or a human-readable message when there was
            nothing to summarize, the reply was empty, or the call failed.
        """
        if not text:
            return "No content to summarize"

        try:
            prompt = f"""
            Please summarize the following text concisely in no more than {max_length} characters, 
            while maintaining all key information:
            
            {text[:self.MAX_SUMMARY_INPUT_CHARS]}
            """

            summary = self._chat(
                "You are a summarization expert.",
                prompt,
                temperature=0.3,
                # Approximate token count; floored at 1 so a tiny max_length
                # never produces a zero token budget.
                max_tokens=max(1, max_length // 2),
            )

            # The reply content may be None/empty; always honour the str contract.
            return summary or self._EMPTY_RESPONSE

        except Exception as e:
            logger.error("Error summarizing content with LLM: %s", e)
            return f"Failed to summarize content: {str(e)}"

    def extract_key_information(self, text: str, info_type: Optional[str] = None) -> Dict[str, Any]:
        """
        Extract specific type of information from content

        Args:
            text: Text to extract information from
            info_type: Type of information to extract (e.g., "news", "product", "research")

        Returns:
            Extracted information as dictionary, or ``{"error": ...}`` when no
            text was given, the reply was empty, or the call failed.
        """
        if not text:
            return {"error": "No content provided"}

        try:
            type_instruction = f"This is {info_type} content. " if info_type else ""

            prompt = f"""
            {type_instruction}Please extract key structured information from the following text.
            Return the result as a JSON object with appropriate fields based on the content type.
            
            {text[:self.MAX_EXTRACT_INPUT_CHARS]}
            """

            return self._chat_json(
                "You are a data extraction expert.",
                prompt,
                temperature=0.1,
            )

        except Exception as e:
            logger.error("Error extracting information with LLM: %s", e)
            return {"error": f"Failed to extract information: {str(e)}"}


# Shared module-level instance: built once, when this module is first imported.
llm_service: LLMService = LLMService()