# MT564AITraining/services/llm_service.py
# Origin: commit 2c72e40 by pareshmishra — "Add full project source files for MT564 AI"
import json
import logging
import os
from typing import Any, Dict, List, Optional

from dotenv import load_dotenv
from openai import OpenAI

logger = logging.getLogger(__name__)

# Load variables from a local .env file into os.environ (no-op if the file
# is absent); must run before any code reads OPENAI_API_KEY.
load_dotenv()

# SECURITY FIX: never echo the raw API key (the original printed it to
# stdout). Report only whether it is present.
if os.getenv("OPENAI_API_KEY"):
    logger.info("OPENAI_API_KEY loaded from environment")
else:
    logger.warning("OPENAI_API_KEY not found in environment")
class LLMService:
    """Service for interacting with an OpenAI chat model to process and
    consolidate scraped data.

    All public methods are non-raising: API or parsing failures are logged
    and surfaced to the caller as an ``{"error": ...}`` dict (or an error
    string for :meth:`summarize_content`).
    """

    def __init__(self, model_name: str = "gpt-4o"):
        """
        Initialize the LLM service.

        Args:
            model_name: Name of the OpenAI model to use (default: gpt-4o)
        """
        # the newest OpenAI model is "gpt-4o" which was released May 13, 2024.
        # do not change this unless explicitly requested by the user
        self.model_name = model_name
        self.api_key = os.environ.get("OPENAI_API_KEY")
        if not self.api_key:
            logger.warning("OpenAI API key not found in environment variables")
        self.client = OpenAI(api_key=self.api_key)

    def consolidate_horoscopes(self, horoscope_data):
        """Placeholder for horoscope consolidation.

        NOTE(review): the original comment says the real implementation lives
        in api/horoscope_routes.py — confirm this stub is still needed.
        """
        return {"error": "Method not implemented"}

    def _json_chat(
        self,
        system_prompt: str,
        user_prompt: str,
        temperature: float,
    ) -> Dict[str, Any]:
        """Send a JSON-mode chat completion and parse the reply.

        Shared by :meth:`consolidate_data` and :meth:`extract_key_information`
        (the original duplicated this call-and-parse logic in both).

        Returns:
            The parsed JSON dict, or ``{"error": "Empty response from LLM"}``
            when the model returns no content.

        Raises:
            Exception: network/API errors and ``json.JSONDecodeError`` are
            propagated; public callers wrap this in try/except.
        """
        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            response_format={"type": "json_object"},
            temperature=temperature,
        )
        content = response.choices[0].message.content
        if content:
            return json.loads(content)
        return {"error": "Empty response from LLM"}

    def consolidate_data(self, scraped_data: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Consolidate data from multiple sources using the LLM.

        Args:
            scraped_data: List of scraped data dicts; each may carry
                "type", "title", "source" and "text_content" keys.

        Returns:
            Consolidated information as a dictionary, or an
            ``{"error": ...}`` dict on empty input or failure.
        """
        if not scraped_data:
            return {"error": "No data provided for consolidation"}
        try:
            # Render each source as a short labelled section; content is
            # truncated to 2000 chars to bound prompt size.
            sections = []
            for i, data in enumerate(scraped_data, 1):
                source_type = data.get("type", "unknown")
                title = data.get("title", "Unknown Title")
                source = data.get("source", "Unknown Source")
                text = data.get("text_content", "No content available")
                sections.append(
                    f"SOURCE {i} ({source_type} from {source}):\n"
                    f"Title: {title}\n"
                    f"Content: {text[:2000]}...\n\n"
                )
            sources_text = "".join(sections)

            prompt = f"""
Please analyze and consolidate the following information from multiple sources.

{sources_text}

Provide a comprehensive consolidation of this information in JSON format with the following structure:
{{
    "main_topics": [list of main topics covered],
    "key_points": [list of key factual points from all sources],
    "summary": "A 2-3 paragraph summary that synthesizes the information",
    "analysis": "Brief analysis of the information and any discrepancies between sources",
    "sources": [list of sources used]
}}

Only include factual information present in the sources. Do not add any speculative or additional information.
"""
            return self._json_chat(
                "You are a data analysis expert specializing in consolidating information from multiple sources.",
                prompt,
                temperature=0.2,
            )
        except Exception as e:
            logger.error(f"Error consolidating data with LLM: {str(e)}")
            return {"error": f"Failed to consolidate data: {str(e)}"}

    def summarize_content(self, text: str, max_length: int = 500) -> str:
        """
        Summarize a single piece of content.

        Args:
            text: Text to summarize (truncated to 10000 chars for the prompt).
            max_length: Maximum length of summary in characters.

        Returns:
            Summarized text, or an explanatory message on empty input/failure.
        """
        if not text:
            return "No content to summarize"
        try:
            prompt = f"""
Please summarize the following text concisely in no more than {max_length} characters,
while maintaining all key information:

{text[:10000]}
"""
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": "You are a summarization expert."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.3,
                max_tokens=max_length // 2,  # Approximate token count
            )
            # BUG FIX: message.content is Optional; the original could return
            # None despite the declared -> str. Coerce None to "".
            return response.choices[0].message.content or ""
        except Exception as e:
            logger.error(f"Error summarizing content with LLM: {str(e)}")
            return f"Failed to summarize content: {str(e)}"

    def extract_key_information(self, text: str, info_type: Optional[str] = None) -> Dict[str, Any]:
        """
        Extract a specific type of structured information from content.

        Args:
            text: Text to extract information from (truncated to 8000 chars).
            info_type: Type of information to extract (e.g., "news",
                "product", "research"); used only to steer the prompt.

        Returns:
            Extracted information as a dictionary, or an ``{"error": ...}``
            dict on empty input or failure.
        """
        if not text:
            return {"error": "No content provided"}
        try:
            type_instruction = f"This is {info_type} content. " if info_type else ""
            prompt = f"""
{type_instruction}Please extract key structured information from the following text.
Return the result as a JSON object with appropriate fields based on the content type.

{text[:8000]}
"""
            return self._json_chat(
                "You are a data extraction expert.",
                prompt,
                temperature=0.1,
            )
        except Exception as e:
            logger.error(f"Error extracting information with LLM: {str(e)}")
            return {"error": f"Failed to extract information: {str(e)}"}
# Module-level singleton shared by importers of this module.
# NOTE(review): instantiated at import time, so the OpenAI client is built
# even when OPENAI_API_KEY is missing (only a warning is logged) — confirm
# that eager construction is intended.
llm_service = LLMService()