File size: 15,906 Bytes
db40891 ba7468f 06e5a52 7d7b830 06e5a52 db40891 7e8bc99 ba7468f 7e8bc99 ba7468f 7e8bc99 db40891 ba7468f 45466f9 ba7468f 45466f9 ba7468f 7e8bc99 ba7468f 7e8bc99 db40891 8e1bf17 db40891 8e1bf17 db40891 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 |
import os
from typing import List, Dict, Any, Optional
import logging
import json
logger = logging.getLogger(__name__)
# Try both import methods with proper error handling
try:
import google.generativeai as genai
GENAI_PACKAGE = "generativeai"
except ImportError:
try:
import google.genai as genai
GENAI_PACKAGE = "genai"
except ImportError:
logger.error("Failed to import Google AI package")
raise
from openai import OpenAI
class BaseAgent:
    """Common LLM access layer shared by the research agents.

    An instance talks to exactly one backend, chosen at construction time:

    * Gemini, through whichever Google SDK the module managed to import
      (``GENAI_PACKAGE`` is ``"generativeai"`` or ``"genai"``), or
    * OpenRouter, through the OpenAI-compatible client.

    Subclasses call :meth:`generate` and never touch the backend directly.
    """

    def __init__(self, use_gemini: bool = True, api_key: Optional[str] = None,
                 openrouter_model: Optional[str] = None, gemini_model: Optional[str] = None):
        """Configure the selected backend.

        Args:
            use_gemini: ``True`` selects Gemini, ``False`` selects OpenRouter.
            api_key: Key for the selected backend. Mandatory for Gemini.
            openrouter_model: OpenRouter model id; defaults to
                ``"anthropic/claude-3-opus:beta"``.
            gemini_model: Gemini model name; defaults to ``"gemini-1.5-pro"``.

        Raises:
            ValueError: If ``use_gemini`` is true and ``api_key`` is empty.
        """
        self.use_gemini = use_gemini
        if use_gemini:
            if not api_key:
                raise ValueError("Gemini API key is required when use_gemini=True")
            # Set API key for either package version
            os.environ["GOOGLE_API_KEY"] = api_key
            # Kept so the client-based SDK can be given the key explicitly.
            self._api_key = api_key
            if GENAI_PACKAGE == "generativeai":
                try:
                    genai.configure(api_key=api_key)
                except Exception as e:
                    # Best effort: the key was already exported above.
                    logger.warning(f"Fallback to environment variable: {str(e)}")
            self.gemini_model = gemini_model or "gemini-1.5-pro"
        else:
            # NOTE(review): api_key is not validated on this path; the OpenAI
            # client presumably falls back to its own environment variable
            # when it is None - confirm that is intended for OpenRouter.
            self.openrouter_client = OpenAI(
                base_url="https://openrouter.ai/api/v1",
                api_key=api_key,
            )
            self.model = openrouter_model or "anthropic/claude-3-opus:beta"

    def _get_genai_client(self):
        """Return the ``google.genai`` client, creating it on first use."""
        client = getattr(self, "_genai_client", None)
        if client is None:
            key = getattr(self, "_api_key", None) or os.environ.get("GOOGLE_API_KEY")
            client = genai.Client(api_key=key)
            self._genai_client = client
        return client

    def _generate_with_gemini(self, prompt: str, system_prompt: str) -> str:
        """Send the prompt to Gemini and return the reply text.

        The system prompt and user prompt are merged into a single text
        prompt. All SDK paths that accept a generation config use
        temperature 0.1.

        Raises:
            Exception: Whatever the SDK raises; it is logged and re-raised.
        """
        combined_prompt = f"System: {system_prompt}\n\nUser: {prompt}"
        generation_config = {"temperature": 0.1}
        try:
            if GENAI_PACKAGE == "generativeai":
                model = genai.GenerativeModel(model_name=self.gemini_model)
                response = model.generate_content(
                    combined_prompt,
                    generation_config=generation_config,
                )
            elif hasattr(genai, "Client"):
                # google.genai is client-based: it has no module-level
                # generate_text(), so go through Client.models instead.
                response = self._get_genai_client().models.generate_content(
                    model=self.gemini_model,
                    contents=combined_prompt,
                    config=generation_config,
                )
            else:
                # Last resort for a module that only offers the original
                # text-generation entry point.
                completion = genai.generate_text(
                    model=self.gemini_model,
                    prompt=combined_prompt,
                )
                return str(completion)
            if hasattr(response, "text"):
                # The text may be None when no text part came back.
                return response.text or ""
            return str(response)
        except Exception as e:
            logger.error(f"Gemini generation failed: {str(e)}")
            raise

    def _generate_with_openrouter(self, prompt: str, system_prompt: str) -> str:
        """Send the prompt to OpenRouter as a chat completion and return the reply text.

        Raises:
            RuntimeError: If the response carries no completion choices.
        """
        completion = self.openrouter_client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,
        )
        if not completion.choices:
            raise RuntimeError("OpenRouter returned no completion choices")
        # content can be None; keep the declared ``str`` contract.
        return completion.choices[0].message.content or ""

    def generate(self, prompt: str, system_prompt: str) -> str:
        """Generate text with the configured backend.

        Args:
            prompt: The user-level request.
            system_prompt: Instructions describing the agent's role.

        Returns:
            The model's reply as text.

        Raises:
            Exception: Any backend failure; it is logged and re-raised.
        """
        try:
            if self.use_gemini:
                return self._generate_with_gemini(prompt, system_prompt)
            return self._generate_with_openrouter(prompt, system_prompt)
        except Exception as e:
            logger.error(f"Generation failed: {str(e)}")
            raise
class OrchestratorAgent(BaseAgent):
    """Agent that drafts a research plan and judges how well it has been covered."""

    # Sections every research plan is expected to contain.
    _PLAN_KEYS = (
        "core_concepts",
        "key_questions",
        "information_requirements",
        "research_priorities",
    )
    # Sections whose coverage is reported by evaluate_research_progress().
    _PROGRESS_KEYS = ("core_concepts", "key_questions", "information_requirements")

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``BaseAgent`` and set the planner system prompt."""
        super().__init__(*args, **kwargs)
        self.system_prompt = """You are an expert research planner that develops comprehensive research strategies.
Your role is to create structured research plans that identify what information is needed and why.
Focus on the logical flow of information needed to answer the query comprehensively."""

    def create_research_plan(self, query: str) -> Dict[str, List[str]]:
        """Create a structured research plan with clear objectives.

        Args:
            query: The question the research should answer.

        Returns:
            A dict with the keys ``core_concepts``, ``key_questions``,
            ``information_requirements`` and ``research_priorities``, each
            mapping to a list of strings. If the model's reply cannot be
            parsed as a JSON object, every section falls back to ``[query]``;
            a section that is missing or malformed in an otherwise valid
            reply falls back to ``[query]`` on its own.
        """
        prompt = f"""Create a detailed research plan for the following query: {query}
Return a JSON object with the following structure:
{{
"core_concepts": ["list of fundamental concepts that need to be understood"],
"key_questions": ["specific questions that need to be answered"],
"information_requirements": ["specific pieces of information needed to answer each question"],
"research_priorities": ["ordered list of research priorities"]
}}
Make sure the plan flows logically and each item contributes to answering the main query."""
        response = self.generate(prompt, self.system_prompt)
        try:
            # Clean the response of any markdown formatting
            cleaned_response = response.strip().replace('```json', '').replace('```', '').strip()
            plan = json.loads(cleaned_response)
            if not isinstance(plan, dict):
                raise ValueError("Research plan must be a JSON object")
            # Normalise every expected section so callers can index the plan
            # without guarding against missing keys or non-list values.
            for key in self._PLAN_KEYS:
                section = plan.get(key)
                if isinstance(section, str):
                    section = [section]
                if isinstance(section, list):
                    plan[key] = [str(entry) for entry in section]
                else:
                    plan[key] = [query]
            logger.info(f"Generated research plan: {json.dumps(plan, indent=2)}")
            return plan
        except Exception:
            # Deliberate best effort, but no longer a bare ``except`` that
            # would also swallow KeyboardInterrupt / SystemExit.
            logger.error(f"Failed to parse research plan: {response}")
            # Return a basic plan structure if parsing fails
            return {key: [query] for key in self._PLAN_KEYS}

    def evaluate_research_progress(self, plan: Dict[str, List[str]], gathered_info: List[str]) -> Dict[str, bool]:
        """Evaluate if we have enough information for each aspect of the plan.

        Args:
            plan: The research plan, embedded in the prompt as JSON.
            gathered_info: Text snippets collected so far, joined with newlines.

        Returns:
            A dict with boolean ``core_concepts``, ``key_questions`` and
            ``information_requirements`` entries. All three are ``False``
            when the model's reply is not valid JSON with those boolean
            fields.
        """
        prompt = f"""Analyze the research plan and gathered information to evaluate completeness.
Research Plan:
{json.dumps(plan, indent=2)}
Gathered Information:
{chr(10).join(gathered_info)}
Your task: Return a STRICTLY FORMATTED JSON object with only three boolean fields indicating whether the gathered information adequately covers each aspect. Do not include any other text, explanation, or comments.
Required exact output format (with true/false values):
{{
"core_concepts": false,
"key_questions": false,
"information_requirements": false
}}
Rules:
- Set a field to true ONLY if the gathered information thoroughly covers that aspect
- Return ONLY the JSON object, no other text
- Must be valid JSON parseable by json.loads()"""
        response = self.generate(prompt, self.system_prompt)
        try:
            # Remove any leading/trailing whitespace and quotes
            cleaned_response = response.strip().strip('"').strip()
            # Remove any markdown code block formatting
            cleaned_response = cleaned_response.replace('```json', '').replace('```', '').strip()
            # Parse and validate the response has the correct structure
            parsed = json.loads(cleaned_response)
            if not isinstance(parsed, dict):
                raise ValueError("Response is not a JSON object")
            if not all(isinstance(parsed.get(key), bool) for key in self._PROGRESS_KEYS):
                raise ValueError("Response missing required boolean fields")
            # Hand back only the documented fields, dropping any extras.
            return {key: parsed[key] for key in self._PROGRESS_KEYS}
        except Exception:
            logger.error(f"Failed to parse evaluation response: {response}")
            # Return a default response indicating no completeness
            return {key: False for key in self._PROGRESS_KEYS}
class PlannerAgent(BaseAgent):
    """Agent that turns research-plan items into search queries and work lists."""

    # Plan sections in the order they should be researched.
    _PRIORITY_ORDER = ("core_concepts", "key_questions", "information_requirements")

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``BaseAgent`` and set the search-planner system prompt."""
        super().__init__(*args, **kwargs)
        self.system_prompt = """You are an expert research planner that creates targeted search strategies.
Your role is to identify the key aspects that need deep investigation, focusing on quality over quantity.
Create research plans that encourage thorough exploration of important concepts rather than shallow coverage of many topics."""

    def create_search_strategy(self, research_item: str, item_type: str) -> List[str]:
        """Create targeted search queries based on the type of research item.

        Args:
            research_item: The concept, question or requirement to research.
            item_type: Label for the kind of item, interpolated into the prompt.

        Returns:
            Up to three query strings. Falls back to ``[str(research_item)]``
            when the model's reply is not a non-empty JSON array.
        """
        prompt = f"""Create 2-3 highly specific search queries for this {item_type}: {research_item}
Focus on Depth:
- Start with foundational understanding
- Build up to technical specifics and implementation details
- Look for real-world examples and case studies
- Find comparative analyses and benchmarks
- Seek out critical discussions and limitations
Guidelines:
- Prefer fewer, more focused queries over many broad ones
- Each query should build on previous knowledge
- Target high-quality technical sources
- Look for detailed explanations rather than surface-level overviews
Return ONLY a JSON array of 2-3 carefully crafted search queries that will yield deep technical information.
Make each query highly specific and targeted."""
        response = self.generate(prompt, self.system_prompt)
        try:
            cleaned_response = response.strip().replace('```json', '').replace('```', '').strip()
            queries = json.loads(cleaned_response)
            # A JSON string would otherwise be sliced into single characters.
            if not isinstance(queries, list) or not queries:
                raise ValueError("Expected a non-empty JSON array of search queries")
            return [str(q) for q in queries[:3]]
        except Exception:
            # Deliberate best effort, without a bare ``except`` that would
            # also swallow KeyboardInterrupt / SystemExit.
            logger.error(f"Failed to parse search queries: {response}")
            return [str(research_item)]

    @staticmethod
    def _has_sufficient_depth(topic: str, info: List[str]) -> bool:
        """Return True when at least two texts cover ``topic`` substantially.

        A text counts as substantial when it is longer than 300 characters
        and contains (as substrings, case-insensitively) at least two of the
        topic's words - or the single word, for a one-word topic.
        """
        if not info:
            return False
        # Computed once: the topic does not change between texts.
        topic_words = set(str(topic).lower().split())
        if not topic_words:
            return False
        # A one-word topic can never match two distinct keywords, so cap the
        # requirement at the number of words the topic actually has.
        required_matches = min(2, len(topic_words))
        substantial_mentions = 0
        for text in info:
            if len(text) <= 300:
                continue
            text_lower = text.lower()
            keyword_matches = sum(1 for word in topic_words if word in text_lower)
            if keyword_matches >= required_matches:
                substantial_mentions += 1
        # Require multiple substantial mentions
        return substantial_mentions >= 2

    def prioritize_unfulfilled_requirements(self, plan: Dict[str, List[str]], progress: Dict[str, bool], gathered_info: Optional[List[str]] = None) -> List[tuple]:
        """Create a prioritized list of remaining research needs with depth checking.

        Args:
            plan: Research plan keyed by section name.
            progress: Per-section completion flags; a missing flag is treated
                as not yet complete.
            gathered_info: Text collected so far. When empty or ``None``,
                every item of an incomplete section is returned.

        Returns:
            ``(section, item)`` tuples, ordered core concepts first, then key
            questions, then information requirements.
        """
        items = []
        for section in self._PRIORITY_ORDER:
            if progress.get(section, False):
                continue
            for item in plan.get(section) or []:
                if not gathered_info or not self._has_sufficient_depth(item, gathered_info):
                    items.append((section, item))
        return items
class ReportAgent(BaseAgent):
    """Agent that writes the final markdown report from the collected research."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``BaseAgent`` and set the report-writer system prompt."""
        super().__init__(*args, **kwargs)
        self.system_prompt = """You are an expert technical writer and researcher that creates
comprehensive, well-structured research reports. Your primary focus is on deep analysis,
synthesis of information, and meaningful organization of content.
Key Principles:
1. Quality over Quantity - Focus on depth and insight rather than filling sections
2. Natural Organization - Let the content guide the structure instead of forcing a rigid outline
3. Meaningful Connections - Draw relationships between different pieces of information
4. Critical Analysis - Question assumptions and evaluate trade-offs
5. Evidence-Based - Support claims with specific technical details and examples"""

    def generate_report(self, query: str, research_plan: Dict[str, List[str]],
                        research_results: List[str], completion_stats: Dict[str, Any]) -> str:
        """Ask the model for a full report synthesizing the research.

        Args:
            query: The original research question.
            research_plan: The plan that guided the research; embedded as JSON.
            research_results: Collected findings, joined with newlines.
            completion_stats: Coverage statistics; embedded as JSON.

        Returns:
            The model's reply, which the prompt asks to be markdown.

        Raises:
            Exception: Any failure raised by ``generate``.
        """
        # completion_stats is Dict[str, Any]; stringify values JSON cannot
        # encode rather than failing before the prompt is even built.
        # JSON-native values render exactly as before.
        coverage_json = json.dumps(completion_stats, indent=2, default=str)
        prompt = f"""Generate a comprehensive technical report that synthesizes the research findings into a cohesive narrative.
Query: {query}
Research Plan:
{json.dumps(research_plan, indent=2)}
Research Coverage:
{coverage_json}
Research Findings:
{chr(10).join(research_results)}
Report Requirements:
1. Organization:
- Start with a clear introduction that frames the topic
- Group related concepts together naturally
- Only create sections when there's enough substantial content
- Use appropriate heading levels (# for h1, ## for h2, etc.)
- Maintain a logical flow of ideas, ensuring smooth transitions between sections
2. Content Development:
- Focus on in-depth analysis of important concepts
- Provide concrete examples and technical details
- Compare and contrast different approaches
- Discuss real-world implications
- Acknowledge limitations and trade-offs
3. Synthesis & Analysis:
- Draw meaningful connections between different sources
- Evaluate conflicting information
- Identify patterns and trends
- Provide reasoned analysis supported by evidence
- Offer insights beyond just summarizing sources
4. Technical Accuracy:
- Use precise technical language
- Include relevant code examples with language tags
- Provide performance metrics when available
- Explain technical concepts clearly
- Support technical claims with evidence
5. Formatting:
- Use proper markdown formatting
- Include code blocks with language tags when relevant
- Format lists and tables appropriately
- Add line breaks between sections
- Ensure consistent formatting throughout
Important:
- Do NOT create sections just to fill a structure
- Combine related information even if it came from different parts of the research plan
- Focus on providing meaningful insights rather than covering every possible aspect
- Skip sections or topics where there isn't enough substantive content. However, with points that have been researched, ensure they are well-developed and connected, as well as deeply analyzed.
- Be really thorough and detailed in your analysis, ensuring that each section is rich with information and insights.
- If there are any gaps in the research, acknowledge them and suggest potential follow-up research areas.
- Ensure that the report is cohesive and flows naturally from one section to the next, with clear transitions between topics.
- Use a professional and technical tone appropriate for an expert audience.
- Ensure that the report is self-contained and can be understood without needing to refer back to the research plan or findings.
- Provide a summary at the end that encapsulates the key findings and insights from the research.
- Ensure that the report is well-cited, with references to the sources used in the research findings."""
        return self.generate(prompt, self.system_prompt)