|
import json |
|
import os |
|
import logging |
|
from datetime import datetime |
|
from typing import Dict, Any, Optional, List, Tuple |
|
from markdown_it import MarkdownIt |
|
|
|
def validate_response(response: Any, expected_type: type) -> bool: |
|
"""Validate response type and structure""" |
|
if not isinstance(response, expected_type): |
|
return False |
|
return True |
|
|
|
def format_source_content( |
|
title: str, |
|
url: str, |
|
date: str, |
|
content: str, |
|
source_type: str |
|
) -> str: |
|
"""Format source content with consistent styling""" |
|
return f"""### Source: {title} |
|
URL: {url} |
|
Date: {date if date else 'Not available'} |
|
Type: {source_type} |
|
|
|
**Key Content:** |
|
{content} |
|
|
|
---""" |
|
|
|
def parse_research_results(results: List[Dict[str, Any]]) -> Tuple[List[str], List[Dict[str, str]]]: |
|
"""Parse and validate research results""" |
|
contexts = [] |
|
sources = [] |
|
|
|
for result in results: |
|
title = result.get("title", "").strip() |
|
content = result.get("content", "").strip() |
|
url = result.get("url", "").strip() |
|
date = result.get("published_date", "").strip() |
|
|
|
if title and content: |
|
source_type = ( |
|
"research_paper" |
|
if "arxiv.org" in url or "paper" in url.lower() |
|
else "article" |
|
) |
|
|
|
sources.append({ |
|
"title": title, |
|
"url": url, |
|
"date": date if date else "Date not available", |
|
"type": source_type |
|
}) |
|
|
|
contexts.append( |
|
format_source_content(title, url, date, content, source_type) |
|
) |
|
|
|
return contexts, sources |
|
|
|
def format_sources_section(sources: List[Dict[str, str]]) -> str: |
|
"""Format the sources section of the response with proper markdown""" |
|
sources_section = "\n\n## Sources Cited\n\n" |
|
|
|
if not sources: |
|
return sources_section + "No sources were found during the research phase." |
|
|
|
research_papers = [s for s in sources if s['type'] == 'research_paper'] |
|
articles = [s for s in sources if s['type'] == 'article'] |
|
|
|
if research_papers: |
|
sources_section += "\n### Research Papers\n" |
|
for idx, source in enumerate(research_papers, 1): |
|
sources_section += f"{idx}. [{source['title']}]({source['url']}) - {source['date']}\n" |
|
|
|
if articles: |
|
sources_section += "\n### Technical Articles & Resources\n" |
|
for idx, source in enumerate(articles, 1): |
|
sources_section += f"{idx}. [{source['title']}]({source['url']}) - {source['date']}\n" |
|
|
|
|
|
sources_section += "\n" |
|
return sources_section |
|
|
|
def save_markdown_report(content: str) -> str: |
|
"""Save markdown content to a file and return the file path |
|
|
|
Args: |
|
content: The markdown content to save |
|
|
|
Returns: |
|
str: Path to the generated markdown file |
|
""" |
|
try: |
|
|
|
os.makedirs("generated_reports", exist_ok=True) |
|
|
|
|
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") |
|
filename = f"research_report_{timestamp}.md" |
|
file_path = os.path.join("generated_reports", filename) |
|
|
|
|
|
with open(file_path, 'w', encoding='utf-8') as f: |
|
f.write(content) |
|
|
|
return file_path |
|
|
|
except Exception as e: |
|
logger = logging.getLogger(__name__) |
|
logger.error(f"Failed to save markdown report: {str(e)}") |
|
raise |
|
|
|
def convert_to_html(markdown_content: str) -> str: |
|
"""Convert markdown to styled HTML and save to file |
|
|
|
Args: |
|
markdown_content: The markdown content to convert |
|
|
|
Returns: |
|
str: Path to the generated HTML file |
|
""" |
|
try: |
|
|
|
md = MarkdownIt('commonmark', {'html': True}) |
|
|
|
|
|
html_content = md.render(markdown_content) |
|
|
|
|
|
styled_html = f""" |
|
<!DOCTYPE html> |
|
<html> |
|
<head> |
|
<meta charset="UTF-8"> |
|
<style> |
|
body {{ |
|
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Arial, sans-serif; |
|
line-height: 1.6; |
|
max-width: 900px; |
|
margin: 40px auto; |
|
padding: 20px; |
|
color: #333; |
|
}} |
|
h1, h2, h3 {{ color: #2c3e50; }} |
|
code {{ |
|
background-color: #f5f5f5; |
|
padding: 2px 4px; |
|
border-radius: 4px; |
|
font-family: 'Consolas', 'Monaco', 'Andale Mono', monospace; |
|
}} |
|
pre {{ |
|
background-color: #f5f5f5; |
|
padding: 15px; |
|
border-radius: 8px; |
|
overflow-x: auto; |
|
}} |
|
blockquote {{ |
|
border-left: 4px solid #2c3e50; |
|
margin: 0; |
|
padding-left: 20px; |
|
color: #666; |
|
}} |
|
table {{ |
|
border-collapse: collapse; |
|
width: 100%; |
|
margin: 20px 0; |
|
}} |
|
th, td {{ |
|
border: 1px solid #ddd; |
|
padding: 8px; |
|
text-align: left; |
|
}} |
|
th {{ background-color: #f5f5f5; }} |
|
img {{ max-width: 100%; height: auto; }} |
|
.sources {{ |
|
margin-top: 40px; |
|
padding-top: 20px; |
|
border-top: 2px solid #eee; |
|
}} |
|
</style> |
|
</head> |
|
<body> |
|
{html_content} |
|
</body> |
|
</html> |
|
""" |
|
|
|
|
|
os.makedirs("generated_reports", exist_ok=True) |
|
|
|
|
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") |
|
html_path = os.path.join("generated_reports", f"report_{timestamp}.html") |
|
|
|
|
|
with open(html_path, 'w', encoding='utf-8') as f: |
|
f.write(styled_html) |
|
|
|
return html_path |
|
|
|
except Exception as e: |
|
logger = logging.getLogger(__name__) |
|
logger.error(f"Failed to convert markdown to HTML: {str(e)}") |
|
raise |