import os
from datetime import datetime, timedelta

import requests
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.prompts import (
    ChatPromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
)

from fin_interpreter import analyze_article

# === Load environment ===
load_dotenv()
OPENAI_KEY = os.getenv("OPENAI_API_KEY")
TAVILY_KEY = os.getenv("TAVILY_API_KEY")


# === Get OpenAI client ===
def get_llm():
    if not OPENAI_KEY:
        raise ValueError("OPENAI_API_KEY not found.")
    return ChatOpenAI(model_name="gpt-4.1", openai_api_key=OPENAI_KEY)


# === Related Terms (cleaned) ===
def get_related_terms(topic):
    """Ask the LLM for a short list of keywords closely related to the topic."""
    llm = get_llm()
    prompt = (
        f"What are 5 short financial or industry keywords closely related to '{topic}'? "
        "Only return a comma-separated list."
    )
    response = llm.invoke(prompt)
    raw = response.content.strip().split("\n")[0]
    return [term.strip() for term in raw.split(",") if term.strip()][:5]


# === Tavily Search ===
def tavily_search(query, days, max_results=10):
    headers = {"Authorization": f"Bearer {TAVILY_KEY}"}
    payload = {
        "query": query,
        "search_depth": "advanced",
        "topic": "news",
        "days": int(days),
        "max_results": max_results,
        "include_answer": False,
        "include_raw_content": False,
    }
    response = requests.post("https://api.tavily.com/search", json=payload, headers=headers)
    if response.status_code != 200:
        print(f"⚠️ Tavily API error: {response.status_code} - {response.text}")
        return {}
    return response.json()


# === Smart News Search ===
def fetch_deep_news(topic, days):
    all_results = []
    seen_urls = set()
    cutoff = datetime.now() - timedelta(days=days)

    base_queries = [
        topic,
        f"{topic} AND startup",
        f"{topic} AND acquisition OR merger OR funding",
        f"{topic} AND CEO OR executive OR leadership",
        f"{topic} AND venture capital OR Series A OR Series B",
        f"{topic} AND government grant OR approval OR contract",
        f"{topic} AND underrated OR small-cap OR micro-cap",
    ]
    investor_queries = [
        f"{topic} AND BlackRock OR Vanguard OR SoftBank",
        f"{topic} AND Elon Musk OR Sam Altman OR Peter Thiel",
        f"{topic} AND Berkshire Hathaway OR Warren Buffett",
        f"{topic} AND institutional investor OR hedge fund",
    ]
    related_terms = get_related_terms(topic)
    synonym_queries = [
        f"{term} AND {kw}"
        for term in related_terms
        for kw in ["startup", "funding", "merger", "acquisition"]
    ]

    all_queries = base_queries + investor_queries + synonym_queries

    for query in all_queries:
        print(f"🔍 Tavily query: {query}")
        response = tavily_search(query, days)

        for item in response.get("results", []):
            url = item.get("url")
            content = item.get("content", "") or item.get("summary", "") or item.get("title", "")
            pub_date = item.get("published_date")

            # Skip duplicates and thin snippets
            if not url or url in seen_urls or len(content) < 150:
                continue

            # Filter out old news
            if pub_date:
                try:
                    date_obj = datetime.fromisoformat(pub_date.rstrip("Z"))
                    if date_obj < cutoff:
                        continue
                except Exception:
                    pass

            # Filter out non-financial content (keywords lowercased to match content.lower())
            finance_keywords = [
                "valuation", "ipo", "series a", "revenue",
                "funding", "merger", "acquisition", "earnings",
            ]
            if not any(kw in content.lower() for kw in finance_keywords):
                continue

            all_results.append({
                "title": item.get("title"),
                "url": url,
                "content": content,
            })
            seen_urls.add(url)

    print(f"📰 Total articles collected: {len(all_results)}")
    return all_results


# === Generate Markdown Report ===
def generate_value_investor_report(topic, news_results, max_articles=20, max_chars_per_article=400):
    # Keep the most substantive articles (by content length) and cap the total
    news_results = sorted(news_results, key=lambda x: len(x.get("content", "")), reverse=True)
    news_results = news_results[:max_articles]
    # Score each article with the financial sentiment analyzer
    for item in news_results:
        text = item.get("summary") or item.get("content", "")
        result = analyze_article(text)
        item["fin_sentiment"] = result.get("sentiment", "neutral")
        item["fin_confidence"] = result.get("confidence", 0.0)
        item["investment_decision"] = result.get("investment_decision", "Watch")

    article_summary = "".join(
        f"- **{item['title']}**: {item['content'][:max_chars_per_article]}... "
        f"(Sentiment: {item['fin_sentiment'].title()}, Confidence: {item['fin_confidence']:.2f}, "
        f"Decision: {item['investment_decision']}) [link]({item['url']})\n"
        for item in news_results
    )

    prompt = PromptTemplate.from_template("""
You're a highly focused value investor. Today is {Today}. Analyze this week's news on "{Topic}".

Your goal is to uncover:
- Meaningful events (e.g., CEO joining a startup, insider buys, big-name partnerships)
- Startups or small caps that may signal undervalued opportunity
- Connections to key individuals or institutions (e.g., Elon Musk investing, Sam Altman joining)
- Companies with strong fundamentals: low P/E, low P/B, high ROE, recent IPOs, moats, or high free cash flow

### News
{ArticleSummaries}

Write a markdown memo with:
1. **Key Value Signals**
2. **Stocks or Startups to Watch** - MUST include rationale and, for each: P/E, P/B, Debt-to-Equity, FCF, PEG
3. **What Smart Money Might Be Acting On**
4. **References**
5. **Investment Hypothesis**

---

### 📌 Executive Summary
Summarize the topic's current investment environment in 3–4 bullet points. Include sentiment, risks, and catalysts.

---

### 📊 Signals and Analysis (Include Sources)
For each important news item, write a short paragraph with:
- What happened
- Why it matters (financially)
- Embedded source as `[source title](url)`
- Bold any key financial terms (e.g., **Series A**, **merger**, **valuation**)

---

### 🧠 Investment Thesis
Give a reasoned conclusion:
- Is this a buy/sell/watch opportunity?
- What's the risk/reward?
- What signals or themes matter most?
""")

    chat_prompt = ChatPromptTemplate.from_messages([
        SystemMessagePromptTemplate(prompt=prompt)
    ])
    prompt_value = chat_prompt.format_prompt(
        Topic=topic,
        ArticleSummaries=article_summary,
        Today=datetime.now().strftime("%B %d, %Y"),
    ).to_messages()

    llm = get_llm()
    result = llm.invoke(prompt_value)
    return result.content
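
# === Example usage (illustrative sketch) ===
# A minimal sketch of how the pieces above are expected to fit together.
# The topic string and look-back window are placeholder values, and this assumes
# fin_interpreter.analyze_article is importable and that OPENAI_API_KEY and
# TAVILY_API_KEY are set in the .env file.
if __name__ == "__main__":
    topic = "quantum computing"   # placeholder topic for illustration
    days = 7                      # look back one week

    articles = fetch_deep_news(topic, days)
    if articles:
        report = generate_value_investor_report(topic, articles)
        print(report)
    else:
        print("No qualifying articles found.")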