Spaces:

sigridveronica
/

ai-news-analyzer

Sleeping

File size: 6,686 Bytes

9df4cc0
 
fe2b98e
9df4cc0
fe2b98e
 
 
9df4cc0
 
97063b2
9df4cc0
fe2b98e
 
9df4cc0
fe2b98e
9df4cc0
fe2b98e
9df4cc0
fe2b98e
9df4cc0
fe2b98e
9df4cc0
 
fe2b98e
9df4cc0
fe2b98e
 
9df4cc0
fe2b98e
9df4cc0
fe2b98e
9df4cc0
 
 
 
 
 
 
 
 
fe2b98e
 
 
 
9df4cc0
 
fe2b98e
24bf2bc
 
 
fe2b98e
24bf2bc
 
 
 
 
 
 
 
 
 
9df4cc0
24bf2bc
 
 
 
 
 
97063b2
24bf2bc
 
 
9df4cc0
24bf2bc
fe2b98e
 
7cb8f2e
fe2b98e
 
 
 
 
 
24bf2bc
9df4cc0
fe2b98e
 
 
 
 
 
 
 
 
 
 
 
 
9df4cc0
fe2b98e
 
 
 
 
 
24bf2bc
 
 
9df4cc0
fe2b98e
9df4cc0
fe2b98e
9df4cc0
 
 
7cb8f2e
 
9df4cc0
 
 
 
 
fe2b98e
9df4cc0
 
 
 
 
 
7d27314
9df4cc0
 
 
 
 
fe2b98e
9df4cc0
 
 
 
 
 
fe2b98e
9df4cc0
 
 
 
fe2b98e
 
 
373f148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9df4cc0
 
 
 
 
 
 
7d27314
 
9df4cc0

import os
import requests
from datetime import datetime, timedelta
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
from langchain_core.prompts import PromptTemplate
from fin_interpreter import analyze_article

# === Load environment ===
# load_dotenv() runs first so that any values it contributes are visible to the
# lookups below (presumably it reads a local .env file — confirm against the
# python-dotenv docs). Either key is None when its variable is not set.
load_dotenv()
OPENAI_KEY = os.environ.get("OPENAI_API_KEY")
TAVILY_KEY = os.environ.get("TAVILY_API_KEY")

# === Get OpenAI client ===
def get_llm():
    """Build the chat-model client used by this module.

    Returns:
        A ``ChatOpenAI`` instance for the "gpt-4.1" model, authenticated with
        the module-level ``OPENAI_KEY``.

    Raises:
        ValueError: If ``OPENAI_KEY`` is unset or empty.
    """
    if OPENAI_KEY:
        return ChatOpenAI(model_name="gpt-4.1", openai_api_key=OPENAI_KEY)
    raise ValueError("OPENAI_API_KEY not found.")

# === Related Terms (cleaned)
def get_related_terms(topic):
    """Ask the LLM for keywords related to *topic*.

    Only the first line of the model's reply is used; it is split on commas,
    each piece is whitespace-trimmed, and empty pieces are dropped.

    Args:
        topic: Subject to find related financial/industry keywords for.

    Returns:
        A list of at most five non-empty keyword strings.
    """
    llm = get_llm()
    question = (
        f"What are 5 short financial or industry keywords closely related to '{topic}'? "
        "Only return a comma-separated list."
    )
    reply = llm.invoke(question)

    # Anything after the first newline is ignored.
    first_line = reply.content.strip().partition("\n")[0]

    terms = []
    for piece in first_line.split(","):
        cleaned = piece.strip()
        if cleaned:
            terms.append(cleaned)
    return terms[:5]

# === Tavily Search
def tavily_search(query, days, max_results=10, timeout=30):
    """Run one news search against the Tavily HTTP API.

    Every failure mode (missing key, network error, non-200 status, non-JSON
    body) is reported with ``print`` and yields an empty dict, so callers can
    always use ``.get("results", [])`` on the return value.

    Args:
        query: Search string sent to Tavily.
        days: Look-back window in days; coerced with ``int()``.
        max_results: Maximum number of results to request.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        The decoded JSON response as a dict, or ``{}`` on any failure.
    """
    if not TAVILY_KEY:
        # Avoid sending a request authenticated as "Bearer None".
        print("⚠️ Tavily API error: TAVILY_API_KEY not found.")
        return {}

    headers = {"Authorization": f"Bearer {TAVILY_KEY}"}
    payload = {
        "query": query,
        "search_depth": "advanced",
        "topic": "news",
        "days": int(days),
        "max_results": max_results,
        "include_answer": False,
        "include_raw_content": False,
    }

    try:
        response = requests.post(
            "https://api.tavily.com/search",
            json=payload,
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are treated like an API error so one
        # failed query does not abort a whole multi-query run.
        print(f"⚠️ Tavily request failed: {exc}")
        return {}

    if response.status_code != 200:
        print(f"⚠️ Tavily API error: {response.status_code} - {response.text}")
        return {}

    try:
        data = response.json()
    except ValueError:
        # requests' JSON decode errors derive from ValueError.
        print("⚠️ Tavily API error: response body was not valid JSON")
        return {}

    # Callers rely on dict semantics (.get), so never hand back another type.
    return data if isinstance(data, dict) else {}

# === Smart News Search
# Lower-cased because they are matched against lower-cased article text.
# Matching is plain substring search, so e.g. "merger" also matches "mergers".
_FINANCE_KEYWORDS = (
    "valuation", "ipo", "series a", "revenue",
    "funding", "merger", "acquisition", "earnings",
)


def _parse_published_date(raw):
    """Return *raw* as a timezone-aware UTC datetime, or None if unparseable."""
    # Function-scope imports: the file's top-level imports only bring in
    # ``datetime`` and ``timedelta``.
    from datetime import timezone
    from email.utils import parsedate_to_datetime

    if not isinstance(raw, str) or not raw.strip():
        return None
    text = raw.strip()

    # datetime.fromisoformat() rejects a trailing "Z" before Python 3.11.
    iso_text = text[:-1] + "+00:00" if text.endswith("Z") else text
    try:
        parsed = datetime.fromisoformat(iso_text)
    except ValueError:
        # Fall back to RFC 2822 style dates ("Mon, 10 Feb 2025 12:00:00 GMT").
        # NOTE(review): which format the search API actually emits should be
        # confirmed against its documentation.
        try:
            parsed = parsedate_to_datetime(text)
        except (TypeError, ValueError):
            return None

    if parsed.tzinfo is None:
        # No offset given: interpret the timestamp as UTC.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


def fetch_deep_news(topic, days):
    """Collect de-duplicated, finance-related news articles about *topic*.

    Builds a fan-out of search queries (the topic itself, deal/leadership/
    funding variants, well-known investors, and LLM-suggested related terms),
    runs each through ``tavily_search`` and keeps results that:

    * have a URL not seen before,
    * have at least 150 characters of text,
    * are not older than *days* days (when the publish date can be parsed;
      items with a missing or unparseable date are kept), and
    * mention at least one finance keyword (case-insensitive).

    Args:
        topic: Subject to search for.
        days: Look-back window in days; coerced with ``int()`` to match what
            ``tavily_search`` sends to the API.

    Returns:
        A list of dicts with the keys ``"title"``, ``"url"`` and ``"content"``.
    """
    from datetime import timezone

    all_results = []
    seen_urls = set()
    # Timezone-aware cutoff so it can be compared with parsed publish dates
    # regardless of whether they carried a UTC offset.
    cutoff = datetime.now(timezone.utc) - timedelta(days=int(days))

    base_queries = [
        topic,
        f"{topic} AND startup",
        f"{topic} AND acquisition OR merger OR funding",
        f"{topic} AND CEO OR executive OR leadership",
        f"{topic} AND venture capital OR Series A OR Series B",
        f"{topic} AND government grant OR approval OR contract",
        f"{topic} AND underrated OR small-cap OR micro-cap",
    ]

    investor_queries = [
        f"{topic} AND BlackRock OR Vanguard OR SoftBank",
        f"{topic} AND Elon Musk OR Sam Altman OR Peter Thiel",
        f"{topic} AND Berkshire Hathaway OR Warren Buffett",
        f"{topic} AND institutional investor OR hedge fund",
    ]

    related_terms = get_related_terms(topic)
    synonym_queries = [
        f"{term} AND {kw}"
        for term in related_terms
        for kw in ["startup", "funding", "merger", "acquisition"]
    ]
    all_queries = base_queries + investor_queries + synonym_queries

    for query in all_queries:
        print(f"🔍 Tavily query: {query}")
        response = tavily_search(query, days)

        for item in response.get("results", []):
            url = item.get("url")
            # Trailing `or ""` guards against a title that is present but None.
            content = (
                item.get("content", "")
                or item.get("summary", "")
                or item.get("title", "")
                or ""
            )

            if not url or url in seen_urls or len(content) < 150:
                continue

            # Filter out old news. Undated or unparseable items are kept on
            # purpose: the date filter is best-effort.
            published = _parse_published_date(item.get("published_date"))
            if published is not None and published < cutoff:
                continue

            # Filter out non-financial content.
            lowered = content.lower()
            if not any(kw in lowered for kw in _FINANCE_KEYWORDS):
                continue

            all_results.append({
                "title": item.get("title"),
                "url": url,
                "content": content,
            })
            seen_urls.add(url)

    print(f"📰 Total articles collected: {len(all_results)}")
    return all_results

# === Generate Markdown Report
def generate_value_investor_report(topic, news_results, max_articles=20, max_chars_per_article=400):
    """Produce a markdown value-investing memo for *topic* from news articles.

    The articles with the longest ``"content"`` are selected, each one is
    scored with ``analyze_article``, and a bullet list of truncated articles
    plus scores is embedded in a system prompt sent to the LLM.

    Note: the selected article dicts are modified in place; each gains the
    keys ``"fin_sentiment"``, ``"fin_confidence"`` and ``"investment_decision"``.

    Args:
        topic: Subject of the report, interpolated into the prompt.
        news_results: Iterable of article dicts providing ``"title"``,
            ``"url"`` and ``"content"`` (an optional ``"summary"`` is
            preferred as the text passed to ``analyze_article``).
        max_articles: Maximum number of articles included in the prompt.
        max_chars_per_article: Number of leading content characters quoted
            per article.

    Returns:
        The ``content`` of the LLM's reply.
    """
    # Longest articles first, then cap the selection.
    ranked = sorted(news_results, key=lambda entry: len(entry.get("content", "")), reverse=True)
    selected = ranked[:max_articles]

    # Attach the analysis results to every selected article.
    for entry in selected:
        analysis = analyze_article(entry.get("summary") or entry.get("content", ""))
        entry["fin_sentiment"] = analysis.get("sentiment", "neutral")
        entry["fin_confidence"] = analysis.get("confidence", 0.0)
        entry["investment_decision"] = analysis.get("investment_decision", "Watch")

    # One markdown bullet per article; "..." is appended unconditionally.
    bullets = []
    for entry in selected:
        bullets.append(
            f"- **{entry['title']}**: {entry['content'][:max_chars_per_article]}... "
            f"(Sentiment: {entry['fin_sentiment'].title()}, Confidence: {entry['fin_confidence']:.2f}, "
            f"Decision: {entry['investment_decision']}) [link]({entry['url']})\n"
        )
    article_summary = "".join(bullets)

    memo_template = """
You're a highly focused value investor. Today is {Today}. Analyze this week's news on "{Topic}".

Your goal is to uncover:
- Meaningful events (e.g., CEO joining a startup, insider buys, big-name partnerships)
- Startups or small caps that may signal undervalued opportunity
- Connections to key individuals or institutions (e.g., Elon Musk investing, Sam Altman joining)
- Companies with strong fundamentals: low P/E, low P/B, high ROE, recent IPOs, moats, or high free cash flow

### News
{ArticleSummaries}

Write a markdown memo with:
1. **Key Value Signals**
2. **Stocks or Startups to Watch** — MUST include rationale and for each: P/E, P/B, Debt-to-Equity, FCF, PEG
3. **What Smart Money Might Be Acting On**
4. **References**
5. **Investment Hypothesis**

---

### 📌 Executive Summary

Summarize the topic's current investment environment in 3–4 bullet points. Include sentiment, risks, and catalysts.

---

### 📊 Signals and Analysis (Include Sources)

For each important news item, write a short paragraph with:
- What happened
- Why it matters (financially)
- Embedded source as `[source title](url)`
- Bold any key financial terms (e.g., **Series A**, **merger**, **valuation**)

---

### 🧠 Investment Thesis

Give a reasoned conclusion:
- Is this a buy/sell/watch opportunity?
- What’s the risk/reward?
- What signals or themes matter most?
"""
    prompt = PromptTemplate.from_template(memo_template)

    # The whole memo request is delivered as a single system message.
    system_message = SystemMessagePromptTemplate(prompt=prompt)
    chat_prompt = ChatPromptTemplate.from_messages([system_message])
    filled_prompt = chat_prompt.format_prompt(
        Topic=topic,
        ArticleSummaries=article_summary,
        Today=datetime.now().strftime("%B %d, %Y"),
    )
    messages = filled_prompt.to_messages()

    llm = get_llm()
    return llm.invoke(messages).content