# Source header (from repository view): Sigrid De los Santos - "App is ready" - 215f78a
import os
import pandas as pd
from datetime import datetime
from dotenv import load_dotenv
from md_html import convert_single_md_to_html as convert_md_to_html
from news_analysis import fetch_deep_news, generate_value_investor_report
from fin_interpreter import analyze_article
# Project root: the parent of the directory that contains this module.
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
# Markdown reports are written to DATA_DIR; their HTML renderings are expected
# in HTML_DIR (see run_pipeline).
DATA_DIR = os.path.join(BASE_DIR, "data")
HTML_DIR = os.path.join(BASE_DIR, "html")
# Create both output directories at import time; exist_ok avoids an error when
# they are already present.
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(HTML_DIR, exist_ok=True)
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()
# === Priority Logic ===
def derive_priority(sentiment, confidence):
    """Map a sentiment label and its confidence score to a priority tier.

    Rules, checked in order:
      * "positive" with confidence above 0.7 -> "High"
      * "negative" with confidence above 0.6 -> "High"
      * any label with confidence above 0.5  -> "Medium"
      * everything else                      -> "Low"

    Args:
        sentiment: Sentiment label. Matched case-insensitively and ignoring
            surrounding whitespace. ``None`` (or any non-string) is treated
            as an unrecognised label instead of raising.
        confidence: Numeric confidence score. ``None`` is treated as 0.0.

    Returns:
        One of "High", "Medium" or "Low".
    """
    # Normalise defensively: this function is called outside the caller's
    # try/except, so a missing label must not abort the whole analysis run.
    label = "" if sentiment is None else str(sentiment).strip().lower()
    score = 0.0 if confidence is None else confidence

    # Negative news is escalated at a lower confidence than positive news.
    high_cutoff = {"positive": 0.7, "negative": 0.6}.get(label)
    if high_cutoff is not None and score > high_cutoff:
        return "High"
    if score > 0.5:
        return "Medium"
    return "Low"
# === Metrics Box ===
def build_metrics_box(topic, num_articles):
    """Return the Markdown header block placed at the top of a topic report.

    The block is a three-line blockquote (topic, article count, generation
    time as ``YYYY-MM-DD HH:MM`` in local time) followed by a ``---`` rule.
    It starts and ends with a newline.

    Args:
        topic: Topic name shown in the header.
        num_articles: Number of articles collected for the topic.

    Returns:
        The Markdown snippet as a string.
    """
    timestamp = f"{datetime.now():%Y-%m-%d %H:%M}"
    # Leading/trailing empty entries reproduce the newline that opens and
    # closes the block.
    lines = [
        "",
        f"> **Topic:** `{topic}`",
        f"> **Articles Collected:** `{num_articles}`",
        f"> **Generated:** `{timestamp}`",
        "---",
        "",
    ]
    return "\n".join(lines)
# === Main Analysis ===
def run_value_investing_analysis(csv_path, progress_callback=None):
    """Fetch, score and report on news for every topic listed in a CSV file.

    For each CSV row the function fetches news with ``fetch_deep_news``,
    scores every article with ``analyze_article``, and writes one Markdown
    report per topic (metrics box + ``generate_value_investor_report`` output)
    into ``DATA_DIR``. Fetch, scoring and report failures are sent to
    ``progress_callback`` and do not abort the run.

    Args:
        csv_path: Path of a CSV readable by ``pandas.read_csv``. A ``topic``
            column is expected; ``timespan_days`` is optional and defaults
            to 7 when the column or the cell is missing.
        progress_callback: Optional callable that receives one message string.

    Returns:
        A tuple ``(all_articles, company_data)`` of two lists of dicts:
          * ``all_articles`` entries have the keys Title, URL, Summary
            (at most 300 characters, "..." appended only when truncated),
            Priority, Sentiment, Confidence, Signal and Date.
          * ``company_data`` entries have the keys Company, Sentiment,
            Confidence, Signal, Summary (full text) and Priority.
    """
    default_timespan = 7
    preview_length = 300

    def notify(message):
        # Single place for the "callback is optional" check.
        if progress_callback:
            progress_callback(message)

    topics_df = pd.read_csv(csv_path)
    all_articles = []
    company_data = []

    for _, row in topics_df.iterrows():
        raw_topic = row.get("topic")
        # Empty CSV cells arrive as NaN; there is nothing to search for.
        if pd.isna(raw_topic) or not str(raw_topic).strip():
            notify("⚠️ Skipping row with missing topic")
            continue
        topic = str(raw_topic).strip()

        timespan = row.get("timespan_days", default_timespan)
        if pd.isna(timespan):
            timespan = default_timespan
        elif isinstance(timespan, float) and timespan.is_integer():
            # A column containing blanks is read as float (7 -> 7.0);
            # hand the fetcher a plain integer day count again.
            timespan = int(timespan)

        try:
            news = fetch_deep_news(topic, timespan)
        except Exception as e:
            notify(f"[ERROR] fetch_deep_news failed: {e}")
            continue

        if not news:
            notify(f"⚠️ No news found for topic: {topic}")
            continue

        for article in news:
            # Fall back to the content field, and never let None through:
            # downstream code slices and joins this value.
            summary = article.get("summary") or article.get("content") or ""
            title = article.get("title", "Untitled")
            url = article.get("url", "")
            date = article.get("date", datetime.now().strftime("%Y-%m-%d"))

            try:
                result = analyze_article(summary)
                sentiment = result.get("sentiment") or "Neutral"
                confidence = float(result.get("confidence") or 0.0)
                signal = result.get("investment_decision") or "Watch"
            except Exception as e:
                # Best effort: a scoring failure degrades the article to
                # neutral defaults instead of dropping it.
                notify(f"[FinBERT ERROR] {e}")
                sentiment, confidence, signal = "Neutral", 0.0, "Watch"

            priority = derive_priority(sentiment, confidence)

            # Only mark the preview with an ellipsis when text was cut off.
            if len(summary) > preview_length:
                preview = summary[:preview_length] + "..."
            else:
                preview = summary

            all_articles.append({
                "Title": title,
                "URL": url,
                "Summary": preview,
                "Priority": priority,
                "Sentiment": sentiment,
                "Confidence": confidence,
                "Signal": signal,
                "Date": date,
            })
            company_data.append({
                "Company": topic,
                "Sentiment": sentiment,
                "Confidence": confidence,
                "Signal": signal,
                "Summary": summary,
                "Priority": priority,
            })

        try:
            report_body = generate_value_investor_report(topic, news)
            full_md = build_metrics_box(topic, len(news)) + report_body

            # Topics come from a user-supplied CSV: neutralise path
            # separators so the report always lands inside DATA_DIR.
            safe_topic = topic.replace(" ", "_").lower()
            for separator in ("/", "\\"):
                safe_topic = safe_topic.replace(separator, "_")

            filename = f"{safe_topic}_{datetime.now().strftime('%Y-%m-%d')}.md"
            filepath = os.path.join(DATA_DIR, filename)
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(full_md)
        except Exception as e:
            notify(f"[REPORT ERROR] {e}")

    return all_articles, company_data
# === Insights Tab Data ===
def build_company_insights(company_data):
    """Aggregate per-article records into a per-company summary table.

    Args:
        company_data: List of dicts, one per article, with the keys
            "Company", "Signal", "Confidence", "Priority" and "Summary".

    Returns:
        A DataFrame with at most five rows, sorted by "Avg Confidence"
        descending, with the columns "Company", "Mentions",
        "Dominant Signal", "Avg Confidence", "Interest % (High Priority)"
        and "Highlights". An empty DataFrame is returned when
        ``company_data`` is empty.
    """
    if not company_data:
        return pd.DataFrame()

    df = pd.DataFrame(company_data)
    insights = []
    for company, group in df.groupby("Company"):
        mentions = len(group)

        # mode() is empty when every signal is missing; fall back to "Watch".
        signal_modes = group["Signal"].mode()
        dominant_signal = signal_modes.iloc[0] if not signal_modes.empty else "Watch"

        avg_confidence = round(group["Confidence"].mean(), 2)
        high_priority_ratio = round(
            (group["Priority"] == "High").sum() / mentions * 100, 1
        )

        # Skip missing/blank summaries: str.join raises on None/NaN, and
        # empty strings would leave dangling " | " separators.
        summaries = [
            str(text) for text in group["Summary"].dropna() if str(text).strip()
        ]
        highlights = " | ".join(summaries[:2])

        insights.append({
            "Company": company,
            "Mentions": mentions,
            "Dominant Signal": dominant_signal,
            "Avg Confidence": avg_confidence,
            "Interest % (High Priority)": f"{high_priority_ratio}%",
            "Highlights": highlights,
        })

    return pd.DataFrame(insights).sort_values(by="Avg Confidence", ascending=False).head(5)
# === Pipeline ===
def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
    """Run the complete pipeline: clean up, analyse, render HTML, summarise.

    Steps:
      1. Export ``tavily_api_key`` as the ``TAVILY_API_KEY`` environment
         variable (skipped when no key is given).
      2. Delete ``.md`` files in ``DATA_DIR`` and ``.html`` files in
         ``HTML_DIR`` left over from earlier runs.
      3. Run ``run_value_investing_analysis`` on ``csv_path``.
      4. Convert every ``.md`` report in ``DATA_DIR`` with
         ``convert_md_to_html``.
      5. Build the article table and the per-company insights table.

    Args:
        csv_path: Path of the topics CSV, forwarded to the analysis step.
        tavily_api_key: API key string; ``None``/empty leaves any value
            already present in the environment untouched.
        progress_callback: Optional callable receiving progress messages;
            forwarded to the analysis step.

    Returns:
        A tuple ``(html_paths, articles_df, insights_df)``: the list of HTML
        report paths (sorted by file name), a DataFrame of all articles, and
        the DataFrame returned by ``build_company_insights``.
    """
    # os.environ only accepts strings, so assigning None would raise
    # TypeError; without a key, keep whatever the environment already has.
    if tavily_api_key:
        os.environ["TAVILY_API_KEY"] = tavily_api_key

    # === Clean old reports (MD and HTML) ===
    for directory, suffix in ((DATA_DIR, ".md"), (HTML_DIR, ".html")):
        for name in os.listdir(directory):
            if name.endswith(suffix):
                os.remove(os.path.join(directory, name))

    # === Run the new analysis ===
    all_articles, company_data = run_value_investing_analysis(csv_path, progress_callback)

    html_paths = []
    # listdir order is arbitrary; sort so the returned list is deterministic.
    for md_file in sorted(os.listdir(DATA_DIR)):
        if not md_file.endswith(".md"):
            continue
        convert_md_to_html(os.path.join(DATA_DIR, md_file), HTML_DIR)
        # Swap only the extension (str.replace would also rewrite ".md"
        # occurring earlier in the name).
        # NOTE(review): assumes the converter writes <stem>.html into
        # HTML_DIR - confirm against md_html.
        stem = os.path.splitext(md_file)[0]
        html_paths.append(os.path.join(HTML_DIR, stem + ".html"))

    articles_df = pd.DataFrame(all_articles)
    insights_df = build_company_insights(company_data)
    return html_paths, articles_df, insights_df
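# --- Commented-out alternate copy of the module above, kept for reference (not executed).
# --- It omits the investment "Signal", the metrics box, and the cleanup of old reports.
# import os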
# import pandas as pd
# from datetime import datetime
# from dotenv import load_dotenv
# from md_html import convert_single_md_to_html as convert_md_to_html
# from news_analysis import fetch_deep_news, generate_value_investor_report
# from fin_interpreter import analyze_article
# BASE_DIR = os.path.dirname(os.path.dirname(__file__))
# DATA_DIR = os.path.join(BASE_DIR, "data")
# HTML_DIR = os.path.join(BASE_DIR, "html")
# os.makedirs(DATA_DIR, exist_ok=True)
# os.makedirs(HTML_DIR, exist_ok=True)
# load_dotenv()
# # === Priority Logic ===
# def derive_priority(sentiment, confidence):
# sentiment = sentiment.lower()
# if sentiment == "positive" and confidence > 0.7:
# return "High"
# if sentiment == "negative" and confidence > 0.6:
# return "High"
# if confidence > 0.5:
# return "Medium"
# return "Low"
# # === Main Analysis ===
# def run_value_investing_analysis(csv_path, progress_callback=None):
# current_df = pd.read_csv(csv_path)
# all_articles = []
# company_data = []
# for _, row in current_df.iterrows():
# topic = row.get("topic")
# timespan = row.get("timespan_days", 7)
# if progress_callback:
# progress_callback(f"🔍 Processing topic: {topic} ({timespan} days)")
# try:
# news = fetch_deep_news(topic, timespan)
# if progress_callback:
# progress_callback(f"[DEBUG] fetch_deep_news returned {len(news) if news else 0} articles.")
# except Exception as e:
# if progress_callback:
# progress_callback(f"[ERROR] fetch_deep_news failed: {e}")
# continue
# if not news:
# if progress_callback:
# progress_callback(f"⚠️ No news found for topic: {topic}")
# continue
# for article in news:
# summary = article.get("summary", "") or article.get("content", "")
# title = article.get("title", "Untitled")
# url = article.get("url", "")
# date = article.get("date", datetime.now().strftime("%Y-%m-%d"))
# try:
# result = analyze_article(summary)
# sentiment = result.get("sentiment", "Neutral")
# confidence = float(result.get("confidence", 0.0))
# if progress_callback:
# progress_callback(f"📰 [{title[:50]}...] → Sentiment: {sentiment}, Confidence: {confidence}")
# except Exception as e:
# if progress_callback:
# progress_callback(f"[FinBERT ERROR] {e}")
# sentiment, confidence = "Neutral", 0.0
# priority = derive_priority(sentiment, confidence)
# all_articles.append({
# "Title": title,
# "URL": url,
# "Summary": summary[:300] + "..." if summary else "",
# "Priority": priority,
# "Date": date,
# "Sentiment": sentiment,
# "Confidence": confidence
# })
# company_data.append({
# "Company": topic,
# "Sentiment": sentiment,
# "Confidence": confidence,
# "Summary": summary,
# })
# try:
# report_body = generate_value_investor_report(topic, news)
# filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}.md"
# filepath = os.path.join(DATA_DIR, filename)
# with open(filepath, "w", encoding="utf-8") as f:
# f.write(report_body)
# except Exception as e:
# if progress_callback:
# progress_callback(f"[REPORT ERROR] {e}")
# return all_articles, company_data
# # === Insights Tab Data ===
# def build_company_insights(company_data):
# if not company_data:
# return pd.DataFrame()
# df = pd.DataFrame(company_data)
# insights = []
# for company, group in df.groupby("Company"):
# mentions = len(group)
# dominant_sentiment = group["Sentiment"].mode()[0] if not group["Sentiment"].mode().empty else "Neutral"
# avg_confidence = round(group["Confidence"].mean(), 2)
# highlights = " | ".join(group["Summary"].head(2).tolist())
# insights.append({
# "Company": company,
# "Mentions": mentions,
# "Sentiment": dominant_sentiment,
# "Confidence": avg_confidence,
# "Highlights": highlights
# })
# return pd.DataFrame(insights).sort_values(by="Confidence", ascending=False).head(5)
# # === Pipeline ===
# def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
# os.environ["TAVILY_API_KEY"] = tavily_api_key
# all_articles, company_data = run_value_investing_analysis(csv_path, progress_callback)
# html_paths = []
# for md_file in os.listdir(DATA_DIR):
# if md_file.endswith(".md"):
# convert_md_to_html(os.path.join(DATA_DIR, md_file), HTML_DIR)
# html_paths.append(os.path.join(HTML_DIR, md_file.replace(".md", ".html")))
# articles_df = pd.DataFrame(all_articles)
# insights_df = build_company_insights(company_data)
# return html_paths, articles_df, insights_df