Spaces:
Running
Running
import os | |
import pandas as pd | |
from datetime import datetime | |
from dotenv import load_dotenv | |
from md_html import convert_single_md_to_html as convert_md_to_html | |
from news_analysis import fetch_deep_news, generate_value_investor_report | |
from fin_interpreter import analyze_article | |
# Project layout: BASE_DIR is the parent of the directory that holds this
# module; "data" and "html" are its two output sub-directories.
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
DATA_DIR, HTML_DIR = (os.path.join(BASE_DIR, leaf) for leaf in ("data", "html"))

# Create both output directories up front; an existing directory is not an error.
for _out_dir in (DATA_DIR, HTML_DIR):
    os.makedirs(_out_dir, exist_ok=True)

# python-dotenv hook; presumably loads variables from a local .env file
# into the process environment -- confirm against the deployment setup.
load_dotenv()
# === Priority Logic ===
def derive_priority(sentiment, confidence):
    """Map a sentiment label and its confidence score to a priority tier.

    Args:
        sentiment: Sentiment label such as ``"Positive"`` or ``"negative"``.
            Matching is case-insensitive and ignores surrounding whitespace.
            ``None`` or any unrecognised value is treated as neither positive
            nor negative.
        confidence: Numeric confidence score attached to the label.

    Returns:
        ``"High"`` for positive sentiment with confidence above 0.7 or for
        negative sentiment with confidence above 0.6; otherwise ``"Medium"``
        when confidence is above 0.5, else ``"Low"``.
    """
    # Normalise defensively: a None or non-string label would make a bare
    # ``.lower()`` raise and take the whole analysis run down with it.
    label = "" if sentiment is None else str(sentiment).strip().lower()

    if label == "positive" and confidence > 0.7:
        return "High"
    # Negative sentiment is escalated at a lower confidence bar than positive.
    if label == "negative" and confidence > 0.6:
        return "High"
    if confidence > 0.5:
        return "Medium"
    return "Low"
# === Metrics Box ===
def build_metrics_box(topic, num_articles, generated_at=None):
    """Build the Markdown header block that precedes a topic report.

    Args:
        topic: Topic name shown in the header.
        num_articles: Number of articles collected for the topic.
        generated_at: Optional ``datetime`` to stamp the header with. Defaults
            to the current local time, which is what callers that pass only
            the first two arguments get.

    Returns:
        A Markdown string: a leading blank line, three block-quote lines
        (topic, article count, generation time), a ``---`` rule and a
        trailing newline.
    """
    if generated_at is None:
        generated_at = datetime.now()
    stamp = generated_at.strftime("%Y-%m-%d %H:%M")

    # The empty first and last items produce the leading and trailing
    # newlines when the lines are joined.
    lines = [
        "",
        f"> **Topic:** `{topic}`",
        f"> **Articles Collected:** `{num_articles}`",
        f"> **Generated:** `{stamp}`",
        "---",
        "",
    ]
    return "\n".join(lines)
# === Main Analysis ===
def _notify(progress_callback, message):
    """Forward ``message`` to ``progress_callback`` if a callback was given."""
    if progress_callback:
        progress_callback(message)


def _score_article(article, progress_callback=None):
    """Analyze one article and return ``(summary, sentiment, confidence, signal)``.

    Falls back to ``("Neutral", 0.0, "Watch")`` when the analyzer raises or
    returns something unusable.
    """
    # Prefer the summary, fall back to the content, and never leave the text
    # as None: downstream code slices and joins it as a string.
    summary = article.get("summary") or article.get("content") or ""
    try:
        result = analyze_article(summary)
        # ``or`` guards against keys that are present but hold None.
        sentiment = str(result.get("sentiment") or "Neutral")
        confidence = float(result.get("confidence", 0.0))
        signal = result.get("investment_decision") or "Watch"
    except Exception as e:  # best effort: one bad article must not stop the run
        _notify(progress_callback, f"[FinBERT ERROR] {e}")
        sentiment, confidence, signal = "Neutral", 0.0, "Watch"
    return summary, sentiment, confidence, signal


def _write_topic_report(topic, news):
    """Generate the Markdown report for ``topic`` and write it into DATA_DIR."""
    report_body = generate_value_investor_report(topic, news)
    full_md = build_metrics_box(topic, len(news)) + report_body

    safe_topic = topic.replace(" ", "_").lower()
    # A path separator inside the topic would point the report outside
    # DATA_DIR or at a directory that does not exist.
    for separator in ("/", "\\"):
        safe_topic = safe_topic.replace(separator, "_")

    filename = f"{safe_topic}_{datetime.now().strftime('%Y-%m-%d')}.md"
    with open(os.path.join(DATA_DIR, filename), "w", encoding="utf-8") as f:
        f.write(full_md)


def run_value_investing_analysis(csv_path, progress_callback=None):
    """Fetch, score and report on news for every topic listed in a CSV file.

    The CSV is expected to have a ``topic`` column and may have a
    ``timespan_days`` column (7 is used when the column or value is missing).
    For each topic the news is fetched, every article is scored, and a
    Markdown report is written into ``DATA_DIR``. Fetch, scoring and report
    failures are reported through ``progress_callback`` and do not abort the
    run; errors from reading the CSV itself propagate.

    Args:
        csv_path: Path (or file-like object) passed to ``pandas.read_csv``.
        progress_callback: Optional callable taking one message string.

    Returns:
        A tuple ``(all_articles, company_data)`` of two lists of dicts, one
        entry per analyzed article. ``all_articles`` entries carry ``Title``,
        ``URL``, ``Summary`` (at most 300 characters plus ``"..."`` when
        truncated), ``Priority``, ``Sentiment``, ``Confidence``, ``Signal``
        and ``Date``; ``company_data`` entries carry ``Company``,
        ``Sentiment``, ``Confidence``, ``Signal``, the full ``Summary`` and
        ``Priority``.
    """
    current_df = pd.read_csv(csv_path)
    all_articles = []
    company_data = []

    for _, row in current_df.iterrows():
        topic = row.get("topic")
        # Blank CSV cells arrive as NaN; there is nothing to search for.
        if pd.isna(topic) or not str(topic).strip():
            _notify(progress_callback, "⚠️ Skipping row without a topic")
            continue
        topic = str(topic).strip()

        timespan = row.get("timespan_days", 7)
        if pd.isna(timespan):
            # Column exists but this row's cell is empty: use the default.
            timespan = 7

        try:
            news = fetch_deep_news(topic, timespan)
        except Exception as e:
            _notify(progress_callback, f"[ERROR] fetch_deep_news failed: {e}")
            continue

        if not news:
            _notify(progress_callback, f"⚠️ No news found for topic: {topic}")
            continue

        for article in news:
            summary, sentiment, confidence, signal = _score_article(
                article, progress_callback
            )
            priority = derive_priority(sentiment, confidence)
            # Only mark the preview with an ellipsis when text was cut off.
            preview = (summary[:300] + "...") if len(summary) > 300 else summary

            all_articles.append({
                "Title": article.get("title", "Untitled"),
                "URL": article.get("url", ""),
                "Summary": preview,
                "Priority": priority,
                "Sentiment": sentiment,
                "Confidence": confidence,
                "Signal": signal,
                "Date": article.get("date", datetime.now().strftime("%Y-%m-%d")),
            })
            company_data.append({
                "Company": topic,
                "Sentiment": sentiment,
                "Confidence": confidence,
                "Signal": signal,
                "Summary": summary,
                "Priority": priority,
            })

        try:
            _write_topic_report(topic, news)
        except Exception as e:
            _notify(progress_callback, f"[REPORT ERROR] {e}")

    return all_articles, company_data
# === Insights Tab Data ===
def build_company_insights(company_data, top_n=5):
    """Aggregate per-article records into a per-company insights table.

    Args:
        company_data: List of dicts, one per analyzed article, with the keys
            ``"Company"``, ``"Signal"``, ``"Confidence"``, ``"Priority"`` and
            ``"Summary"``.
        top_n: Maximum number of companies to return (default 5).

    Returns:
        A DataFrame with one row per company and the columns ``Company``,
        ``Mentions``, ``Dominant Signal``, ``Avg Confidence``,
        ``Interest % (High Priority)`` and ``Highlights``, sorted by average
        confidence (highest first) and cut to ``top_n`` rows. An empty
        DataFrame is returned when there is nothing to aggregate.
    """
    if not company_data:
        return pd.DataFrame()

    records = pd.DataFrame(company_data)
    insights = []
    for company, group in records.groupby("Company"):
        # Compute the mode once; it is empty when every signal is missing.
        signal_modes = group["Signal"].mode()
        dominant_signal = signal_modes.iloc[0] if not signal_modes.empty else "Watch"

        avg_confidence = round(group["Confidence"].mean(), 2)
        high_priority_ratio = round(
            float((group["Priority"] == "High").sum() / len(group) * 100), 1
        )

        # Drop missing summaries before joining: str.join raises on None/NaN.
        summaries = group["Summary"].dropna().astype(str).head(2).tolist()

        insights.append({
            "Company": company,
            "Mentions": len(group),
            "Dominant Signal": dominant_signal,
            "Avg Confidence": avg_confidence,
            "Interest % (High Priority)": f"{high_priority_ratio}%",
            "Highlights": " | ".join(summaries),
        })

    if not insights:
        # groupby skips missing company keys, so there may be no groups at
        # all; sorting a column-less frame would raise KeyError.
        return pd.DataFrame()

    ranked = pd.DataFrame(insights).sort_values(by="Avg Confidence", ascending=False)
    return ranked.head(top_n)
# === Pipeline ===
def _remove_files_with_suffix(directory, suffix):
    """Delete every entry in ``directory`` whose name ends with ``suffix``."""
    for name in os.listdir(directory):
        if name.endswith(suffix):
            os.remove(os.path.join(directory, name))


def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
    """Run the full pipeline: clean old output, analyze, and render to HTML.

    Args:
        csv_path: CSV of topics, forwarded to ``run_value_investing_analysis``.
        tavily_api_key: API key exported as ``TAVILY_API_KEY``. When empty or
            ``None`` the environment is left as it is.
        progress_callback: Optional callable taking one message string.

    Returns:
        A tuple ``(html_paths, articles_df, insights_df)``: the HTML report
        paths in sorted filename order, a DataFrame of all analyzed articles,
        and the per-company insights DataFrame.
    """
    # os.environ only accepts strings, so assigning None raises TypeError.
    # Skipping the assignment also keeps a key that is already configured.
    if tavily_api_key:
        os.environ["TAVILY_API_KEY"] = tavily_api_key

    # === Clean old reports (MD and HTML) ===
    _remove_files_with_suffix(DATA_DIR, ".md")
    _remove_files_with_suffix(HTML_DIR, ".html")

    # === Run the new analysis ===
    all_articles, company_data = run_value_investing_analysis(csv_path, progress_callback)

    html_paths = []
    # os.listdir returns entries in arbitrary order; sort for a stable result.
    for md_file in sorted(os.listdir(DATA_DIR)):
        if md_file.endswith(".md"):
            convert_md_to_html(os.path.join(DATA_DIR, md_file), HTML_DIR)
            # NOTE(review): assumes the converter names its output by swapping
            # ".md" for ".html" -- confirm against md_html.
            html_paths.append(os.path.join(HTML_DIR, md_file.replace(".md", ".html")))

    articles_df = pd.DataFrame(all_articles)
    insights_df = build_company_insights(company_data)
    return html_paths, articles_df, insights_df
# import os | |
# import pandas as pd | |
# from datetime import datetime | |
# from dotenv import load_dotenv | |
# from md_html import convert_single_md_to_html as convert_md_to_html | |
# from news_analysis import fetch_deep_news, generate_value_investor_report | |
# from fin_interpreter import analyze_article | |
# BASE_DIR = os.path.dirname(os.path.dirname(__file__)) | |
# DATA_DIR = os.path.join(BASE_DIR, "data") | |
# HTML_DIR = os.path.join(BASE_DIR, "html") | |
# os.makedirs(DATA_DIR, exist_ok=True) | |
# os.makedirs(HTML_DIR, exist_ok=True) | |
# load_dotenv() | |
# # === Priority Logic === | |
# def derive_priority(sentiment, confidence): | |
# sentiment = sentiment.lower() | |
# if sentiment == "positive" and confidence > 0.7: | |
# return "High" | |
# if sentiment == "negative" and confidence > 0.6: | |
# return "High" | |
# if confidence > 0.5: | |
# return "Medium" | |
# return "Low" | |
# # === Main Analysis === | |
# def run_value_investing_analysis(csv_path, progress_callback=None): | |
# current_df = pd.read_csv(csv_path) | |
# all_articles = [] | |
# company_data = [] | |
# for _, row in current_df.iterrows(): | |
# topic = row.get("topic") | |
# timespan = row.get("timespan_days", 7) | |
# if progress_callback: | |
# progress_callback(f"🔍 Processing topic: {topic} ({timespan} days)") | |
# try: | |
# news = fetch_deep_news(topic, timespan) | |
# if progress_callback: | |
# progress_callback(f"[DEBUG] fetch_deep_news returned {len(news) if news else 0} articles.") | |
# except Exception as e: | |
# if progress_callback: | |
# progress_callback(f"[ERROR] fetch_deep_news failed: {e}") | |
# continue | |
# if not news: | |
# if progress_callback: | |
# progress_callback(f"⚠️ No news found for topic: {topic}") | |
# continue | |
# for article in news: | |
# summary = article.get("summary", "") or article.get("content", "") | |
# title = article.get("title", "Untitled") | |
# url = article.get("url", "") | |
# date = article.get("date", datetime.now().strftime("%Y-%m-%d")) | |
# try: | |
# result = analyze_article(summary) | |
# sentiment = result.get("sentiment", "Neutral") | |
# confidence = float(result.get("confidence", 0.0)) | |
# if progress_callback: | |
# progress_callback(f"📰 [{title[:50]}...] → Sentiment: {sentiment}, Confidence: {confidence}") | |
# except Exception as e: | |
# if progress_callback: | |
# progress_callback(f"[FinBERT ERROR] {e}") | |
# sentiment, confidence = "Neutral", 0.0 | |
# priority = derive_priority(sentiment, confidence) | |
# all_articles.append({ | |
# "Title": title, | |
# "URL": url, | |
# "Summary": summary[:300] + "..." if summary else "", | |
# "Priority": priority, | |
# "Date": date, | |
# "Sentiment": sentiment, | |
# "Confidence": confidence | |
# }) | |
# company_data.append({ | |
# "Company": topic, | |
# "Sentiment": sentiment, | |
# "Confidence": confidence, | |
# "Summary": summary, | |
# }) | |
# try: | |
# report_body = generate_value_investor_report(topic, news) | |
# filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}.md" | |
# filepath = os.path.join(DATA_DIR, filename) | |
# with open(filepath, "w", encoding="utf-8") as f: | |
# f.write(report_body) | |
# except Exception as e: | |
# if progress_callback: | |
# progress_callback(f"[REPORT ERROR] {e}") | |
# return all_articles, company_data | |
# # === Insights Tab Data === | |
# def build_company_insights(company_data): | |
# if not company_data: | |
# return pd.DataFrame() | |
# df = pd.DataFrame(company_data) | |
# insights = [] | |
# for company, group in df.groupby("Company"): | |
# mentions = len(group) | |
# dominant_sentiment = group["Sentiment"].mode()[0] if not group["Sentiment"].mode().empty else "Neutral" | |
# avg_confidence = round(group["Confidence"].mean(), 2) | |
# highlights = " | ".join(group["Summary"].head(2).tolist()) | |
# insights.append({ | |
# "Company": company, | |
# "Mentions": mentions, | |
# "Sentiment": dominant_sentiment, | |
# "Confidence": avg_confidence, | |
# "Highlights": highlights | |
# }) | |
# return pd.DataFrame(insights).sort_values(by="Confidence", ascending=False).head(5) | |
# # === Pipeline === | |
# def run_pipeline(csv_path, tavily_api_key, progress_callback=None): | |
# os.environ["TAVILY_API_KEY"] = tavily_api_key | |
# all_articles, company_data = run_value_investing_analysis(csv_path, progress_callback) | |
# html_paths = [] | |
# for md_file in os.listdir(DATA_DIR): | |
# if md_file.endswith(".md"): | |
# convert_md_to_html(os.path.join(DATA_DIR, md_file), HTML_DIR) | |
# html_paths.append(os.path.join(HTML_DIR, md_file.replace(".md", ".html"))) | |
# articles_df = pd.DataFrame(all_articles) | |
# insights_df = build_company_insights(company_data) | |
# return html_paths, articles_df, insights_df | |