import os
from datetime import datetime

import pandas as pd
from dotenv import load_dotenv

from md_html import convert_single_md_to_html as convert_md_to_html
from news_analysis import fetch_deep_news, generate_value_investor_report
from fin_interpreter import analyze_article

# Project layout: this module lives one directory below the repo root.
# Markdown reports are written to data/; rendered HTML copies go to html/.
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")
HTML_DIR = os.path.join(BASE_DIR, "html")
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(HTML_DIR, exist_ok=True)

load_dotenv()


# === Priority Logic ===
def derive_priority(sentiment, confidence):
    """Map a (sentiment, confidence) pair to a "High"/"Medium"/"Low" label.

    Strongly positive (confidence > 0.7) or strongly negative
    (confidence > 0.6) articles are High priority; anything above 0.5
    confidence is Medium; everything else is Low.
    """
    sentiment = sentiment.lower()
    if sentiment == "positive" and confidence > 0.7:
        return "High"
    if sentiment == "negative" and confidence > 0.6:
        return "High"
    if confidence > 0.5:
        return "Medium"
    return "Low"


# === Metrics Box ===
def build_metrics_box(topic, num_articles):
    """Return a Markdown blockquote summarising one topic's crawl run."""
    now = datetime.now().strftime("%Y-%m-%d %H:%M")
    return f"""
> **Topic:** `{topic}`
> **Articles Collected:** `{num_articles}`
> **Generated:** `{now}`

---
"""


# === Main Analysis ===
def run_value_investing_analysis(csv_path, progress_callback=None):
    """Fetch news for every topic in *csv_path* and score each article.

    The CSV is expected to have a ``topic`` column and an optional
    ``timespan_days`` column (defaults to 7).  For each topic a Markdown
    report is written into ``DATA_DIR``.

    Returns a tuple ``(all_articles, company_data)`` of per-article and
    per-company dict rows suitable for ``pd.DataFrame``.
    ``progress_callback``, if given, receives human-readable status strings.
    """
    current_df = pd.read_csv(csv_path)
    all_articles = []
    company_data = []

    for _, row in current_df.iterrows():
        topic = row.get("topic")
        timespan = row.get("timespan_days", 7)
        # A blank cell in the CSV comes through as NaN; fall back to the
        # documented 7-day default instead of forwarding NaN downstream.
        if pd.isna(timespan):
            timespan = 7

        try:
            news = fetch_deep_news(topic, timespan)
        except Exception as e:
            # Best-effort: a failing topic must not abort the whole run.
            if progress_callback:
                progress_callback(f"[ERROR] fetch_deep_news failed: {e}")
            continue

        if not news:
            if progress_callback:
                progress_callback(f"⚠️ No news found for topic: {topic}")
            continue

        for article in news:
            summary = article.get("summary", "") or article.get("content", "")
            title = article.get("title", "Untitled")
            url = article.get("url", "")
            date = article.get("date", datetime.now().strftime("%Y-%m-%d"))

            try:
                result = analyze_article(summary)
                sentiment = result.get("sentiment", "Neutral")
                confidence = float(result.get("confidence", 0.0))
                signal = result.get("investment_decision", "Watch")
            except Exception as e:
                # Model failure on one article degrades to neutral defaults.
                if progress_callback:
                    progress_callback(f"[FinBERT ERROR] {e}")
                sentiment, confidence, signal = "Neutral", 0.0, "Watch"

            priority = derive_priority(sentiment, confidence)

            all_articles.append({
                "Title": title,
                "URL": url,
                "Summary": summary[:300] + "..." if summary else "",
                "Priority": priority,
                "Sentiment": sentiment,
                "Confidence": confidence,
                "Signal": signal,
                "Date": date,
            })
            company_data.append({
                "Company": topic,
                "Sentiment": sentiment,
                "Confidence": confidence,
                "Signal": signal,
                "Summary": summary,
                "Priority": priority,
            })

        # One Markdown report per topic, prefixed with the metrics box.
        try:
            report_body = generate_value_investor_report(topic, news)
            metrics_md = build_metrics_box(topic, len(news))
            full_md = metrics_md + report_body
            filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}.md"
            filepath = os.path.join(DATA_DIR, filename)
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(full_md)
        except Exception as e:
            if progress_callback:
                progress_callback(f"[REPORT ERROR] {e}")

    return all_articles, company_data


# === Insights Tab Data ===
def build_company_insights(company_data):
    """Aggregate per-article rows into a top-5 per-company summary table.

    Returns a DataFrame with mention counts, the dominant trade signal,
    mean confidence, the share of High-priority articles, and up to two
    summary highlights per company, sorted by confidence (best 5 kept).
    Returns an empty DataFrame when *company_data* is empty.
    """
    if not company_data:
        return pd.DataFrame()

    df = pd.DataFrame(company_data)
    insights = []

    for company, group in df.groupby("Company"):
        mentions = len(group)
        # mode() can return an empty Series (e.g. all-NaN); compute it once.
        signal_modes = group["Signal"].mode()
        dominant_signal = signal_modes[0] if not signal_modes.empty else "Watch"
        avg_confidence = round(group["Confidence"].mean(), 2)
        high_priority_ratio = round((group['Priority'] == 'High').sum() / len(group) * 100, 1)
        highlights = " | ".join(group["Summary"].head(2).tolist())

        insights.append({
            "Company": company,
            "Mentions": mentions,
            "Dominant Signal": dominant_signal,
            "Avg Confidence": avg_confidence,
            "Interest % (High Priority)": f"{high_priority_ratio}%",
            "Highlights": highlights,
        })

    return pd.DataFrame(insights).sort_values(by="Avg Confidence", ascending=False).head(5)


# === Pipeline ===
def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
    """End-to-end run: clean old reports, analyse topics, render HTML.

    Returns ``(html_paths, articles_df, insights_df)``.
    """
    os.environ["TAVILY_API_KEY"] = tavily_api_key

    # === Clean old reports (MD and HTML) === so stale topics from a
    # previous run don't leak into this one.
    for file in os.listdir(DATA_DIR):
        if file.endswith(".md"):
            os.remove(os.path.join(DATA_DIR, file))
    for file in os.listdir(HTML_DIR):
        if file.endswith(".html"):
            os.remove(os.path.join(HTML_DIR, file))

    # === Run the new analysis ===
    all_articles, company_data = run_value_investing_analysis(csv_path, progress_callback)

    html_paths = []
    for md_file in os.listdir(DATA_DIR):
        if md_file.endswith(".md"):
            convert_md_to_html(os.path.join(DATA_DIR, md_file), HTML_DIR)
            html_paths.append(os.path.join(HTML_DIR, md_file.replace(".md", ".html")))

    articles_df = pd.DataFrame(all_articles)
    insights_df = build_company_insights(company_data)
    return html_paths, articles_df, insights_df