Spaces:

sigridveronica
/

ai-news-analyzer

Running

Sigrid De los Santos

App is ready

215f78a 7 days ago

11.8 kB

	import os
	import pandas as pd
	from datetime import datetime
	from dotenv import load_dotenv
	from md_html import convert_single_md_to_html as convert_md_to_html
	from news_analysis import fetch_deep_news, generate_value_investor_report
	from fin_interpreter import analyze_article

	# Project root: the parent of the directory that contains this file.
	# abspath() guards against a relative __file__, for which the nested
	# dirname() calls would collapse to "" and the paths below would silently
	# resolve against the current working directory instead.
	BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	DATA_DIR = os.path.join(BASE_DIR, "data")  # generated Markdown reports
	HTML_DIR = os.path.join(BASE_DIR, "html")  # HTML renderings of the reports

	# Create both output directories at import time; existing ones are kept.
	os.makedirs(DATA_DIR, exist_ok=True)
	os.makedirs(HTML_DIR, exist_ok=True)

	# Load environment variables from a .env file, if one is present.
	load_dotenv()

	# === Priority Logic ===
	def derive_priority(sentiment, confidence):
	    """Map a sentiment label and a confidence score to a priority bucket.

	    Args:
	        sentiment: Sentiment label, compared case-insensitively (and ignoring
	            surrounding whitespace) against "positive" and "negative".
	            ``None`` or non-string values are treated as an unrecognised
	            label instead of raising.
	        confidence: Numeric confidence score for the label.

	    Returns:
	        "High" for positive sentiment with confidence above 0.7 or negative
	        sentiment with confidence above 0.6; otherwise "Medium" when the
	        confidence is above 0.5; otherwise "Low".
	    """
	    label = "" if sentiment is None else str(sentiment).strip().lower()
	    # Negative sentiment is escalated at a lower confidence bar than positive.
	    high_threshold = {"positive": 0.7, "negative": 0.6}.get(label)
	    if high_threshold is not None and confidence > high_threshold:
	        return "High"
	    if confidence > 0.5:
	        return "Medium"
	    return "Low"

	# === Metrics Box ===
	def build_metrics_box(topic, num_articles):
	    """Build the Markdown header block placed at the top of a topic report.

	    The block is assembled line by line instead of with a multi-line string
	    literal, so the output never picks up leading whitespace from the source
	    file's indentation (indented lines would be rendered as a code block
	    rather than as a blockquote).

	    Args:
	        topic: Topic name shown in the header.
	        num_articles: Number of articles collected for the topic.

	    Returns:
	        A string that starts with a newline, followed by three blockquote
	        lines (topic, article count, local generation time formatted as
	        ``YYYY-MM-DD HH:MM``), a ``---`` rule and a trailing newline.
	    """
	    now = datetime.now().strftime("%Y-%m-%d %H:%M")
	    lines = [
	        "",  # leading newline, as in the original literal
	        f"> Topic: `{topic}`",
	        f"> Articles Collected: `{num_articles}`",
	        f"> Generated: `{now}`",
	        "---",
	        "",  # trailing newline so the report body starts on its own line
	    ]
	    return "\n".join(lines)

	# === Main Analysis ===
	def run_value_investing_analysis(csv_path, progress_callback=None):
	    """Fetch, score and report on news for every topic listed in a CSV file.

	    For each CSV row the topic's news is fetched with ``fetch_deep_news``,
	    every article is scored with ``analyze_article``, and a Markdown report
	    from ``generate_value_investor_report`` (prefixed with the metrics box)
	    is written to ``DATA_DIR``. Failures while fetching, scoring or writing
	    a report are sent to ``progress_callback`` and do not abort the run.

	    Args:
	        csv_path: Path to a CSV file with a ``topic`` column and an optional
	            ``timespan_days`` column (7 is used when missing or blank).
	        progress_callback: Optional callable that receives one message
	            string per notable event.

	    Returns:
	        A tuple ``(all_articles, company_data)`` of lists of dicts. Each
	        article contributes one row to both lists: an article row (Title,
	        URL, Summary, Priority, Sentiment, Confidence, Signal, Date) and a
	        company row (Company, Sentiment, Confidence, Signal, Summary,
	        Priority).
	    """
	    def notify(message):
	        # Progress reporting is optional; stay silent without a callback.
	        if progress_callback:
	            progress_callback(message)

	    topics_df = pd.read_csv(csv_path)
	    all_articles = []
	    company_data = []

	    for _, row in topics_df.iterrows():
	        topic = row.get("topic")
	        # Blank CSV cells arrive as NaN; there is nothing to search for then.
	        if topic is None or pd.isna(topic) or not str(topic).strip():
	            notify("⚠️ Skipping row without a topic")
	            continue
	        topic = str(topic).strip()
	        timespan = _coerce_timespan(row.get("timespan_days", 7))

	        try:
	            news = fetch_deep_news(topic, timespan)
	        except Exception as e:
	            # Best-effort boundary: one failing topic must not stop the rest.
	            notify(f"[ERROR] fetch_deep_news failed: {e}")
	            continue

	        if not news:
	            notify(f"⚠️ No news found for topic: {topic}")
	            continue

	        for article in news:
	            summary = article.get("summary") or article.get("content") or ""
	            title = article.get("title", "Untitled")
	            url = article.get("url", "")
	            date = article.get("date", datetime.now().strftime("%Y-%m-%d"))

	            try:
	                result = analyze_article(summary)
	                # "or" also replaces explicit None values, not only
	                # missing keys.
	                sentiment = result.get("sentiment") or "Neutral"
	                confidence = float(result.get("confidence") or 0.0)
	                signal = result.get("investment_decision") or "Watch"
	            except Exception as e:
	                # Fall back to neutral defaults so the article is still listed.
	                notify(f"[FinBERT ERROR] {e}")
	                sentiment, confidence, signal = "Neutral", 0.0, "Watch"

	            priority = derive_priority(sentiment, confidence)

	            all_articles.append({
	                "Title": title,
	                "URL": url,
	                "Summary": (summary[:300] + "...") if summary else "",
	                "Priority": priority,
	                "Sentiment": sentiment,
	                "Confidence": confidence,
	                "Signal": signal,
	                "Date": date,
	            })

	            company_data.append({
	                "Company": topic,
	                "Sentiment": sentiment,
	                "Confidence": confidence,
	                "Signal": signal,
	                "Summary": summary,
	                "Priority": priority,
	            })

	        # One Markdown report per topic, written after all its articles
	        # have been scored.
	        try:
	            report_body = generate_value_investor_report(topic, news)
	            full_md = build_metrics_box(topic, len(news)) + report_body
	            filepath = os.path.join(DATA_DIR, _report_filename(topic))
	            with open(filepath, "w", encoding="utf-8") as f:
	                f.write(full_md)
	        except Exception as e:
	            notify(f"[REPORT ERROR] {e}")

	    return all_articles, company_data


	def _coerce_timespan(value, default=7):
	    """Return *value* as a plain ``int`` number of days, or *default*."""
	    try:
	        # int() rejects NaN (blank cell) and turns NumPy scalars into a
	        # built-in int before the value is handed to the news fetcher.
	        return int(value)
	    except (TypeError, ValueError, OverflowError):
	        return default


	def _report_filename(topic):
	    """Return the ``<topic>_<YYYY-MM-DD>.md`` file name for a topic report."""
	    import re

	    slug = topic.replace(" ", "_").lower()
	    # Topics come straight from the CSV: replace path separators and other
	    # characters that are unsafe in file names so the report cannot end up
	    # outside DATA_DIR.
	    slug = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", slug)
	    return f"{slug}_{datetime.now().strftime('%Y-%m-%d')}.md"

	# === Insights Tab Data ===
	def build_company_insights(company_data):
	    """Aggregate per-article company rows into a top-5 insights table.

	    Args:
	        company_data: List of dicts with the keys "Company", "Signal",
	            "Confidence", "Priority" and "Summary".

	    Returns:
	        A DataFrame with one row per company and the columns "Company",
	        "Mentions", "Dominant Signal", "Avg Confidence",
	        "Interest % (High Priority)" and "Highlights", sorted by
	        "Avg Confidence" in descending order and limited to five rows.
	        An empty DataFrame is returned when ``company_data`` is empty.
	    """
	    if not company_data:
	        return pd.DataFrame()

	    df = pd.DataFrame(company_data)
	    insights = []
	    for company, group in df.groupby("Company"):
	        # Most frequent signal; "Watch" when the group has no usable value.
	        signal_modes = group["Signal"].mode()
	        dominant_signal = signal_modes.iloc[0] if not signal_modes.empty else "Watch"

	        avg_confidence = round(group["Confidence"].mean(), 2)
	        high_priority_ratio = round(
	            (group["Priority"] == "High").sum() / len(group) * 100, 1
	        )

	        # First two available summaries, separated by a literal " \| "
	        # (backslash + pipe). Missing summaries are skipped so join() never
	        # sees a non-string value.
	        highlights = " \\| ".join(
	            str(text) for text in group["Summary"].dropna().head(2)
	        )

	        insights.append({
	            "Company": company,
	            "Mentions": len(group),
	            "Dominant Signal": dominant_signal,
	            "Avg Confidence": avg_confidence,
	            "Interest % (High Priority)": f"{high_priority_ratio}%",
	            "Highlights": highlights,
	        })

	    ranked = pd.DataFrame(insights).sort_values(by="Avg Confidence", ascending=False)
	    return ranked.head(5)

	# === Pipeline ===
	def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
	    """Run the full analysis and convert the generated reports to HTML.

	    Steps: export the Tavily key through the environment, delete reports
	    left over in ``DATA_DIR``/``HTML_DIR``, run
	    ``run_value_investing_analysis`` and convert every Markdown report in
	    ``DATA_DIR`` to HTML in ``HTML_DIR``.

	    Args:
	        csv_path: Path to the topics CSV, passed on to
	            ``run_value_investing_analysis``.
	        tavily_api_key: Value stored in the ``TAVILY_API_KEY`` environment
	            variable before the analysis runs.
	        progress_callback: Optional callable receiving progress messages;
	            passed on to ``run_value_investing_analysis``.

	    Returns:
	        A tuple ``(html_paths, articles_df, insights_df)``: the expected
	        paths of the HTML reports (sorted by file name), a DataFrame of all
	        analysed articles and the company insights DataFrame.
	    """
	    os.environ["TAVILY_API_KEY"] = tavily_api_key

	    # Remove reports from previous runs so only fresh output is returned.
	    for directory, suffix in ((DATA_DIR, ".md"), (HTML_DIR, ".html")):
	        for name in os.listdir(directory):
	            if name.endswith(suffix):
	                os.remove(os.path.join(directory, name))

	    all_articles, company_data = run_value_investing_analysis(csv_path, progress_callback)

	    html_paths = []
	    # os.listdir() returns names in arbitrary order; sort for a stable result.
	    for md_name in sorted(os.listdir(DATA_DIR)):
	        if not md_name.endswith(".md"):
	            continue
	        convert_md_to_html(os.path.join(DATA_DIR, md_name), HTML_DIR)
	        # Swap only the extension; str.replace() would also rewrite a ".md"
	        # occurring earlier in the name.
	        # NOTE(review): assumes the converter writes "<stem>.html" into
	        # HTML_DIR - confirm against md_html.
	        html_name = os.path.splitext(md_name)[0] + ".html"
	        html_paths.append(os.path.join(HTML_DIR, html_name))

	    articles_df = pd.DataFrame(all_articles)
	    insights_df = build_company_insights(company_data)
	    return html_paths, articles_df, insights_df

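	# NOTE: Everything from here to the end of the file is an earlier,
	# commented-out version of this module (no "Signal" field, no metrics box,
	# no cleanup of old reports). It is never executed.
	# import os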
	# import pandas as pd
	# from datetime import datetime
	# from dotenv import load_dotenv
	# from md_html import convert_single_md_to_html as convert_md_to_html
	# from news_analysis import fetch_deep_news, generate_value_investor_report
	# from fin_interpreter import analyze_article

	# BASE_DIR = os.path.dirname(os.path.dirname(__file__))
	# DATA_DIR = os.path.join(BASE_DIR, "data")
	# HTML_DIR = os.path.join(BASE_DIR, "html")

	# os.makedirs(DATA_DIR, exist_ok=True)
	# os.makedirs(HTML_DIR, exist_ok=True)

	# load_dotenv()

	# # === Priority Logic ===
	# def derive_priority(sentiment, confidence):
	# sentiment = sentiment.lower()
	# if sentiment == "positive" and confidence > 0.7:
	# return "High"
	# if sentiment == "negative" and confidence > 0.6:
	# return "High"
	# if confidence > 0.5:
	# return "Medium"
	# return "Low"

	# # === Main Analysis ===
	# def run_value_investing_analysis(csv_path, progress_callback=None):
	# current_df = pd.read_csv(csv_path)
	# all_articles = []
	# company_data = []

	# for _, row in current_df.iterrows():
	# topic = row.get("topic")
	# timespan = row.get("timespan_days", 7)
	# if progress_callback:
	# progress_callback(f"🔍 Processing topic: {topic} ({timespan} days)")

	# try:
	# news = fetch_deep_news(topic, timespan)
	# if progress_callback:
	# progress_callback(f"[DEBUG] fetch_deep_news returned {len(news) if news else 0} articles.")
	# except Exception as e:
	# if progress_callback:
	# progress_callback(f"[ERROR] fetch_deep_news failed: {e}")
	# continue

	# if not news:
	# if progress_callback:
	# progress_callback(f"⚠️ No news found for topic: {topic}")
	# continue

	# for article in news:
	# summary = article.get("summary", "") or article.get("content", "")
	# title = article.get("title", "Untitled")
	# url = article.get("url", "")
	# date = article.get("date", datetime.now().strftime("%Y-%m-%d"))

	# try:
	# result = analyze_article(summary)
	# sentiment = result.get("sentiment", "Neutral")
	# confidence = float(result.get("confidence", 0.0))
	# if progress_callback:
	# progress_callback(f"📰 [{title[:50]}...] → Sentiment: {sentiment}, Confidence: {confidence}")
	# except Exception as e:
	# if progress_callback:
	# progress_callback(f"[FinBERT ERROR] {e}")
	# sentiment, confidence = "Neutral", 0.0

	# priority = derive_priority(sentiment, confidence)

	# all_articles.append({
	# "Title": title,
	# "URL": url,
	# "Summary": summary[:300] + "..." if summary else "",
	# "Priority": priority,
	# "Date": date,
	# "Sentiment": sentiment,
	# "Confidence": confidence
	# })

	# company_data.append({
	# "Company": topic,
	# "Sentiment": sentiment,
	# "Confidence": confidence,
	# "Summary": summary,
	# })

	# try:
	# report_body = generate_value_investor_report(topic, news)
	# filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}.md"
	# filepath = os.path.join(DATA_DIR, filename)
	# with open(filepath, "w", encoding="utf-8") as f:
	# f.write(report_body)
	# except Exception as e:
	# if progress_callback:
	# progress_callback(f"[REPORT ERROR] {e}")

	# return all_articles, company_data

	# # === Insights Tab Data ===
	# def build_company_insights(company_data):
	# if not company_data:
	# return pd.DataFrame()

	# df = pd.DataFrame(company_data)
	# insights = []
	# for company, group in df.groupby("Company"):
	# mentions = len(group)
	# dominant_sentiment = group["Sentiment"].mode()[0] if not group["Sentiment"].mode().empty else "Neutral"
	# avg_confidence = round(group["Confidence"].mean(), 2)
	# highlights = " \| ".join(group["Summary"].head(2).tolist())
	# insights.append({
	# "Company": company,
	# "Mentions": mentions,
	# "Sentiment": dominant_sentiment,
	# "Confidence": avg_confidence,
	# "Highlights": highlights
	# })
	# return pd.DataFrame(insights).sort_values(by="Confidence", ascending=False).head(5)

	# # === Pipeline ===
	# def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
	# os.environ["TAVILY_API_KEY"] = tavily_api_key
	# all_articles, company_data = run_value_investing_analysis(csv_path, progress_callback)

	# html_paths = []
	# for md_file in os.listdir(DATA_DIR):
	# if md_file.endswith(".md"):
	# convert_md_to_html(os.path.join(DATA_DIR, md_file), HTML_DIR)
	# html_paths.append(os.path.join(HTML_DIR, md_file.replace(".md", ".html")))

	# articles_df = pd.DataFrame(all_articles)
	# insights_df = build_company_insights(company_data)
	# return html_paths, articles_df, insights_df