import sqlite3
import pandas as pd
import os
from datetime import datetime
import streamlit as st

# def get_db_path():
#     db_path = "../database/stock_insights.db"
#     if not os.path.exists(db_path) and os.path.exists("/tmp/stock_insights.db"):
#         db_path = "/tmp/stock_insights.db"
#     return db_path

# download from Hugging Face dataset
def ensure_db():
    repo_path = os.path.join(os.getcwd(), "database", "stock_insights.db")
    if os.path.exists(repo_path):
        return repo_path

    candidates = [
        os.path.join("/app", "database", "stock_insights.db"),
        os.path.join("/tmp", "database", "stock_insights.db"),
        os.path.join("/tmp", "stock_insights.db"),
    ]
    for p in candidates:
        if os.path.exists(p):
            return p

    try:
        from huggingface_hub import hf_hub_download
        tmp_dir = os.path.join("/tmp", "database")
        os.makedirs(tmp_dir, exist_ok=True)
        local_file = hf_hub_download(
            repo_id="PuppetLover/stock_insights",
            filename="stock_insights.db",
            repo_type="dataset",
            local_dir=tmp_dir,
            local_dir_use_symlinks=False,
        )
        return local_file
    except Exception as e:
        local_rel = os.path.join("database", "stock_insights.db")
        if os.path.exists(local_rel):
            return local_rel
        raise RuntimeError(f"Cannot access or download database file: {e}")

# gọi và gán hằng DB_PATH dùng trong module
DB_PATH = ensure_db()   

def generate_stock_report(stock_code, time_period):

    start_date, end_date = time_period
    today = datetime.now().date()
    # db_path = os.path.join("database", "stock_insights.db")
    db_path = DB_PATH

    report = {
        "stock_code": stock_code,
        "report_period": f"{start_date} to {end_date}"
    }

    with sqlite3.connect(db_path) as conn:
        # Tạo bảng tạm relevant_articles
        conn.execute("DROP TABLE IF EXISTS relevant_articles;")
        conn.execute("""
            CREATE TEMP TABLE relevant_articles AS
            SELECT DISTINCT article_id FROM entities 
            WHERE entity_text =? 
              AND entity_type IN ('STOCK', 'COMPANY')
              AND confidence = 'high'
              AND article_id IN (
                  SELECT article_id FROM articles WHERE publish_date BETWEEN ? AND ?
              );
        """, (stock_code, start_date, end_date))

        # 1. OVERALL SENTIMENT
        q_sentences = """
            SELECT s.sentiment_score, s.sentiment_label, a.publish_date
            FROM sentences s
            JOIN articles a ON s.article_id = a.article_id
            WHERE s.article_id IN (
                SELECT s2.sentence_id FROM sentences s2
                WHERE s2.article_id IN (SELECT article_id FROM relevant_articles)
            )
            AND s.sentiment_score IS NOT NULL;
        """
        df_sent = pd.read_sql_query(q_sentences, conn)
        
        if not df_sent.empty:
            df_sent['publish_date'] = pd.to_datetime(df_sent['publish_date']).dt.date
            df_sent['days_ago'] = (today - df_sent['publish_date']).apply(lambda x: x.days)
            df_sent['weight'] = 1 / (df_sent['days_ago'] + 1)
            weighted_score = (df_sent['sentiment_score'] * df_sent['weight']).sum() / df_sent['weight'].sum()
            # Chuẩn hóa nhãn sentiment về lower-case
            df_sent['sentiment_label'] = df_sent['sentiment_label'].str.lower()
            sentiment_counts = df_sent['sentiment_label'].value_counts().to_dict()
            trend = "Tích cực" if weighted_score > 0.1 else "Tiêu cực" if weighted_score < -0.1 else "Trung tính"
        else:
            weighted_score, sentiment_counts, trend = 0.0, {}, "Không có dữ liệu"

        report["overall_sentiment"] = {
            "score": weighted_score,
            "trend": trend,
            "positive_mentions": sentiment_counts.get("positive", 0),
            "negative_mentions": sentiment_counts.get("negative", 0),
            "neutral_mentions": sentiment_counts.get("neutral", 0)
        }

        # 2. KEY EVENTS, RISKS, PRICE ACTIONS
        def get_key_entities(entity_type):
            query = f"""
                SELECT
                    e.entity_text,
                    COUNT(e.entity_id) as count,
                    AVG(s.sentiment_score) as avg_sentiment
                FROM entities e
                JOIN sentences s ON e.sentence_id = s.sentence_id
                WHERE e.article_id IN (SELECT article_id FROM relevant_articles)
                  AND e.entity_type =?
                GROUP BY e.entity_text
                ORDER BY count DESC
                LIMIT 5;
            """
            df = pd.read_sql_query(query, conn, params=(entity_type,))
            def score_to_label(score):
                if score is None: return "N/A"
                return "Tích cực" if score > 0.1 else "Tiêu cực" if score < -0.1 else "Trung tính"
            df['sentiment'] = df['avg_sentiment'].apply(score_to_label)
            return df.to_dict('records')

        report["key_events"] = get_key_entities('EVENT')
        report["key_price_actions"] = get_key_entities('PRICE_ACTION')
        report["key_risks_mentioned"] = get_key_entities('RISK')

        # 3. TOP RELATED ENTITIES
        q_related = """
            SELECT e.entity_type, e.entity_text
            FROM entities e
            WHERE e.article_id IN (SELECT article_id FROM relevant_articles)
              AND e.entity_text!=?
              AND e.entity_type IN ('STOCK', 'COMPANY', 'PERSON');
        """
        df_related = pd.read_sql_query(q_related, conn, params=(stock_code,))
        top_related = {}
        if not df_related.empty:
            for etype in ['STOCK', 'COMPANY', 'PERSON']:
                top_related[etype.lower() + 's'] = df_related[df_related['entity_type'] == etype]['entity_text'].value_counts().head(3).index.tolist()
        report["top_related_entities"] = top_related

        # 4. SOURCE ARTICLES
        q_articles = """
            SELECT a.title, a.source_url, s.sentiment_label
            FROM articles a
            JOIN sentences s ON a.article_id = s.article_id
            WHERE a.article_id IN (SELECT article_id FROM relevant_articles)
            GROUP BY a.article_id
            ORDER BY a.publish_date DESC
            LIMIT 5;
        """
        df_articles = pd.read_sql_query(q_articles, conn)
        report["source_articles"] = df_articles.to_dict('records')

    return report

# --- HIỂN THỊ BÁO CÁO ---
def show_report(report_data, summary, stock_code_input):
    st.markdown(
        f"<h3 style='text-align: center; color: #30cfd0; margin-top:2rem;'>Báo cáo Phân tích cho {report_data.get('stock_code', stock_code_input)}</h3>", unsafe_allow_html=True)
    st.markdown(
        f"<p style='text-align: center; color: #94a3b8;'>Giai đoạn: {report_data.get('report_period', 'N/A')}</p>", unsafe_allow_html=True)

    st.markdown("#### 🤖 Tóm tắt từ AI")
    st.info(summary)

    # Tổng quan cảm xúc
    st.markdown("#### 📊 Tổng quan Cảm xúc")
    sentiment = report_data['overall_sentiment']
    score = sentiment['score']
    trend_color = "normal"
    if sentiment['trend'] == "Tích cực":
        trend_color = "normal"
        if sentiment['trend'] == "Tiêu cực":
            trend_color = "inverse"

    st.metric(
        label="Điểm Cảm xúc (có trọng số thời gian)",
        value=f"{score:.2f}" if score is not None else "N/A",
        delta=sentiment['trend'],
        delta_color=trend_color
    )

    col1, col2, col3 = st.columns(3)
    col1.metric("👍 Tích cực", sentiment['positive_mentions'])
    col2.metric("👎 Tiêu cực", sentiment['negative_mentions'])
    col3.metric("😐 Trung tính", sentiment['neutral_mentions'])

    # Các bảng chi tiết
    st.markdown("---")

    col_events, col_risks = st.columns(2)
    with col_events:
        st.markdown("#### ⚡ Sự kiện Nổi bật")
        if report_data["key_events"]:
            # Kiểm tra key thực tế
            df_events = pd.DataFrame(report_data["key_events"])
            if 'avg_sentiment' in df_events.columns:
                df_events = df_events.rename(
                    columns={'entity_text': 'Sự kiện', 'avg_sentiment': 'Sentiment'})
                show_cols = ['Sự kiện', 'count', 'Sentiment']
            elif 'sentiment' in df_events.columns:
                df_events = df_events.rename(
                    columns={'entity_text': 'Sự kiện'})
                show_cols = ['Sự kiện', 'count', 'sentiment']
            else:
                df_events = df_events.rename(
                    columns={'entity_text': 'Sự kiện'})
                show_cols = ['Sự kiện', 'count']
            st.dataframe(df_events[show_cols], use_container_width=True)
        else:
            st.write("Không có sự kiện nổi bật.")

    with col_risks:
        st.markdown("#### ⚠️ Rủi ro được đề cập")
        if report_data["key_risks_mentioned"]:
            df_risks = pd.DataFrame(report_data["key_risks_mentioned"])
            if 'avg_sentiment' in df_risks.columns:
                df_risks = df_risks.rename(
                    columns={'entity_text': 'Rủi ro', 'avg_sentiment': 'Sentiment'})
                show_cols = ['Rủi ro', 'count', 'Sentiment']
            elif 'sentiment' in df_risks.columns:
                df_risks = df_risks.rename(
                    columns={'entity_text': 'Rủi ro'})
                show_cols = ['Rủi ro', 'count', 'sentiment']
            else:
                df_risks = df_risks.rename(
                    columns={'entity_text': 'Rủi ro'})
                show_cols = ['Rủi ro', 'count']
            st.dataframe(df_risks[show_cols], use_container_width=True)
        else:
            st.write("Không có rủi ro nổi bật.")

    st.markdown("#### 📈 Hành động Giá Chính")
    if report_data["key_price_actions"]:
        df_price = pd.DataFrame(report_data["key_price_actions"])
        if 'avg_sentiment' in df_price.columns:
            df_price = df_price.rename(
                columns={'entity_text': 'Hành động giá', 'avg_sentiment': 'Sentiment'})
            show_cols = ['Hành động giá', 'count', 'Sentiment']
        elif 'sentiment' in df_price.columns:
            df_price = df_price.rename(
                columns={'entity_text': 'Hành động giá'})
            show_cols = ['Hành động giá', 'count', 'sentiment']
        else:
            df_price = df_price.rename(
                columns={'entity_text': 'Hành động giá'})
            show_cols = ['Hành động giá', 'count']
        st.dataframe(df_price[show_cols], use_container_width=True)
    else:
        st.write("Không có hành động giá nổi bật.")

    # Thực thể liên quan
    st.markdown("---")
    st.markdown("#### 🔗 Các Thực thể Liên quan nhiều nhất")
    related = report_data['top_related_entities']
    if any(related.values()):
        for etype, entities in related.items():
            if entities:
                st.markdown(
                    f"**{etype.replace('_', ' ').title()}:** {', '.join(entities)}")
    else:
        st.write("Không tìm thấy thực thể liên quan nổi bật.")

    # Nguồn bài viết
    st.markdown("---")
    st.markdown("#### 📰 Nguồn Bài viết Tham khảo")
    if report_data["source_articles"]:
        for article in report_data["source_articles"]:
            st.markdown(
                f"- [{article['title']}]({article['source_url']}) - *Cảm xúc: {article['sentiment_label']}*")
    else:
        st.write("Không có bài viết nào trong khoảng thời gian này.")

    st.markdown("</div>", unsafe_allow_html=True)