import json
import datetime
from typing import List, Dict

from fastapi import APIRouter
from pydantic import BaseModel
from dotenv import load_dotenv

from clients.redis_client import redis_client as _r
from models_initialization.mistral_registry import mistral_generate
from nuse_modules.classifier import classify_question, REVERSE_MAP
from nuse_modules.keyword_extracter import keywords_extractor
from nuse_modules.google_search import search_google_news

load_dotenv()

askMe = APIRouter()
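
# Example request once the router is mounted (mount point and port are
# assumptions, not shown in this file):
#   requests.post("http://localhost:8000/ask",
#                 json={"question": "What happened in the markets today?"})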

# ──────────────────────────────────────────────────────────────
# Pydantic schema
# ──────────────────────────────────────────────────────────────
class QuestionInput(BaseModel):
    question: str


# ──────────────────────────────────────────────────────────────
# Helper functions
# ──────────────────────────────────────────────────────────────

def should_extract_keywords(type_id: int) -> bool:
    """Map the intent id to whether we need keyword extraction."""
    return type_id in {1, 2, 3, 4, 5, 6, 7, 10, 11, 12}


def extract_answer_after_label(text: str) -> str:
    """Extracts everything after the first 'Answer:' label."""
    if "Answer:" in text:
        return text.split("Answer:", 1)[1].strip()
    return text.strip()


# ──────────────────────────────────────────────────────────────
# FastAPI route
# ──────────────────────────────────────────────────────────────

@askMe.post("/ask")
async def ask_question(payload: QuestionInput):
    question = payload.question.strip()

    # 1️⃣ Classify intent
    qid = classify_question(question)
    print("Intent ID:", qid)
    print("Category:", REVERSE_MAP.get(qid, "unknown"))

    # Special case: ID 13 ("asking_for_headlines") → return cached headlines
    if REVERSE_MAP.get(qid) == "asking_for_headlines":
        date_str = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
        categories = ["world", "india", "finance", "sports", "entertainment"]
        all_headlines: List[Dict] = []
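        # Each cached value is assumed to hold a JSON array of article dicts
        # with "title"/"summary"/"url"/"image" keys (shape inferred from the
        # reads below), presumably written by a separate headlines job.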

        for cat in categories:
            redis_key = f"headlines:{date_str}:{cat}"
            cached = _r.get(redis_key)
            if cached:
                try:
                    articles = json.loads(cached)
                except json.JSONDecodeError:
                    continue
                for art in articles:
                    all_headlines.append({
                        "title":   art.get("title"),
                        "summary": art.get("summary"),
                        "url":     art.get("url"),
                        "image":   art.get("image"),
                        "category": cat,
                    })

        return {
            "question": question,
            "answer": "Here are today’s top headlines:",
            "headlines": all_headlines,
        }

    # 2️⃣ Keyword‑based flow for other intents
    context = ""
    sources: List[Dict] = []

    if should_extract_keywords(qid):
        keywords = keywords_extractor(question)
        print("Raw extracted keywords:", keywords)

        if not keywords:
            return {"error": "Keyword extraction failed."}

        # Google News search
        results = search_google_news(keywords)
        print("Found articles:", results)

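        # Join result snippets into a single context blob, capped at 15,000
        # characters to keep the prompt size bounded.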
        context = "\n\n".join([
            r.get("snippet") or r.get("description", "") for r in results
        ])[:15000]

        sources = [{"title": r.get("title"), "url": r.get("link")} for r in results]

        if not context.strip():
            return {
                "question": question,
                "answer": "Cannot answer – no relevant context found.",
                "sources": sources,
            }

        answer_prompt = (
            "You are a concise news assistant. Answer the user's question clearly using the provided context if relevant. "
            "If the context is not helpful, rely on your own knowledge but do not mention the context.\n\n"
            f"Context:\n{context}\n\n"
            f"Question: {question}\n\nAnswer:"
        )
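        # The prompt ends with "Answer:" so extract_answer_after_label() can
        # strip any echoed prompt text from the model output.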
        answer_raw = mistral_generate(answer_prompt, max_new_tokens=256)

    else:
        answer_prompt = (
            "You are a concise news assistant. Answer the user's question clearly and accurately.\n\n"
            f"Question: {question}\n\nAnswer:"
        )
        answer_raw = mistral_generate(answer_prompt, max_new_tokens=256)

    # 3️⃣ Post‑process model output
    final_answer = extract_answer_after_label(answer_raw or "") or (
        "Cannot answer – model did not return a valid response."
    )

    return {
        "question": question,
        "answer": final_answer.strip(),
        "sources": sources,
    }
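

# Wiring sketch (assumed, not shown in this file): mount the router on a
# FastAPI app so POST /ask is served.
#   from fastapi import FastAPI
#   app = FastAPI()
#   app.include_router(askMe)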