import os
import json
import datetime
from typing import List, Dict
import requests
from fastapi import APIRouter
from pydantic import BaseModel
from dotenv import load_dotenv
from clients.redis_client import redis_client as _r
from models_initialization.mistral_registry import mistral_generate
from nuse_modules.classifier import classify_question, REVERSE_MAP
from nuse_modules.keyword_extracter import keywords_extractor
from nuse_modules.google_search import search_google_news
load_dotenv()
askMe = APIRouter()
# ──────────────────────────────────────────────────────────────
# Pydantic schema
# ──────────────────────────────────────────────────────────────
class QuestionInput(BaseModel):
    question: str
# ──────────────────────────────────────────────────────────────
# Helper functions
# ──────────────────────────────────────────────────────────────
def should_extract_keywords(type_id: int) -> bool:
    """Map the intent id to whether we need keyword extraction."""
    return type_id in {1, 2, 3, 4, 5, 6, 7, 10, 11, 12}
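# Illustrative examples (based only on the set above and the headlines
# special case handled in the route below): should_extract_keywords(3)
# returns True, so the question goes through keyword extraction and a
# news search; should_extract_keywords(13) returns False, since ID 13
# (headline requests) is served straight from the Redis cache instead.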
def extract_answer_after_label(text: str) -> str:
    """Extracts everything after the first 'Answer:' label."""
    if "Answer:" in text:
        return text.split("Answer:", 1)[1].strip()
    return text.strip()
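# Illustrative example: extract_answer_after_label("Context...\nAnswer: Rates were cut.")
# returns "Rates were cut."; text without an "Answer:" label comes back stripped but
# otherwise unchanged.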
# ──────────────────────────────────────────────────────────────
# FastAPI route
# ──────────────────────────────────────────────────────────────
@askMe.post("/ask")
async def ask_question(input: QuestionInput):
    question = input.question.strip()

    # 1️⃣ Classify intent
    qid = classify_question(question)
    print("Intent ID:", qid)
    print("Category:", REVERSE_MAP.get(qid, "unknown"))
    # Special case: ID 13 → return cached headlines
    if REVERSE_MAP.get(qid) == "asking_for_headlines":
        date_str = datetime.datetime.utcnow().strftime("%Y-%m-%d")
        categories = ["world", "india", "finance", "sports", "entertainment"]
        all_headlines: List[Dict] = []

        for cat in categories:
            redis_key = f"headlines:{date_str}:{cat}"
            cached = _r.get(redis_key)
            if cached:
                try:
                    articles = json.loads(cached)
                except json.JSONDecodeError:
                    continue
                for art in articles:
                    all_headlines.append({
                        "title": art.get("title"),
                        "summary": art.get("summary"),
                        "url": art.get("url"),
                        "image": art.get("image"),
                        "category": cat,
                    })

        return {
            "question": question,
            "answer": "Here are today's top headlines:",
            "headlines": all_headlines,
        }
    # 2️⃣ Keyword-based flow for other intents
    context = ""
    sources: List[Dict] = []

    if should_extract_keywords(qid):
        keywords = keywords_extractor(question)
        print("Raw extracted keywords:", keywords)
        if not keywords:
            return {"error": "Keyword extraction failed."}

        # Google News search
        results = search_google_news(keywords)
        print("Found articles:", results)

        context = "\n\n".join([
            r.get("snippet") or r.get("description", "") for r in results
        ])[:15000]
        sources = [{"title": r["title"], "url": r["link"]} for r in results]

        if not context.strip():
            return {
                "question": question,
                "answer": "Cannot answer – no relevant context found.",
                "sources": sources,
            }

        answer_prompt = (
            "You are a concise news assistant. Answer the user's question clearly using the provided context if relevant. "
            "If the context is not helpful, rely on your own knowledge but do not mention the context.\n\n"
            f"Context:\n{context}\n\n"
            f"Question: {question}\n\nAnswer:"
        )
        answer_raw = mistral_generate(answer_prompt, max_new_tokens=256)
    else:
        answer_prompt = (
            "You are a concise news assistant. Answer the user's question clearly and accurately.\n\n"
            f"Question: {question}\n\nAnswer:"
        )
        answer_raw = mistral_generate(answer_prompt, max_new_tokens=256)

    # 3️⃣ Post-process model output
    final_answer = extract_answer_after_label(answer_raw or "") or (
        "Cannot answer – model did not return a valid response."
    )

    return {
        "question": question,
        "answer": final_answer.strip(),
        "sources": sources,
    }
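
# ──────────────────────────────────────────────────────────────
# Illustrative usage (a sketch, assuming this router is mounted on the
# FastAPI app, e.g. `app.include_router(askMe)` in the application entry
# point; the actual host, port, and path prefix may differ in this repo):
#
#   curl -X POST http://localhost:8000/ask \
#        -H "Content-Type: application/json" \
#        -d '{"question": "What happened in world news today?"}'
#
# The JSON response always contains "question" and "answer", plus
# "headlines" for the cached-headlines intent or "sources" for
# search-backed answers.
# ──────────────────────────────────────────────────────────────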