vikramvasudevan committed on
Commit
a4577de
·
verified ·
1 Parent(s): 2d52ea0

Upload folder using huggingface_hub

Browse files
modules/llm/summarizer/helpers/db_helper.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ from config import SanatanConfig
3
+ from db import SanatanDatabase
4
+ from modules.llm.summarizer.models import ScriptureRequest
5
+ import logging
6
+
7
+ logging.basicConfig()
8
+ logger = logging.getLogger(__name__)
9
+ logger.setLevel(logging.INFO)
10
+
11
+
12
async def get_scripture_from_db(req: ScriptureRequest):
    """
    Return a scripture unit (page or verse, based on config), including all
    metadata fields separately. Used for page view to fetch by global index.

    Args:
        req: Request carrying ``scripture_name``, ``unit_index`` and an
            optional ``request_id``.

    Returns:
        On success, the result of ``SanatanConfig.canonicalize_document``
        extended with ``total`` (the collection's ``count``) and
        ``requestId``. On failure, ``{"error": ..., "requestId": ...}``;
        failures are reported in the payload rather than raised.
    """
    # Callers may omit the request id; synthesize one so the response can
    # still be correlated with the log line below.
    request_id = req.request_id or f"auto-{uuid.uuid4()}"
    # Lazy %-style arguments: the message is only formatted when INFO is on.
    logger.info(
        "get_scripture: received requestId=%s, scripture=%s, unit_index=%s",
        request_id,
        req.scripture_name,
        req.unit_index,
    )

    # One config object serves both the lookup and the canonicalization.
    sanatan_config = SanatanConfig()
    scripture_config = next(
        (s for s in sanatan_config.scriptures if s["name"] == req.scripture_name),
        None,
    )
    if not scripture_config:
        return {
            "error": f"Scripture '{req.scripture_name}' not found",
            "requestId": request_id,
        }

    collection_name = scripture_config["collection_name"]

    # One database handle serves both the fetch and the count.
    database = SanatanDatabase()
    raw_doc = database.fetch_document_by_index(
        collection_name=collection_name,
        index=req.unit_index,
    )

    # An empty result, a bare string, or a payload containing "error" are all
    # treated as "no data" for this unit.
    if not raw_doc or isinstance(raw_doc, str) or "error" in raw_doc:
        return {
            "error": f"No data available for unit {req.unit_index}",
            "requestId": request_id,
        }

    canonical_doc = sanatan_config.canonicalize_document(
        scripture_name=req.scripture_name,
        document_text=raw_doc.get("document", ""),
        metadata_doc=raw_doc,
    )

    # Total unit count lets the client paginate; echo the request id back.
    canonical_doc["total"] = database.count(collection_name)
    canonical_doc["requestId"] = request_id

    return canonical_doc
modules/llm/summarizer/helpers/llm_helper.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+
3
+ from modules.llm.summarizer.models import ScriptureVerseSummary
4
+
5
+
6
+ client = OpenAI()
7
+
8
+
9
def summarize_scripture_verse(
    lyrics: str, translation: str, word_by_word_meaning: str, target_language: str
) -> ScriptureVerseSummary:
    """
    Generate a simple and a detailed meaning of a verse in the target language.

    The request uses structured output, with ``ScriptureVerseSummary`` as the
    response format.

    Args:
        lyrics: Verse text.
        translation: Translation of the verse.
        word_by_word_meaning: Word-by-word gloss of the verse.
        target_language: Language the meanings must be written in.

    Returns:
        The parsed ``ScriptureVerseSummary`` from the first completion choice.

    Raises:
        RuntimeError: If the completion carries no parsed payload (for
            example when the model refuses), instead of returning ``None``.
    """
    print("Summarizing ...")
    system_prompt = (
        "You are a precise multilingual assistant that summarizes meanings of Sanatan scripture verses.\n"
        "Your job is to derive the verse’s simple and detailed meaning *strictly* from the provided inputs.\n\n"
        "=== MANDATORY RULES ===\n"
        "1. Use ONLY the information from: lyrics, translation, and word-by-word meaning.\n"
        "2. Do NOT invent, guess, or infer anything not explicitly given.\n"
        "3. Do NOT include any English words or transliterations unless they already appear in the input.\n"
        # f-string: the language name must be substituted here, otherwise the
        # model is shown the literal text "{target_language}".
        f"4. Every part of your output must be written completely in the target language: {target_language}.\n"
        "   - If the target language lacks a direct word, explain it *in that language* (do not leave English placeholders).\n"
        "   - Preserve transliterated proper nouns (e.g., ‘nanjIyar’, ‘rAmAnuja’) exactly as written — case-sensitive.\n"
        "5. NEVER mix scripts. Do not output Latin letters unless they occur verbatim in the input.\n"
        "6. If unsure of a word’s meaning, omit it or express uncertainty *in the target language*.\n"
        "7. Maintain a respectful, neutral tone. No opinions, commentary, or theological judgment.\n"
        # Schema and field names match the response_format model used below.
        "8. Output must follow the ScriptureVerseSummary schema exactly: only 'target_language', 'simple_meaning', and 'detailed_meaning'.\n"
        "9. Do NOT repeat the input text or provide explanations in any other language.\n"
        "10. Ensure all formatting is plain text — no quotes, brackets, or markdown around meanings.\n"
    )

    user_prompt = (
        f"Lyrics:\n{lyrics}\n\n"
        f"Translation:\n{translation}\n\n"
        f"Word-by-word meaning:\n{word_by_word_meaning}\n\n"
        f"Now generate the verse’s meaning entirely in **{target_language}**.\n"
        "Provide both a simple summary and a detailed explanation in that language.\n"
        "Do not include any English words unless they appear exactly as in the inputs."
    )

    response = client.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        response_format=ScriptureVerseSummary,
    )

    message = response.choices[0].message
    # Fail loudly here rather than hand None to callers, which would break
    # later on an unrelated attribute access.
    if message.parsed is None:
        raise RuntimeError(
            f"Model returned no structured summary (refusal: {message.refusal!r})"
        )
    return message.parsed
modules/llm/summarizer/models.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --- Define output schema ---
2
+ from typing import Optional
3
+ from pydantic import BaseModel, Field
4
+
5
class ScriptureRequest(BaseModel):
    # Request payload identifying one scripture unit. Documented with plain
    # comments so the generated schema carries no extra description text.

    # Name of the scripture to look up.
    scripture_name: str
    # Index of the requested unit.
    unit_index: int
    # Optional, backward compatible: clients may omit it.
    request_id: Optional[str] = Field(default=None, alias="request_id")
    # Language for generated summaries; defaults to English when omitted.
    target_language: Optional[str] = "English"
12
+
13
class ScriptureVerseSummary(BaseModel):
    # Structured summary of a single verse. Field names and order are left
    # untouched because they define the serialized shape of the model.

    # Presumably the language the two meanings are written in — confirm
    # against the producer of this model.
    target_language: str
    # Short, simple meaning of the verse.
    simple_meaning: str
    # Longer, detailed meaning of the verse.
    detailed_meaning: str
modules/llm/summarizer/service.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.llm.summarizer.helpers.db_helper import get_scripture_from_db
2
+ from modules.llm.summarizer.helpers.llm_helper import summarize_scripture_verse
3
+ from modules.llm.summarizer.models import ScriptureRequest, ScriptureVerseSummary
4
+ from google.cloud import firestore
5
+ import os
6
+ import json
7
+ from google.oauth2 import service_account
8
+
9
# Firebase service-account credentials arrive as a JSON string in the environment.
service_account_info = os.environ.get("FIREBASE_SERVICE_ACCOUNT_JSON")

# Refuse to import the module when the variable is unset or empty.
if not service_account_info:
    raise ValueError("FIREBASE_SERVICE_ACCOUNT_JSON not found in environment variables.")

# Decode the JSON text and build Google credentials from it.
credentials_dict = json.loads(service_account_info)
credentials = service_account.Credentials.from_service_account_info(credentials_dict)

# Async Firestore client bound to the project named in the credentials.
db = firestore.AsyncClient(project=credentials.project_id, credentials=credentials)
22
+
23
+
24
async def svc_summarize_scripture_verse(req: ScriptureRequest) -> ScriptureVerseSummary:
    """
    Summarize a scripture verse, with Firestore caching per verse and language.

    Firestore structure:
        /scripture_summaries/{scripture_name}_{global_index}/meanings/{target_language}

    Args:
        req: Identifies the verse (``scripture_name``, ``unit_index``) and the
            ``target_language`` of the summary.

    Returns:
        The cached summary when one exists for this verse and language,
        otherwise a newly generated summary (stored before returning).

    Raises:
        ValueError: If the scripture lookup reports an error. Nothing is
            generated or cached in that case.
    """
    # Local import: only needed to off-load the blocking LLM call below.
    import asyncio

    # Step 1: Get scripture data
    scripture_data = await get_scripture_from_db(req)

    # The lookup reports failures through an "error" key instead of raising.
    # Stop here so no summary is generated from placeholder text and cached
    # under a bogus document id.
    lookup_error = scripture_data.get("error")
    if lookup_error:
        raise ValueError(lookup_error)

    scripture_name = scripture_data.get("scripture_name", "UnknownScripture")
    global_index = scripture_data.get("_global_index", -1)
    # target_language is Optional on the request model; an explicit null (or
    # empty string) falls back to the model's default language.
    target_language = (req.target_language or "English").lower()

    # Compose the document path
    doc_id = f"{scripture_name}_{global_index}".replace(" ", "_")
    base_ref = db.collection("scripture_summaries").document(doc_id)
    meaning_ref = base_ref.collection("meanings").document(target_language)

    # Step 2: Check for cached version
    cached_doc = await meaning_ref.get()
    if cached_doc.exists:
        print(f"✅ Using cached summary for {scripture_name} {global_index} ({target_language})")
        return ScriptureVerseSummary(**cached_doc.to_dict())

    print(f"🧠 Generating new summary for {scripture_name} {global_index} ({target_language})")

    # Step 3: Generate new summary with LLM. The helper is synchronous, so it
    # runs in a worker thread to keep the event loop free for other requests.
    summary = await asyncio.to_thread(
        summarize_scripture_verse,
        scripture_data.get("text", "--no lyrics available--"),
        scripture_data.get("translation", "--no translation available--"),
        scripture_data.get("word_by_word_native", "--no word-by-word meaning available--"),
        target_language,
    )

    summary_data = summary.model_dump()

    # Step 4: Ensure parent document exists
    await base_ref.set(
        {
            "scripture_name": scripture_name,
            "global_index": global_index,
        },
        merge=True,
    )

    # Step 5: Store meaning under nested language doc
    await meaning_ref.set(summary_data)

    print(f"✅ Stored summary for {scripture_name} {global_index} in Firestore")

    return summary
server.py CHANGED
@@ -18,6 +18,9 @@ from modules.config.categories import get_scripture_categories
18
  from modules.dropbox.discources import get_discourse_by_id, get_discourse_summaries
19
  from modules.firebase.messaging import FcmRequest, fcm_service
20
  from modules.languages.get_v2 import handle_fetch_languages_v2
 
 
 
21
  from modules.quiz.answer_validator import validate_answer
22
  from modules.quiz.models import Question
23
  from modules.quiz.quiz_helper import generate_question
@@ -218,62 +221,10 @@ async def handle_get_scriptures():
218
  return return_values
219
 
220
 
221
- class ScriptureRequest(BaseModel):
222
- scripture_name: str
223
- unit_index: int
224
- request_id: str | None = Field(
225
- default=None, alias="request_id"
226
- ) # optional, backward compatible
227
-
228
-
229
  @router.post("/scripture")
230
  async def get_scripture(req: ScriptureRequest):
231
- """
232
- Return a scripture unit (page or verse, based on config),
233
- including all metadata fields separately.
234
- Used for page view to fetch by global index.
235
- """
236
- # ensure we have a valid request id (deviceId)
237
- request_id = req.request_id or f"auto-{uuid.uuid4()}"
238
- logger.info(
239
- f"get_scripture: received requestId={request_id}, scripture={req.scripture_name}, unit_index={req.unit_index}"
240
- )
241
-
242
- # find config entry for the scripture
243
- config = next(
244
- (s for s in SanatanConfig().scriptures if s["name"] == req.scripture_name), None
245
- )
246
- if not config:
247
- return {
248
- "error": f"Scripture '{req.scripture_name}' not found",
249
- "requestId": request_id,
250
- }
251
-
252
- # fetch the raw document from DB
253
- raw_doc = SanatanDatabase().fetch_document_by_index(
254
- collection_name=config["collection_name"],
255
- index=req.unit_index,
256
- )
257
-
258
- if not raw_doc or isinstance(raw_doc, str) or "error" in raw_doc:
259
- return {
260
- "error": f"No data available for unit {req.unit_index}",
261
- "requestId": request_id,
262
- }
263
-
264
- # canonicalize it
265
- canonical_doc = SanatanConfig().canonicalize_document(
266
- scripture_name=req.scripture_name,
267
- document_text=raw_doc.get("document", ""),
268
- metadata_doc=raw_doc,
269
- )
270
-
271
- # add unit index & total units (so Flutter can paginate)
272
- canonical_doc["total"] = SanatanDatabase().count(config["collection_name"])
273
- canonical_doc["requestId"] = request_id
274
-
275
- return canonical_doc
276
-
277
 
278
  @router.get("/scripture_configs")
279
  async def get_scripture_configs():
@@ -653,4 +604,9 @@ async def send_fcm_endpoint(
653
  if x_admin_key != ADMIN_KEY:
654
  raise HTTPException(status_code=403, detail="Unauthorized")
655
 
656
- return await fcm_service.send_fcm(request)
 
 
 
 
 
 
18
  from modules.dropbox.discources import get_discourse_by_id, get_discourse_summaries
19
  from modules.firebase.messaging import FcmRequest, fcm_service
20
  from modules.languages.get_v2 import handle_fetch_languages_v2
21
+ from modules.llm.summarizer.helpers.db_helper import get_scripture_from_db
22
+ from modules.llm.summarizer.models import ScriptureRequest
23
+ from modules.llm.summarizer.service import svc_summarize_scripture_verse
24
  from modules.quiz.answer_validator import validate_answer
25
  from modules.quiz.models import Question
26
  from modules.quiz.quiz_helper import generate_question
 
221
  return return_values
222
 
223
 
 
 
 
 
 
 
 
 
224
  @router.post("/scripture")
225
  async def get_scripture(req: ScriptureRequest):
226
+ response = await get_scripture_from_db(req)
227
+ return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
  @router.get("/scripture_configs")
230
  async def get_scripture_configs():
 
604
  if x_admin_key != ADMIN_KEY:
605
  raise HTTPException(status_code=403, detail="Unauthorized")
606
 
607
+ return await fcm_service.send_fcm(request)
608
+
609
@router.post("/summarize_scripture_verse")
async def summarize_scripture_verse(req: ScriptureRequest):
    # Thin route: delegate to the summarizer service and return its result
    # unchanged.
    return await svc_summarize_scripture_verse(req)
static/assets/fonts/MaterialIcons-Regular.otf CHANGED
Binary files a/static/assets/fonts/MaterialIcons-Regular.otf and b/static/assets/fonts/MaterialIcons-Regular.otf differ
 
static/flutter_bootstrap.js CHANGED
@@ -38,6 +38,6 @@ _flutter.buildConfig = {"engineRevision":"ddf47dd3ff96dbde6d9c614db0d7f019d7c7a2
38
 
39
  _flutter.loader.load({
40
  serviceWorkerSettings: {
41
- serviceWorkerVersion: "4183499183"
42
  }
43
  });
 
38
 
39
  _flutter.loader.load({
40
  serviceWorkerSettings: {
41
+ serviceWorkerVersion: "196864405"
42
  }
43
  });
static/flutter_service_worker.js CHANGED
@@ -17,7 +17,7 @@ const RESOURCES = {"assets/AssetManifest.bin": "1b6e81d215d35b84735e3bac9f9afaad
17
  "assets/assets/scriptures/default.jpg": "34bb646134c531b02c4b2fdd29119881",
18
  "assets/assets/thiruman_banner.jpg": "34bb646134c531b02c4b2fdd29119881",
19
  "assets/FontManifest.json": "d4a8d45844bfcdc2b484bfd5676f4e35",
20
- "assets/fonts/MaterialIcons-Regular.otf": "61ce5927c2813ade6579d2828168189f",
21
  "assets/NOTICES": "81bb8caa5fb1d366bb6a83bcbc62b753",
22
  "assets/packages/cupertino_icons/assets/CupertinoIcons.ttf": "33b7d9392238c04c131b6ce224e13711",
23
  "assets/packages/flutter_inappwebview/assets/t_rex_runner/t-rex.css": "5a8d0222407e388155d7d1395a75d5b9",
@@ -40,16 +40,16 @@ const RESOURCES = {"assets/AssetManifest.bin": "1b6e81d215d35b84735e3bac9f9afaad
40
  "canvaskit/skwasm_heavy.wasm": "8034ad26ba2485dab2fd49bdd786837b",
41
  "favicon.png": "74afc5494e90462fd4f3903ec8271c53",
42
  "flutter.js": "888483df48293866f9f41d3d9274a779",
43
- "flutter_bootstrap.js": "0eb96612c0a40fd5f45fecbf05b77346",
44
  "icons/Icon-192.png": "ac9a721a12bbc803b44f645561ecb1e1",
45
  "icons/Icon-512.png": "96e752610906ba2a93c65f8abe1645f1",
46
  "icons/Icon-maskable-192.png": "c457ef57daa1d16f64b27b786ec2ea3c",
47
  "icons/Icon-maskable-512.png": "301a7604d45b3e739efc881eb04896ea",
48
  "index.html": "155745e967e3e0ecf6d7333ea445658e",
49
  "/": "155745e967e3e0ecf6d7333ea445658e",
50
- "main.dart.js": "da6fc9085ee616f181a6ed95aac529e5",
51
  "manifest.json": "9d43d4621f8c3ed75ad4ffe729655ecc",
52
- "version.json": "a4d3746eceb50edb15ef0d9db3a84e52"};
53
  // The application shell files that are downloaded before a service worker can
54
  // start.
55
  const CORE = ["main.dart.js",
 
17
  "assets/assets/scriptures/default.jpg": "34bb646134c531b02c4b2fdd29119881",
18
  "assets/assets/thiruman_banner.jpg": "34bb646134c531b02c4b2fdd29119881",
19
  "assets/FontManifest.json": "d4a8d45844bfcdc2b484bfd5676f4e35",
20
+ "assets/fonts/MaterialIcons-Regular.otf": "9c121c758ac4d0dc351f3018d8d14170",
21
  "assets/NOTICES": "81bb8caa5fb1d366bb6a83bcbc62b753",
22
  "assets/packages/cupertino_icons/assets/CupertinoIcons.ttf": "33b7d9392238c04c131b6ce224e13711",
23
  "assets/packages/flutter_inappwebview/assets/t_rex_runner/t-rex.css": "5a8d0222407e388155d7d1395a75d5b9",
 
40
  "canvaskit/skwasm_heavy.wasm": "8034ad26ba2485dab2fd49bdd786837b",
41
  "favicon.png": "74afc5494e90462fd4f3903ec8271c53",
42
  "flutter.js": "888483df48293866f9f41d3d9274a779",
43
+ "flutter_bootstrap.js": "6aae013c74d63e80fa365a49ddcb55e9",
44
  "icons/Icon-192.png": "ac9a721a12bbc803b44f645561ecb1e1",
45
  "icons/Icon-512.png": "96e752610906ba2a93c65f8abe1645f1",
46
  "icons/Icon-maskable-192.png": "c457ef57daa1d16f64b27b786ec2ea3c",
47
  "icons/Icon-maskable-512.png": "301a7604d45b3e739efc881eb04896ea",
48
  "index.html": "155745e967e3e0ecf6d7333ea445658e",
49
  "/": "155745e967e3e0ecf6d7333ea445658e",
50
+ "main.dart.js": "9bcfbbc9bd3e4984d982faa4bacd39dd",
51
  "manifest.json": "9d43d4621f8c3ed75ad4ffe729655ecc",
52
+ "version.json": "397f7fc35ae7b7eb4a183c4d944fd6d5"};
53
  // The application shell files that are downloaded before a service worker can
54
  // start.
55
  const CORE = ["main.dart.js",
static/main.dart.js CHANGED
The diff for this file is too large to render. See raw diff
 
static/version.json CHANGED
@@ -1 +1 @@
1
- {"app_name":"bhashyam_ai","version":"1.0.91","build_number":"128","package_name":"bhashyam_ai"}
 
1
+ {"app_name":"bhashyam_ai","version":"1.0.91","build_number":"133","package_name":"bhashyam_ai"}