Commit a6a76ee
Parent: 59eb93e
Upload 18 files

Files changed:
- Rag/Processed_folder/processed_files.json +1 -0
- Rag/__init__.py +0 -0
- Rag/__pycache__/__init__.cpython-311.pyc +0 -0
- Rag/__pycache__/__init__.cpython-39.pyc +0 -0
- Rag/__pycache__/rag_pipeline.cpython-311.pyc +0 -0
- Rag/chromadb.db/01e34d25-3e37-4b52-8953-794b0e9b61dd/header.bin +3 -0
- Rag/chromadb.db/b338c320-325e-4f10-8e0c-ce336d2e26c9/header.bin +3 -0
- Rag/error_log.txt +8 -0
- Rag/rag_pipeline.py +183 -0
Rag/Processed_folder/processed_files.json
ADDED
@@ -0,0 +1 @@
+
["VOfwbcveP84_20241225194621.txt", "In9Bq4EJMZw_20241225194705.txt", "DkS1pkKpILY_20241225194325.txt", "ajneRM-ET1Q_20241225194311.txt", "K4Ze-Sp6aUE_20241225194709.txt", "n28W4AmvMDE_20241225194626.txt", "UIy-WQCZd4M_20241225194819.txt", "etbfLTHD_VU_20241225194439.txt", "PVmQOLYckKQ_20241225194814.txt", "F9KrZd_-ge0_20241225194812.txt", "xjEFo3a1AnI_20241225194539.txt", "szqPAPKE5tQ_20241225194712.txt", "3_auLYOilb8_20241225194826.txt", "acgz0C-z-gc_20241225194817.txt", "zVCaYyUWWSw_20241225194412.txt", "doupx8SAs5Y_20241225194603.txt", "wAZn9dF3XTo_20241225194423.txt", "2XGREPnlI8U_20241225194659.txt", "UNCwdFxPtE8_20241225194521.txt", "at37Y8rKDlA_20241225194513.txt", "oL3SkPV1_Ik_20241225194837.txt", "nOgypsWKjm4_20241225194440.txt", "rW9QKc-iFoY_20241225194751.txt", "CQlTmOFM4Qs_20241225194550.txt", "tR73Ny4Dt9s_20241225194413.txt", "t1F7EEGPQwo_20241225194649.txt", "ccrbE0QHy94_20241225194608.txt", "SyWC8ZFVxGo_20241225194333.txt", "zlc4VrDx_qk_20241225194800.txt", "8qaBpM73NSk_20241225194409.txt", "sxgCC4H1dl8_20241225194524.txt", "RBK5KLA5Jjg_20241225194446.txt", "slUCmZJDXrk_20241225194627.txt", "h2aWYjSA1Jc_20241225194702.txt", "Ov4yyK15-K8_20241225194230.txt", "juD99_sPWGU_20241225194340.txt", "q1Ss8sTbFBY_20241225194647.txt", "X8Hw8zeCDTA_20241225194518.txt", "UChhXiFPRgg_20241225194443.txt", "pq6WHJzOkno_20241225194415.txt", "2Ds1m5gflCI_20241225194849.txt", "jGZ1mR9uLU0_20241225194808.txt", "VAEzZeaV5zM_20241225194347.txt", "EhlIkzJwPlk_20241225194656.txt", "HiyzzcuaAac_20241225194255.txt", "C3X0bUAiluE_20241225194259.txt", "kG5Qb9sr0YQ_20241225194810.txt", "wRsX_ZkzxvQ_20241225194619.txt", "U2BPitASUh0_20241225194358.txt", "Wcs2PFz5q6g_20241225194327.txt", "CuzL1qxUyHw_20241225194312.txt", "q37ARYnRDGc_20241225194623.txt", "cp9GXl9Qk_s_20241225194735.txt", "XT_6Lvkhxvo_20241225194342.txt", "bUr_9fgfnto_20241225194256.txt", "LTGGyQS1fZE_20241225194305.txt", "mAlt_HKX4as_20241225194420.txt", "SZSRgyl7pyQ_20241225194418.txt", "RI112zW8GDw_20241225194356.txt", "ycOBZZeVeAc_20241225194707.txt", "6YLdlK2hYnw_20241225194328.txt", "p4ZfkezDTXQ_20241225194615.txt", "LVxL_p_kToc_20241225194558.txt", "HXzTbCEqCJc_20241225194710.txt", "yOoVz9E9kfQ_20241225194901.txt", "C5KpIXjpzdY_20241225194400.txt", "__RAXBLt1iM_20241225194430.txt", "8N7mdkrXgbc_20241225194338.txt", "JnlSDaBjCCU_20241225194450.txt", "IOl28gj_RXw_20241225194431.txt", "Nr5xb-QCBGA_20241225194354.txt", "GzvzWO0NU50_20241225194605.txt", "DtmwtjOoSYU_20241225194633.txt", "CrtR12PBKb0_20241225194632.txt", "gMRph_BvHB4_20241225194516.txt", "QpoaNklmRPc_20241225194248.txt", "9tRohh0gErM_20241225194353.txt", "Xu1FMCxoEFc_20241225194346.txt", "15R2pMqU2ok_20241225194406.txt", "eIxVfln02Ss_20241225194335.txt", "0Dtt95_xabw_20241225194252.txt", "3ZGItIAUQmI_20241225194719.txt", "uxZFl4BDOGk_20241225194757.txt", "hvPGfcAgk9Y_20241225194754.txt", "HYVeP4F0GNU_20241225194559.txt", "z5W74QC3v2I_20241225194308.txt", "31wjVhCcI5Y_20241225194426.txt", "BMTt8gSl13s_20241225194836.txt", "aQDOU3hPci0_20241225194501.txt", "tkH2-_jMCSk_20241225194543.txt", "ntfcfJ28eiU_20241225194522.txt", "S8nPJU9xkNw_20241225194748.txt", "fcxjwA4C4Cw_20241225194553.txt", "iMvtHqLmEkI_20241225194855.txt", "099hgtRoUZw_20241225194436.txt", "4RFEkGKKhdE_20241225194907.txt", "eJU6Df_ffAE_20241225194635.txt", "nqNEtdHVUjM_20241225194437.txt", "1SXDXdngX2M_20241225194316.txt", "X4QE6t-MkYE_20241225194642.txt", "79p1X_7rAMo_20241225194630.txt", "6RZbGrq9BxE_20241225194306.txt", "pkJi9Raxikg_20241225194824.txt", "QbMxDZeB8Ks_20241225194247.txt", 
"RgAcOqVRfYA_20241225194657.txt", "ncSoor2Iw8k_20241225194833.txt", "i_DEPeCKxs8_20241225194235.txt", "FE0lTEUa7EY_20241225194753.txt", "gE0_8AjTFaM_20241225194852.txt", "kgr22uMsJ5o_20241225194317.txt", "ufsIA5NARIo_20241225194535.txt", "CyDLbrZK75U_20241225194434.txt", "7TkGDj4LaOU_20241225194244.txt", "XLr2RKoD-oY_20241225194738.txt", "yb5zpo5WDG4_20241225194645.txt", "a9yFKPmPZ90_20241225194556.txt", "TG8VM5-CTfw_20241225194636.txt", "eMqWH3LYiII_20241225194351.txt", "CVh3_8e5u8I_20241225194246.txt", "SuR0DaYoe0Y_20241225194302.txt", "FLxIoNguGRU_20241225194233.txt", "GA89kjVY6Ik_20241225194854.txt", "qJ3uV7coZbA_20241225194453.txt", "EQ3GjpGq5Y8_20241225194405.txt", "yOJvm_ri_hk_20241225194555.txt", "cwakOgHIT0E_20241225194421.txt", "DTCmprPCDqc_20241225194733.txt", "qPKd99Pa2iU_20241225194500.txt", "nm1TxQj9IsQ_20241225194611.txt", "LRM5LutB538_20241225194857.txt", "xTtM2AvCRyA_20241225194643.txt", "62lVH-6xYGY_20241225194250.txt", "Rxmv7rT9leo_20241225194417.txt", "ulHrUVV3Kq4_20241225194452.txt", "bGixnNGvSkg_20241225194231.txt", "1CxJVdeyltw_20241225194614.txt", "wgUjIRtote8_20241225194726.txt", "qPKd99Pa2iU_20241225194503.txt", "S_SrHS8FvMM_20241225194807.txt", "xX6hiEmDmxs_20241225194227.txt", "uXs-zPc63kM_20241225194449.txt", "4AwyVTHEU3s_20241225194904.txt", "xaE9XyMMAHY_20241225194848.txt", "hFL6qRIJZ_Y_20241225194428.txt", "FOi5s3OUogo_20241225194245.txt", "cS7cNaBrkxo_20241225194624.txt", "kpTJqwIfHcM_20241225194654.txt", "yixIc1Ai6jM_20241225194829.txt", "vfRtLI6cJrk_20241225194324.txt", "GLgKkG44MGo_20241225194729.txt", "KPlJcD-o-4Q_20241225194617.txt", "AtChcxeaukQ_20241225194646.txt", "tLS6t3FVOTI_20241225194714.txt", "GqPGXG5TlZw_20241225194541.txt", "UF0nqolsNZc_20241225194727.txt", "7R3-3HR6-u4_20241225194519.txt", "tLRCS48Ens4_20241225194447.txt", "V0Sdgn0_kFM_20241225194740.txt", "G1VUSu6sGoU_20241225194251.txt", "m_OazsImOiI_20241225194322.txt", "Og56hmAspV8_20241225194258.txt", "dFR_wFN23ZY_20241225194640.txt", "q-H_A_dQUxQ_20241225194303.txt", "KVjfFN89qvQ_20241225194314.txt", "zU5EYw06wtw_20241225194349.txt", "Z7MU6zrAXsM_20241225194442.txt", "LYYyQcAJZfk_20241225194508.txt", "E7W4OQfJWdw_20241225194717.txt", "azb3Ih68awQ_20241225194505.txt", "ouCWNRvPk20_20241225194401.txt", "uwWOc_RqTBA_20241225194858.txt", "pZX8ikmWvEU_20241225194510.txt", "n9IxomBusuw_20241225194545.txt", "BwyZIWeBpRw_20241225194534.txt", "XY0rBdaDXD8_20241225194226.txt", "1Wo6SqLNmLk_20241225194845.txt", "ddq8JIMhz7c_20241225194529.txt", "VQLU7gpk_X8_20241225194821.txt", "jC8Pu9HBd48_20241225194321.txt", "rZkMpVLcVsg_20241225194319.txt", "gbQFSMayJxk_20241225194736.txt", "F54qXuTpgfM_20241225194843.txt", "p3JLaF_4Tz8_20241225194537.txt", "FeRgqJVALMQ_20241225194433.txt", "hF32FvBH4gI_20241225194332.txt", "CDUetQMKM6g_20241225194454.txt", "wG3UFHR1o48_20241225194229.txt", "6P8hrzjnetU_20241225194336.txt", "WFcYF_pxLgA_20241225194458.txt", "77CdVSpnUX4_20241225194746.txt", "VOfwbcveP84_20241225194742.txt", "VRvn3Oj5r3E_20241225194839.txt", "Gf-kC30SLtc_20241225194846.txt", "S8jWFcDGz4Y_20241225194805.txt", "x3MgDtZovks_20241225194526.txt", "lIo9FcrljDk_20241225194309.txt", "-e9ErUozQo4_20241225194903.txt", "aXvDEmo6uS4_20241225194629.txt", "3gtvNYa3Nd8_20241225194531.txt", "5tYR7e5Wpyc_20241225194238.txt", "OadokY8fcAA_20241225194601.txt", "O640yAgq5f8_20241225194744.txt", "zbpb1wd-wvs_20241225194827.txt", "gXvuJu1kt48_20241225194638.txt", "zEYE-vcVKy8_20241225194547.txt", "Ky-ZJ9SS-x4_20241225194240.txt", "0RYyQRQFgFk_20241225194532.txt", "4F_RBc1akC8_20241225194724.txt", 
"nDLb8_wgX50_20241225194540.txt", "tcueMCe-0zo_20241225194236.txt", "K-TW2Chpz4k_20241225194330.txt", "XcvhERcZpWw_20241225194731.txt", "Ze2pc6NwsHQ_20241225194704.txt", "_ltcLEM-5HU_20241225194612.txt", "jouFvyRZntk_20241225194507.txt", "uWV9a3zEaL4_20241225194823.txt", "-OBCwiPPfEU_20241225194747.txt", "dzOvi0Aa2EA_20241225194301.txt", "K9lORz2_XSU_20241225194527.txt", "j2sMqSDLd4k_20241225194407.txt", "oNkDA2F7CjM_20241225194651.txt", "50BZQRT1dAg_20241225194403.txt", "q8CHXefn7B4_20241225194411.txt", "Jy4rJcYmtUM_20241225194344.txt", "QmOF0crdyRU_20241225194456.txt", "6ZrlsVx85ek_20241225194758.txt", "CD0bRU1e1ZM_20241225194425.txt", "IAnhFUUCq6c_20241225194804.txt", "Phm-Alz1Zjo_20241225194906.txt", "csubiPlvFWk_20241225194606.txt", "GpgqXCkRO-w_20241225194701.txt", "W5zqC5cYcS0_20241225194241.txt", "T65RDBiB5Hs_20241225194715.txt", "6I5I56uVvLw_20241225194801.txt", "i5611OvTFGM_20241225194548.txt", "wTBSGgbIvsY_20241225194552.txt", "O1YRwWmue4Y_20241225194815.txt", "29n0WG317tM_20241225194511.txt", "xmhsWAqP_0Y_20241225194851.txt", "x4m_PdFbu-s_20241225194722.txt"]
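This manifest records which transcript files have already been ingested. Note that rag_pipeline.py below actually detects new files from the collection's metadata rather than from this JSON; a file-based check, if one were wanted, could look like this sketch (the helper name and default path are illustrative, not part of the commit):

import json
import os

def load_processed(manifest_path="Rag/Processed_folder/processed_files.json"):
    # Return the set of already-processed transcript filenames (empty if no manifest yet).
    if not os.path.exists(manifest_path):
        return set()
    with open(manifest_path) as f:
        return set(json.load(f))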
Rag/__init__.py
ADDED
File without changes
Rag/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (153 Bytes)
Rag/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (111 Bytes)
Rag/__pycache__/rag_pipeline.cpython-311.pyc
ADDED
Binary file (11.6 kB)
Rag/chromadb.db/01e34d25-3e37-4b52-8953-794b0e9b61dd/header.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b5a01d1e5da2cd59bebe66372da65fee337e4d2d160c1170c240dae082bc3f5
+size 100
Rag/chromadb.db/b338c320-325e-4f10-8e0c-ce336d2e26c9/header.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9fc8728a7f84f5a3f3e775af521ea3d3062f3b0d229f88d76086b8805199074
+size 100
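Both header.bin entries are Git LFS pointer stubs rather than the binary payloads themselves: three "key value" lines giving the spec version, the SHA-256 of the real content, and its size in bytes. A quick way to sanity-check such a stub (the helper name is illustrative):

def parse_lfs_pointer(text):
    # Split each "key value" line of a Git LFS pointer into a dict entry.
    return dict(line.split(" ", 1) for line in text.strip().splitlines())

pointer = parse_lfs_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:0b5a01d1e5da2cd59bebe66372da65fee337e4d2d160c1170c240dae082bc3f5\n"
    "size 100\n"
)
assert pointer["size"] == "100"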
Rag/error_log.txt
ADDED
@@ -0,0 +1,8 @@
+Traceback (most recent call last):
+  File "/home/nightwing/Codes/Xyzbot/Rag/chunking.py", line 52, in split_text_to_chunks
+    chunks = text_splitter.split_documents(docs)
+             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/nightwing/anaconda3/envs/xyzbot/lib/python3.11/site-packages/langchain_text_splitters/base.py", line 94, in split_documents
+    texts.append(doc.page_content)
+                 ^^^^^^^^^^^^^^^^
+AttributeError: 'str' object has no attribute 'page_content'
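This traceback records the bug that split_text_to_chunks in rag_pipeline.py below avoids by calling split_text on the raw string: split_documents expects Document objects with a .page_content attribute, and chunking.py passed plain strings. A minimal sketch of the alternative fix, assuming the input is a list of plain strings (the function name here is ours, not the repo's):

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document

def split_docs_to_chunks(docs, chunk_size=1000, chunk_overlap=200):
    # split_documents reads .page_content, so wrap plain strings in Document objects first.
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    wrapped = [Document(page_content=d) if isinstance(d, str) else d for d in docs]
    return splitter.split_documents(wrapped)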
Rag/rag_pipeline.py
ADDED
@@ -0,0 +1,183 @@
+import chromadb
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from sentence_transformers import SentenceTransformer
+import google.generativeai as genai
+import os
+import logging
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from Llm.llm_endpoints import get_llm_response
+from utils.get_link import get_source_link
+from Prompts.huberman_prompt import huberman_prompt
+from tqdm import tqdm
+# Configuration
+API_KEY = os.getenv("GOOGLE_API_KEY")
+if API_KEY:
+    genai.configure(api_key=API_KEY)
+
+chromadb_path = "app/Rag/chromadb.db"
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+
+# Logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
+
+
+# Helper Functions
+def split_text_to_chunks(docs, chunk_size=1000, chunk_overlap=200):
+    """Split text into manageable chunks."""
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+    chunks = text_splitter.split_text(docs)
+    return chunks
+
+
+def get_new_files(transcripts_folder_path, collection):
+    """Find new transcript files that haven't been processed yet."""
+    all_files = [f for f in os.listdir(transcripts_folder_path) if f.endswith(".txt")]
+    existing_files = {meta["source"] for meta in collection.get()['metadatas']}
+    return [f for f in all_files if f not in existing_files]
+
+
+def process_single_file(file_path):
+    """Process a single file and return its chunks."""
+    with open(file_path, 'r') as f:
+        content = f.read()
+    chunks = split_text_to_chunks(content)
+    return chunks, os.path.basename(file_path)
+
+
+def batch_embed_chunks(chunks, batch_size=32):
+    """Embed chunks in batches."""
+    embeddings = []
+    for i in tqdm(range(0, len(chunks), batch_size), desc="Embedding chunks"):
+        batch = chunks[i:i + batch_size]
+        batch_embeddings = embedding_model.encode(batch, show_progress_bar=True)
+        embeddings.extend(batch_embeddings.tolist())
+    return embeddings
+
+
+def process_and_add_new_files(transcripts_folder_path, collection):
+    """Process and add new transcript files to the vector database."""
+    new_files = get_new_files(transcripts_folder_path, collection)
+    if not new_files:
+        logging.info("No new files to process")
+        return False
+
+    # Cap the worker count at 8, or at the number of files if fewer
+    n_workers = min(8, len(new_files))
+    logging.info(f"Using {n_workers} workers for processing")
+
+    all_chunks = []
+    all_metadata = []
+    all_ids = []
+
+    # Process files in parallel
+    with ProcessPoolExecutor(max_workers=n_workers) as executor:
+        futures = {
+            executor.submit(process_single_file, os.path.join(transcripts_folder_path, file)): file
+            for file in new_files
+        }
+
+        for future in as_completed(futures):
+            file = futures[future]
+            try:
+                chunks, filename = future.result()
+                file_metadata = [{"source": filename} for _ in range(len(chunks))]
+                file_ids = [f"{filename}_chunk_{i}" for i in range(len(chunks))]
+
+                all_chunks.extend(chunks)
+                all_metadata.extend(file_metadata)
+                all_ids.extend(file_ids)
+
+                logging.info(f"Processed {filename}")
+            except Exception as e:
+                logging.error(f"Error processing {file}: {str(e)}")
+                continue
+
+    # Generate embeddings in batches
+    logging.info(f"Generating embeddings for {len(all_chunks)} chunks")
+    embeddings = batch_embed_chunks(all_chunks)
+
+    # Add to the database in batches
+    batch_size = 500
+    for i in range(0, len(all_chunks), batch_size):
+        end_idx = min(i + batch_size, len(all_chunks))
+        collection.upsert(
+            documents=all_chunks[i:end_idx],
+            embeddings=embeddings[i:end_idx],
+            metadatas=all_metadata[i:end_idx],
+            ids=all_ids[i:end_idx]
+        )
+        logging.info(f"Added batch {i // batch_size + 1} to database")
+
+    logging.info(f"Successfully processed {len(new_files)} files")
+    return True
+
+
+def query_database(collection, query_text, n_results=3):
+    """Retrieve the most relevant chunks for the query."""
+    query_embeddings = embedding_model.encode(query_text).tolist()
+    results = collection.query(query_embeddings=query_embeddings, n_results=n_results)
+    retrieved_docs = results['documents'][0]
+    metadatas = results['metadatas'][0]
+    return retrieved_docs, metadatas
+
+
+def enhance_query_with_history(query_text, summarized_history):
+    enhanced_query = f"{query_text}\n\n{summarized_history}"
+    return enhanced_query
+
+
+def update_conversation_history(history, user_query, bot_response):
+    """Update and keep track of the conversation history between the user and the bot."""
+    history.append({"user": user_query, "bot": bot_response})
+    return history
+
+
+def generate_response(conversation_history, query_text, retrieved_docs, source_links):
+    """Generate a response using retrieved documents and the generative AI model."""
+    context = " ".join(retrieved_docs)
+    history_str = "\n".join([f"User: {turn['user']}\nBot: {turn['bot']}" for turn in conversation_history])
+    sources_str = "\n".join(source_links)
+
+    prompt = huberman_prompt.format(
+        context=context,
+        sources=sources_str,
+        history=history_str,
+        question=query_text
+    )
+
+    response = get_llm_response(prompt)
+    full_response = f"{response}\n\nSources:\n{sources_str}"
+    return full_response
+
+
+def main_workflow(transcripts_folder_path, collection):
+    """Run the full RAG workflow."""
+    new_files_added = process_and_add_new_files(transcripts_folder_path, collection)
+    if new_files_added:
+        logging.info("New transcripts added to the database.")
+    else:
+        logging.info("No new files found. Using existing database.")
+
+    conversation_history = []
+
+    while True:
+        query_text = input("\nEnter your query (or type 'exit' to end): ").strip()
+        if query_text.lower() == "exit":
+            print("Ending the conversation. Goodbye!")
+            break
+
+        query_text_with_conversation_history = enhance_query_with_history(query_text, conversation_history)
+        retrieved_docs, metadatas = query_database(collection, query_text_with_conversation_history)
+        print("-" * 50)
+        source_link = get_source_link(metadatas)
+        print(source_link)
+        print("-" * 50)
+
+        if not retrieved_docs:
+            print("No relevant documents found.")
+            continue
+
+        response = generate_response(conversation_history, query_text, retrieved_docs, source_link)
+        conversation_history = update_conversation_history(conversation_history, query_text, response)
+        print("\nGenerated Response:")
+        print(response)
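The module defines main_workflow but the commit does not show how it is invoked. A minimal driver might look like the sketch below; it assumes chromadb's PersistentClient API (chromadb >= 0.4), and the collection name and transcripts folder are placeholders, not values pinned down by this commit:

import chromadb
from Rag.rag_pipeline import main_workflow, chromadb_path

if __name__ == "__main__":
    # Hypothetical wiring: the collection name and folder path are assumptions.
    client = chromadb.PersistentClient(path=chromadb_path)
    collection = client.get_or_create_collection(name="huberman_transcripts")
    main_workflow("Rag/Processed_folder", collection)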