Spaces:
Sleeping
Sleeping
Commit
·
6961452
1
Parent(s):
7fc2087
Yt video source added
Browse files- .gitignore +5 -1
- Dockerfile +17 -0
- Example/__init__.py +0 -0
- Example/rag_example.py +10 -0
- Rag/{chunking.py → rag_pipeline.py} +19 -13
- requirements.txt +1 -0
- utils/__init__.py +0 -0
- {Rag → utils}/corefrence.py +1 -1
- utils/get_link.py +11 -0
- {Rag → utils}/summarization.py +0 -0
.gitignore
CHANGED
@@ -129,7 +129,11 @@ Rag/chromadb.db/
|
|
129 |
|
130 |
# mkdocs documentation
|
131 |
/site
|
132 |
-
|
|
|
|
|
|
|
|
|
133 |
# mypy
|
134 |
.mypy_cache/
|
135 |
.dmypy.json
|
|
|
129 |
|
130 |
# mkdocs documentation
|
131 |
/site
|
132 |
+
__pycache__/
|
133 |
+
*.pyc
|
134 |
+
*.pyo
|
135 |
+
*.pyd
|
136 |
+
.env
|
137 |
# mypy
|
138 |
.mypy_cache/
|
139 |
.dmypy.json
|
Dockerfile
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Use an official Python runtime as a base image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file into the container first, so the dependency
# layer below is cached and only rebuilt when requirements.txt changes
COPY requirements.txt .

# Install dependencies (--no-cache-dir keeps the image smaller)
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of your application
COPY . .

# Command to run your application (executes the Rag package as a module)
CMD ["python", "-m", "Rag"]
Example/__init__.py
ADDED
File without changes
|
Example/rag_example.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Minimal example: run the RAG workflow against a persistent ChromaDB store.

Opens (or creates) the YouTube-transcript collection and hands it, together
with the transcripts folder, to ``main_workflow``.
"""
import chromadb

from Rag.rag_pipeline import main_workflow

# NOTE(review): absolute, machine-specific paths — consider reading these
# from environment variables or a config file before sharing this example.
transcripts_folder_path = '/home/nightwing/Codes/Xyzbot/Data/transcripts'
chromadb_path = "/home/nightwing/Codes/Xyzbot/Rag/chromadb.db"

# Persistent client so stored embeddings survive across runs.
client = chromadb.PersistentClient(path=chromadb_path)
collection = client.get_or_create_collection(name="yt_transcript_collection")

# Run the application
if __name__ == "__main__":
    main_workflow(transcripts_folder_path, collection)
Rag/{chunking.py → rag_pipeline.py}
RENAMED
@@ -3,18 +3,17 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
3 |
from sentence_transformers import SentenceTransformer
|
4 |
import google.generativeai as genai
|
5 |
import os
|
6 |
-
import json
|
7 |
import logging
|
8 |
from Llm.llm_endpoints import get_llm_response
|
9 |
-
from
|
10 |
-
from Rag.corefrence import resolve_coreference_in_query
|
11 |
# Configuration
|
12 |
API_KEY = os.getenv("GOOGLE_API_KEY")
|
13 |
if API_KEY:
|
14 |
genai.configure(api_key=API_KEY)
|
15 |
|
16 |
chromadb_path = "/home/nightwing/Codes/Xyzbot/Rag/chromadb.db"
|
17 |
-
transcripts_folder_path = '/home/nightwing/Codes/Xyzbot/Data/transcripts'
|
18 |
processed_files_path = "/home/nightwing/Codes/Xyzbot/Rag/Processed_folder/processed_files.json"
|
19 |
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
|
20 |
|
@@ -88,17 +87,22 @@ def update_conversation_history(history, user_query, bot_response):
|
|
88 |
return history
|
89 |
|
90 |
|
91 |
-
def generate_response(conversation_history, query_text, retrieved_docs):
|
92 |
"""Generate a response using retrieved documents and the generative AI model."""
|
93 |
|
94 |
context = " ".join(retrieved_docs)
|
95 |
history_str = "\n".join([f"User: {turn['user']}\nBot: {turn['bot']}" for turn in conversation_history])
|
|
|
|
|
96 |
prompt = f"""
|
97 |
Using the context below and the conversation history, answer the question:
|
98 |
|
99 |
Context:
|
100 |
{context}
|
101 |
|
|
|
|
|
|
|
102 |
Conversation History:
|
103 |
{history_str}
|
104 |
|
@@ -106,7 +110,10 @@ def generate_response(conversation_history, query_text, retrieved_docs):
|
|
106 |
"""
|
107 |
|
108 |
response = get_llm_response(prompt)
|
109 |
-
|
|
|
|
|
|
|
110 |
|
111 |
|
112 |
# Main Workflow
|
@@ -128,20 +135,19 @@ def main_workflow(transcripts_folder_path, collection):
|
|
128 |
print("Ending the conversation. Goodbye")
|
129 |
break
|
130 |
query_text_with_conversation_history = enhance_query_with_history(query_text, conversation_history)
|
131 |
-
resolved_query = resolve_coreference_in_query(query_text_with_conversation_history, conversation_history)
|
132 |
-
retrived_docs, metadatas = query_database(collection,
|
133 |
print("-" * 50)
|
134 |
-
|
|
|
135 |
print("-" * 50)
|
136 |
if not retrived_docs:
|
137 |
print("No relevent documents is found")
|
138 |
continue
|
139 |
-
response = generate_response(conversation_history, query_text, retrived_docs)
|
140 |
conversation_history = update_conversation_history(conversation_history, query_text, response)
|
141 |
print("\nGenerated Response:")
|
142 |
print(response)
|
143 |
|
144 |
|
145 |
-
|
146 |
-
if __name__ == "__main__":
|
147 |
-
main_workflow(transcripts_folder_path, collection)
|
|
|
3 |
from sentence_transformers import SentenceTransformer
|
4 |
import google.generativeai as genai
|
5 |
import os
|
|
|
6 |
import logging
|
7 |
from Llm.llm_endpoints import get_llm_response
|
8 |
+
from utils.get_link import get_source_link
|
9 |
+
# from Rag.corefrence import resolve_coreference_in_query
|
10 |
# Configuration
|
11 |
API_KEY = os.getenv("GOOGLE_API_KEY")
|
12 |
if API_KEY:
|
13 |
genai.configure(api_key=API_KEY)
|
14 |
|
15 |
chromadb_path = "/home/nightwing/Codes/Xyzbot/Rag/chromadb.db"
|
16 |
+
# transcripts_folder_path = '/home/nightwing/Codes/Xyzbot/Data/transcripts'
|
17 |
processed_files_path = "/home/nightwing/Codes/Xyzbot/Rag/Processed_folder/processed_files.json"
|
18 |
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
|
19 |
|
|
|
87 |
return history
|
88 |
|
89 |
|
90 |
+
def generate_response(conversation_history, query_text, retrieved_docs, source_links):
|
91 |
"""Generate a response using retrieved documents and the generative AI model."""
|
92 |
|
93 |
context = " ".join(retrieved_docs)
|
94 |
history_str = "\n".join([f"User: {turn['user']}\nBot: {turn['bot']}" for turn in conversation_history])
|
95 |
+
sources_str = "\n".join(source_links)
|
96 |
+
|
97 |
prompt = f"""
|
98 |
Using the context below and the conversation history, answer the question:
|
99 |
|
100 |
Context:
|
101 |
{context}
|
102 |
|
103 |
+
Conversation Sources:
|
104 |
+
{sources_str}
|
105 |
+
|
106 |
Conversation History:
|
107 |
{history_str}
|
108 |
|
|
|
110 |
"""
|
111 |
|
112 |
response = get_llm_response(prompt)
|
113 |
+
|
114 |
+
# Append sources to the response
|
115 |
+
full_response = f"{response}\n\nSources:\n{sources_str}"
|
116 |
+
return full_response
|
117 |
|
118 |
|
119 |
# Main Workflow
|
|
|
135 |
print("Ending the conversation. Goodbye")
|
136 |
break
|
137 |
query_text_with_conversation_history = enhance_query_with_history(query_text, conversation_history)
|
138 |
+
# resolved_query = resolve_coreference_in_query(query_text_with_conversation_history, conversation_history)
|
139 |
+
retrived_docs, metadatas = query_database(collection, query_text_with_conversation_history)
|
140 |
print("-" * 50)
|
141 |
+
source_link = get_source_link(metadatas)
|
142 |
+
print(source_link)
|
143 |
print("-" * 50)
|
144 |
if not retrived_docs:
|
145 |
print("No relevent documents is found")
|
146 |
continue
|
147 |
+
response = generate_response(conversation_history, query_text, retrived_docs, source_link)
|
148 |
conversation_history = update_conversation_history(conversation_history, query_text, response)
|
149 |
print("\nGenerated Response:")
|
150 |
print(response)
|
151 |
|
152 |
|
153 |
+
|
|
|
|
requirements.txt
CHANGED
@@ -13,3 +13,4 @@ flask_cors
|
|
13 |
sentence_transformers
|
14 |
tqdm
|
15 |
torch
|
|
|
|
13 |
sentence_transformers
|
14 |
tqdm
|
15 |
torch
|
16 |
+
transformers
|
utils/__init__.py
ADDED
File without changes
|
{Rag → utils}/corefrence.py
RENAMED
@@ -1,6 +1,6 @@
|
|
1 |
from transformers import pipeline
|
2 |
|
3 |
-
coref_pipeline = pipeline("coref-resolution", model="coref-
|
4 |
|
5 |
|
6 |
def resolve_coreference_in_query(query_text, conversation_history):
|
|
|
1 |
from transformers import pipeline
|
2 |
|
3 |
+
coref_pipeline = pipeline("coref-resolution", model="coref-roberta-large")
|
4 |
|
5 |
|
6 |
def resolve_coreference_in_query(query_text, conversation_history):
|
utils/get_link.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_source_link(metadatas):
    """Build YouTube watch URLs from transcript metadata.

    Each metadata dict is expected to carry a ``'source'`` key holding the
    transcript filename of the form ``<video_id>.txt``; the video id is
    everything before the ``.txt`` suffix.

    Args:
        metadatas: iterable of dicts, each with a ``'source'`` filename.

    Returns:
        list[str]: one full YouTube link per metadata entry.
    """
    base_url = 'https://www.youtube.com/watch?v='
    yt_links = []
    for metadata in metadatas:
        # Everything before '.txt' is the raw video id.
        video_id = metadata['source'].split('.txt')[0]
        # Bug fix: build each URL from the constant base instead of reusing
        # and mutating a shared accumulator — the original appended every
        # earlier video id into each subsequent link.
        yt_links.append(base_url + video_id)
    return yt_links
|
{Rag → utils}/summarization.py
RENAMED
File without changes
|