Spaces:

Nightwing11
/

Hubermanbot2

Sleeping

App Files Files

xet

Community

Nightwing11 committed on Feb 3

Commit

029c5e8

1 Parent(s): 0642efd

Change to prompt / dependency issue resolved / dockerize

Browse files

Files changed (11) hide show

.dockerignore +36 -0
Dockerfile +23 -0
Example/rag_example.py +6 -1
Prompts/huberman_prompt.py +20 -0
Rag/rag_pipeline.py +3 -3
poetry.lock +0 -0
pyproject.toml +34 -0
requirements.in +18 -0
requirements.txt +11 -13
setup.sh +0 -0
utils/corefrence.py +47 -9

.dockerignore ADDED Viewed

	@@ -0,0 +1,36 @@

+# Ignore Python bytecode and caches.
+# The **/ prefix makes each pattern match at any directory depth; a bare
+# pattern only matches at the root of the build context.
+**/__pycache__/
+**/*.pyc
+**/*.pyo
+**/*.pyd
+# Ignore virtual environments
+.venv/
+venv/
+# Ignore SQLite journal files
+**/*.sqlite3-journal
+# Ignore IDE files
+.idea/
+.vscode/
+# Ignore editor swap files
+**/*.swp
+**/*.swo
+**/*.swn
+# Ignore data files
+chromadb.db
+chroma.sqlite3
+Rag/chromadb.db
+# Ignore setup scripts
+setup.sh
+# Ignore environment files
+.env

Dockerfile ADDED Viewed

	@@ -0,0 +1,23 @@

+# Use the official Python 3.11.11 image
+FROM python:3.11.11-slim
+# Set the working directory in the container
+WORKDIR /app
+# Copy the requirements file into the container
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
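+# Disabled: spaCy model download and Coreferee install. requirements.txt no
+# longer lists spacy or coreferee, so re-enable these steps together with
+# those dependencies if coreference resolution is switched back on.
+#RUN python -m spacy download en_core_web_sm
+#RUN python -m coreferee install en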
+# Copy the rest of the application code
+COPY . .
+# Set the main entry point
+CMD ["python", "-m", "Example.rag_example"]

Example/rag_example.py CHANGED Viewed

@@ -1,10 +1,15 @@
 import chromadb
-transcripts_folder_path = '/home/nightwing/Codes/Xyzbot/Data/transcripts'
 chromadb_path = "/home/nightwing/Codes/Xyzbot/Rag/chromadb.db"
 client = chromadb.PersistentClient(path=chromadb_path)
 collection = client.get_or_create_collection(name="yt_transcript_collection")
 from Rag.rag_pipeline import main_workflow
 # Run the application
 if __name__ == "__main__":
     main_workflow(transcripts_folder_path, collection)

+import sys
 import chromadb
+# Folder of transcripts handed to main_workflow below; it must be defined at
+# module level or the call in the __main__ guard raises NameError.
+# NOTE(review): both paths are absolute paths on a developer machine, while the
+# Dockerfile runs this module from /app -- confirm they exist in the container.
+transcripts_folder_path = '/home/nightwing/Codes/Xyzbot/Data/transcripts'
 chromadb_path = "/home/nightwing/Codes/Xyzbot/Rag/chromadb.db"
 client = chromadb.PersistentClient(path=chromadb_path)
 collection = client.get_or_create_collection(name="yt_transcript_collection")
+# Debug aid: show the import search path before the project-local import.
+print("Python path:", sys.path)
 from Rag.rag_pipeline import main_workflow
 # Run the application
 if __name__ == "__main__":
     main_workflow(transcripts_folder_path, collection)

Prompts/huberman_prompt.py ADDED Viewed

	@@ -0,0 +1,20 @@

+# Prompt template sent to the LLM. It contains four named placeholders --
+# {context}, {sources}, {history} and {question} -- that the caller must fill
+# in (presumably via str.format; confirm against Rag/rag_pipeline.py).
+huberman_prompt = """
+You are Dr. Andrew Huberman, an expert neuroscientist and educator known for your clear, engaging, and scientifically accurate explanations. When answering, please consider the following:
+1. Provide a clear and concise summary of the scientific concepts involved.
+2. Highlight any relevant research or studies.
+3. Offer actionable insights or practical advice.
+Context:
+{context}
+Sources:
+{sources}
+Conversation History:
+{history}
+Question:
+{question}
+Please respond in a manner that is informative, research-backed, and reflective of your unique style.
+"""

Rag/rag_pipeline.py CHANGED Viewed

@@ -6,7 +6,7 @@ import os
 import logging
 from Llm.llm_endpoints import get_llm_response
 from utils.get_link import get_source_link
-from utils.corefrence import resolve_coreferences
 from Prompts.huberman_prompt import huberman_prompt
 # Configuration
 API_KEY = os.getenv("GOOGLE_API_KEY")
@@ -127,8 +127,8 @@ def main_workflow(transcripts_folder_path, collection):
         if query_text.lower() == "exit":
             print("Ending the conversation. Goodbye")
             break
-        resolved_query = resolve_coreferences(query_text, conversation_history)
-        query_text_with_conversation_history = enhance_query_with_history(resolved_query, conversation_history)
         # resolved_query = resolve_coreference_in_query(query_text_with_conversation_history, conversation_history)
         retrived_docs, metadatas = query_database(collection, query_text_with_conversation_history)
         print("-" * 50)

 import logging
 from Llm.llm_endpoints import get_llm_response
 from utils.get_link import get_source_link
+# from utils.corefrence import resolve_coreferences
 from Prompts.huberman_prompt import huberman_prompt
 # Configuration
 API_KEY = os.getenv("GOOGLE_API_KEY")
         if query_text.lower() == "exit":
             print("Ending the conversation. Goodbye")
             break
+        # resolved_query = resolve_coreferences(query_text, conversation_history)
+        query_text_with_conversation_history = enhance_query_with_history(query_text, conversation_history)
         # resolved_query = resolve_coreference_in_query(query_text_with_conversation_history, conversation_history)
         retrived_docs, metadatas = query_database(collection, query_text_with_conversation_history)
         print("-" * 50)

poetry.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml ADDED Viewed

	@@ -0,0 +1,34 @@

+[project]
+name = "xyzbot"
+version = "0.1.0"
+description = "A rag application"
+# Each author entry takes separate "name" and "email" keys.
+authors = [
+    {name = "Angel", email = "njlghmr@gmail.com"}
+]
+license = {text = "MIT"}
+readme = "README.md"
+requires-python =">=3.11,<3.12"
+dependencies = [
+    "pyarrow (>=19.0.0,<20.0.0)",
+    "pandas (>=2.2.3,<3.0.0)",
+    "pendulum (>=3.0.0,<4.0.0)",
+    "google-generativeai (>=0.8.4,<0.9.0)",
+    "langchain (>=0.3.16,<0.4.0)",
+    "langchain-openai (>=0.3.3,<0.4.0)",
+    "langchain-chroma (>=0.2.1,<0.3.0)",
+    "langchain-community (>=0.3.16,<0.4.0)",
+    "chromadb (>=0.4.14)",
+    "pypdf (==4.2.0)",
+    "flask (==3.0.1)",
+    "flask-cors (==3.0.10)",
+    "sentence-transformers (==3.3.1)",
+    "tqdm (==4.67.1)",
+    "torch (==2.5.1)",
+    "transformers (==4.46.3)",
+    "pydantic (>=2.7.4,<3.0.0)"
+]
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"

requirements.in ADDED Viewed

	@@ -0,0 +1,18 @@

+pyarrow
+pandas[performance, parquet, aws]
+pendulum
+google.generativeai
+langchain
+langchain_openai
+langchain_chroma
+langchain_community
+chromadb==0.4.8
+pypdf
+flask
+flask_cors
+sentence_transformers
+tqdm
+torch
+transformers
+spacy==3.5.0
+coreferee==1.4.1

requirements.txt CHANGED Viewed

@@ -1,18 +1,16 @@
 pyarrow
-pandas[performance, parquet, aws]
 pendulum
 google.generativeai
-langchain
 langchain_openai
 langchain_chroma
-langchain_community
-chromadb==0.4.8
-pypdf
-flask
-flask_cors
-sentence_transformers
-tqdm
-torch
-transformers
-spacy==3.5.0
-coreferee==1.4.1

 pyarrow
+pandas
 pendulum
 google.generativeai
+langchain>=0.3.16,<0.4.0
 langchain_openai
 langchain_chroma
+langchain-community>=0.3.16,<0.4.0
+chromadb>=0.4.14
+pypdf==4.2.0
+flask==3.0.1
+flask_cors==3.0.10
+sentence_transformers==3.3.1
+tqdm==4.67.1
+torch==2.5.1
+pydantic>=2.7.4,<3.0.0

setup.sh CHANGED Viewed

File without changes

utils/corefrence.py CHANGED Viewed

@@ -1,14 +1,52 @@
 import spacy
 nlp = spacy.load('en_core_web_sm')
 nlp.add_pipe("coreferee")
-def resolve_corefrence(query_text, conversation_history):
     combined_text = []
     for turn in conversation_history:
-        combined_text.append(f"User:{turn['user']}")
-        combined_text.append(f"Bot:{turn['Bot']}")
-    combined_text.append(f"User:{query_text}")
-    combined_text = "\n".join(combined_text)
-    doc = nlp(combined_text)
-    resolved_text = doc._.corefrence_resolved
-    resolved_query = resolved_text.split('\n')[-1].replace("User: ", "")
-    return resolved_query.strip()

 import spacy
+from spacy.tokens import Doc
+import coreferee
+# Load spaCy model
 nlp = spacy.load('en_core_web_sm')
 nlp.add_pipe("coreferee")
+# Register the custom extension attribute
+Doc.set_extension('resolved_text', default=None, force=True)
+def resolve_coreferences(query_text, conversation_history):
+    """
+    Resolve coreferences in the current query using spaCy and coreferee.
+
+    The conversation history and the query are joined into one transcript so
+    that mentions in the query can be linked to antecedents from earlier
+    turns. Only the query part of the transcript is returned.
+
+    Args:
+        query_text (str): The current query to resolve.
+        conversation_history (list): Previous turns, each a dict with the
+            keys 'user' and 'Bot'.
+    Returns:
+        str: The query with each resolved mention's root token replaced by
+            the root token text of the chain's most specific mention,
+            stripped of surrounding whitespace.
+    """
+    # Combine conversation history and current query
     combined_text = []
     for turn in conversation_history:
+        combined_text.append(f"User: {turn['user']}")
+        combined_text.append(f"Bot: {turn['Bot']}")
+    combined_text.append(f"User: {query_text}")
+    text = "\n".join(combined_text)
+    # The query is the tail of the transcript. Remembering its character
+    # offset lets us slice it back out without splitting on newlines or
+    # stripping "User: ", both of which corrupt queries that contain them.
+    query_start = len(text) - len(query_text)
+    # Process the text
+    doc = nlp(text)
+    # Map token index -> replacement text for every non-antecedent mention
+    replacements = {}
+    for chain in doc._.coref_chains:
+        # coreferee exposes the most specific mention through its index
+        antecedent_index = chain[chain.most_specific_mention_index].root_index
+        antecedent_text = doc[antecedent_index].text
+        for mention in chain:
+            if mention.root_index != antecedent_index:
+                replacements[mention.root_index] = antecedent_text
+    # Rebuild only the query tokens. Each token keeps its own trailing
+    # whitespace so a replacement never changes the surrounding spacing.
+    resolved_query = "".join(
+        replacements.get(token.i, token.text) + token.whitespace_
+        for token in doc
+        if token.idx >= query_start
+    )
+    return resolved_query.strip()