Spaces:
Sleeping
Sleeping
import gradio as gr | |
import chromadb | |
from typing import List, Dict | |
import sys | |
from pathlib import Path | |
from sentence_transformers import SentenceTransformer | |
# Sentence-embedding model, loaded once when this module is imported.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Directory that contains this file; every project path below hangs off it.
project_root = Path(__file__).resolve().parent

# Make the project root and its sub-folders importable. Entries are appended
# in this exact order, so earlier ones win on a module-name clash.
sys.path.extend(
    str(project_root.joinpath(*parts))
    for parts in (
        (),
        ("Rag",),
        ("Data",),
        ("Data", "transcripts"),
        ("Data", "video_links"),
        ("Llm",),
        ("Prompts",),
        ("utils",),
    )
)
from Rag.rag_pipeline import ( | |
query_database, | |
generate_response, | |
enhance_query_with_history, | |
update_conversation_history, | |
process_and_add_new_files | |
) | |
# Markdown text passed as the chat interface's description.
# Blank lines separate each paragraph from the bullet list around it;
# without them Markdown folds the following paragraph into the last bullet.
INTRODUCTION = """
# 🧠 Welcome to HubermanBot!

I am your AI assistant trained on Andrew Huberman's podcast content. My knowledge base includes detailed information about:

- 🎯 Peak Performance & Focus
- 😴 Sleep Science & Optimization
- 🏋️ Physical Fitness & Recovery
- 🧘 Mental Health & Stress Management
- 🧪 Neuroscience & Biology
- 💪 Habit Formation & Behavior Change

For each response, I'll provide:

- Detailed answers based on podcast content
- Direct source links to specific episodes
- Scientific context when available

Ask me anything about these topics, and I'll help you find relevant information from the Huberman Lab Podcast!

Example questions you might ask:

- "What does Dr. Huberman recommend for better sleep?"
- "How can I improve my focus and concentration?"
- "What are the best practices for morning routines?"
"""
def initialize_chroma_client(rag_path: Path):
    """Open the persistent Chroma store and return the transcript collection.

    Args:
        rag_path: Directory of the on-disk Chroma database.

    Returns:
        The ``yt_transcript_collection`` collection; it is created empty when
        it cannot be fetched.
    """
    print(f"Initializing ChromaDB at: {rag_path}")
    client = chromadb.PersistentClient(path=str(rag_path))
    print(f"Available collections: {client.list_collections()}")
    try:
        # Only the lookup is guarded, so an error while counting is not
        # mistaken for a missing collection.
        collection = client.get_collection(name="yt_transcript_collection")
    except Exception as e:
        # Kept broad on purpose: the exception type raised for a missing
        # collection appears to vary between chromadb releases.
        print(f"No existing collection found, creating new one: {str(e)}")
        collection = client.create_collection(name="yt_transcript_collection")
    else:
        # count() asks the store for the size instead of loading every
        # record into memory just to take len().
        print(f"Found existing collection with {collection.count()} documents")
    return collection
def format_youtube_url(filename: str) -> str:
    """Convert a transcript filename to its YouTube watch URL.

    The filename is expected to start with the video ID. YouTube IDs are 11
    characters drawn from ``A-Z a-z 0-9 _ -``, so an ID may itself contain an
    underscore; cutting at the first ``_`` would truncate such IDs.

    Args:
        filename: Source name such as ``"<video_id>_<anything>"``.

    Returns:
        ``https://www.youtube.com/watch?v=<video_id>``. When the name does not
        start with an 11-character ID followed by ``_``, ``.`` or the end of
        the string, everything before the first ``_`` is used instead.
    """
    import re  # local import: keeps this block self-contained

    id_prefix = re.match(r"[A-Za-z0-9_-]{11}(?=[_.]|\Z)", filename)
    video_id = id_prefix.group(0) if id_prefix else filename.split('_')[0]
    return f"https://www.youtube.com/watch?v={video_id}"
class RAGChatInterface:
    """Chat callback that answers questions from the transcript collection."""

    def __init__(self, transcripts_folder_path: str, collection):
        """Store the transcript folder path and the collection to query.

        Args:
            transcripts_folder_path: Path of the transcripts folder (stored
                only; not read by this class).
            collection: Collection object handed to ``query_database``.
        """
        self.transcripts_folder_path = transcripts_folder_path
        self.collection = collection
        # Rebuilt from the UI-supplied history on every process_query call.
        self.conversation_history: List[Dict[str, str]] = []

    @staticmethod
    def _append_sources(response: str, urls: List[str]) -> str:
        """Return ``response`` followed by a bullet list of unique ``urls``."""
        # dict.fromkeys drops duplicates while keeping first-seen order, so
        # the list is stable across runs (set ordering of strings is not).
        source_lines = "".join(f"- {url}\n" for url in dict.fromkeys(urls))
        # NOTE(review): the leading "π" looks like a mis-encoded emoji; the
        # intended glyph cannot be recovered here, so the text is unchanged.
        return f"{response}\n\n---\nπ **Source Episodes:**\n{source_lines}"

    def process_query(self, message: str, history: List[List[str]]) -> str:
        """Process a single query and return the response.

        Args:
            message: The user's latest message.
            history: Earlier turns as ``[user_message, bot_message]`` pairs.
                NOTE(review): assumes the pair-style history format; a
                dict-per-message history would not unpack correctly here.

        Returns:
            The generated answer followed by the source episode links, or a
            fixed apology when nothing was retrieved.
        """
        self.conversation_history = [
            {"user": user_msg, "bot": bot_msg}
            for user_msg, bot_msg in history
        ]
        query_with_history = enhance_query_with_history(message, self.conversation_history)
        retrieved_docs, metadatas = query_database(self.collection, query_with_history)
        if not retrieved_docs:
            return "I apologize, but I couldn't find any relevant information about that in my knowledge base. Could you try rephrasing your question or ask about a different topic covered in the Huberman Lab Podcast?"
        source_links = [meta["source"] for meta in metadatas]
        response = generate_response(
            self.conversation_history,
            message,
            retrieved_docs,
            source_links,
        )
        # De-duplicate on the URL, not the filename: several source files can
        # map to the same video and would otherwise be listed twice.
        youtube_urls = [format_youtube_url(source) for source in source_links]
        return self._append_sources(response, youtube_urls)
def create_interface(transcripts_folder_path: str, collection) -> gr.ChatInterface:
    """Create and configure the Gradio chat interface.

    Args:
        transcripts_folder_path: Path of the transcripts folder, passed on to
            ``RAGChatInterface``.
        collection: Collection object the chat handler will query.

    Returns:
        A ``gr.ChatInterface`` whose callback is
        ``RAGChatInterface.process_query``; it is not launched here.
    """
    rag_chat = RAGChatInterface(transcripts_folder_path, collection)
    interface = gr.ChatInterface(
        fn=rag_chat.process_query,
        title="🧠 HubermanBot - Your Neuroscience & Wellness AI Assistant",
        description=INTRODUCTION,
        examples=[
            "What are Dr. Huberman's top recommendations for better sleep?",
            "How does sunlight exposure affect our circadian rhythm?",
            "What supplements does Dr. Huberman recommend for focus?",
            "What are the best practices for morning routines according to Dr. Huberman?",
            "How can I optimize my workout recovery based on neuroscience?",
        ],
        theme=gr.themes.Soft(
            primary_hue="indigo",
            secondary_hue="blue",
        ),
    )
    return interface
def main():
    """Open the vector store, ingest new transcripts, and launch the chat UI."""
    # Resolve to an absolute path (as the module-level project_root does) so
    # the database and transcript locations do not depend on the working
    # directory the script was started from.
    project_root = Path(__file__).resolve().parent
    rag_path = project_root / "Rag" / "chromadb.db"
    transcripts_folder_path = project_root / "Data" / "transcripts"

    # Initialize ChromaDB
    print("Starting ChromaDB initialization...")
    collection = initialize_chroma_client(rag_path)
    print("ChromaDB initialization complete")

    # Process any new files
    print("Checking for new files...")
    new_files_added = process_and_add_new_files(str(transcripts_folder_path), collection)
    if not new_files_added:
        print("No new files to process")

    # Create and launch the interface
    print("Launching Gradio interface...")
    interface = create_interface(str(transcripts_folder_path), collection)
    # NOTE(review): share=True requests a public share link; confirm that is
    # intended for every environment this script runs in.
    interface.launch(share=True, server_port=7860)
# Script entry point: start the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()