Nightwing11 committed on
Commit
b9981a2
·
1 Parent(s): 97a5e6f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -0
app.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import chromadb
3
+ from typing import List, Dict
4
+ import sys
5
+ from pathlib import Path
6
+
7
# Directory that contains this file; every path below is anchored to it.
project_root = Path(__file__).resolve().parent

# Make the project root and its sub-directories importable. Entries are
# appended in this exact order (root first), so existing sys.path entries
# keep precedence over them.
sys.path.extend(
    str(project_root.joinpath(*parts))
    for parts in (
        (),
        ("Rag",),
        ("Data",),
        ("Data", "transcripts"),
        ("Data", "video_links"),
        ("Llm",),
        ("Prompts",),
        ("utils",),
    )
)
16
+ from Rag.rag_pipeline import (
17
+ query_database,
18
+ generate_response,
19
+ enhance_query_with_history,
20
+ update_conversation_history,
21
+ process_and_add_new_files
22
+ )
23
+
24
# Markdown shown as the chat interface description: a welcome banner, the
# topic areas covered, what each answer contains, and sample questions.
INTRODUCTION = """
# 🧠 Welcome to HubermanBot!

I am your AI assistant trained on Andrew Huberman's podcast content. My knowledge base includes detailed information about:

- 🎯 Peak Performance & Focus
- 😴 Sleep Science & Optimization
- 🏋️ Physical Fitness & Recovery
- 🧘 Mental Health & Stress Management
- 🧪 Neuroscience & Biology
- 💪 Habit Formation & Behavior Change

For each response, I'll provide:
- Detailed answers based on podcast content
- Direct source links to specific episodes
- Scientific context when available

Ask me anything about these topics, and I'll help you find relevant information from the Huberman Lab Podcast!

Example questions you might ask:
- "What does Dr. Huberman recommend for better sleep?"
- "How can I improve my focus and concentration?"
- "What are the best practices for morning routines?"
"""
48
+
49
+
50
def format_youtube_url(filename: str) -> str:
    """Convert a transcript filename to a YouTube watch URL.

    The video ID is taken from the start of ``filename``. YouTube IDs are
    11 characters drawn from ``A-Z a-z 0-9 - _``, so an ID may itself
    contain an underscore; cutting at the first ``_`` would truncate it.

    Args:
        filename: Transcript filename; presumably shaped like
            ``<video_id>_<timestamp>.txt`` (verify against the code that
            writes the transcripts).

    Returns:
        ``https://www.youtube.com/watch?v=<video_id>``.
    """
    id_length = 11
    candidate = filename[:id_length]

    # Preferred path: an 11-character ID immediately followed by the "_"
    # that separates it from the rest of the name.
    looks_like_id = (
        len(filename) > id_length
        and filename[id_length] == "_"
        and all(ch.isascii() and (ch.isalnum() or ch in "-_") for ch in candidate)
    )

    # Fallback keeps the previous behaviour: everything before the first "_".
    video_id = candidate if looks_like_id else filename.split("_")[0]
    return f"https://www.youtube.com/watch?v={video_id}"
55
+
56
+
57
class RAGChatInterface:
    """Adapter between a Gradio chat callback and the RAG pipeline helpers.

    Holds the collection to query and the most recent conversation history
    in the ``{"user": ..., "bot": ...}`` shape passed to the pipeline.
    """

    def __init__(self, transcripts_folder_path: str, collection):
        """Store the transcripts folder path and the collection to query."""
        self.transcripts_folder_path = transcripts_folder_path
        self.collection = collection
        self.conversation_history: List[Dict[str, str]] = []

    @staticmethod
    def _to_conversation(history) -> List[Dict[str, str]]:
        """Convert Gradio chat history into ``{"user", "bot"}`` turns.

        Accepts both history layouts Gradio can hand to a chat function:
        ``[user, bot]`` pairs, and role/content dicts ("messages" format).
        """
        turns: List[Dict[str, str]] = []
        pending_user = ""
        for entry in history or []:
            if isinstance(entry, dict):
                role = entry.get("role")
                if role == "user":
                    pending_user = entry.get("content")
                elif role == "assistant":
                    # Close the turn opened by the preceding user message.
                    turns.append({"user": pending_user, "bot": entry.get("content")})
                    pending_user = ""
            else:
                user_msg, bot_msg = entry
                turns.append({"user": user_msg, "bot": bot_msg})
        return turns

    @staticmethod
    def _format_response(response: str, urls) -> str:
        """Append a markdown list of unique source URLs to ``response``.

        URLs are de-duplicated after conversion, so two sources mapping to
        the same video are listed once. First-seen order is preserved,
        which keeps the output deterministic.
        """
        unique_urls = dict.fromkeys(urls)
        bullets = "".join(f"- {url}\n" for url in unique_urls)
        return f"{response}\n\n---\n📚 **Source Episodes:**\n{bullets}"

    def process_query(self, message: str, history: List[List[str]]) -> str:
        """Process a single query and return the markdown response.

        Args:
            message: The user's latest message.
            history: Prior turns as supplied by Gradio.

        Returns:
            The generated answer followed by its source links, or an
            apology message when no documents were retrieved.
        """
        # Convert Gradio history format to our conversation history format
        self.conversation_history = self._to_conversation(history)

        # Enhance query with conversation history
        query_with_history = enhance_query_with_history(message, self.conversation_history)

        # Get relevant documents
        retrieved_docs, metadatas = query_database(self.collection, query_with_history)

        if not retrieved_docs:
            return "I apologize, but I couldn't find any relevant information about that in my knowledge base. Could you try rephrasing your question or ask about a different topic covered in the Huberman Lab Podcast?"

        # Generate response
        source_links = [meta["source"] for meta in metadatas]
        response = generate_response(
            self.conversation_history,
            message,
            retrieved_docs,
            source_links,
        )

        # Convert to YouTube URLs; duplicates are removed while formatting
        youtube_urls = [format_youtube_url(source) for source in source_links]
        return self._format_response(response, youtube_urls)
99
+
100
+
101
def create_interface(transcripts_folder_path: str, collection) -> gr.ChatInterface:
    """Create and configure the Gradio chat interface.

    Args:
        transcripts_folder_path: Path to the transcripts folder, passed
            through to ``RAGChatInterface``.
        collection: Collection object passed through to ``RAGChatInterface``.

    Returns:
        The configured ``gr.ChatInterface`` (not yet launched).
    """
    # Initialize the RAG chat interface
    rag_chat = RAGChatInterface(transcripts_folder_path, collection)

    # Create the Gradio interface with custom styling
    interface = gr.ChatInterface(
        fn=rag_chat.process_query,
        title="🧠 HubermanBot - Your Neuroscience & Wellness AI Assistant",
        description=INTRODUCTION,
        examples=[
            "What are Dr. Huberman's top recommendations for better sleep?",
            "How does sunlight exposure affect our circadian rhythm?",
            "What supplements does Dr. Huberman recommend for focus?",
            "What are the best practices for morning routines according to Dr. Huberman?",
            "How can I optimize my workout recovery based on neuroscience?",
        ],
        theme=gr.themes.Soft(
            primary_hue="indigo",
            secondary_hue="blue",
        ),
    )

    return interface
125
+
126
+
127
def main():
    """Open the ChromaDB collection, ingest new transcripts, and launch the UI."""
    # Get absolute path for ChromaDB. resolve() makes the path absolute
    # even when __file__ is relative, so it no longer depends on the
    # current working directory.
    project_root = Path(__file__).resolve().parent
    chromadb_path = project_root / "Rag" / "chromadb.db"

    client = chromadb.PersistentClient(path=str(chromadb_path))
    collection = client.get_or_create_collection(name="yt_transcript_collection")

    # Use absolute path for transcripts folder too
    transcripts_folder_path = project_root / "Data" / "transcripts"

    # Process any new files
    process_and_add_new_files(str(transcripts_folder_path), collection)

    # Create and launch the interface
    interface = create_interface(str(transcripts_folder_path), collection)
    interface.launch(share=True, server_port=7860)


if __name__ == "__main__":
    main()