Nightwing11 commited on
Commit
9e9f1ea
Β·
1 Parent(s): c706fe8

chat ui with gradio

Browse files
Files changed (4) hide show
  1. .gradio/certificate.pem +31 -0
  2. requirements.txt +3 -1
  3. ui/__init__.py +0 -0
  4. ui/app.py +140 -0
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
requirements.txt CHANGED
@@ -15,4 +15,6 @@ pypdf==4.2.0
15
  # ML/AI Dependencies (with CPU-only versions)
16
  sentence_transformers==2.3.1
17
  --extra-index-url https://download.pytorch.org/whl/cpu
18
- torch==2.1.0+cpu
 
 
 
15
  # ML/AI Dependencies (with CPU-only versions)
16
  sentence_transformers==2.3.1
17
  --extra-index-url https://download.pytorch.org/whl/cpu
18
+ torch==2.1.0+cpu
19
+
20
+ gradio
ui/__init__.py ADDED
File without changes
ui/app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import chromadb
3
+ from typing import List, Dict
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ project_root = Path(__file__).parent.parent
8
+ sys.path.append(str(project_root))
9
+ from Rag.rag_pipeline import (
10
+ query_database,
11
+ generate_response,
12
+ enhance_query_with_history,
13
+ update_conversation_history,
14
+ process_and_add_new_files
15
+ )
16
+
17
+ INTRODUCTION = """
18
+ # 🧠 Welcome to HubermanBot!
19
+
20
+ I am your AI assistant trained on Andrew Huberman's podcast content. My knowledge base includes detailed information about:
21
+
22
+ - 🎯 Peak Performance & Focus
23
+ - 😴 Sleep Science & Optimization
24
+ - πŸ‹οΈ Physical Fitness & Recovery
25
+ - 🧘 Mental Health & Stress Management
26
+ - πŸ§ͺ Neuroscience & Biology
27
+ - πŸ’ͺ Habit Formation & Behavior Change
28
+
29
+ For each response, I'll provide:
30
+ - Detailed answers based on podcast content
31
+ - Direct source links to specific episodes
32
+ - Scientific context when available
33
+
34
+ Ask me anything about these topics, and I'll help you find relevant information from the Huberman Lab Podcast!
35
+
36
+ Example questions you might ask:
37
+ - "What does Dr. Huberman recommend for better sleep?"
38
+ - "How can I improve my focus and concentration?"
39
+ - "What are the best practices for morning routines?"
40
+ """
41
+
42
+
43
+ def format_youtube_url(filename: str) -> str:
44
+ """Convert filename to YouTube URL"""
45
+ # Extract video ID by removing the timestamp and .txt extension
46
+ video_id = filename.split('_')[0]
47
+ return f"https://www.youtube.com/watch?v={video_id}"
48
+
49
+
50
+ class RAGChatInterface:
51
+ def __init__(self, transcripts_folder_path: str, collection):
52
+ self.transcripts_folder_path = transcripts_folder_path
53
+ self.collection = collection
54
+ self.conversation_history: List[Dict[str, str]] = []
55
+
56
+ def process_query(self, message: str, history: List[List[str]]) -> str:
57
+ """Process a single query and return the response"""
58
+ # Convert Gradio history format to our conversation history format
59
+ self.conversation_history = [
60
+ {"user": user_msg, "bot": bot_msg}
61
+ for user_msg, bot_msg in history
62
+ ]
63
+
64
+ # Enhance query with conversation history
65
+ query_with_history = enhance_query_with_history(message, self.conversation_history)
66
+
67
+ # Get relevant documents
68
+ retrieved_docs, metadatas = query_database(self.collection, query_with_history)
69
+
70
+ if not retrieved_docs:
71
+ return "I apologize, but I couldn't find any relevant information about that in my knowledge base. Could you try rephrasing your question or ask about a different topic covered in the Huberman Lab Podcast?"
72
+
73
+ # Generate response
74
+ source_links = [meta["source"] for meta in metadatas]
75
+ response = generate_response(
76
+ self.conversation_history,
77
+ message,
78
+ retrieved_docs,
79
+ source_links
80
+ )
81
+
82
+ # Remove duplicate sources and convert to YouTube URLs
83
+ unique_sources = list(set(source_links))
84
+ youtube_urls = [format_youtube_url(source) for source in unique_sources]
85
+
86
+ # Format response with markdown for better readability
87
+ formatted_response = f"{response}\n\n---\nπŸ“š **Source Episodes:**\n"
88
+ for url in youtube_urls:
89
+ formatted_response += f"- {url}\n"
90
+
91
+ return formatted_response
92
+
93
+
94
+ def create_interface(transcripts_folder_path: str, collection) -> gr.Interface:
95
+ """Create and configure the Gradio interface"""
96
+ # Initialize the RAG chat interface
97
+ rag_chat = RAGChatInterface(transcripts_folder_path, collection)
98
+
99
+ # Create the Gradio interface with custom styling
100
+ interface = gr.ChatInterface(
101
+ fn=rag_chat.process_query,
102
+ title="🧠 HubermanBot - Your Neuroscience & Wellness AI Assistant",
103
+ description=INTRODUCTION,
104
+ examples=[
105
+ "What are Dr. Huberman's top recommendations for better sleep?",
106
+ "How does sunlight exposure affect our circadian rhythm?",
107
+ "What supplements does Dr. Huberman recommend for focus?",
108
+ "What are the best practices for morning routines according to Dr. Huberman?",
109
+ "How can I optimize my workout recovery based on neuroscience?",
110
+ ],
111
+ theme=gr.themes.Soft(
112
+ primary_hue="indigo",
113
+ secondary_hue="blue",
114
+ )
115
+ )
116
+
117
+ return interface
118
+
119
+
120
+ def main():
121
+ # Get absolute path for ChromaDB
122
+ project_root = Path(__file__).parent.parent
123
+ chromadb_path = project_root / "Rag" / "chromadb.db"
124
+
125
+ client = chromadb.PersistentClient(path=str(chromadb_path))
126
+ collection = client.get_or_create_collection(name="yt_transcript_collection")
127
+
128
+ # Use absolute path for transcripts folder too
129
+ transcripts_folder_path = project_root / "Data" / "transcripts"
130
+
131
+ # Process any new files
132
+ process_and_add_new_files(str(transcripts_folder_path), collection)
133
+
134
+ # Create and launch the interface
135
+ interface = create_interface(str(transcripts_folder_path), collection)
136
+ interface.launch(share=True, server_port=7860)
137
+
138
+
139
+ if __name__ == "__main__":
140
+ main()