Nightwing11 committed on
Commit
7f23ebc
·
1 Parent(s): fe1196c

Solving issue of loading transcript

Browse files
Data/get_video_link.py CHANGED
@@ -1,17 +1,28 @@
1
  import os
2
  import requests
3
  from dotenv import load_dotenv
4
- from new_video_added import get_new_video_url
5
  from datetime import datetime
6
  import json
7
 
8
  load_dotenv()
9
 
10
  api_key = os.getenv('API_KEY')
 
11
  BASE_URL = "https://www.googleapis.com/youtube/v3"
 
12
  channel = "https://www.youtube.com/@hubermanlab/videos"
13
  new_video_added = False
14
- video_links_folder_name = "videolinks"
 
 
 
 
 
 
 
 
 
15
 
16
 
17
  def get_chanel_id(chanel_name):
@@ -104,6 +115,7 @@ def load_video_links():
104
 
105
 
106
  def video_links_main():
 
107
  video_links = load_video_links()
108
  if video_links:
109
  print(f"Using {len(video_links)} saved video links")
 
1
  import os
2
  import requests
3
  from dotenv import load_dotenv
4
+ from Data.new_video_added import get_new_video_url
5
  from datetime import datetime
6
  import json
7
 
8
  load_dotenv()
9
 
10
  api_key = os.getenv('API_KEY')
11
+ CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
12
  BASE_URL = "https://www.googleapis.com/youtube/v3"
13
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
14
  channel = "https://www.youtube.com/@hubermanlab/videos"
15
  new_video_added = False
16
+ # video_links_folder_name = os.path.join(BASE_DIR, "videolinks")
17
+ print("THIS IS BASE DIR:", BASE_DIR)
18
+ print("THIS is current dir:", CURRENT_DIR)
19
+ video_links_folder_name = os.path.join(CURRENT_DIR, "videolinks")
20
+
21
+
22
+ def ensure_directories():
23
+ if not os.path.exists(video_links_folder_name):
24
+ os.makedirs(video_links_folder_name)
25
+ print(f"Directory {video_links_folder_name} created")
26
 
27
 
28
  def get_chanel_id(chanel_name):
 
115
 
116
 
117
  def video_links_main():
118
+ ensure_directories()
119
  video_links = load_video_links()
120
  if video_links:
121
  print(f"Using {len(video_links)} saved video links")
Data/yt_transcript.py CHANGED
@@ -1,5 +1,6 @@
1
  from youtube_transcript_api import YouTubeTranscriptApi
2
- from get_video_link import video_links_main
 
3
  import os
4
  from datetime import datetime
5
 
 
1
  from youtube_transcript_api import YouTubeTranscriptApi
2
+ # from get_video_link import video_links_main
3
+ from Data.get_video_link import video_links_main
4
  import os
5
  from datetime import datetime
6
 
Rag/__init__.py ADDED
File without changes
Rag/rag.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain_community.vectorstores import Chroma
4
+ from langchain.chains import ConversationalRetrievalChain
5
+ from langchain_community.document_loaders import TextLoader
6
+ from langchain.memory import ConversationBufferMemory
7
+ import google.generativeai as genai
8
+ import os
9
+ from typing import Dict, List
10
+ import os
11
+ import sys
12
+ from Data.yt_transcript import all_video_transcript_pipeline
13
+ import google.generativeai as genai
14
+
15
+ PROJECT_ROOT = os.path.abspath(os.path.dirname(os.path.abspath(__file__)))
16
+ sys.path.append(PROJECT_ROOT)
17
+ print("THIS IS PROJECT ROOT")
18
+ print(PROJECT_ROOT)
19
+ API_KEY = os.getenv("GOOGLE_API_KEY")
20
+ if API_KEY:
21
+ genai.configure(api_key=API_KEY)
22
+ print(API_KEY)
23
+
24
+ full_transcripts = all_video_transcript_pipeline()
25
+ print("this is full transcripts of all the youtube videos")
26
+ print(full_transcripts)
27
+
28
+ # loader = TextLoader()
requirements.txt CHANGED
@@ -1,2 +1,12 @@
1
- requests~=2.32.3
2
- python-dotenv~=1.0.1
 
 
 
 
 
 
 
 
 
 
 
1
+ pyarrow
2
+ pandas[performance, parquet, aws]
3
+ pendulum
4
+ google.generativeai
5
+ langchain
6
+ langchain_openai
7
+ langchain_chroma
8
+ langchain_community
9
+ chromadb
10
+ pypdf
11
+ flask
12
+ flask_cors