MoizK committed on
Commit
a181620
·
verified ·
1 Parent(s): dbc7aad

Update download_assets.py

Browse files
Files changed (1) hide show
  1. download_assets.py +59 -51
download_assets.py CHANGED
@@ -1,51 +1,59 @@
1
- from huggingface_hub import hf_hub_download
2
- import os
3
-
4
- def download_assets():
5
- """Download necessary assets from Hugging Face Hub"""
6
- # Create directories if they don't exist
7
- os.makedirs('data', exist_ok=True)
8
- os.makedirs('vectorstore/db_faiss', exist_ok=True)
9
-
10
- # Dataset repository ID
11
- repo_id = "MoizK/mindmedic-assets"
12
-
13
- # Download PDF files
14
- pdf_files = [
15
- "71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf",
16
- "Depression-NIM-2024.pdf",
17
- "Depression-and-Other-Common-Mental-Disorders-Global-Health-Estimates.pdf",
18
- "Doing-What-Matters-in-Times-of-Stress.pdf",
19
- "Generalized-Anxiety-Disorder-When-Worry-Gets-Out-of-Control.pdf",
20
- "WHO-mhGAP-Intervention-Guide-v2.pdf",
21
- "social-anxiety-disorder-more-than-just-shyness.pdf"
22
- ]
23
-
24
- for pdf_file in pdf_files:
25
- try:
26
- hf_hub_download(
27
- repo_id=repo_id,
28
- filename=f"data/{pdf_file}",
29
- local_dir=".",
30
- local_dir_use_symlinks=False
31
- )
32
- print(f"Downloaded {pdf_file}")
33
- except Exception as e:
34
- print(f"Error downloading {pdf_file}: {e}")
35
-
36
- # Download FAISS index files
37
- index_files = ["index.faiss", "index.pkl"]
38
- for index_file in index_files:
39
- try:
40
- hf_hub_download(
41
- repo_id=repo_id,
42
- filename=f"vectorstore/db_faiss/{index_file}",
43
- local_dir=".",
44
- local_dir_use_symlinks=False
45
- )
46
- print(f"Downloaded {index_file}")
47
- except Exception as e:
48
- print(f"Error downloading {index_file}: {e}")
49
-
50
- if __name__ == "__main__":
51
- download_assets()
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import hf_hub_download
2
+ import os
3
+
4
def download_assets():
    """Download the PDF corpus and FAISS index files from the Hugging Face Hub.

    Every file is requested from the ``MoizK/mindmedic-assets`` dataset
    repository with ``local_dir="."``, i.e. relative to the current working
    directory. The ``data`` and ``vectorstore/db_faiss`` directories are
    created up front if they do not exist yet.

    Downloading is best-effort: a failure for one file is reported on stdout
    and the remaining files are still attempted. Nothing is returned.

    Raises:
        RuntimeError: If the ``HUGGINGFACE_HUB_TOKEN`` environment variable
            is unset or empty.
    """
    # Create directories if they don't exist
    os.makedirs('data', exist_ok=True)
    os.makedirs('vectorstore/db_faiss', exist_ok=True)

    # Dataset repository ID (type=dataset)
    repo_id = "MoizK/mindmedic-assets"
    repo_type = "dataset"

    # Pull the access token from the environment; refuse to continue without it.
    token = os.getenv("HUGGINGFACE_HUB_TOKEN")
    if not token:
        raise RuntimeError("Please set HUGGINGFACE_HUB_TOKEN in your env")

    pdf_files = [
        "71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf",
        "Depression-NIM-2024.pdf",
        "Depression-and-Other-Common-Mental-Disorders-Global-Health-Estimates.pdf",
        "Doing-What-Matters-in-Times-of-Stress.pdf",
        "Generalized-Anxiety-Disorder-When-Worry-Gets-Out-of-Control.pdf",
        "WHO-mhGAP-Intervention-Guide-v2.pdf",
        "social-anxiety-disorder-more-than-just-shyness.pdf",
    ]
    index_files = ["index.faiss", "index.pkl"]

    # (label used in messages, path inside the repository). PDFs come first,
    # then the FAISS index files, matching the original download order.
    downloads = [(name, f"data/{name}") for name in pdf_files]
    downloads += [(name, f"vectorstore/db_faiss/{name}") for name in index_files]

    for name, repo_path in downloads:
        try:
            path = hf_hub_download(
                repo_id=repo_id,
                repo_type=repo_type,
                filename=repo_path,
                local_dir=".",
                # `token` replaces the deprecated `use_auth_token` keyword.
                token=token,
            )
            print(f"Downloaded {name} → {path}")
        except Exception as e:
            # Best-effort: report the failure and move on to the next file.
            print(f"Error downloading {name}: {e}")


if __name__ == "__main__":
    download_assets()