Update download_assets.py
Browse files
- download_assets.py +19 -26
download_assets.py
CHANGED
@@ -2,21 +2,14 @@ from huggingface_hub import hf_hub_download
|
|
2 |
import os
|
3 |
|
4 |
def download_assets():
|
5 |
-
"""Download necessary assets from Hugging Face Hub"""
|
6 |
-
|
7 |
-
os.makedirs(
|
8 |
-
os.makedirs('vectorstore/db_faiss', exist_ok=True)
|
9 |
|
10 |
-
|
11 |
-
repo_id = "MoizK/mindmedic-assets"
|
12 |
repo_type = "dataset"
|
|
|
13 |
|
14 |
-
# Pull your token from the env
|
15 |
-
token = os.getenv("HUGGINGFACE_API_TOKEN")
|
16 |
-
if not token:
|
17 |
-
raise RuntimeError("Please set HUGGINGFACE_HUB_TOKEN in your env")
|
18 |
-
|
19 |
-
# Download PDF files
|
20 |
pdf_files = [
|
21 |
"71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf",
|
22 |
"Depression-NIM-2024.pdf",
|
@@ -24,36 +17,36 @@ def download_assets():
|
|
24 |
"Doing-What-Matters-in-Times-of-Stress.pdf",
|
25 |
"Generalized-Anxiety-Disorder-When-Worry-Gets-Out-of-Control.pdf",
|
26 |
"WHO-mhGAP-Intervention-Guide-v2.pdf",
|
27 |
-
"social-anxiety-disorder-more-than-just-shyness.pdf"
|
28 |
]
|
29 |
|
30 |
-
for
|
31 |
try:
|
32 |
-
|
33 |
repo_id=repo_id,
|
34 |
repo_type=repo_type,
|
35 |
-
filename=f"data/{
|
36 |
local_dir=".",
|
37 |
-
|
|
|
38 |
)
|
39 |
-
print(f"Downloaded {
|
40 |
except Exception as e:
|
41 |
-
print(f"
|
42 |
|
43 |
-
|
44 |
-
index_files = ["index.faiss", "index.pkl"]
|
45 |
-
for idx in index_files:
|
46 |
try:
|
47 |
-
|
48 |
repo_id=repo_id,
|
49 |
repo_type=repo_type,
|
50 |
filename=f"vectorstore/db_faiss/{idx}",
|
51 |
local_dir=".",
|
52 |
-
|
|
|
53 |
)
|
54 |
-
print(f"Downloaded {idx}
|
55 |
except Exception as e:
|
56 |
-
print(f"
|
57 |
|
58 |
if __name__ == "__main__":
|
59 |
download_assets()
|
|
|
2 |
import os
|
3 |
|
4 |
def download_assets():
|
5 |
+
"""Download necessary assets from Hugging Face Hub."""
|
6 |
+
os.makedirs("data", exist_ok=True)
|
7 |
+
os.makedirs("vectorstore/db_faiss", exist_ok=True)
|
|
|
8 |
|
9 |
+
repo_id = "MoizK/mindmedic-assets"
|
|
|
10 |
repo_type = "dataset"
|
11 |
+
token = os.getenv("HUGGINGFACE_API_TOKEN") # optional
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
pdf_files = [
|
14 |
"71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf",
|
15 |
"Depression-NIM-2024.pdf",
|
|
|
17 |
"Doing-What-Matters-in-Times-of-Stress.pdf",
|
18 |
"Generalized-Anxiety-Disorder-When-Worry-Gets-Out-of-Control.pdf",
|
19 |
"WHO-mhGAP-Intervention-Guide-v2.pdf",
|
20 |
+
"social-anxiety-disorder-more-than-just-shyness.pdf",
|
21 |
]
|
22 |
|
23 |
+
for fname in pdf_files:
|
24 |
try:
|
25 |
+
hf_hub_download(
|
26 |
repo_id=repo_id,
|
27 |
repo_type=repo_type,
|
28 |
+
filename=f"data/{fname}",
|
29 |
local_dir=".",
|
30 |
+
local_dir_use_symlinks=False,
|
31 |
+
token=token,
|
32 |
)
|
33 |
+
print(f"Downloaded {fname}")
|
34 |
except Exception as e:
|
35 |
+
print(f"⚠️ Failed to download {fname}: {e}")
|
36 |
|
37 |
+
for idx in ("index.faiss", "index.pkl"):
|
|
|
|
|
38 |
try:
|
39 |
+
hf_hub_download(
|
40 |
repo_id=repo_id,
|
41 |
repo_type=repo_type,
|
42 |
filename=f"vectorstore/db_faiss/{idx}",
|
43 |
local_dir=".",
|
44 |
+
local_dir_use_symlinks=False,
|
45 |
+
token=token,
|
46 |
)
|
47 |
+
print(f"Downloaded {idx}")
|
48 |
except Exception as e:
|
49 |
+
print(f"⚠️ Failed to download {idx}: {e}")
|
50 |
|
51 |
if __name__ == "__main__":
|
52 |
download_assets()
|