MoizK committed on
Commit
2a65dde
·
verified ·
1 Parent(s): 9c3b266

Update download_assets.py

Browse files
Files changed (1) hide show
  1. download_assets.py +16 -13
download_assets.py CHANGED
@@ -3,13 +3,16 @@ import os
3
 
4
  def download_assets():
5
  """Download necessary assets from Hugging Face Hub."""
 
6
  os.makedirs("data", exist_ok=True)
7
  os.makedirs("vectorstore/db_faiss", exist_ok=True)
8
 
9
  repo_id = "MoizK/mindmedic-assets"
10
  repo_type = "dataset"
11
- token = os.getenv("HUGGINGFACE_API_TOKEN") # optional
 
12
 
 
13
  pdf_files = [
14
  "71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf",
15
  "Depression-NIM-2024.pdf",
@@ -22,31 +25,31 @@ def download_assets():
22
 
23
  for fname in pdf_files:
24
  try:
25
- hf_hub_download(
26
  repo_id=repo_id,
27
  repo_type=repo_type,
28
- filename=f"data/{fname}",
29
- local_dir=".",
30
- local_dir_use_symlinks=False,
31
  token=token,
32
  )
33
- print(f"Downloaded {fname}")
34
  except Exception as e:
35
  print(f"⚠️ Failed to download {fname}: {e}")
36
 
37
- for idx in ("index.faiss", "index.pkl"):
 
 
38
  try:
39
- hf_hub_download(
40
  repo_id=repo_id,
41
  repo_type=repo_type,
42
- filename=f"vectorstore/db_faiss/{idx}",
43
- local_dir=".",
44
- local_dir_use_symlinks=False,
45
  token=token,
46
  )
47
- print(f"Downloaded {idx}")
48
  except Exception as e:
49
- print(f"⚠️ Failed to download {idx}: {e}")
50
 
51
  if __name__ == "__main__":
52
  download_assets()
 
3
 
4
  def download_assets():
5
  """Download necessary assets from Hugging Face Hub."""
6
+ # ensure local dirs exist
7
  os.makedirs("data", exist_ok=True)
8
  os.makedirs("vectorstore/db_faiss", exist_ok=True)
9
 
10
  repo_id = "MoizK/mindmedic-assets"
11
  repo_type = "dataset"
12
+ # allow either env var name
13
+ token = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HUGGINGFACE_API_TOKEN")
14
 
15
+ # list of PDF filenames *at the repo root*
16
  pdf_files = [
17
  "71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf",
18
  "Depression-NIM-2024.pdf",
 
25
 
26
  for fname in pdf_files:
27
  try:
28
+ path = hf_hub_download(
29
  repo_id=repo_id,
30
  repo_type=repo_type,
31
+ filename=fname, # no "data/" prefix
32
+ local_dir="data", # drop into ./data/
 
33
  token=token,
34
  )
35
+ print(f"Downloaded {fname} → {path}")
36
  except Exception as e:
37
  print(f"⚠️ Failed to download {fname}: {e}")
38
 
39
+ # FAISS index files also at the repo root
40
+ index_files = ["index.faiss", "index.pkl"]
41
+ for fname in index_files:
42
  try:
43
+ path = hf_hub_download(
44
  repo_id=repo_id,
45
  repo_type=repo_type,
46
+ filename=fname, # root filename
47
+ local_dir="vectorstore/db_faiss", # into ./vectorstore/db_faiss/
 
48
  token=token,
49
  )
50
+ print(f"Downloaded {fname} → {path}")
51
  except Exception as e:
52
+ print(f"⚠️ Failed to download {fname}: {e}")
53
 
54
  if __name__ == "__main__":
55
  download_assets()