File size: 2,157 Bytes
f6e2d8a
 
 
 
 
 
b42e964
f6e2d8a
 
 
 
 
 
 
 
 
 
 
 
 
b42e964
f6e2d8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b42e964
 
f6e2d8a
 
 
 
 
 
26cf2c7
 
 
f6e2d8a
 
 
 
 
b42e964
f6e2d8a
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import requests
import zipfile
import shutil
import boto3
import os


def download_file_from_bucket(bucket_name, s3_key, output_file):
    """Fetch one object from an S3 bucket and save it to a local file

    Credentials are read from the AWS_ACCESS_KEY_ID and AWS_ACCESS_KEY_SECRET
    environment variables, and the session region is fixed to us-east-1.

    Args:
        bucket_name: Name of the S3 bucket holding the object.
        s3_key: Key of the object inside the bucket.
        output_file: Local filename the object is written to.
    """

    # https://thecodinginterface.com/blog/aws-s3-python-boto3
    credentials = {
        "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
        "aws_secret_access_key": os.getenv("AWS_ACCESS_KEY_SECRET"),
    }
    aws_session = boto3.session.Session(region_name="us-east-1", **credentials)
    aws_session.resource("s3").Bucket(bucket_name).download_file(
        Key=s3_key, Filename=output_file
    )


def download_dropbox_file(shared_url, output_file, timeout=60):
    """Download file from Dropbox

    The shared link is rewritten to point at dl.dropboxusercontent.com (and a
    literal "?dl=0" is stripped) before the file is streamed to disk in
    8 KiB chunks.

    Args:
        shared_url: Dropbox shared link to the file.
        output_file: Local filename the downloaded bytes are written to.
        timeout: Value passed to ``requests.get`` as its ``timeout`` so a
            stalled server cannot block the call forever. Pass ``None`` to
            wait indefinitely.

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            (for example a connection error or a timeout).

    A response with a status code other than 200 does not raise; it is
    reported with a printed message and nothing is written.
    """

    # Modify the shared URL to enable direct download
    direct_url = shared_url.replace(
        "www.dropbox.com", "dl.dropboxusercontent.com"
    ).replace("?dl=0", "")

    # With stream=True the connection is only released once the body is fully
    # consumed or the response is closed, so use the response as a context
    # manager to guarantee it is closed on every path (including non-200).
    with requests.get(direct_url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Failed to download file. HTTP Status Code: {response.status_code}")
            return

        # Write the content to a local file
        with open(output_file, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

    print(f"File downloaded successfully as '{output_file}'")


def download_data():
    """Fetch the email database archive as db.zip unless it already exists"""

    archive_name = "db.zip"
    if os.path.exists(archive_name):
        # A local copy is already present, so there is nothing to download
        return

    # S3 source (needs AWS_ACCESS_KEY_ID and AWS_ACCESS_KEY_SECRET)
    #   db_20250801.zip: chromadb==1.0.13
    #   db_20250801a.zip: chromadb==0.6.3
    download_file_from_bucket("r-help-chat", "db_20250801a.zip", archive_name)
    ## Alternative source: Dropbox (shared file - key is in URL)
    # shared_link = "https://www.dropbox.com/scl/fi/jx90g5lorpgkkyyzeurtc/db.zip?rlkey=wvqa3p9hdy4rmod1r8yf2am09&st=l9tsam56&dl=0"
    # output_filename = "db.zip"
    # download_dropbox_file(shared_link, output_filename)


def extract_data():
    """Unpack every member of db.zip into the current directory"""

    with zipfile.ZipFile(file="db.zip", mode="r") as archive:
        archive.extractall(path="./")