Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,157 Bytes
f6e2d8a b42e964 f6e2d8a b42e964 f6e2d8a b42e964 f6e2d8a 26cf2c7 f6e2d8a b42e964 f6e2d8a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import requests
import zipfile
import shutil
import boto3
import os
def download_file_from_bucket(bucket_name, s3_key, output_file):
    """Fetch a single object from an S3 bucket and save it to a local file.

    Credentials are read from the ``AWS_ACCESS_KEY_ID`` and
    ``AWS_ACCESS_KEY_SECRET`` environment variables, and the session is
    created for the ``us-east-1`` region.

    Args:
        bucket_name: Name of the S3 bucket holding the object.
        s3_key: Key of the object inside the bucket.
        output_file: Local path the object is written to.
    """
    # Approach based on https://thecodinginterface.com/blog/aws-s3-python-boto3
    session_options = {
        "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
        "aws_secret_access_key": os.getenv("AWS_ACCESS_KEY_SECRET"),
        "region_name": "us-east-1",
    }
    aws_session = boto3.session.Session(**session_options)

    # Resolve the bucket through the resource API and pull the object down.
    target_bucket = aws_session.resource("s3").Bucket(bucket_name)
    target_bucket.download_file(Key=s3_key, Filename=output_file)
def download_dropbox_file(shared_url, output_file, timeout=60):
    """Download a shared Dropbox file to a local path.

    The share link is rewritten to use the ``dl.dropboxusercontent.com`` host
    so the file is served directly, and the ``dl=0`` flag is removed from the
    query string wherever it appears (not only when it is the first and only
    parameter), so links such as ``...?rlkey=abc&dl=0`` are handled too.

    Args:
        shared_url: Dropbox share link (``https://www.dropbox.com/...``).
        output_file: Local path the downloaded bytes are written to.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled server cannot block the caller forever.

    Returns:
        None. The outcome (success, or the failing HTTP status code) is
        reported on stdout.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit, urlunsplit

    # Point the link at the direct-download host.
    parts = urlsplit(
        shared_url.replace("www.dropbox.com", "dl.dropboxusercontent.com")
    )
    # Drop only the "dl=0" parameter; every other parameter (e.g. rlkey) is
    # kept verbatim so access keys embedded in the link are not re-encoded.
    kept_params = [p for p in parts.query.split("&") if p and p != "dl=0"]
    direct_url = urlunsplit(parts._replace(query="&".join(kept_params)))

    # The context manager releases the streamed connection on every path,
    # including the non-200 early return below.
    with requests.get(direct_url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Failed to download file. HTTP Status Code: {response.status_code}")
            return

        # Stream the body to disk in chunks instead of loading it in memory.
        with open(output_file, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

    print(f"File downloaded successfully as '{output_file}'")
def download_data():
    """Fetch the email database archive as ``db.zip`` if it is not present.

    Does nothing when ``db.zip`` already exists in the current directory.
    """
    if os.path.exists("db.zip"):
        # Archive already on disk; skip the download.
        return

    # S3 source (needs AWS_ACCESS_KEY_ID and AWS_ACCESS_KEY_SECRET).
    # Available archives:
    #   db_20250801.zip:  chromadb==1.0.13
    #   db_20250801a.zip: chromadb==0.6.3
    download_file_from_bucket("r-help-chat", "db_20250801a.zip", "db.zip")

    ## Alternative: Dropbox (shared file - key is in URL)
    # shared_link = "https://www.dropbox.com/scl/fi/jx90g5lorpgkkyyzeurtc/db.zip?rlkey=wvqa3p9hdy4rmod1r8yf2am09&st=l9tsam56&dl=0"
    # output_filename = "db.zip"
    # download_dropbox_file(shared_link, output_filename)
def extract_data():
    """Unpack ``db.zip`` from the current directory into the current directory."""
    archive = zipfile.ZipFile("db.zip", mode="r")
    # The context manager closes the archive once extraction finishes or fails.
    with archive:
        archive.extractall(path="./")
|