File size: 4,131 Bytes
03375c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python
import os
import argparse
import subprocess
from dotenv import load_dotenv
from huggingface_hub import HfApi, upload_folder

# Load settings from a .env file (if any) into the process environment
load_dotenv()

# Default Huggingface token, read from the environment only; a token passed
# on the command line takes precedence where the functions below accept one
HF_TOKEN = os.environ.get("HF_TOKEN")

def process_data():
    """Run the data processing script in a child process.

    Executes ``process_data.py`` (resolved relative to the current working
    directory) and blocks until it finishes.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status.
    """
    # Function-scope import so this block does not depend on a file-level one.
    import sys

    print("Processing PDF data...")
    # Launch with the interpreter that is running this script instead of
    # whatever "python" resolves to on PATH, so the child process sees the
    # same environment and installed packages. Fall back to "python" only
    # when the interpreter path cannot be determined.
    interpreter = sys.executable or "python"
    subprocess.run([interpreter, "process_data.py"], check=True)
    print("Data processing complete.")

def validate_processed_data():
    """Check that the artifacts produced by data processing are present.

    Looks in ``data/processed_data`` (relative to the current working
    directory) for the directory itself, a ``chunks.pkl`` file inside it, and
    at least one entry whose name starts with
    ``kohavi_ab_testing_pdf_collection``.

    Raises:
        ValueError: If the directory, ``chunks.pkl``, or the collection
            entries are missing.
    """
    processed_dir = "data/processed_data"
    collection_prefix = "kohavi_ab_testing_pdf_collection"

    if not os.path.exists(processed_dir):
        raise ValueError(f"Processed data directory {processed_dir} not found. Run 'python process_data.py' first.")

    # The pickled chunks must sit directly inside the processed directory
    chunks_file = os.path.join(processed_dir, "chunks.pkl")
    if not os.path.exists(chunks_file):
        raise ValueError("chunks.pkl not found in processed data directory.")

    # One matching entry is enough to consider the Qdrant collection present
    has_collection = any(
        entry.startswith(collection_prefix) for entry in os.listdir(processed_dir)
    )
    if not has_collection:
        raise ValueError("Qdrant collection files not found in processed data directory.")

    print("✅ Processed data validation passed")

def build_docker_image(tag="ab-testing-qa:latest"):
    """Build a Docker image using the current directory as build context.

    Args:
        tag: Name (and tag) assigned to the built image.

    Raises:
        subprocess.CalledProcessError: If ``docker build`` exits with a
            non-zero status.
    """
    print(f"Building Docker image: {tag}")
    build_cmd = ["docker", "build", "-t", tag, "."]
    subprocess.run(build_cmd, check=True)
    print("Docker image built successfully.")

def upload_processed_data(hf_token=None, space_name=None):
    """Upload the processed data folder to a Hugging Face Space repository.

    The local ``data/processed_data`` directory is uploaded to the same path
    inside the Space repo.

    Args:
        hf_token: Hugging Face access token. Falls back to the module-level
            ``HF_TOKEN`` when omitted.
        space_name: Target Space repo id, in ``username/space`` form.

    Raises:
        ValueError: If ``space_name`` is empty or ``None``.
    """
    # Fail early with a clear message instead of sending repo_id=None to the API.
    if not space_name:
        raise ValueError("Huggingface space name not provided. Use --space parameter")

    # Upload through the authenticated client so an explicitly supplied token
    # is honoured; the module-level upload_folder() helper would not see it.
    api = HfApi(token=hf_token or HF_TOKEN)
    api.upload_folder(
        folder_path="data/processed_data",
        repo_id=space_name,
        repo_type="space",
        path_in_repo="data/processed_data",
    )
    print("✅ Uploaded processed data to Hugging Face Spaces")

def push_to_huggingface(hf_token=None, space_name=None):
    """Upload processed data and push the Docker image to Huggingface.

    Steps, in order: validate the inputs, upload ``data/processed_data`` to
    the Space, log in to ``registry.hf.space``, tag the local
    ``ab-testing-qa:latest`` image for the Space, and push it.

    Args:
        hf_token: Huggingface access token. Falls back to the module-level
            ``HF_TOKEN`` when omitted.
        space_name: Target Space, in ``username/space`` form.

    Raises:
        ValueError: If no token is available or ``space_name`` is missing.
        subprocess.CalledProcessError: If any ``docker`` command exits with a
            non-zero status.
    """
    # Validate everything up front so nothing is uploaded or pushed when the
    # inputs are incomplete.
    token = hf_token or HF_TOKEN
    if not token:
        raise ValueError("Huggingface token not provided. Either set HF_TOKEN environment variable or pass with --token")

    if not space_name:
        raise ValueError("Huggingface space name not provided. Use --space parameter")

    upload_processed_data(token, space_name)

    print("Logging in to Huggingface container registry...")
    # Feed the token on stdin rather than via --password so it does not show
    # up in the process list or shell history.
    login_cmd = ["docker", "login", "-u", "user", "--password-stdin", "registry.hf.space"]
    subprocess.run(login_cmd, input=token, text=True, check=True)

    # Tag the image for Huggingface
    hf_tag = f"registry.hf.space/{space_name}/ab-testing-qa:latest"
    subprocess.run(["docker", "tag", "ab-testing-qa:latest", hf_tag], check=True)

    # Push the image
    print(f"Pushing Docker image to {hf_tag}...")
    subprocess.run(["docker", "push", hf_tag], check=True)

    print(f"✅ Successfully pushed to Huggingface space: {space_name}")
    print(f"Visit your space at: https://huggingface.co/spaces/{space_name}")

def parse_args():
    """Parse the deployment script's command-line options.

    Returns:
        argparse.Namespace: Boolean ``process``, ``validate``, ``build``,
        ``push`` and ``all`` flags, plus optional ``token`` and ``space``
        strings (``None`` when not supplied).
    """
    cli = argparse.ArgumentParser(description="Deploy AB Testing QA App to Huggingface")

    # On/off switches selecting which deployment steps to run
    switches = (
        ("--process", "Process PDF data"),
        ("--validate", "Validate processed data"),
        ("--build", "Build Docker image"),
        ("--push", "Push to Huggingface"),
        ("--all", "Run all steps"),
    )
    for flag, help_text in switches:
        cli.add_argument(flag, action="store_true", help=help_text)

    # Options that carry a value
    cli.add_argument("--token", help="Huggingface token (if not in .env)")
    cli.add_argument("--space", help="Huggingface space name (username/space)")

    return cli.parse_args()

def main():
    """Run the deployment steps selected on the command line.

    Steps run in a fixed order: process, validate, build, push. Each step
    runs when its own flag or ``--all`` was given; with no flags nothing runs.
    """
    options = parse_args()
    run_everything = options.all

    # (requested?, action) pairs, listed in execution order
    pipeline = (
        (options.process, process_data),
        (options.validate, validate_processed_data),
        (options.build, build_docker_image),
        (options.push, lambda: push_to_huggingface(options.token, options.space)),
    )

    for requested, step in pipeline:
        if run_everything or requested:
            step()

# Run the deployment steps only when executed as a script, not when imported
if __name__ == "__main__":
    main()