File size: 6,745 Bytes
3ad6f3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c4755b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ad6f3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c4755b
3ad6f3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c4755b
3ad6f3d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#!/usr/bin/env python3
"""
Debug script for Hugging Face Spaces deployment
This script helps diagnose common issues during deployment
"""

import os
import sys
import subprocess
from pathlib import Path

def check_environment():
    """Print a snapshot of the runtime environment.

    Reports the Python version, a fixed set of Hugging Face / API related
    environment variables, the current working directory, and whether the
    ``/.dockerenv`` marker file exists.

    Variables whose name contains ``KEY`` are treated as secrets: at most
    the first four characters are shown, and only when the value is at
    least 16 characters long, so short secrets are never mostly revealed
    in deployment logs.

    Returns:
        None. All results are written to stdout.
    """
    print("🔍 Checking environment...")

    print(f"Python version: {sys.version}")

    env_vars = [
        "HF_HOME", "TRANSFORMERS_CACHE", "HF_HUB_CACHE",
        "GEMINI_API_KEY", "PINECONE_API_KEY", "PINECONE_ENVIRONMENT",
    ]

    for var in env_vars:
        # Use None as the "unset" sentinel so that an unset variable and a
        # variable set to an empty string are reported differently.
        value = os.environ.get(var)
        if value is None:
            shown = "NOT SET"
        elif not value:
            shown = "(empty)"
        elif "KEY" in var:
            # Never echo a meaningful fraction of a secret.
            shown = f"{value[:4]}..." if len(value) >= 16 else "***"
        else:
            shown = value
        print(f"  {var}: {shown}")

    print(f"Working directory: {os.getcwd()}")

    # Docker creates /.dockerenv inside containers; its presence is used
    # here as the container indicator.
    in_docker = os.path.exists("/.dockerenv")
    print(f"Running in Docker container: {'✅' if in_docker else '❌'}")

def check_cache_directories(cache_dir="/code/.cache/huggingface"):
    """Report on the Hugging Face cache directory, creating it if missing.

    If ``cache_dir`` is an existing directory, its permission bits and
    writability are printed. If nothing exists at that path, the function
    tries to create the directory and set its mode to ``0o777``. If the
    path exists but is not a directory, that is reported and nothing is
    created.

    Args:
        cache_dir: Path of the cache directory to inspect. Defaults to the
            path this script has always checked.

    Returns:
        None. All results are written to stdout.
    """
    print("\n🔍 Checking cache directories...")

    if os.path.isdir(cache_dir):
        print(f"Cache directory exists: ✅ ({cache_dir})")
        try:
            mode = os.stat(cache_dir).st_mode
            # The last three octal digits are the user/group/other bits.
            print(f"Cache directory permissions: {oct(mode)[-3:]}")

            if os.access(cache_dir, os.W_OK):
                print("Cache directory writable: ✅")
            else:
                print("Cache directory writable: ❌")
        except (OSError, ValueError) as e:
            print(f"Error checking cache directory: {e}")
        return

    print(f"Cache directory exists: ❌ ({cache_dir})")

    if os.path.exists(cache_dir):
        # A regular file (or other non-directory) occupies the path, so
        # creating the directory cannot succeed; say so explicitly.
        print(f"Failed to create cache directory: {cache_dir} exists but is not a directory")
        return

    try:
        os.makedirs(cache_dir, exist_ok=True)
        # NOTE(review): 0o777 is kept from the original code; presumably it
        # lets any container user write to the cache - confirm this is wanted.
        os.chmod(cache_dir, 0o777)
        print("Created cache directory: ✅")
    except (OSError, ValueError) as e:
        print(f"Failed to create cache directory: {e}")

def check_dependencies(required_packages=None):
    """Report which required packages can be imported.

    Each package is imported by name. A package that is missing prints a
    plain failure mark; a package that is present but raises some other
    exception while importing prints the failure together with the
    exception, and the remaining packages are still checked.

    Args:
        required_packages: Iterable of importable module names to check.
            Defaults to the packages this script has always checked.

    Returns:
        None. All results are written to stdout.
    """
    import importlib

    print("\n🔍 Checking dependencies...")

    if required_packages is None:
        required_packages = [
            "fastapi", "uvicorn", "sentence_transformers",
            "transformers", "torch", "numpy", "faiss",
        ]

    for package in required_packages:
        try:
            importlib.import_module(package)
        except ImportError:
            print(f"  {package}: ❌")
        except Exception as exc:
            # An installed package can still fail while importing; report it
            # instead of letting one broken package abort the diagnostics.
            print(f"  {package}: ❌ ({type(exc).__name__}: {exc})")
        else:
            print(f"  {package}: ✅")

def _major_minor(version):
    """Return the leading ``(major, minor)`` integers of a version string."""
    numbers = []
    for piece in version.split(".")[:2]:
        digits = ""
        for ch in piece:
            if not ch.isdigit():
                break
            digits += ch
        numbers.append(int(digits) if digits else 0)
    while len(numbers) < 2:
        numbers.append(0)
    return tuple(numbers)


def check_dependency_conflicts():
    """Report whether the installed transformers/tokenizers pair conflicts.

    Prints both versions. For transformers 4.35.x, any tokenizers release
    at or above 0.15 is flagged, matching the printed "requires tokenizers
    < 0.15" rule. Versions are compared numerically, not by string prefix.
    For any other transformers release only an informational note is
    printed. If either package cannot be imported, that is reported
    instead.

    Returns:
        None. All results are written to stdout.
    """
    print("\n🔍 Checking for dependency conflicts...")

    try:
        import transformers
        import tokenizers
    except ImportError as e:
        print(f"  ❌ Could not check dependency conflicts: {e}")
        return

    transformers_version = transformers.__version__
    tokenizers_version = tokenizers.__version__

    print(f"  transformers version: {transformers_version}")
    print(f"  tokenizers version: {tokenizers_version}")

    if _major_minor(transformers_version) != (4, 35):
        print("  ℹ️  transformers version not in expected range")
        return

    # Every tokenizers release from 0.15 upward breaks the "< 0.15" rule,
    # not just the 0.15.x series.
    if _major_minor(tokenizers_version) >= (0, 15):
        print(
            f"  ⚠️  Potential conflict: transformers {transformers_version} "
            f"with tokenizers {tokenizers_version}"
        )
        print("     transformers 4.35.x requires tokenizers < 0.15")
    else:
        print("  ✅ transformers and tokenizers versions are compatible")

def test_model_loading(cache_dir="/code/.cache/huggingface", models_to_try=None):
    """Try to load a sentence-transformers model and encode one sentence.

    The candidate model names are tried in order. The first one that both
    loads and encodes a test sentence ends the check successfully; a
    failing candidate is reported and the next one is tried.

    Args:
        cache_dir: Directory passed to ``SentenceTransformer`` as
            ``cache_folder``; created if it does not exist. Defaults to
            the path this script has always used.
        models_to_try: Iterable of model names to attempt. Defaults to the
            three MiniLM model names this script has always tried.

    Returns:
        True if any candidate loaded and encoded the test sentence, False
        otherwise (including when the library import or cache directory
        creation fails).
    """
    print("\n🔍 Testing model loading...")

    if models_to_try is None:
        models_to_try = [
            "all-MiniLM-L6-v2",
            "sentence-transformers/all-MiniLM-L6-v2",
            "sentence-transformers/paraphrase-MiniLM-L6-v2",
        ]

    # Setup failures (missing library, uncreatable cache dir) are fatal for
    # this check; per-model failures below are not.
    try:
        from sentence_transformers import SentenceTransformer

        os.makedirs(cache_dir, exist_ok=True)
    except Exception as e:
        print(f"  ❌ Error testing model loading: {e}")
        return False

    for model_name in models_to_try:
        try:
            print(f"  Trying {model_name}...")
            model = SentenceTransformer(model_name, cache_folder=cache_dir)

            # Encoding one sentence proves the model is usable, not merely
            # downloaded.
            embeddings = model.encode(["This is a test sentence."])
            print(f"  ✅ {model_name} loaded successfully - shape: {embeddings.shape}")
            return True
        except Exception as e:
            # Broad on purpose: any failure just moves on to the next model.
            print(f"  ❌ {model_name} failed: {e}")

    print("  ❌ All models failed to load")
    return False

def check_network_connectivity(url="https://huggingface.co", timeout=10):
    """Check that the Hugging Face site answers an HTTP GET.

    A response with status 200 is reported as success; any other status or
    any exception raised by the request is reported as a failure. A missing
    ``requests`` package is reported as such, so it is not mistaken for a
    network problem.

    Args:
        url: Address to request. Defaults to the Hugging Face home page.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        None. All results are written to stdout.
    """
    print("\n🔍 Checking network connectivity...")

    try:
        import requests
    except ImportError as e:
        print(f"Hugging Face connectivity: ❌ (requests not available: {e})")
        return

    try:
        response = requests.get(url, timeout=timeout)
    except Exception as e:
        # Broad on purpose: DNS, TLS, proxy and timeout errors all mean
        # "not reachable" for this diagnostic.
        print(f"Hugging Face connectivity: ❌ ({e})")
        return

    if response.status_code == 200:
        print("Hugging Face connectivity: ✅")
    else:
        print(f"Hugging Face connectivity: ❌ (Status: {response.status_code})")

def main():
    """Run every diagnostic check in order and print a summary.

    The environment, cache directory, dependency, dependency-conflict and
    network checks only print their findings. The model-loading check also
    returns a success flag, which decides whether the summary reports
    success or prints the troubleshooting tips.

    Returns:
        None. All results are written to stdout.
    """
    print("🚀 HackRx 6.0 - Hugging Face Spaces Debug Script")
    print("=" * 50)

    check_environment()
    check_cache_directories()
    check_dependencies()
    check_dependency_conflicts()
    check_network_connectivity()
    # Run last: it is the only check whose result drives the summary.
    model_success = test_model_loading()

    print("\n" + "=" * 50)
    print("📊 Summary:")

    if model_success:
        print("✅ Model loading successful - deployment should work")
        return

    print("❌ Model loading failed - check the issues above")
    print("\n💡 Troubleshooting tips:")
    print("1. Check if all environment variables are set")
    print("2. Verify cache directory permissions")
    print("3. Ensure network connectivity to Hugging Face")
    print("4. Check if all dependencies are installed")
    print("5. Review the Docker build logs")
    print("6. Check for dependency conflicts (especially transformers/tokenizers)")

# Run the diagnostics only when executed as a script, not when imported.
if __name__ == "__main__":
    main()