File size: 7,370 Bytes
479ced5
 
98aae70
 
479ced5
 
 
 
98aae70
479ced5
98aae70
 
 
 
 
479ced5
98aae70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04fd216
98aae70
04fd216
98aae70
 
04fd216
 
 
 
 
 
 
 
98aae70
 
 
04fd216
98aae70
 
 
 
 
 
 
 
 
04fd216
 
 
 
 
 
 
 
 
 
 
 
98aae70
 
 
 
 
 
 
 
 
479ced5
 
98aae70
 
 
479ced5
98aae70
 
479ced5
98aae70
 
 
 
 
 
 
479ced5
98aae70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479ced5
98aae70
 
 
 
479ced5
6b37abd
 
 
 
98aae70
 
 
 
 
 
6b37abd
479ced5
6b37abd
 
 
98aae70
6b37abd
 
 
 
 
 
 
98aae70
6b37abd
 
 
479ced5
 
98aae70
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#!/usr/bin/env python3
"""
Test script to verify environment variables and cache directory permissions.
This should be run before the main application to ensure everything is set up correctly.
"""

import os
import tempfile
import sys

def test_environment_setup() -> bool:
    """Report whether every required environment variable is set.

    Prints one status line per variable: the value when it is set, or
    ``NOT SET`` when it is missing or empty.

    Returns:
        True when every listed variable has a non-empty value,
        False as soon as at least one is missing or empty.
    """
    print("=" * 60)
    print("Testing Environment Setup")
    print("=" * 60)

    # Variables that must be present for the check to pass
    critical_vars = [
        'HF_HOME',
        'HF_CACHE_HOME',
        'HF_HUB_CACHE',
        'TRANSFORMERS_CACHE',
        'HF_DATASETS_CACHE',
        'TEMP_DIR',
        'HOME',
        'TMPDIR',
    ]

    all_good = True
    for var in critical_vars:
        value = os.environ.get(var)
        if value:
            print(f"✅ {var}: {value}")
        else:
            # An empty string is treated the same as an unset variable.
            print(f"❌ {var}: NOT SET")
            all_good = False

    return all_good

def test_cache_directories() -> bool:
    """Check that each cache directory can be created and written to.

    Each directory is taken from its environment variable, falling back to
    a path under ``/tmp/docling_temp`` when the variable is unset. For every
    directory the function creates it if needed, writes a small probe file
    and removes the probe again, printing one status line per directory.

    Returns:
        True when every directory passed the write probe, False when at
        least one raised an error.
    """
    print("\n" + "=" * 60)
    print("Testing Cache Directory Access")
    print("=" * 60)

    cache_dirs = [
        os.environ.get('HF_HOME', '/tmp/docling_temp/huggingface'),
        os.environ.get('HF_CACHE_HOME', '/tmp/docling_temp/huggingface_cache'),
        os.environ.get('HF_HUB_CACHE', '/tmp/docling_temp/huggingface_cache'),
        os.environ.get('TRANSFORMERS_CACHE', '/tmp/docling_temp/transformers_cache'),
        os.environ.get('HF_DATASETS_CACHE', '/tmp/docling_temp/datasets_cache'),
        os.environ.get('TORCH_HOME', '/tmp/docling_temp/torch'),
        os.environ.get('TENSORFLOW_HOME', '/tmp/docling_temp/tensorflow'),
        os.environ.get('KERAS_HOME', '/tmp/docling_temp/keras'),
    ]

    all_good = True
    for cache_dir in cache_dirs:
        try:
            os.makedirs(cache_dir, exist_ok=True)
            # Creating the directory is not enough: prove it is writable
            # by round-tripping a throwaway file.
            test_file = os.path.join(cache_dir, 'test_write.txt')
            with open(test_file, 'w') as f:
                f.write('test')
            os.remove(test_file)
            print(f"✅ {cache_dir}: WRITABLE")
        except Exception as e:
            # Deliberately broad: this is a diagnostic, so any failure is
            # reported and the remaining directories are still checked.
            print(f"❌ {cache_dir}: ERROR - {e}")
            all_good = False

    return all_good

def test_root_filesystem_access() -> bool:
    """Check that critical root-level paths cannot be created.

    For each path the function attempts ``os.makedirs``. A
    ``PermissionError`` is the expected (good) outcome; any other error is
    reported as a warning without failing the check; success is reported
    as a failure. Directories that did not exist before the probe and were
    created by it are removed again so the probe leaves no residue on the
    root filesystem.

    Afterwards the temp directory (``TEMP_DIR``, default
    ``/tmp/docling_temp``) is created if needed and probed with a
    throwaway file.

    Returns:
        True when no root path could be created and the temp directory is
        writable, False otherwise.
    """
    print("\n" + "=" * 60)
    print("Testing Critical Root Filesystem Access Prevention")
    print("=" * 60)

    # Only test critical paths that the application might try to access.
    # Parents are listed before their children, so each successful
    # makedirs call creates at most the listed path itself.
    critical_root_paths = [
        '/.cache',  # This is the main one that causes issues
        '/.config',
        '/.local',
        '/.huggingface',
        '/.cache/huggingface',
        '/.cache/transformers',
    ]

    all_good = True
    created_by_probe = []
    for path in critical_root_paths:
        existed_before = os.path.exists(path)
        try:
            os.makedirs(path, exist_ok=True)
        except PermissionError:
            print(f"✅ {path}: PERMISSION DENIED (GOOD)")
        except Exception as e:
            # e.g. a read-only filesystem; reported but not counted as a failure
            print(f"⚠️  {path}: OTHER ERROR - {e}")
        else:
            print(f"❌ {path}: SUCCESSFULLY CREATED (SHOULD FAIL)")
            all_good = False
            if not existed_before:
                created_by_probe.append(path)

    # Undo only what this probe created, children before parents.
    for path in reversed(created_by_probe):
        try:
            os.rmdir(path)
        except OSError as e:
            print(f"⚠️  {path}: COULD NOT REMOVE PROBE DIRECTORY - {e}")

    # Test that we can access our temp directory (this is what matters)
    temp_dir = os.environ.get('TEMP_DIR', '/tmp/docling_temp')
    print(f"\nTesting temp directory access: {temp_dir}")
    try:
        # Create the directory first so the check does not depend on
        # another test having run beforehand.
        os.makedirs(temp_dir, exist_ok=True)
        temp_test_file = os.path.join(temp_dir, 'test_access.txt')
        with open(temp_test_file, 'w') as f:
            f.write('test')
        os.remove(temp_test_file)
        print("✅ Temp directory is writable (CRITICAL)")
    except Exception as e:
        print(f"❌ Temp directory not writable: {e}")
        all_good = False

    return all_good

def test_temp_directory() -> bool:
    """Check that the temp directory can be created and written to.

    The directory is read from ``TEMP_DIR`` and defaults to
    ``/tmp/docling_temp``. It is created if missing, then probed by writing
    and removing a throwaway file.

    Returns:
        True when the probe succeeded, False when any step raised an error.
    """
    print("\n" + "=" * 60)
    print("Testing Temp Directory Access")
    print("=" * 60)

    temp_dir = os.environ.get('TEMP_DIR', '/tmp/docling_temp')
    try:
        os.makedirs(temp_dir, exist_ok=True)
        test_file = os.path.join(temp_dir, 'test_temp.txt')
        with open(test_file, 'w') as f:
            f.write('temp test')
        os.remove(test_file)
        print(f"✅ {temp_dir}: WRITABLE")
        return True
    except Exception as e:
        # Diagnostic script: report the failure instead of crashing.
        print(f"❌ {temp_dir}: ERROR - {e}")
        return False

def main() -> None:
    """Run all checks, print a summary and exit with a status code.

    When ``TEMP_DIR`` is not set, a full set of home/temp/cache environment
    variables is pointed at ``<system temp dir>/docling_temp`` before the
    checks run. The process exits with 0 when the environment, cache and
    temp checks pass (the root-access check is informational only) and
    with 1 otherwise.
    """
    print("Docling Environment and Permission Test")
    print("This script tests that the environment is set up correctly for Hugging Face Spaces")

    # Set environment variables if not already set
    if not os.environ.get('TEMP_DIR'):
        temp_dir = os.path.join(tempfile.gettempdir(), "docling_temp")
        os.environ.update({
            'TEMP_DIR': temp_dir,
            'HOME': temp_dir,
            'USERPROFILE': temp_dir,
            'TMPDIR': temp_dir,
            'TEMP': temp_dir,
            'TMP': temp_dir,
            'HF_HOME': os.path.join(temp_dir, 'huggingface'),
            'HF_CACHE_HOME': os.path.join(temp_dir, 'huggingface_cache'),
            'HF_HUB_CACHE': os.path.join(temp_dir, 'huggingface_cache'),
            'TRANSFORMERS_CACHE': os.path.join(temp_dir, 'transformers_cache'),
            'HF_DATASETS_CACHE': os.path.join(temp_dir, 'datasets_cache'),
            'DIFFUSERS_CACHE': os.path.join(temp_dir, 'diffusers_cache'),
            'ACCELERATE_CACHE': os.path.join(temp_dir, 'accelerate_cache'),
            'TORCH_HOME': os.path.join(temp_dir, 'torch'),
            'TENSORFLOW_HOME': os.path.join(temp_dir, 'tensorflow'),
            'KERAS_HOME': os.path.join(temp_dir, 'keras'),
            'XDG_CACHE_HOME': os.path.join(temp_dir, 'cache'),
            'XDG_CONFIG_HOME': os.path.join(temp_dir, 'config'),
            'XDG_DATA_HOME': os.path.join(temp_dir, 'data'),
        })

    # Run tests
    env_ok = test_environment_setup()
    cache_ok = test_cache_directories()
    temp_ok = test_temp_directory()

    # Only test critical root paths, not all root access
    root_ok = test_root_filesystem_access()

    # Summary - focus on what's critical for the application
    print("\n" + "=" * 60)
    print("TEST SUMMARY")
    print("=" * 60)
    print(f"Environment Variables: {'✅ PASS' if env_ok else '❌ FAIL'}")
    print(f"Cache Directories: {'✅ PASS' if cache_ok else '❌ FAIL'}")
    print(f"Temp Directory: {'✅ PASS' if temp_ok else '❌ FAIL'}")
    print(f"Critical Root Access Prevention: {'✅ PASS' if root_ok else '❌ FAIL'}")

    # The application will work if cache directories and temp directory are working
    critical_success = env_ok and cache_ok and temp_ok
    overall_success = critical_success and root_ok

    print(f"\nCritical for Application: {'✅ PASS' if critical_success else '❌ FAIL'}")
    print(f"Overall Result: {'✅ ALL TESTS PASSED' if overall_success else '⚠️  SOME TESTS FAILED'}")

    # Exit with success if critical tests pass, even if root access test fails
    if critical_success:
        print("\n🎉 Critical tests passed! The environment is ready for Docling.")
        if not root_ok:
            # Only mention root-access failures when they actually happened.
            print("Note: Some root access tests failed, but this doesn't affect the application.")
        sys.exit(0)
    else:
        print("\n❌ Critical tests failed. Please check the environment setup.")
        sys.exit(1)

# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()