Files changed:

- .gitattributes +2 -0
- README.md +17 -12
- app.py +69 -39
- config.py +68 -0
- download_model.py +106 -0
- install_verify.py +123 -0
- setup_and_run.py +108 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.gguf filter=lfs diff=lfs merge=lfs -text
+*.ggml filter=lfs diff=lfs merge=lfs -text
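With `*.gguf` and `*.ggml` now tracked by Git LFS, a checkout without LFS installed leaves small pointer stubs instead of real model files. A quick illustrative check (not part of this commit; the `GGUF` magic-bytes assumption comes from the GGUF file format, not from this repo):

```python
def looks_like_gguf(path: str) -> bool:
    """Return True if the file starts with the GGUF magic bytes,
    i.e. it is real model data rather than a Git LFS pointer stub."""
    with open(path, "rb") as f:
        return f.read(4) == b"GGUF"

# Example: looks_like_gguf("./models/model.gguf")
```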
README.md
CHANGED
@@ -12,15 +12,16 @@ short_description: Plain text to json using llama.cpp

# Plain Text to JSON with llama.cpp

-This Hugging Face Space converts plain text into structured JSON format using llama.cpp for efficient CPU inference.
+This Hugging Face Space converts plain text into structured JSON format using llama.cpp for efficient CPU inference, powered by the Osmosis Structure 0.6B model.

## Features

-- **llama.cpp Integration**: Uses llama-cpp-python for efficient model inference
+- **llama.cpp Integration**: Uses llama-cpp-python for efficient CPU model inference
+- **Osmosis Structure Model**: Specialized 0.6B parameter model for structured data extraction
- **Gradio Interface**: User-friendly web interface
+- **JSON Conversion**: Converts unstructured text to well-formatted JSON
+- **Auto-Download**: Automatically downloads the Osmosis model on first use
+- **Demo Mode**: Basic functionality without requiring the AI model

## Setup

@@ -28,18 +29,22 @@ The space automatically installs:
- `llama-cpp-python` for llama.cpp integration
- Required build tools (`build-essential`, `cmake`)
- Gradio and other dependencies
+- Downloads Osmosis Structure 0.6B model (~1.2GB) on first use

## Usage

+1. **Quick Start**: Run `python setup_and_run.py` for automated setup
+2. **Demo Mode**: Use "Demo (No Model)" for basic text-to-JSON conversion
+3. **Full Mode**: Click "Load Model" to download and use the Osmosis model
+4. **Customize**: Adjust temperature and max_tokens for different output styles

-## Model
+## Model Details

+- **Model**: Osmosis Structure 0.6B BF16 GGUF
+- **Repository**: https://huggingface.co/osmosis-ai/Osmosis-Structure-0.6B
+- **Specialization**: Structure extraction and JSON generation
+- **Size**: ~1.2GB download
+- **Format**: GGUF (optimized for llama.cpp)

## Configuration
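For readers who want to try the same conversion outside the Space, here is a minimal standalone sketch. It assumes `llama-cpp-python` and `huggingface_hub` are installed; the repo ID and filename mirror the defaults added in `config.py` below, and the prompt is a simplified version of the one used in `app.py`:

```python
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the Osmosis Structure GGUF (~1.2GB) into ./models
model_path = hf_hub_download(
    repo_id="osmosis-ai/Osmosis-Structure-0.6B",
    filename="Osmosis-Structure-0.6B-BF16.gguf",
    cache_dir="./models",
)

llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2, verbose=False)

prompt = "Convert this text to JSON format:\n\nAlice is 30 and lives in Paris.\n\n```json\n"
out = llm(prompt, max_tokens=256, temperature=0.7, stop=["```"])
print(out["choices"][0]["text"])
```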
app.py
CHANGED
@@ -3,6 +3,7 @@ import json
from llama_cpp import Llama
import os
from huggingface_hub import hf_hub_download
+from config import get_model_config, get_generation_config, get_recommended_model

# Global variable to store the model
llm = None
@@ -11,66 +12,94 @@ def load_model():
    """Load the llama.cpp model"""
    global llm
    try:
+        print("Loading Osmosis Structure model...")
+
+        # Get model info and config
+        model_info = get_recommended_model()
+        model_config = get_model_config()
+
+        # Create models directory
+        os.makedirs("./models", exist_ok=True)
+
+        # Download the Osmosis model
+        print(f"Downloading {model_info['name']} ({model_info['size']})...")
+        model_path = hf_hub_download(
+            repo_id=model_info['repo_id'],
+            filename=model_info['filename'],
+            cache_dir="./models",
+            resume_download=True
+        )
+
+        print(f"Model downloaded to: {model_path}")
+        print("Initializing llama.cpp...")
+
+        # Initialize llama.cpp with the downloaded model
+        llm = Llama(
+            model_path=model_path,
+            **model_config
+        )
+
+        print("✅ Osmosis Structure model loaded successfully!")
+        return f"✅ Model loaded: {model_info['name']}\nPath: {model_path}\nDescription: {model_info['description']}"

    except Exception as e:
+        error_msg = f"❌ Error loading model: {e}"
+        print(error_msg)
+        return error_msg

def text_to_json(input_text, max_tokens=512, temperature=0.7):
    """Convert plain text to structured JSON using llama.cpp"""
    global llm

    if llm is None:
+        return "❌ Model not loaded. Please load the model first."

    try:
+        # Create a structured prompt optimized for the Osmosis model
+        prompt = f"""<|system|>
+You are a helpful assistant that converts unstructured text into well-formatted JSON. Extract key information and organize it into a logical structure.
+
+<|user|>
+Convert this text to JSON format:
+
+{input_text}
+
+<|assistant|>
+```json"""
+
+        # Get generation config and override with user settings
+        gen_config = get_generation_config()
+        gen_config.update({
+            "max_tokens": max_tokens,
+            "temperature": temperature
+        })
+
        # Generate response using llama.cpp
        response = llm(
            prompt,
+            **gen_config,
            echo=False
        )

        generated_text = response['choices'][0]['text'].strip()

+        # Clean up the response - remove markdown formatting if present
+        if generated_text.startswith('```json'):
+            generated_text = generated_text[7:]
+        if generated_text.endswith('```'):
+            generated_text = generated_text[:-3]
+        generated_text = generated_text.strip()
+
        # Try to parse as JSON to validate
        try:
            parsed_json = json.loads(generated_text)
            return json.dumps(parsed_json, indent=2)
        except json.JSONDecodeError:
+            # If not valid JSON, try to clean it up or return as is
+            return f"Generated (may need cleanup):\n{generated_text}"

    except Exception as e:
+        return f"❌ Error generating JSON: {str(e)}"

def demo_without_model(input_text):
    """Demo function that works without loading a model"""
@@ -99,7 +128,7 @@ def demo_without_model(input_text):
# Create Gradio interface
with gr.Blocks(title="Plain Text to JSON with llama.cpp") as demo:
    gr.Markdown("# Plain Text to JSON Converter")
-    gr.Markdown("Convert plain text into structured JSON format using llama.cpp")
+    gr.Markdown("Convert plain text into structured JSON format using llama.cpp and Osmosis Structure model")

    with gr.Tab("Text to JSON"):
        with gr.Row():
@@ -144,14 +173,15 @@ with gr.Blocks(title="Plain Text to JSON with llama.cpp") as demo:

    gr.Markdown("""
    ### Instructions:
+    1. Click "Load Model" to download and initialize the Osmosis Structure model
+    2. Use "Demo (No Model)" for basic functionality without loading the AI model
+    3. The Osmosis model is optimized for structured data extraction and JSON generation

    ### Notes:
+    - Uses llama.cpp for efficient CPU inference
+    - Osmosis Structure 0.6B model (~1.2GB) will be downloaded automatically
+    - Model is specialized for converting unstructured text to structured formats
+    - Adjust max_tokens and temperature for different output styles
    """)

    # Event handlers
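The `text_to_json` change above strips ```json fences before validating with `json.loads`. A slightly more defensive variant, shown here only as an illustrative sketch (not part of this commit), pulls the first balanced `{...}` span out of the response so stray prose around the JSON does not break parsing:

```python
import json
import re

def extract_json(text: str):
    """Best-effort: return the first parseable JSON object found in text, else None."""
    # Drop any markdown code fences the model echoed back.
    text = re.sub(r"^```(?:json)?|```$", "", text.strip(), flags=re.MULTILINE).strip()
    # Try the whole string first, then the first {...} span.
    for candidate in (text, *re.findall(r"\{.*\}", text, flags=re.DOTALL)[:1]):
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue
    return None
```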
config.py
CHANGED
@@ -0,0 +1,68 @@
+"""
+Configuration settings for llama.cpp in Hugging Face Space
+"""
+
+import os
+
+# Model configuration
+MODEL_CONFIG = {
+    "n_ctx": 2048,       # Context window size
+    "n_threads": 2,      # Number of threads (conservative for HF Spaces)
+    "n_batch": 8,        # Batch size for prompt processing
+    "use_mmap": True,    # Use memory mapping for model files
+    "use_mlock": False,  # Don't lock model in memory (saves RAM)
+    "verbose": False,    # Reduce logging in production
+}
+
+# Generation defaults
+GENERATION_CONFIG = {
+    "temperature": 0.7,
+    "top_p": 0.9,
+    "top_k": 40,
+    "repeat_penalty": 1.1,
+    "stop": ["```", "\n\n\n", "Human:", "Assistant:"],
+}
+
+# Hugging Face Space specific settings
+HF_SPACE_CONFIG = {
+    "max_memory_usage": "2GB",  # Conservative memory usage
+    "timeout_seconds": 30,      # Request timeout
+    "enable_cpu_only": True,    # Force CPU inference
+}
+
+# Model download settings
+MODEL_DOWNLOAD_CONFIG = {
+    "cache_dir": "./models",
+    "use_auth_token": os.getenv("HF_TOKEN", None),
+    "resume_download": True,
+}
+
+# Recommended small GGUF models for demonstration
+RECOMMENDED_MODELS = [
+    {
+        "name": "Osmosis-Structure-0.6B",
+        "repo_id": "osmosis-ai/Osmosis-Structure-0.6B",
+        "filename": "Osmosis-Structure-0.6B-BF16.gguf",
+        "size": "~1.2GB",
+        "description": "Osmosis AI structure-focused model for JSON generation"
+    },
+    {
+        "name": "TinyLlama-1.1B-Chat-v1.0-GGUF",
+        "repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
+        "filename": "tinyllama-1.1b-chat-v1.0.q4_k_m.gguf",
+        "size": "~700MB",
+        "description": "Small, fast model good for testing"
+    }
+]
+
+def get_model_config():
+    """Get model configuration optimized for HF Spaces"""
+    return MODEL_CONFIG.copy()
+
+def get_generation_config():
+    """Get generation configuration"""
+    return GENERATION_CONFIG.copy()
+
+def get_recommended_model():
+    """Get the recommended model for this space"""
+    return RECOMMENDED_MODELS[0]  # Osmosis Structure is the default
download_model.py
ADDED
@@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+"""
+Download a sample GGUF model for testing llama.cpp integration
+"""
+
+import os
+from huggingface_hub import hf_hub_download
+from config import get_recommended_model, MODEL_DOWNLOAD_CONFIG
+
+def download_sample_model():
+    """Download a recommended small model for testing"""
+    model_info = get_recommended_model()
+
+    print(f"Downloading {model_info['name']}...")
+    print(f"  Repository: {model_info['repo_id']}")
+    print(f"  File: {model_info['filename']}")
+    print(f"  Size: {model_info['size']}")
+    print(f"  Description: {model_info['description']}")
+
+    try:
+        # Create models directory if it doesn't exist
+        os.makedirs(MODEL_DOWNLOAD_CONFIG['cache_dir'], exist_ok=True)
+
+        # Download the model
+        model_path = hf_hub_download(
+            repo_id=model_info['repo_id'],
+            filename=model_info['filename'],
+            cache_dir=MODEL_DOWNLOAD_CONFIG['cache_dir'],
+            resume_download=MODEL_DOWNLOAD_CONFIG['resume_download'],
+            token=MODEL_DOWNLOAD_CONFIG['use_auth_token']
+        )
+
+        print("✅ Model downloaded successfully!")
+        print(f"  Path: {model_path}")
+
+        # Create a symlink in the models directory for easy access
+        symlink_path = os.path.join(MODEL_DOWNLOAD_CONFIG['cache_dir'], "model.gguf")
+        if os.path.exists(symlink_path):
+            os.remove(symlink_path)
+
+        try:
+            os.symlink(model_path, symlink_path)
+            print(f"  Symlink created: {symlink_path}")
+        except OSError:
+            # Symlinks might not work on all systems, just copy the path
+            print(f"  Use this path in your code: {model_path}")
+
+        return model_path
+
+    except Exception as e:
+        print(f"❌ Error downloading model: {e}")
+        print("You can manually download a GGUF model and place it in ./models/")
+        return None
+
+def list_available_models():
+    """List models available in the models directory"""
+    models_dir = MODEL_DOWNLOAD_CONFIG['cache_dir']
+
+    if not os.path.exists(models_dir):
+        print(f"Models directory doesn't exist: {models_dir}")
+        return []
+
+    model_files = []
+    for file in os.listdir(models_dir):
+        if file.endswith('.gguf') or file.endswith('.ggml'):
+            file_path = os.path.join(models_dir, file)
+            file_size = os.path.getsize(file_path)
+            model_files.append({
+                'name': file,
+                'path': file_path,
+                'size_mb': file_size / (1024 * 1024)
+            })
+
+    if model_files:
+        print("Available models:")
+        for model in model_files:
+            print(f"  - {model['name']} ({model['size_mb']:.1f} MB)")
+    else:
+        print("No GGUF/GGML models found in models directory")
+
+    return model_files
+
+if __name__ == "__main__":
+    print("Model Download Utility for llama.cpp")
+    print("=" * 50)
+
+    # List existing models
+    print("\nChecking for existing models...")
+    existing_models = list_available_models()
+
+    if not existing_models:
+        print("\nNo models found. Downloading sample model...")
+        download_sample_model()
+    else:
+        print(f"\n✅ Found {len(existing_models)} existing model(s)")
+
+        # Ask if user wants to download another model
+        print("\nDownload sample model anyway? (y/n): ", end="")
+        try:
+            response = input().lower().strip()
+            if response in ['y', 'yes']:
+                download_sample_model()
+            else:
+                print("Using existing models")
+        except (EOFError, KeyboardInterrupt):
+            print("\nUsing existing models")
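Besides running it directly with `python download_model.py`, the module can be imported from other scripts; a small hypothetical sketch:

```python
from download_model import download_sample_model, list_available_models

path = download_sample_model()         # local GGUF path, or None on failure
if path:
    for m in list_available_models():  # [{'name', 'path', 'size_mb'}, ...]
        print(m["name"], f"{m['size_mb']:.1f} MB")
```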
install_verify.py
ADDED
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+"""
+Installation verification script for llama.cpp in Hugging Face Space
+Run this to verify that llama.cpp is properly installed and configured
+"""
+
+import subprocess
+import sys
+import os
+
+def run_command(command, description):
+    """Run a command and return success status"""
+    print(f"{description}...")
+    try:
+        result = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=30)
+        if result.returncode == 0:
+            print(f"✅ {description} - SUCCESS")
+            if result.stdout.strip():
+                print(f"  Output: {result.stdout.strip()}")
+            return True
+        else:
+            print(f"❌ {description} - FAILED")
+            if result.stderr.strip():
+                print(f"  Error: {result.stderr.strip()}")
+            return False
+    except subprocess.TimeoutExpired:
+        print(f"⏰ {description} - TIMEOUT")
+        return False
+    except Exception as e:
+        print(f"❌ {description} - ERROR: {e}")
+        return False
+
+def check_python_version():
+    """Check Python version compatibility"""
+    version = sys.version_info
+    print(f"Python version: {version.major}.{version.minor}.{version.micro}")
+
+    if (version.major, version.minor) >= (3, 8):
+        print("✅ Python version is compatible")
+        return True
+    else:
+        print("❌ Python version should be 3.8 or higher")
+        return False
+
+def check_system_packages():
+    """Check if required system packages are available"""
+    packages = ["gcc", "g++", "cmake", "make"]
+    results = []
+
+    for package in packages:
+        success = run_command(f"which {package}", f"Checking {package}")
+        results.append(success)
+
+    return all(results)
+
+def install_and_test_llamacpp():
+    """Install and test llama-cpp-python"""
+    print("\nInstalling llama-cpp-python...")
+
+    # Install llama-cpp-python
+    install_success = run_command(
+        f"{sys.executable} -m pip install llama-cpp-python --verbose",
+        "Installing llama-cpp-python"
+    )
+
+    if not install_success:
+        print("❌ Failed to install llama-cpp-python")
+        return False
+
+    # Test import
+    test_success = run_command(
+        f"{sys.executable} -c 'from llama_cpp import Llama; print(\"Import successful\")'",
+        "Testing llama-cpp-python import"
+    )
+
+    return test_success
+
+def main():
+    """Main verification function"""
+    print("llama.cpp Installation Verification for Hugging Face Space")
+    print("=" * 70)
+
+    checks = [
+        ("Python Version", check_python_version),
+        ("System Packages", check_system_packages),
+        ("llama-cpp-python Installation", install_and_test_llamacpp),
+    ]
+
+    results = []
+    for check_name, check_func in checks:
+        print(f"\nRunning: {check_name}")
+        print("-" * 40)
+        result = check_func()
+        results.append(result)
+        print()
+
+    print("=" * 70)
+    print("VERIFICATION SUMMARY:")
+
+    for i, (check_name, _) in enumerate(checks):
+        status = "✅ PASSED" if results[i] else "❌ FAILED"
+        print(f"  {check_name}: {status}")
+
+    if all(results):
+        print("\nALL CHECKS PASSED!")
+        print("✅ llama.cpp is successfully installed and ready to use.")
+        print("\nNext steps:")
+        print("  1. Run 'python test_llamacpp.py' to test the integration")
+        print("  2. Start your Gradio app with 'python app.py'")
+        print("  3. Upload a GGUF model file to enable full functionality")
+    else:
+        print("\n⚠️ SOME CHECKS FAILED!")
+        print("❌ Please review the errors above and fix them before proceeding.")
+        print("\nCommon solutions:")
+        print("  - Ensure build tools are installed (build-essential, cmake)")
+        print("  - Check that you have sufficient memory and disk space")
+        print("  - Try reinstalling with: pip install --force-reinstall llama-cpp-python")
+
+    return all(results)
+
+if __name__ == "__main__":
+    success = main()
+    sys.exit(0 if success else 1)
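Because the script exits non-zero when any check fails, it can gate app startup; a minimal sketch, assuming both files sit in the working directory:

```python
import subprocess
import sys

# Run the verifier first; only launch the Gradio app if every check passed.
if subprocess.run([sys.executable, "install_verify.py"]).returncode == 0:
    subprocess.run([sys.executable, "app.py"])
else:
    sys.exit("Environment checks failed; see output above.")
```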
setup_and_run.py
ADDED
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+"""
+Setup and run script for the llama.cpp Hugging Face Space
+"""
+
+import subprocess
+import sys
+import os
+
+def install_dependencies():
+    """Install required dependencies"""
+    print("Installing dependencies...")
+
+    try:
+        # Upgrade pip first
+        subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "pip"], check=True)
+
+        # Install requirements
+        subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True)
+
+        print("✅ Dependencies installed successfully!")
+        return True
+
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Error installing dependencies: {e}")
+        return False
+
+def test_installation():
+    """Test if llama.cpp is properly installed"""
+    print("Testing llama.cpp installation...")
+
+    try:
+        # Test import
+        subprocess.run([sys.executable, "-c", "from llama_cpp import Llama; print('✅ llama-cpp-python imported successfully')"], check=True)
+
+        # Test other dependencies
+        test_imports = [
+            "import gradio; print('✅ Gradio imported')",
+            "import huggingface_hub; print('✅ Hugging Face Hub imported')",
+            "from config import get_recommended_model; print('✅ Config imported')"
+        ]
+
+        for test_import in test_imports:
+            subprocess.run([sys.executable, "-c", test_import], check=True)
+
+        print("✅ All tests passed!")
+        return True
+
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Installation test failed: {e}")
+        return False
+
+def run_app():
+    """Run the Gradio app"""
+    print("Starting the Gradio app...")
+    print("Note: The Osmosis model will be downloaded on first use")
+    print("The app will be available at http://localhost:7860")
+    print("Press Ctrl+C to stop the app")
+
+    try:
+        subprocess.run([sys.executable, "app.py"], check=True)
+    except KeyboardInterrupt:
+        print("\nApp stopped by user")
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Error running app: {e}")
+
+def main():
+    """Main setup function"""
+    print("llama.cpp Hugging Face Space Setup")
+    print("=" * 50)
+
+    # Check Python version
+    if sys.version_info < (3, 8):
+        print("❌ Python 3.8 or higher is required")
+        sys.exit(1)
+
+    print(f"✅ Python version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}")
+
+    # Install dependencies
+    if not install_dependencies():
+        print("❌ Failed to install dependencies")
+        sys.exit(1)
+
+    # Test installation
+    if not test_installation():
+        print("❌ Installation test failed")
+        sys.exit(1)
+
+    print("\nSetup completed successfully!")
+    print("\nWhat's installed:")
+    print("  - llama-cpp-python for efficient CPU inference")
+    print("  - Gradio for the web interface")
+    print("  - Hugging Face Hub for model downloading")
+    print("  - Osmosis Structure 0.6B model (will download on first use)")
+
+    # Ask if user wants to run the app
+    print("\nWould you like to run the app now? (y/n): ", end="")
+    try:
+        response = input().lower().strip()
+        if response in ['y', 'yes']:
+            run_app()
+        else:
+            print("Setup complete! Run 'python app.py' when ready.")
+    except (EOFError, KeyboardInterrupt):
+        print("\nSetup complete! Run 'python app.py' when ready.")
+
+if __name__ == "__main__":
+    main()
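The script prompts before launching the app; for non-interactive environments (CI or Space build steps), one hypothetical way to run it unattended is to feed the answer on stdin:

```python
import subprocess
import sys

# Answer "n" to the final "run the app now?" prompt so setup exits after verification.
subprocess.run([sys.executable, "setup_and_run.py"], input="n\n", text=True, check=True)
```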