import gradio as gr
import os
import subprocess
from huggingface_hub import snapshot_download, login
import tempfile
import shutil

# Function to download, convert, and quantize a model to GGUF
def convert_to_gguf(model_id, quantization_method, hf_token=None):
    temp_dir = None  # defined up front so the finally block can test it safely
    try:
        # Create temporary directories for the downloaded model and the output files
        temp_dir = tempfile.mkdtemp()
        output_dir = tempfile.mkdtemp()

        # Log in to Hugging Face if a token is provided (needed for gated models)
        if hf_token:
            login(hf_token)

        # Download the model snapshot from the Hugging Face Hub
        model_path = snapshot_download(repo_id=model_id, local_dir=temp_dir, local_dir_use_symlinks=False)

        # Clone the llama.cpp repository if it is not already present
        llama_cpp_dir = "/tmp/llama.cpp"
        if not os.path.exists(llama_cpp_dir):
            subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", llama_cpp_dir], check=True)

        # Install the Python dependencies of the llama.cpp conversion scripts
        subprocess.run(["pip", "install", "-r", f"{llama_cpp_dir}/requirements.txt"], check=True)

        # Configure and build llama.cpp with CMake. LLAMA_CURL is switched off
        # so the build does not pull in libcurl, which conversion does not need.
        build_dir = os.path.join(llama_cpp_dir, "build")
        os.makedirs(build_dir, exist_ok=True)
        subprocess.run(["cmake", "..", "-DLLAMA_CURL=OFF"], cwd=build_dir, check=True)
        subprocess.run(["cmake", "--build", ".", "--config", "Release"], cwd=build_dir, check=True)

        # Convert the model to an intermediate GGUF file at f16 precision
        intermediate_gguf = os.path.join(output_dir, f"{model_id.replace('/', '-')}-f16.gguf")
        convert_cmd = [
            "python", f"{llama_cpp_dir}/convert_hf_to_gguf.py",
            model_path,
            "--outfile", intermediate_gguf,
            "--outtype", "f16",
        ]
        subprocess.run(convert_cmd, check=True)

        # Quantize the f16 GGUF file with the selected method. Current
        # llama.cpp builds produce bin/llama-quantize; older checkouts
        # built a binary named plain "quantize".
        output_gguf = os.path.join(output_dir, f"{model_id.replace('/', '-')}-{quantization_method}.gguf")
        quantize_cmd = [
            os.path.join(build_dir, "bin", "llama-quantize"),
            intermediate_gguf,
            output_gguf,
            quantization_method,
        ]
        subprocess.run(quantize_cmd, check=True)

        # Return the path to the quantized GGUF file
        return output_gguf, f"Model converted and quantized successfully! Download the GGUF file: {output_gguf}"
    except Exception as e:
        return None, f"Error: {str(e)}"
    finally:
        # Clean up the downloaded model files. The llama.cpp clone is kept so
        # later conversions can reuse the build, and output_dir is kept because
        # Gradio serves the returned GGUF file from it.
        if temp_dir and os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
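
# Quick sanity check (hypothetical, bypassing the UI): convert a small public
# model directly. Assumes git, cmake, and a C/C++ compiler are available so
# llama.cpp can be built on this machine.
#   gguf_path, status = convert_to_gguf("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "q4_k_m")
#   print(status)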

# Gradio interface
def gradio_app():
    with gr.Blocks() as demo:
        gr.Markdown("# Hugging Face to GGUF Converter")
        gr.Markdown("Enter a Hugging Face model ID and select a quantization method to convert the model to GGUF format.")
        with gr.Row():
            model_id_input = gr.Textbox(label="Hugging Face Model ID", placeholder="e.g., meta-llama/Llama-3.2-3B-Instruct")
            hf_token_input = gr.Textbox(label="Hugging Face Token (required only for gated models)", type="password", placeholder="Enter your HF token")
        with gr.Row():
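            # llama.cpp quantization type names: the leading digit is the bit
            # width; "_k" types are k-quants, with s/m/l suffixes for small/
            # medium/large variants. q4_k_m is a common size/quality trade-off.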
            quantization_method = gr.Dropdown(
                choices=["q8_0", "q4_k_m", "q4_0", "q5_0", "q5_k_m", "q6_k", "q3_k_m"],
                label="Quantization Method",
                value="q4_k_m",
            )
        convert_button = gr.Button("Convert to GGUF")
        output_file = gr.File(label="Download GGUF File")
        output_message = gr.Textbox(label="Status")
        convert_button.click(
            fn=convert_to_gguf,
            inputs=[model_id_input, quantization_method, hf_token_input],
            outputs=[output_file, output_message],
        )
    return demo

if __name__ == "__main__":
    gradio_app().launch()
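
# To run this app locally (a minimal sketch, assuming Python 3 is available):
#   pip install gradio huggingface_hub
#   python app.py
# git, cmake, and a C/C++ compiler must also be on PATH, because the app
# clones and builds llama.cpp during the first conversion.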