model_to_GGUF / app.py
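"""Gradio app: download a Hugging Face model, convert it to GGUF with llama.cpp's
convert_hf_to_gguf.py script, then quantize it with the selected method."""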
import gradio as gr
import os
import subprocess
from huggingface_hub import snapshot_download, login, HfApi
import tempfile
import shutil
# Function to download, convert, and quantize model to GGUF
def convert_to_gguf(model_id, quantization_method, hf_token=None):
    # Create working directories up front so the finally block can reference them safely
    temp_dir = tempfile.mkdtemp()
    output_dir = tempfile.mkdtemp()
    llama_cpp_dir = "/tmp/llama.cpp"
    try:
        # Log in to Hugging Face if a token is provided (required for gated models)
        if hf_token:
            login(hf_token)

        # Download the model snapshot from Hugging Face
        model_path = snapshot_download(repo_id=model_id, local_dir=temp_dir, local_dir_use_symlinks=False)

        # Clone the llama.cpp repository if it is not already present
        if not os.path.exists(llama_cpp_dir):
            subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", llama_cpp_dir], check=True)

        # Install llama.cpp's Python dependencies (needed by the conversion script)
        subprocess.run(["pip", "install", "-r", f"{llama_cpp_dir}/requirements.txt"], check=True)

        # Configure and build llama.cpp with CMake
        build_dir = os.path.join(llama_cpp_dir, "build")
        os.makedirs(build_dir, exist_ok=True)
        subprocess.run(["cmake", ".."], cwd=build_dir, check=True)
        subprocess.run(["cmake", "--build", ".", "--config", "Release"], cwd=build_dir, check=True)

        # Convert the model to GGUF with f16 precision
        intermediate_gguf = os.path.join(output_dir, f"{model_id.replace('/', '-')}-f16.gguf")
        convert_cmd = [
            "python", f"{llama_cpp_dir}/convert_hf_to_gguf.py",
            model_path,
            "--outfile", intermediate_gguf,
            "--outtype", "f16"
        ]
        subprocess.run(convert_cmd, check=True)

        # Quantize the f16 GGUF file with the selected method.
        # Recent llama.cpp builds name the binary "llama-quantize"; older builds used "quantize".
        quantize_bin = os.path.join(build_dir, "bin", "llama-quantize")
        if not os.path.exists(quantize_bin):
            quantize_bin = os.path.join(build_dir, "bin", "quantize")
        output_gguf = os.path.join(output_dir, f"{model_id.replace('/', '-')}-{quantization_method}.gguf")
        quantize_cmd = [
            quantize_bin,
            intermediate_gguf,
            output_gguf,
            quantization_method
        ]
        subprocess.run(quantize_cmd, check=True)

        # Return the path to the quantized GGUF file
        return output_gguf, f"Model converted and quantized successfully! Download the GGUF file: {output_gguf}"
    except Exception as e:
        return None, f"Error: {e}"
    finally:
        # Clean up the downloaded model files and the llama.cpp checkout
        # (output_dir is kept so the returned GGUF file stays downloadable)
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
        if os.path.exists(llama_cpp_dir):
            shutil.rmtree(llama_cpp_dir)
# Gradio interface
def gradio_app():
    with gr.Blocks() as demo:
        gr.Markdown("# Hugging Face to GGUF Converter")
        gr.Markdown("Enter a Hugging Face model ID and select a quantization method to convert the model to GGUF format.")
        with gr.Row():
            model_id_input = gr.Textbox(label="Hugging Face Model ID", placeholder="e.g., meta-llama/Llama-3.2-3B-Instruct")
            hf_token_input = gr.Textbox(label="Hugging Face Token (optional; required for gated models)", type="password", placeholder="Enter your HF token")
        with gr.Row():
            quantization_method = gr.Dropdown(
                choices=["q8_0", "q4_k_m", "q4_0", "q5_0", "q5_k_m", "q6_k", "q3_k_m"],
                label="Quantization Method",
                value="q4_k_m"
            )
        convert_button = gr.Button("Convert to GGUF")
        output_file = gr.File(label="Download GGUF File")
        output_message = gr.Textbox(label="Status")
        convert_button.click(
            fn=convert_to_gguf,
            inputs=[model_id_input, quantization_method, hf_token_input],
            outputs=[output_file, output_message]
        )
    return demo
if __name__ == "__main__":
    gradio_app().launch()