import gradio as gr
import os
import subprocess
from huggingface_hub import snapshot_download, login
import tempfile
import shutil
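
# Note: this script clones and builds llama.cpp at runtime, so in addition to the Python
# packages above it assumes git, cmake, and a C/C++ compiler are available on PATH.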

# Function to download, convert, and quantize a model to GGUF
def convert_to_gguf(model_id, quantization_method, hf_token=None):
    # Create temporary directories for the downloaded model and the converted output.
    # They are set up before the try block so the finally clause can always clean up safely.
    temp_dir = tempfile.mkdtemp()
    output_dir = tempfile.mkdtemp()
    llama_cpp_dir = "/tmp/llama.cpp"

    try:

        # Log in to Hugging Face if token is provided (for gated models)
        if hf_token:
            login(hf_token)

        # Download the model snapshot from Hugging Face into the temporary directory
        model_path = snapshot_download(repo_id=model_id, local_dir=temp_dir)

        # Clone the llama.cpp repository if it is not already present (kept as a cache between runs)
        if not os.path.exists(llama_cpp_dir):
            subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", llama_cpp_dir], check=True)

        # Install llama.cpp Python dependencies
        subprocess.run(["pip", "install", "-r", f"{llama_cpp_dir}/requirements.txt"], check=True)

        # Create build directory for CMake
        build_dir = os.path.join(llama_cpp_dir, "build")
        os.makedirs(build_dir, exist_ok=True)

        # Configure and build llama.cpp in Release mode
        # (CMAKE_BUILD_TYPE covers single-config generators; --config covers multi-config ones)
        subprocess.run(["cmake", "..", "-DCMAKE_BUILD_TYPE=Release"], cwd=build_dir, check=True)
        subprocess.run(["cmake", "--build", ".", "--config", "Release", "-j"], cwd=build_dir, check=True)

        # Convert model to GGUF with f16 precision
        intermediate_gguf = os.path.join(output_dir, f"{model_id.replace('/', '-')}-f16.gguf")
        convert_cmd = [
            "python", f"{llama_cpp_dir}/convert_hf_to_gguf.py",
            model_path,
            "--outfile", intermediate_gguf,
            "--outtype", "f16"
        ]
        subprocess.run(convert_cmd, check=True)

        # Quantize the f16 GGUF with the selected method.
        # Recent llama.cpp builds name the tool "llama-quantize"; older builds produced "quantize".
        output_gguf = os.path.join(output_dir, f"{model_id.replace('/', '-')}-{quantization_method}.gguf")
        quantize_bin = os.path.join(build_dir, "bin", "llama-quantize")
        if not os.path.exists(quantize_bin):
            quantize_bin = os.path.join(build_dir, "bin", "quantize")
        quantize_cmd = [
            quantize_bin,
            intermediate_gguf,
            output_gguf,
            quantization_method
        ]
        subprocess.run(quantize_cmd, check=True)

        # Remove the intermediate f16 file to free disk space
        os.remove(intermediate_gguf)

        # Return the path to the quantized GGUF file
        return output_gguf, f"Model converted and quantized successfully! Download the GGUF file: {output_gguf}"

    except Exception as e:
        return None, f"Error: {str(e)}"
    finally:
        # Clean up the downloaded model files. The llama.cpp checkout is intentionally kept
        # so later runs reuse the build, and output_dir is kept because Gradio serves the
        # quantized file from it.
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)

# Gradio interface
def gradio_app():
    with gr.Blocks() as demo:
        gr.Markdown("# Hugging Face to GGUF Converter")
        gr.Markdown("Enter a Hugging Face model ID and select a quantization method to convert the model to GGUF format.")

        with gr.Row():
            model_id_input = gr.Textbox(label="Hugging Face Model ID", placeholder="e.g., meta-llama/Llama-3.2-3B-Instruct")
            hf_token_input = gr.Textbox(label="Hugging Face Token (optional for gated models)", type="password", placeholder="Enter your HF token")

        with gr.Row():
            quantization_method = gr.Dropdown(
                choices=["q8_0", "q4_k_m", "q4_0", "q5_0", "q5_k_m", "q6_k", "q3_k_m"],
                label="Quantization Method",
                value="q4_k_m"
            )

        convert_button = gr.Button("Convert to GGUF")
        output_file = gr.File(label="Download GGUF File")
        output_message = gr.Textbox(label="Status")

        convert_button.click(
            fn=convert_to_gguf,
            inputs=[model_id_input, quantization_method, hf_token_input],
            outputs=[output_file, output_message]
        )

    return demo

if __name__ == "__main__":
    gradio_app().launch()