import gradio as gr
import os
import subprocess
from huggingface_hub import snapshot_download, login
import tempfile
import shutil

# Function to download, convert, and quantize a model to GGUF
def convert_to_gguf(model_id, quantization_method, hf_token=None):
    temp_dir = None  # defined up front so the finally block can test it safely
    try:
        # Create temporary directories for the downloaded model and the output files
        temp_dir = tempfile.mkdtemp()
        output_dir = tempfile.mkdtemp()

        # Log in to Hugging Face if a token is provided (needed for gated models)
        if hf_token:
            login(hf_token)

        # Download the model snapshot from the Hugging Face Hub
        model_path = snapshot_download(repo_id=model_id, local_dir=temp_dir, local_dir_use_symlinks=False)

        # Clone the llama.cpp repository if it is not already present
        llama_cpp_dir = "/tmp/llama.cpp"
        if not os.path.exists(llama_cpp_dir):
            subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", llama_cpp_dir], check=True)

        # Install the Python dependencies of the llama.cpp conversion scripts
        subprocess.run(["pip", "install", "-r", f"{llama_cpp_dir}/requirements.txt"], check=True)

        # Configure and build llama.cpp with CMake. LLAMA_CURL is switched off
        # so the build does not pull in libcurl, which conversion does not need.
        build_dir = os.path.join(llama_cpp_dir, "build")
        os.makedirs(build_dir, exist_ok=True)
        subprocess.run(["cmake", "..", "-DLLAMA_CURL=OFF"], cwd=build_dir, check=True)
        subprocess.run(["cmake", "--build", ".", "--config", "Release"], cwd=build_dir, check=True)

        # Convert the model to an intermediate GGUF file at f16 precision
        intermediate_gguf = os.path.join(output_dir, f"{model_id.replace('/', '-')}-f16.gguf")
        convert_cmd = [
            "python", f"{llama_cpp_dir}/convert_hf_to_gguf.py",
            model_path,
            "--outfile", intermediate_gguf,
            "--outtype", "f16",
        ]
        subprocess.run(convert_cmd, check=True)

        # Quantize the f16 GGUF file with the selected method. Current
        # llama.cpp builds produce bin/llama-quantize; older checkouts
        # built a binary named plain "quantize".
        output_gguf = os.path.join(output_dir, f"{model_id.replace('/', '-')}-{quantization_method}.gguf")
        quantize_cmd = [
            os.path.join(build_dir, "bin", "llama-quantize"),
            intermediate_gguf,
            output_gguf,
            quantization_method,
        ]
        subprocess.run(quantize_cmd, check=True)

        # Return the path to the quantized GGUF file
        return output_gguf, f"Model converted and quantized successfully! Download the GGUF file: {output_gguf}"
    except Exception as e:
        return None, f"Error: {str(e)}"
    finally:
        # Clean up the downloaded model files. The llama.cpp clone is kept so
        # later conversions can reuse the build, and output_dir is kept because
        # Gradio serves the returned GGUF file from it.
        if temp_dir and os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
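
# Quick sanity check (hypothetical, bypassing the UI): convert a small public
# model directly. Assumes git, cmake, and a C/C++ compiler are available so
# llama.cpp can be built on this machine.
#   gguf_path, status = convert_to_gguf("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "q4_k_m")
#   print(status)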

# Gradio interface
def gradio_app():
    with gr.Blocks() as demo:
        gr.Markdown("# Hugging Face to GGUF Converter")
        gr.Markdown("Enter a Hugging Face model ID and select a quantization method to convert the model to GGUF format.")
        with gr.Row():
            model_id_input = gr.Textbox(label="Hugging Face Model ID", placeholder="e.g., meta-llama/Llama-3.2-3B-Instruct")
            hf_token_input = gr.Textbox(label="Hugging Face Token (required only for gated models)", type="password", placeholder="Enter your HF token")
        with gr.Row():
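            # llama.cpp quantization type names: the leading digit is the bit
            # width; "_k" types are k-quants, with s/m/l suffixes for small/
            # medium/large variants. q4_k_m is a common size/quality trade-off.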
            quantization_method = gr.Dropdown(
                choices=["q8_0", "q4_k_m", "q4_0", "q5_0", "q5_k_m", "q6_k", "q3_k_m"],
                label="Quantization Method",
                value="q4_k_m",
            )
        convert_button = gr.Button("Convert to GGUF")
        output_file = gr.File(label="Download GGUF File")
        output_message = gr.Textbox(label="Status")
        convert_button.click(
            fn=convert_to_gguf,
            inputs=[model_id_input, quantization_method, hf_token_input],
            outputs=[output_file, output_message],
        )
    return demo

if __name__ == "__main__":
    gradio_app().launch()
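
# To run this app locally (a minimal sketch, assuming Python 3 is available):
#   pip install gradio huggingface_hub
#   python app.py
# git, cmake, and a C/C++ compiler must also be on PATH, because the app
# clones and builds llama.cpp during the first conversion.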