# Quantization-Attempts / load_gguf.py
from transformers import AutoModel

base_model = "ibm-research/granite-3.2-8b-instruct-GGUF"
GGUF_MODEL = "granite-3.2-8b-instruct-Q4_K_M.gguf"

# Non-GGUF checkpoint, kept here for comparison:
# model = AutoModel.from_pretrained("ibm-research/granite-3.2-8b-instruct", device_map="auto")

# Load the Q4_K_M GGUF file; transformers dequantizes the GGUF weights on load.
model = AutoModel.from_pretrained(base_model, device_map="auto", torch_dtype="auto", quantization_config=None, gguf_file=GGUF_MODEL)
print(model.config)
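
# Minimal generation sketch (not part of the original script, shown only as an
# illustration): AutoModel loads the backbone without an LM head, so to actually
# generate text the checkpoint is reloaded with AutoModelForCausalLM. This assumes
# the GGUF repo also provides tokenizer files readable via gguf_file, as in the
# TinyLlama reference example below.
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained(base_model, gguf_file=GGUF_MODEL)
lm = AutoModelForCausalLM.from_pretrained(base_model, gguf_file=GGUF_MODEL, device_map="auto", torch_dtype="auto")

prompt = "Explain GGUF quantization in one sentence."
inputs = tokenizer(prompt, return_tensors="pt").to(lm.device)
output_ids = lm.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))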
"""
# pip install gguf
from transformers import AutoTokenizer, AutoModelForCausalLM
model_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
filename = "tinyllama-1.1b-chat-v1.0.Q6_K.gguf"
torch_dtype = torch.float32 # could be torch.float16 or torch.bfloat16 too
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename, torch_dtype=torch_dtype)
"""