try:
    from rkllm.api import RKLLM
except ImportError:
    print("RKLLM not installed. Please install the wheel from 'https://github.com/airockchip/rknn-llm'.")
    exit(1)

from getpass import getpass

from huggingface_hub import hf_hub_download, snapshot_download

llm = RKLLM()


def DownloadLoraModel(token):
    """Download the LoRA adapter and the base model from the Hugging Face Hub."""
    repo_id = "Prince-1/orpheus-3b-0.1-ft_4_25"
    local_dir = "OrpheusLora"  # local directory for the LoRA adapter
    print("Downloading LoRA model from Hugging Face Hub...")
    snapshot_download(repo_id=repo_id, local_dir=local_dir, token=token)
    print("LoRA model downloaded successfully.")

    print("Downloading main model from Hugging Face Hub...")
    repo_id = "unsloth/orpheus-3b-0.1-ft-unsloth-bnb-4bit"
    local_dir = "OrpheusMain"  # local directory for the base model
    snapshot_download(repo_id=repo_id, local_dir=local_dir, token=token)
    print("Main model downloaded successfully.")
    return ("OrpheusMain", "OrpheusLora")


def DownloadGGUF(token):
    """Download the GGUF checkpoint from the Hugging Face Hub."""
    print("Downloading GGUF model from Hugging Face Hub...")
    path = hf_hub_download(
        repo_id="Prince-1/orpheus_3b_0.1_GGUF",
        filename="unsloth.F16.gguf",
        token=token,
        local_dir="GGUF",
    )
    print("GGUF model downloaded successfully.")
    return path


def UsingHf(llm, modelpath, modelLora):
    """Load a Hugging Face model plus LoRA adapter into RKLLM."""
    print("Loading model...")
    print(modelpath, modelLora)
    ret = llm.load_huggingface(model=modelpath, model_lora=modelLora, device="cpu")
    if ret != 0:
        print("Load model failed!")
        exit(ret)
    return llm


def UsingGGUF(llm, modelpath):
    """Load a GGUF checkpoint into RKLLM."""
    print("Loading model...")
    ret = llm.load_gguf(model=modelpath)
    if ret != 0:
        print("Load model failed!")
        exit(ret)
    return llm


password = getpass("Please enter your Hugging Face token: ")
if password == "":
    print("No token provided.")
    exit(1)

while True:
    print("Do you want to download the LoRA model or the GGUF model?")
    print("1. LoRA")
    print("2. GGUF")
    i = input()
    if i == "1":
        main, lora = DownloadLoraModel(password)
        UsingHf(llm, main, lora)
        break
    elif i == "2":
        gguf = DownloadGGUF(password)
        UsingGGUF(llm, gguf)
        break
    else:
        print("Invalid input. Please enter 1 or 2.")

# Build the model for the Rockchip NPU.
dataset = None
qparams = None
target_platform = "RK3588"
optimization_level = 1
quantized_dtype = "w8a8"  # alternatives: "w4a16_g32", "w4a16_g64", "w4a16_g128"
quantized_algorithm = "normal"
num_npu_core = 3

print("Building model...")
ret = llm.build(
    do_quantization=False,
    optimization_level=optimization_level,
    quantized_dtype=quantized_dtype,
    quantized_algorithm=quantized_algorithm,
    target_platform=target_platform,
    num_npu_core=num_npu_core,
    extra_qparams=qparams,
    dataset=dataset,
)
if ret != 0:
    print("Build model failed!")
    exit(ret)
print("Model built successfully.")

# Export the RKLLM model.
ret = llm.export_rkllm(f"orpheus_3b_0.1_ft_{quantized_dtype}_{target_platform[2:]}.rkllm")
if ret != 0:
    print("Export model failed!")
    exit(ret)
print("Model exported successfully.")
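
# Note: this conversion script runs on the host PC, not on the board. The
# exported file (with the defaults above, "orpheus_3b_0.1_ft_w8a8_3588.rkllm")
# is intended to be copied to the RK3588 target and loaded there with the
# RKLLM runtime that ships with https://github.com/airockchip/rknn-llm.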