from llama_index.core import Settings
from llama_index.llms.llama_cpp import LlamaCPP


def build_llm(model_path, temperature=0.7, max_tokens=256, context_window=2048):
    """Load a local GGUF model via llama.cpp and register it as the global LlamaIndex LLM."""
    print("build_llm: loading model from", model_path)
    llm = LlamaCPP(
        model_path=model_path,
        temperature=temperature,
        max_new_tokens=max_tokens,      # cap on tokens generated per completion
        context_window=context_window,  # must not exceed the model's trained context length
        verbose=False,
    )
    # Make this LLM the default for all LlamaIndex components (indexes, query engines).
    Settings.llm = llm
    return llm
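
# Minimal usage sketch. The model path below is a placeholder; point it at any
# local GGUF file you have downloaded (e.g. via huggingface-cli).
if __name__ == "__main__":
    llm = build_llm("models/llama-2-7b-chat.Q4_K_M.gguf", temperature=0.2)
    # LlamaCPP.complete() returns a CompletionResponse; its generated text is in .text.
    response = llm.complete("In one sentence, what is llama.cpp?")
    print(response.text)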