from llama_index.core import Settings
from llama_index.llms.llama_cpp import LlamaCPP


def build_llm(model_path, temperature=0.7, max_tokens=256, context_window=2048):
    """Load a local GGUF model via llama.cpp and register it as the global LlamaIndex LLM."""
    print("build_llm: loading model from", model_path)
    llm = LlamaCPP(
        model_path=model_path,
        temperature=temperature,
        max_new_tokens=max_tokens,      # cap on tokens generated per completion
        context_window=context_window,  # must not exceed the model's trained context length
        verbose=False,
    )
    # Make this LLM the default for all LlamaIndex components (indexes, query engines).
    Settings.llm = llm
    return llm
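
# Minimal usage sketch. The model path below is a placeholder; point it at any
# local GGUF file you have downloaded (e.g. via huggingface-cli).
if __name__ == "__main__":
    llm = build_llm("models/llama-2-7b-chat.Q4_K_M.gguf", temperature=0.2)
    # LlamaCPP.complete() returns a CompletionResponse; its generated text is in .text.
    response = llm.complete("In one sentence, what is llama.cpp?")
    print(response.text)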