# Requires the llama-cpp-python bindings: pip install llama-cpp-python
from llama_cpp import Llama

# Load the quantized TinyLlama model (GGUF format); n_ctx sets the context window in tokens
llm = Llama(model_path="models/tinyllama-1.1b-chat.gguf", n_ctx=2048)

def tinyllama_chat(prompt: str) -> str:
    """Send a single user prompt to TinyLlama and return the reply text."""
    try:
        response = llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,  # moderate sampling randomness
        )
        # create_chat_completion returns an OpenAI-style dict:
        # the reply text lives under choices[0].message.content
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"⚠️ TinyLlama failed: {e}"
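
# A minimal usage sketch (hypothetical prompt; assumes the GGUF file above
# has already been downloaded to models/):
if __name__ == "__main__":
    print(tinyllama_chat("Explain model quantization in one sentence."))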