from llama_cpp import Llama

# Load the quantized TinyLlama model; n_ctx sets the context window size in tokens
llm = Llama(model_path="models/tinyllama-1.1b-chat.gguf", n_ctx=2048)

def tinyllama_chat(prompt: str) -> str:
    """Send a single user prompt to TinyLlama and return the reply text."""
    try:
        response = llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,  # moderate sampling randomness
        )
        # The response follows the OpenAI-style chat schema:
        # the generated text lives in choices[0].message.content.
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"⚠️ TinyLlama failed: {str(e)}"
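
A minimal usage sketch, assuming the GGUF file actually exists at the path passed to Llama above (the prompt text here is illustrative):

# Quick smoke test of the helper defined above
if __name__ == "__main__":
    # assumes models/tinyllama-1.1b-chat.gguf is present locally
    print(tinyllama_chat("Explain what a context window is in one sentence."))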