from llama_cpp import Llama

# Load the quantized TinyLlama model once at startup. n_ctx sets the
# context window (prompt plus reply) to 2048 tokens.
llm = Llama(model_path="models/tinyllama-1.1b-chat.gguf", n_ctx=2048)

def tinyllama_chat(prompt: str) -> str:
    try:
        # llama-cpp-python exposes an OpenAI-style chat completion API.
        response = llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        # Return the error as text so the calling UI keeps working.
        return f"⚠️ TinyLlama failed: {str(e)}"
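The snippet above assumes the GGUF file already exists at models/tinyllama-1.1b-chat.gguf. If it does not, one way to fetch a quantized TinyLlama build is with hf_hub_download from the huggingface_hub library; note that the repo id and filename below are assumptions, so verify them against the actual model page before relying on them.

from huggingface_hub import hf_hub_download

# Assumed repo and quant filename; check the model page for the real names.
model_path = hf_hub_download(
    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
    filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
    local_dir="models",
)
# Pass the returned path to Llama(model_path=model_path) instead of a
# hard-coded filename, since the downloaded file keeps its repo name.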
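With the model in place, a minimal smoke test of tinyllama_chat might look like the sketch below; the prompt and the __main__ guard are illustrative, not part of the original code.

if __name__ == "__main__":
    # Illustrative smoke test: one prompt in, one reply out. The Llama
    # model is loaded at import time above, so this call only pays for
    # inference, not for loading weights.
    print(tinyllama_chat("In one sentence, what is quantization?"))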