import streamlit as st
from llama_cpp import Llama

# Load the quantized Cybertron GGUF model once and cache it across Streamlit reruns.
@st.cache_resource
def load_model():
    return Llama(
        model_path="cybertron-v4-qw7B-MGS-IQ2_M.gguf",
        n_ctx=2048,        # context window in tokens
        n_threads=8,       # CPU threads for inference
        n_gpu_layers=20,   # layers offloaded to the GPU
    )

llm = load_model()

st.title("Cybertron Chat")
prompt = st.text_input("Ask a question:")

if prompt:
    with st.spinner("Generating response..."):
        response = llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=256,
        )
        st.write(response["choices"][0]["message"]["content"])
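# Optional variant (a sketch, not part of the original app): stream tokens as they
# are generated instead of waiting for the full completion. This assumes the
# llama-cpp-python streaming chunk format (choices[0]["delta"]) and a Streamlit
# version that provides st.write_stream (1.31+). Launch either version with
# `streamlit run <your_script>.py`; the filename is whatever you saved above as.
if prompt:
    with st.spinner("Generating response..."):
        chunks = llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=256,
            stream=True,   # yield incremental chunks instead of one response
        )

        def token_stream():
            # Each chunk carries an incremental "delta"; yield its text when present.
            for chunk in chunks:
                delta = chunk["choices"][0]["delta"]
                if "content" in delta:
                    yield delta["content"]

        st.write_stream(token_stream())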