# Arsh-llm-demo / app.py
# Gradio chat demo for the Arsh-llm GGUF model.
# Source: Hugging Face Space file view, commit 71f62cd ("Update app.py")
# by arshiaafshani, 2.44 kB.
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
from llama_cpp_agent.providers import LlamaCppPythonProvider
# ⬇️ Download the model into ./models and keep the path it was saved to,
# so the loader below never has to repeat the file name.
model_path = hf_hub_download(
    repo_id="mradermacher/Arsh-llm-GGUF",
    filename="Arsh-llm.Q4_K_M.gguf",
    local_dir="./models",
)

# 🧠 Load the model (only once, at startup)
llm = Llama(
    model_path=model_path,
    n_batch=512,
    n_ctx=8192,
    verbose=False,
)

# Wrap the loaded model in a provider and build the shared chat agent.
# LlamaCppPythonProvider must be imported from llama_cpp_agent.providers.
provider = LlamaCppPythonProvider(llm)
agent = LlamaCppAgent(
    provider,
    system_prompt="You are Arsh, a helpful assistant.",
    predefined_messages_formatter_type=MessagesFormatterType.CHATML,
    debug_output=False,
)
# 💬 Chat responder
def respond(
    message,
    chat_history,
    system_message="You are Arsh, a helpful assistant.",
    max_tokens=2048,
    temperature=0.6,
    top_p=0.95,
    top_k=40,
    repeat_penalty=1.1,
):
    """Stream the assistant's reply to ``message``.

    The extra parameters mirror, in order, the ``additional_inputs`` of the
    Gradio ``ChatInterface`` that calls this function; without them every
    chat turn fails with a ``TypeError`` because Gradio passes those values
    positionally. Their defaults equal the initial values of the UI controls,
    so two-argument calls keep working.

    Args:
        message: The new user message.
        chat_history: Previous turns as ``(user, assistant)`` pairs; may be
            empty or ``None``.
        system_message: System prompt to use for this request.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Number of highest-probability tokens considered.
        repeat_penalty: Penalty applied to repeated tokens.

    Yields:
        The reply accumulated so far, once per streamed chunk.
    """
    # Sampling settings for this request only.
    # NOTE(review): assumes the provider returns a fresh settings object on
    # each call, so concurrent requests do not share state — confirm.
    settings = agent.provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = int(top_k)
    settings.top_p = top_p
    settings.max_tokens = int(max_tokens)
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Rebuild the agent-side history from Gradio's (user, assistant) pairs.
    messages = BasicChatHistory()
    for user_msg, assistant_msg in chat_history or []:
        # Either side of a pair can be None (e.g. a turn without text).
        if user_msg:
            messages.add_message({
                'role': Roles.user,
                'content': user_msg
            })
        if assistant_msg:
            messages.add_message({
                'role': Roles.assistant,
                'content': assistant_msg
            })

    stream = agent.get_chat_response(
        message,
        system_prompt=system_message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Yield the growing reply so the UI can redraw it as chunks arrive.
    response = ""
    for token in stream:
        response += token
        yield response
# 🎛️ Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Arsh-LLM Q4_K_M Chat Demo")

    # Extra controls handed to the responder after (message, history).
    # They are created before the chatbot, and in this order, on purpose:
    # the order defines the positional arguments the chat function receives.
    extra_controls = [
        gr.Textbox(
            value="You are Arsh, a helpful assistant.",
            label="System Message",
            interactive=True,
        ),
        gr.Slider(label="Max Tokens", minimum=1, maximum=4096, value=2048, step=1),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, value=0.6, step=0.1),
        gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, value=0.95, step=0.05),
        gr.Slider(label="Top-k", minimum=0, maximum=100, value=40, step=1),
        gr.Slider(label="Repetition Penalty", minimum=0.0, maximum=2.0, value=1.1, step=0.1),
    ]

    # Conversation pane shown by the chat interface.
    chat_window = gr.Chatbot(
        label="Chat with Arsh-LLM",
        bubble_full_width=False,
        show_copy_button=True,
    )

    gr.ChatInterface(
        fn=respond,
        additional_inputs=extra_controls,
        chatbot=chat_window,
    )

# 🚀 Launch the app
if __name__ == "__main__":
    demo.launch()