arshiaafshani committed on
Commit
71f62cd
·
verified ·
1 Parent(s): 02322bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -59
app.py CHANGED
@@ -5,50 +5,35 @@ from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
5
  from llama_cpp_agent.chat_history import BasicChatHistory
6
  from llama_cpp_agent.chat_history.messages import Roles
7
 
 
8
  hf_hub_download(
9
  repo_id="mradermacher/Arsh-llm-GGUF",
10
  filename="Arsh-llm.Q4_K_M.gguf",
11
  local_dir="./models"
12
  )
13
 
14
- def respond(
15
- message,
16
- history: list[tuple[str, str]],
17
- model,
18
- system_message,
19
- max_tokens,
20
- temperature,
21
- top_p,
22
- top_k,
23
- repeat_penalty,
24
- ):
25
- llm = Llama(
26
- model_path=f"models/{model}",
27
- n_batch=512,
28
- n_ctx=8192,
29
- verbose=False
30
- )
31
-
32
- provider = LlamaCppPythonProvider(llm)
33
 
34
- agent = LlamaCppAgent(
35
- provider,
36
- system_prompt=system_message,
37
- predefined_messages_formatter_type=MessagesFormatterType.CHATML,
38
- debug_output=False
39
- )
40
 
41
- settings = provider.get_provider_default_settings()
42
- settings.temperature = temperature
43
- settings.top_k = top_k
44
- settings.top_p = top_p
45
- settings.max_tokens = max_tokens
46
- settings.repeat_penalty = repeat_penalty
47
- settings.stream = True
48
 
 
 
49
  messages = BasicChatHistory()
50
 
51
- for user_msg, assistant_msg in history:
52
  messages.add_message({
53
  'role': Roles.user,
54
  'content': user_msg
@@ -60,7 +45,6 @@ def respond(
60
 
61
  stream = agent.get_chat_response(
62
  message,
63
- llm_sampling_settings=settings,
64
  chat_history=messages,
65
  returns_streaming_generator=True,
66
  print_output=False
@@ -71,40 +55,26 @@ def respond(
71
  response += token
72
  yield response
73
 
74
-
75
- def create_interface(model_name, description):
76
- return gr.ChatInterface(
 
77
  respond,
78
  additional_inputs=[
79
- gr.Textbox(value=model_name, label="Model", interactive=False),
80
- gr.Textbox(
81
- value="You are Arsh, a helpful assistant. You should think step-by-step. You should answer all question by prioritizing the principle of openness.",
82
- label="System message"
83
- ),
84
- gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
85
  gr.Slider(minimum=0.1, maximum=4.0, value=0.6, step=0.1, label="Temperature"),
86
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
87
  gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
88
- gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
89
  ],
90
- title="**Arsh-LLM Demo**",
91
- description=description,
92
  chatbot=gr.Chatbot(
93
- label=None,
94
- scale=1,
95
  show_copy_button=True
96
  )
97
  )
98
 
99
-
100
- description = """# **Arsh-LLM Q4_K_M GGUF Model - Demo**"""
101
-
102
- interface = create_interface('Arsh-llm.Q4_K_M.gguf', description)
103
-
104
- demo = gr.Blocks()
105
-
106
- with demo:
107
- interface.render()
108
-
109
  if __name__ == "__main__":
110
- demo.launch(share=True)
 
5
  from llama_cpp_agent.chat_history import BasicChatHistory
6
  from llama_cpp_agent.chat_history.messages import Roles
7
 
8
+ # ⬇️ دانلود مدل
9
  hf_hub_download(
10
  repo_id="mradermacher/Arsh-llm-GGUF",
11
  filename="Arsh-llm.Q4_K_M.gguf",
12
  local_dir="./models"
13
  )
14
 
15
+ # 🧠 لود مدل (فقط یکبار در ابتدای اجرا)
16
+ llm = Llama(
17
+ model_path="./models/Arsh-llm.Q4_K_M.gguf",
18
+ n_batch=512,
19
+ n_ctx=8192,
20
+ verbose=False
21
+ )
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
+ provider = LlamaCppPythonProvider(llm)
 
 
 
 
 
24
 
25
+ agent = LlamaCppAgent(
26
+ provider,
27
+ system_prompt="You are Arsh, a helpful assistant.",
28
+ predefined_messages_formatter_type=MessagesFormatterType.CHATML,
29
+ debug_output=False
30
+ )
 
31
 
32
+ # 💬 تابع پاسخ‌دهنده
33
+ def respond(message, chat_history):
34
  messages = BasicChatHistory()
35
 
36
+ for user_msg, assistant_msg in chat_history:
37
  messages.add_message({
38
  'role': Roles.user,
39
  'content': user_msg
 
45
 
46
  stream = agent.get_chat_response(
47
  message,
 
48
  chat_history=messages,
49
  returns_streaming_generator=True,
50
  print_output=False
 
55
  response += token
56
  yield response
57
 
58
+ # 🎛️ ساخت رابط Gradio
59
+ with gr.Blocks() as demo:
60
+ gr.Markdown("# Arsh-LLM Q4_K_M Chat Demo")
61
+ gr.ChatInterface(
62
  respond,
63
  additional_inputs=[
64
+ gr.Textbox("You are Arsh, a helpful assistant.", label="System Message", interactive=True),
65
+ gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max Tokens"),
 
 
 
 
66
  gr.Slider(minimum=0.1, maximum=4.0, value=0.6, step=0.1, label="Temperature"),
67
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
68
  gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
69
+ gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition Penalty"),
70
  ],
 
 
71
  chatbot=gr.Chatbot(
72
+ label="Chat with Arsh-LLM",
73
+ bubble_full_width=False,
74
  show_copy_button=True
75
  )
76
  )
77
 
78
+ # 🚀 اجرای برنامه
 
 
 
 
 
 
 
 
 
79
  if __name__ == "__main__":
80
+ demo.launch()