vinuajeesh committed · verified
Commit e8dc0de · 1 Parent(s): ef35add

Update app.py

Files changed (1): app.py +23 -13
app.py CHANGED
@@ -3,29 +3,39 @@ from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 
 # Download the model from Hugging Face Hub
-print("Downloading model...")
+print("===== Downloading model... =====")
 model_path = hf_hub_download(
-    repo_id="unsloth/gemma-3-4b-it-GGUF",
-    filename="gemma-3-4b-it-UD-Q4_K_XL.gguf"
+    repo_id="bartowski/Dolphin3.0-Llama3.2-3B-GGUF",
+    filename="Dolphin3.0-Llama3.2-3B-Q4_K_M.gguf"
 )
-print("Model downloaded to:", model_path)
+print(f"Model downloaded to: {model_path}")
 
 # Load the model with llama-cpp-python
-print("Loading model...")
+print("===== Loading model... =====")
 llm = Llama(
     model_path=model_path,
-    n_ctx=2048  # You can adjust this if needed
+    n_ctx=2048,   # adjust as per RAM
+    n_threads=8   # adjust based on your Space CPU (8 is a good default)
 )
 print("Model loaded.")
 
-# Chat function
-def chat(prompt):
-    print(f"User input: {prompt}")
+# Chat function with Dolphin 3.0 template
+def chat(user_input):
+    print(f"User input: {user_input}")
+    full_prompt = f"""### System:
+You are Dolphin 3.0, a helpful and friendly AI assistant.
+
+### User:
+{user_input}
+
+### Assistant:"""
+
     output = llm(
-        prompt,
+        full_prompt,
         max_tokens=512,
-        stop=["</s>", "User:", "Assistant:"]
+        stop=["</s>", "### User:", "### Assistant:"]
     )
+
     reply = output['choices'][0]['text'].strip()
     print(f"Model reply: {reply}")
     return reply
@@ -35,8 +45,8 @@ iface = gr.Interface(
     fn=chat,
     inputs="text",
     outputs="text",
-    title="Llama 2 7B Chat GGUF - Space",
-    description="Running Llama 2 7B Chat GGUF model using llama-cpp-python on Hugging Face Space"
+    title="🐬 Dolphin 3.0 - Llama 3.2 3B GGUF Chat",
+    description="Running Dolphin 3.0 Llama 3.2 3B GGUF model using llama-cpp-python on Hugging Face Space"
 )
 
 iface.launch()
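
Side note: the updated chat() builds the Dolphin prompt string by hand and relies on stop strings to cut the generation off. llama-cpp-python can instead apply the chat template embedded in the GGUF metadata via Llama.create_chat_completion, which makes the manual "### System:"/"### User:" framing unnecessary. Below is a minimal sketch of the same function on that API, reusing the repo, filename, and parameters from this commit; it is an untested alternative, not part of the commit itself.

    from huggingface_hub import hf_hub_download
    from llama_cpp import Llama

    model_path = hf_hub_download(
        repo_id="bartowski/Dolphin3.0-Llama3.2-3B-GGUF",
        filename="Dolphin3.0-Llama3.2-3B-Q4_K_M.gguf"
    )
    llm = Llama(model_path=model_path, n_ctx=2048, n_threads=8)

    def chat(user_input):
        # create_chat_completion formats the messages with the chat template
        # shipped in the GGUF, so no hand-written prompt or stop strings are needed.
        # The system message is carried over from the diff above.
        output = llm.create_chat_completion(
            messages=[
                {"role": "system", "content": "You are Dolphin 3.0, a helpful and friendly AI assistant."},
                {"role": "user", "content": user_input},
            ],
            max_tokens=512,
        )
        return output["choices"][0]["message"]["content"].strip()

Note that the response shape differs from the plain-completion call used in the commit: the text lives under choices[0]["message"]["content"] rather than choices[0]["text"].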