Tonic committed
Commit d60b605 · unverified · 1 parent: d8524ee

adds OpenReasoner

Files changed (4):
  1. README.md +1 -1
  2. app.py +28 -23
  3. globe.py +18 -9
  4. transformers +0 -1
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: Nemotron-Mini
+title: OpenReasoning Nemotron 14B
 emoji: 🐠🤖👌🏻
 colorFrom: blue
 colorTo: red
app.py CHANGED
@@ -4,32 +4,36 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from globe import title, description, customtool, presentation1, presentation2, joinus
 import spaces
 
-model_path = "nvidia/Mistral-NeMo-Minitron-8B-Instruct"
+model_path = "nvidia/OpenReasoning-Nemotron-14B"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForCausalLM.from_pretrained(model_path)
+model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, device_map="auto")
 if tokenizer.pad_token_id is None:
     tokenizer.pad_token_id = tokenizer.eos_token_id
 
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto")
 
 def create_prompt(system_message, user_message, tool_definition="", context=""):
+    # OpenReasoning-Nemotron uses a different prompt format
     if tool_definition:
-        return f"""<extra_id_0>System
-{system_message}
-
-<tool>
-{tool_definition}
-</tool>
-<context>
-{context}
-</context>
-
-<extra_id_1>User
-{user_message}
-<extra_id_1>Assistant
-"""
+        return f"""You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.
+
+System Context: {system_message}
+
+Tool Definition: {tool_definition}
+
+Context: {context}
+
+User: {user_message}
+
+Assistant: Let me think about this step by step."""
     else:
-        return f"<extra_id_0>System\n{system_message}\n\n<extra_id_1>User\n{user_message}\n<extra_id_1>Assistant\n"
+        return f"""You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.
+
+System Context: {system_message}
+
+User: {user_message}
+
+Assistant: Let me think about this step by step."""
 
 
 @spaces.GPU(duration=94)
@@ -59,7 +63,8 @@ def generate_response(message, history, system_message, max_tokens, temperature,
 
     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
-    assistant_response = response.split("<extra_id_1>Assistant\n")[-1].strip()
+    # Extract the assistant response (everything after "Assistant: ")
+    assistant_response = response.split("Assistant: ")[-1].strip()
 
     if tool_definition and "<toolcall>" in assistant_response:
         tool_call = assistant_response.split("<toolcall>")[1].split("</toolcall>")[0]
@@ -95,7 +100,7 @@ with gr.Blocks() as demo:
             user_input = gr.TextArea(label="🤷🏻‍♂️User Input", placeholder="Hi there my name is Tonic!", lines=2)
             advanced_checkbox = gr.Checkbox(label="🧪 Advanced Settings", value=False)
             with gr.Column(visible=False) as advanced_settings:
-                max_length = gr.Slider(label="📏Max Length", minimum=12, maximum=1700, value=650, step=1)
+                max_length = gr.Slider(label="📏Max Length", minimum=12, maximum=64000, value=2048, step=1)
                 temperature = gr.Slider(label="🌡️Temperature", minimum=0.01, maximum=1.0, value=0.7, step=0.01)
                 top_p = gr.Slider(label="⚛️Top-p (Nucleus Sampling)", minimum=0.1, maximum=1.0, value=0.9, step=0.01)
                 use_pipeline = gr.Checkbox(label="Use Pipeline", value=False)
@@ -108,10 +113,10 @@ with gr.Blocks() as demo:
                 language="json"
             )
 
-            generate_button = gr.Button(value="🤖Mistral-NeMo-Minitron")
+            generate_button = gr.Button(value="🤖OpenReasoning-Nemotron-14B")
 
         with gr.Column(scale=2):
-            chatbot = gr.Chatbot(label="🤖Mistral-NeMo-Minitron")
+            chatbot = gr.Chatbot(label="🤖OpenReasoning-Nemotron-14B")
 
     generate_button.click(
         user,
@@ -138,4 +143,4 @@ with gr.Blocks() as demo:
 
 if __name__ == "__main__":
     demo.queue()
-    demo.launch()
+    demo.launch(ssr_mode=False, mcp_server=True)
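The response-parsing logic that survives the prompt-format change is easy to misread in diff form, so here is a self-contained sketch of the round trip: a response in the new format is split on the final `Assistant: ` marker, and the JSON payload is then cut out of the `<toolcall>…</toolcall>` tags exactly as `generate_response` does above. The sample response string is invented for illustration.

```python
# Hypothetical round trip for the parsing logic in generate_response.
# The markers and split-based extraction mirror the diff above; the sample
# response text itself is invented for illustration.
response = (
    "You are a helpful and harmless assistant. You should think step-by-step "
    "before responding to the instruction below.\n\n"
    "User: What is 17 * 24?\n\n"
    "Assistant: Let me think about this step by step. I should use the tool.\n"
    '<toolcall>{"name": "custom_tool", "arguments": {"query": "17*24"}}</toolcall>'
)

# Everything after the final "Assistant: " marker counts as the model's reply.
assistant_response = response.split("Assistant: ")[-1].strip()

# With a tool definition present, the JSON payload is cut out with two splits.
if "<toolcall>" in assistant_response:
    tool_call = assistant_response.split("<toolcall>")[1].split("</toolcall>")[0]
    print(tool_call)  # {"name": "custom_tool", "arguments": {"query": "17*24"}}
```

One caveat: because the extraction splits on the literal string `Assistant: `, any occurrence of that marker inside the model's own output would shift the cut point; the code above accepts that trade-off for simplicity.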
globe.py CHANGED
@@ -3,27 +3,36 @@ joinus = """
 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
 """
 
-title = """# 🙋🏻‍♂️Welcome to Tonic's 🤖 Mistral-NeMo-Minitron Demo 🚀"""
+title = """# 🙋🏻‍♂️Welcome to Tonic's 🤖 OpenReasoning-Nemotron-14B Demo 🚀"""
 
-description = """nvidia/🤖Mistral-NeMo-Minitron-8B-Instruct is a model for generating responses for various text-generation tasks including roleplaying, retrieval augmented generation, and function calling.
+description = """nvidia/🤖OpenReasoning-Nemotron-14B is a reasoning model that is post-trained for reasoning about math, code and science solution generation. It demonstrates exceptional performance across challenging reasoning benchmarks.
 """
 
-presentation1 = """Try this model on [build.nvidia.com](https://build.nvidia.com/nvidia/nemotron-mini-4b-instruct).
+presentation1 = """Try this model on [Hugging Face](https://huggingface.co/nvidia/OpenReasoning-Nemotron-14B).
 
-Mistral-NeMo-Minitron-8B-Instruct is a model for generating responses for various text-generation tasks including roleplaying, retrieval augmented generation, and function calling. It is a fine-tuned version of [nvidia/Mistral-NeMo-Minitron-8B-Base](https://huggingface.co/nvidia/Mistral-NeMo-Minitron-8B-Base), which was pruned and distilled from [Mistral-NeMo 12B](https://huggingface.co/nvidia/Mistral-NeMo-12B-Base) using [our LLM compression technique](https://arxiv.org/abs/2407.14679). The model was trained using a multi-stage SFT and preference-based alignment technique with [NeMo Aligner](https://github.com/NVIDIA/NeMo-Aligner). For details on the alignment technique, please refer to the [Nemotron-4 340B Technical Report](https://arxiv.org/abs/2406.11704).
+OpenReasoning-Nemotron-14B is a large language model (LLM) which is a derivative of Qwen2.5-14B-Instruct. It is a reasoning model that is post-trained for reasoning about math, code and science solution generation. This model has been evaluated with up to 64K output tokens. The OpenReasoning model is available in the following sizes: 1.5B, 7B, 14B and 32B.
+
+The models demonstrate exceptional performance across a suite of challenging reasoning benchmarks. The 14B model consistently sets new state-of-the-art records for its size class, achieving:
+- **AIME24**: 87.8% pass@1
+- **AIME25**: 82.0% pass@1
+- **HMMT Feb 25**: 71.2% pass@1
+- **LiveCodeBench v6**: 67.9% pass@1
+- **GPQA**: 71.6% pass@1
+- **MMLU-PRO**: 77.5% pass@1
 
 ### License
 
-[NVIDIA Community Model License](https://huggingface.co/nvidia/Nemotron-Mini-4B-Instruct/blob/main/nvidia-community-model-license-aug2024.pdf)"""
+Creative Commons Attribution 4.0 International License (CC-BY-4.0) with Apache 2.0 License"""
 
 presentation2 = """
 ### Model Architecture
 
-🤖Nemotron-Mini-4B-Instruct uses a model embedding size of 3072, 32 attention heads, and an MLP intermediate dimension of 9216. It also uses Grouped-Query Attention (GQA) and Rotary Position Embeddings (RoPE).
-
-**Architecture Type:** Transformer Decoder (auto-regressive language model)
+🤖OpenReasoning-Nemotron-14B uses a dense decoder-only Transformer architecture based on Qwen2.5-14B-Instruct. It has 14B model parameters and supports up to 64,000 output tokens for extended reasoning chains.
 
-**Network Architecture:** Nemotron-4 """
+**Architecture Type:** Dense decoder-only Transformer model
+**Network Architecture:** Qwen2.5-14B-Instruct
+**Model Size:** 14B parameters
+**Max Output Tokens:** 64,000 """
 
 customtool = """{
     "name": "custom_tool",
transformers DELETED
@@ -1 +0,0 @@
-Subproject commit 63d9cb0afd2bf5d4cb5431ba1b2c4e353752a937