adds OpenReasoner
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: Nemotron
+title: OpenReasoning Nemotron 14B
 emoji: 🐠🤖👌🏻
 colorFrom: blue
 colorTo: red
app.py CHANGED
@@ -4,32 +4,36 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from globe import title, description, customtool, presentation1, presentation2, joinus
 import spaces
 
-model_path = "nvidia/
+model_path = "nvidia/OpenReasoning-Nemotron-14B"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForCausalLM.from_pretrained(model_path)
+model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, device_map="auto")
 if tokenizer.pad_token_id is None:
     tokenizer.pad_token_id = tokenizer.eos_token_id
 
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto")
 
 def create_prompt(system_message, user_message, tool_definition="", context=""):
+    # OpenReasoning-Nemotron uses a different prompt format
     if tool_definition:
-        return f"""
-
-
-
-{tool_definition}
-
-
-
-
-
-
-{user_message}
-<extra_id_1>Assistant
-"""
+        return f"""You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.
+
+System Context: {system_message}
+
+Tool Definition: {tool_definition}
+
+Context: {context}
+
+User: {user_message}
+
+Assistant: Let me think about this step by step."""
     else:
-        return f"
+        return f"""You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.
+
+System Context: {system_message}
+
+User: {user_message}
+
+Assistant: Let me think about this step by step."""
 
 
 @spaces.GPU(duration=94)
@@ -59,7 +63,8 @@ def generate_response(message, history, system_message, max_tokens, temperature,
 
     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
-
+    # Extract the assistant response (everything after "Assistant: ")
+    assistant_response = response.split("Assistant: ")[-1].strip()
 
     if tool_definition and "<toolcall>" in assistant_response:
         tool_call = assistant_response.split("<toolcall>")[1].split("</toolcall>")[0]
@@ -95,7 +100,7 @@ with gr.Blocks() as demo:
             user_input = gr.TextArea(label="🤷🏻♂️User Input", placeholder="Hi there my name is Tonic!", lines=2)
             advanced_checkbox = gr.Checkbox(label="🧪 Advanced Settings", value=False)
             with gr.Column(visible=False) as advanced_settings:
-                max_length = gr.Slider(label="📏Max Length", minimum=12, maximum=
+                max_length = gr.Slider(label="📏Max Length", minimum=12, maximum=64000, value=2048, step=1)
                 temperature = gr.Slider(label="🌡️Temperature", minimum=0.01, maximum=1.0, value=0.7, step=0.01)
                 top_p = gr.Slider(label="⚛️Top-p (Nucleus Sampling)", minimum=0.1, maximum=1.0, value=0.9, step=0.01)
                 use_pipeline = gr.Checkbox(label="Use Pipeline", value=False)
@@ -108,10 +113,10 @@ with gr.Blocks() as demo:
                 language="json"
             )
 
-            generate_button = gr.Button(value="🤖
+            generate_button = gr.Button(value="🤖OpenReasoning-Nemotron-14B")
 
         with gr.Column(scale=2):
-            chatbot = gr.Chatbot(label="🤖
+            chatbot = gr.Chatbot(label="🤖OpenReasoning-Nemotron-14B")
 
     generate_button.click(
         user,
@@ -138,4 +143,4 @@ with gr.Blocks() as demo:
 
 if __name__ == "__main__":
     demo.queue()
-    demo.launch()
+    demo.launch(ssr_mode=False, mcp_server=True)
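A few notes on the app.py changes above. First, the prompt format: the snippet below reproduces the no-tool branch of the new `create_prompt` and prints the exact string the model receives. It is a minimal standalone sketch; the body is copied from the diff, while the helper name `plain_prompt` and the sample arguments are invented for illustration.

```python
# Standalone copy of the else-branch of create_prompt from the diff above.
def plain_prompt(system_message, user_message):
    return f"""You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.

System Context: {system_message}

User: {user_message}

Assistant: Let me think about this step by step."""

# Hypothetical inputs, purely to show the prompt shape.
print(plain_prompt("You are a concise assistant.", "What is 12 * 7?"))
```

The trailing `Assistant: Let me think about this step by step.` primes the model to continue with its reasoning, and the response-extraction step later keys on that same `Assistant: ` marker.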
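Second, the loading path: the new code references `torch.bfloat16`, so `import torch` must already appear among app.py's first three lines, which this hunk does not show; if it does not, the Space fails with a `NameError` at startup. Also, once `model` is an already-instantiated `AutoModelForCausalLM`, the extra `model_kwargs={"torch_dtype": ...}` and `device_map="auto"` passed to `pipeline()` likely have no further effect, since the model is loaded and placed before the pipeline sees it. A self-contained sketch of the equivalent setup, under those assumptions:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_path = "nvidia/OpenReasoning-Nemotron-14B"

tokenizer = AutoTokenizer.from_pretrained(model_path)
# bfloat16 halves memory relative to float32; device_map="auto" lets
# accelerate place the weights on the available GPU(s).
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# The model is already loaded and placed, so no dtype/device kwargs are needed here.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
```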
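Third, the response handling relies on two string splits. `tokenizer.decode` returns the prompt together with the completion, so `split("Assistant: ")[-1]` keeps everything after the last `Assistant: ` marker; the tool call is then whatever sits between the first `<toolcall>` and `</toolcall>` pair. A minimal reproduction with a made-up model output (the response text below is hypothetical):

```python
# Hypothetical decoded output: prompt echo followed by the completion.
response = (
    "You are a helpful and harmless assistant. ...\n\n"
    "User: What is the weather?\n\n"
    "Assistant: Let me think about this step by step. "
    '<toolcall>{"name": "custom_tool", "arguments": {"city": "Paris"}}</toolcall>'
)

# Everything after the final "Assistant: " marker is the completion.
assistant_response = response.split("Assistant: ")[-1].strip()

# Text between the first <toolcall>...</toolcall> pair is the tool call.
if "<toolcall>" in assistant_response:
    tool_call = assistant_response.split("<toolcall>")[1].split("</toolcall>")[0]
    print(tool_call)  # {"name": "custom_tool", "arguments": {"city": "Paris"}}
```

Note that this parse assumes the model emits the literal markers; if the model repeats `Assistant: ` inside its answer, the split keeps only the text after the last occurrence.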
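Finally, the launch line opts into two Gradio options: `ssr_mode=False` turns off server-side rendering, and `mcp_server=True` additionally exposes the app's functions as an MCP (Model Context Protocol) server, which requires a recent Gradio 5.x with the `mcp` extra installed. A guarded variant in case the Space's pinned Gradio predates the option; the try/except fallback here is a suggestion, not part of the commit:

```python
if __name__ == "__main__":
    demo.queue()
    try:
        # Needs `pip install "gradio[mcp]"` and a Gradio version with MCP support.
        demo.launch(ssr_mode=False, mcp_server=True)
    except TypeError:
        # Older Gradio: launch() does not accept mcp_server.
        demo.launch(ssr_mode=False)
```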
globe.py CHANGED
@@ -3,27 +3,36 @@ joinus = """
 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [](https://discord.gg/qdfnvSPcqP) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
 """
 
-title = """# 🙋🏻♂️Welcome to Tonic's 🤖
+title = """# 🙋🏻♂️Welcome to Tonic's 🤖 OpenReasoning-Nemotron-14B Demo 🚀"""
 
-description = """nvidia/🤖
+description = """nvidia/🤖OpenReasoning-Nemotron-14B is a reasoning model that is post-trained for reasoning about math, code and science solution generation. It demonstrates exceptional performance across challenging reasoning benchmarks.
 """
 
-presentation1 = """Try this model on [
+presentation1 = """Try this model on [Hugging Face](https://huggingface.co/nvidia/OpenReasoning-Nemotron-14B).
 
-
+OpenReasoning-Nemotron-14B is a large language model (LLM) which is a derivative of Qwen2.5-14B-Instruct. It is a reasoning model that is post-trained for reasoning about math, code and science solution generation. This model has been evaluated with up to 64K output tokens. The OpenReasoning model is available in the following sizes: 1.5B, 7B, 14B and 32B.
+
+The models demonstrate exceptional performance across a suite of challenging reasoning benchmarks. The 14B model consistently sets new state-of-the-art records for its size class, achieving:
+- **AIME24**: 87.8% pass@1
+- **AIME25**: 82.0% pass@1
+- **HMMT Feb 25**: 71.2% pass@1
+- **LiveCodeBench v6**: 67.9% pass@1
+- **GPQA**: 71.6% pass@1
+- **MMLU-PRO**: 77.5% pass@1
 
 ### License
 
-
+Creative Commons Attribution 4.0 International License (CC-BY-4.0) with Apache 2.0 License"""
 
 presentation2 = """
 ### Model Architecture
 
-🤖Nemotron-
-
-**Architecture Type:** Transformer Decoder (auto-regressive language model)
+🤖OpenReasoning-Nemotron-14B uses a dense decoder-only Transformer architecture based on Qwen2.5-14B-Instruct. It has 14B model parameters and supports up to 64,000 output tokens for extended reasoning chains.
 
-**
+**Architecture Type:** Dense decoder-only Transformer model
+**Network Architecture:** Qwen2.5-14B-Instruct
+**Model Size:** 14B parameters
+**Max Output Tokens:** 64,000 """
 
 customtool = """{
     "name": "custom_tool",
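One note on globe.py: `customtool` is a JSON string that app.py drops into the prompt as the tool definition, and only its `"name"` field is visible in this view. A quick way to sanity-check such a definition before it reaches the prompt; everything past `"name"` below is a hypothetical filler schema, not the Space's actual content:

```python
import json

# Hypothetical completion of the truncated customtool definition.
customtool = """{
    "name": "custom_tool",
    "description": "A user-defined tool",
    "parameters": {"type": "object", "properties": {}}
}"""

tool = json.loads(customtool)  # raises ValueError if the JSON is malformed
print(tool["name"])  # custom_tool
```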
transformers DELETED
@@ -1 +0,0 @@
-Subproject commit 63d9cb0afd2bf5d4cb5431ba1b2c4e353752a937