Tonic committed
Commit d60b605 · unverified · 1 parent: d8524ee

adds OpenReasoner

Files changed (4):
  1. README.md +1 -1
  2. app.py +28 -23
  3. globe.py +18 -9
  4. transformers +0 -1
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: Nemotron-Mini
+title: OpenReasoning Nemotron 14B
 emoji: 🐠🤖👌🏻
 colorFrom: blue
 colorTo: red
app.py CHANGED
@@ -4,32 +4,36 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from globe import title, description, customtool, presentation1, presentation2, joinus
 import spaces
 
-model_path = "nvidia/Mistral-NeMo-Minitron-8B-Instruct"
+model_path = "nvidia/OpenReasoning-Nemotron-14B"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForCausalLM.from_pretrained(model_path)
+model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, device_map="auto")
 if tokenizer.pad_token_id is None:
     tokenizer.pad_token_id = tokenizer.eos_token_id
 
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto")
 
 def create_prompt(system_message, user_message, tool_definition="", context=""):
+    # OpenReasoning-Nemotron uses a different prompt format
     if tool_definition:
-        return f"""<extra_id_0>System
-{system_message}
-
-<tool>
-{tool_definition}
-</tool>
-<context>
-{context}
-</context>
-
-<extra_id_1>User
-{user_message}
-<extra_id_1>Assistant
-"""
+        return f"""You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.
+
+System Context: {system_message}
+
+Tool Definition: {tool_definition}
+
+Context: {context}
+
+User: {user_message}
+
+Assistant: Let me think about this step by step."""
     else:
-        return f"<extra_id_0>System\n{system_message}\n\n<extra_id_1>User\n{user_message}\n<extra_id_1>Assistant\n"
+        return f"""You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.
+
+System Context: {system_message}
+
+User: {user_message}
+
+Assistant: Let me think about this step by step."""
 
 
 @spaces.GPU(duration=94)
@@ -59,7 +63,8 @@ def generate_response(message, history, system_message, max_tokens, temperature,
 
     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
-    assistant_response = response.split("<extra_id_1>Assistant\n")[-1].strip()
+    # Extract the assistant response (everything after "Assistant: ")
+    assistant_response = response.split("Assistant: ")[-1].strip()
 
     if tool_definition and "<toolcall>" in assistant_response:
         tool_call = assistant_response.split("<toolcall>")[1].split("</toolcall>")[0]
@@ -95,7 +100,7 @@ with gr.Blocks() as demo:
             user_input = gr.TextArea(label="🤷🏻‍♂️User Input", placeholder="Hi there my name is Tonic!", lines=2)
             advanced_checkbox = gr.Checkbox(label="🧪 Advanced Settings", value=False)
             with gr.Column(visible=False) as advanced_settings:
-                max_length = gr.Slider(label="📏Max Length", minimum=12, maximum=1700, value=650, step=1)
+                max_length = gr.Slider(label="📏Max Length", minimum=12, maximum=64000, value=2048, step=1)
                 temperature = gr.Slider(label="🌡️Temperature", minimum=0.01, maximum=1.0, value=0.7, step=0.01)
                 top_p = gr.Slider(label="⚛️Top-p (Nucleus Sampling)", minimum=0.1, maximum=1.0, value=0.9, step=0.01)
                 use_pipeline = gr.Checkbox(label="Use Pipeline", value=False)
@@ -108,10 +113,10 @@ with gr.Blocks() as demo:
                 language="json"
             )
 
-            generate_button = gr.Button(value="🤖Mistral-NeMo-Minitron")
+            generate_button = gr.Button(value="🤖OpenReasoning-Nemotron-14B")
 
         with gr.Column(scale=2):
-            chatbot = gr.Chatbot(label="🤖Mistral-NeMo-Minitron")
+            chatbot = gr.Chatbot(label="🤖OpenReasoning-Nemotron-14B")
 
     generate_button.click(
         user,
@@ -138,4 +143,4 @@ with gr.Blocks() as demo:
 
 if __name__ == "__main__":
     demo.queue()
-    demo.launch()
+    demo.launch(ssr_mode=False, mcp_server=True)
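The response-parsing logic that survives the prompt-format change is easy to misread in diff form, so here is a self-contained sketch of the round trip: a response in the new format is split on the final `Assistant: ` marker, and the JSON payload is then cut out of the `<toolcall>…</toolcall>` tags exactly as `generate_response` does above. The sample response string is invented for illustration.

```python
# Hypothetical round trip for the parsing logic in generate_response.
# The markers and split-based extraction mirror the diff above; the sample
# response text itself is invented for illustration.
response = (
    "You are a helpful and harmless assistant. You should think step-by-step "
    "before responding to the instruction below.\n\n"
    "User: What is 17 * 24?\n\n"
    "Assistant: Let me think about this step by step. I should use the tool.\n"
    '<toolcall>{"name": "custom_tool", "arguments": {"query": "17*24"}}</toolcall>'
)

# Everything after the final "Assistant: " marker counts as the model's reply.
assistant_response = response.split("Assistant: ")[-1].strip()

# With a tool definition present, the JSON payload is cut out with two splits.
if "<toolcall>" in assistant_response:
    tool_call = assistant_response.split("<toolcall>")[1].split("</toolcall>")[0]
    print(tool_call)  # {"name": "custom_tool", "arguments": {"query": "17*24"}}
```

One caveat: because the extraction splits on the literal string `Assistant: `, any occurrence of that marker inside the model's own output would shift the cut point; the code above accepts that trade-off for simplicity.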
globe.py CHANGED
@@ -3,27 +3,36 @@ joinus = """
 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
 """
 
-title = """# 🙋🏻‍♂️Welcome to Tonic's 🤖 Mistral-NeMo-Minitron Demo 🚀"""
+title = """# 🙋🏻‍♂️Welcome to Tonic's 🤖 OpenReasoning-Nemotron-14B Demo 🚀"""
 
-description = """nvidia/🤖Mistral-NeMo-Minitron-8B-Instruct is a model for generating responses for various text-generation tasks including roleplaying, retrieval augmented generation, and function calling.
+description = """nvidia/🤖OpenReasoning-Nemotron-14B is a reasoning model that is post-trained for reasoning about math, code and science solution generation. It demonstrates exceptional performance across challenging reasoning benchmarks.
 """
 
-presentation1 = """Try this model on [build.nvidia.com](https://build.nvidia.com/nvidia/nemotron-mini-4b-instruct).
+presentation1 = """Try this model on [Hugging Face](https://huggingface.co/nvidia/OpenReasoning-Nemotron-14B).
 
-Mistral-NeMo-Minitron-8B-Instruct is a model for generating responses for various text-generation tasks including roleplaying, retrieval augmented generation, and function calling. It is a fine-tuned version of [nvidia/Mistral-NeMo-Minitron-8B-Base](https://huggingface.co/nvidia/Mistral-NeMo-Minitron-8B-Base), which was pruned and distilled from [Mistral-NeMo 12B](https://huggingface.co/nvidia/Mistral-NeMo-12B-Base) using [our LLM compression technique](https://arxiv.org/abs/2407.14679). The model was trained using a multi-stage SFT and preference-based alignment technique with [NeMo Aligner](https://github.com/NVIDIA/NeMo-Aligner). For details on the alignment technique, please refer to the [Nemotron-4 340B Technical Report](https://arxiv.org/abs/2406.11704).
+OpenReasoning-Nemotron-14B is a large language model (LLM) which is a derivative of Qwen2.5-14B-Instruct. It is a reasoning model that is post-trained for reasoning about math, code and science solution generation. This model has been evaluated with up to 64K output tokens. The OpenReasoning model is available in the following sizes: 1.5B, 7B, 14B and 32B.
+
+The models demonstrate exceptional performance across a suite of challenging reasoning benchmarks. The 14B model consistently sets new state-of-the-art records for its size class, achieving:
+- **AIME24**: 87.8% pass@1
+- **AIME25**: 82.0% pass@1
+- **HMMT Feb 25**: 71.2% pass@1
+- **LiveCodeBench v6**: 67.9% pass@1
+- **GPQA**: 71.6% pass@1
+- **MMLU-PRO**: 77.5% pass@1
 
 ### License
 
-[NVIDIA Community Model License](https://huggingface.co/nvidia/Nemotron-Mini-4B-Instruct/blob/main/nvidia-community-model-license-aug2024.pdf)"""
+Creative Commons Attribution 4.0 International License (CC-BY-4.0) with Apache 2.0 License"""
 
 presentation2 = """
 ### Model Architecture
 
-🤖Nemotron-Mini-4B-Instruct uses a model embedding size of 3072, 32 attention heads, and an MLP intermediate dimension of 9216. It also uses Grouped-Query Attention (GQA) and Rotary Position Embeddings (RoPE).
-
-**Architecture Type:** Transformer Decoder (auto-regressive language model)
+🤖OpenReasoning-Nemotron-14B uses a dense decoder-only Transformer architecture based on Qwen2.5-14B-Instruct. It has 14B model parameters and supports up to 64,000 output tokens for extended reasoning chains.
 
-**Network Architecture:** Nemotron-4 """
+**Architecture Type:** Dense decoder-only Transformer model
+**Network Architecture:** Qwen2.5-14B-Instruct
+**Model Size:** 14B parameters
+**Max Output Tokens:** 64,000 """
 
 customtool = """{
     "name": "custom_tool",
transformers DELETED
@@ -1 +0,0 @@
-Subproject commit 63d9cb0afd2bf5d4cb5431ba1b2c4e353752a937