Spaces:

ServiceNow-AI
/

Apriel-Chat

Running

App Files Files Community

add-model-selector

by bradnow - opened May 8

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+147

-1121

This PR is in draft mode

Files changed (11) hide show

.gitignore +1 -4
README.md +4 -4
__pycache__/utils.cpython-310.pyc +0 -0
app.py +141 -356
gradio_runner.py +0 -10
log_chat.py +0 -237
requirements.txt +1 -3
styles.css +0 -118
theme.py +0 -148
timer.py +0 -114
utils.py +0 -127

.gitignore CHANGED Viewed

@@ -1,4 +1 @@
-.idea/*
-__pycache__/
-/.run*/
-/train.csv


1	+ .idea/*

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
-title: Apriel Chat
 emoji: 💬
-colorFrom: green
-colorTo: blue
 sdk: gradio
 sdk_version: 5.29.0
 app_file: app.py
@@ -11,4 +11,4 @@ license: mit
 short_description: ServiceNow-AI model chat
 ---
-A chatbot for ServiceNow-AI model chat. This is a demo of the Apriel Nemotron Chat model. The chatbot can answer questions, provide information, etc.

 ---
+title: Apriel Nemotron Chat
 emoji: 💬
+colorFrom: yellow
+colorTo: purple
 sdk: gradio
 sdk_version: 5.29.0
 app_file: app.py
 short_description: ServiceNow-AI model chat
 ---
+An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

__pycache__/utils.cpython-310.pyc DELETED Viewed

Binary file (2.59 kB)

app.py CHANGED Viewed

@@ -1,381 +1,166 @@
 import datetime
-from uuid import uuid4
 from openai import OpenAI
 import gradio as gr
-from theme import apriel
-from utils import COMMUNITY_POSTFIX_URL, get_model_config, check_format, models_config, \
-    logged_event_handler, DEBUG_MODEL, log_debug, log_info, log_error
-from log_chat import log_chat
-MODEL_TEMPERATURE = 0.8
-BUTTON_WIDTH = 160
-DEFAULT_OPT_OUT_VALUE = False
-DEFAULT_MODEL_NAME = "Apriel-Nemotron-15b-Thinker" if not DEBUG_MODEL else "Apriel-5b"
-BUTTON_ENABLED = gr.update(interactive=True)
-BUTTON_DISABLED = gr.update(interactive=False)
-INPUT_ENABLED = gr.update(interactive=True)
-INPUT_DISABLED = gr.update(interactive=False)
-DROPDOWN_ENABLED = gr.update(interactive=True)
-DROPDOWN_DISABLED = gr.update(interactive=False)
-SEND_BUTTON_ENABLED = gr.update(interactive=True, visible=True)
-SEND_BUTTON_DISABLED = gr.update(interactive=True, visible=False)
-STOP_BUTTON_ENABLED = gr.update(interactive=True, visible=True)
-STOP_BUTTON_DISABLED = gr.update(interactive=True, visible=False)
 chat_start_count = 0
-model_config = {}
-openai_client = None
-def app_loaded(state, request: gr.Request):
-    message_html = setup_model(DEFAULT_MODEL_NAME, intial=False)
-    state['session'] = request.session_hash if request else uuid4().hex
-    log_debug(f"app_loaded() --> Session: {state['session']}")
-    return state, message_html
-def update_model_and_clear_chat(model_name):
-    actual_model_name = model_name.replace("Model: ", "")
-    desc = setup_model(actual_model_name)
-    return desc, []
-def setup_model(model_name, intial=False):
-    global model_config, openai_client
-    model_config = get_model_config(model_name)
-    log_debug(f"update_model() --> Model config: {model_config}")
-    openai_client = OpenAI(
-        api_key=model_config.get('AUTH_TOKEN'),
-        base_url=model_config.get('VLLM_API_URL')
-    )
-    _model_hf_name = model_config.get("MODEL_HF_URL").split('https://huggingface.co/')[1]
-    _link = f"<a href='{model_config.get('MODEL_HF_URL')}{COMMUNITY_POSTFIX_URL}' target='_blank'>{_model_hf_name}</a>"
-    _description = f"We'd love to hear your thoughts on the model. Click here to provide feedback - {_link}"
-    log_debug(f"Switched to model {_model_hf_name}")
-    if intial:
-        return
-    else:
-        return _description
-def chat_started():
-    # outputs: model_dropdown, user_input, send_btn, stop_btn, clear_btn
-    return (DROPDOWN_DISABLED, gr.update(value="", interactive=False),
-            SEND_BUTTON_DISABLED, STOP_BUTTON_ENABLED, BUTTON_DISABLED)
-def chat_finished():
-    # outputs: model_dropdown, user_input, send_btn, stop_btn, clear_btn
-    return DROPDOWN_ENABLED, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED
-def stop_chat(state):
-    state["stop_flag"] = True
-    gr.Info("Chat stopped")
-    return state
-def toggle_opt_out(state, checkbox):
-    state["opt_out"] = checkbox
-    return state
-def run_chat_inference(history, message, state):
     global chat_start_count
-    state["is_streaming"] = True
-    state["stop_flag"] = False
-    error = None
-    model_name = model_config.get('MODEL_NAME')
-    if len(history) == 0:
-        state["chat_id"] = uuid4().hex
-    if openai_client is None:
-        log_info("Client UI is stale, letting user know to refresh the page")
-        gr.Warning("Client UI is stale, please refresh the page")
-        return history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
-    # outputs: chatbot, user_input, send_btn, stop_btn, clear_btn, session_state
-    log_debug(f"{'-' * 80}")
-    log_debug(f"chat_fn() --> Message: {message}")
-    log_debug(f"chat_fn() --> History: {history}")
-    try:
-        # Check if the message is empty
-        if not message.strip():
-            gr.Info("Please enter a message before sending")
-            yield history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
-            return history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
-        chat_start_count = chat_start_count + 1
-        user_messages_count = sum(1 for item in history if isinstance(item, dict) and item.get("role") == "user")
-        log_info(f"chat_start_count: {chat_start_count}, turns: {user_messages_count}, model: {model_name}")
-        is_reasoning = model_config.get("REASONING")
-        # Remove any assistant messages with metadata from history for multiple turns
-        log_debug(f"Initial History: {history}")
-        check_format(history, "messages")
-        history.append({"role": "user", "content": message})
-        log_debug(f"History with user message: {history}")
-        check_format(history, "messages")
-        # Create the streaming response
-        try:
-            history_no_thoughts = [item for item in history if
-                                   not (isinstance(item, dict) and
-                                        item.get("role") == "assistant" and
-                                        isinstance(item.get("metadata"), dict) and
-                                        item.get("metadata", {}).get("title") is not None)]
-            log_debug(f"Updated History: {history_no_thoughts}")
-            check_format(history_no_thoughts, "messages")
-            log_debug(f"history_no_thoughts with user message: {history_no_thoughts}")
-            stream = openai_client.chat.completions.create(
-                model=model_name,
-                messages=history_no_thoughts,
-                temperature=MODEL_TEMPERATURE,
-                stream=True
-            )
-        except Exception as e:
-            log_error(f"Error: {e}")
-            error = str(e)
-            yield ([{"role": "assistant",
-                     "content": "😔 The model is unavailable at the moment. Please try again later."}],
-                   INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state)
-            if state["opt_out"] is not True:
-                log_chat(chat_id=state["chat_id"],
-                         session_id=state["session"],
-                         model_name=model_name,
-                         prompt=message,
-                         history=history,
-                         info={"is_reasoning": model_config.get("REASONING"), "temperature": MODEL_TEMPERATURE,
-                               "stopped": True, "error": str(e)},
-                         )
-            else:
-                log_info(f"User opted out of chat history. Not logging chat. model: {model_name}")
-            return history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
-        if is_reasoning:
-            history.append(gr.ChatMessage(
                 role="assistant",
-                content="Thinking...",
-                metadata={"title": "🧠 Thought"}
-            ))
-            log_debug(f"History added thinking: {history}")
-            check_format(history, "messages")
-        else:
             history.append(gr.ChatMessage(
                 role="assistant",
-                content="",
             ))
-            log_debug(f"History added empty assistant: {history}")
-            check_format(history, "messages")
-        output = ""
-        completion_started = False
-        for chunk in stream:
-            if state["stop_flag"]:
-                log_debug(f"chat_fn() --> Stopping streaming...")
-                break  # Exit the loop if the stop flag is set
-            # Extract the new content from the delta field
-            content = getattr(chunk.choices[0].delta, "content", "")
-            output += content
-            if is_reasoning:
-                parts = output.split("[BEGIN FINAL RESPONSE]")
-                if len(parts) > 1:
-                    if parts[1].endswith("[END FINAL RESPONSE]"):
-                        parts[1] = parts[1].replace("[END FINAL RESPONSE]", "")
-                    if parts[1].endswith("[END FINAL RESPONSE]\n<|end|>"):
-                        parts[1] = parts[1].replace("[END FINAL RESPONSE]\n<|end|>", "")
-                    if parts[1].endswith("<|end|>"):
-                        parts[1] = parts[1].replace("<|end|>", "")
-                history[-1 if not completion_started else -2] = gr.ChatMessage(
-                    role="assistant",
-                    content=parts[0],
-                    metadata={"title": "🧠 Thought"}
-                )
-                if completion_started:
-                    history[-1] = gr.ChatMessage(
-                        role="assistant",
-                        content=parts[1]
-                    )
-                elif len(parts) > 1 and not completion_started:
-                    completion_started = True
-                    history.append(gr.ChatMessage(
-                        role="assistant",
-                        content=parts[1]
-                    ))
-            else:
-                if output.endswith("<|end|>"):
-                    output = output.replace("<|end|>", "")
-                history[-1] = gr.ChatMessage(
-                    role="assistant",
-                    content=output
-                )
-            # log_message(f"Yielding messages: {history}")
-            yield history, INPUT_DISABLED, SEND_BUTTON_DISABLED, STOP_BUTTON_ENABLED, BUTTON_DISABLED, state
-        log_debug(f"Final History: {history}")
-        check_format(history, "messages")
-        yield history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
-    finally:
-        if error is None:
-            log_debug(f"chat_fn() --> Finished streaming. {chat_start_count} chats started.")
-            if state["opt_out"] is not True:
-                log_chat(chat_id=state["chat_id"],
-                         session_id=state["session"],
-                         model_name=model_name,
-                         prompt=message,
-                         history=history,
-                         info={"is_reasoning": model_config.get("REASONING"), "temperature": MODEL_TEMPERATURE,
-                               "stopped": state["stop_flag"]},
-                         )
-            else:
-                log_info(f"User opted out of chat history. Not logging chat. model: {model_name}")
-        state["is_streaming"] = False
-        state["stop_flag"] = False
-        return history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
-log_info(f"Gradio version: {gr.__version__}")
-title = None
-description = None
-theme = apriel
-with open('styles.css', 'r') as f:
-    custom_css = f.read()
-with gr.Blocks(theme=theme, css=custom_css) as demo:
-    session_state = gr.State(value={
-        "is_streaming": False,
-        "stop_flag": False,
-        "chat_id": None,
-        "session": None,
-        "opt_out": DEFAULT_OPT_OUT_VALUE,
-    })  # Store session state as a dictionary
-    gr.HTML(f"""
-        <style>
-            @media (min-width: 1024px) {{
-                .send-button-container, .clear-button-container {{
-                    max-width: {BUTTON_WIDTH}px;
-                }}
-            }}
-        </style>
-        """, elem_classes="css-styles")
-    with gr.Row(variant="panel", elem_classes="responsive-row"):
-        with gr.Column(scale=1, min_width=400, elem_classes="model-dropdown-container"):
-            model_dropdown = gr.Dropdown(
-                choices=[f"Model: {model}" for model in models_config.keys()],
-                value=f"Model: {DEFAULT_MODEL_NAME}",
-                label=None,
-                interactive=True,
-                container=False,
-                scale=0,
-                min_width=400
-            )
-        with gr.Column(scale=4, min_width=0):
-            feedback_message_html = gr.HTML(description, elem_classes="model-message")
-    chatbot = gr.Chatbot(
-        type="messages",
-        height="calc(100dvh - 310px)",
-        elem_classes="chatbot",
-    )
-    with gr.Row():
-        with gr.Column(scale=10, min_width=400):
-            with gr.Row():
-                user_input = gr.Textbox(
-                    show_label=False,
-                    placeholder="Type your message here and press Enter",
-                    container=False
-                )
-        with gr.Column(scale=1, min_width=BUTTON_WIDTH * 2 + 20):
-            with gr.Row():
-                with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="send-button-container"):
-                    send_btn = gr.Button("Send", variant="primary")
-                    stop_btn = gr.Button("Stop", variant="cancel", visible=False)
-                with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="clear-button-container"):
-                    clear_btn = gr.ClearButton(chatbot, value="New Chat", variant="secondary")
-    with gr.Row():
-        with gr.Column(min_width=400, elem_classes="opt-out-container"):
-            with gr.Row():
-                gr.HTML(
-                    "We may use your chats to improve our AI. You may opt out if you don’t want your conversations saved.",
-                    elem_classes="opt-out-message")
-            with gr.Row():
-                opt_out_checkbox = gr.Checkbox(
-                    label="Don’t save my chat history for improvements or training",
-                    value=DEFAULT_OPT_OUT_VALUE,
-                    elem_classes="opt-out-checkbox",
-                    interactive=True,
-                    container=False
-                )
-    gr.on(
-        triggers=[send_btn.click, user_input.submit],
-        fn=run_chat_inference,  # this generator streams results. do not use logged_event_handler wrapper
-        inputs=[chatbot, user_input, session_state],
-        outputs=[chatbot, user_input, send_btn, stop_btn, clear_btn, session_state],
-        concurrency_limit=4,
-        api_name=False
-    ).then(
-        fn=chat_finished, inputs=None, outputs=[model_dropdown, user_input, send_btn, stop_btn, clear_btn], queue=False)
-    # In parallel, disable or update the UI controls
-    gr.on(
-        triggers=[send_btn.click, user_input.submit],
-        fn=chat_started,
-        inputs=None,
-        outputs=[model_dropdown, user_input, send_btn, stop_btn, clear_btn],
-        queue=False,
-        show_progress='hidden',
-        api_name=False
-    )
-    stop_btn.click(
-        fn=stop_chat,
-        inputs=[session_state],
-        outputs=[session_state],
-        api_name=False
-    )
-    opt_out_checkbox.change(fn=toggle_opt_out, inputs=[session_state, opt_out_checkbox], outputs=[session_state])
-    # Ensure the model is reset to default on page reload
-    demo.load(
-        fn=logged_event_handler(
-            log_msg="Browser session started",
-            event_handler=app_loaded
-        ),
-        inputs=[session_state],
-        outputs=[session_state, feedback_message_html],
-        queue=True,
-        api_name=False
-    )
-    model_dropdown.change(
-        fn=update_model_and_clear_chat,
-        inputs=[model_dropdown],
-        outputs=[feedback_message_html, chatbot],
-        api_name=False
-    )
-demo.queue(default_concurrency_limit=2).launch(ssr_mode=False, show_api=False)
-log_info("Gradio app launched")

+import os
+import sys
 import datetime
 from openai import OpenAI
 import gradio as gr
+from gradio.components.chatbot import ChatMessage, Message
+from typing import (
+    Any,
+    Literal,
+)
+DEBUG_LOG = False or os.environ.get("DEBUG_LOG") == "True"
+print(f"Gradio version: {gr.__version__}")
+title = None  # "ServiceNow-AI Chat" # modelConfig.get('MODE_DISPLAY_NAME')
+description = "Please use the community section on this space to provide feedback! <a href=\"https://huggingface.co/ServiceNow-AI/Apriel-Nemotron-15b-Thinker/discussions\">ServiceNow-AI/Apriel-Nemotron-Chat</a>"
 chat_start_count = 0
+model_config = {
+    "MODEL_NAME": os.environ.get("MODEL_NAME"),
+    "MODE_DISPLAY_NAME": os.environ.get("MODE_DISPLAY_NAME"),
+    "MODEL_HF_URL": os.environ.get("MODEL_HF_URL"),
+    "VLLM_API_URL": os.environ.get("VLLM_API_URL"),
+    "AUTH_TOKEN": os.environ.get("AUTH_TOKEN")
+}
+# Initialize the OpenAI client with the vLLM API URL and token
+client = OpenAI(
+    api_key=model_config.get('AUTH_TOKEN'),
+    base_url=model_config.get('VLLM_API_URL')
+)
+def log_message(message):
+    if DEBUG_LOG is True:
+        print(message)
+# Gradio 5.0.1 had issues with checking the message formats.  5.29.0 does not!
+def _check_format(messages: Any, type: Literal["messages", "tuples"] = "messages") -> None:
+    if type == "messages":
+        all_valid = all(
+            isinstance(message, dict)
+            and "role" in message
+            and "content" in message
+            or isinstance(message, ChatMessage | Message)
+            for message in messages
+        )
+        if not all_valid:
+            # Display which message is not valid
+            for i, message in enumerate(messages):
+                if not (isinstance(message, dict) and
+                        "role" in message and
+                        "content" in message) and not isinstance(message, ChatMessage | Message):
+                    print(f"_check_format() --> Invalid message at index {i}: {message}\n", file=sys.stderr)
+                    break
+            raise Exception(
+                "Data incompatible with messages format. Each message should be a dictionary with 'role' and 'content' keys or a ChatMessage object."
+            )
+        # else:
+        #     print("_check_format() --> All messages are valid.")
+    elif not all(
+            isinstance(message, (tuple, list)) and len(message) == 2
+            for message in messages
+    ):
+        raise Exception(
+            "Data incompatible with tuples format. Each message should be a list of length 2."
+        )
+def chat_fn(message, history):
+    log_message(f"{'-' * 80}\nchat_fn() --> Message: {message}")
     global chat_start_count
+    chat_start_count = chat_start_count + 1
+    print(
+        f"{datetime.datetime.now()}: chat_start_count: {chat_start_count}, turns: {int(len(history if history else []) / 3)}")
+    # Remove any assistant messages with metadata from history for multiple turns
+    log_message(f"Original History: {history}")
+    _check_format(history, "messages")
+    history = [item for item in history if
+               not (isinstance(item, dict) and
+                    item.get("role") == "assistant" and
+                    isinstance(item.get("metadata"), dict) and
+                    item.get("metadata", {}).get("title") is not None)]
+    log_message(f"Updated History: {history}")
+    _check_format(history, "messages")
+    history.append({"role": "user", "content": message})
+    log_message(f"History with user message: {history}")
+    _check_format(history, "messages")
+    # Create the streaming response
+    stream = client.chat.completions.create(
+        model=model_config.get('MODEL_NAME'),
+        messages=history,
+        temperature=0.8,
+        stream=True
+    )
+    history.append(gr.ChatMessage(
+        role="assistant",
+        content="Thinking...",
+        metadata={"title": "🧠 Thought"}
+    ))
+    log_message(f"History added thinking: {history}")
+    _check_format(history, "messages")
+    output = ""
+    completion_started = False
+    for chunk in stream:
+        # Extract the new content from the delta field
+        content = getattr(chunk.choices[0].delta, "content", "")
+        output += content
+        parts = output.split("[BEGIN FINAL RESPONSE]")
+        if len(parts) > 1:
+            if parts[1].endswith("[END FINAL RESPONSE]"):
+                parts[1] = parts[1].replace("[END FINAL RESPONSE]", "")
+            if parts[1].endswith("[END FINAL RESPONSE]\n<|end|>"):
+                parts[1] = parts[1].replace("[END FINAL RESPONSE]\n<|end|>", "")
+        history[-1 if not completion_started else -2] = gr.ChatMessage(
+            role="assistant",
+            content=parts[0],
+            metadata={"title": "🧠 Thought"}
+        )
+        if completion_started:
+            history[-1] = gr.ChatMessage(
                 role="assistant",
+                content=parts[1]
+            )
+        elif len(parts) > 1 and not completion_started:
+            completion_started = True
             history.append(gr.ChatMessage(
                 role="assistant",
+                content=parts[1]
             ))
+        # only yield the most recent assistant messages
+        messages_to_yield = history[-1:] if not completion_started else history[-2:]
+        # _check_format(messages_to_yield, "messages")
+        yield messages_to_yield
+    log_message(f"Final History: {history}")
+    _check_format(history, "messages")
+# Add the model display name and Hugging Face URL to the description
+# description = f"### Model: [{MODE_DISPLAY_NAME}]({MODEL_HF_URL})"
+print(f"Running model {model_config.get('MODE_DISPLAY_NAME')} ({model_config.get('MODEL_NAME')})")
+gr.ChatInterface(
+    chat_fn,
+    title=title,
+    description=description,
+    theme=gr.themes.Default(primary_hue="green"),
+    type="messages",
+).launch()

gradio_runner.py DELETED Viewed

@@ -1,10 +0,0 @@
-import re
-import sys
-from gradio.cli import cli
-# This runs a gradio app so that it can be automatically reloaded in the browser
-# Example: python gradio_runner.py app.py
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(cli())

log_chat.py DELETED Viewed

@@ -1,237 +0,0 @@
-import csv
-import os
-import time
-from datetime import datetime
-from queue import Queue
-import threading
-import pandas as pd
-from gradio import ChatMessage
-from huggingface_hub import HfApi, hf_hub_download
-from timer import Timer
-from utils import log_warning, log_info, log_debug, log_error
-HF_TOKEN = os.environ.get("HF_TOKEN")
-DATASET_REPO_ID = os.environ.get("APRIEL_PROMPT_DATASET")
-CSV_FILENAME = "train.csv"
-def log_chat(chat_id: str, session_id: str, model_name: str, prompt: str, history: list[str], info: dict) -> None:
-    log_info(f"log_chat() called for chat: {chat_id}, queue size: {log_chat_queue.qsize()}, model: {model_name}")
-    log_chat_queue.put((chat_id, session_id, model_name, prompt, history, info))
-def _log_chat_worker():
-    while True:
-        chat_id, session_id, model_name, prompt, history, info = log_chat_queue.get()
-        try:
-            try:
-                _log_chat(chat_id, session_id, model_name, prompt, history, info)
-            except Exception as e:
-                log_error(f"Error logging chat: {e}")
-        finally:
-            log_chat_queue.task_done()
-def _log_chat(chat_id: str, session_id: str, model_name: str, prompt: str, history: list[str], info: dict) -> bool:
-    log_info(f"_log_chat() storing chat {chat_id}")
-    if DATASET_REPO_ID is None:
-        log_warning("No dataset repo ID provided. Skipping logging of prompt.")
-        return False
-    if HF_TOKEN is None:
-        log_warning("No HF token provided. Skipping logging of prompt.")
-        return False
-    log_timer = Timer('log_chat')
-    log_timer.start()
-    # Initialize HF API
-    api = HfApi(token=HF_TOKEN)
-    # Check if the dataset repo exists, if not, create it
-    try:
-        repo_info = api.repo_info(repo_id=DATASET_REPO_ID, repo_type="dataset")
-        log_debug(f"log_chat() --> Dataset repo found: {repo_info.id} private={repo_info.private}")
-    except Exception:  # Create new dataset if none exists
-        log_debug(f"log_chat() --> No dataset repo found, creating a new one...")
-        api.create_repo(repo_id=DATASET_REPO_ID, repo_type="dataset", private=True)
-    # Ensure messages are in the correct format
-    messages = [
-        {"role": item.role, "content": item.content,
-         "type": "thought" if item.metadata else "completion"} if isinstance(
-            item, ChatMessage) else item
-        for item in history
-        if isinstance(item, dict) and "role" in item and "content" in item or isinstance(item, ChatMessage)
-    ]
-    if len(messages) != len(history):
-        log_warning("log_chat() --> Some messages in history are missing 'role' or 'content' keys.")
-    user_messages_count = sum(1 for item in messages if isinstance(item, dict) and item.get("role") == "user")
-    # These must match the keys in the new row
-    expected_headers = ["timestamp", "chat_id", "turns", "prompt", "messages", "model", "session_id", "info"]
-    # Prepare new data row
-    new_row = {
-        "timestamp": datetime.now().isoformat(),
-        "chat_id": chat_id,
-        "turns": user_messages_count,
-        "prompt": prompt,
-        "messages": messages,
-        "model": model_name,
-        "session_id": session_id,
-        "info": info,
-    }
-    log_timer.add_step("Prepared new data row")
-    # Try to download existing CSV with retry logic
-    max_retries = 3
-    retry_count = 0
-    file_exists = False
-    while retry_count < max_retries:
-        try:
-            csv_path = hf_hub_download(
-                repo_id=DATASET_REPO_ID,
-                filename=CSV_FILENAME,
-                repo_type="dataset",
-                token=HF_TOKEN  # Only needed if not already logged in
-            )
-            pd.read_csv(csv_path)
-            file_exists = True
-            log_debug(f"log_chat() --> Downloaded existing CSV with {len(pd.read_csv(csv_path))} rows")
-            break  # Success, exit the loop
-        except Exception as e:
-            retry_count += 1
-            if retry_count < max_retries:
-                retry_delay = 2 * retry_count  # Linear backoff: 2s after the first failure, 4s after the second
-                log_warning(
-                    f"log_chat() --> Download attempt {retry_count} failed: {e}. Retrying in {retry_delay} seconds...")
-                time.sleep(retry_delay)
-            else:
-                log_warning(f"log_chat() --> Failed to download CSV after {max_retries} attempts: {e}")
-        file_exists = False
-    log_timer.add_step(f"Downloaded existing CSV (attempts: {retry_count + 1})")
-    # Handle the case where the CSV file does not exist or is invalid
-    if file_exists and len(pd.read_csv(csv_path)) == 0:
-        log_warning(f"log_chat() --> CSV {csv_path} exists but is empty, will create a new one.")
-        dump_hub_csv()
-        file_exists = False
-    elif file_exists:
-        # Check that the headers match our standard headers of "timestamp", "chat_id", "turns", ...
-        existing_headers = pd.read_csv(csv_path).columns.tolist()
-        if set(existing_headers) != set(expected_headers):
-            log_warning(f"log_chat() --> CSV {csv_path} has unexpected headers: {existing_headers}. "
-                        f"\nExpected {existing_headers} "
-                        f"Will create a new one.")
-            dump_hub_csv()
-            file_exists = False
-        else:
-            log_debug(f"log_chat() --> CSV {csv_path} has expected headers: {existing_headers}")
-    # Write out the new row to the CSV file (append isn't working in HF container, so recreate each time)
-    log_debug(f"log_chat() --> Writing CSV file, file_exists={file_exists}")
-    try:
-        with open(CSV_FILENAME, "w", newline="\n") as f:
-            writer = csv.DictWriter(f, fieldnames=new_row.keys())
-            writer.writeheader()  # Always write the header
-            if file_exists:
-                for _, row in pd.read_csv(csv_path).iterrows():
-                    writer.writerow(row.to_dict())  # Write existing rows
-            writer.writerow(new_row)  # Write the new row
-        log_debug("log_chat() --> Wrote out CSV with new row")
-        # dump_local_csv()
-    except Exception as e:
-        log_error(f"log_chat() --> Error writing to CSV: {e}")
-        return False
-    # Upload updated CSV
-    api.upload_file(
-        path_or_fileobj=CSV_FILENAME,
-        path_in_repo=CSV_FILENAME,
-        repo_id=DATASET_REPO_ID,
-        repo_type="dataset",
-        commit_message=f"Added new chat entry at {datetime.now().isoformat()}"
-    )
-    log_timer.add_step("Uploaded updated CSV")
-    log_timer.end()
-    log_debug("log_chat() --> Finished logging chat")
-    log_debug(log_timer.formatted_result())
-    return True
-def dump_hub_csv():
-    # Verify the file contents by loading it from the hub and printing it out
-    try:
-        csv_path = hf_hub_download(
-            repo_id=DATASET_REPO_ID,
-            filename=CSV_FILENAME,
-            repo_type="dataset",
-            token=HF_TOKEN  # Only needed if not already logged in
-        )
-        df = pd.read_csv(csv_path)
-        log_info(df)
-        if (df.empty):
-            # show raw contents of downloaded csv file
-            log_info("Raw file contents:")
-            with open(csv_path, 'r') as f:
-                print(f.read())
-    except Exception as e:
-        log_error(f"Error loading CSV from hub: {e}")
-def dump_local_csv():
-    # Verify the file contents by loading it from the local file and printing it out
-    try:
-        df = pd.read_csv(CSV_FILENAME)
-        log_info(df)
-    except Exception as e:
-        log_error(f"Error loading CSV from local file: {e}")
-def test_log_chat():
-    # Example usage
-    chat_id = "12345"
-    session_id = "67890"
-    model_name = "Apriel-Model"
-    prompt = "Hello"
-    history = [{"role": "user", "content": prompt}, {"role": "assistant", "content": "Hi there!"}]
-    prompt = "100 + 1"
-    history = [{'role': 'user', 'content': prompt}, ChatMessage(
-        content='Okay, that\'s a simple addition problem. , answer is 2.\n', role='assistant',
-        metadata={'title': '🧠 Thought'}, options=[]),
-               ChatMessage(content='\nThe result of adding 1 and 1 is:\n\n**2**\n', role='assistant', metadata={},
-                           options=[])
-               ]
-    info = {"additional_info": "Some extra data"}
-    log_debug("Starting test_log_chat()")
-    dump_hub_csv()
-    log_chat(chat_id, session_id, model_name, prompt, history, info)
-    log_debug("log_chat 1 returned")
-    log_chat(chat_id, session_id, model_name, prompt + " + 2", history, info)
-    log_debug("log_chat 2 returned")
-    log_chat(chat_id, session_id, model_name, prompt + " + 3", history, info)
-    log_debug("log_chat 3 returned")
-    log_chat(chat_id, session_id, model_name, prompt + " + 4", history, info)
-    log_debug("log_chat 4 returned")
-    sleep_seconds = 10
-    log_debug(f"Sleeping {sleep_seconds} seconds to let it finish and log the result.")
-    time.sleep(sleep_seconds)
-    log_debug("Finished sleeping.")
-    dump_hub_csv()
-# Create a queue for logging chat messages
-log_chat_queue = Queue()
-# Start the worker thread
-threading.Thread(target=_log_chat_worker, daemon=True).start()
-if __name__ == "__main__":
-    test_log_chat()

requirements.txt CHANGED Viewed

@@ -1,5 +1,3 @@
 huggingface_hub==0.28.1
 gradio==5.29.0
-openai~=1.78.0
-pandas~=2.2.3
-datasets~=2.14.4

 huggingface_hub==0.28.1
 gradio==5.29.0
+openai

styles.css DELETED Viewed

@@ -1,118 +0,0 @@
-/* Base (viewport-independent) rules. The @media blocks later in this file override some of them. */
-:root {
-    --color-grey-50: #f9fafb;
-}
-/* Toasts use the light grey defined on :root. */
-.toast-body {
-    background-color: var(--color-grey-50);
-}
-/* Any .html-container that wraps a .css-styles element gets no padding or margin, and the
-   .css-styles element itself has zero height (presumably so a style-injection element takes
-   up no layout space; confirm against the markup). */
-.html-container:has(.css-styles) {
-    padding: 0;
-    margin: 0;
-}
-.css-styles {
-    height: 0;
-}
-/* End-aligned by default; the narrow-viewport queries switch this to start. */
-.model-message {
-    text-align: end;
-}
-/* Horizontal flex row by default; the narrow-viewport queries stack it vertically. */
-.model-dropdown-container {
-    display: flex;
-    align-items: center;
-    gap: 10px;
-    padding: 0;
-}
-/* Default height cap for the chat area; the media queries replace this value. */
-.chatbot {
-    max-height: 1400px;
-}
-/* Cancel button styled entirely from the --button-cancel-* custom properties. */
-button.cancel {
-    border: var(--button-border-width) solid var(--button-cancel-border-color);
-    background: var(--button-cancel-background-fill);
-    color: var(--button-cancel-text-color);
-    box-shadow: var(--button-cancel-shadow);
-}
-/* A disabled .cancel element shares the hover appearance. */
-button.cancel:hover, .cancel[disabled] {
-    background: var(--button-cancel-background-fill-hover);
-    color: var(--button-cancel-text-color-hover);
-}
-/* NOTE(review): `top` only takes effect on positioned elements; confirm .opt-out-message is positioned. */
-.opt-out-message {
-    top: 8px;
-}
-/* Small, muted text for the opt-out notice and checkbox label; !important forces these over other rules. */
-.opt-out-message .html-container, .opt-out-checkbox label {
-    font-size: 14px !important;
-    padding: 0 !important;
-    margin: 0 !important;
-    color: var(--neutral-400) !important;
-}
-/* Responsive overrides.
-   Width queries are ordered widest to narrowest. All of them match on a narrow
-   viewport and have equal specificity, so the last matching block wins. With the
-   1280px block listed last, as before, the 800px and 360px .chatbot caps could never apply. */
-@media (max-width: 1280px) {
-    .chatbot {
-        max-height: 900px;
-    }
-}
-@media (max-width: 800px) {
-    .responsive-row {
-        flex-direction: column;
-    }
-    .model-message {
-        text-align: start;
-        font-size: 10px !important;
-    }
-    .model-dropdown-container {
-        flex-direction: column;
-        align-items: flex-start;
-    }
-    .chatbot {
-        max-height: 800px;
-    }
-}
-/* The 800px block above also matches here, so only the tighter chat cap needs restating. */
-@media (max-width: 400px) {
-    .chatbot {
-        max-height: 360px;
-    }
-}
-/* Height-based cap, kept last so it overrides the width-based caps whenever the
-   viewport is at most 1280px tall (this also covers the former max-height: 932px block,
-   which set the same value). Taller viewports keep the width-based caps. */
-@media (max-height: 1280px) {
-    .chatbot {
-        max-height: calc(100dvh - 400px);
-    }
-}

theme.py DELETED Viewed

@@ -1,148 +0,0 @@
-from typing import Iterable
-from gradio.themes import Soft
-from gradio.themes.utils import colors, fonts, sizes
-# Custom palettes attached as new attributes on the imported gradio `colors` module,
-# so they can be referenced like the built-in ones (colors.teal_gray, colors.red_gray).
-# teal_gray is the default secondary hue of the Apriel theme below.
-# NOTE(review): c800 (#2c5364) is lighter than c700 (#2b495a), which breaks the otherwise
-# steadily darkening ramp from c50 to c950; confirm whether that is intended.
-colors.teal_gray = colors.Color(
-    name="teal_gray",
-    c50="#e8f1f4",
-    c100="#cddde3",
-    c200="#a8c3cf",
-    c300="#7da6b8",
-    c400="#588aa2",
-    c500="#3d6e87",
-    c600="#335b70",
-    c700="#2b495a",
-    c800="#2c5364",
-    c900="#233f4b",
-    c950="#1b323c",
-)
-# red_gray supplies the cancel-button gradients of the Apriel theme below.
-colors.red_gray = colors.Color(
-    name="red_gray",
-    c50="#f7eded",
-    c100="#f5dcdc",
-    c200="#efb4b4",
-    c300="#e78f8f",
-    c400="#d96a6a",
-    c500="#c65353",
-    c600="#b24444",
-    c700="#8f3434",
-    c800="#732d2d",
-    c900="#5f2626",
-    c950="#4d2020",
-)
-class Apriel(Soft):
-    """Gradio theme built on ``Soft`` with grey primary, teal-grey secondary and red-grey cancel colours.
-    All constructor arguments are keyword-only and are forwarded to ``Soft``.
-    ``spacing_size`` and ``radius_size`` are deliberately not passed on, so whatever
-    defaults ``Soft`` defines for them apply. After the base initialisation a fixed
-    set of theme-variable overrides is applied through ``set``.
-    """
-    def __init__(
-            self,
-            *,
-            primary_hue: colors.Color | str = colors.gray,
-            secondary_hue: colors.Color | str = colors.teal_gray,
-            neutral_hue: colors.Color | str = colors.slate,
-            text_size: sizes.Size | str = sizes.text_md,
-            font: fonts.Font
-                  | str
-                  | Iterable[fonts.Font | str] = (
-                    fonts.GoogleFont("Inconsolata"),
-                    "Arial",
-                    "sans-serif",
-            ),
-            font_mono: fonts.Font
-                       | str
-                       | Iterable[fonts.Font | str] = (
-                    fonts.GoogleFont("IBM Plex Mono"),
-                    "ui-monospace",
-                    "monospace",
-            ),
-    ):
-        super().__init__(
-            primary_hue=primary_hue,
-            secondary_hue=secondary_hue,
-            neutral_hue=neutral_hue,
-            text_size=text_size,
-            font=font,
-            font_mono=font_mono,
-        )
-        def horizontal_gradient(start, end):
-            # Every button fill is a left-to-right gradient between two colour stops.
-            return f"linear-gradient(90deg, {start}, {end})"
-        cancel_palette = colors.red_gray
-        overrides = {
-            # Page and block backgrounds.
-            "background_fill_primary": "*primary_50",
-            "background_fill_primary_dark": "*primary_900",
-            "body_background_fill": "linear-gradient(135deg, *primary_200, *primary_100)",
-            "body_background_fill_dark": "linear-gradient(135deg, *primary_900, *primary_800)",
-            # Primary buttons use the secondary (teal-grey) hue.
-            "button_primary_text_color": "white",
-            "button_primary_text_color_hover": "black",
-            "button_primary_background_fill": horizontal_gradient("*secondary_400", "*secondary_400"),
-            "button_primary_background_fill_hover": horizontal_gradient("*secondary_300", "*secondary_300"),
-            "button_primary_background_fill_dark": horizontal_gradient("*secondary_600", "*secondary_800"),
-            "button_primary_background_fill_hover_dark": horizontal_gradient("*secondary_500", "*secondary_500"),
-            # Secondary buttons use the primary (grey) hue.
-            "button_secondary_text_color": "black",
-            "button_secondary_text_color_hover": "white",
-            "button_secondary_background_fill": horizontal_gradient("*primary_300", "*primary_300"),
-            "button_secondary_background_fill_hover": horizontal_gradient("*primary_400", "*primary_400"),
-            "button_secondary_background_fill_dark": horizontal_gradient("*primary_500", "*primary_600"),
-            "button_secondary_background_fill_hover_dark": horizontal_gradient("*primary_500", "*primary_500"),
-            # Cancel buttons use concrete red_gray shades rather than theme references.
-            "button_cancel_background_fill": horizontal_gradient(cancel_palette.c400, cancel_palette.c500),
-            "button_cancel_background_fill_dark": horizontal_gradient(cancel_palette.c700, cancel_palette.c800),
-            "button_cancel_background_fill_hover": horizontal_gradient(cancel_palette.c500, cancel_palette.c600),
-            "button_cancel_background_fill_hover_dark": horizontal_gradient(cancel_palette.c800, cancel_palette.c900),
-            "button_cancel_text_color": "white",
-            "button_cancel_text_color_dark": "white",
-            "button_cancel_text_color_hover": "white",
-            "button_cancel_text_color_hover_dark": "white",
-            # Sliders, blocks and miscellaneous accents.
-            "slider_color": "*secondary_300",
-            "slider_color_dark": "*secondary_600",
-            "block_title_text_weight": "600",
-            "block_border_width": "3px",
-            "block_shadow": "*shadow_drop_lg",
-            "button_primary_shadow": "*shadow_drop_lg",
-            "button_large_padding": "11px",
-            "color_accent_soft": "*primary_100",
-            "block_label_background_fill": "*primary_200",
-        }
-        super().set(**overrides)
-# Module-level theme instance, created with the default arguments when this module is imported.
-apriel = Apriel()
-# Commented-out manual preview of the theme, kept for reference.
-# NOTE(review): it uses `gr` and `time`, which this file's visible imports do not provide;
-# both would have to be imported before re-enabling it.
-# with gr.Blocks(theme=apriel) as demo:
-#     textbox = gr.Textbox(label="Name")
-#     slider = gr.Slider(label="Count", minimum=0, maximum=100, step=1)
-#     with gr.Row():
-#         button = gr.Button("Submit", variant="primary")
-#         clear = gr.Button("Clear")
-#     output = gr.Textbox(label="Output")
-#
-#
-#     def repeat(name, count):
-#         time.sleep(3)
-#         return name * count
-#
-#
-#     button.click(repeat, [textbox, slider], output)
-#
-# if __name__ == "__main__":
-#     demo.launch()

timer.py DELETED Viewed

@@ -1,114 +0,0 @@
-import time
-import json
-class Timer:
-    """Record named steps and how long each one took, using wall-clock ``time.time()``.
-    Typical use: ``start()``, one ``add_step(name)`` after each unit of work,
-    ``end()``, then ``to_json()`` or ``formatted_result()`` to report.
-    """
-    def __init__(self, name=None):
-        """Create an idle timer; *name*, when truthy, is included in the reports."""
-        self.name = name
-        self.start_time = None
-        self.steps = []
-        self.total_time = None
-    def clear(self):
-        """Return to the idle state: forget the start time, the steps and the total (the name is kept)."""
-        self.start_time = None
-        self.steps = []
-        self.total_time = None
-    def start(self):
-        """Start the timer. Previously recorded steps are not discarded; use clear() for that."""
-        self.start_time = time.time()
-    def is_running(self):
-        """Return True once the timer has a start time (it stays True after end())."""
-        return self.start_time is not None
-    def add_step(self, step_name):
-        """Add a step with its duration since the last step or start.
-        Starts the timer first if it has not been started yet.
-        """
-        if self.start_time is None:
-            self.start()
-        current_time = time.time()
-        # The first step is measured from start(), later ones from the previous step.
-        previous_mark = self.steps[-1]["timestamp"] if self.steps else self.start_time
-        self.steps.append({
-            "step_name": step_name,
-            "duration": round(current_time - previous_mark, 4),
-            "total_duration": round(current_time - self.start_time, 4),
-            "timestamp": current_time
-        })
-    def end(self):
-        """End the timer and calculate the total duration.
-        Raises RuntimeError if the timer was never started or has no steps.
-        """
-        if self.start_time is None:
-            raise RuntimeError("Timer has not been started.")
-        if not self.steps:
-            raise RuntimeError("No steps have been added.")
-        self.total_time = time.time() - self.start_time
-    def to_json(self):
-        """Return a dict of the timing steps: optional name, total_time, then one entry per step.
-        Steps that share a name no longer overwrite each other: the first keeps the
-        plain name, later ones get " (2)", " (3)", ... appended.
-        Raises RuntimeError if end() has not been called.
-        """
-        if self.total_time is None:
-            raise RuntimeError("Timer has not been ended.")
-        output_steps = {}
-        for step in self.steps:
-            key = step["step_name"]
-            occurrence = 2
-            # Find a free key so a repeated step name never replaces an earlier duration.
-            while key in output_steps:
-                key = f"{step['step_name']} ({occurrence})"
-                occurrence += 1
-            output_steps[key] = step["duration"]
-        highlights = {"total_time": round(self.total_time, 4)}
-        if self.name:
-            highlights = {"name": self.name, **highlights}
-        return {**highlights, **output_steps}
-    def to_json_str(self):
-        """Return a human-readable JSON of the timing steps."""
-        return json.dumps(self.to_json(), indent=4)
-    def formatted_result(self):
-        """Return a multi-line report: optional name, one line per step, then the total.
-        Each step line shows "[step duration, elapsed since start] step name".
-        Raises RuntimeError if end() has not been called.
-        """
-        if self.total_time is None:
-            raise RuntimeError("Timer has not been ended.")
-        line_buffer = []
-        if self.name:
-            line_buffer.append(f"Timer: {self.name}")
-        line_buffer.extend(
-            f"[{step['duration']:05.2f}s, {step['total_duration']:05.2f}s] {step['step_name']}"
-            for step in self.steps
-        )
-        line_buffer.append(f"Total time: {self.total_time:.2f}s")
-        return "\n".join(line_buffer)
-    def log_formatted_result(self):
-        """Print formatted_result() to standard output."""
-        print(self.formatted_result())
-def example():
-    """Demonstrate Timer: time two simulated steps, then print both report formats."""
-    demo_timer = Timer()
-    demo_timer.start()
-    # Each pair is (seconds of simulated work, name recorded for that step).
-    for pause_seconds, label in ((1, "Step 1"), (2, "Step 2")):
-        time.sleep(pause_seconds)
-        demo_timer.add_step(label)
-    demo_timer.end()
-    # Human-readable report first, then the JSON form.
-    print(demo_timer.formatted_result())
-    print(demo_timer.to_json_str())
-# Run the demonstration when this file is executed directly.
-if __name__ == "__main__":
-    example()

utils.py DELETED Viewed

@@ -1,127 +0,0 @@
-import os
-import sys
-import time
-from functools import wraps
-from typing import Any, Literal
-from gradio import ChatMessage
-from gradio.components.chatbot import Message
-COMMUNITY_POSTFIX_URL = "/discussions"
-# Debug switches: on only when the environment variable is exactly the string "True".
-DEBUG_MODE = os.environ.get("DEBUG_MODE") == "True"
-DEBUG_MODEL = os.environ.get("DEBUG_MODEL") == "True"
-def _model_entry(display_name, hf_url, name_var, url_var, reasoning):
-    """Build one models_config entry; the model name, API URL and token come from the environment."""
-    return {
-        "MODEL_DISPLAY_NAME": display_name,
-        "MODEL_HF_URL": hf_url,
-        "MODEL_NAME": os.environ.get(name_var),
-        "VLLM_API_URL": os.environ.get(url_var),
-        "AUTH_TOKEN": os.environ.get("AUTH_TOKEN"),
-        "REASONING": reasoning,
-    }
-# Per-model settings keyed by display name. Values read from the environment are
-# None when the variable is unset; get_model_config() rejects entries without a
-# MODEL_NAME or VLLM_API_URL.
-models_config = {
-    "Apriel-Nemotron-15b-Thinker": _model_entry(
-        "Apriel-Nemotron-15b-Thinker",
-        "https://huggingface.co/ServiceNow-AI/Apriel-Nemotron-15b-Thinker",
-        "MODEL_NAME_NEMO_15B",
-        "VLLM_API_URL_NEMO_15B",
-        True,
-    ),
-    "Apriel-5b": _model_entry(
-        "Apriel-5b",
-        "https://huggingface.co/ServiceNow-AI/Apriel-5B-Instruct",
-        "MODEL_NAME_5B",
-        "VLLM_API_URL_5B",
-        False,
-    ),
-}
-def get_model_config(model_name: str) -> dict:
-    """Return the models_config entry for *model_name* after checking it is usable.
-    Raises ValueError when the model is unknown (or its entry is empty), or when
-    the entry has no MODEL_NAME or no VLLM_API_URL value.
-    """
-    entry = models_config.get(model_name)
-    if not entry:
-        raise ValueError(f"Model {model_name} not found in models_config")
-    # Checked in this order so the first missing setting is the one reported.
-    required_settings = (
-        ("MODEL_NAME", f"Model name not found in config for {model_name}"),
-        ("VLLM_API_URL", f"VLLM API URL not found in config for {model_name}"),
-    )
-    for setting, complaint in required_settings:
-        if not entry.get(setting):
-            raise ValueError(complaint)
-    return entry
-def _log_message(prefix, message, icon=""):
-    """Print "<local timestamp>: <prefix> <icon ><message>"; the icon and its trailing space appear only when icon is non-empty."""
-    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-    marker = f"{icon} " if len(icon) > 0 else ""
-    print(f"{stamp}: {prefix} {marker}{message}")
-def log_debug(message):
-    """Log at DEBUG level; silent unless DEBUG_MODE is True."""
-    if DEBUG_MODE is not True:
-        return
-    _log_message("DEBUG", message)
-def log_info(message):
-    """Log at INFO level."""
-    _log_message("INFO ", message)
-def log_warning(message):
-    """Log at WARN level, marked with a warning icon."""
-    _log_message("WARN ", message, icon="⚠️")
-def log_error(message):
-    """Log at ERROR level, marked with a double-exclamation icon."""
-    _log_message("ERROR", message, icon="‼️")
-# Gradio 5.0.1 had issues with checking the message formats.  5.29.0 does not!
-def check_format(messages: Any, type: Literal["messages", "tuples"] = "messages") -> None:
-    """Validate chat history against the given format; a no-op unless DEBUG_MODE is on.
-    For "messages", every item must be a dict with 'role' and 'content' keys or a
-    ChatMessage / Message object. For any other *type* value, every item must be a
-    tuple or list of length 2.
-    Raises Exception on the first violation; for "messages" the offending item and
-    its index are printed to stderr first.
-    """
-    if not DEBUG_MODE:
-        return
-    if type == "messages":
-        def is_valid_message(message) -> bool:
-            # Single definition of validity, shared by detection and reporting.
-            return (
-                isinstance(message, dict)
-                and "role" in message
-                and "content" in message
-                or isinstance(message, ChatMessage | Message)
-            )
-        # One pass: the first invalid item is the one reported, with its true index,
-        # even when `messages` is an iterator that can only be consumed once.
-        for i, message in enumerate(messages):
-            if not is_valid_message(message):
-                print(f"_check_format() --> Invalid message at index {i}: {message}\n", file=sys.stderr)
-                raise Exception(
-                    "Data incompatible with messages format. Each message should be a dictionary with 'role' and 'content' keys or a ChatMessage object."
-                )
-    elif not all(
-            isinstance(message, (tuple, list)) and len(message) == 2
-            for message in messages
-    ):
-        raise Exception(
-            "Data incompatible with tuples format. Each message should be a list of length 2."
-        )
-# Adds timing info for a gradio event handler (non-generator functions)
-def logged_event_handler(log_msg='', event_handler=None, log_timer=None, clear_timer=False):
-    """Wrap *event_handler* so each call is bracketed by debug logs and optional timer steps.
-    log_msg: label used in the debug lines and in the timer step names.
-    event_handler: the plain (non-generator) function to wrap; called with the original arguments.
-    log_timer: optional object with clear() and add_step(name); when truthy it receives a
-        "Start: <log_msg>" step before the call and a "Completed: <log_msg>" step after it.
-    clear_timer: when true, log_timer.clear() is called before the start step.
-    Returns the wrapper, which returns whatever event_handler returns. If event_handler
-    raises, the exception propagates and no "Completed" step or "After" line is recorded.
-    """
-    @wraps(event_handler)
-    def wrapped_event_handler(*args, **kwargs):
-        # Log before
-        if log_timer:
-            if clear_timer:
-                log_timer.clear()
-            # Use the label here: the original interpolated the log_debug function object,
-            # producing step names like "Start: <function log_debug at 0x...>".
-            log_timer.add_step(f"Start: {log_msg}")
-        log_debug(f"::: Before event: {log_msg}")
-        # Call the original event handler
-        result = event_handler(*args, **kwargs)
-        # Log after
-        if log_timer:
-            log_timer.add_step(f"Completed: {log_msg}")
-        log_debug(f"::: After event: {log_msg}")
-        return result
-    return wrapped_event_handler