import os

import gradio as gr
from openai import OpenAI

title = None  # "ServiceNow-AI Chat"
description = None

modelConfig = {
    "MODEL_NAME": os.environ.get("MODEL_NAME"),
    "MODE_DISPLAY_NAME": os.environ.get("MODE_DISPLAY_NAME"),
    "MODEL_HF_URL": os.environ.get("MODEL_HF_URL"),
    "VLLM_API_URL": os.environ.get("VLLM_API_URL"),
    "AUTH_TOKEN": os.environ.get("AUTH_TOKEN"),
}

# Initialize the OpenAI client against the vLLM server's OpenAI-compatible endpoint
client = OpenAI(
    api_key=modelConfig.get("AUTH_TOKEN"),
    base_url=modelConfig.get("VLLM_API_URL"),
)


def chat_fn(message, history):
    # With type="messages", Gradio passes history as a list of
    # {"role": ..., "content": ...} dicts, which is already the format
    # the OpenAI API expects; just copy it and append the new user turn.
    formatted = [{"role": m["role"], "content": m["content"]} for m in history]
    formatted.append({"role": "user", "content": message})

    # Create the streaming response
    stream = client.chat.completions.create(
        model=modelConfig.get("MODEL_NAME"),
        messages=formatted,
        temperature=0.8,
        stream=True,
    )

    output = ""
    for chunk in stream:
        # Some chunks (e.g. usage-only chunks) can arrive with no choices
        if not chunk.choices:
            continue
        # The delta's content field can be None on the final chunk
        content = chunk.choices[0].delta.content or ""
        output += content
        # Yield the accumulated output, stripping a trailing "<|end|>" token if present
        yield {"role": "assistant", "content": output.removesuffix("<|end|>")}


# Add the model display name and Hugging Face URL to the description
# description = f"### Model: [{modelConfig.get('MODE_DISPLAY_NAME')}]({modelConfig.get('MODEL_HF_URL')})"

print(f"Running model {modelConfig.get('MODE_DISPLAY_NAME')} ({modelConfig.get('MODEL_NAME')})")

gr.ChatInterface(
    chat_fn,
    title=title,
    description=description,
    theme=gr.themes.Default(primary_hue="green"),
    type="messages",
).launch()
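
# Usage sketch: one way to run this app against a local vLLM server. All values
# below are placeholders for illustration, not real endpoints, model ids, or
# secrets, and the app.py filename is an assumption about how this file is saved.
#
#   export MODEL_NAME="my-org/my-model"             # placeholder: the model id vLLM is serving
#   export MODE_DISPLAY_NAME="My Model"             # placeholder: label used in logs/description
#   export MODEL_HF_URL="https://huggingface.co/my-org/my-model"  # placeholder
#   export VLLM_API_URL="http://localhost:8000/v1"  # vLLM's default OpenAI-compatible base URL
#   export AUTH_TOKEN="token-abc123"                # placeholder: should match vLLM's --api-key
#   python app.py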