OpenAI-compatible Responses API
responses.js is a lightweight, open-source translation layer between the two main LLM APIs in use today: the Responses API and Chat Completions.
Works with any Chat Completions API, whether locally or remotely hosted.
API Endpoint:
${baseUrl}
Get started by sending requests to this endpoint
OpenAI-compatible
Stateless implementation of the Responses API
Stateless implementation of the Responses API
Provider Agnostic
Works with any Chat Completion API (local or remote).
Works with any Chat Completion API (local or remote).
Multi-modal, streaming, structured output
Supports text and image inputs, streaming output, JSON schema, and function calling.
Supports text and image inputs, streaming output, JSON schema, and function calling.
Remote MCP
Server-side MCP tool execution.
Server-side MCP tool execution.
Examples
"""Minimal text-generation example against the Responses API endpoint."""
import os

from openai import OpenAI

# Authenticate with a Hugging Face token
# (create one at https://huggingface.co/settings/tokens).
api_client = OpenAI(
    base_url="${baseUrl}",
    api_key=os.getenv("HF_TOKEN"),
)

bedtime_story = api_client.responses.create(
    model="moonshotai/Kimi-K2-Instruct:groq",
    instructions="You are a helpful assistant.",
    input="Tell me a three sentence bedtime story about a unicorn.",
)

# Dump the full response object first, then only the generated text.
print(bedtime_story)
print(bedtime_story.output_text)
"""Multi-modal example: ask a vision model to describe an image by URL."""
import os

from openai import OpenAI

client = OpenAI(
    base_url="${baseUrl}",
    api_key=os.getenv("HF_TOKEN"),  # token from https://huggingface.co/settings/tokens
)

# Implicit concatenation keeps this long URL readable; the value is unchanged.
IMAGE_URL = (
    "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/"
    "Gfp-wisconsin-madison-the-nature-boardwalk.jpg/"
    "2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
)

# One user turn mixing a text part and an image part.
user_message = {
    "role": "user",
    "content": [
        {"type": "input_text", "text": "what is in this image?"},
        {"type": "input_image", "image_url": IMAGE_URL},
    ],
}

response = client.responses.create(
    model="Qwen/Qwen2.5-VL-7B-Instruct",
    input=[user_message],
)

print(response)
print(response.output_text)
"""Conversation example: steer the model with a developer-role message."""
import os

from openai import OpenAI

client = OpenAI(
    base_url="${baseUrl}",
    api_key=os.getenv("HF_TOKEN"),  # token from https://huggingface.co/settings/tokens
)

# Developer instructions take precedence over the user turn that follows.
conversation = [
    {"role": "developer", "content": "Talk like a pirate."},
    {"role": "user", "content": "Are semicolons optional in JavaScript?"},
]

reply = client.responses.create(
    model="moonshotai/Kimi-K2-Instruct:groq",
    input=conversation,
)

print(reply)
print(reply.output_text)
"""Streaming example: consume response events as they arrive."""
import os

from openai import OpenAI

client = OpenAI(
    base_url="${baseUrl}",
    api_key=os.getenv("HF_TOKEN"),  # token from https://huggingface.co/settings/tokens
)

event_stream = client.responses.create(
    model="moonshotai/Kimi-K2-Instruct:groq",
    input=[{"role": "user", "content": "Say 'double bubble bath' ten times fast."}],
    stream=True,  # yields incremental events instead of one final response
)

# Each iteration produces one typed streaming event.
for event in event_stream:
    print(event)
"""Function-calling example: expose a weather tool for the model to invoke."""
import os

from openai import OpenAI

client = OpenAI(
    base_url="${baseUrl}",
    api_key=os.getenv("HF_TOKEN"),  # token from https://huggingface.co/settings/tokens
)

# JSON-schema declaration of the single callable tool.
weather_tool = {
    "type": "function",
    "name": "get_current_weather",
    "description": "Get the current weather in a given location",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
        },
        "required": ["location", "unit"],
    },
}
tools = [weather_tool]

response = client.responses.create(
    model="moonshotai/Kimi-K2-Instruct:groq",
    tools=tools,
    input="What is the weather like in Boston today?",
    tool_choice="auto",  # the model decides whether to call the tool
)

print(response)
"""Structured-output example: parse a response into a Pydantic model."""
import os

from openai import OpenAI
from pydantic import BaseModel

client = OpenAI(
    base_url="${baseUrl}",
    api_key=os.getenv("HF_TOKEN"),  # token from https://huggingface.co/settings/tokens
)


class CalendarEvent(BaseModel):
    """Target schema the model's output is validated against."""

    name: str
    date: str
    participants: list[str]


# responses.parse() constrains the model to the schema and returns a
# parsed instance on .output_parsed.
response = client.responses.parse(
    model="moonshotai/Kimi-K2-Instruct:groq",
    input=[
        {"role": "system", "content": "Extract the event information."},
        {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."},
    ],
    text_format=CalendarEvent,
)

print(response.output_parsed)
"""Remote MCP example: server-side execution of MCP tools during a response."""
import os

from openai import OpenAI

client = OpenAI(
    base_url="${baseUrl}",
    api_key=os.getenv("HF_TOKEN"),  # token from https://huggingface.co/settings/tokens
)

# Remote MCP server exposing tiktoken documentation tools.
gitmcp_tool = {
    "type": "mcp",
    "server_label": "gitmcp",
    "server_url": "https://gitmcp.io/openai/tiktoken",
    "allowed_tools": ["search_tiktoken_documentation", "fetch_tiktoken_documentation"],
    "require_approval": "never",  # run tools without pausing for approval
}

response = client.responses.create(
    model="moonshotai/Kimi-K2-Instruct:groq",
    input="how does tiktoken work?",
    tools=[gitmcp_tool],
)

# The output list interleaves tool calls, tool results, and messages.
for output in response.output:
    print(output)