Upload folder using huggingface_hub
Files changed:
- README.md +2 -2
- package.json +0 -2
- pnpm-lock.yaml +0 -26
- src/routes/landingPageHtml.ts +3 -3
- src/routes/responses.ts +69 -67
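
In short: the proxy now talks to the Hugging Face router through the `openai` SDK instead of `@huggingface/inference` / `@huggingface/tasks`, the demo and landing-page snippets switch to router-style model IDs (optionally suffixed `:provider`), and the README roadmap marks streaming MCP support as done.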
README.md
CHANGED
@@ -112,7 +112,7 @@ Experience the API through our interactive web interface, adapted from the [open
 ```bash
 # Create demo/.env
 cat > demo/.env << EOF
-MODEL="
+MODEL="CohereLabs/c4ai-command-a-03-2025"
 OPENAI_BASE_URL=http://localhost:3000/v1
 OPENAI_API_KEY=${HF_TOKEN:-<your-huggingface-token>}
 EOF
@@ -181,7 +181,7 @@ responses.js/
 - [x] Multi-turn conversation fixes for text messages + tool calls
 - [x] Correctly return "usage" field
 - [x] MCP support (non-streaming)
-- [
+- [x] MCP support (streaming)
 - [ ] Tools execution (web search, file search, image generation, code interpreter)
 - [ ] Background mode support
 - [ ] Additional API routes (GET, DELETE, CANCEL, LIST responses)
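
For reference, because the heredoc delimiter is unquoted, `${HF_TOKEN:-<your-huggingface-token>}` is expanded when the file is written, so the resulting demo/.env looks like this (placeholder shown for an unset HF_TOKEN):

```bash
MODEL="CohereLabs/c4ai-command-a-03-2025"
OPENAI_BASE_URL=http://localhost:3000/v1
OPENAI_API_KEY=<your-huggingface-token>
```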
package.json
CHANGED
@@ -58,8 +58,6 @@
   "author": "Hugging Face",
   "license": "MIT",
   "dependencies": {
-    "@huggingface/inference": "^4.3.1",
-    "@huggingface/tasks": "^0.19.22",
     "@modelcontextprotocol/sdk": "^1.15.0",
     "express": "^4.21.2",
     "openai": "^5.8.2",
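
The pnpm-lock.yaml hunks below are the mechanical counterpart of this removal: `@huggingface/inference`, its transitive `@huggingface/jinja`, and `@huggingface/tasks` drop out of the importer, `packages`, and `snapshots` sections, while the already-present `openai` dependency now supplies both the runtime client and the chat-completion types used in src/routes/responses.ts.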
pnpm-lock.yaml
CHANGED
@@ -8,12 +8,6 @@ importers:
 
   .:
     dependencies:
-      '@huggingface/inference':
-        specifier: ^4.3.1
-        version: 4.3.1
-      '@huggingface/tasks':
-        specifier: ^0.19.22
-        version: 0.19.22
       '@modelcontextprotocol/sdk':
         specifier: ^1.15.0
        version: 1.15.0
@@ -258,17 +252,6 @@ packages:
     resolution: {integrity: sha512-1+WqvgNMhmlAambTvT3KPtCl/Ibr68VldY2XY40SL1CE0ZXiakFR/cbTspaF5HsnpDMvcYYoJHfl4980NBjGag==}
     engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0}
 
-  '@huggingface/inference@4.3.1':
-    resolution: {integrity: sha512-wn5ErcX+HTeAYfNIkgjl6pkzGvTeskKRoCFodSmEfa+SmZnMo0/YDP46Ivnz2JV6DJwMd3naOtgYH6WZVD3qoQ==}
-    engines: {node: '>=18'}
-
-  '@huggingface/jinja@0.5.0':
-    resolution: {integrity: sha512-Ptc03/jGRiYRoi0bUYKZ14MkDslsBRT24oxmsvUlfYrvQMldrxCevhPnT+hfX8awKTT8/f/0ZBBWldoeAcMHdQ==}
-    engines: {node: '>=18'}
-
-  '@huggingface/tasks@0.19.22':
-    resolution: {integrity: sha512-jtRXsJZTES01X4gJ5VOUnEm3ONyyfXUcWKObbWkr/SQmjaH/kxtWqc2zVWKaxL4QLoXqXJ+T+Pi5xupMStSudQ==}
-
   '@humanfs/core@0.19.1':
     resolution: {integrity: sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==}
     engines: {node: '>=18.18.0'}
@@ -1833,15 +1816,6 @@ snapshots:
       '@eslint/core': 0.15.1
       levn: 0.4.1
 
-  '@huggingface/inference@4.3.1':
-    dependencies:
-      '@huggingface/jinja': 0.5.0
-      '@huggingface/tasks': 0.19.22
-
-  '@huggingface/jinja@0.5.0': {}
-
-  '@huggingface/tasks@0.19.22': {}
-
   '@humanfs/core@0.19.1': {}
 
   '@humanfs/node@0.16.6':
src/routes/landingPageHtml.ts
CHANGED
@@ -621,7 +621,7 @@ tools = [
 ]
 
 response = client.responses.create(
-    model="
+    model="meta-llama/Llama-3.3-70B-Instruct:cerebras",
     tools=tools,
     input="What is the weather like in Boston today?",
     tool_choice="auto",
@@ -645,7 +645,7 @@ class CalendarEvent(BaseModel):
     participants: list[str]
 
 response = client.responses.parse(
-    model="
+    model="meta-llama/Meta-Llama-3-70B-Instruct:novita",
     input=[
         {"role": "system", "content": "Extract the event information."},
         {
@@ -668,7 +668,7 @@ client = OpenAI(
 )
 
 response = client.responses.create(
-    model="
+    model="meta-llama/Llama-3.3-70B-Instruct:cerebras",
     input="how does tiktoken work?",
     tools=[
        {
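
These model strings use the router's inline `model:provider` form, which replaces the `provider@model` parsing deleted from src/routes/responses.ts below. A minimal sketch of the two addressing styles (values are illustrative, not recovered from the truncated originals):

```ts
// Before: the proxy resolved the provider itself (logic removed below).
const legacy = "cerebras@meta-llama/Llama-3.3-70B-Instruct";
const model = legacy.includes("@") ? legacy.split("@")[1] : legacy; // model ID
const provider = legacy.includes("@") ? legacy.split("@")[0] : undefined; // provider hint

// After: the raw string, optionally suffixed with ":provider",
// is forwarded untouched and resolved by the Hugging Face router.
const routed = "meta-llama/Llama-3.3-70B-Instruct:cerebras";
```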
src/routes/responses.ts
CHANGED
@@ -2,13 +2,7 @@ import { type Response as ExpressResponse } from "express";
 import { type ValidatedRequest } from "../middleware/validation.js";
 import type { CreateResponseParams, McpServerParams, McpApprovalRequestParams } from "../schemas.js";
 import { generateUniqueId } from "../lib/generateUniqueId.js";
-import {
-import type {
-	ChatCompletionInputMessage,
-	ChatCompletionInputMessageChunkType,
-	ChatCompletionInput,
-} from "@huggingface/tasks";
-
+import { OpenAI } from "openai";
 import type {
 	Response,
 	ResponseStreamEvent,
@@ -18,9 +12,11 @@ import type {
 	ResponseOutputItem,
 } from "openai/resources/responses/responses";
 import type {
-
-
-
+	ChatCompletionCreateParamsStreaming,
+	ChatCompletionMessageParam,
+	ChatCompletionTool,
+} from "openai/resources/chat/completions.js";
+import type { FunctionParameters } from "openai/resources/shared.js";
 import { callMcpTool, connectMcpServer } from "../mcp.js";
 
 class StreamingError extends Error {
@@ -163,7 +159,7 @@ async function* innerRunStream(
 	}
 
 	// List MCP tools from server (if required) + prepare tools for the LLM
-	let tools:
+	let tools: ChatCompletionTool[] | undefined = [];
 	const mcpToolsMapping: Record<string, McpServerParams> = {};
 	if (req.body.tools) {
 		for (const tool of req.body.tools) {
@@ -213,7 +209,7 @@
 				type: "function" as const,
 				function: {
 					name: mcpTool.name,
-					parameters: mcpTool.input_schema,
+					parameters: mcpTool.input_schema as FunctionParameters,
 					description: mcpTool.description ?? undefined,
 				},
 			});
@@ -232,12 +228,8 @@
 
 	// Prepare payload for the LLM
 
-	// Resolve model and provider
-	const model = req.body.model.includes("@") ? req.body.model.split("@")[1] : req.body.model;
-	const provider = req.body.model.includes("@") ? req.body.model.split("@")[0] : undefined;
-
 	// Format input to Chat Completion format
-	const messages:
+	const messages: ChatCompletionMessageParam[] = req.body.instructions
 		? [{ role: "system", content: req.body.instructions }]
 		: [];
 	if (Array.isArray(req.body.input)) {
@@ -247,22 +239,20 @@
 			switch (item.type) {
 				case "function_call":
 					return {
-
-						role: "assistant",
-						name: `function_call ${item.name} ${item.call_id}`,
+						role: "tool" as const,
 						content: item.arguments,
+						tool_call_id: item.call_id,
 					};
 				case "function_call_output":
 					return {
-
-						role: "assistant",
-						name: `function_call_output ${item.call_id}`,
+						role: "tool" as const,
 						content: item.output,
+						tool_call_id: item.call_id,
 					};
 				case "message":
-
-
-					content
+				case undefined:
+					if (item.role === "assistant" || item.role === "user" || item.role === "system") {
+						const content =
 							typeof item.content === "string"
 								? item.content
 								: item.content
@@ -270,7 +260,7 @@
 							switch (content.type) {
 								case "input_image":
 									return {
-										type: "image_url" as
+										type: "image_url" as const,
 										image_url: {
 											url: content.image_url,
 										},
@@ -278,7 +268,7 @@
 								case "output_text":
 									return content.text
 										? {
-												type: "text" as
+												type: "text" as const,
 												text: content.text,
 											}
 										: undefined;
@@ -286,72 +276,80 @@
 									return undefined;
 								case "input_text":
 									return {
-										type: "text" as
+										type: "text" as const,
 										text: content.text,
 									};
 							}
 						})
-						.filter((item) =>
-
+						.filter((item) => {
+							return item !== undefined;
+						});
+					return {
+						role: item.role,
+						content,
+					} as ChatCompletionMessageParam;
+					}
+					return undefined;
 				case "mcp_list_tools": {
-					// Hacky: will be dropped by filter since tools are passed as separate objects
 					return {
-						role: "
-
-
+						role: "tool" as const,
+						content: `MCP list tools. Server: '${item.server_label}'.`,
+						tool_call_id: "mcp_list_tools",
 					};
 				}
 				case "mcp_call": {
 					return {
-						role: "
-						name: "mcp_call",
+						role: "tool" as const,
 						content: `MCP call (${item.id}). Server: '${item.server_label}'. Tool: '${item.name}'. Arguments: '${item.arguments}'.`,
+						tool_call_id: "mcp_call",
 					};
 				}
 				case "mcp_approval_request": {
 					return {
-						role: "
-						name: "mcp_approval_request",
+						role: "tool" as const,
 						content: `MCP approval request (${item.id}). Server: '${item.server_label}'. Tool: '${item.name}'. Arguments: '${item.arguments}'.`,
+						tool_call_id: "mcp_approval_request",
 					};
 				}
 				case "mcp_approval_response": {
 					return {
-						role: "
-						name: "mcp_approval_response",
+						role: "tool" as const,
 						content: `MCP approval response (${item.id}). Approved: ${item.approve}. Reason: ${item.reason}.`,
+						tool_call_id: "mcp_approval_response",
 					};
 				}
 			}
 		})
-		.filter(
+		.filter(
+			(message): message is NonNullable<typeof message> =>
+				message !== undefined &&
+				(typeof message.content === "string" || (Array.isArray(message.content) && message.content.length !== 0))
+		)
 		);
 	} else {
-		messages.push({ role: "user", content: req.body.input });
+		messages.push({ role: "user", content: req.body.input } as const);
 	}
 
 	// Prepare payload for the LLM
-	const payload:
+	const payload: ChatCompletionCreateParamsStreaming = {
 		// main params
-		model,
-		provider,
+		model: req.body.model,
 		messages,
-		stream:
+		stream: true,
 		// options
 		max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens,
 		response_format: req.body.text?.format
-			?
-
-
-
-
-
-
-
-
-
-			}
+			? req.body.text.format.type === "json_schema"
+				? {
+						type: "json_schema",
+						json_schema: {
+							description: req.body.text.format.description,
+							name: req.body.text.format.name,
+							schema: req.body.text.format.schema,
+							strict: req.body.text.format.strict,
+						},
+					}
+				: { type: req.body.text.format.type }
 			: undefined,
 		temperature: req.body.temperature,
 		tool_choice:
@@ -475,11 +473,15 @@ async function* listMcpToolsStream(
  */
 async function* handleOneTurnStream(
 	apiKey: string | undefined,
-	payload:
+	payload: ChatCompletionCreateParamsStreaming,
 	responseObject: IncompleteResponse,
 	mcpToolsMapping: Record<string, McpServerParams>
 ): AsyncGenerator<ResponseStreamEvent> {
-	const
+	const client = new OpenAI({
+		baseURL: process.env.OPENAI_BASE_URL ?? "https://router.huggingface.co/v1",
+		apiKey: apiKey,
+	});
+	const stream = await client.chat.completions.create(payload);
 	let previousInputTokens = responseObject.usage?.input_tokens ?? 0;
 	let previousOutputTokens = responseObject.usage?.output_tokens ?? 0;
 	let previousTotalTokens = responseObject.usage?.total_tokens ?? 0;
@@ -565,7 +567,7 @@ async function* handleOneTurnStream(
 			}
 
 			let currentOutputItem = responseObject.output.at(-1);
-			if (delta.tool_calls[0].function
+			if (delta.tool_calls[0].function?.name) {
 				const functionName = delta.tool_calls[0].function.name;
 				// Tool call with a name => new tool call
 				let newOutputObject:
@@ -594,7 +596,7 @@
 				newOutputObject = {
 					type: "function_call",
 					id: generateUniqueId("fc"),
-					call_id: delta.tool_calls[0].id,
+					call_id: delta.tool_calls[0].id ?? "",
 					name: functionName,
 					arguments: "",
 				};
@@ -618,7 +620,7 @@
 				}
 			}
 
-			if (delta.tool_calls[0].function
+			if (delta.tool_calls[0].function?.arguments) {
 				// Current item is necessarily a tool call
 				currentOutputItem = responseObject.output.at(-1) as
 					| ResponseOutputItem.McpCall
@@ -737,7 +739,7 @@
 						arguments: lastOutputItem.arguments,
 						// Hacky: type is not correct in inference.js. Will fix it but in the meantime we need to cast it.
 						// TODO: fix it in the inference.js package. Should be "arguments" and not "parameters".
-					}
+					},
 				},
 			],
 		},
@@ -775,7 +777,7 @@ async function* callApprovedMCPToolStream(
 	approvalRequest: McpApprovalRequestParams | undefined,
 	mcpToolsMapping: Record<string, McpServerParams>,
 	responseObject: IncompleteResponse,
-	payload:
+	payload: ChatCompletionCreateParamsStreaming
 ): AsyncGenerator<ResponseStreamEvent> {
 	if (!approvalRequest) {
 		throw new Error(`MCP approval request '${approval_request_id}' not found`);
@@ -842,7 +844,7 @@
 						arguments: outputObject.arguments,
 						// Hacky: type is not correct in inference.js. Will fix it but in the meantime we need to cast it.
 						// TODO: fix it in the inference.js package. Should be "arguments" and not "parameters".
-					}
+					},
 				},
 			],
 		},
|