Spaces:
Running
Running
import { getPresetManager } from './preset-manager.js'; | |
import { extractMessageFromData, getGenerateUrl, getRequestHeaders } from '../script.js'; | |
import { getTextGenServer } from './textgen-settings.js'; | |
import { extractReasoningFromData } from './reasoning.js'; | |
import { formatInstructModeChat, formatInstructModePrompt, getInstructStoppingSequences, names_behavior_types } from './instruct-mode.js'; | |
import { getStreamingReply, tryParseStreamingError } from './openai.js'; | |
import EventSourceStream from './sse-stream.js'; | |
// #region Type Definitions | |
/** | |
* @typedef {Object} TextCompletionRequestBase | |
* @property {boolean?} [stream=false] - Whether to stream the response | |
* @property {number} max_tokens - Maximum number of tokens to generate | |
* @property {string} [model] - Optional model name | |
* @property {string} api_type - Type of API to use | |
* @property {string} [api_server] - Optional API server URL | |
* @property {number} [temperature] - Optional temperature parameter | |
* @property {number} [min_p] - Optional min_p parameter | |
*/ | |
/** | |
* @typedef {Object} TextCompletionPayloadBase | |
* @property {boolean?} [stream=false] - Whether to stream the response | |
* @property {string} prompt - The text prompt for completion | |
* @property {number} max_tokens - Maximum number of tokens to generate | |
* @property {number} max_new_tokens - Alias for max_tokens | |
* @property {string} [model] - Optional model name | |
* @property {string} api_type - Type of API to use | |
* @property {string} api_server - API server URL | |
* @property {number} [temperature] - Optional temperature parameter
* @property {number} [min_p] - Optional min_p parameter
*/ | |
/** @typedef {Record<string, any> & TextCompletionPayloadBase} TextCompletionPayload */ | |
/** | |
* @typedef {Object} ChatCompletionMessage | |
* @property {string} role - The role of the message author (e.g., "user", "assistant", "system") | |
* @property {string} content - The content of the message | |
*/ | |
/** | |
* @typedef {Object} ChatCompletionPayloadBase | |
* @property {boolean?} [stream=false] - Whether to stream the response | |
* @property {ChatCompletionMessage[]} messages - Array of chat messages | |
* @property {string} [model] - Optional model name to use for completion | |
* @property {string} chat_completion_source - Source provider | |
* @property {number} max_tokens - Maximum number of tokens to generate | |
* @property {number} [temperature] - Optional temperature parameter for response randomness | |
* @property {string} [custom_url] - Optional custom URL | |
* @property {string} [reverse_proxy] - Optional reverse proxy URL | |
* @property {string} [proxy_password] - Optional proxy password | |
*/ | |
/** @typedef {Record<string, any> & ChatCompletionPayloadBase} ChatCompletionPayload */ | |
/** | |
* @typedef {Object} ExtractedData | |
* @property {string} content - Extracted content. | |
* @property {string} reasoning - Extracted reasoning. | |
*/ | |
/** | |
* @typedef {Object} StreamResponse | |
* @property {string} text - Generated text. | |
* @property {string[]} swipes - Generated swipes | |
* @property {Object} state - Generated state | |
* @property {string?} [state.reasoning] - Generated reasoning | |
* @property {string?} [state.image] - Generated image | |
*/ | |
// #endregion | |
/**
 * Creates & sends a text completion request.
 *
 * Every member is static: the class groups payload construction, the HTTP call
 * (streaming or not) and the post-processing of the reply.
 */
export class TextCompletionService {
    static TYPE = 'textgenerationwebui';

    /**
     * Builds the request payload from the given parameters.
     * `max_tokens` is mirrored into `max_new_tokens`, `api_server` falls back to
     * `getTextGenServer(api_type)` when it is null/undefined, and any key whose value
     * is `undefined` is removed from the result.
     * @param {Record<string, any> & TextCompletionRequestBase & {prompt: string}} custom
     * @returns {TextCompletionPayload}
     */
    static createRequestData({ stream = false, prompt, max_tokens, model, api_type, api_server, temperature, min_p, ...props }) {
        const payload = {
            stream,
            prompt,
            max_tokens,
            max_new_tokens: max_tokens,
            model,
            api_type,
            api_server: api_server ?? getTextGenServer(api_type),
            temperature,
            min_p,
            ...props,
        };

        // Remove undefined values to avoid API errors
        Object.keys(payload).forEach(key => {
            if (payload[key] === undefined) {
                delete payload[key];
            }
        });

        return payload;
    }

    /**
     * Sends a text completion request to the specified server
     * @param {TextCompletionPayload} data Request data
     * @param {boolean?} extractData Extract message from the response. Default true
     * @param {AbortSignal?} signal
     * @returns {Promise<ExtractedData | (() => AsyncGenerator<StreamResponse>)>} If not streaming, returns extracted data; if streaming, returns a function that creates an AsyncGenerator
     * @throws {Error} On a failed streaming request. A failed non-streaming request throws the parsed response body as-is.
     */
    static async sendRequest(data, extractData = true, signal = null) {
        if (!data.stream) {
            const response = await fetch(getGenerateUrl(this.TYPE), {
                method: 'POST',
                headers: getRequestHeaders(),
                cache: 'no-cache',
                body: JSON.stringify(data),
                signal: signal ?? new AbortController().signal,
            });

            const json = await response.json();
            if (!response.ok || json.error) {
                // The parsed body is thrown unchanged; callers inspect it directly
                throw json;
            }

            if (!extractData) {
                return json;
            }

            return {
                content: extractMessageFromData(json, this.TYPE),
                reasoning: extractReasoningFromData(json, {
                    mainApi: this.TYPE,
                    textGenType: data.api_type,
                    ignoreShowThoughts: true,
                }),
            };
        }

        const response = await fetch('/api/backends/text-completions/generate', {
            method: 'POST',
            headers: getRequestHeaders(),
            cache: 'no-cache',
            body: JSON.stringify(data),
            signal: signal ?? new AbortController().signal,
        });

        if (!response.ok) {
            const text = await response.text();
            tryParseStreamingError(response, text, { quiet: true });

            throw new Error(`Got response status ${response.status}`);
        }

        const eventStream = new EventSourceStream();
        response.body.pipeThrough(eventStream);
        const reader = eventStream.readable.getReader();

        return async function* streamData() {
            let text = '';
            const swipes = [];
            const state = { reasoning: '' };

            while (true) {
                const { done, value } = await reader.read();
                if (done) return;
                if (value.data === '[DONE]') return;

                tryParseStreamingError(response, value.data, { quiet: true });
                // Named `chunk` so the request payload parameter `data` is not shadowed
                const chunk = JSON.parse(value.data);
                const choice = chunk?.choices?.[0];

                if (choice?.index > 0) {
                    // Choices with index > 0 are accumulated as swipes (index 1 -> swipes[0])
                    const swipeIndex = choice.index - 1;
                    // A chunk without `text` must not append the literal string "undefined"
                    swipes[swipeIndex] = (swipes[swipeIndex] || '') + (choice.text ?? '');
                } else {
                    const newText = choice?.text || chunk?.content || '';
                    text += newText;
                    state.reasoning += choice?.reasoning ?? '';
                }

                yield { text, swipes, state };
            }
        };
    }

    /**
     * Process and send a text completion request with optional preset & instruct
     *
     * Steps: merge the named generation preset under the custom parameters (custom wins),
     * flatten a message-array prompt into a single string (instruct-formatted when the named
     * instruct preset exists, otherwise joined with blank lines), send the request, and for
     * non-streaming extracted replies strip trailing stop strings and instruct sequences.
     * @param {Record<string, any> & TextCompletionRequestBase & {prompt: (ChatCompletionMessage & {ignoreInstruct?: boolean})[] |string}} custom
     * @param {Object} options - Configuration options
     * @param {string?} [options.presetName] - Name of the preset to use for generation settings
     * @param {string?} [options.instructName] - Name of instruct preset for message formatting
     * @param {Partial<InstructSettings>?} [options.instructSettings] - Override instruct settings
     * @param {boolean} extractData - Whether to extract structured data from response
     * @param {AbortSignal?} [signal]
     * @returns {Promise<ExtractedData | (() => AsyncGenerator<StreamResponse>)>} If not streaming, returns extracted data; if streaming, returns a function that creates an AsyncGenerator
     * @throws {Error}
     */
    static async processRequest(
        custom,
        options = {},
        extractData = true,
        signal = null,
    ) {
        // Tolerate an explicit null as well as an omitted options argument
        const { presetName, instructName, instructSettings } = options ?? {};
        let requestData = { ...custom };
        const prompt = custom.prompt;

        // Apply generation preset if specified
        if (presetName) {
            const presetManager = getPresetManager(this.TYPE);
            if (presetManager) {
                const preset = presetManager.getCompletionPresetByName(presetName);
                if (preset) {
                    // Convert preset to payload and merge with custom parameters
                    const presetPayload = this.presetToGeneratePayload(preset, {});
                    requestData = { ...presetPayload, ...requestData };
                } else {
                    console.warn(`Preset "${presetName}" not found, continuing with default settings`);
                }
            } else {
                console.warn('Preset manager not found, continuing with default settings');
            }
        }

        /** @type {InstructSettings | undefined} */
        let instructPreset;

        // Handle instruct formatting if requested
        if (Array.isArray(prompt) && instructName) {
            const instructPresetManager = getPresetManager('instruct');
            instructPreset = instructPresetManager?.getCompletionPresetByName(instructName);
            if (instructPreset) {
                // Clone the preset to avoid modifying the original
                instructPreset = structuredClone(instructPreset);
                instructPreset.names_behavior = names_behavior_types.NONE;
                if (instructSettings) {
                    Object.assign(instructPreset, instructSettings);
                }

                // Format messages using instruct formatting
                const formattedMessages = [];
                const lastIndex = prompt.length - 1;
                // "Prefill" means the prompt ends with an assistant message
                const prefillActive = prompt.length > 0 ? prompt[lastIndex].role === 'assistant' : false;

                for (const [index, message] of prompt.entries()) {
                    let messageContent = message.content;

                    if (!message.ignoreInstruct) {
                        // Compare positions rather than object identity, so a message object that
                        // appears more than once in the array is only "last" at the final slot
                        const isLastMessage = index === lastIndex;

                        // 1. If prefill is not active, format all messages
                        // 2. If prefill is active, format all messages except the last one
                        if (!isLastMessage || !prefillActive) {
                            messageContent = formatInstructModeChat(
                                message.role,
                                message.content,
                                message.role === 'user',
                                false,
                                undefined,
                                undefined,
                                undefined,
                                undefined,
                                instructPreset,
                            );
                        }

                        // Add prompt formatting for the last message.
                        if (isLastMessage) {
                            if (!prefillActive) {
                                // No prefill: append whatever formatInstructModePrompt produces for this preset
                                messageContent += formatInstructModePrompt(
                                    undefined,
                                    false,
                                    undefined,
                                    undefined,
                                    undefined,
                                    false,
                                    false,
                                    instructPreset,
                                );
                            } else {
                                // Prefill: format the trailing assistant message with the output suffix
                                // emptied and wrapping disabled (presumably so the model continues it)
                                const overriddenInstructPreset = structuredClone(instructPreset);
                                overriddenInstructPreset.output_suffix = '';
                                overriddenInstructPreset.wrap = false;
                                messageContent = formatInstructModeChat(
                                    message.role,
                                    message.content,
                                    false, // since it is assistant
                                    false,
                                    undefined,
                                    undefined,
                                    undefined,
                                    undefined,
                                    overriddenInstructPreset,
                                );
                            }
                        }
                    }

                    formattedMessages.push(messageContent);
                }

                requestData.prompt = formattedMessages.join('');
                const stoppingStrings = getInstructStoppingSequences({ customInstruct: instructPreset, useStopStrings: false });
                // The same list is sent under both keys
                requestData.stop = stoppingStrings;
                requestData.stopping_strings = stoppingStrings;
            } else {
                console.warn(`Instruct preset "${instructName}" not found, using basic formatting`);
                requestData.prompt = prompt.map(x => x.content).join('\n\n');
            }
        } else if (typeof prompt === 'string') {
            requestData.prompt = prompt;
        } else {
            requestData.prompt = prompt.map(x => x.content).join('\n\n');
        }

        // @ts-ignore
        const data = this.createRequestData(requestData);

        const response = await this.sendRequest(data, extractData, signal);

        // Remove stopping strings from the end
        if (!data.stream && extractData) {
            /** @type {ExtractedData} */
            // @ts-ignore
            const extractedData = response;

            let message = extractedData.content;

            // Strip trailing whitespace (other than line breaks) from every line
            message = message.replace(/[^\S\r\n]+$/gm, '');

            if (requestData.stopping_strings) {
                for (const stoppingString of requestData.stopping_strings) {
                    if (stoppingString.length) {
                        // Remove the longest prefix of the stop string that the message ends with;
                        // this also removes a stop string that was only partially emitted
                        for (let j = stoppingString.length; j > 0; j--) {
                            if (message.slice(-j) === stoppingString.slice(0, j)) {
                                message = message.slice(0, -j);
                                break;
                            }
                        }
                    }
                }
            }

            if (instructPreset) {
                // Cut the message at the first occurrence of the stop / input sequence
                [
                    instructPreset.stop_sequence,
                    instructPreset.input_sequence,
                ].forEach(sequence => {
                    if (sequence?.trim()) {
                        const index = message.indexOf(sequence);
                        if (index !== -1) {
                            message = message.substring(0, index);
                        }
                    }
                });

                // Remove every non-blank line of the output sequences wherever it appears
                [
                    instructPreset.output_sequence,
                    instructPreset.last_output_sequence,
                ].forEach(sequences => {
                    if (sequences) {
                        sequences.split('\n')
                            .filter(line => line.trim() !== '')
                            .forEach(line => {
                                message = message.replaceAll(line, '');
                            });
                    }
                });
            }

            extractedData.content = message;
        }

        return response;
    }

    /**
     * Converts a preset to a valid text completion payload.
     * Only supports temperature (preset key `temp`) and `min_p`.
     * A value of 0 is a valid setting and is kept; null, undefined, false, blank strings
     * and values that do not convert to a number are omitted from the payload.
     * @param {Object} preset - The preset configuration
     * @param {Object} customPreset - Additional parameters to override preset values
     * @returns {Object} - Formatted payload for text completion API
     * @throws {Error} If preset is not an object
     */
    static presetToGeneratePayload(preset, customPreset = {}) {
        if (!preset || typeof preset !== 'object') {
            throw new Error('Invalid preset: must be an object');
        }

        // Truthiness cannot be used here: it would discard an explicit 0
        const toOptionalNumber = (value) => {
            if (value == null || value === false || (typeof value === 'string' && value.trim() === '')) {
                return undefined;
            }
            const number = Number(value);
            return Number.isNaN(number) ? undefined : number;
        };

        // Merge preset with custom parameters
        const settings = { ...preset, ...customPreset };

        // Initialize base payload with common parameters
        const payload = {
            'temperature': toOptionalNumber(settings.temp),
            'min_p': toOptionalNumber(settings.min_p),
        };

        // Remove undefined values to avoid API errors
        Object.keys(payload).forEach(key => {
            if (payload[key] === undefined) {
                delete payload[key];
            }
        });

        return payload;
    }
}
/**
 * Creates & sends a chat completion request.
 *
 * Every member is static: the class groups payload construction and the HTTP call
 * (streaming or not).
 */
export class ChatCompletionService {
    static TYPE = 'openai';

    /**
     * Builds the request payload from the given parameters.
     * `use_makersuite_sysprompt` and `claude_use_sysprompt` default to true (extra props may
     * override them), and any key whose value is `undefined` is removed from the result.
     * @param {ChatCompletionPayload} custom
     * @returns {ChatCompletionPayload}
     */
    static createRequestData({ stream = false, messages, model, chat_completion_source, max_tokens, temperature, custom_url, reverse_proxy, proxy_password, ...props }) {
        const payload = {
            stream,
            messages,
            model,
            chat_completion_source,
            max_tokens,
            temperature,
            custom_url,
            reverse_proxy,
            proxy_password,
            use_makersuite_sysprompt: true,
            claude_use_sysprompt: true,
            ...props,
        };

        // Remove undefined values to avoid API errors
        Object.keys(payload).forEach(key => {
            if (payload[key] === undefined) {
                delete payload[key];
            }
        });

        return payload;
    }

    /**
     * Sends a chat completion request
     * @param {ChatCompletionPayload} data Request data
     * @param {boolean?} extractData Extract message from the response. Default true
     * @param {AbortSignal?} signal Abort signal
     * @returns {Promise<ExtractedData | (() => AsyncGenerator<StreamResponse>)>} If not streaming, returns extracted data; if streaming, returns a function that creates an AsyncGenerator
     * @throws {Error} On a failed streaming request. A failed non-streaming request throws the parsed response body as-is.
     */
    static async sendRequest(data, extractData = true, signal = null) {
        const response = await fetch('/api/backends/chat-completions/generate', {
            method: 'POST',
            headers: getRequestHeaders(),
            cache: 'no-cache',
            body: JSON.stringify(data),
            signal: signal ?? new AbortController().signal,
        });

        if (!data.stream) {
            const json = await response.json();
            if (!response.ok || json.error) {
                // The parsed body is thrown unchanged; callers inspect it directly
                throw json;
            }

            if (!extractData) {
                return json;
            }

            return {
                content: extractMessageFromData(json, this.TYPE),
                reasoning: extractReasoningFromData(json, {
                    mainApi: this.TYPE,
                    textGenType: data.chat_completion_source,
                    ignoreShowThoughts: true,
                }),
            };
        }

        if (!response.ok) {
            const text = await response.text();
            tryParseStreamingError(response, text, { quiet: true });

            throw new Error(`Got response status ${response.status}`);
        }

        const eventStream = new EventSourceStream();
        response.body.pipeThrough(eventStream);
        const reader = eventStream.readable.getReader();

        return async function* streamData() {
            let text = '';
            const swipes = [];
            // `state` is handed to getStreamingReply on every chunk and yielded to the consumer
            const state = { reasoning: '', image: '' };

            while (true) {
                const { done, value } = await reader.read();
                if (done) return;

                const rawData = value.data;
                if (rawData === '[DONE]') return;

                tryParseStreamingError(response, rawData, { quiet: true });
                const parsed = JSON.parse(rawData);

                const reply = getStreamingReply(parsed, state, {
                    chatCompletionSource: data.chat_completion_source,
                    overrideShowThoughts: true,
                });

                if (Array.isArray(parsed?.choices) && parsed?.choices?.[0]?.index > 0) {
                    // Choices with index > 0 are accumulated as swipes (index 1 -> swipes[0])
                    const swipeIndex = parsed.choices[0].index - 1;
                    swipes[swipeIndex] = (swipes[swipeIndex] || '') + reply;
                } else {
                    text += reply;
                }

                yield { text, swipes: swipes, state };
            }
        };
    }

    /**
     * Process and send a chat completion request with optional preset
     *
     * When a preset name is given and found, its payload is merged under the custom
     * parameters (custom values win) before the request is built and sent.
     * @param {ChatCompletionPayload} custom
     * @param {Object} [options] - Configuration options
     * @param {string?} [options.presetName] - Name of the preset to use for generation settings
     * @param {boolean} [extractData=true] - Whether to extract structured data from response
     * @param {AbortSignal?} [signal] - Abort signal
     * @returns {Promise<ExtractedData | (() => AsyncGenerator<StreamResponse>)>} If not streaming, returns extracted data; if streaming, returns a function that creates an AsyncGenerator
     * @throws {Error}
     */
    static async processRequest(custom, options = {}, extractData = true, signal = null) {
        // Tolerate an explicit null as well as an omitted options argument
        const { presetName } = options ?? {};
        let requestData = { ...custom };

        // Apply generation preset if specified
        if (presetName) {
            const presetManager = getPresetManager(this.TYPE);
            if (presetManager) {
                const preset = presetManager.getCompletionPresetByName(presetName);
                if (preset) {
                    // Convert preset to payload and merge with custom parameters
                    const presetPayload = this.presetToGeneratePayload(preset, {});
                    requestData = { ...presetPayload, ...requestData };
                } else {
                    console.warn(`Preset "${presetName}" not found, continuing with default settings`);
                }
            } else {
                console.warn('Preset manager not found, continuing with default settings');
            }
        }

        const data = this.createRequestData(requestData);

        return await this.sendRequest(data, extractData, signal);
    }

    /**
     * Converts a preset to a valid chat completion payload
     * Only supports temperature.
     * A value of 0 is a valid setting and is kept; null, undefined, false, blank strings
     * and values that do not convert to a number are omitted from the payload.
     * @param {Object} preset - The preset configuration
     * @param {Object} customParams - Additional parameters to override preset values
     * @returns {Object} - Formatted payload for chat completion API
     * @throws {Error} If preset is not an object
     */
    static presetToGeneratePayload(preset, customParams = {}) {
        if (!preset || typeof preset !== 'object') {
            throw new Error('Invalid preset: must be an object');
        }

        // Truthiness cannot be used here: it would discard an explicit 0
        const toOptionalNumber = (value) => {
            if (value == null || value === false || (typeof value === 'string' && value.trim() === '')) {
                return undefined;
            }
            const number = Number(value);
            return Number.isNaN(number) ? undefined : number;
        };

        // Merge preset with custom parameters
        const settings = { ...preset, ...customParams };

        // Initialize base payload with common parameters
        const payload = {
            temperature: toOptionalNumber(settings.temperature),
        };

        // Remove undefined values to avoid API errors
        Object.keys(payload).forEach(key => {
            if (payload[key] === undefined) {
                delete payload[key];
            }
        });

        return payload;
    }
}