Spaces:
Sleeping
Sleeping
import { existsSync, mkdirSync, writeFileSync } from 'fs'; | |
import { resolve } from 'path'; | |
import { d as private_env } from './shared-server-49TKSBDM.js'; | |
import dns from 'node:dns'; | |
// Prefer IPv4 results from DNS lookups for all subsequent fetches.
// NOTE(review): presumably works around Node 17+ "verbatim" resolution
// returning IPv6 first for hosts (e.g. "localhost") where the llama.cpp
// server listens only on IPv4 — confirm against deployment setup.
dns.setDefaultResultOrder("ipv4first");
/**
 * Client for a llama.cpp HTTP server. Wraps the server's /health, /tokenize,
 * /detokenize and /completion endpoints and exposes streaming completions as
 * async generators of token events.
 *
 * Sampling options are read from environment variables (LLM_API_*) through
 * `private_env`. Env values are always strings, so numeric and boolean
 * options are parsed explicitly.
 */
class LlamaCppService {
  url = "";

  /**
   * @param {string} url - Base URL of the llama.cpp server, without a
   *   trailing slash (e.g. "http://127.0.0.1:8080").
   */
  constructor(url) {
    this.url = url;
  }

  /**
   * Probe the server's /health endpoint.
   * @returns {Promise<"ok"|"error"|"loading model"|"unavailable">}
   *   The server-reported status, or "unavailable" when the server cannot be
   *   reached or reports an unknown status.
   */
  async health() {
    try {
      const r = await fetch(`${this.url}/health`, {
        method: "GET",
        headers: {
          "Content-Type": "application/json"
        }
      });
      const data = await r.json();
      if (data.status === "ok" || data.status === "error" || data.status === "loading model") {
        return data.status;
      }
    } catch (error) {
      // FIX: the previous JSON.parse(JSON.stringify(error)) always logged "{}"
      // because an Error's message/stack are non-enumerable. Log it directly.
      console.log(error);
    }
    return "unavailable";
  }

  /**
   * Tokenize `prompt` via the server's /tokenize endpoint.
   * @param {string} prompt
   * @param {AbortController} abortController - cancels the request when aborted.
   * @returns {Promise<number[]|null>} Token ids, or null on any failure.
   */
  async tokenize(prompt, abortController) {
    const response = await fetch(`${this.url}/tokenize`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        "content": prompt
      }),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.tokens) {
        return data.tokens;
      }
    } else if (response.status === 404) {
      console.log("Tokenization endpoint not found (404).");
    } else {
      console.log(`Failed to tokenize: ${await response.text()}`);
    }
    return null;
  }

  /**
   * Convert token ids back to text via the server's /detokenize endpoint.
   * @param {number[]} tokens
   * @param {AbortController} abortController - cancels the request when aborted.
   * @returns {Promise<string|null>} Trimmed text, or null on any failure.
   *   (An empty "" content is falsy and also yields null — preserved from the
   *   original behavior.)
   */
  async detokenize(tokens, abortController) {
    const response = await fetch(`${this.url}/detokenize`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        "tokens": tokens
      }),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.content) {
        return data.content.trim();
      }
    } else if (response.status === 404) {
      console.log("Detokenization endpoint not found (404).");
    } else {
      console.log(`Failed to detokenize: ${await response.text()}`);
    }
    return null;
  }

  /**
   * Interpret a string-valued env var as a boolean.
   * Env vars are never real booleans; treat common "off" spellings as false.
   */
  static #envBool(value) {
    return !["false", "0", "no", "off", ""].includes(String(value).trim().toLowerCase());
  }

  /**
   * Build the /completion request body from `prompt` plus LLM_API_* env vars.
   * @param {string} prompt
   * @returns {object} llama.cpp completion request (stream: true).
   */
  createRequest(prompt) {
    const request = {
      "stream": true,
      "stop": ["</s>", "bot:", "user:"],
      "prompt": prompt
    };
    // Defaults: unlimited prediction (-1), greedy sampling (temperature 0).
    request["n_predict"] = private_env.LLM_API_N_PREDICT ? Number(private_env.LLM_API_N_PREDICT) : -1;
    request["temperature"] = private_env.LLM_API_TEMPERATURE ? Number(private_env.LLM_API_TEMPERATURE) : 0;
    if (private_env.LLM_API_TOP_K) {
      request["top_k"] = Number(private_env.LLM_API_TOP_K);
    }
    if (private_env.LLM_API_TOP_P) {
      request["top_p"] = Number(private_env.LLM_API_TOP_P);
    }
    if (private_env.LLM_API_MIN_P) {
      request["min_p"] = Number(private_env.LLM_API_MIN_P);
    }
    if (private_env.LLM_API_SEED) {
      request["seed"] = Number(private_env.LLM_API_SEED);
    }
    if (private_env.LLM_API_N_KEEP) {
      request["n_keep"] = Number(private_env.LLM_API_N_KEEP);
    }
    if (private_env.LLM_CACHE_PROMPT) {
      // FIX: Boolean("false") === true. Parse the string value explicitly so
      // LLM_CACHE_PROMPT=false actually disables prompt caching.
      request["cache_prompt"] = LlamaCppService.#envBool(private_env.LLM_CACHE_PROMPT);
    }
    request["repeat_penalty"] = private_env.LLM_API_REPEAT_PENALTY ? Number(private_env.LLM_API_REPEAT_PENALTY) : 1.2;
    request["repeat_last_n"] = private_env.LLM_API_REPEAT_LAST_N ? Number(private_env.LLM_API_REPEAT_LAST_N) : 256;
    return request;
  }

  /**
   * Truncate `prompt` to at most `maxTokens` model tokens, keeping the
   * beginning ("head", one-shot prompts) or the end ("tail", chat history).
   * Falls back to a raw character cut of `maxChars` when the server's
   * tokenize/detokenize endpoints are unavailable.
   */
  async #truncate(prompt, abortController, { keep, maxTokens, maxChars }) {
    const tokens = await this.tokenize(prompt, abortController);
    if (tokens !== null) {
      const kept = keep === "head" ? tokens.slice(0, maxTokens) : tokens.slice(-maxTokens);
      const detokenized = await this.detokenize(kept, abortController);
      if (detokenized !== null) {
        return detokenized;
      }
    }
    return keep === "head"
      ? prompt.substring(0, maxChars)
      : prompt.substring(Math.max(0, prompt.length - maxChars));
  }

  /**
   * Shared SSE consumer: reads the decoded /completion stream and yields
   * text-generation-inference-shaped token events until the stream ends.
   * @param {ReadableStreamDefaultReader<string>|undefined} reader
   * @param {(generatedText: string) => void} [onDone] - invoked once when the
   *   server closes the stream normally (not on an empty chunk).
   */
  async *#streamTokens(reader, onDone) {
    let generatedText = "";
    let tokenId = 0;
    while (true) {
      const out = await reader?.read() ?? { done: false, value: void 0 };
      if (out.done) {
        reader?.cancel();
        onDone?.(generatedText);
        return;
      }
      if (!out.value) {
        reader?.cancel();
        return;
      }
      let tokenValue = "";
      if (out.value.startsWith("data: ")) {
        try {
          // FIX: data.content may be undefined; the old code crashed on
          // tokenValue.replace(). Default to "" at assignment time instead.
          tokenValue = JSON.parse(out.value.slice(6)).content ?? "";
        } catch (e) {
          // Malformed/partial SSE chunk — deliberately skipped (original behavior).
        }
      } else if (private_env.LLM_API_VERSION === "v1") {
        // v1 servers stream raw text rather than SSE "data:" frames.
        tokenValue = out.value;
      }
      const text = tokenValue.replace("</s>", "");
      generatedText += text;
      yield {
        token: {
          id: tokenId++,
          text,
          logprob: 0,
          special: false
        },
        generated_text: null,
        details: null
      };
    }
  }

  /**
   * Build a one-shot completion runner. The outer `prompt` argument is unused
   * (kept for interface compatibility); the actual prompt arrives via the
   * returned function's `prompt` property.
   * @returns {({prompt}) => Promise<AsyncGenerator>} runner yielding token events.
   */
  predict(prompt, { abortController }) {
    return async ({ prompt: prompt2 }) => {
      prompt2 = `[INST] ${prompt2} [/INST] `;
      // Keep the head of the prompt within the context budget.
      prompt2 = await this.#truncate(prompt2, abortController, {
        keep: "head",
        maxTokens: 30700,
        maxChars: 32768
      });
      const request = this.createRequest(prompt2);
      let r;
      // Optionally retry 404s (server may still be loading behind a proxy).
      while (true) {
        r = await fetch(`${this.url}/completion`, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            "Accept": "text/event-stream"
          },
          body: JSON.stringify(request),
          signal: abortController.signal
        });
        if (r.status === 404 && private_env.LLM_API_404_RETRY_INTERVAL) {
          console.log(`Received 404, retrying after ${private_env.LLM_API_404_RETRY_INTERVAL} seconds...`);
          await new Promise((resolve2) => setTimeout(resolve2, Number(private_env.LLM_API_404_RETRY_INTERVAL) * 1e3));
        } else {
          break;
        }
      }
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      // FIX: dropped a stray `await` — pipeThrough().getReader() is not a Promise.
      const reader = r.body?.pipeThrough(new TextDecoderStream()).getReader();
      // Only predict() logs the exchange on normal stream completion.
      return this.#streamTokens(reader, (generatedText) => {
        this.createLogFile(JSON.stringify({
          "request": prompt2,
          "response": generatedText
        }), "llm-service");
      });
    };
  }

  /**
   * Build a multi-turn chat runner over [userPrompt, botResponse] pairs.
   * @returns {({history}) => Promise<AsyncGenerator>} runner yielding token events.
   */
  conversation(history, { abortController }) {
    return async ({ history: history2 }) => {
      let prompt = this.formatPrompt(history2);
      // Keep the tail (most recent turns) within the context budget.
      prompt = await this.#truncate(prompt, abortController, {
        keep: "tail",
        maxTokens: 30700,
        maxChars: 30700
      });
      // FIX: the request was previously built from the UNTRUNCATED prompt,
      // making the truncation above dead code. Build it after truncation,
      // matching predict().
      const request = this.createRequest(prompt);
      const r = await fetch(`${this.url}/completion`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Accept": "text/event-stream"
        },
        body: JSON.stringify(request),
        signal: abortController.signal
      });
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      const reader = r.body?.pipeThrough(new TextDecoderStream()).getReader();
      return this.#streamTokens(reader);
    };
  }

  /**
   * Serialize chat history into the model's instruction format.
   * @param {Array<[string, string|null]>} history - [userPrompt, botResponse] pairs;
   *   a falsy botResponse (pending turn) is omitted.
   * @returns {string}
   */
  formatPrompt(history) {
    let prompt = "";
    for (const [userPrompt, botResponse] of history) {
      prompt += `[INST] ${userPrompt} [/INST]`;
      if (botResponse) {
        prompt += `${botResponse}`;
      }
    }
    return prompt;
  }

  /**
   * Best-effort dump of `text` to a timestamped JSON file under
   * LOGS_ROOT_FOLDER/llama. Failures are logged and swallowed deliberately —
   * logging must never break generation.
   * @param {string} text - file contents (already serialized).
   * @param {string} [namePrefix] - prefix for the generated file name.
   */
  createLogFile(text, namePrefix = "") {
    try {
      const logsDirectory = resolve(private_env.LOGS_ROOT_FOLDER + "/llama");
      if (!existsSync(logsDirectory)) {
        mkdirSync(logsDirectory, {
          recursive: true
        });
      }
      // Strip ":" and "." so the ISO timestamp is filename-safe everywhere.
      const timestamp = new Date().toISOString().replace(/[:.]/g, "");
      const logFilePath = resolve(logsDirectory, `${namePrefix}${timestamp}.json`);
      writeFileSync(logFilePath, text);
      console.log(`Log file created: ${logFilePath}`);
    } catch (e) {
      console.log(`Failed to create log file in llama service`);
      console.log(e);
    }
  }
}
export { LlamaCppService as L }; | |
//# sourceMappingURL=LlamaCppService-XtF0SQo9.js.map | |