import { isUrl } from "./isUrl";

/**
 * We want to make as few calls to the Hugging Face Hub as possible, e.g. if
 * someone is calling Inference Endpoints 1000 times per second, we don't want
 * to make 1000 calls to the Hub just to get the task name.
 */
const taskCache = new Map<string, { task: string; date: Date }>();
const CACHE_DURATION = 10 * 60 * 1000; // 10 minutes, in milliseconds
const MAX_CACHE_ITEMS = 1000;
export const HF_HUB_URL = "https://huggingface.co";

export interface DefaultTaskOptions {
	/** Custom fetch implementation, e.g. to stub Hub calls in tests */
	fetch?: typeof fetch;
}
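/*
 * Hypothetical example (not part of the original file): because `fetch` is
 * injectable through DefaultTaskOptions, the Hub lookup below is easy to
 * stub out:
 *
 *   const fakeFetch: typeof fetch = async () =>
 *     new Response(JSON.stringify({ pipeline_tag: "text-generation" }));
 *   const task = await getDefaultTask("gpt2", undefined, { fetch: fakeFetch });
 */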
/**
 * Get the default task for a model. Uses an LRU cache of 1000 items with a
 * 10 minute expiration to avoid making too many calls to the HF Hub (see the
 * usage sketch after the function).
 *
 * @returns The default task for the model, or `null` if it was impossible to get it
 */
export async function getDefaultTask(
	model: string,
	accessToken: string | undefined,
	options?: DefaultTaskOptions
): Promise<string | null> {
	// A model given as a URL points at a custom deployment rather than a Hub
	// repo, so there is no Hub task to look up.
	if (isUrl(model)) {
		return null;
	}

	const key = `${model}:${accessToken}`;
	let cachedTask = taskCache.get(key);

	// Expire cache entries older than CACHE_DURATION.
	if (cachedTask && cachedTask.date < new Date(Date.now() - CACHE_DURATION)) {
		taskCache.delete(key);
		cachedTask = undefined;
	}

	if (cachedTask === undefined) {
		// Ask the Hub API for the model's `pipeline_tag` field only.
		const modelTask = await (options?.fetch ?? fetch)(`${HF_HUB_URL}/api/models/${model}?expand[]=pipeline_tag`, {
			headers: accessToken ? { Authorization: `Bearer ${accessToken}` } : {},
		})
			.then((resp) => resp.json())
			.then((json) => json.pipeline_tag)
			.catch(() => null);

		if (!modelTask) {
			return null;
		}

		cachedTask = { task: modelTask, date: new Date() };
		taskCache.set(key, cachedTask);

		// Maps iterate in insertion order, so this evicts the oldest entry
		// to keep the cache bounded.
		if (taskCache.size > MAX_CACHE_ITEMS) {
			taskCache.delete(taskCache.keys().next().value);
		}
	}

	return cachedTask.task;
}
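/*
 * Usage sketch (illustrative; "gpt2" is a placeholder model id). The first
 * call hits the Hub API; a second call with the same `model:accessToken` key
 * within CACHE_DURATION is answered from `taskCache` with no network request.
 */
async function exampleUsage(): Promise<void> {
	const task = await getDefaultTask("gpt2", undefined);
	console.log(task); // e.g. "text-generation", or null if the lookup failed

	const again = await getDefaultTask("gpt2", undefined);
	console.log(again === task); // true when the first lookup succeeded (served from cache)
}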