app/api/ask-ai/route.ts CHANGED
@@ -51,10 +51,22 @@ export async function POST(request: NextRequest) {
51
  { status: 400 }
52
  );
53
  }
54
-
55
- let token = userToken;
56
  let billTo: string | null = null;
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  /**
59
  * Handle local usage token; this bypasses the need for a user token
60
  * and allows local testing without authentication.
@@ -64,11 +76,7 @@ export async function POST(request: NextRequest) {
64
  token = process.env.HF_TOKEN;
65
  }
66
 
67
- const ip = authHeaders.get("x-forwarded-for")?.includes(",")
68
- ? authHeaders.get("x-forwarded-for")?.split(",")[1].trim()
69
- : authHeaders.get("x-forwarded-for");
70
-
71
- if (!token) {
72
  ipAddresses.set(ip, (ipAddresses.get(ip) || 0) + 1);
73
  if (ipAddresses.get(ip) > MAX_REQUESTS_PER_IP) {
74
  return NextResponse.json(
@@ -85,12 +93,6 @@ export async function POST(request: NextRequest) {
85
  billTo = "huggingface";
86
  }
87
 
88
- const DEFAULT_PROVIDER = PROVIDERS.novita;
89
- const selectedProvider =
90
- provider === "auto"
91
- ? PROVIDERS[selectedModel.autoProvider as keyof typeof PROVIDERS]
92
- : PROVIDERS[provider as keyof typeof PROVIDERS] ?? DEFAULT_PROVIDER;
93
-
94
  try {
95
  // Create a stream response
96
  const encoder = new TextEncoder();
@@ -109,11 +111,11 @@ export async function POST(request: NextRequest) {
109
  (async () => {
110
  let completeResponse = "";
111
  try {
112
- const client = new InferenceClient(token);
113
  const chatCompletion = client.chatCompletionStream(
114
  {
115
  model: selectedModel.value,
116
- provider: selectedProvider.id as any,
117
  messages: [
118
  {
119
  role: "system",
@@ -234,9 +236,17 @@ export async function PUT(request: NextRequest) {
234
 
235
  const selectedModel = MODELS[0];
236
 
237
- let token = userToken;
238
  let billTo: string | null = null;
239
 
 
 
 
 
 
 
 
 
 
240
  /**
241
  * Handle local usage token; this bypasses the need for a user token
242
  * and allows local testing without authentication.
@@ -250,7 +260,7 @@ export async function PUT(request: NextRequest) {
250
  ? authHeaders.get("x-forwarded-for")?.split(",")[1].trim()
251
  : authHeaders.get("x-forwarded-for");
252
 
253
- if (!token) {
254
  ipAddresses.set(ip, (ipAddresses.get(ip) || 0) + 1);
255
  if (ipAddresses.get(ip) > MAX_REQUESTS_PER_IP) {
256
  return NextResponse.json(
@@ -267,19 +277,13 @@ export async function PUT(request: NextRequest) {
267
  billTo = "huggingface";
268
  }
269
 
270
- const client = new InferenceClient(token);
271
-
272
- const DEFAULT_PROVIDER = PROVIDERS.novita;
273
- const selectedProvider =
274
- provider === "auto"
275
- ? PROVIDERS[selectedModel.autoProvider as keyof typeof PROVIDERS]
276
- : PROVIDERS[provider as keyof typeof PROVIDERS] ?? DEFAULT_PROVIDER;
277
 
278
  try {
279
  const response = await client.chatCompletion(
280
  {
281
  model: selectedModel.value,
282
- provider: selectedProvider.id as any,
283
  messages: [
284
  {
285
  role: "system",
 
51
  { status: 400 }
52
  );
53
  }
 
 
54
  let billTo: string | null = null;
55
 
56
+ const ip = authHeaders.get("x-forwarded-for")?.includes(",")
57
+ ? authHeaders.get("x-forwarded-for")?.split(",")[1].trim()
58
+ : authHeaders.get("x-forwarded-for");
59
+
60
+ const DEFAULT_PROVIDER = PROVIDERS.novita;
61
+ const selectedProvider =
62
+ provider === "auto"
63
+ ? PROVIDERS[selectedModel.autoProvider as keyof typeof PROVIDERS]
64
+ : PROVIDERS[provider as keyof typeof PROVIDERS] ?? DEFAULT_PROVIDER;
65
+
66
+ const isCustom = selectedProvider.ic != null;
67
+ const {accessToken, defaultOptions} = isCustom ? selectedProvider.ic! : {accessToken: userToken, defaultOptions:undefined};
68
+ let token = accessToken;
69
+
70
  /**
71
  * Handle local usage token; this bypasses the need for a user token
72
  * and allows local testing without authentication.
 
76
  token = process.env.HF_TOKEN;
77
  }
78
 
79
+ if (!token && !selectedProvider.ic) {
 
 
 
 
80
  ipAddresses.set(ip, (ipAddresses.get(ip) || 0) + 1);
81
  if (ipAddresses.get(ip) > MAX_REQUESTS_PER_IP) {
82
  return NextResponse.json(
 
93
  billTo = "huggingface";
94
  }
95
 
 
 
 
 
 
 
96
  try {
97
  // Create a stream response
98
  const encoder = new TextEncoder();
 
111
  (async () => {
112
  let completeResponse = "";
113
  try {
114
+ const client = new InferenceClient(token, defaultOptions);
115
  const chatCompletion = client.chatCompletionStream(
116
  {
117
  model: selectedModel.value,
118
+ provider: isCustom ? undefined : selectedProvider.id as any,
119
  messages: [
120
  {
121
  role: "system",
 
236
 
237
  const selectedModel = MODELS[0];
238
 
 
239
  let billTo: string | null = null;
240
 
241
+ const DEFAULT_PROVIDER = PROVIDERS.novita;
242
+ const selectedProvider =
243
+ provider === "auto"
244
+ ? PROVIDERS[selectedModel.autoProvider as keyof typeof PROVIDERS]
245
+ : PROVIDERS[provider as keyof typeof PROVIDERS] ?? DEFAULT_PROVIDER;
246
+ const isCustom = selectedProvider.ic != null;
247
+ const {accessToken, defaultOptions} = isCustom ? selectedProvider.ic! : {accessToken: userToken, defaultOptions:undefined};
248
+ let token = accessToken;
249
+
250
  /**
251
  * Handle local usage token; this bypasses the need for a user token
252
  * and allows local testing without authentication.
 
260
  ? authHeaders.get("x-forwarded-for")?.split(",")[1].trim()
261
  : authHeaders.get("x-forwarded-for");
262
 
263
+ if (!token && !selectedProvider.ic) {
264
  ipAddresses.set(ip, (ipAddresses.get(ip) || 0) + 1);
265
  if (ipAddresses.get(ip) > MAX_REQUESTS_PER_IP) {
266
  return NextResponse.json(
 
277
  billTo = "huggingface";
278
  }
279
 
280
+ const client = new InferenceClient(token, defaultOptions);
 
 
 
 
 
 
281
 
282
  try {
283
  const response = await client.chatCompletion(
284
  {
285
  model: selectedModel.value,
286
+ provider: isCustom ? undefined : selectedProvider.id as any,
287
  messages: [
288
  {
289
  role: "system",
lib/providers.ts CHANGED
@@ -1,4 +1,8 @@
1
- export const PROVIDERS = {
 
 
 
 
2
  "fireworks-ai": {
3
  name: "Fireworks AI",
4
  max_tokens: 131_000,
@@ -29,9 +33,35 @@ export const PROVIDERS = {
29
  max_tokens: 128_000,
30
  id: "together",
31
  },
 
 
 
 
 
 
 
 
 
32
  };
33
 
34
  export const MODELS = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  {
36
  value: "deepseek-ai/DeepSeek-V3-0324",
37
  label: "DeepSeek V3 O324",
 
1
+ import { type Options } from "@huggingface/inference";
2
+
3
+ type ProviderType = {[key:string]: {name: string, max_tokens: number, id: string, ic?: {accessToken?:string, defaultOptions: Options & { endpointUrl?: string;}}}};
4
+
5
+ export const PROVIDERS: ProviderType = {
6
  "fireworks-ai": {
7
  name: "Fireworks AI",
8
  max_tokens: 131_000,
 
33
  max_tokens: 128_000,
34
  id: "together",
35
  },
36
+ ollama: {
37
+ name: "Ollama AI",
38
+ max_tokens: 128_000,
39
+ id: "ollama",
40
+ ic: {
41
+ accessToken: undefined,
42
+ defaultOptions: {endpointUrl: 'http://localhost:11434'}
43
+ }
44
+ },
45
  };
46
 
47
  export const MODELS = [
48
+ {
49
+ value: "deepseek-r1:latest",
50
+ label: "DeepSeek R1 0528 (Ollama)",
51
+ providers: [
52
+ "ollama"
53
+ ],
54
+ autoProvider: "ollama",
55
+ isThinker: true,
56
+ },
57
+ {
58
+ value: "gemma3:4b",
59
+ label: "Gemma 3 (Ollama)",
60
+ providers: [
61
+ "ollama"
62
+ ],
63
+ autoProvider: "ollama",
64
+ },
65
  {
66
  value: "deepseek-ai/DeepSeek-V3-0324",
67
  label: "DeepSeek V3 O324",
public/providers/ollama.svg ADDED