app/api/ask-ai/route.ts CHANGED
@@ -51,10 +51,22 @@ export async function POST(request: NextRequest) {
51
  { status: 400 }
52
  );
53
  }
54
-
55
- let token = userToken;
56
  let billTo: string | null = null;
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  /**
59
  * Handle local usage token; this bypasses the need for a user token
60
  * and allows local testing without authentication.
@@ -64,11 +76,7 @@ export async function POST(request: NextRequest) {
64
  token = process.env.HF_TOKEN;
65
  }
66
 
67
- const ip = authHeaders.get("x-forwarded-for")?.includes(",")
68
- ? authHeaders.get("x-forwarded-for")?.split(",")[1].trim()
69
- : authHeaders.get("x-forwarded-for");
70
-
71
- if (!token) {
72
  ipAddresses.set(ip, (ipAddresses.get(ip) || 0) + 1);
73
  if (ipAddresses.get(ip) > MAX_REQUESTS_PER_IP) {
74
  return NextResponse.json(
@@ -85,12 +93,6 @@ export async function POST(request: NextRequest) {
85
  billTo = "huggingface";
86
  }
87
 
88
- const DEFAULT_PROVIDER = PROVIDERS.novita;
89
- const selectedProvider =
90
- provider === "auto"
91
- ? PROVIDERS[selectedModel.autoProvider as keyof typeof PROVIDERS]
92
- : PROVIDERS[provider as keyof typeof PROVIDERS] ?? DEFAULT_PROVIDER;
93
-
94
  try {
95
  // Create a stream response
96
  const encoder = new TextEncoder();
@@ -109,11 +111,11 @@ export async function POST(request: NextRequest) {
109
  (async () => {
110
  let completeResponse = "";
111
  try {
112
- const client = new InferenceClient(token);
113
  const chatCompletion = client.chatCompletionStream(
114
  {
115
  model: selectedModel.value,
116
- provider: selectedProvider.id as any,
117
  messages: [
118
  {
119
  role: "system",
@@ -234,9 +236,17 @@ export async function PUT(request: NextRequest) {
234
 
235
  const selectedModel = MODELS[0];
236
 
237
- let token = userToken;
238
  let billTo: string | null = null;
239
 
 
 
 
 
 
 
 
 
 
240
  /**
241
  * Handle local usage token; this bypasses the need for a user token
242
  * and allows local testing without authentication.
@@ -250,7 +260,7 @@ export async function PUT(request: NextRequest) {
250
  ? authHeaders.get("x-forwarded-for")?.split(",")[1].trim()
251
  : authHeaders.get("x-forwarded-for");
252
 
253
- if (!token) {
254
  ipAddresses.set(ip, (ipAddresses.get(ip) || 0) + 1);
255
  if (ipAddresses.get(ip) > MAX_REQUESTS_PER_IP) {
256
  return NextResponse.json(
@@ -267,19 +277,13 @@ export async function PUT(request: NextRequest) {
267
  billTo = "huggingface";
268
  }
269
 
270
- const client = new InferenceClient(token);
271
-
272
- const DEFAULT_PROVIDER = PROVIDERS.novita;
273
- const selectedProvider =
274
- provider === "auto"
275
- ? PROVIDERS[selectedModel.autoProvider as keyof typeof PROVIDERS]
276
- : PROVIDERS[provider as keyof typeof PROVIDERS] ?? DEFAULT_PROVIDER;
277
 
278
  try {
279
  const response = await client.chatCompletion(
280
  {
281
  model: selectedModel.value,
282
- provider: selectedProvider.id as any,
283
  messages: [
284
  {
285
  role: "system",
 
51
  { status: 400 }
52
  );
53
  }
 
 
54
  let billTo: string | null = null;
55
 
56
+ const ip = authHeaders.get("x-forwarded-for")?.includes(",")
57
+ ? authHeaders.get("x-forwarded-for")?.split(",")[1].trim()
58
+ : authHeaders.get("x-forwarded-for");
59
+
60
+ const DEFAULT_PROVIDER = PROVIDERS.novita;
61
+ const selectedProvider =
62
+ provider === "auto"
63
+ ? PROVIDERS[selectedModel.autoProvider as keyof typeof PROVIDERS]
64
+ : PROVIDERS[provider as keyof typeof PROVIDERS] ?? DEFAULT_PROVIDER;
65
+
66
+ const isCustom = selectedProvider.ic != null;
67
+ const {accessToken, defaultOptions} = isCustom ? selectedProvider.ic! : {accessToken: userToken, defaultOptions:undefined};
68
+ let token = accessToken;
69
+
70
  /**
71
  * Handle local usage token; this bypasses the need for a user token
72
  * and allows local testing without authentication.
 
76
  token = process.env.HF_TOKEN;
77
  }
78
 
79
+ if (!token && !selectedProvider.ic) {
 
 
 
 
80
  ipAddresses.set(ip, (ipAddresses.get(ip) || 0) + 1);
81
  if (ipAddresses.get(ip) > MAX_REQUESTS_PER_IP) {
82
  return NextResponse.json(
 
93
  billTo = "huggingface";
94
  }
95
 
 
 
 
 
 
 
96
  try {
97
  // Create a stream response
98
  const encoder = new TextEncoder();
 
111
  (async () => {
112
  let completeResponse = "";
113
  try {
114
+ const client = new InferenceClient(token, defaultOptions);
115
  const chatCompletion = client.chatCompletionStream(
116
  {
117
  model: selectedModel.value,
118
+ provider: isCustom ? undefined : selectedProvider.id as any,
119
  messages: [
120
  {
121
  role: "system",
 
236
 
237
  const selectedModel = MODELS[0];
238
 
 
239
  let billTo: string | null = null;
240
 
241
+ const DEFAULT_PROVIDER = PROVIDERS.novita;
242
+ const selectedProvider =
243
+ provider === "auto"
244
+ ? PROVIDERS[selectedModel.autoProvider as keyof typeof PROVIDERS]
245
+ : PROVIDERS[provider as keyof typeof PROVIDERS] ?? DEFAULT_PROVIDER;
246
+ const isCustom = selectedProvider.ic != null;
247
+ const {accessToken, defaultOptions} = isCustom ? selectedProvider.ic! : {accessToken: userToken, defaultOptions:undefined};
248
+ let token = accessToken;
249
+
250
  /**
251
  * Handle local usage token; this bypasses the need for a user token
252
  * and allows local testing without authentication.
 
260
  ? authHeaders.get("x-forwarded-for")?.split(",")[1].trim()
261
  : authHeaders.get("x-forwarded-for");
262
 
263
+ if (!token && !selectedProvider.ic) {
264
  ipAddresses.set(ip, (ipAddresses.get(ip) || 0) + 1);
265
  if (ipAddresses.get(ip) > MAX_REQUESTS_PER_IP) {
266
  return NextResponse.json(
 
277
  billTo = "huggingface";
278
  }
279
 
280
+ const client = new InferenceClient(token, defaultOptions);
 
 
 
 
 
 
281
 
282
  try {
283
  const response = await client.chatCompletion(
284
  {
285
  model: selectedModel.value,
286
+ provider: isCustom ? undefined : selectedProvider.id as any,
287
  messages: [
288
  {
289
  role: "system",
lib/providers.ts CHANGED
@@ -1,4 +1,8 @@
1
- export const PROVIDERS = {
 
 
 
 
2
  "fireworks-ai": {
3
  name: "Fireworks AI",
4
  max_tokens: 131_000,
@@ -29,9 +33,35 @@ export const PROVIDERS = {
29
  max_tokens: 128_000,
30
  id: "together",
31
  },
 
 
 
 
 
 
 
 
 
32
  };
33
 
34
  export const MODELS = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  {
36
  value: "deepseek-ai/DeepSeek-V3-0324",
37
  label: "DeepSeek V3 O324",
 
1
+ import { type Options } from "@huggingface/inference";
2
+
3
+ type ProviderType = {[key:string]: {name: string, max_tokens: number, id: string, ic?: {accessToken?:string, defaultOptions: Options & { endpointUrl?: string;}}}};
4
+
5
+ export const PROVIDERS: ProviderType = {
6
  "fireworks-ai": {
7
  name: "Fireworks AI",
8
  max_tokens: 131_000,
 
33
  max_tokens: 128_000,
34
  id: "together",
35
  },
36
+ ollama: {
37
+ name: "Ollama AI",
38
+ max_tokens: 128_000,
39
+ id: "ollama",
40
+ ic: {
41
+ accessToken: undefined,
42
+ defaultOptions: {endpointUrl: 'http://localhost:11434'}
43
+ }
44
+ },
45
  };
46
 
47
  export const MODELS = [
48
+ {
49
+ value: "deepseek-r1:latest",
50
+ label: "DeepSeek R1 0528 (Ollama)",
51
+ providers: [
52
+ "ollama"
53
+ ],
54
+ autoProvider: "ollama",
55
+ isThinker: true,
56
+ },
57
+ {
58
+ value: "gemma3:4b",
59
+ label: "Gemma 3 (Ollama)",
60
+ providers: [
61
+ "ollama"
62
+ ],
63
+ autoProvider: "ollama",
64
+ },
65
  {
66
  value: "deepseek-ai/DeepSeek-V3-0324",
67
  label: "DeepSeek V3 O324",
public/providers/ollama.svg ADDED