deepak191z committed on
Commit 14cd2f6 · verified · 1 Parent(s): b1f8565

Update main.py

Files changed (1)
  1. main.py +249 -38
main.py CHANGED
@@ -1,60 +1,271 @@
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, Request, HTTPException
+from fastapi.responses import JSONResponse, StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-from duckai import DuckAI
+from typing import List, Dict, Any, Union
+import os
+import time
+import asyncio
+import httpx
+import json
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Simple configuration
+API_PREFIX = os.getenv("API_PREFIX", "/")
+MAX_RETRY_COUNT = int(os.getenv("MAX_RETRY_COUNT", "3"))
+RETRY_DELAY = int(os.getenv("RETRY_DELAY", "5000"))  # milliseconds
+
+# Default headers for DuckDuckGo requests
+FAKE_HEADERS = {
+    "Accept": "*/*",
+    "Accept-Language": "en-US,en;q=0.9",
+    "Origin": "https://duckduckgo.com/",
+    "Referer": "https://duckduckgo.com/",
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
+}
 
 app = FastAPI()
 
-# Configure CORS
+# Add CORS middleware
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
+    allow_methods=["*"],
     allow_headers=["*"],
 )
 
-class ChatQuery(BaseModel):
-    query: str
+# Models for request validation
+class Message(BaseModel):
+    role: str
+    content: Union[str, List[Dict[str, Any]]]
+
+class ChatCompletionRequest(BaseModel):
+    model: str
+    messages: List[Message]
+    stream: bool = False
+
+# Add timing information
+@app.middleware("http")
+async def add_process_time(request: Request, call_next):
+    start_time = time.time()
+    response = await call_next(request)
+    process_time = time.time() - start_time
+    print(f"{request.method} {response.status_code} {request.url.path} {process_time*1000:.2f} ms")
+    return response
+
+@app.get("/")
+async def root():
+    return {"message": "API server running"}
+
+@app.get("/ping")
+async def ping():
+    return {"message": "pong"}
+
+@app.get(f"{API_PREFIX}v1/models")
+async def get_models():
+    return {
+        "object": "list",
+        "data": [
+            {"id": "gpt-4o-mini", "object": "model", "owned_by": "ddg"},
+            {"id": "claude-3-haiku", "object": "model", "owned_by": "ddg"},
+            {"id": "llama-3.1-70b", "object": "model", "owned_by": "ddg"},
+            {"id": "mixtral-8x7b", "object": "model", "owned_by": "ddg"},
+            {"id": "o3-mini", "object": "model", "owned_by": "ddg"},
+        ],
+    }
+
+@app.post(f"{API_PREFIX}v1/chat/completions")
+async def chat_completions(request: ChatCompletionRequest):
+    try:
+        model = convert_model(request.model)
+        content = messages_to_text(request.messages)
+        return await create_completion(model, content, request.stream)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+def convert_model(input_model: str) -> str:
+    """Convert public model names to DuckDuckGo internal model names"""
+    model_mapping = {
+        "claude-3-haiku": "claude-3-haiku-20240307",
+        "llama-3.1-70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+        "mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+        "o3-mini": "o3-mini",
+    }
+    return model_mapping.get(input_model.lower(), "gpt-4o-mini")
 
-@app.get("/chat/")
-async def chat(query: str):
-    if not query:
-        raise HTTPException(status_code=400, detail="Query parameter is required")
+def messages_to_text(messages: List[Message]) -> str:
+    """Convert the message array to the text format expected by the DuckDuckGo API"""
+    result = ""
+    for message in messages:
+        # Only user/assistant roles are kept; system messages are sent as user
+        role = "user" if message.role == "system" else message.role
+
+        if role in ["user", "assistant"]:
+            # Handle both string content and structured content
+            if isinstance(message.content, list):
+                content_str = "".join([item.get("text", "") for item in message.content if item.get("text", "")])
+            else:
+                content_str = message.content
+
+            result += f"{role}:{content_str};\r\n"
+
+    return result
 
-    duck = DuckAI()
+async def request_token() -> str:
+    """Get an auth token (x-vqd-4 header) from DuckDuckGo"""
     try:
-        results = duck.chat(query, model='gpt-4o-mini')
-        return {"results": results}
-    except Exception as e1:
-        print(f"Primary model (gpt-4o-mini) failed: {e1}")
-        try:
-            results = duck.chat(query, model='claude-3-haiku')
-            return {"results": results}
-        except Exception as e2:
-            print(f"Fallback model (claude-3-haiku) also failed: {e2}")
-            raise HTTPException(
-                status_code=500,
-                detail={
-                    "error": "Both models failed",
-                    "primary_error": str(e1),
-                    "fallback_error": str(e2)
-                }
+        async with httpx.AsyncClient() as client:
+            response = await client.get(
+                "https://duckduckgo.com/duckchat/v1/status",
+                headers={**FAKE_HEADERS, "x-vqd-accept": "1"}
             )
+            return response.headers.get("x-vqd-4", "")
+    except Exception as e:
+        print(f"Token request error: {e}")
+        return ""
 
-async def chat_with_model(query: str, model: str):
+async def create_completion(model: str, content: str, return_stream: bool, retry_count: int = 0):
+    """Create a chat completion via the DuckDuckGo API"""
+    token = await request_token()
+
     try:
-        duck = DuckAI()
-        results = duck.chat(query, model=model)
-        return {"results": results}
+        # httpx's post() has no stream=True argument; build the request and send
+        # it with client.send(..., stream=True) so the connection stays open
+        # while the body is consumed downstream.
+        client = httpx.AsyncClient(timeout=None)
+        request = client.build_request(
+            "POST",
+            "https://duckduckgo.com/duckchat/v1/chat",
+            headers={
+                **FAKE_HEADERS,
+                "Accept": "text/event-stream",
+                "Content-Type": "application/json",
+                "x-vqd-4": token,
+            },
+            json={
+                "model": model,
+                "messages": [{"role": "user", "content": content}]
+            },
+        )
+        response = await client.send(request, stream=True)
+
+        if response.status_code != 200:
+            await response.aclose()
+            await client.aclose()
+            raise HTTPException(status_code=response.status_code, detail="API request failed")
+
+        return await process_stream(model, client, response, return_stream)
     except Exception as e:
+        if retry_count < MAX_RETRY_COUNT:
+            print(f"Retrying... attempt {retry_count + 1}")
+            await asyncio.sleep(RETRY_DELAY / 1000)
+            return await create_completion(model, content, return_stream, retry_count + 1)
         raise HTTPException(status_code=500, detail=str(e))
-@app.get("/health")
-@app.get("/")
-async def health_check():
-    logger.info("Health check endpoint called")
-    return {"status": "healthy"}
+
+async def process_stream(model: str, client: httpx.AsyncClient, response: httpx.Response, return_stream: bool):
+    """Process the streaming response from DuckDuckGo"""
+    buffer = ""
+    full_text = ""
+
+    async def generate_stream():
+        nonlocal buffer, full_text
+
+        try:
+            # Process chunks as they arrive
+            async for chunk in response.aiter_bytes():
+                chunk_str = chunk.decode('utf-8').strip()
+
+                # Prepend the buffer from the previous chunk if needed
+                if buffer:
+                    chunk_str = buffer + chunk_str
+                    buffer = ""
+
+                # Hold incomplete chunks until the rest arrives
+                if not chunk_str.endswith('"}') and "[DONE]" not in chunk_str:
+                    buffer = chunk_str
+                    continue
+
+                # Process each line in the chunk
+                for line in chunk_str.split('\n'):
+                    if len(line) < 6:
+                        continue
+
+                    # Remove the SSE prefix ("data: ")
+                    line = line[6:] if line.startswith("data: ") else line
+
+                    # Handle the completion signal
+                    if line == "[DONE]":
+                        if return_stream:
+                            yield f"data: {json.dumps(create_stop_chunk(model))}\n\n"
+                        return
+
+                    # Parse and handle message content
+                    try:
+                        data = json.loads(line)
+                        if data.get("action") == "success" and "message" in data:
+                            message = data["message"]
+                            full_text += message
+
+                            if return_stream:
+                                yield f"data: {json.dumps(create_chunk(message, model))}\n\n"
+                    except json.JSONDecodeError:
+                        continue
+        finally:
+            # Release the connection once the stream has been consumed
+            await response.aclose()
+            await client.aclose()
+
+    # Return the appropriate response based on the streaming preference
+    if return_stream:
+        return StreamingResponse(generate_stream(), media_type="text/event-stream")
+    else:
+        # For non-streaming, drain the generator; it accumulates the reply in full_text
+        async for _ in generate_stream():
+            pass
+
+        return JSONResponse(content=create_complete_response(full_text, model))
+
+def create_chunk(text: str, model: str) -> dict:
+    """Create a streaming chunk response"""
+    return {
+        "id": "chatcmpl-123",
+        "object": "chat.completion.chunk",
+        "created": int(time.time()),
+        "model": model,
+        "choices": [
+            {
+                "index": 0,
+                "delta": {"content": text},
+                "finish_reason": None,
+            },
+        ],
+    }
+
+def create_stop_chunk(model: str) -> dict:
+    """Create a final streaming chunk with a stop reason"""
+    return {
+        "id": "chatcmpl-123",
+        "object": "chat.completion.chunk",
+        "created": int(time.time()),
+        "model": model,
+        "choices": [
+            {
+                "index": 0,
+                "delta": {},
+                "finish_reason": "stop",
+            },
+        ],
+    }
+
+def create_complete_response(text: str, model: str) -> dict:
+    """Create a complete non-streaming response"""
+    return {
+        "id": "chatcmpl-123",
+        "object": "chat.completion",
+        "created": int(time.time()),
+        "model": model,
+        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
+        "choices": [
+            {
+                "message": {"content": text, "role": "assistant"},
+                "index": 0,
+                "finish_reason": "stop",
+            },
+        ],
+    }
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info", reload=True)
+    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)
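
After this change the server speaks the OpenAI chat-completions wire format instead of the old /chat/ query endpoint. As a quick sanity check, here is a minimal client sketch (an illustration, not part of the commit), assuming the server is running locally on port 7860 with the default API_PREFIX of "/":

import httpx

# Non-streaming request against the /v1/chat/completions route added above
resp = httpx.post(
    "http://localhost:7860/v1/chat/completions",
    json={
        "model": "claude-3-haiku",  # remapped internally by convert_model()
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        "stream": False,
    },
    timeout=60.0,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])

With "stream": True the route instead returns a text/event-stream of chat.completion.chunk objects built by create_chunk(), ending with a finish_reason "stop" chunk from create_stop_chunk().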