eienmojiki committed
Commit ca8bea5 · verified · 1 Parent(s): fb14067

Upload 7 files

Add FastAPI chat completion service and configuration files


- Implement chat completion API
- Add Dockerfile for containerization
- Include requirements for dependencies
- Configure VSCode settings
- Create .gitignore for project files

.gitignore ADDED
@@ -0,0 +1,2 @@
+ test.py
+ .venv
.vscode/settings.json ADDED
@@ -0,0 +1,3 @@
+ {
+     "editor.wordWrap": "on"
+ }
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.11
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,9 @@
+ from fastapi import FastAPI
+ from routes.chatCompletion import router
+
+ app = FastAPI()
+ app.include_router(router)  # register the /v1/chat-completion route
+
+ @app.get("/")
+ async def root():
+     return {"message": "Hello World"}
models/chat_completion.py ADDED
@@ -0,0 +1,30 @@
+ from pydantic import BaseModel, Field
+ from typing import List, Optional
+ from huggingface_hub import (
+     ChatCompletionInputMessage,
+     ChatCompletionInputGrammarType,
+     ChatCompletionInputStreamOptions,
+     ChatCompletionInputToolChoiceClass,
+     ChatCompletionInputTool,
+ )
+
+ class ChatRequest(BaseModel):
+     model: str = Field(..., description="The model to use for chat-completion. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed Inference Endpoint. If not provided, the default recommended model for chat-based text-generation will be used. See https://huggingface.co/tasks/text-generation for more details.")
+     messages: List[ChatCompletionInputMessage] = Field(..., description="Conversation history consisting of roles and content pairs.")
+     frequency_penalty: Optional[float] = Field(0.0, ge=-2.0, le=2.0, description="Penalizes new tokens based on their existing frequency in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.")
+     logit_bias: Optional[dict] = Field(None, description="Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens to an associated bias value from -100 to 100.")
+     logprobs: Optional[bool] = Field(None, description="Whether to return log probabilities of the output tokens or not.")
+     max_tokens: Optional[int] = Field(100, description="Maximum number of tokens allowed in the response. Defaults to 100.")
+     n: Optional[int] = Field(None, description="UNUSED.")
+     presence_penalty: Optional[float] = Field(None, ge=-2.0, le=2.0, description="Positive values penalize new tokens based on whether they appear in the text so far.")
+     response_format: Optional[ChatCompletionInputGrammarType] = Field(None, description="Grammar constraints. Can be either a JSONSchema or a regex.")
+     seed: Optional[int] = Field(None, description="Seed for reproducible control flow.")
+     stop: Optional[List[str]] = Field(None, description="Up to four strings which trigger the end of the response.")
+     stream: Optional[bool] = Field(False, description="Enable realtime streaming of responses. Defaults to False.")
+     stream_options: Optional[ChatCompletionInputStreamOptions] = Field(None, description="Options for streaming completions.")
+     temperature: Optional[float] = Field(1.0, ge=0.0, le=2.0, description="Controls randomness of the generations. Lower values ensure less random completions.")
+     top_logprobs: Optional[int] = Field(None, ge=0, le=5, description="Number of most likely tokens to return at each token position.")
+     top_p: Optional[float] = Field(1.0, ge=0.0, le=1.0, description="Fraction of the most likely next words to sample from.")
+     tool_choice: Optional[ChatCompletionInputToolChoiceClass] = Field("auto", description="The tool to use for the completion. Defaults to 'auto'.")
+     tool_prompt: Optional[str] = Field(None, description="A prompt to be appended before the tools.")
+     tools: Optional[List[ChatCompletionInputTool]] = Field(None, description="A list of tools the model may call.")
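
For illustration, a minimal sketch of how this model validates a request body. The model ID and values are examples only, and it assumes pydantic can coerce plain dicts into the huggingface_hub message dataclass:

    from models.chat_completion import ChatRequest

    payload = {
        "model": "meta-llama/Meta-Llama-3-8B-Instruct",  # example model ID
        "messages": [{"role": "user", "content": "Hello!"}],
        "max_tokens": 64,
    }
    req = ChatRequest(**payload)
    print(req.temperature)  # 1.0 — the declared default

    # Out-of-range values are rejected by the Field constraints (le=2.0):
    # ChatRequest(**{**payload, "temperature": 5.0})  -> ValidationError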
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ fastapi
+ uvicorn[standard]
+ huggingface_hub==0.27.1
routes/chatCompletion.py ADDED
@@ -0,0 +1,43 @@
+ from fastapi import APIRouter
+ from fastapi.responses import StreamingResponse
+ from models.chat_completion import ChatRequest
+ from huggingface_hub import InferenceClient
+ from dataclasses import asdict
+ import json
+
+ router = APIRouter()
+
+ # Serialize each streamed chunk as a Server-Sent Events "data:" frame
+ def generate_stream(response):
+     for chunk in response:
+         yield f"data: {json.dumps(asdict(chunk), separators=(',', ':'))}\n\n"
+
+ @router.post("/v1/chat-completion", tags=["Chat Completion"])
+ async def chat_completion(body: ChatRequest):
+     client = InferenceClient(model=body.model)
+
+     res = client.chat_completion(
+         messages=body.messages,
+         frequency_penalty=body.frequency_penalty,
+         logit_bias=body.logit_bias,
+         logprobs=body.logprobs,
+         max_tokens=body.max_tokens,
+         n=body.n,
+         presence_penalty=body.presence_penalty,
+         response_format=body.response_format,
+         seed=body.seed,
+         stop=body.stop,
+         stream=body.stream,
+         stream_options=body.stream_options,
+         temperature=body.temperature,
+         top_logprobs=body.top_logprobs,
+         top_p=body.top_p,
+         tool_choice=body.tool_choice,
+         tool_prompt=body.tool_prompt,
+         tools=body.tools
+     )
+
+     if not body.stream:
+         # Return a dict so FastAPI emits JSON; json.dumps here would double-encode
+         return asdict(res)
+     else:
+         return StreamingResponse(generate_stream(res), media_type="text/event-stream")
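
To round out the picture, a hedged end-to-end call against a locally running instance. It assumes the container above is up on port 7860 and that requests is installed on the client side; the model ID is an example:

    import requests

    resp = requests.post(
        "http://localhost:7860/v1/chat-completion",
        json={
            "model": "meta-llama/Meta-Llama-3-8B-Instruct",  # example model ID
            "messages": [{"role": "user", "content": "Say hi in one sentence."}],
            "max_tokens": 32,
        },
        timeout=60,
    )
    resp.raise_for_status()
    print(resp.json())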