Rúben Almeida committed on
Commit
af9aed3
·
1 Parent(s): 5e9b3af

RedirectResponse import path does not exist

Browse files
Files changed (1) hide show
  1. main.py +22 -6
main.py CHANGED
@@ -1,10 +1,11 @@
1
- from typing import Optional
 
2
  from awq import AutoAWQForCausalLM
3
  from pydantic import BaseModel, Field
4
  from transformers import AutoTokenizer
5
  from contextlib import asynccontextmanager
6
- from starlette.responses import FileResponse
7
- from fastapi import FastAPI, HTTPException, RedirectResponse
8
 
9
  ### FastAPI Initialization
10
  @asynccontextmanager
@@ -23,6 +24,7 @@ class QuantizationConfig(BaseModel):
23
 
24
  class ConvertRequest(BaseModel):
25
  hf_model_name: str
 
26
  hf_token: Optional[str] = Field(None, description="Hugging Face token for private models")
27
  hf_push_repo: Optional[str] = Field(None, description="Hugging Face repo to push the converted model. If not provided, the model will be downloaded only.")
28
  quantization_config: QuantizationConfig = Field(QuantizationConfig(), description="Quantization configuration")
@@ -39,9 +41,23 @@ def read_root():
39
  return {"status": "ok"}
40
 
41
  @app.post("/convert")
42
- def convert(request: ConvertRequest)->FileResponse:
43
- model = AutoAWQForCausalLM.from_pretrained(model_path)
44
- tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  raise HTTPException(status_code=501, detail="Not Implemented yet")
47
  #return FileResponse(file_location, media_type='application/octet-stream',filename=file_name)
 
1
+ import zipfile
2
+ from typing import Optional, Union
3
  from awq import AutoAWQForCausalLM
4
  from pydantic import BaseModel, Field
5
  from transformers import AutoTokenizer
6
  from contextlib import asynccontextmanager
7
+ from fastapi import FastAPI, HTTPException
8
+ from fastapi.responses import RedirectResponse, FileResponse
9
 
10
  ### FastAPI Initialization
11
  @asynccontextmanager
 
24
 
25
class ConvertRequest(BaseModel):
    """Request body for the /convert endpoint.

    Describes which Hugging Face model to quantize, optional credentials,
    an optional push target, and the AWQ quantization settings.
    """

    # Required: Hugging Face model repo id to load and quantize.
    hf_model_name: str
    # Optional tokenizer repo; the endpoint falls back to hf_model_name when unset.
    hf_tokenizer_name: Optional[str] = Field(None, description="Hugging Face tokenizer name. Defaults to hf_model_name")
    # Optional auth token so private repos can be pulled.
    hf_token: Optional[str] = Field(None, description="Hugging Face token for private models")
    hf_push_repo: Optional[str] = Field(None, description="Hugging Face repo to push the converted model. If not provided, the model will be downloaded only.")
    # Defaults to a fresh QuantizationConfig() when the client omits it.
    quantization_config: QuantizationConfig = Field(QuantizationConfig(), description="Quantization configuration")
 
41
  return {"status": "ok"}
42
 
43
@app.post("/convert")
def convert(request: ConvertRequest) -> Union[FileResponse, dict]:
    """Quantize a Hugging Face model with AutoAWQ.

    Loads ``request.hf_model_name`` (and its tokenizer, or
    ``request.hf_tokenizer_name`` when given), runs AWQ quantization with
    ``request.quantization_config``, and — when ``hf_push_repo`` is set —
    saves the quantized model and tokenizer to a local directory.

    Returns:
        dict: a status payload with the local save path when
        ``hf_push_repo`` is provided.

    Raises:
        HTTPException: 501 when no push repo is given — returning the
        converted model as a downloadable file is not implemented yet.
    """
    model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name)
    tokenizer = AutoTokenizer.from_pretrained(
        request.hf_tokenizer_name or request.hf_model_name,
        trust_remote_code=True,
    )

    # BUG FIX: the original referenced an undefined name `quant_config`.
    # AutoAWQ expects a plain dict of quantization settings, so derive it
    # from the request's pydantic model.
    model.quantize(tokenizer, quant_config=request.quantization_config.dict())

    if request.hf_push_repo:
        # BUG FIX: the original referenced an undefined name `quant_path`.
        # Save under a local directory named after the target repo.
        # NOTE(review): nothing is actually pushed to the Hub yet — the
        # artifacts are only written locally; confirm intended behavior.
        quant_path = request.hf_push_repo.split("/")[-1] + "-awq"
        model.save_quantized(quant_path)
        tokenizer.save_pretrained(quant_path)

        return {
            "status": "ok",
            "message": f"Model saved to {quant_path}"
        }

    # TODO: return a zip of the converted model as a FileResponse.
    raise HTTPException(status_code=501, detail="Not Implemented yet")