Lord-Raven committed on
Commit
1dfce92
·
1 Parent(s): ad14b41

Trying ONNX models on CPU.

Browse files
Files changed (2) hide show
  1. app.py +12 -6
  2. requirements.txt +1 -1
app.py CHANGED
@@ -4,9 +4,10 @@ import gradio
4
  import json
5
  import time
6
  from datetime import datetime
 
7
  from fastapi import FastAPI
8
  from fastapi.middleware.cors import CORSMiddleware
9
- from optimum.pipelines import pipeline
10
 
11
  # CORS Config - This isn't actually working; instead, I am taking a gross approach to origin whitelisting within the service.
12
  app = FastAPI()
@@ -24,12 +25,17 @@ print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
24
 
25
  # "xenova/mobilebert-uncased-mnli" "typeform/mobilebert-uncased-mnli" Fast but small--same as bundled in Statosphere
26
 
27
- # model_name = "MoritzLaurer/deberta-v3-base-zeroshot-v2.0"
28
- # tokenizer_name = "MoritzLaurer/deberta-v3-base-zeroshot-v2.0"
29
- model_name = "MoritzLaurer/ModernBERT-large-zeroshot-v2.0"
30
 
31
- classifier_cpu = pipeline(task="zero-shot-classification", model=model_name, provider="CPUExecutionProvider")
32
- classifier_gpu = pipeline(task="zero-shot-classification", model=model_name, provider="CUDAExecutionProvider", device="cuda:0")
 
 
 
 
 
33
 
34
  def classify(data_string, request: gradio.Request):
35
  if request:
 
4
  import json
5
  import time
6
  from datetime import datetime
7
+ from transformers import pipeline
8
  from fastapi import FastAPI
9
  from fastapi.middleware.cors import CORSMiddleware
10
+ from optimum.onnxruntime import ORTModelForSequenceClassification
11
 
12
  # CORS Config - This isn't actually working; instead, I am taking a gross approach to origin whitelisting within the service.
13
  app = FastAPI()
 
25
 
26
  # "xenova/mobilebert-uncased-mnli" "typeform/mobilebert-uncased-mnli" Fast but small--same as bundled in Statosphere
27
 
28
+ model_name = "MoritzLaurer/deberta-v3-base-zeroshot-v2.0"
29
+ tokenizer_name = "MoritzLaurer/deberta-v3-base-zeroshot-v2.0"
30
+ file_name = "onnx/model.onnx"
31
 
32
+ model_name_cpu = "MoritzLaurer/ModernBERT-large-zeroshot-v2.0"
33
+
34
+ model_cpu = ORTModelForSequenceClassification.from_pretrained(model_id=model_name_cpu, file_name=file_name)
35
+ tokenizer_cpu = AutoTokenizer.from_pretrained(model_name_cpu)
36
+
37
+ classifier_cpu = pipeline(task="zero-shot-classification", model=model_cpu, tokenizer=tokenizer_cpu)
38
+ classifier_gpu = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name, device="cuda:0")
39
 
40
  def classify(data_string, request: gradio.Request):
41
  if request:
requirements.txt CHANGED
@@ -4,5 +4,5 @@ huggingface_hub==0.32.4
4
  json5==0.9.25
5
  numpy
6
  uvicorn
7
- optimum[pipelines,exporters,onnxruntime-gpu]==1.25.3
8
  transformers==4.51.3
 
4
  json5==0.9.25
5
  numpy
6
  uvicorn
7
+ optimum[exporters,onnxruntime,onnxruntime-gpu]==1.25.3
8
  transformers==4.51.3