Lord-Raven committed on
Commit
4287a84
·
1 Parent(s): 58b0c40

Trying ONNX models on CPU.

Browse files
Files changed (1) hide show
  1. app.py +5 -10
app.py CHANGED
@@ -2,12 +2,12 @@ import spaces
2
  import torch
3
  import gradio
4
  import json
 
5
  import time
6
  from datetime import datetime
7
- from transformers import AutoTokenizer, pipeline
8
  from fastapi import FastAPI
9
  from fastapi.middleware.cors import CORSMiddleware
10
- from optimum.onnxruntime import ORTModelForSequenceClassification
11
 
12
  # CORS Config - This isn't actually working; instead, I am taking a gross approach to origin whitelisting within the service.
13
  app = FastAPI()
@@ -25,15 +25,10 @@ print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
25
 
26
  # "xenova/mobilebert-uncased-mnli" "typeform/mobilebert-uncased-mnli" Fast but small--same as bundled in Statosphere
27
 
28
- model_name = "MoritzLaurer/deberta-v3-base-zeroshot-v2.0"
29
- tokenizer_name = "MoritzLaurer/deberta-v3-base-zeroshot-v2.0"
30
 
31
- model_name_cpu = "MoritzLaurer/roberta-large-zeroshot-v2.0-c"
32
-
33
- model_cpu = ORTModelForSequenceClassification.from_pretrained(model_id=model_name_cpu, subfolder="onnx", file_name="model.onnx")
34
- tokenizer_cpu = AutoTokenizer.from_pretrained(model_name_cpu)
35
-
36
- classifier_cpu = pipeline(task="zero-shot-classification", model=model_cpu, tokenizer=tokenizer_cpu)
37
  classifier_gpu = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name, device="cuda:0")
38
 
39
  def classify(data_string, request: gradio.Request):
 
2
  import torch
3
  import gradio
4
  import json
5
+ import onnxruntime
6
  import time
7
  from datetime import datetime
8
+ from transformers import pipeline
9
  from fastapi import FastAPI
10
  from fastapi.middleware.cors import CORSMiddleware
 
11
 
12
  # CORS Config - This isn't actually working; instead, I am taking a gross approach to origin whitelisting within the service.
13
  app = FastAPI()
 
25
 
26
  # "xenova/mobilebert-uncased-mnli" "typeform/mobilebert-uncased-mnli" Fast but small--same as bundled in Statosphere
27
 
28
+ model_name = "MoritzLaurer/roberta-large-zeroshot-v2.0-c"
29
+ tokenizer_name = "MoritzLaurer/roberta-large-zeroshot-v2.0-c"
30
 
31
+ classifier_cpu = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name)
 
 
 
 
 
32
  classifier_gpu = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name, device="cuda:0")
33
 
34
  def classify(data_string, request: gradio.Request):